diff --git a/src/otoole/cli.py b/src/otoole/cli.py
index c0305f7..c34abd7 100644
--- a/src/otoole/cli.py
+++ b/src/otoole/cli.py
@@ -38,6 +38,7 @@
     --version, -V  The version of otoole

 """
+
 import argparse
 import logging
 import os
@@ -125,7 +126,6 @@ def setup(args):
     data_type = args.data_type
     data_path = args.data_path
-    write_defaults = args.write_defaults
     overwrite = args.overwrite

     if os.path.exists(data_path) and not overwrite:
@@ -139,9 +139,7 @@ def setup(args):
     elif data_type == "csv":
         config = get_config_setup_data()
         input_data, default_values = get_csv_setup_data(config)
-        WriteCsv(user_config=config).write(
-            input_data, data_path, default_values, write_defaults=write_defaults
-        )
+        WriteCsv(user_config=config).write(input_data, data_path, default_values)


 def get_parser():
@@ -172,7 +170,7 @@ def get_parser():
     result_parser.add_argument(
         "to_format",
         help="Result data format to convert to",
-        choices=sorted(["csv"]),
+        choices=sorted(["csv", "excel"]),
     )
     result_parser.add_argument(
         "from_path", help="Path to file or folder to convert from"
     )
@@ -271,12 +269,6 @@ def get_parser():
         "data_type", help="Type of file to setup", choices=sorted(["config", "csv"])
     )
     setup_parser.add_argument("data_path", help="Path to file or folder to save to")
-    setup_parser.add_argument(
-        "--write_defaults",
-        help="Writes default values",
-        default=False,
-        action="store_true",
-    )
     setup_parser.add_argument(
         "--overwrite",
         help="Overwrites existing data",
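With this change `otoole results <from_format> excel ...` becomes valid, while `otoole setup` loses its `--write_defaults` flag — expansion now happens on the read side, as the following files show. A minimal sketch of the equivalent API call; the paths are illustrative and the package-level re-export of `convert_results` is an assumption, not something this patch shows:

```python
from otoole import convert_results  # assumed package-level re-export

# All paths illustrative: convert a CBC solution file straight to Excel,
# padding missing rows with each result's default value.
convert_results(
    "config.yaml",          # user configuration
    from_format="cbc",
    to_format="excel",      # newly accepted alongside "csv"
    from_path="model.sol",
    to_path="results.xlsx",
    input_format="csv",
    input_path="data/",
    write_defaults=True,
)
```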
diff --git a/src/otoole/convert.py b/src/otoole/convert.py
index 4267ac7..e8b3243 100644
--- a/src/otoole/convert.py
+++ b/src/otoole/convert.py
@@ -30,6 +30,7 @@ def read_results(
     from_path: str,
     input_format: str,
     input_path: str,
+    write_defaults: bool = False,
     glpk_model: Optional[str] = None,
 ) -> Tuple[Dict[str, pd.DataFrame], Dict[str, float]]:
     """Read OSeMOSYS results from CBC, GLPK, Gurobi, or CPLEX results files
@@ -46,6 +47,8 @@ def read_results(
        Format of input data. Available options are 'datafile', 'csv' and 'excel'
    input_path: str
        Path to input data
+    write_defaults: bool, default: False
+        Expand default values to pad dataframes
    glpk_model : str
        Path to ``*.glp`` model file

@@ -56,7 +59,9 @@ def read_results(
    """
    user_config = _get_user_config(config)
    input_strategy = _get_read_strategy(user_config, input_format)
-    result_strategy = _get_read_result_strategy(user_config, from_format, glpk_model)
+    result_strategy = _get_read_result_strategy(
+        user_config, from_format, glpk_model, write_defaults
+    )

    if input_strategy:
        input_data, _ = input_strategy.read(input_path)
@@ -91,7 +96,7 @@ def convert_results(
    from_format : str
        Available options are 'cbc', 'cplex' and 'gurobi'
    to_format : str
-        Available options are 'csv'
+        Available options are 'csv', 'excel'
    from_path : str
        Path to cbc, cplex or gurobi solution file
    to_path : str
@@ -100,8 +105,8 @@ def convert_results(
        Format of input data. Available options are 'datafile', 'csv' and 'excel'
    input_path: str
        Path to input data
-    write_defaults : bool
-        Write default values to CSVs
+    write_defaults: bool, default: False
+        Expand default values to pad dataframes
    glpk_model : str
        Path to ``*.glp`` model file

@@ -119,16 +124,16 @@ def convert_results(

    # set read strategy
-    read_strategy = _get_read_result_strategy(user_config, from_format, glpk_model)
+    read_strategy = _get_read_result_strategy(
+        user_config, from_format, glpk_model, write_defaults
+    )

    # set write strategy
-    write_defaults = True if write_defaults else False
-
    if to_format == "csv":
-        write_strategy = WriteCsv(
-            user_config=user_config, write_defaults=write_defaults
-        )
+        write_strategy: WriteStrategy = WriteCsv(user_config=user_config)
+    elif to_format == "excel":
+        write_strategy = WriteExcel(user_config=user_config)
    else:
        raise NotImplementedError(msg)
@@ -145,7 +150,7 @@


 def _get_read_result_strategy(
-    user_config, from_format, glpk_model=None
+    user_config, from_format, glpk_model=None, write_defaults=False
 ) -> Union[ReadResults, None]:
    """Get ``ReadResults`` for gurobi, cbc, cplex, and glpk formats

@@ -155,6 +160,8 @@
        User configuration describing parameters and sets
    from_format : str
        Available options are 'cbc', 'gurobi', 'cplex', and 'glpk'
+    write_defaults: bool, default: False
+        Write default values to output format
    glpk_model : str
        Path to ``*.glp`` model file

@@ -166,15 +173,25 @@
    """

    if from_format == "cbc":
-        read_strategy: ReadResults = ReadCbc(user_config)
+        read_strategy: ReadResults = ReadCbc(
+            user_config=user_config, write_defaults=write_defaults
+        )
    elif from_format == "gurobi":
-        read_strategy = ReadGurobi(user_config=user_config)
+        read_strategy = ReadGurobi(
+            user_config=user_config, write_defaults=write_defaults
+        )
    elif from_format == "cplex":
-        read_strategy = ReadCplex(user_config=user_config)
+        read_strategy = ReadCplex(
+            user_config=user_config, write_defaults=write_defaults
+        )
    elif from_format == "glpk":
        if not glpk_model:
            raise OtooleError(resource="Read GLPK", message="Provide glpk model file")
-        read_strategy = ReadGlpk(user_config=user_config, glpk_model=glpk_model)
+        read_strategy = ReadGlpk(
+            user_config=user_config,
+            glpk_model=glpk_model,
+            write_defaults=write_defaults,
+        )
    else:
        return None
@@ -204,7 +221,9 @@ def _get_user_config(config) -> dict:
    return user_config


-def _get_read_strategy(user_config, from_format, keep_whitespace=False) -> ReadStrategy:
+def _get_read_strategy(
+    user_config, from_format, keep_whitespace=False, write_defaults=False
+) -> ReadStrategy:
    """Get ``ReadStrategy`` for csv/datafile/excel format

    Arguments
@@ -215,6 +234,8 @@
        Available options are 'datafile', 'datapackage', 'csv' and 'excel'
    keep_whitespace: bool, default: False
        Keep whitespace in CSVs
+    write_defaults: bool, default: False
+        Expand default values to pad dataframes

    Returns
    -------
@@ -225,22 +246,30 @@
    keep_whitespace = True if keep_whitespace else False

    if from_format == "datafile":
-        read_strategy: ReadStrategy = ReadDatafile(user_config=user_config)
+        read_strategy: ReadStrategy = ReadDatafile(
+            user_config=user_config, write_defaults=write_defaults
+        )
    elif from_format == "datapackage":
        logger.warning(
            "Reading from datapackage is deprecated, trying to read from CSVs"
        )
        logger.info("Successfully read folder of CSVs")
        read_strategy = ReadCsv(
-            user_config=user_config, keep_whitespace=keep_whitespace
+            user_config=user_config,
+            keep_whitespace=keep_whitespace,
+            write_defaults=write_defaults,
        )  # typing: ReadStrategy
    elif from_format == "csv":
        read_strategy = ReadCsv(
-            user_config=user_config, keep_whitespace=keep_whitespace
+            user_config=user_config,
+            keep_whitespace=keep_whitespace,
+            write_defaults=write_defaults,
        )  # typing: ReadStrategy
    elif from_format == "excel":
        read_strategy = ReadExcel(
-            user_config=user_config, keep_whitespace=keep_whitespace
+            user_config=user_config,
+            keep_whitespace=keep_whitespace,
+            write_defaults=write_defaults,
        )  # typing: ReadStrategy
    else:
        msg = f"Conversion from {from_format} is not supported"
@@ -249,7 +278,7 @@
    return read_strategy


-def _get_write_strategy(user_config, to_format, write_defaults=False) -> WriteStrategy:
+def _get_write_strategy(user_config, to_format) -> WriteStrategy:
    """Get ``WriteStrategy`` for csv/datafile/excel format

    Arguments
@@ -258,8 +287,6 @@
    user_config : dict
        User configuration describing parameters and sets
    to_format : str
        Available options are 'datafile', 'datapackage', 'csv' and 'excel'
-    write_defaults: bool, default: False
-        Write default values to output format

    Returns
    -------
@@ -267,25 +294,15 @@
    WriteStrategy
        A WriteStrategy object. Returns None if to_format is not recognised

    """
-    # set write strategy
-    write_defaults = True if write_defaults else False

    if to_format == "datapackage":
-        write_strategy: WriteStrategy = WriteCsv(
-            user_config=user_config, write_defaults=write_defaults
-        )
+        write_strategy: WriteStrategy = WriteCsv(user_config=user_config)
    elif to_format == "excel":
-        write_strategy = WriteExcel(
-            user_config=user_config, write_defaults=write_defaults
-        )
+        write_strategy = WriteExcel(user_config=user_config)
    elif to_format == "datafile":
-        write_strategy = WriteDatafile(
-            user_config=user_config, write_defaults=write_defaults
-        )
+        write_strategy = WriteDatafile(user_config=user_config)
    elif to_format == "csv":
-        write_strategy = WriteCsv(
-            user_config=user_config, write_defaults=write_defaults
-        )
+        write_strategy = WriteCsv(user_config=user_config)
    else:
        msg = f"Conversion to {to_format} is not supported"
        raise NotImplementedError(msg)
@@ -315,7 +332,7 @@ def convert(
    from_path : str
        Path to destination file (if datafile or excel) or folder (csv or datapackage)
    write_defaults: bool, default: False
-        Write default values to CSVs
+        Expand default values to pad dataframes
    keep_whitespace: bool, default: False
        Keep whitespace in CSVs

@@ -327,12 +344,13 @@ def convert(
    user_config = _get_user_config(config)

    read_strategy = _get_read_strategy(
-        user_config, from_format, keep_whitespace=keep_whitespace
+        user_config,
+        from_format,
+        keep_whitespace=keep_whitespace,
+        write_defaults=write_defaults,
    )
-    write_strategy = _get_write_strategy(
-        user_config, to_format, write_defaults=write_defaults
-    )
+    write_strategy = _get_write_strategy(user_config, to_format)

    if from_format == "datapackage":
        logger.warning(
@@ -348,7 +366,11 @@


 def read(
-    config: str, from_format: str, from_path: str, keep_whitespace: bool = False
+    config: str,
+    from_format: str,
+    from_path: str,
+    keep_whitespace: bool = False,
+    write_defaults: bool = False,
 ) -> Tuple[Dict[str, pd.DataFrame], Dict[str, float]]:
    """Read OSeMOSYS data from datafile, csv or Excel formats

@@ -362,6 +384,8 @@ def read(
        Path to source file (if datafile or excel) or folder (csv)
    keep_whitespace: bool, default: False
        Keep whitespace in source files
+    write_defaults: bool, default: False
+        Expand default values to pad dataframes

    Returns
    -------
@@ -370,7 +394,10 @@ def read(
    """
    user_config = _get_user_config(config)
    read_strategy = _get_read_strategy(
-        user_config, from_format, keep_whitespace=keep_whitespace
+        user_config,
+        from_format,
+        keep_whitespace=keep_whitespace,
+        write_defaults=write_defaults,
    )

    if from_format == "datapackage":
@@ -403,15 +430,10 @@ def write(
    """
    user_config = _get_user_config(config)
+    write_strategy = _get_write_strategy(user_config, to_format)

    if default_values is None:
-        write_strategy = _get_write_strategy(
-            user_config, to_format, write_defaults=False
-        )
        write_strategy.write(inputs, to_path, {})
    else:
-        write_strategy = _get_write_strategy(
-            user_config, to_format, write_defaults=True
-        )
        write_strategy.write(inputs, to_path, default_values)

    return True
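Because `_get_write_strategy` no longer takes `write_defaults`, padding is requested when reading. A sketch of the resulting round trip — it assumes the `read`/`write` helpers above are re-exported at package level and uses illustrative paths:

```python
from otoole import read, write  # assumed package-level re-exports

# Read a folder of CSVs, expanding defaults so every parameter carries a
# full index, then write the padded data out as a datafile.
inputs, default_values = read("config.yaml", "csv", "data/", write_defaults=True)
write("config.yaml", "datafile", "model.txt", inputs, default_values)
```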
diff --git a/src/otoole/input.py b/src/otoole/input.py
index 210647a..e1082f7 100644
--- a/src/otoole/input.py
+++ b/src/otoole/input.py
@@ -28,6 +28,7 @@
 >>> converter.convert('my_datafile.txt', 'folder_of_csv_files')

 """
+
 from __future__ import annotations

 import logging
@@ -110,6 +111,7 @@ def convert(self, input_filepath: str, output_filepath: str, **kwargs: Dict):
        input_filepath: str
        output_filepath: str
        """
+
        inputs, default_values = self._read(input_filepath, **kwargs)
        self._write(inputs, output_filepath, default_values, **kwargs)

@@ -186,7 +188,6 @@ class WriteStrategy(Strategy):
    user_config: dict, default=None
    filepath: str, default=None
    default_values: dict, default=None
-    write_defaults: bool, default=False
    input_data: dict, default=None

    """
@@ -196,7 +197,6 @@ def __init__(
        self,
        user_config: Dict,
        filepath: Optional[str] = None,
        default_values: Optional[Dict] = None,
-        write_defaults: bool = False,
        input_data: Optional[Dict[str, pd.DataFrame]] = None,
    ):
        super().__init__(user_config=user_config)
@@ -215,8 +215,6 @@
        else:
            self.input_data = {}

-        self.write_defaults = write_defaults
-
    @abstractmethod
    def _header(self) -> Union[TextIO, Any]:
        raise NotImplementedError()
@@ -256,14 +254,10 @@ def write(
        handle = self._header()
        logger.debug(default_values)

-        self.input_data = inputs
-        if self.write_defaults:
-            try:
-                self.input_data = self._expand_defaults(inputs, default_values)
-            except KeyError as ex:
-                logger.debug(ex)
+        self.inputs = inputs  # parameter/set data OR result data
+        self.input_params = kwargs.get("input_data", None)  # parameter/set data

-        for name, df in sorted(self.input_data.items()):
+        for name, df in sorted(self.inputs.items()):
            logger.debug("%s has %s columns: %s", name, len(df.index.names), df.columns)

            try:
@@ -275,9 +269,12 @@ def write(
                raise KeyError("Cannot find %s in input or results config", name)

            if entity_type != "set":
-                default_value = default_values[name]
                self._write_parameter(
-                    df, name, handle, default=default_value, input_data=self.input_data
+                    df,
+                    name,
+                    handle,
+                    default=default_values[name],
+                    input_data=self.inputs,
                )
            else:
                self._write_set(df, name, handle)
@@ -287,70 +284,6 @@ def write(
        if isinstance(handle, TextIO):
            handle.close()

-    def _expand_defaults(
-        self, data_to_expand: Dict[str, pd.DataFrame], default_values: Dict[str, float]
-    ) -> Dict[str, pd.DataFrame]:
-        """Populates default value entry rows in dataframes
-
-        Parameters
-        ----------
-        data_to_expand : Dict[str, pd.DataFrame],
-        default_values : Dict[str, float]
-
-        Returns
-        -------
-        Dict[str, pd.DataFrame]
-            Input data with expanded default values replacing missing entries
-
-        """
-
-        sets = [x for x in self.user_config if self.user_config[x]["type"] == "set"]
-        output = {}
-        for name, data in data_to_expand.items():
-            logger.info(f"Writing defaults for {name}")
-
-            # skip sets
-            if name in sets:
-                output[name] = data
-                continue
-
-            # TODO
-            # Issue with how otoole handles trade route right now.
-            # The double definition of REGION throws an error.
-            if name == "TradeRoute":
-                output[name] = data
-                continue
-
-            # save set information for each parameter
-            index_data = {}
-            for index in data.index.names:
-                index_data[index] = self.input_data[index]["VALUE"].to_list()
-
-            # set index
-            if len(index_data) > 1:
-                new_index = pd.MultiIndex.from_product(
-                    list(index_data.values()), names=list(index_data.keys())
-                )
-            else:
-                new_index = pd.Index(
-                    list(index_data.values())[0], name=list(index_data.keys())[0]
-                )
-            df_default = pd.DataFrame(index=new_index)
-
-            # save default result value
-            df_default["VALUE"] = default_values[name]
-
-            # combine result and default value dataframe
-            if not data.empty:
-                df = pd.concat([data, df_default])
-                df = df[~df.index.duplicated(keep="first")]
-            else:
-                df = df_default
-            df = df.sort_index()
-            output[name] = df
-
-        return output
-

 class ReadStrategy(Strategy):
    """
@@ -360,6 +293,15 @@ class ReadStrategy(Strategy):
    Strategies.
    """

+    def __init__(
+        self,
+        user_config: Dict,
+        write_defaults: bool = False,
+    ):
+        super().__init__(user_config=user_config)
+
+        self.write_defaults = write_defaults
+
    def _check_index(
        self, input_data: Dict[str, pd.DataFrame]
    ) -> Dict[str, pd.DataFrame]:
@@ -588,6 +530,108 @@ def _compare_read_to_expected(
            logger.debug(f"data and config name errors are: {errors}")
            raise OtooleNameMismatchError(name=errors)

+    def _expand_dataframe(
+        self,
+        name: str,
+        input_data: Dict[str, pd.DataFrame],
+        default_values: Dict[str, pd.DataFrame],
+    ) -> pd.DataFrame:
+        """Populates default value entry rows in dataframes
+
+        Parameters
+        ----------
+        name: str
+            Name of parameter/result to expand
+        input_data: Dict[str, pd.DataFrame],
+            internal datastore
+        default_values: Dict[str, pd.DataFrame],
+
+        Returns
+        -------
+        pd.DataFrame,
+            Input data with expanded default values replacing missing entries
+        """
+
+        df = input_data[name]
+
+        # TODO: Issue with how otoole handles trade route right now.
+        # The double definition of REGION throws an error.
+        if name == "TradeRoute":
+            return df
+
+        default_df = self._get_default_dataframe(name, input_data, default_values)
+
+        df = pd.concat([df, default_df])
+        df = df[~df.index.duplicated(keep="first")]
+
+        df = self._check_index_dtypes(name, self.user_config[name], df)
+
+        return df.sort_index()
+
+    def _get_default_dataframe(
+        self,
+        name: str,
+        input_data: Dict[str, pd.DataFrame],
+        default_values: Dict[str, pd.DataFrame],
+    ) -> pd.DataFrame:
+        """Creates default dataframe"""
+
+        index_data = {}
+        indices = self.user_config[name]["indices"]
+        for index in indices:
+            index_data[index] = input_data[index]["VALUE"].to_list()
+
+        if len(index_data) > 1:
+            new_index = pd.MultiIndex.from_product(
+                list(index_data.values()), names=list(index_data.keys())
+            )
+        else:
+            new_index = pd.Index(
+                list(index_data.values())[0], name=list(index_data.keys())[0]
+            )
+
+        df = pd.DataFrame(index=new_index).sort_index()
+        df["VALUE"] = default_values[name]
+
+        return df
+
+    def write_default_params(
+        self,
+        input_data: Dict[str, pd.DataFrame],
+        default_values: Dict[str, Union[str, int, float]],
+    ) -> Dict[str, pd.DataFrame]:
+        """Returns parameter dataframes with default values expanded"""
+        names = [x for x in self.user_config if self.user_config[x]["type"] == "param"]
+        for name in names:
+            try:
+                logger.debug(f"Searching for {name} data to expand")
+                input_data[name] = self._expand_dataframe(
+                    name, input_data, default_values
+                )
+            except KeyError:
+                logger.warning(f"Cannot expand {name} data")
+        return input_data
+
+    def write_default_results(
+        self,
+        result_data: Dict[str, pd.DataFrame],
+        input_data: Dict[str, pd.DataFrame],
+        default_values: Dict[str, Union[str, int, float]],
+    ) -> Dict[str, pd.DataFrame]:
+        """Returns result dataframes with default values expanded"""
+
+        all_data = {**result_data, **input_data}
+        names = [x for x in self.user_config if self.user_config[x]["type"] == "result"]
+        for name in names:
+            try:
+                logger.debug(f"Searching for {name} data to expand")
+                result_data[name] = self._expand_dataframe(
+                    name, all_data, default_values
+                )
+            except KeyError:
+                logger.debug(f"Cannot expand {name} data")
+        return result_data
+
    @abstractmethod
    def read(
        self, filepath: Union[str, TextIO], **kwargs
diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py
index da362ce..3b6273e 100644
--- a/src/otoole/read_strategies.py
+++ b/src/otoole/read_strategies.py
@@ -43,8 +43,13 @@ def read(


 class _ReadTabular(ReadStrategy):
-    def __init__(self, user_config: Dict[str, Dict], keep_whitespace: bool = False):
-        super().__init__(user_config)
+    def __init__(
+        self,
+        user_config: Dict[str, Dict],
+        write_defaults: bool = False,
+        keep_whitespace: bool = False,
+    ):
+        super().__init__(user_config=user_config, write_defaults=write_defaults)
        self.keep_whitespace = keep_whitespace

    def _check_set(self, df: pd.DataFrame, config_details: Dict, name: str):
@@ -174,6 +179,9 @@ def read(
            input_data, config_type=config_type
        )

+        if self.write_defaults:
+            input_data = self.write_default_params(input_data, default_values)
+
        input_data = self._check_index(input_data)

        return input_data, default_values
@@ -248,6 +256,9 @@ def read(

        input_data = self._check_index(input_data)

+        if self.write_defaults:
+            input_data = self.write_default_params(input_data, default_values)
+
        return input_data, default_values

    @staticmethod
@@ -328,13 +339,17 @@ def read(
        # Check filepath exists
        if os.path.exists(filepath):
            amply_datafile = self.read_in_datafile(filepath, config)
-            inputs = self._convert_amply_to_dataframe(amply_datafile, config)
+            input_data = self._convert_amply_to_dataframe(amply_datafile, config)
            for config_type in ["param", "set"]:
-                inputs = self._get_missing_input_dataframes(
-                    inputs, config_type=config_type
+                input_data = self._get_missing_input_dataframes(
+                    input_data, config_type=config_type
                )
-            inputs = self._check_index(inputs)
-            return inputs, default_values
+            input_data = self._check_index(input_data)
+
+            if self.write_defaults:
+                input_data = self.write_default_params(input_data, default_values)
+
+            return input_data, default_values
        else:
            raise FileNotFoundError(f"File not found: {filepath}")
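Every tabular reader now accepts the flag at construction time. A hedged usage sketch — the config and datafile paths are illustrative, and `_read_file` is the same loader the tests below use:

```python
from otoole.read_strategies import ReadDatafile
from otoole.utils import _read_file

# Load the user configuration from a YAML file (illustrative path)
with open("config.yaml", "r") as config_file:
    user_config = _read_file(config_file, ".yaml")

# Read a datafile with every parameter padded out to its full index
reader = ReadDatafile(user_config=user_config, write_defaults=True)
input_data, default_values = reader.read("simplicity.txt")
```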
""" - def __init__(self, user_config: Dict[str, Dict], glpk_model: Union[str, TextIO]): - super().__init__(user_config) + def __init__( + self, + user_config: Dict[str, Dict], + glpk_model: Union[str, TextIO], + write_defaults: bool = False, + ): + super().__init__(user_config=user_config, write_defaults=write_defaults) if isinstance(glpk_model, str): with open(glpk_model, "r") as model_file: diff --git a/src/otoole/write_strategies.py b/src/otoole/write_strategies.py index d4472f8..921497a 100644 --- a/src/otoole/write_strategies.py +++ b/src/otoole/write_strategies.py @@ -152,8 +152,7 @@ def _write_parameter( default : int """ - if not self.write_defaults: - df = self._form_parameter(df, default) + df = self._form_parameter(df, default) handle.write("param default {} : {} :=\n".format(default, parameter_name)) df.to_csv( path_or_buf=handle, diff --git a/tests/test_input.py b/tests/test_input.py index e9a7b14..135d1e5 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -29,10 +29,21 @@ def capital_cost(): data=[ ["SIMPLICITY", "NGCC", 2014, 1.23], ["SIMPLICITY", "NGCC", 2015, 2.34], - ["SIMPLICITY", "NGCC", 2016, 3.45], - ["SIMPLICITY", "HYD1", 2014, 3.45], - ["SIMPLICITY", "HYD1", 2015, 2.34], - ["SIMPLICITY", "HYD1", 2016, 1.23], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ["SIMPLICITY", "HYD1", 2016, 4.56], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + return df + + +@fixture +def new_capacity(): + df = pd.DataFrame( + data=[ + ["SIMPLICITY", "NGCC", 2016, 1.23], + ["SIMPLICITY", "HYD1", 2014, 2.34], + ["SIMPLICITY", "HYD1", 2015, 3.45], ], columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) @@ -49,15 +60,21 @@ def simple_default_values(): @fixture -def simple_input_data(region, year, technology, capital_cost): +def simple_input_data(region, year, technology, capital_cost, discount_rate): return { "REGION": region, "TECHNOLOGY": technology, "YEAR": year, "CapitalCost": capital_cost, + "DiscountRate": discount_rate, } +@fixture +def simple_available_results(new_capacity): + return {"NewCapacity": new_capacity} + + @fixture def simple_user_config(): return { @@ -65,9 +82,15 @@ def simple_user_config(): "indices": ["REGION", "TECHNOLOGY", "YEAR"], "type": "param", "dtype": "float", - "default": 0, + "default": -1, "short_name": "CAPEX", }, + "DiscountRate": { + "indices": ["REGION"], + "type": "param", + "dtype": "float", + "default": 0.25, + }, "REGION": { "dtype": "str", "type": "set", @@ -80,6 +103,12 @@ def simple_user_config(): "dtype": "int", "type": "set", }, + "NewCapacity": { + "indices": ["REGION", "TECHNOLOGY", "YEAR"], + "type": "result", + "dtype": "float", + "default": 20, + }, } @@ -115,191 +144,221 @@ def read( class TestExpandDefaults: - year = pd.DataFrame(data=[2014, 2015, 2016], columns=["VALUE"]) - region = pd.DataFrame(data=["SIMPLICITY"], columns=["VALUE"]) - technology = pd.DataFrame(data=["NGCC", "HYD1"], columns=["VALUE"]) - - def input_data_multi_index_no_defaults(region, technology, year): - capex_in = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, 2000], - ["SIMPLICITY", "HYD1", 2015, 1500], - ["SIMPLICITY", "HYD1", 2016, 1000], - ["SIMPLICITY", "NGCC", 2014, 1000], - ["SIMPLICITY", "NGCC", 2015, 900], - ["SIMPLICITY", "NGCC", 2016, 800], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out = capex_in.copy() - capex_out["VALUE"] = capex_out["VALUE"].astype(float) - - data = { - 
"CapitalCost": capex_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "CapitalCost", capex_out - - def input_data_multi_index(region, technology, year): - capex_in = pd.DataFrame( - [ - ["SIMPLICITY", "NGCC", 2014, 1000], - ["SIMPLICITY", "NGCC", 2015, 900], - ["SIMPLICITY", "HYD1", 2015, 1500], - ["SIMPLICITY", "HYD1", 2016, 1000], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, -1], - ["SIMPLICITY", "HYD1", 2015, 1500], - ["SIMPLICITY", "HYD1", 2016, 1000], - ["SIMPLICITY", "NGCC", 2014, 1000], - ["SIMPLICITY", "NGCC", 2015, 900], - ["SIMPLICITY", "NGCC", 2016, -1], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out["VALUE"] = capex_out["VALUE"].astype(float) + # capital costs fixtures - data = { - "CapitalCost": capex_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "CapitalCost", capex_out + input_data_multi_index_full = pd.DataFrame( + [ + ["SIMPLICITY", "HYD1", 2014, 2000.0], + ["SIMPLICITY", "HYD1", 2015, 1500.0], + ["SIMPLICITY", "HYD1", 2016, 1000.0], + ["SIMPLICITY", "NGCC", 2014, 1000.0], + ["SIMPLICITY", "NGCC", 2015, 900.0], + ["SIMPLICITY", "NGCC", 2016, 800.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - def input_data_multi_index_empty(region, technology, year): - capex_in = pd.DataFrame( - [], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, -1], - ["SIMPLICITY", "HYD1", 2015, -1], - ["SIMPLICITY", "HYD1", 2016, -1], - ["SIMPLICITY", "NGCC", 2014, -1], - ["SIMPLICITY", "NGCC", 2015, -1], - ["SIMPLICITY", "NGCC", 2016, -1], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out["VALUE"] = capex_out["VALUE"].astype(float) + output_data_multi_index_full = input_data_multi_index_full.copy() - data = { - "CapitalCost": capex_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "CapitalCost", capex_out - - def input_data_single_index(region): - discount_rate_in = pd.DataFrame( - [["SIMPLICITY", 0.05]], columns=["REGION", "VALUE"] - ).set_index(["REGION"]) - discount_rate_out = discount_rate_in.copy() - discount_rate_out["VALUE"] = discount_rate_out["VALUE"].astype(float) - - data = { - "DiscountRate": discount_rate_in, - "REGION": region, - } - return data, "DiscountRate", discount_rate_out + input_data_multi_index_partial = pd.DataFrame( + [ + ["SIMPLICITY", "NGCC", 2014, 1000.0], + ["SIMPLICITY", "NGCC", 2015, 900.0], + ["SIMPLICITY", "HYD1", 2015, 1500.0], + ["SIMPLICITY", "HYD1", 2016, 1000.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - def input_data_single_index_empty(region): - discount_rate_in = pd.DataFrame([], columns=["REGION", "VALUE"]).set_index( - ["REGION"] - ) - discount_rate_out = pd.DataFrame( - [["SIMPLICITY", 0.25]], columns=["REGION", "VALUE"] - ).set_index(["REGION"]) - discount_rate_out["VALUE"] = discount_rate_out["VALUE"].astype(float) - - data = { - "DiscountRate": discount_rate_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "DiscountRate", discount_rate_out - - @fixture - def result_data(region): - new_capacity_in = 
pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2015, 100], - ["SIMPLICITY", "HYD1", 2016, 0.1], - ["SIMPLICITY", "NGCC", 2014, 0.5], - ["SIMPLICITY", "NGCC", 2015, 100], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - new_capacity_out = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, 20], - ["SIMPLICITY", "HYD1", 2015, 100], - ["SIMPLICITY", "HYD1", 2016, 0.1], - ["SIMPLICITY", "NGCC", 2014, 0.5], - ["SIMPLICITY", "NGCC", 2015, 100], - ["SIMPLICITY", "NGCC", 2016, 20], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + output_data_multi_index_partial = pd.DataFrame( + [ + ["SIMPLICITY", "HYD1", 2014, -1.0], + ["SIMPLICITY", "HYD1", 2015, 1500.0], + ["SIMPLICITY", "HYD1", 2016, 1000.0], + ["SIMPLICITY", "NGCC", 2014, 1000.0], + ["SIMPLICITY", "NGCC", 2015, 900.0], + ["SIMPLICITY", "NGCC", 2016, -1.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - data = { - "NewCapacity": new_capacity_in, - } - return data, "NewCapacity", new_capacity_out - - parameter_test_data = [ - input_data_multi_index_no_defaults(region, technology, year), - input_data_multi_index(region, technology, year), - input_data_multi_index_empty(region, technology, year), - input_data_single_index(region), - input_data_single_index_empty(region), + # discount rate fixtures + + input_data_multi_index_empty = pd.DataFrame( + [], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + output_data_multi_index_empty = pd.DataFrame( + [ + ["SIMPLICITY", "HYD1", 2014, -1.0], + ["SIMPLICITY", "HYD1", 2015, -1.0], + ["SIMPLICITY", "HYD1", 2016, -1.0], + ["SIMPLICITY", "NGCC", 2014, -1.0], + ["SIMPLICITY", "NGCC", 2015, -1.0], + ["SIMPLICITY", "NGCC", 2016, -1.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + input_data_single_index_full = pd.DataFrame( + [["SIMPLICITY", 0.05]], columns=["REGION", "VALUE"] + ).set_index(["REGION"]) + + output_data_single_index_full = input_data_single_index_full.copy() + + input_data_single_index_empty = pd.DataFrame( + [], columns=["REGION", "VALUE"] + ).set_index(["REGION"]) + + output_data_single_index_empty = pd.DataFrame( + [["SIMPLICITY", 0.25]], columns=["REGION", "VALUE"] + ).set_index(["REGION"]) + + # test expansion of dataframe + + test_data = [ + ("CapitalCost", input_data_multi_index_full, output_data_multi_index_full), + ( + "CapitalCost", + input_data_multi_index_partial, + output_data_multi_index_partial, + ), + ("CapitalCost", input_data_multi_index_empty, output_data_multi_index_empty), + ("DiscountRate", input_data_single_index_full, output_data_single_index_full), + ( + "DiscountRate", + input_data_single_index_empty, + output_data_single_index_empty, + ), ] - parameter_test_data_ids = [ - "multi_index_no_defaults", - "multi_index", + test_data_ids = [ + "multi_index_full", + "multi_index_partial", "multi_index_empty", - "single_index", + "single_index_full", "single_index_empty", ] @mark.parametrize( - "input_data,parameter,expected", - parameter_test_data, - ids=parameter_test_data_ids, + "name,input,expected", + test_data, + ids=test_data_ids, ) def test_expand_parameters_defaults( - self, user_config, simple_default_values, input_data, parameter, expected + self, + simple_user_config, + simple_default_values, + simple_input_data, + name, + input, + expected, ): - write_strategy = 
DummyWriteStrategy( - user_config=user_config, default_values=simple_default_values + input_data = simple_input_data.copy() + input_data[name] = input + + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual = read_strategy._expand_dataframe( + name, input_data, simple_default_values ) - write_strategy.input_data = input_data - actual = write_strategy._expand_defaults( - input_data, write_strategy.default_values + assert_frame_equal(actual, expected) + + def test_expand_results_key_error( + self, simple_user_config, simple_input_data, simple_default_values + ): + read_strategy = DummyReadStrategy( + user_config=simple_user_config, write_defaults=True + ) + + with raises(KeyError, match="SpecifiedAnnualDemand"): + read_strategy._expand_dataframe( + "SpecifiedAnnualDemand", simple_input_data, simple_default_values + ) + + # test get default dataframe + + test_data_defaults = [ + ("CapitalCost", output_data_multi_index_empty), + ("DiscountRate", output_data_single_index_empty), + ] + test_data_defaults_ids = [ + "multi_index", + "single_index", + ] + + @mark.parametrize( + "name,expected", + test_data_defaults, + ids=test_data_defaults_ids, + ) + def test_get_default_dataframe( + self, + simple_user_config, + simple_default_values, + simple_input_data, + name, + expected, + ): + + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual = read_strategy._get_default_dataframe( + name, simple_input_data, simple_default_values ) - assert_frame_equal(actual[parameter], expected) + assert_frame_equal(actual, expected) - def test_expand_result_defaults( - self, user_config, simple_default_values, simple_input_data, result_data + # test expand all input data + + def test_write_default_params( + self, simple_user_config, simple_input_data, simple_default_values ): - write_strategy = DummyWriteStrategy( - user_config=user_config, default_values=simple_default_values + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual_expanded = read_strategy.write_default_params( + simple_input_data, simple_default_values ) - write_strategy.input_data = simple_input_data - actual = write_strategy._expand_defaults( - result_data[0], write_strategy.default_values + actual = actual_expanded["CapitalCost"] + + expected = pd.DataFrame( + data=[ + ["SIMPLICITY", "HYD1", 2014, -1], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ["SIMPLICITY", "HYD1", 2016, 4.56], + ["SIMPLICITY", "NGCC", 2014, 1.23], + ["SIMPLICITY", "NGCC", 2015, 2.34], + ["SIMPLICITY", "NGCC", 2016, -1], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + assert_frame_equal(actual, expected) + + def test_write_default_results( + self, + simple_user_config, + simple_input_data, + simple_available_results, + simple_default_values, + ): + + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual_expanded = read_strategy.write_default_results( + simple_available_results, simple_input_data, simple_default_values ) - assert_frame_equal(actual[result_data[1]], result_data[2]) + + actual = actual_expanded["NewCapacity"] + + expected = pd.DataFrame( + data=[ + ["SIMPLICITY", "HYD1", 2014, 2.34], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ["SIMPLICITY", "HYD1", 2016, 20], + ["SIMPLICITY", "NGCC", 2014, 20], + ["SIMPLICITY", "NGCC", 2015, 20], + ["SIMPLICITY", "NGCC", 2016, 1.23], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + assert_frame_equal(actual, expected) class 
TestReadStrategy: @@ -315,8 +374,8 @@ class TestReadStrategy: ("set", "REGION", pd.DataFrame(columns=["VALUE"])), ) compare_read_to_expected_data = [ - [["CapitalCost", "REGION", "TECHNOLOGY", "YEAR"], False], - [["CAPEX", "REGION", "TECHNOLOGY", "YEAR"], True], + [["CapitalCost", "DiscountRate", "REGION", "TECHNOLOGY", "YEAR"], False], + [["CAPEX", "DiscountRate", "REGION", "TECHNOLOGY", "YEAR"], True], ] compare_read_to_expected_data_exception = [ ["CapitalCost", "REGION", "TECHNOLOGY"], diff --git a/tests/test_read_strategies.py b/tests/test_read_strategies.py index 574fcee..3c11997 100644 --- a/tests/test_read_strategies.py +++ b/tests/test_read_strategies.py @@ -19,6 +19,7 @@ identify_duplicate, rename_duplicate_column, ) +from otoole.utils import _read_file class TestReadCplex: @@ -80,7 +81,6 @@ def test_convert_to_dataframe(self, user_config): reader = ReadCplex(user_config) with StringIO(input_file) as file_buffer: actual = reader._convert_to_dataframe(file_buffer) - # print(actual) expected = pd.DataFrame( [ ["NewCapacity", "SIMPLICITY,ETHPLANT,2015", 0.030000000000000027], @@ -99,7 +99,6 @@ def test_solution_to_dataframe(self, user_config): reader = ReadCplex(user_config) with StringIO(input_file) as file_buffer: actual = reader.read(file_buffer) - # print(actual) expected = ( pd.DataFrame( [ @@ -146,6 +145,32 @@ def test_solution_to_dataframe(self, user_config): ) pd.testing.assert_frame_equal(actual[0]["RateOfActivity"], expected) + def test_solution_to_dataframe_with_defaults(self, user_config): + input_file = self.cplex_data + + regions = pd.DataFrame(data=["SIMPLICITY"], columns=["VALUE"]) + technologies = pd.DataFrame(data=["ETHPLANT"], columns=["VALUE"]) + years = pd.DataFrame(data=[2014, 2015, 2016], columns=["VALUE"]) + input_data = {"REGION": regions, "TECHNOLOGY": technologies, "YEAR": years} + + reader = ReadCplex(user_config, write_defaults=True) + with StringIO(input_file) as file_buffer: + actual = reader.read(file_buffer, input_data=input_data) + expected = ( + pd.DataFrame( + [ + ["SIMPLICITY", "ETHPLANT", 2014, 0], + ["SIMPLICITY", "ETHPLANT", 2015, 0.030000000000000027], + ["SIMPLICITY", "ETHPLANT", 2016, 0.030999999999999917], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ) + .astype({"REGION": str, "TECHNOLOGY": str, "YEAR": "int64", "VALUE": float}) + .set_index(["REGION", "TECHNOLOGY", "YEAR"]) + ) + + pd.testing.assert_frame_equal(actual[0]["NewCapacity"], expected) + class TestReadGurobi: @@ -169,7 +194,6 @@ def test_convert_to_dataframe(self, user_config): reader = ReadGurobi(user_config) with StringIO(input_file) as file_buffer: actual = reader._convert_to_dataframe(file_buffer) - # print(actual) expected = pd.DataFrame( [ ["TotalDiscountedCost", "SIMPLICITY,2014", 1.9360385416218188e02], @@ -191,7 +215,6 @@ def test_solution_to_dataframe(self, user_config): reader = ReadGurobi(user_config) with StringIO(input_file) as file_buffer: actual = reader.read(file_buffer) - # print(actual) expected = ( pd.DataFrame( [ @@ -970,6 +993,21 @@ def test_catch_error_no_parameter(self, caplog, user_config): in caplog.text ) + def test_read_datafile_with_defaults(self, user_config): + datafile = os.path.join("tests", "fixtures", "simplicity.txt") + reader = ReadDatafile(user_config=user_config, write_defaults=True) + actual, _ = reader.read(datafile) + data = [ + ["SIMPLICITY", "DAM", 2014, 0.0], + ["SIMPLICITY", "DAM", 2015, 0.0], + ["SIMPLICITY", "DAM", 2016, 0.0], + ] + expected = pd.DataFrame( + data, columns=["REGION", "STORAGE", "YEAR", "VALUE"] + 
diff --git a/tests/test_read_strategies.py b/tests/test_read_strategies.py
index 574fcee..3c11997 100644
--- a/tests/test_read_strategies.py
+++ b/tests/test_read_strategies.py
@@ -19,6 +19,7 @@
    identify_duplicate,
    rename_duplicate_column,
 )
+from otoole.utils import _read_file


 class TestReadCplex:
@@ -80,7 +81,6 @@ def test_convert_to_dataframe(self, user_config):
        reader = ReadCplex(user_config)
        with StringIO(input_file) as file_buffer:
            actual = reader._convert_to_dataframe(file_buffer)
-        # print(actual)
        expected = pd.DataFrame(
            [
                ["NewCapacity", "SIMPLICITY,ETHPLANT,2015", 0.030000000000000027],
@@ -99,7 +99,6 @@ def test_solution_to_dataframe(self, user_config):
        reader = ReadCplex(user_config)
        with StringIO(input_file) as file_buffer:
            actual = reader.read(file_buffer)
-        # print(actual)
        expected = (
            pd.DataFrame(
                [
@@ -146,6 +145,32 @@ def test_solution_to_dataframe(self, user_config):
        )

        pd.testing.assert_frame_equal(actual[0]["RateOfActivity"], expected)

+    def test_solution_to_dataframe_with_defaults(self, user_config):
+        input_file = self.cplex_data
+
+        regions = pd.DataFrame(data=["SIMPLICITY"], columns=["VALUE"])
+        technologies = pd.DataFrame(data=["ETHPLANT"], columns=["VALUE"])
+        years = pd.DataFrame(data=[2014, 2015, 2016], columns=["VALUE"])
+        input_data = {"REGION": regions, "TECHNOLOGY": technologies, "YEAR": years}
+
+        reader = ReadCplex(user_config, write_defaults=True)
+        with StringIO(input_file) as file_buffer:
+            actual = reader.read(file_buffer, input_data=input_data)
+        expected = (
+            pd.DataFrame(
+                [
+                    ["SIMPLICITY", "ETHPLANT", 2014, 0],
+                    ["SIMPLICITY", "ETHPLANT", 2015, 0.030000000000000027],
+                    ["SIMPLICITY", "ETHPLANT", 2016, 0.030999999999999917],
+                ],
+                columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"],
+            )
+            .astype({"REGION": str, "TECHNOLOGY": str, "YEAR": "int64", "VALUE": float})
+            .set_index(["REGION", "TECHNOLOGY", "YEAR"])
+        )
+
+        pd.testing.assert_frame_equal(actual[0]["NewCapacity"], expected)
+

 class TestReadGurobi:
@@ -169,7 +194,6 @@ def test_convert_to_dataframe(self, user_config):
        reader = ReadGurobi(user_config)
        with StringIO(input_file) as file_buffer:
            actual = reader._convert_to_dataframe(file_buffer)
-        # print(actual)
        expected = pd.DataFrame(
            [
                ["TotalDiscountedCost", "SIMPLICITY,2014", 1.9360385416218188e02],
@@ -191,7 +215,6 @@ def test_solution_to_dataframe(self, user_config):
        reader = ReadGurobi(user_config)
        with StringIO(input_file) as file_buffer:
            actual = reader.read(file_buffer)
-        # print(actual)
        expected = (
            pd.DataFrame(
                [
@@ -970,6 +993,21 @@ def test_catch_error_no_parameter(self, caplog, user_config):
            in caplog.text
        )

+    def test_read_datafile_with_defaults(self, user_config):
+        datafile = os.path.join("tests", "fixtures", "simplicity.txt")
+        reader = ReadDatafile(user_config=user_config, write_defaults=True)
+        actual, _ = reader.read(datafile)
+        data = [
+            ["SIMPLICITY", "DAM", 2014, 0.0],
+            ["SIMPLICITY", "DAM", 2015, 0.0],
+            ["SIMPLICITY", "DAM", 2016, 0.0],
+        ]
+        expected = pd.DataFrame(
+            data, columns=["REGION", "STORAGE", "YEAR", "VALUE"]
+        ).set_index(["REGION", "STORAGE", "YEAR"])
+
+        pd.testing.assert_frame_equal(actual["CapitalCostStorage"].iloc[:3], expected)
+

 class TestReadExcel:
    def test_read_excel_yearsplit(self, user_config):
@@ -1023,6 +1061,21 @@ def test_read_excel_yearsplit(self, user_config):

        assert (actual_data == expected).all()

+    def test_read_excel_with_defaults(self, user_config):
+        spreadsheet = os.path.join("tests", "fixtures", "combined_inputs.xlsx")
+        reader = ReadExcel(user_config=user_config, write_defaults=True)
+        actual, _ = reader.read(spreadsheet)
+        data = [
+            ["09_ROK", "CO2", 2017, -1.0],
+            ["09_ROK", "CO2", 2018, -1.0],
+            ["09_ROK", "CO2", 2019, -1.0],
+        ]
+        expected = pd.DataFrame(
+            data, columns=["REGION", "EMISSION", "YEAR", "VALUE"]
+        ).set_index(["REGION", "EMISSION", "YEAR"])
+
+        pd.testing.assert_frame_equal(actual["AnnualEmissionLimit"].iloc[:3], expected)
+
    def test_narrow_parameters(self, user_config):
        data = [
            ["IW0016", 0.238356164, 0.238356164, 0.238356164],
@@ -1139,6 +1192,26 @@ def test_read_default_values_csv(self, user_config):
        expected = None
        assert actual == expected

+    def test_read_csv_with_defaults(self):
+        user_config_path = os.path.join(
+            "tests", "fixtures", "super_simple", "super_simple.yaml"
+        )
+        with open(user_config_path, "r") as config_file:
+            user_config = _read_file(config_file, ".yaml")
+
+        filepath = os.path.join("tests", "fixtures", "super_simple", "csv")
+        reader = ReadCsv(user_config=user_config, write_defaults=True)
+        actual, _ = reader.read(filepath)
+        data = [
+            ["BB", "gas_import", 2016, 0.0],
+            ["BB", "gas_plant", 2016, 1.03456],
+        ]
+        expected = pd.DataFrame(
+            data, columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"]
+        ).set_index(["REGION", "TECHNOLOGY", "YEAR"])
+
+        pd.testing.assert_frame_equal(actual["CapitalCost"], expected)
+

 class TestReadTabular:
    """Methods shared for csv and excel"""