From 324dfcd7df5a1a33d6aef1e769e3c524b7701668 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Wed, 31 Jan 2024 17:00:37 -0800 Subject: [PATCH 01/17] fix expand defaults --- src/otoole/input.py | 28 ++++++++++------ tests/test_input.py | 82 +++++++++++++++++++++++++-------------------- 2 files changed, 63 insertions(+), 47 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 210647a..28d747f 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -256,14 +256,17 @@ def write( handle = self._header() logger.debug(default_values) - self.input_data = inputs + self.inputs = inputs # parameter/set data OR result data + input_data = kwargs.get("input_data", None) + if self.write_defaults: try: - self.input_data = self._expand_defaults(inputs, default_values) + self.inputs = self._expand_defaults(inputs, default_values, input_data) except KeyError as ex: - logger.debug(ex) + logger.debug(f"Can not write default values due to missing {ex} data") + print(f"Can not write default values due to missing {ex} data") - for name, df in sorted(self.input_data.items()): + for name, df in sorted(self.inputs.items()): logger.debug("%s has %s columns: %s", name, len(df.index.names), df.columns) try: @@ -277,7 +280,7 @@ def write( if entity_type != "set": default_value = default_values[name] self._write_parameter( - df, name, handle, default=default_value, input_data=self.input_data + df, name, handle, default=default_value, input_data=self.inputs ) else: self._write_set(df, name, handle) @@ -288,25 +291,30 @@ def write( handle.close() def _expand_defaults( - self, data_to_expand: Dict[str, pd.DataFrame], default_values: Dict[str, float] + self, inputs: Dict[str, pd.DataFrame], default_values: Dict[str, float], input_data: Dict[str, pd.DataFrame] = None ) -> Dict[str, pd.DataFrame]: """Populates default value entry rows in dataframes Parameters ---------- - data_to_expand : Dict[str, pd.DataFrame], + inputs : Dict[str, pd.DataFrame], + param/set data or result data default_values : Dict[str, float] + defaults of param/result data + input_data: Dict[str, pd.DataFrame] + param/set data needed for expanding result data Returns ------- Dict[str, pd.DataFrame] Input data with expanded default values replacing missing entries - """ sets = [x for x in self.user_config if self.user_config[x]["type"] == "set"] + input_data = input_data if input_data else inputs.copy() + output = {} - for name, data in data_to_expand.items(): + for name, data in inputs.items(): logger.info(f"Writing defaults for {name}") # skip sets @@ -324,7 +332,7 @@ def _expand_defaults( # save set information for each parameter index_data = {} for index in data.index.names: - index_data[index] = self.input_data[index]["VALUE"].to_list() + index_data[index] = input_data[index]["VALUE"].to_list() # set index if len(index_data) > 1: diff --git a/tests/test_input.py b/tests/test_input.py index e9a7b14..6637fc0 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -38,6 +38,17 @@ def capital_cost(): ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) return df +@fixture +def new_capacity(): + df = pd.DataFrame( + data=[ + ["SIMPLICITY", "NGCC", 2016, 1.23], + ["SIMPLICITY", "HYD1", 2014, 2.34], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + return df @fixture() def simple_default_values(): @@ -58,6 +69,12 @@ def simple_input_data(region, year, technology, capital_cost): } +@fixture +def simple_result_data(new_capacity): + return { + "NewCapacity": new_capacity + } + @fixture def simple_user_config(): return { @@ -80,13 +97,19 @@ def simple_user_config(): "dtype": "int", "type": "set", }, + "NewCapacity": { + "indices": ["REGION", "TECHNOLOGY", "YEAR"], + "type": "result", + "dtype": "float", + "default": 0, + }, } # To instantiate abstract class WriteStrategy class DummyWriteStrategy(WriteStrategy): def _header(self) -> Union[TextIO, Any]: - raise NotImplementedError() + pass def _write_parameter( self, @@ -96,13 +119,13 @@ def _write_parameter( default: float, **kwargs ) -> pd.DataFrame: - raise NotImplementedError() + pass def _write_set(self, df: pd.DataFrame, set_name, handle: TextIO) -> pd.DataFrame: - raise NotImplementedError() + pass def _footer(self, handle: TextIO): - raise NotImplementedError() + pass # To instantiate abstract class ReadStrategy @@ -229,34 +252,6 @@ def input_data_single_index_empty(region): } return data, "DiscountRate", discount_rate_out - @fixture - def result_data(region): - new_capacity_in = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2015, 100], - ["SIMPLICITY", "HYD1", 2016, 0.1], - ["SIMPLICITY", "NGCC", 2014, 0.5], - ["SIMPLICITY", "NGCC", 2015, 100], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - new_capacity_out = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, 20], - ["SIMPLICITY", "HYD1", 2015, 100], - ["SIMPLICITY", "HYD1", 2016, 0.1], - ["SIMPLICITY", "NGCC", 2014, 0.5], - ["SIMPLICITY", "NGCC", 2015, 100], - ["SIMPLICITY", "NGCC", 2016, 20], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - - data = { - "NewCapacity": new_capacity_in, - } - return data, "NewCapacity", new_capacity_out - parameter_test_data = [ input_data_multi_index_no_defaults(region, technology, year), input_data_multi_index(region, technology, year), @@ -290,16 +285,28 @@ def test_expand_parameters_defaults( assert_frame_equal(actual[parameter], expected) def test_expand_result_defaults( - self, user_config, simple_default_values, simple_input_data, result_data + self, simple_user_config, simple_default_values, simple_input_data, simple_result_data ): write_strategy = DummyWriteStrategy( - user_config=user_config, default_values=simple_default_values + user_config=simple_user_config, default_values=simple_default_values ) - write_strategy.input_data = simple_input_data actual = write_strategy._expand_defaults( - result_data[0], write_strategy.default_values + simple_result_data, write_strategy.default_values, simple_input_data ) - assert_frame_equal(actual[result_data[1]], result_data[2]) + + expected = pd.DataFrame( + data=[ + ["SIMPLICITY", "HYD1", 2014, 2.34], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ["SIMPLICITY", "HYD1", 2016, 20], + ["SIMPLICITY", "NGCC", 2014, 20], + ["SIMPLICITY", "NGCC", 2015, 20], + ["SIMPLICITY", "NGCC", 2016, 1.23], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + assert_frame_equal(actual["NewCapacity"], expected) class TestReadStrategy: @@ -524,3 +531,4 @@ def test_compare_read_to_expected_exception(self, simple_user_config, expected): reader = DummyReadStrategy(simple_user_config) with raises(OtooleNameMismatchError): reader._compare_read_to_expected(names=expected) + \ No newline at end of file From 2007bfe933efcb3dc588532e148326504ed75a56 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Wed, 31 Jan 2024 17:25:07 -0800 Subject: [PATCH 02/17] expand defaults keyerror test --- src/otoole/input.py | 16 +++++++++------- tests/test_input.py | 33 ++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 28d747f..0b26a3f 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -256,15 +256,14 @@ def write( handle = self._header() logger.debug(default_values) - self.inputs = inputs # parameter/set data OR result data + self.inputs = inputs # parameter/set data OR result data input_data = kwargs.get("input_data", None) - + if self.write_defaults: try: self.inputs = self._expand_defaults(inputs, default_values, input_data) except KeyError as ex: logger.debug(f"Can not write default values due to missing {ex} data") - print(f"Can not write default values due to missing {ex} data") for name, df in sorted(self.inputs.items()): logger.debug("%s has %s columns: %s", name, len(df.index.names), df.columns) @@ -291,13 +290,16 @@ def write( handle.close() def _expand_defaults( - self, inputs: Dict[str, pd.DataFrame], default_values: Dict[str, float], input_data: Dict[str, pd.DataFrame] = None + self, + inputs: Dict[str, pd.DataFrame], + default_values: Dict[str, float], + input_data: Dict[str, pd.DataFrame] = None, ) -> Dict[str, pd.DataFrame]: """Populates default value entry rows in dataframes Parameters ---------- - inputs : Dict[str, pd.DataFrame], + inputs : Dict[str, pd.DataFrame], param/set data or result data default_values : Dict[str, float] defaults of param/result data @@ -311,8 +313,8 @@ def _expand_defaults( """ sets = [x for x in self.user_config if self.user_config[x]["type"] == "set"] - input_data = input_data if input_data else inputs.copy() - + input_data = input_data if input_data else inputs.copy() + output = {} for name, data in inputs.items(): logger.info(f"Writing defaults for {name}") diff --git a/tests/test_input.py b/tests/test_input.py index 6637fc0..bfee24c 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -38,6 +38,7 @@ def capital_cost(): ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) return df + @fixture def new_capacity(): df = pd.DataFrame( @@ -50,6 +51,7 @@ def new_capacity(): ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) return df + @fixture() def simple_default_values(): default_values = {} @@ -71,9 +73,8 @@ def simple_input_data(region, year, technology, capital_cost): @fixture def simple_result_data(new_capacity): - return { - "NewCapacity": new_capacity - } + return {"NewCapacity": new_capacity} + @fixture def simple_user_config(): @@ -285,7 +286,11 @@ def test_expand_parameters_defaults( assert_frame_equal(actual[parameter], expected) def test_expand_result_defaults( - self, simple_user_config, simple_default_values, simple_input_data, simple_result_data + self, + simple_user_config, + simple_default_values, + simple_input_data, + simple_result_data, ): write_strategy = DummyWriteStrategy( user_config=simple_user_config, default_values=simple_default_values @@ -293,7 +298,7 @@ def test_expand_result_defaults( actual = write_strategy._expand_defaults( simple_result_data, write_strategy.default_values, simple_input_data ) - + expected = pd.DataFrame( data=[ ["SIMPLICITY", "HYD1", 2014, 2.34], @@ -305,9 +310,24 @@ def test_expand_result_defaults( ], columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - + assert_frame_equal(actual["NewCapacity"], expected) + def test_expand_results_key_error( + self, simple_user_config, simple_result_data, simple_default_values + ): + """When input data is just the result data""" + write_strategy = DummyWriteStrategy( + user_config=simple_user_config, + default_values=simple_default_values, + write_defaults=True, + ) + + with raises(KeyError, match="REGION"): + write_strategy._expand_defaults( + simple_result_data, write_strategy.default_values + ) + class TestReadStrategy: @@ -531,4 +551,3 @@ def test_compare_read_to_expected_exception(self, simple_user_config, expected): reader = DummyReadStrategy(simple_user_config) with raises(OtooleNameMismatchError): reader._compare_read_to_expected(names=expected) - \ No newline at end of file From 42589554a13a52183a40127c260ad8b93cc12b95 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Wed, 31 Jan 2024 17:38:19 -0800 Subject: [PATCH 03/17] fix typo --- tests/test_input.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_input.py b/tests/test_input.py index bfee24c..5a958ee 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -110,7 +110,7 @@ def simple_user_config(): # To instantiate abstract class WriteStrategy class DummyWriteStrategy(WriteStrategy): def _header(self) -> Union[TextIO, Any]: - pass + raise NotImplementedError() def _write_parameter( self, @@ -120,13 +120,13 @@ def _write_parameter( default: float, **kwargs ) -> pd.DataFrame: - pass + raise NotImplementedError() def _write_set(self, df: pd.DataFrame, set_name, handle: TextIO) -> pd.DataFrame: - pass + raise NotImplementedError() def _footer(self, handle: TextIO): - pass + raise NotImplementedError() # To instantiate abstract class ReadStrategy From 784935365aa9c78960075b09406d6579b8a54e6b Mon Sep 17 00:00:00 2001 From: Will Usher Date: Wed, 7 Feb 2024 10:59:26 +0100 Subject: [PATCH 04/17] Refactor expand defaults to reduce memory use --- src/otoole/input.py | 89 +++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 0b26a3f..299218a 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -259,12 +259,6 @@ def write( self.inputs = inputs # parameter/set data OR result data input_data = kwargs.get("input_data", None) - if self.write_defaults: - try: - self.inputs = self._expand_defaults(inputs, default_values, input_data) - except KeyError as ex: - logger.debug(f"Can not write default values due to missing {ex} data") - for name, df in sorted(self.inputs.items()): logger.debug("%s has %s columns: %s", name, len(df.index.names), df.columns) @@ -278,8 +272,26 @@ def write( if entity_type != "set": default_value = default_values[name] + # This should be moved inside the loop and performed once for each parameter + if self.write_defaults: + try: + logger.info(f"Expanding {name} with default values") + df_expand = self._expand_dataframe( + df, default_value, input_data + ) + except KeyError as ex: + logger.info( + f"Unable to write default values due to missing {ex} data" + ) + else: + df_expand = df + self._write_parameter( - df, name, handle, default=default_value, input_data=self.inputs + df_expand, + name, + handle, + default=default_value, + input_data=self.inputs, ) else: self._write_set(df, name, handle) @@ -289,6 +301,38 @@ def write( if isinstance(handle, TextIO): handle.close() + def _expand_dataframe( + self, data: pd.DataFrame, default: float, input_data: dict[str, pd.DataFrame] + ) -> pd.DataFrame: + """Expand an individual dataframe with default values""" + # save set information for each parameter + index_data = {} + for index in data.index.names: + index_data[index] = input_data[index]["VALUE"].to_list() + + # set index + if len(index_data) > 1: + new_index = pd.MultiIndex.from_product( + list(index_data.values()), names=list(index_data.keys()) + ) + else: + new_index = pd.Index( + list(index_data.values())[0], name=list(index_data.keys())[0] + ) + df_default = pd.DataFrame(index=new_index, dtype="float16") + + # save default result value + df_default["VALUE"] = default + + # combine result and default value dataframe + if not data.empty: + df = pd.concat([data, df_default]) + df = df[~df.index.duplicated(keep="first")] + else: + df = df_default + df = df.sort_index() + return df + def _expand_defaults( self, inputs: Dict[str, pd.DataFrame], @@ -317,7 +361,6 @@ def _expand_defaults( output = {} for name, data in inputs.items(): - logger.info(f"Writing defaults for {name}") # skip sets if name in sets: @@ -331,33 +374,9 @@ def _expand_defaults( output[name] = data continue - # save set information for each parameter - index_data = {} - for index in data.index.names: - index_data[index] = input_data[index]["VALUE"].to_list() - - # set index - if len(index_data) > 1: - new_index = pd.MultiIndex.from_product( - list(index_data.values()), names=list(index_data.keys()) - ) - else: - new_index = pd.Index( - list(index_data.values())[0], name=list(index_data.keys())[0] - ) - df_default = pd.DataFrame(index=new_index) - - # save default result value - df_default["VALUE"] = default_values[name] - - # combine result and default value dataframe - if not data.empty: - df = pd.concat([data, df_default]) - df = df[~df.index.duplicated(keep="first")] - else: - df = df_default - df = df.sort_index() - output[name] = df + output[name] = self._expand_dataframe( + data, default_values[name], input_data + ) return output From 2322bb6964ccddb633fcdf4f760ea5177ec5a2db Mon Sep 17 00:00:00 2001 From: Will Usher Date: Wed, 7 Feb 2024 12:51:56 +0100 Subject: [PATCH 05/17] Add hack to only expand defaults for Annual results --- src/otoole/input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 299218a..a9efc11 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -273,7 +273,7 @@ def write( if entity_type != "set": default_value = default_values[name] # This should be moved inside the loop and performed once for each parameter - if self.write_defaults: + if self.write_defaults and "Annual" in name: try: logger.info(f"Expanding {name} with default values") df_expand = self._expand_dataframe( From 551b8bfd380946d8f5fed899885391c0ff780533 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Wed, 7 Feb 2024 16:04:39 +0100 Subject: [PATCH 06/17] Allow results to be converted to excel format --- src/otoole/cli.py | 2 +- src/otoole/convert.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/otoole/cli.py b/src/otoole/cli.py index c0305f7..e1d971a 100644 --- a/src/otoole/cli.py +++ b/src/otoole/cli.py @@ -172,7 +172,7 @@ def get_parser(): result_parser.add_argument( "to_format", help="Result data format to convert to", - choices=sorted(["csv"]), + choices=sorted(["csv", "excel"]), ) result_parser.add_argument( "from_path", help="Path to file or folder to convert from" diff --git a/src/otoole/convert.py b/src/otoole/convert.py index 3cecd34..b1f1886 100644 --- a/src/otoole/convert.py +++ b/src/otoole/convert.py @@ -90,7 +90,7 @@ def convert_results( from_format : str Available options are 'cbc', 'cplex' and 'gurobi' to_format : str - Available options are 'csv' + Available options are 'csv', 'excel' from_path : str Path to cbc, cplex or gurobi solution file to_path : str @@ -125,7 +125,11 @@ def convert_results( write_defaults = True if write_defaults else False if to_format == "csv": - write_strategy = WriteCsv( + write_strategy: WriteStrategy = WriteCsv( + user_config=user_config, write_defaults=write_defaults + ) + elif to_format == "excel": + write_strategy = WriteExcel( user_config=user_config, write_defaults=write_defaults ) else: From d6aaf4e6f23a489b233f167ce8c07eab3e252e0c Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Mon, 12 Feb 2024 22:10:33 -0800 Subject: [PATCH 07/17] refactor expand defaults --- src/otoole/input.py | 124 ++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 78 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index a9efc11..ff828a7 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -257,7 +257,7 @@ def write( logger.debug(default_values) self.inputs = inputs # parameter/set data OR result data - input_data = kwargs.get("input_data", None) + self.input_params = kwargs.get("input_data", None) # parameter/set data for name, df in sorted(self.inputs.items()): logger.debug("%s has %s columns: %s", name, len(df.index.names), df.columns) @@ -271,26 +271,16 @@ def write( raise KeyError("Cannot find %s in input or results config", name) if entity_type != "set": - default_value = default_values[name] - # This should be moved inside the loop and performed once for each parameter - if self.write_defaults and "Annual" in name: - try: - logger.info(f"Expanding {name} with default values") - df_expand = self._expand_dataframe( - df, default_value, input_data - ) - except KeyError as ex: - logger.info( - f"Unable to write default values due to missing {ex} data" - ) + if self.write_defaults: + df_out = self._expand_dataframe(name, df) else: - df_expand = df + df_out = df self._write_parameter( - df_expand, + df_out, name, handle, - default=default_value, + default=default_values[name], input_data=self.inputs, ) else: @@ -301,84 +291,62 @@ def write( if isinstance(handle, TextIO): handle.close() - def _expand_dataframe( - self, data: pd.DataFrame, default: float, input_data: dict[str, pd.DataFrame] - ) -> pd.DataFrame: - """Expand an individual dataframe with default values""" - # save set information for each parameter - index_data = {} - for index in data.index.names: - index_data[index] = input_data[index]["VALUE"].to_list() - - # set index - if len(index_data) > 1: - new_index = pd.MultiIndex.from_product( - list(index_data.values()), names=list(index_data.keys()) - ) - else: - new_index = pd.Index( - list(index_data.values())[0], name=list(index_data.keys())[0] - ) - df_default = pd.DataFrame(index=new_index, dtype="float16") - - # save default result value - df_default["VALUE"] = default - - # combine result and default value dataframe - if not data.empty: - df = pd.concat([data, df_default]) - df = df[~df.index.duplicated(keep="first")] - else: - df = df_default - df = df.sort_index() - return df - - def _expand_defaults( - self, - inputs: Dict[str, pd.DataFrame], - default_values: Dict[str, float], - input_data: Dict[str, pd.DataFrame] = None, - ) -> Dict[str, pd.DataFrame]: + def _expand_dataframe(self, name: str, df: pd.DataFrame) -> Dict[str, pd.DataFrame]: """Populates default value entry rows in dataframes Parameters ---------- - inputs : Dict[str, pd.DataFrame], - param/set data or result data - default_values : Dict[str, float] - defaults of param/result data - input_data: Dict[str, pd.DataFrame] - param/set data needed for expanding result data + name: str + Name of parameter/result to expand + df: pd.DataFrame, + input parameter/result data to be expanded Returns ------- - Dict[str, pd.DataFrame] + pd.DataFrame, Input data with expanded default values replacing missing entries """ - sets = [x for x in self.user_config if self.user_config[x]["type"] == "set"] - input_data = input_data if input_data else inputs.copy() + # TODO: Issue with how otoole handles trade route right now. + # The double definition of REGION throws an error. + if name == "TradeRoute": + return df + + default_df = self._get_default_dataframe(name) - output = {} - for name, data in inputs.items(): + df = pd.concat([df, default_df]) + df = df[~df.index.duplicated(keep="first")] + return df.sort_index() - # skip sets - if name in sets: - output[name] = data - continue + # default_df.update(df) + # return default_df.sort_index() - # TODO - # Issue with how otoole handles trade route right now. - # The double definition of REGION throws an error. - if name == "TradeRoute": - output[name] = data - continue + def _get_default_dataframe(self, name: str) -> pd.DataFrame: + """Creates default dataframe""" + + index_data = {} + indices = self.user_config[name]["indices"] + try: # result data + for index in indices: + index_data[index] = self.input_params[index]["VALUE"].to_list() + except (TypeError, KeyError): # parameter data + for index in indices: + index_data[index] = self.inputs[index]["VALUE"].to_list() - output[name] = self._expand_dataframe( - data, default_values[name], input_data + if len(index_data) > 1: + new_index = pd.MultiIndex.from_product( + list(index_data.values()), names=list(index_data.keys()) + ) + else: + new_index = pd.Index( + list(index_data.values())[0], name=list(index_data.keys())[0] ) - return output + df = pd.DataFrame(index=new_index) + df["VALUE"] = self.default_values[name] + df["VALUE"] = df.VALUE.astype(self.user_config[name]["dtype"]) + + return df class ReadStrategy(Strategy): From 29c8c743771097057c007440817bb86b0d47b394 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Thu, 28 Mar 2024 16:32:54 -0700 Subject: [PATCH 08/17] write_defaults moved to ReadStrategy --- src/otoole/cli.py | 12 +- src/otoole/convert.py | 119 +++++++++++--------- src/otoole/input.py | 195 ++++++++++++++++++++++----------- src/otoole/read_strategies.py | 46 ++++++-- src/otoole/results/results.py | 9 +- src/otoole/write_strategies.py | 3 +- 6 files changed, 246 insertions(+), 138 deletions(-) diff --git a/src/otoole/cli.py b/src/otoole/cli.py index e1d971a..c34abd7 100644 --- a/src/otoole/cli.py +++ b/src/otoole/cli.py @@ -38,6 +38,7 @@ --version, -V The version of otoole """ + import argparse import logging import os @@ -125,7 +126,6 @@ def setup(args): data_type = args.data_type data_path = args.data_path - write_defaults = args.write_defaults overwrite = args.overwrite if os.path.exists(data_path) and not overwrite: @@ -139,9 +139,7 @@ def setup(args): elif data_type == "csv": config = get_config_setup_data() input_data, default_values = get_csv_setup_data(config) - WriteCsv(user_config=config).write( - input_data, data_path, default_values, write_defaults=write_defaults - ) + WriteCsv(user_config=config).write(input_data, data_path, default_values) def get_parser(): @@ -271,12 +269,6 @@ def get_parser(): "data_type", help="Type of file to setup", choices=sorted(["config", "csv"]) ) setup_parser.add_argument("data_path", help="Path to file or folder to save to") - setup_parser.add_argument( - "--write_defaults", - help="Writes default values", - default=False, - action="store_true", - ) setup_parser.add_argument( "--overwrite", help="Overwrites existing data", diff --git a/src/otoole/convert.py b/src/otoole/convert.py index b1f1886..ff2e8bf 100644 --- a/src/otoole/convert.py +++ b/src/otoole/convert.py @@ -7,6 +7,7 @@ >>> convert('config.yaml', 'excel', 'datafile', 'input.xlsx', 'output.dat') """ + import logging import os from typing import Dict, Optional, Tuple, Union @@ -45,6 +46,8 @@ def read_results( Format of input data. Available options are 'datafile', 'csv' and 'excel' input_path: str Path to input data + write_defaults: bool, default: False + Expand default values to pad dataframes glpk_model : str Path to ``*.glp`` model file @@ -99,8 +102,8 @@ def convert_results( Format of input data. Available options are 'datafile', 'csv' and 'excel' input_path: str Path to input data - write_defaults : bool - Write default values to CSVs + write_defaults: bool, default: False + Expand default values to pad dataframes glpk_model : str Path to ``*.glp`` model file @@ -118,20 +121,16 @@ def convert_results( # set read strategy - read_strategy = _get_read_result_strategy(user_config, from_format, glpk_model) + read_strategy = _get_read_result_strategy( + user_config, from_format, glpk_model, write_defaults + ) # set write strategy - write_defaults = True if write_defaults else False - if to_format == "csv": - write_strategy: WriteStrategy = WriteCsv( - user_config=user_config, write_defaults=write_defaults - ) + write_strategy: WriteStrategy = WriteCsv(user_config=user_config) elif to_format == "excel": - write_strategy = WriteExcel( - user_config=user_config, write_defaults=write_defaults - ) + write_strategy = WriteExcel(user_config=user_config) else: raise NotImplementedError(msg) @@ -148,7 +147,7 @@ def convert_results( def _get_read_result_strategy( - user_config, from_format, glpk_model=None + user_config, from_format, glpk_model=None, write_defaults=False ) -> Union[ReadResults, None]: """Get ``ReadResults`` for gurobi, cbc, cplex, and glpk formats @@ -158,6 +157,8 @@ def _get_read_result_strategy( User configuration describing parameters and sets from_format : str Available options are 'cbc', 'gurobi', 'cplex', and 'glpk' + write_defaults: bool, default: False + Write default values to output format glpk_model : str Path to ``*.glp`` model file @@ -169,15 +170,25 @@ def _get_read_result_strategy( """ if from_format == "cbc": - read_strategy: ReadResults = ReadCbc(user_config) + read_strategy: ReadResults = ReadCbc( + user_config=user_config, write_defaults=write_defaults + ) elif from_format == "gurobi": - read_strategy = ReadGurobi(user_config=user_config) + read_strategy = ReadGurobi( + user_config=user_config, write_defaults=write_defaults + ) elif from_format == "cplex": - read_strategy = ReadCplex(user_config=user_config) + read_strategy = ReadCplex( + user_config=user_config, write_defaults=write_defaults + ) elif from_format == "glpk": if not glpk_model: raise OtooleError(resource="Read GLPK", message="Provide glpk model file") - read_strategy = ReadGlpk(user_config=user_config, glpk_model=glpk_model) + read_strategy = ReadGlpk( + user_config=user_config, + glpk_model=glpk_model, + write_defaults=write_defaults, + ) else: return None @@ -207,7 +218,9 @@ def _get_user_config(config) -> dict: return user_config -def _get_read_strategy(user_config, from_format, keep_whitespace=False) -> ReadStrategy: +def _get_read_strategy( + user_config, from_format, keep_whitespace=False, write_defaults=False +) -> ReadStrategy: """Get ``ReadStrategy`` for csv/datafile/excel format Arguments @@ -218,6 +231,8 @@ def _get_read_strategy(user_config, from_format, keep_whitespace=False) -> ReadS Available options are 'datafile', 'datapackage', 'csv' and 'excel' keep_whitespace: bool, default: False Keep whitespace in CSVs + write_defaults: bool, default: False + Expand default values to pad dataframes Returns ------- @@ -228,22 +243,30 @@ def _get_read_strategy(user_config, from_format, keep_whitespace=False) -> ReadS keep_whitespace = True if keep_whitespace else False if from_format == "datafile": - read_strategy: ReadStrategy = ReadDatafile(user_config=user_config) + read_strategy: ReadStrategy = ReadDatafile( + user_config=user_config, write_defaults=write_defaults + ) elif from_format == "datapackage": logger.warning( "Reading from datapackage is deprecated, trying to read from CSVs" ) logger.info("Successfully read folder of CSVs") read_strategy = ReadCsv( - user_config=user_config, keep_whitespace=keep_whitespace + user_config=user_config, + keep_whitespace=keep_whitespace, + write_defaults=write_defaults, ) # typing: ReadStrategy elif from_format == "csv": read_strategy = ReadCsv( - user_config=user_config, keep_whitespace=keep_whitespace + user_config=user_config, + keep_whitespace=keep_whitespace, + write_defaults=write_defaults, ) # typing: ReadStrategy elif from_format == "excel": read_strategy = ReadExcel( - user_config=user_config, keep_whitespace=keep_whitespace + user_config=user_config, + keep_whitespace=keep_whitespace, + write_defaults=write_defaults, ) # typing: ReadStrategy else: msg = f"Conversion from {from_format} is not supported" @@ -252,7 +275,7 @@ def _get_read_strategy(user_config, from_format, keep_whitespace=False) -> ReadS return read_strategy -def _get_write_strategy(user_config, to_format, write_defaults=False) -> WriteStrategy: +def _get_write_strategy(user_config, to_format) -> WriteStrategy: """Get ``WriteStrategy`` for csv/datafile/excel format Arguments @@ -261,8 +284,6 @@ def _get_write_strategy(user_config, to_format, write_defaults=False) -> WriteSt User configuration describing parameters and sets to_format : str Available options are 'datafile', 'datapackage', 'csv' and 'excel' - write_defaults: bool, default: False - Write default values to output format Returns ------- @@ -270,25 +291,15 @@ def _get_write_strategy(user_config, to_format, write_defaults=False) -> WriteSt A ReadStrategy object. Returns None if to_format is not recognised """ - # set write strategy - write_defaults = True if write_defaults else False if to_format == "datapackage": - write_strategy: WriteStrategy = WriteCsv( - user_config=user_config, write_defaults=write_defaults - ) + write_strategy: WriteStrategy = WriteCsv(user_config=user_config) elif to_format == "excel": - write_strategy = WriteExcel( - user_config=user_config, write_defaults=write_defaults - ) + write_strategy = WriteExcel(user_config=user_config) elif to_format == "datafile": - write_strategy = WriteDatafile( - user_config=user_config, write_defaults=write_defaults - ) + write_strategy = WriteDatafile(user_config=user_config) elif to_format == "csv": - write_strategy = WriteCsv( - user_config=user_config, write_defaults=write_defaults - ) + write_strategy = WriteCsv(user_config=user_config) else: msg = f"Conversion to {to_format} is not supported" raise NotImplementedError(msg) @@ -318,7 +329,7 @@ def convert( from_path : str Path to destination file (if datafile or excel) or folder (csv or datapackage) write_defaults: bool, default: False - Write default values to CSVs + Expand default values to pad dataframes keep_whitespace: bool, default: False Keep whitespace in CSVs @@ -330,12 +341,13 @@ def convert( user_config = _get_user_config(config) read_strategy = _get_read_strategy( - user_config, from_format, keep_whitespace=keep_whitespace + user_config, + from_format, + keep_whitespace=keep_whitespace, + write_defaults=write_defaults, ) - write_strategy = _get_write_strategy( - user_config, to_format, write_defaults=write_defaults - ) + write_strategy = _get_write_strategy(user_config, to_format) if from_format == "datapackage": logger.warning( @@ -351,7 +363,11 @@ def convert( def read( - config: str, from_format: str, from_path: str, keep_whitespace: bool = False + config: str, + from_format: str, + from_path: str, + keep_whitespace: bool = False, + write_defaults: bool = False, ) -> Tuple[Dict[str, pd.DataFrame], Dict[str, float]]: """Read OSeMOSYS data from datafile, csv or Excel formats @@ -365,6 +381,8 @@ def read( Path to source file (if datafile or excel) or folder (csv) keep_whitespace: bool, default: False Keep whitespace in source files + write_defaults: bool, default: False + Expand default values to pad dataframes Returns ------- @@ -373,7 +391,10 @@ def read( """ user_config = _get_user_config(config) read_strategy = _get_read_strategy( - user_config, from_format, keep_whitespace=keep_whitespace + user_config, + from_format, + keep_whitespace=keep_whitespace, + write_defaults=write_defaults, ) if from_format == "datapackage": @@ -407,14 +428,10 @@ def write( """ user_config = _get_user_config(config) if default_values is None: - write_strategy = _get_write_strategy( - user_config, to_format, write_defaults=False - ) + write_strategy = _get_write_strategy(user_config, to_format) write_strategy.write(inputs, to_path, {}) else: - write_strategy = _get_write_strategy( - user_config, to_format, write_defaults=True - ) + write_strategy = _get_write_strategy(user_config, to_format) write_strategy.write(inputs, to_path, default_values) return True diff --git a/src/otoole/input.py b/src/otoole/input.py index ff828a7..28ced00 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -186,7 +186,6 @@ class WriteStrategy(Strategy): user_config: dict, default=None filepath: str, default=None default_values: dict, default=None - write_defaults: bool, default=False input_data: dict, default=None """ @@ -196,7 +195,6 @@ def __init__( user_config: Dict, filepath: Optional[str] = None, default_values: Optional[Dict] = None, - write_defaults: bool = False, input_data: Optional[Dict[str, pd.DataFrame]] = None, ): super().__init__(user_config=user_config) @@ -215,8 +213,6 @@ def __init__( else: self.input_data = {} - self.write_defaults = write_defaults - @abstractmethod def _header(self) -> Union[TextIO, Any]: raise NotImplementedError() @@ -271,13 +267,8 @@ def write( raise KeyError("Cannot find %s in input or results config", name) if entity_type != "set": - if self.write_defaults: - df_out = self._expand_dataframe(name, df) - else: - df_out = df - self._write_parameter( - df_out, + df, name, handle, default=default_values[name], @@ -291,62 +282,59 @@ def write( if isinstance(handle, TextIO): handle.close() - def _expand_dataframe(self, name: str, df: pd.DataFrame) -> Dict[str, pd.DataFrame]: - """Populates default value entry rows in dataframes - - Parameters - ---------- - name: str - Name of parameter/result to expand - df: pd.DataFrame, - input parameter/result data to be expanded - - Returns - ------- - pd.DataFrame, - Input data with expanded default values replacing missing entries - """ - - # TODO: Issue with how otoole handles trade route right now. - # The double definition of REGION throws an error. - if name == "TradeRoute": - return df - - default_df = self._get_default_dataframe(name) - - df = pd.concat([df, default_df]) - df = df[~df.index.duplicated(keep="first")] - return df.sort_index() - - # default_df.update(df) - # return default_df.sort_index() - - def _get_default_dataframe(self, name: str) -> pd.DataFrame: - """Creates default dataframe""" - - index_data = {} - indices = self.user_config[name]["indices"] - try: # result data - for index in indices: - index_data[index] = self.input_params[index]["VALUE"].to_list() - except (TypeError, KeyError): # parameter data - for index in indices: - index_data[index] = self.inputs[index]["VALUE"].to_list() - - if len(index_data) > 1: - new_index = pd.MultiIndex.from_product( - list(index_data.values()), names=list(index_data.keys()) - ) - else: - new_index = pd.Index( - list(index_data.values())[0], name=list(index_data.keys())[0] - ) - - df = pd.DataFrame(index=new_index) - df["VALUE"] = self.default_values[name] - df["VALUE"] = df.VALUE.astype(self.user_config[name]["dtype"]) - - return df + # def _expand_dataframe(self, name: str, df: pd.DataFrame) -> Dict[str, pd.DataFrame]: + # """Populates default value entry rows in dataframes + + # Parameters + # ---------- + # name: str + # Name of parameter/result to expand + # df: pd.DataFrame, + # input parameter/result data to be expanded + + # Returns + # ------- + # pd.DataFrame, + # Input data with expanded default values replacing missing entries + # """ + + # # TODO: Issue with how otoole handles trade route right now. + # # The double definition of REGION throws an error. + # if name == "TradeRoute": + # return df + + # default_df = self._get_default_dataframe(name) + + # df = pd.concat([df, default_df]) + # df = df[~df.index.duplicated(keep="first")] + # return df.sort_index() + + # def _get_default_dataframe(self, name: str) -> pd.DataFrame: + # """Creates default dataframe""" + + # index_data = {} + # indices = self.user_config[name]["indices"] + # try: # result data + # for index in indices: + # index_data[index] = self.input_params[index]["VALUE"].to_list() + # except (TypeError, KeyError): # parameter data + # for index in indices: + # index_data[index] = self.inputs[index]["VALUE"].to_list() + + # if len(index_data) > 1: + # new_index = pd.MultiIndex.from_product( + # list(index_data.values()), names=list(index_data.keys()) + # ) + # else: + # new_index = pd.Index( + # list(index_data.values())[0], name=list(index_data.keys())[0] + # ) + + # df = pd.DataFrame(index=new_index) + # df["VALUE"] = self.default_values[name] + # df["VALUE"] = df.VALUE.astype(self.user_config[name]["dtype"]) + + # return df class ReadStrategy(Strategy): @@ -357,6 +345,15 @@ class ReadStrategy(Strategy): Strategies. """ + def __init__( + self, + user_config: Dict, + write_defaults: bool = False, + ): + super().__init__(user_config=user_config) + + self.write_defaults = write_defaults + def _check_index( self, input_data: Dict[str, pd.DataFrame] ) -> Dict[str, pd.DataFrame]: @@ -585,6 +582,72 @@ def _compare_read_to_expected( logger.debug(f"data and config name errors are: {errors}") raise OtooleNameMismatchError(name=errors) + def _expand_dataframe( + self, + name: str, + input_data: Dict[str, pd.DataFrame], + default_values: Dict[str, pd.DataFrame], + ) -> pd.DataFrame: + """Populates default value entry rows in dataframes + + Parameters + ---------- + name: str + Name of parameter/result to expand + df: pd.DataFrame, + input parameter/result data to be expanded + + Returns + ------- + pd.DataFrame, + Input data with expanded default values replacing missing entries + """ + + try: + df = input_data[name] + except KeyError as ex: + print(ex) + raise KeyError(f"No input data to expand for {name}") + + # TODO: Issue with how otoole handles trade route right now. + # The double definition of REGION throws an error. + if name == "TradeRoute": + return df + + default_df = self._get_default_dataframe(name, input_data, default_values) + + df = pd.concat([df, default_df]) + df = df[~df.index.duplicated(keep="first")] + return df.sort_index() + + def _get_default_dataframe( + self, + name: str, + input_data: Dict[str, pd.DataFrame], + default_values: Dict[str, pd.DataFrame], + ) -> pd.DataFrame: + """Creates default dataframe""" + + index_data = {} + indices = self.user_config[name]["indices"] + for index in indices: + index_data[index] = input_data[index]["VALUE"].to_list() + + if len(index_data) > 1: + new_index = pd.MultiIndex.from_product( + list(index_data.values()), names=list(index_data.keys()) + ) + else: + new_index = pd.Index( + list(index_data.values())[0], name=list(index_data.keys())[0] + ) + + df = pd.DataFrame(index=new_index) + df["VALUE"] = default_values[name] + df["VALUE"] = df.VALUE.astype(self.user_config[name]["dtype"]) + + return df + @abstractmethod def read( self, filepath: Union[str, TextIO], **kwargs diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py index 7f5805c..d886ff9 100644 --- a/src/otoole/read_strategies.py +++ b/src/otoole/read_strategies.py @@ -43,8 +43,13 @@ def read( class _ReadTabular(ReadStrategy): - def __init__(self, user_config: Dict[str, Dict], keep_whitespace: bool = False): - super().__init__(user_config) + def __init__( + self, + user_config: Dict[str, Dict], + write_defaults: bool = False, + keep_whitespace: bool = False, + ): + super().__init__(user_config=user_config, write_defaults=write_defaults) self.keep_whitespace = keep_whitespace def _check_set(self, df: pd.DataFrame, config_details: Dict, name: str): @@ -176,6 +181,14 @@ def read( input_data = self._check_index(input_data) + if self.write_defaults: + for name in [ + x for x in self.user_config if self.user_config[x]["type"] == "param" + ]: + input_data[name] = self._expand_dataframe( + name, input_data, default_values + ) + return input_data, default_values @@ -248,6 +261,14 @@ def read( input_data = self._check_index(input_data) + if self.write_defaults: + for name in [ + x for x in self.user_config if self.user_config[x]["type"] == "param" + ]: + input_data[name] = self._expand_dataframe( + name, input_data, default_values + ) + return input_data, default_values @staticmethod @@ -328,13 +349,24 @@ def read( # Check filepath exists if os.path.exists(filepath): amply_datafile = self.read_in_datafile(filepath, config) - inputs = self._convert_amply_to_dataframe(amply_datafile, config) + input_data = self._convert_amply_to_dataframe(amply_datafile, config) for config_type in ["param", "set"]: - inputs = self._get_missing_input_dataframes( - inputs, config_type=config_type + input_data = self._get_missing_input_dataframes( + input_data, config_type=config_type ) - inputs = self._check_index(inputs) - return inputs, default_values + input_data = self._check_index(input_data) + + if self.write_defaults: + for name in [ + x + for x in self.user_config + if self.user_config[x]["type"] == "param" + ]: + input_data[name] = self._expand_dataframe( + name, input_data, default_values + ) + + return input_data, default_values else: raise FileNotFoundError(f"File not found: {filepath}") diff --git a/src/otoole/results/results.py b/src/otoole/results/results.py index ae45d73..b8bebd4 100644 --- a/src/otoole/results/results.py +++ b/src/otoole/results/results.py @@ -272,8 +272,13 @@ class ReadGlpk(ReadWideResults): Path to GLPK model file. Can be created using the `--wglp` flag. """ - def __init__(self, user_config: Dict[str, Dict], glpk_model: Union[str, TextIO]): - super().__init__(user_config) + def __init__( + self, + user_config: Dict[str, Dict], + glpk_model: Union[str, TextIO], + write_defaults: bool = False, + ): + super().__init__(user_config=user_config, write_defaults=write_defaults) if isinstance(glpk_model, str): with open(glpk_model, "r") as model_file: diff --git a/src/otoole/write_strategies.py b/src/otoole/write_strategies.py index d4472f8..921497a 100644 --- a/src/otoole/write_strategies.py +++ b/src/otoole/write_strategies.py @@ -152,8 +152,7 @@ def _write_parameter( default : int """ - if not self.write_defaults: - df = self._form_parameter(df, default) + df = self._form_parameter(df, default) handle.write("param default {} : {} :=\n".format(default, parameter_name)) df.to_csv( path_or_buf=handle, From 0ad7a07e82ee48aa327df12b48fe6b8d7baea6e5 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Thu, 28 Mar 2024 18:57:44 -0700 Subject: [PATCH 09/17] update some tests --- src/otoole/input.py | 2 + src/otoole/read_strategies.py | 9 +-- tests/test_input.py | 102 ++++++++++++++++++++++++++++------ 3 files changed, 91 insertions(+), 22 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 28ced00..8715c20 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -28,6 +28,7 @@ >>> converter.convert('my_datafile.txt', 'folder_of_csv_files') """ + from __future__ import annotations import logging @@ -615,6 +616,7 @@ def _expand_dataframe( return df default_df = self._get_default_dataframe(name, input_data, default_values) + # default_df = self._check_index_dtypes(name, self.user_config[name], default_df) df = pd.concat([df, default_df]) df = df[~df.index.duplicated(keep="first")] diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py index d886ff9..e5a6432 100644 --- a/src/otoole/read_strategies.py +++ b/src/otoole/read_strategies.py @@ -179,16 +179,17 @@ def read( input_data, config_type=config_type ) - input_data = self._check_index(input_data) - if self.write_defaults: - for name in [ + names = [ x for x in self.user_config if self.user_config[x]["type"] == "param" - ]: + ] + for name in names: input_data[name] = self._expand_dataframe( name, input_data, default_values ) + input_data = self._check_index(input_data) + return input_data, default_values diff --git a/tests/test_input.py b/tests/test_input.py index 5a958ee..18dc0f4 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -86,6 +86,12 @@ def simple_user_config(): "default": 0, "short_name": "CAPEX", }, + "DiscountRate": { + "indices": ["REGION"], + "type": "param", + "dtype": "float", + "default": 0.25, + }, "REGION": { "dtype": "str", "type": "set", @@ -276,14 +282,15 @@ def input_data_single_index_empty(region): def test_expand_parameters_defaults( self, user_config, simple_default_values, input_data, parameter, expected ): - write_strategy = DummyWriteStrategy( - user_config=user_config, default_values=simple_default_values - ) - write_strategy.input_data = input_data - actual = write_strategy._expand_defaults( - input_data, write_strategy.default_values + read_strategy = DummyReadStrategy(user_config=user_config) + actual = read_strategy._expand_dataframe( + parameter, input_data, simple_default_values ) - assert_frame_equal(actual[parameter], expected) + print("\n") + print(actual.index.dtypes) + print("\n") + print(expected.index.dtypes) + assert_frame_equal(actual, expected) def test_expand_result_defaults( self, @@ -292,11 +299,11 @@ def test_expand_result_defaults( simple_input_data, simple_result_data, ): - write_strategy = DummyWriteStrategy( - user_config=simple_user_config, default_values=simple_default_values + read_strategy = DummyReadStrategy( + user_config=simple_user_config, write_defaults=True ) - actual = write_strategy._expand_defaults( - simple_result_data, write_strategy.default_values, simple_input_data + actual = read_strategy._expand_dataframe( + "NewCapacity", simple_input_data, simple_default_values ) expected = pd.DataFrame( @@ -317,17 +324,76 @@ def test_expand_results_key_error( self, simple_user_config, simple_result_data, simple_default_values ): """When input data is just the result data""" - write_strategy = DummyWriteStrategy( - user_config=simple_user_config, - default_values=simple_default_values, - write_defaults=True, + read_strategy = DummyReadStrategy( + user_config=simple_user_config, write_defaults=True ) - with raises(KeyError, match="REGION"): - write_strategy._expand_defaults( - simple_result_data, write_strategy.default_values + with raises(KeyError, match="SpecifiedAnnualDemand"): + read_strategy._expand_dataframe( + "SpecifiedAnnualDemand", simple_result_data, simple_default_values ) + def defaults_dataframe_single_index(region): + discount_rate_out = pd.DataFrame( + [["SIMPLICITY", 0.25]], columns=["REGION", "VALUE"] + ).set_index(["REGION"]) + discount_rate_out["VALUE"] = discount_rate_out["VALUE"].astype(float) + + data = { + "REGION": region, + } + return data, "DiscountRate", discount_rate_out + + def defaults_dataframe_multi_index(region, technology, year): + capex_out = pd.DataFrame( + [ + ["SIMPLICITY", "NGCC", 2014, -1], + ["SIMPLICITY", "NGCC", 2015, -1], + ["SIMPLICITY", "NGCC", 2016, -1], + ["SIMPLICITY", "HYD1", 2014, -1], + ["SIMPLICITY", "HYD1", 2015, -1], + ["SIMPLICITY", "HYD1", 2016, -1], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + capex_out["VALUE"] = capex_out["VALUE"].astype(float) + + data = { + "TECHNOLOGY": technology, + "YEAR": year, + "REGION": region, + } + return data, "CapitalCost", capex_out + + parameter_test_data = [ + defaults_dataframe_single_index(region), + defaults_dataframe_multi_index(region, technology, year), + ] + parameter_test_data_ids = [ + "single_index", + "multi_index", + ] + + @mark.parametrize( + "input_data,parameter,expected", + parameter_test_data, + ids=parameter_test_data_ids, + ) + def test_get_default_dataframe( + self, + simple_user_config, + simple_default_values, + simple_input_data, + input_data, + parameter, + expected, + ): + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual = read_strategy._get_default_dataframe( + parameter, input_data, simple_default_values + ) + assert_frame_equal(actual, expected) + class TestReadStrategy: From 5599cfda2429f1fbe106e3c5d21d366fb80fd1a6 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sat, 30 Mar 2024 12:08:43 -0700 Subject: [PATCH 10/17] fix tests --- src/otoole/input.py | 61 +-------- tests/test_input.py | 323 +++++++++++++++++--------------------------- 2 files changed, 125 insertions(+), 259 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 8715c20..5e8579c 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -283,60 +283,6 @@ def write( if isinstance(handle, TextIO): handle.close() - # def _expand_dataframe(self, name: str, df: pd.DataFrame) -> Dict[str, pd.DataFrame]: - # """Populates default value entry rows in dataframes - - # Parameters - # ---------- - # name: str - # Name of parameter/result to expand - # df: pd.DataFrame, - # input parameter/result data to be expanded - - # Returns - # ------- - # pd.DataFrame, - # Input data with expanded default values replacing missing entries - # """ - - # # TODO: Issue with how otoole handles trade route right now. - # # The double definition of REGION throws an error. - # if name == "TradeRoute": - # return df - - # default_df = self._get_default_dataframe(name) - - # df = pd.concat([df, default_df]) - # df = df[~df.index.duplicated(keep="first")] - # return df.sort_index() - - # def _get_default_dataframe(self, name: str) -> pd.DataFrame: - # """Creates default dataframe""" - - # index_data = {} - # indices = self.user_config[name]["indices"] - # try: # result data - # for index in indices: - # index_data[index] = self.input_params[index]["VALUE"].to_list() - # except (TypeError, KeyError): # parameter data - # for index in indices: - # index_data[index] = self.inputs[index]["VALUE"].to_list() - - # if len(index_data) > 1: - # new_index = pd.MultiIndex.from_product( - # list(index_data.values()), names=list(index_data.keys()) - # ) - # else: - # new_index = pd.Index( - # list(index_data.values())[0], name=list(index_data.keys())[0] - # ) - - # df = pd.DataFrame(index=new_index) - # df["VALUE"] = self.default_values[name] - # df["VALUE"] = df.VALUE.astype(self.user_config[name]["dtype"]) - - # return df - class ReadStrategy(Strategy): """ @@ -616,10 +562,12 @@ def _expand_dataframe( return df default_df = self._get_default_dataframe(name, input_data, default_values) - # default_df = self._check_index_dtypes(name, self.user_config[name], default_df) df = pd.concat([df, default_df]) df = df[~df.index.duplicated(keep="first")] + + df = self._check_index_dtypes(name, self.user_config[name], df) + return df.sort_index() def _get_default_dataframe( @@ -644,9 +592,8 @@ def _get_default_dataframe( list(index_data.values())[0], name=list(index_data.keys())[0] ) - df = pd.DataFrame(index=new_index) + df = pd.DataFrame(index=new_index).sort_index() df["VALUE"] = default_values[name] - df["VALUE"] = df.VALUE.astype(self.user_config[name]["dtype"]) return df diff --git a/tests/test_input.py b/tests/test_input.py index 18dc0f4..1b78f2b 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -62,12 +62,13 @@ def simple_default_values(): @fixture -def simple_input_data(region, year, technology, capital_cost): +def simple_input_data(region, year, technology, capital_cost, discount_rate): return { "REGION": region, "TECHNOLOGY": technology, "YEAR": year, - "CapitalCost": capital_cost, + # "CapitalCost": capital_cost, + # "DiscountRate": discount_rate } @@ -145,180 +146,126 @@ def read( class TestExpandDefaults: + # simple set fixtures + year = pd.DataFrame(data=[2014, 2015, 2016], columns=["VALUE"]) region = pd.DataFrame(data=["SIMPLICITY"], columns=["VALUE"]) technology = pd.DataFrame(data=["NGCC", "HYD1"], columns=["VALUE"]) - def input_data_multi_index_no_defaults(region, technology, year): - capex_in = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, 2000], - ["SIMPLICITY", "HYD1", 2015, 1500], - ["SIMPLICITY", "HYD1", 2016, 1000], - ["SIMPLICITY", "NGCC", 2014, 1000], - ["SIMPLICITY", "NGCC", 2015, 900], - ["SIMPLICITY", "NGCC", 2016, 800], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out = capex_in.copy() - capex_out["VALUE"] = capex_out["VALUE"].astype(float) - - data = { - "CapitalCost": capex_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "CapitalCost", capex_out - - def input_data_multi_index(region, technology, year): - capex_in = pd.DataFrame( - [ - ["SIMPLICITY", "NGCC", 2014, 1000], - ["SIMPLICITY", "NGCC", 2015, 900], - ["SIMPLICITY", "HYD1", 2015, 1500], - ["SIMPLICITY", "HYD1", 2016, 1000], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, -1], - ["SIMPLICITY", "HYD1", 2015, 1500], - ["SIMPLICITY", "HYD1", 2016, 1000], - ["SIMPLICITY", "NGCC", 2014, 1000], - ["SIMPLICITY", "NGCC", 2015, 900], - ["SIMPLICITY", "NGCC", 2016, -1], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out["VALUE"] = capex_out["VALUE"].astype(float) + # capital costs fixtures - data = { - "CapitalCost": capex_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "CapitalCost", capex_out + input_data_multi_index_full = pd.DataFrame( + [ + ["SIMPLICITY", "HYD1", 2014, 2000.0], + ["SIMPLICITY", "HYD1", 2015, 1500.0], + ["SIMPLICITY", "HYD1", 2016, 1000.0], + ["SIMPLICITY", "NGCC", 2014, 1000.0], + ["SIMPLICITY", "NGCC", 2015, 900.0], + ["SIMPLICITY", "NGCC", 2016, 800.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - def input_data_multi_index_empty(region, technology, year): - capex_in = pd.DataFrame( - [], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out = pd.DataFrame( - [ - ["SIMPLICITY", "HYD1", 2014, -1], - ["SIMPLICITY", "HYD1", 2015, -1], - ["SIMPLICITY", "HYD1", 2016, -1], - ["SIMPLICITY", "NGCC", 2014, -1], - ["SIMPLICITY", "NGCC", 2015, -1], - ["SIMPLICITY", "NGCC", 2016, -1], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out["VALUE"] = capex_out["VALUE"].astype(float) + output_data_multi_index_full = input_data_multi_index_full.copy() - data = { - "CapitalCost": capex_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "CapitalCost", capex_out - - def input_data_single_index(region): - discount_rate_in = pd.DataFrame( - [["SIMPLICITY", 0.05]], columns=["REGION", "VALUE"] - ).set_index(["REGION"]) - discount_rate_out = discount_rate_in.copy() - discount_rate_out["VALUE"] = discount_rate_out["VALUE"].astype(float) - - data = { - "DiscountRate": discount_rate_in, - "REGION": region, - } - return data, "DiscountRate", discount_rate_out + input_data_multi_index_partial = pd.DataFrame( + [ + ["SIMPLICITY", "NGCC", 2014, 1000.0], + ["SIMPLICITY", "NGCC", 2015, 900.0], + ["SIMPLICITY", "HYD1", 2015, 1500.0], + ["SIMPLICITY", "HYD1", 2016, 1000.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - def input_data_single_index_empty(region): - discount_rate_in = pd.DataFrame([], columns=["REGION", "VALUE"]).set_index( - ["REGION"] - ) - discount_rate_out = pd.DataFrame( - [["SIMPLICITY", 0.25]], columns=["REGION", "VALUE"] - ).set_index(["REGION"]) - discount_rate_out["VALUE"] = discount_rate_out["VALUE"].astype(float) - - data = { - "DiscountRate": discount_rate_in, - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "DiscountRate", discount_rate_out - - parameter_test_data = [ - input_data_multi_index_no_defaults(region, technology, year), - input_data_multi_index(region, technology, year), - input_data_multi_index_empty(region, technology, year), - input_data_single_index(region), - input_data_single_index_empty(region), + output_data_multi_index_partial = pd.DataFrame( + [ + ["SIMPLICITY", "HYD1", 2014, -1.0], + ["SIMPLICITY", "HYD1", 2015, 1500.0], + ["SIMPLICITY", "HYD1", 2016, 1000.0], + ["SIMPLICITY", "NGCC", 2014, 1000.0], + ["SIMPLICITY", "NGCC", 2015, 900.0], + ["SIMPLICITY", "NGCC", 2016, -1.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + # discount rate fixtures + + input_data_multi_index_empty = pd.DataFrame( + [], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + output_data_multi_index_empty = pd.DataFrame( + [ + ["SIMPLICITY", "HYD1", 2014, -1.0], + ["SIMPLICITY", "HYD1", 2015, -1.0], + ["SIMPLICITY", "HYD1", 2016, -1.0], + ["SIMPLICITY", "NGCC", 2014, -1.0], + ["SIMPLICITY", "NGCC", 2015, -1.0], + ["SIMPLICITY", "NGCC", 2016, -1.0], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + input_data_single_index_full = pd.DataFrame( + [["SIMPLICITY", 0.05]], columns=["REGION", "VALUE"] + ).set_index(["REGION"]) + + output_data_single_index_full = input_data_single_index_full.copy() + + input_data_single_index_empty = pd.DataFrame( + [], columns=["REGION", "VALUE"] + ).set_index(["REGION"]) + + output_data_single_index_empty = pd.DataFrame( + [["SIMPLICITY", 0.25]], columns=["REGION", "VALUE"] + ).set_index(["REGION"]) + + test_data = [ + ("CapitalCost", input_data_multi_index_full, output_data_multi_index_full), + ( + "CapitalCost", + input_data_multi_index_partial, + output_data_multi_index_partial, + ), + ("CapitalCost", input_data_multi_index_empty, output_data_multi_index_empty), + ("DiscountRate", input_data_single_index_full, output_data_single_index_full), + ( + "DiscountRate", + input_data_single_index_empty, + output_data_single_index_empty, + ), ] - parameter_test_data_ids = [ - "multi_index_no_defaults", - "multi_index", + test_data_ids = [ + "multi_index_full", + "multi_index_partial", "multi_index_empty", - "single_index", + "single_index_full", "single_index_empty", ] @mark.parametrize( - "input_data,parameter,expected", - parameter_test_data, - ids=parameter_test_data_ids, + "name,input,expected", + test_data, + ids=test_data_ids, ) def test_expand_parameters_defaults( - self, user_config, simple_default_values, input_data, parameter, expected + self, simple_user_config, simple_default_values, name, input, expected ): - read_strategy = DummyReadStrategy(user_config=user_config) - actual = read_strategy._expand_dataframe( - parameter, input_data, simple_default_values - ) - print("\n") - print(actual.index.dtypes) - print("\n") - print(expected.index.dtypes) - assert_frame_equal(actual, expected) + input_data = { + "REGION": self.region, + "TECHNOLOGY": self.technology, + "YEAR": self.year, + name: input, + } - def test_expand_result_defaults( - self, - simple_user_config, - simple_default_values, - simple_input_data, - simple_result_data, - ): - read_strategy = DummyReadStrategy( - user_config=simple_user_config, write_defaults=True - ) + read_strategy = DummyReadStrategy(user_config=simple_user_config) actual = read_strategy._expand_dataframe( - "NewCapacity", simple_input_data, simple_default_values + name, input_data, simple_default_values ) - - expected = pd.DataFrame( - data=[ - ["SIMPLICITY", "HYD1", 2014, 2.34], - ["SIMPLICITY", "HYD1", 2015, 3.45], - ["SIMPLICITY", "HYD1", 2016, 20], - ["SIMPLICITY", "NGCC", 2014, 20], - ["SIMPLICITY", "NGCC", 2015, 20], - ["SIMPLICITY", "NGCC", 2016, 1.23], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - - assert_frame_equal(actual["NewCapacity"], expected) + assert_frame_equal(actual, expected) def test_expand_results_key_error( self, simple_user_config, simple_result_data, simple_default_values @@ -333,64 +280,36 @@ def test_expand_results_key_error( "SpecifiedAnnualDemand", simple_result_data, simple_default_values ) - def defaults_dataframe_single_index(region): - discount_rate_out = pd.DataFrame( - [["SIMPLICITY", 0.25]], columns=["REGION", "VALUE"] - ).set_index(["REGION"]) - discount_rate_out["VALUE"] = discount_rate_out["VALUE"].astype(float) - - data = { - "REGION": region, - } - return data, "DiscountRate", discount_rate_out - - def defaults_dataframe_multi_index(region, technology, year): - capex_out = pd.DataFrame( - [ - ["SIMPLICITY", "NGCC", 2014, -1], - ["SIMPLICITY", "NGCC", 2015, -1], - ["SIMPLICITY", "NGCC", 2016, -1], - ["SIMPLICITY", "HYD1", 2014, -1], - ["SIMPLICITY", "HYD1", 2015, -1], - ["SIMPLICITY", "HYD1", 2016, -1], - ], - columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], - ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) - capex_out["VALUE"] = capex_out["VALUE"].astype(float) - - data = { - "TECHNOLOGY": technology, - "YEAR": year, - "REGION": region, - } - return data, "CapitalCost", capex_out - - parameter_test_data = [ - defaults_dataframe_single_index(region), - defaults_dataframe_multi_index(region, technology, year), + test_data_defaults = [ + ("CapitalCost", output_data_multi_index_empty), + ("DiscountRate", output_data_single_index_empty), ] - parameter_test_data_ids = [ - "single_index", + test_data_defaults_ids = [ "multi_index", + "single_index", ] @mark.parametrize( - "input_data,parameter,expected", - parameter_test_data, - ids=parameter_test_data_ids, + "name,expected", + test_data_defaults, + ids=test_data_defaults_ids, ) def test_get_default_dataframe( self, simple_user_config, simple_default_values, - simple_input_data, - input_data, - parameter, + name, expected, ): + input_data = { + "REGION": self.region, + "TECHNOLOGY": self.technology, + "YEAR": self.year, + } + read_strategy = DummyReadStrategy(user_config=simple_user_config) actual = read_strategy._get_default_dataframe( - parameter, input_data, simple_default_values + name, input_data, simple_default_values ) assert_frame_equal(actual, expected) @@ -408,8 +327,8 @@ class TestReadStrategy: ("set", "REGION", pd.DataFrame(columns=["VALUE"])), ) compare_read_to_expected_data = [ - [["CapitalCost", "REGION", "TECHNOLOGY", "YEAR"], False], - [["CAPEX", "REGION", "TECHNOLOGY", "YEAR"], True], + [["CapitalCost", "DiscountRate", "REGION", "TECHNOLOGY", "YEAR"], False], + [["CAPEX", "DiscountRate", "REGION", "TECHNOLOGY", "YEAR"], True], ] compare_read_to_expected_data_exception = [ ["CapitalCost", "REGION", "TECHNOLOGY"], From 2115573bc11ce02db93214eacc15025eab36ef53 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sun, 31 Mar 2024 13:12:19 -0700 Subject: [PATCH 11/17] simplify existing df expansion tests --- tests/test_input.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/tests/test_input.py b/tests/test_input.py index 1b78f2b..fc5f735 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -146,12 +146,6 @@ def read( class TestExpandDefaults: - # simple set fixtures - - year = pd.DataFrame(data=[2014, 2015, 2016], columns=["VALUE"]) - region = pd.DataFrame(data=["SIMPLICITY"], columns=["VALUE"]) - technology = pd.DataFrame(data=["NGCC", "HYD1"], columns=["VALUE"]) - # capital costs fixtures input_data_multi_index_full = pd.DataFrame( @@ -223,6 +217,8 @@ class TestExpandDefaults: [["SIMPLICITY", 0.25]], columns=["REGION", "VALUE"] ).set_index(["REGION"]) + # test expansion of dataframe + test_data = [ ("CapitalCost", input_data_multi_index_full, output_data_multi_index_full), ( @@ -252,14 +248,16 @@ class TestExpandDefaults: ids=test_data_ids, ) def test_expand_parameters_defaults( - self, simple_user_config, simple_default_values, name, input, expected + self, + simple_user_config, + simple_default_values, + simple_input_data, + name, + input, + expected, ): - input_data = { - "REGION": self.region, - "TECHNOLOGY": self.technology, - "YEAR": self.year, - name: input, - } + input_data = simple_input_data.copy() + input_data[name] = input read_strategy = DummyReadStrategy(user_config=simple_user_config) actual = read_strategy._expand_dataframe( @@ -270,7 +268,6 @@ def test_expand_parameters_defaults( def test_expand_results_key_error( self, simple_user_config, simple_result_data, simple_default_values ): - """When input data is just the result data""" read_strategy = DummyReadStrategy( user_config=simple_user_config, write_defaults=True ) @@ -280,6 +277,8 @@ def test_expand_results_key_error( "SpecifiedAnnualDemand", simple_result_data, simple_default_values ) + # test get default dataframe + test_data_defaults = [ ("CapitalCost", output_data_multi_index_empty), ("DiscountRate", output_data_single_index_empty), @@ -298,18 +297,14 @@ def test_get_default_dataframe( self, simple_user_config, simple_default_values, + simple_input_data, name, expected, ): - input_data = { - "REGION": self.region, - "TECHNOLOGY": self.technology, - "YEAR": self.year, - } read_strategy = DummyReadStrategy(user_config=simple_user_config) actual = read_strategy._get_default_dataframe( - name, input_data, simple_default_values + name, simple_input_data, simple_default_values ) assert_frame_equal(actual, expected) From 4916003f0cc68909a75be6d6e63da710cb0ab46a Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sun, 31 Mar 2024 14:31:48 -0700 Subject: [PATCH 12/17] write default function added --- src/otoole/input.py | 22 ++++++++ src/otoole/read_strategies.py | 24 ++------- src/otoole/results/results.py | 3 ++ tests/test_input.py | 98 +++++++++++++++++++++++++++++++---- 4 files changed, 115 insertions(+), 32 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 5e8579c..2ffb8d1 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -597,6 +597,28 @@ def _get_default_dataframe( return df + def write_default_params( + self, + input_data: Dict[str, pd.DataFrame], + default_values: Dict[str, Union[str, int, float]], + ) -> Dict[str, pd.DataFrame]: + """Returns paramter dataframes with default values expanded""" + names = [x for x in self.user_config if self.user_config[x]["type"] == "param"] + for name in names: + input_data[name] = self._expand_dataframe(name, input_data, default_values) + return input_data + + def write_default_results( + self, + input_data: Dict[str, pd.DataFrame], + default_values: Dict[str, Union[str, int, float]], + ) -> Dict[str, pd.DataFrame]: + """Returns result dataframes with default values expanded""" + names = [x for x in self.user_config if self.user_config[x]["type"] == "result"] + for name in names: + input_data[name] = self._expand_dataframe(name, input_data, default_values) + return input_data + @abstractmethod def read( self, filepath: Union[str, TextIO], **kwargs diff --git a/src/otoole/read_strategies.py b/src/otoole/read_strategies.py index 6bf30c6..3b6273e 100644 --- a/src/otoole/read_strategies.py +++ b/src/otoole/read_strategies.py @@ -180,13 +180,7 @@ def read( ) if self.write_defaults: - names = [ - x for x in self.user_config if self.user_config[x]["type"] == "param" - ] - for name in names: - input_data[name] = self._expand_dataframe( - name, input_data, default_values - ) + input_data = self.write_default_params(input_data, default_values) input_data = self._check_index(input_data) @@ -263,12 +257,7 @@ def read( input_data = self._check_index(input_data) if self.write_defaults: - for name in [ - x for x in self.user_config if self.user_config[x]["type"] == "param" - ]: - input_data[name] = self._expand_dataframe( - name, input_data, default_values - ) + input_data = self.write_default_params(input_data, default_values) return input_data, default_values @@ -358,14 +347,7 @@ def read( input_data = self._check_index(input_data) if self.write_defaults: - for name in [ - x - for x in self.user_config - if self.user_config[x]["type"] == "param" - ]: - input_data[name] = self._expand_dataframe( - name, input_data, default_values - ) + input_data = self.write_default_params(input_data, default_values) return input_data, default_values else: diff --git a/src/otoole/results/results.py b/src/otoole/results/results.py index b8bebd4..019dc09 100644 --- a/src/otoole/results/results.py +++ b/src/otoole/results/results.py @@ -45,6 +45,9 @@ def read( available_results, input_data ) # type: Dict[str, pd.DataFrame] + if self.write_defaults: + results = self.write_default_results(results, default_values) + return results, default_values @abstractmethod diff --git a/tests/test_input.py b/tests/test_input.py index fc5f735..ac41d5e 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -29,10 +29,8 @@ def capital_cost(): data=[ ["SIMPLICITY", "NGCC", 2014, 1.23], ["SIMPLICITY", "NGCC", 2015, 2.34], - ["SIMPLICITY", "NGCC", 2016, 3.45], - ["SIMPLICITY", "HYD1", 2014, 3.45], - ["SIMPLICITY", "HYD1", 2015, 2.34], - ["SIMPLICITY", "HYD1", 2016, 1.23], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ["SIMPLICITY", "HYD1", 2016, 4.56], ], columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) @@ -67,13 +65,13 @@ def simple_input_data(region, year, technology, capital_cost, discount_rate): "REGION": region, "TECHNOLOGY": technology, "YEAR": year, - # "CapitalCost": capital_cost, - # "DiscountRate": discount_rate + "CapitalCost": capital_cost, + "DiscountRate": discount_rate, } @fixture -def simple_result_data(new_capacity): +def simple_available_results(new_capacity): return {"NewCapacity": new_capacity} @@ -84,7 +82,7 @@ def simple_user_config(): "indices": ["REGION", "TECHNOLOGY", "YEAR"], "type": "param", "dtype": "float", - "default": 0, + "default": -1, "short_name": "CAPEX", }, "DiscountRate": { @@ -109,7 +107,7 @@ def simple_user_config(): "indices": ["REGION", "TECHNOLOGY", "YEAR"], "type": "result", "dtype": "float", - "default": 0, + "default": 20, }, } @@ -266,7 +264,7 @@ def test_expand_parameters_defaults( assert_frame_equal(actual, expected) def test_expand_results_key_error( - self, simple_user_config, simple_result_data, simple_default_values + self, simple_user_config, simple_input_data, simple_default_values ): read_strategy = DummyReadStrategy( user_config=simple_user_config, write_defaults=True @@ -274,7 +272,7 @@ def test_expand_results_key_error( with raises(KeyError, match="SpecifiedAnnualDemand"): read_strategy._expand_dataframe( - "SpecifiedAnnualDemand", simple_result_data, simple_default_values + "SpecifiedAnnualDemand", simple_input_data, simple_default_values ) # test get default dataframe @@ -308,6 +306,84 @@ def test_get_default_dataframe( ) assert_frame_equal(actual, expected) + # test expand all input data + + def test_write_default_params( + self, simple_user_config, simple_input_data, simple_default_values + ): + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual_expanded = read_strategy.write_default_params( + simple_input_data, simple_default_values + ) + actual = actual_expanded["CapitalCost"] + + expected = pd.DataFrame( + data=[ + ["SIMPLICITY", "HYD1", 2014, -1], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ["SIMPLICITY", "HYD1", 2016, 4.56], + ["SIMPLICITY", "NGCC", 2014, 1.23], + ["SIMPLICITY", "NGCC", 2015, 2.34], + ["SIMPLICITY", "NGCC", 2016, -1], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + assert_frame_equal(actual, expected) + + def test_write_default_results_correct( + self, + simple_user_config, + simple_input_data, + simple_available_results, + simple_default_values, + ): + # data merged by result package + input_data = simple_input_data.copy() + input_data.update(simple_available_results) + + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual_expanded = read_strategy.write_default_results( + input_data, simple_default_values + ) + + actual = actual_expanded["NewCapacity"] + + expected = pd.DataFrame( + data=[ + ["SIMPLICITY", "HYD1", 2014, 2.34], + ["SIMPLICITY", "HYD1", 2015, 3.45], + ["SIMPLICITY", "HYD1", 2016, 20], + ["SIMPLICITY", "NGCC", 2014, 20], + ["SIMPLICITY", "NGCC", 2015, 20], + ["SIMPLICITY", "NGCC", 2016, 1.23], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + assert_frame_equal(actual, expected) + + def test_write_default_results_incorrect( + self, + simple_user_config, + simple_input_data, + simple_available_results, + simple_default_values, + ): + # data merged by result package + input_data = simple_input_data.copy() + input_data.update(simple_available_results) + + read_strategy = DummyReadStrategy(user_config=simple_user_config) + actual_expanded = read_strategy.write_default_results( + input_data, simple_default_values + ) + + actual = actual_expanded["CapitalCost"] + expected = simple_input_data["CapitalCost"] + + assert_frame_equal(actual, expected) + class TestReadStrategy: From 9e5d1c7ad1217ba4f9f3ba7a985910a23f1e85d9 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sun, 31 Mar 2024 15:42:39 -0700 Subject: [PATCH 13/17] add expand defaults result test --- src/otoole/input.py | 22 +++++++++++++++------- src/otoole/results/results.py | 3 ++- tests/test_read_strategies.py | 30 ++++++++++++++++++++++++++---- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/otoole/input.py b/src/otoole/input.py index 2ffb8d1..f421425 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -550,11 +550,7 @@ def _expand_dataframe( Input data with expanded default values replacing missing entries """ - try: - df = input_data[name] - except KeyError as ex: - print(ex) - raise KeyError(f"No input data to expand for {name}") + df = input_data[name] # TODO: Issue with how otoole handles trade route right now. # The double definition of REGION throws an error. @@ -605,7 +601,13 @@ def write_default_params( """Returns paramter dataframes with default values expanded""" names = [x for x in self.user_config if self.user_config[x]["type"] == "param"] for name in names: - input_data[name] = self._expand_dataframe(name, input_data, default_values) + try: + logger.debug(f"Serching for {name} data to expand") + input_data[name] = self._expand_dataframe( + name, input_data, default_values + ) + except KeyError: + logger.warning(f"Can not expand {name} data") return input_data def write_default_results( @@ -616,7 +618,13 @@ def write_default_results( """Returns result dataframes with default values expanded""" names = [x for x in self.user_config if self.user_config[x]["type"] == "result"] for name in names: - input_data[name] = self._expand_dataframe(name, input_data, default_values) + try: + logger.debug(f"Serching for {name} data to expand") + input_data[name] = self._expand_dataframe( + name, input_data, default_values + ) + except KeyError: + logger.warning(f"Can not expand {name} data") return input_data @abstractmethod diff --git a/src/otoole/results/results.py b/src/otoole/results/results.py index 019dc09..ff5a4b7 100644 --- a/src/otoole/results/results.py +++ b/src/otoole/results/results.py @@ -46,7 +46,8 @@ def read( ) # type: Dict[str, pd.DataFrame] if self.write_defaults: - results = self.write_default_results(results, default_values) + all_data = {**input_data, **results} if input_data else results.copy() + results = self.write_default_results(all_data, default_values) return results, default_values diff --git a/tests/test_read_strategies.py b/tests/test_read_strategies.py index 574fcee..8369ed6 100644 --- a/tests/test_read_strategies.py +++ b/tests/test_read_strategies.py @@ -80,7 +80,6 @@ def test_convert_to_dataframe(self, user_config): reader = ReadCplex(user_config) with StringIO(input_file) as file_buffer: actual = reader._convert_to_dataframe(file_buffer) - # print(actual) expected = pd.DataFrame( [ ["NewCapacity", "SIMPLICITY,ETHPLANT,2015", 0.030000000000000027], @@ -99,7 +98,6 @@ def test_solution_to_dataframe(self, user_config): reader = ReadCplex(user_config) with StringIO(input_file) as file_buffer: actual = reader.read(file_buffer) - # print(actual) expected = ( pd.DataFrame( [ @@ -146,6 +144,32 @@ def test_solution_to_dataframe(self, user_config): ) pd.testing.assert_frame_equal(actual[0]["RateOfActivity"], expected) + def test_solution_to_dataframe_with_defaults(self, user_config): + input_file = self.cplex_data + + regions = pd.DataFrame(data=["SIMPLICITY"], columns=["VALUE"]) + technologies = pd.DataFrame(data=["ETHPLANT"], columns=["VALUE"]) + years = pd.DataFrame(data=[2014, 2015, 2016], columns=["VALUE"]) + input_data = {"REGION": regions, "TECHNOLOGY": technologies, "YEAR": years} + + reader = ReadCplex(user_config, write_defaults=True) + with StringIO(input_file) as file_buffer: + actual = reader.read(file_buffer, input_data=input_data) + expected = ( + pd.DataFrame( + [ + ["SIMPLICITY", "ETHPLANT", 2014, 0], + ["SIMPLICITY", "ETHPLANT", 2015, 0.030000000000000027], + ["SIMPLICITY", "ETHPLANT", 2016, 0.030999999999999917], + ], + columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"], + ) + .astype({"REGION": str, "TECHNOLOGY": str, "YEAR": "int64", "VALUE": float}) + .set_index(["REGION", "TECHNOLOGY", "YEAR"]) + ) + + pd.testing.assert_frame_equal(actual[0]["NewCapacity"], expected) + class TestReadGurobi: @@ -169,7 +193,6 @@ def test_convert_to_dataframe(self, user_config): reader = ReadGurobi(user_config) with StringIO(input_file) as file_buffer: actual = reader._convert_to_dataframe(file_buffer) - # print(actual) expected = pd.DataFrame( [ ["TotalDiscountedCost", "SIMPLICITY,2014", 1.9360385416218188e02], @@ -191,7 +214,6 @@ def test_solution_to_dataframe(self, user_config): reader = ReadGurobi(user_config) with StringIO(input_file) as file_buffer: actual = reader.read(file_buffer) - # print(actual) expected = ( pd.DataFrame( [ From 6e4408ec6630d029e28a8b8128d8bde2377277a9 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sun, 31 Mar 2024 16:17:40 -0700 Subject: [PATCH 14/17] add read tests for default expansion --- tests/fixtures/~$combined_inputs.xlsx | Bin 0 -> 165 bytes tests/test_read_strategies.py | 51 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tests/fixtures/~$combined_inputs.xlsx diff --git a/tests/fixtures/~$combined_inputs.xlsx b/tests/fixtures/~$combined_inputs.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e2571aa4c4769e232238e41917ab0cdfd16f7f14 GIT binary patch literal 165 ycmd-LDM~HNFH&$yEXqqQRv;GeGK4S`F{Cn-G2{bj1qLUEL?E396e$Mss0{&k>l9T0 literal 0 HcmV?d00001 diff --git a/tests/test_read_strategies.py b/tests/test_read_strategies.py index 8369ed6..3c11997 100644 --- a/tests/test_read_strategies.py +++ b/tests/test_read_strategies.py @@ -19,6 +19,7 @@ identify_duplicate, rename_duplicate_column, ) +from otoole.utils import _read_file class TestReadCplex: @@ -992,6 +993,21 @@ def test_catch_error_no_parameter(self, caplog, user_config): in caplog.text ) + def test_read_datafile_with_defaults(self, user_config): + datafile = os.path.join("tests", "fixtures", "simplicity.txt") + reader = ReadDatafile(user_config=user_config, write_defaults=True) + actual, _ = reader.read(datafile) + data = [ + ["SIMPLICITY", "DAM", 2014, 0.0], + ["SIMPLICITY", "DAM", 2015, 0.0], + ["SIMPLICITY", "DAM", 2016, 0.0], + ] + expected = pd.DataFrame( + data, columns=["REGION", "STORAGE", "YEAR", "VALUE"] + ).set_index(["REGION", "STORAGE", "YEAR"]) + + pd.testing.assert_frame_equal(actual["CapitalCostStorage"].iloc[:3], expected) + class TestReadExcel: def test_read_excel_yearsplit(self, user_config): @@ -1045,6 +1061,21 @@ def test_read_excel_yearsplit(self, user_config): assert (actual_data == expected).all() + def test_read_excel_with_defaults(self, user_config): + spreadsheet = os.path.join("tests", "fixtures", "combined_inputs.xlsx") + reader = ReadExcel(user_config=user_config, write_defaults=True) + actual, _ = reader.read(spreadsheet) + data = [ + ["09_ROK", "CO2", 2017, -1.0], + ["09_ROK", "CO2", 2018, -1.0], + ["09_ROK", "CO2", 2019, -1.0], + ] + expected = pd.DataFrame( + data, columns=["REGION", "EMISSION", "YEAR", "VALUE"] + ).set_index(["REGION", "EMISSION", "YEAR"]) + + pd.testing.assert_frame_equal(actual["AnnualEmissionLimit"].iloc[:3], expected) + def test_narrow_parameters(self, user_config): data = [ ["IW0016", 0.238356164, 0.238356164, 0.238356164], @@ -1161,6 +1192,26 @@ def test_read_default_values_csv(self, user_config): expected = None assert actual == expected + def test_read_csv_with_defaults(self): + user_config_path = os.path.join( + "tests", "fixtures", "super_simple", "super_simple.yaml" + ) + with open(user_config_path, "r") as config_file: + user_config = _read_file(config_file, ".yaml") + + filepath = os.path.join("tests", "fixtures", "super_simple", "csv") + reader = ReadCsv(user_config=user_config, write_defaults=True) + actual, _ = reader.read(filepath) + data = [ + ["BB", "gas_import", 2016, 0.0], + ["BB", "gas_plant", 2016, 1.03456], + ] + expected = pd.DataFrame( + data, columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"] + ).set_index(["REGION", "TECHNOLOGY", "YEAR"]) + + pd.testing.assert_frame_equal(actual["CapitalCost"], expected) + class TestReadTabular: """Methods shared for csv and excel""" From 1257fba6d2e46700615990adfb0a6c42d64b334e Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sun, 31 Mar 2024 16:18:37 -0700 Subject: [PATCH 15/17] restore xlsx fixture --- tests/fixtures/~$combined_inputs.xlsx | Bin 165 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/fixtures/~$combined_inputs.xlsx diff --git a/tests/fixtures/~$combined_inputs.xlsx b/tests/fixtures/~$combined_inputs.xlsx deleted file mode 100644 index e2571aa4c4769e232238e41917ab0cdfd16f7f14..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 165 ycmd-LDM~HNFH&$yEXqqQRv;GeGK4S`F{Cn-G2{bj1qLUEL?E396e$Mss0{&k>l9T0 From d9635ddc346a971a7bec754558058fb5e2ee40d2 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sun, 31 Mar 2024 18:00:32 -0700 Subject: [PATCH 16/17] fix result processing with defaults --- src/otoole/convert.py | 8 +++++--- src/otoole/input.py | 17 +++++++++++------ src/otoole/results/results.py | 3 +-- tests/test_input.py | 28 ++-------------------------- 4 files changed, 19 insertions(+), 37 deletions(-) diff --git a/src/otoole/convert.py b/src/otoole/convert.py index 025f674..e8b3243 100644 --- a/src/otoole/convert.py +++ b/src/otoole/convert.py @@ -30,6 +30,7 @@ def read_results( from_path: str, input_format: str, input_path: str, + write_defaults: bool = False, glpk_model: Optional[str] = None, ) -> Tuple[Dict[str, pd.DataFrame], Dict[str, float]]: """Read OSeMOSYS results from CBC, GLPK, Gurobi, or CPLEX results files @@ -58,7 +59,9 @@ def read_results( """ user_config = _get_user_config(config) input_strategy = _get_read_strategy(user_config, input_format) - result_strategy = _get_read_result_strategy(user_config, from_format, glpk_model) + result_strategy = _get_read_result_strategy( + user_config, from_format, glpk_model, write_defaults + ) if input_strategy: input_data, _ = input_strategy.read(input_path) @@ -427,11 +430,10 @@ def write( """ user_config = _get_user_config(config) + write_strategy = _get_write_strategy(user_config, to_format) if default_values is None: - write_strategy = _get_write_strategy(user_config, to_format) write_strategy.write(inputs, to_path, {}) else: - write_strategy = _get_write_strategy(user_config, to_format) write_strategy.write(inputs, to_path, default_values) return True diff --git a/src/otoole/input.py b/src/otoole/input.py index f421425..e1082f7 100644 --- a/src/otoole/input.py +++ b/src/otoole/input.py @@ -111,6 +111,7 @@ def convert(self, input_filepath: str, output_filepath: str, **kwargs: Dict): input_filepath: str output_filepath: str """ + inputs, default_values = self._read(input_filepath, **kwargs) self._write(inputs, output_filepath, default_values, **kwargs) @@ -541,8 +542,9 @@ def _expand_dataframe( ---------- name: str Name of parameter/result to expand - df: pd.DataFrame, - input parameter/result data to be expanded + input_data: Dict[str, pd.DataFrame], + internal datastore + default_values: Dict[str, pd.DataFrame], Returns ------- @@ -612,20 +614,23 @@ def write_default_params( def write_default_results( self, + result_data: Dict[str, pd.DataFrame], input_data: Dict[str, pd.DataFrame], default_values: Dict[str, Union[str, int, float]], ) -> Dict[str, pd.DataFrame]: """Returns result dataframes with default values expanded""" + + all_data = {**result_data, **input_data} names = [x for x in self.user_config if self.user_config[x]["type"] == "result"] for name in names: try: logger.debug(f"Serching for {name} data to expand") - input_data[name] = self._expand_dataframe( - name, input_data, default_values + result_data[name] = self._expand_dataframe( + name, all_data, default_values ) except KeyError: - logger.warning(f"Can not expand {name} data") - return input_data + logger.debug(f"Can not expand {name} data") + return result_data @abstractmethod def read( diff --git a/src/otoole/results/results.py b/src/otoole/results/results.py index ff5a4b7..4853402 100644 --- a/src/otoole/results/results.py +++ b/src/otoole/results/results.py @@ -46,8 +46,7 @@ def read( ) # type: Dict[str, pd.DataFrame] if self.write_defaults: - all_data = {**input_data, **results} if input_data else results.copy() - results = self.write_default_results(all_data, default_values) + results = self.write_default_results(results, input_data, default_values) return results, default_values diff --git a/tests/test_input.py b/tests/test_input.py index ac41d5e..135d1e5 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -331,20 +331,17 @@ def test_write_default_params( assert_frame_equal(actual, expected) - def test_write_default_results_correct( + def test_write_default_results( self, simple_user_config, simple_input_data, simple_available_results, simple_default_values, ): - # data merged by result package - input_data = simple_input_data.copy() - input_data.update(simple_available_results) read_strategy = DummyReadStrategy(user_config=simple_user_config) actual_expanded = read_strategy.write_default_results( - input_data, simple_default_values + simple_available_results, simple_input_data, simple_default_values ) actual = actual_expanded["NewCapacity"] @@ -363,27 +360,6 @@ def test_write_default_results_correct( assert_frame_equal(actual, expected) - def test_write_default_results_incorrect( - self, - simple_user_config, - simple_input_data, - simple_available_results, - simple_default_values, - ): - # data merged by result package - input_data = simple_input_data.copy() - input_data.update(simple_available_results) - - read_strategy = DummyReadStrategy(user_config=simple_user_config) - actual_expanded = read_strategy.write_default_results( - input_data, simple_default_values - ) - - actual = actual_expanded["CapitalCost"] - expected = simple_input_data["CapitalCost"] - - assert_frame_equal(actual, expected) - class TestReadStrategy: From 86cb6735d59f4f9fa499bb1258bf0f7258545038 Mon Sep 17 00:00:00 2001 From: trevorb1 Date: Sun, 31 Mar 2024 18:40:08 -0700 Subject: [PATCH 17/17] add hack for DiscountRate --- src/otoole/results/results.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/otoole/results/results.py b/src/otoole/results/results.py index 4853402..d6acb65 100644 --- a/src/otoole/results/results.py +++ b/src/otoole/results/results.py @@ -32,8 +32,9 @@ def read( """ if "input_data" in kwargs: input_data = kwargs["input_data"] + param_default_values = self._read_default_values(self.input_config) else: - input_data = None + input_data = {} available_results = self.get_results_from_file( filepath, input_data @@ -41,6 +42,16 @@ def read( default_values = self._read_default_values(self.results_config) # type: Dict + # need to expand discount rate for results processing + if "DiscountRate" in input_data: + input_data["DiscountRate"] = self._expand_dataframe( + "DiscountRate", input_data, param_default_values + ) + if "DiscountRateIdv" in input_data: + input_data["DiscountRateIdv"] = self._expand_dataframe( + "DiscountRateIdv", input_data, param_default_values + ) + results = self.calculate_results( available_results, input_data ) # type: Dict[str, pd.DataFrame]