From 8d5cade7132c0381959fc437c9fa1915aad0d287 Mon Sep 17 00:00:00 2001 From: larsevj Date: Thu, 10 Oct 2024 17:33:08 +0200 Subject: [PATCH] Read parameter names separately --- src/ert/config/design_matrix.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/ert/config/design_matrix.py b/src/ert/config/design_matrix.py index e4fb4c6ff8f..cf3cb9fde99 100644 --- a/src/ert/config/design_matrix.py +++ b/src/ert/config/design_matrix.py @@ -77,9 +77,6 @@ def from_config_list(cls, config_list: List[str]) -> "DesignMatrix": def read_design_matrix( self, ) -> None: - """ - Reads out all file content from different files and create dataframes - """ param_names = ( pd.read_excel( self.xls_filename, @@ -91,11 +88,13 @@ def read_design_matrix( .iloc[0] .apply(lambda x: x.strip() if isinstance(x, str) else x) ) - if len(param_names) - len(set(param_names)) != 0: - raise ValueError("Duplicate parameter names found in design sheet") design_matrix_df = DesignMatrix._read_excel( - self.xls_filename, self.design_sheet - ).rename(columns=lambda x: str(x).strip()) + self.xls_filename, + self.design_sheet, + header=None, + skiprows=1, + ) + design_matrix_df.columns = param_names if "REAL" in design_matrix_df.columns: if not is_integer_dtype(design_matrix_df.dtypes["REAL"]) or any( @@ -152,6 +151,7 @@ def _read_excel( sheet_name: str, usecols: Optional[Union[int, List[int]]] = None, header: Optional[int] = 0, + skiprows: Optional[int] = None, dtype: Optional[str] = None, ) -> pd.DataFrame: """ @@ -165,30 +165,30 @@ def _read_excel( sheet_name, usecols=usecols, header=header, + skiprows=skiprows, dtype=dtype, ) return dframe.dropna(axis=1, how="all") + @staticmethod def _validate_design_matrix(design_matrix: pd.DataFrame) -> List[str]: """ - Validate header in user inputted design matrix - :raises: ValueError if design matrix contains empty headers + Validate user inputted design matrix + :raises: ValueError if design matrix contains 
empty headers or empty cells """ if design_matrix.empty: return [] errors = [] + column_na_mask = design_matrix.columns.isna() column_indexes_unnamed = [ - index - for index, value in enumerate( - design_matrix.columns.str.contains("^Unnamed") - ) - if value + index for index, value in enumerate(column_na_mask) if value ] if len(column_indexes_unnamed) > 0: errors.append( f"Column headers not present in column {column_indexes_unnamed}" ) - + if not design_matrix.columns[~column_na_mask].is_unique: + errors.append("Duplicate parameter names found in design sheet") empties = [ f"Realization {design_matrix.index[i]}, column {design_matrix.columns[j]}" for i, j in zip(*np.where(pd.isna(design_matrix))) @@ -202,7 +202,7 @@ def _read_defaultssheet( xls_filename: Union[Path, str], defaults_sheetname: str ) -> Dict[str, Union[str, float]]: """ - Construct a dataframe of keys and values to be used as defaults from the + Construct a dict of keys and values to be used as defaults from the first two columns in a spreadsheet. Returns a dict of default values