Skip to content

Commit

Permalink
Read parameter names separately
Browse files Browse the repository at this point in the history
  • Loading branch information
larsevj committed Oct 10, 2024
1 parent 897a4a0 commit 8d5cade
Showing 1 changed file with 16 additions and 16 deletions.
32 changes: 16 additions & 16 deletions src/ert/config/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ def from_config_list(cls, config_list: List[str]) -> "DesignMatrix":
def read_design_matrix(
self,
) -> None:
"""
Reads out all file content from different files and create dataframes
"""
param_names = (
pd.read_excel(
self.xls_filename,
Expand All @@ -91,11 +88,13 @@ def read_design_matrix(
.iloc[0]
.apply(lambda x: x.strip() if isinstance(x, str) else x)
)
if len(param_names) - len(set(param_names)) != 0:
raise ValueError("Duplicate parameter names found in design sheet")
design_matrix_df = DesignMatrix._read_excel(
self.xls_filename, self.design_sheet
).rename(columns=lambda x: str(x).strip())
self.xls_filename,
self.design_sheet,
header=None,
skiprows=1,
)
design_matrix_df.columns = param_names

if "REAL" in design_matrix_df.columns:
if not is_integer_dtype(design_matrix_df.dtypes["REAL"]) or any(
Expand Down Expand Up @@ -152,6 +151,7 @@ def _read_excel(
sheet_name: str,
usecols: Optional[Union[int, List[int]]] = None,
header: Optional[int] = 0,
skiprows: Optional[int] = None,
dtype: Optional[str] = None,
) -> pd.DataFrame:
"""
Expand All @@ -165,30 +165,30 @@ def _read_excel(
sheet_name,
usecols=usecols,
header=header,
skiprows=skiprows,
dtype=dtype,
)
return dframe.dropna(axis=1, how="all")

@staticmethod
def _validate_design_matrix(design_matrix: pd.DataFrame) -> List[str]:
"""
Validate header in user inputted design matrix
:raises: ValueError if design matrix contains empty headers
Validate user inputted design matrix
:raises: ValueError if design matrix contains empty headers or empty cells
"""
if design_matrix.empty:
return []
errors = []
column_na_mask = design_matrix.columns.isna()
column_indexes_unnamed = [
index
for index, value in enumerate(
design_matrix.columns.str.contains("^Unnamed")
)
if value
index for index, value in enumerate(column_na_mask) if value
]
if len(column_indexes_unnamed) > 0:
errors.append(
f"Column headers not present in column {column_indexes_unnamed}"
)

if not design_matrix.columns[~column_na_mask].is_unique:
errors.append("Duplicate parameter names found in design sheet")
empties = [
f"Realization {design_matrix.index[i]}, column {design_matrix.columns[j]}"
for i, j in zip(*np.where(pd.isna(design_matrix)))
Expand All @@ -202,7 +202,7 @@ def _read_defaultssheet(
xls_filename: Union[Path, str], defaults_sheetname: str
) -> Dict[str, Union[str, float]]:
"""
Construct a dataframe of keys and values to be used as defaults from the
Construct a dict of keys and values to be used as defaults from the
first two columns in a spreadsheet.
Returns a dict of default values
Expand Down

0 comments on commit 8d5cade

Please sign in to comment.