Skip to content

Commit

Permalink
File structure (#17)
Browse files Browse the repository at this point in the history
* Fix the assumption about file structure when creating the h5 file from the raw simulation
* Fix issues with parameter file not being generated
  • Loading branch information
deborahferguson authored Feb 20, 2024
1 parent 3b80e66 commit 2490150
Show file tree
Hide file tree
Showing 4 changed files with 603 additions and 124 deletions.
3 changes: 3 additions & 0 deletions mayawaves/coalescence.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,9 @@ def eccentricity_and_mean_anomaly_at_time(self, start_time, desired_time) -> tup

# rough estimate with initial velocity
time_momentum, momentum_vector = self.primary_compact_object.momentum_vector
if momentum_vector is None:
return -1, -1

initial_momentum = momentum_vector[0]
tangential_initial_momentum = initial_momentum[1]
qc_tangential_momentum = pn.tangential_momentum_from_separation(separation_magnitude[0], mass_ratio,
Expand Down
219 changes: 124 additions & 95 deletions mayawaves/utils/postprocessingutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,13 @@ def store_compact_object_data_from_filetype(filepaths, compact_object_dict, para
header_info = header_info + line

compact_object_count = None
parfile_content = parameter_file_group.attrs['par_content']
if 'par_content' in parameter_file_group.attrs:
parfile_content = parameter_file_group.attrs['par_content']
elif 'rpar_content' in parameter_file_group.attrs:
parfile_content = parameter_file_group.attrs['rpar_content']
else:
warnings.warn('No parameter file information. Unable to read QuasiLocalMeasures data.')
return
for line in parfile_content.splitlines():
if 'QuasiLocalMeasures::num_surfaces' in line:
compact_object_count = int(line.split('=')[-1].strip())
Expand Down Expand Up @@ -1039,42 +1045,86 @@ def _simulation_name(raw_directory: str) -> str:
simulation_name = raw_directory.split('/')[-1]
return simulation_name

def _get_parameter_file_name_and_content(raw_directory: str) -> tuple:
"""Locate the parameter file and return its base name and content
def _parameter_file_name_base(raw_directory: str) -> str:
"""Base name from the parameter file
From the raw directory, compute the base of the parameter file name, whether it be a .rpar or .par file.
Search for a .rpar file (or a .par file if there is no .rpar file) and store it in a dictionary
Args:
raw_directory (str): the directory that contains all simulation data
Returns:
str: the base name of the parameter file
tuple: base name of the parameter file and a dictionary containing the rpar and par content
"""
parfile_name = None
parfile_dict = {}

# check if SIMFACTORY directory exists
rpar_file = None
par_file = None
if os.path.isdir(os.path.join(raw_directory, "SIMFACTORY/par")):
parameter_files = glob.glob(os.path.join(raw_directory, "SIMFACTORY/par/*.rpar")) + glob.glob(
os.path.join(raw_directory, "SIMFACTORY/par/*.par"))
if len(parameter_files) > 0: # if there is an rpar file
parameter_file = parameter_files[0]
parfile_name = parameter_file.split('/')[-1]
parfile_name_base = parfile_name[:parfile_name.rfind('.')]
return parfile_name_base
files_with_rpar = glob.glob(os.path.join(raw_directory, "SIMFACTORY/par/*.rpar"))
if len(files_with_rpar) > 0: # if there is an rpar file
rpar_file = files_with_rpar[0]
files_with_par = glob.glob(os.path.join(raw_directory, "SIMFACTORY/par/*.par"))
if len(files_with_par) > 0: # if there is a par file
par_file = files_with_par[0]

output_directories, _ = _ordered_output_directories(raw_directory)
output_directories.reverse() # most recent output directory is first now
# if we haven't found a par file or a rpar file
if rpar_file is None and par_file is None:
output_directories, _ = _ordered_output_directories(raw_directory)
output_directories.reverse() # most recent output directory is first now

for output_dir in output_directories:
files_with_rpar = glob.glob(os.path.join(output_dir, "*.rpar"))
if len(files_with_rpar) > 0: # if there is a rpar file
rpar_file = files_with_rpar[0]
files_with_par = glob.glob(os.path.join(output_dir, "*.par"))
if len(files_with_par) > 0: # if there is a par file
par_file = files_with_par[0]

if rpar_file is not None or par_file is not None:
break

for output_dir in output_directories:
parameter_files = glob.glob(os.path.join(output_dir, "*.rpar")) + glob.glob(os.path.join(output_dir, "*.par"))
if len(parameter_files) > 0: # if there is an rpar file
parameter_file = parameter_files[0]
parfile_name = parameter_file.split('/')[-1]
parfile_name_base = parfile_name[:parfile_name.rfind('.')]
return parfile_name_base
# if we never found a parameter file
if rpar_file is None and par_file is None:
warnings.warn("Unable to locate a .rpar or .par file in this simulation directory.")
return None, None

warnings.warn(
"Unable to find a parameter file. Cannot determine parameter file name. Will assume same as simulation name.")
return _simulation_name(raw_directory)
created_par = False

# store the rpar file if it exists and create parfile
if rpar_file is not None:
rpar_name = rpar_file.split('/')[-1]
rpar_name_base = rpar_name[:rpar_name.rfind('.')]
parfile_name = rpar_name_base
with open(rpar_file, 'r') as f:
content = f.read()
parfile_dict['rpar_content'] = content
if par_file is None:
temporary_rpar = rpar_file[:rpar_file.rfind('/') + 1] + "temp.rpar"
par_file = rpar_file[:rpar_file.rfind('/') + 1] + "temp.par"
with open(temporary_rpar, 'w') as f:
f.write(parfile_dict['rpar_content'])
os.system(f'chmod +x {temporary_rpar}')
os.system('%s' % temporary_rpar)
created_par = True
os.remove(temporary_rpar)

# store the par file
if parfile_name is None:
par_name = par_file.split('/')[-1]
parfile_name = par_name[:par_name.rfind('.')]
if os.path.exists(par_file):
with open(par_file, 'r') as f:
content = f.read()
parfile_dict['par_content'] = content

if created_par and os.path.exists(par_file):
os.remove(par_file)

return parfile_name, parfile_dict


def _ordered_output_directories(raw_directory: str) -> tuple:
Expand Down Expand Up @@ -1102,7 +1152,7 @@ def _ordered_output_directories(raw_directory: str) -> tuple:
return output_directories, prestitched


def _ordered_data_directories(raw_directory: str, parameter_file_name_base: str) -> list:
def _ordered_data_directories(raw_directory: str, parameter_file: str, parameter_file_name_base: str) -> list:
"""The directories containing the simulation data files, ordered.
Within each output directory is a data directory. Returns an ordered list of all the data directories. If the data
Expand All @@ -1117,85 +1167,54 @@ def _ordered_data_directories(raw_directory: str, parameter_file_name_base: str)
"""
output_directories, prestitched = _ordered_output_directories(raw_directory)

simulation_name = raw_directory.split('/')[-1]

if prestitched:
# pre-stitched data
data_directories = output_directories
else:
data_directories = [os.path.join(output_directory, parameter_file_name_base) for output_directory in
if parameter_file is None:
return None
data_dir_name = ""

result = re.search('IO::out_dir\s*=\s*(\S*)\s*\n', parameter_file)
try:
data_dir_name = result.group(1)
data_dir_name = data_dir_name.strip('"\'')
except:
warnings.warn("Can't find name of the data directory, assuming it is an empty string")
if data_dir_name == '\$parfile' or data_dir_name == '$parfile':
data_dir_name = parameter_file_name_base
if data_dir_name == '@SIMULATION_NAME@':
data_dir_name = simulation_name

data_directories = [os.path.join(output_directory, data_dir_name) for output_directory in
output_directories]
return data_directories


def _store_parameter_file(raw_directory: str, h5_file: h5py.File):
def _store_parameter_file(parfile_dict: dict, h5_file: h5py.File):
"""Store the parameter file in the h5 file
Search for a .rpar file (or a .par file if there is no .rpar file) and store it
Store .rpar and .par file information
Args:
raw_directory (str): the directory that contains all simulation data
parfile_dict (dict): dictionary containing the rpar and par content
h5_file (h5py.file): the h5 file to store the parameter file in
"""
# check if SIMFACTORY directory exists
rpar_file = None
par_file = None
if os.path.isdir(os.path.join(raw_directory, "SIMFACTORY/par")):
files_with_rpar = glob.glob(os.path.join(raw_directory, "SIMFACTORY/par/*.rpar"))
if len(files_with_rpar) > 0: # if there is an rpar file
rpar_file = files_with_rpar[0]
files_with_par = glob.glob(os.path.join(raw_directory, "SIMFACTORY/par/*.par"))
if len(files_with_par) > 0: # if there is a par file
par_file = files_with_par[0]

# if we haven't found a par file or a rpar file
if rpar_file is None and par_file is None:
output_directories, _ = _ordered_output_directories(raw_directory)
output_directories.reverse() # most recent output directory is first now

for output_dir in output_directories:
files_with_rpar = glob.glob(os.path.join(output_dir, "*.rpar"))
if len(files_with_rpar) > 0: # if there is a rpar file
rpar_file = files_with_rpar[0]
files_with_par = glob.glob(os.path.join(output_dir, "*.par"))
if len(files_with_par) > 0: # if there is a par file
par_file = files_with_par[0]

if rpar_file is not None or par_file is not None:
break

# if we never found a parameter file
if rpar_file is None and par_file is None:
warnings.warn("Unable to locate a .rpar or .par file in this simulation directory.")
if parfile_dict is None or ("par_content" not in parfile_dict and "rpar_content" not in parfile_dict):
return

parfile_group = h5_file.create_group('parfile')
created_par = False

# store the rpar file if it exists and create parfile
if rpar_file is not None:
with open(rpar_file, 'r') as f:
content = f.read()
parfile_group.attrs['rpar_content'] = content
if par_file is None:
temporary_rpar = rpar_file[:rpar_file.rfind('/') + 1] + "temp.rpar"
par_file = rpar_file[:rpar_file.rfind('/') + 1] + "temp.par"
with open(temporary_rpar, 'w') as f:
f.write(parfile_group.attrs['rpar_content'])
os.system(f'chmod +x {temporary_rpar}')
os.system('%s' % temporary_rpar)
created_par = True
os.remove(temporary_rpar)

# store the par file
with open(par_file, 'r') as f:
content = f.read()
parfile_group.attrs['par_content'] = content
if "par_content" in parfile_dict:
parfile_group.attrs['par_content'] = parfile_dict['par_content']

if created_par:
os.remove(par_file)
if "rpar_content" in parfile_dict:
parfile_group.attrs['rpar_content'] = parfile_dict['rpar_content']


def _all_relevant_data_filepaths(raw_directory: str, parameter_file_name_base: str) -> dict:
def _all_relevant_data_filepaths(raw_directory: str, parameter_file: str, parameter_file_name_base: str) -> dict:
"""Dictionary of all relevant data files.
The dictionary points from a data type to a filename which in turn points to a list of filepaths with that filename.
Expand All @@ -1208,7 +1227,7 @@ def _all_relevant_data_filepaths(raw_directory: str, parameter_file_name_base: s
dict: dictionary containing prefix -> filename -> list of filepaths to relevant files
"""
data_directories = _ordered_data_directories(raw_directory, parameter_file_name_base=parameter_file_name_base)
data_directories = _ordered_data_directories(raw_directory, parameter_file=parameter_file, parameter_file_name_base=parameter_file_name_base)
relevant_data_filepaths = {"compact_object": {}, "radiative": {}, "misc": {}}

# go through all output directories
Expand Down Expand Up @@ -1972,12 +1991,7 @@ def create_h5_from_simulation(raw_directory: str, output_directory: str, catalog
return

simulation_name = _simulation_name(raw_directory)
parameter_file_name_base = _parameter_file_name_base(raw_directory)

# get all relevant filepaths
relevant_data_filepaths = _all_relevant_data_filepaths(raw_directory,
parameter_file_name_base=parameter_file_name_base)
relevant_output_filepaths = _all_relevant_output_filepaths(raw_directory)
parameter_file_name_base, parameter_file_dict = _get_parameter_file_name_and_content(raw_directory)

if catalog_id is not None:
h5_filename = os.path.join(output_directory, catalog_id + ".h5")
Expand All @@ -1989,7 +2003,18 @@ def create_h5_from_simulation(raw_directory: str, output_directory: str, catalog

# store parameter file
print("storing parameter file")
_store_parameter_file(raw_directory, h5_file)
_store_parameter_file(parameter_file_dict, h5_file)

# get all relevant filepaths
if "parfile" in h5_file.keys():
if 'par_content'in h5_file["parfile"].attrs:
parameter_file = h5_file["parfile"].attrs["par_content"]
elif 'rpar_content'in h5_file["parfile"].attrs:
parameter_file = h5_file["parfile"].attrs["rpar_content"]
else:
parameter_file = None
relevant_data_filepaths = _all_relevant_data_filepaths(raw_directory, parameter_file, parameter_file_name_base)
relevant_output_filepaths = _all_relevant_output_filepaths(raw_directory)

# process radiative data
print("storing radiative information")
Expand Down Expand Up @@ -2029,9 +2054,13 @@ def get_stitched_data(raw_directory: str, filename: str) -> np.ndarray:
warnings.warn("That directory does not exist")
return None

parameter_file_name_base = _parameter_file_name_base(raw_directory)
parameter_file_name_base, parameter_file_dict = _get_parameter_file_name_and_content(raw_directory)

if parameter_file_dict is None or 'par_content' not in parameter_file_dict:
warnings.warn('Unable to determine file structure due to lack of parameter file')
return None

data_directories = _ordered_data_directories(raw_directory, parameter_file_name_base=parameter_file_name_base)
data_directories = _ordered_data_directories(raw_directory, parameter_file=parameter_file_dict['par_content'], parameter_file_name_base=parameter_file_name_base)
filepaths = []

# go through all output directories
Expand Down
Loading

0 comments on commit 2490150

Please sign in to comment.