SMTG-Bham · Yaxuan-Lii · Jun 23, 2021 · Jun 29, 2021 · Jul 1, 2021 · Jul 1, 2021
diff --git a/galore/cross_sections.py b/galore/cross_sections.py
@@ -281,3 +281,261 @@ def _eval_fit(energy, coeffs):
                 orb: _eval_fit(energy, np_fromstr(coeffs))
                 for orb, coeffs in orbitals_fits}})
     return el_cross_sections
+
+
+
+import tarfile
+import numpy as np
+
+
+def read_csv_file(tar_file_name, file_path):
+    """
+    Generate data from csv archive without decompression
+
+    Args:
+        tar_file_name (str): path to tarfile of CSV data
+        file_path(str): path to individual CSV file within tarfile
+
+    Returns:
+        dict: containing 'headers', 'electron_counts' 
+        (lists of str and int respectively) and 'data_table', 
+        a 2-D nested list of floats. Missing data is represented as None.
+
+    """
+
+    # Open zipfile
+    with tarfile.open(tar_file_name) as tf:
+        with tf.extractfile(file_path) as hello:
+            # get data as string
+            data = hello.read().decode()
+    # string to list
+    data_string = data.split('\r\n')
+
+    # get number of colunm headers
+    colunm_headers = [colunm_header for colunm_header in data_string[0].split(
+        ',') if colunm_header != '']
+    lenth = len(colunm_headers)
+
+    # build main matrix
+    main_matrix = []
+    rows = range(len(data_string))
+    for row in rows:
+        data_each_row = data_string[row].split(',')[0:lenth]
+        main_matrix.append(data_each_row)
+
+    # build cross sections table
+    empty_value = main_matrix[-2]
+    # remove empty values
+    midterm = [row for row in main_matrix if row != empty_value]
+    data_table = midterm[1:-2]
+    electron_counts_list = midterm[-2]
+    electron_counts = [i for i in electron_counts_list if i != ''][1:]
+    # build result dict
+    result_dict = {}
+    result_dict['headers'] = colunm_headers
+    result_dict['electron_counts'] = electron_counts
+    result_dict['data_table'] = data_table
+
+    return result_dict
+
+
+def get_avg_orbital_cross_sections(subshells_cross_sections, electrons_number):
+    """
+    Obtain average cross sections of subshell of each obital when process Scofield data.
+
+    Args:
+        subshell_cross_sections(np.array): subshells cross sections of a certain obital 
+            correspond to a given input energy.                    
+        electrons_number(np.array): corresponding electrons number of subshells
+
+        Note: subshells are like ('2p1/2', '2p3/2', '3p1/2', '3p3/2',...) for obital p
+
+    Returns:
+        avg_orbital_cross_sections(np.array): average cross sections of obitals 
+
+        Note: the above array are like ('2p', '3p',...) for obital p
+    """
+
+    subshells_cross_section_arrays = np.array_split(
+        subshells_cross_sections, 2)
+    electrons_numbers_arrays = np.array_split(electrons_number, 2)
+
+    subshells_cross_sections_sum = np.array(
+        list(map(sum, subshells_cross_section_arrays)))
+
+    subshells_electrons_number_sum = np.array(
+        list(map(sum, electrons_numbers_arrays)))
+
+    avg_orbital_cross_sections = np.true_divide(
+        subshells_cross_sections_sum, subshells_electrons_number_sum)
+
+    return avg_orbital_cross_sections
+
+
+def _cross_sections_from_csv_data(energy, data, reference):
+    """
+    Get cross sections from data dict 
+    For known sources, data is based on tabulation of Yeh/Lindau (1985).[1]
+    Otherwise, energies in keV from 1-1500 are used with log-log polynomial
+    parametrisation of data from Scofield.[2]
+
+    References:
+      1. Yeh, J.J. and Lindau, I. (1985)
+         Atomic Data and Nuclear Data Tables 32 pp 1-155
+      2. J. H. Scofield (1973) Lawrence Livermore National Laboratory
+         Report No. UCRL-51326
+
+    Args:
+        energy(float): energy value  
+        data(dict): data from read_csv_file()
+        reference(str): 'Scofield' or 'Yeh'
+
+    Returns:
+        orbitals_cross_sections_dict: containing orbitals 's', 'p', 'd', 'f' and 
+                                      maximum cross sections of each orbital. 
+                                      Missing data is represented as None.
+
+    """
+
+    # replace '' in data table with NaN
+    for row in range(len(data['data_table'])):
+        data['data_table'][row] = [
+            float('NaN') if x == '' else x for x in data['data_table'][row]]
+
+    # change the data_table and electron_counts to float arrays
+    data['data_table'] = np.array(data['data_table']).astype(float).T
+    data['electron_counts'] = np.array(data['electron_counts']).astype(float)
+
+    # Build a dict_for_calculation which keys are 'PhotonEnergy' and orbitals like '1s1/2', '2s1/2', '2p1/2', '2p3/2', '3s1/2', '3p1/2', '3p3/2'...
+    # and values are connected cross sections and number of electrons of each orbital
+    # This is for calculating the max cross sections of 's', 'p', 'd', 'f' orbitals
+    new_lenth = len(data['electron_counts'])
+    cross_sections_by_orbitals = data['data_table'][-new_lenth:]
+    orbital_headers = data['headers'][-new_lenth:]
+
+    cross_sections_and_electron_numbers = np.concatenate(
+        (cross_sections_by_orbitals.T, [data['electron_counts']]), axis=0).T
+    dict_for_calculation = dict(
+        zip(orbital_headers, cross_sections_and_electron_numbers))
+    energy_array = data['data_table'][0]
+    dict_for_calculation['PhotonEnergy'] = energy_array
+
+    # match the import energy
+    index = np.where(dict_for_calculation['PhotonEnergy'] == energy)[0]
+
+    # build result dict
+    orbitals_cross_sections_dict = {}
+
+    # result for s orbital
+    s_cross_sections = np.array([dict_for_calculation[key]
+                                for key in dict_for_calculation if 's' in key]).T[index]
+    electrons_number = np.array([dict_for_calculation[key]
+                                for key in dict_for_calculation if 's' in key]).T[-1]
+    # get unit cross sections
+    unit_cross_sections = np.true_divide(s_cross_sections, electrons_number)
+    # get max cross section of obital s
+    max_cross_section = np.max(np.nan_to_num(unit_cross_sections))
+    orbitals_cross_sections_dict['s'] = max_cross_section
+
+    # result for 'p', 'd', 'f' orbitals
+    orbitals = ['p', 'd', 'f']
+    for orbital in orbitals:
+        interm_matrix = np.array([dict_for_calculation[key]
+                                 for key in dict_for_calculation if orbital in key]).T
+
+        if np.shape(interm_matrix) != (0,):
+            if reference == 'Scofield':
+                subshells_cross_sections = interm_matrix[index][0]
+
+                electrons_number = interm_matrix[-1]
+
+                result = get_avg_orbital_cross_sections(
+                    subshells_cross_sections, electrons_number)
+
+                # get max cross section of this obital
+                max_cross_section = np.max(np.nan_to_num(result))
+                orbitals_cross_sections_dict[orbital] = max_cross_section
+
+            elif reference == 'Yeh':
+                obital_cross_sections = interm_matrix[index][0]
+                electrons_number = interm_matrix[-1]
+                unit_cross_sections = np.true_divide(
+                    obital_cross_sections, electrons_number)
+                # get max cross section of this obital
+                max_cross_section = np.max(np.nan_to_num(unit_cross_sections))
+                orbitals_cross_sections_dict[orbital] = max_cross_section
+
+    return orbitals_cross_sections_dict
+
+
+def get_metadata(energy, reference):
+    """
+    Args:
+        energy(float): energy value  
+        reference(str): 'Scofield' or 'Yeh'
+
+        Note: 1.'Scofield' for J. H. Scofield (1973)
+                Lawrence Livermore National Laboratory Report No. UCRL-51326              
+              2.'Yeh' for Yeh, J.J. and Lindau, I. (1985) 
+                Atomic Data and Nuclear Data Tables 32 pp 1-155   
+
+    Returns:
+        metadata_dict: containing the input energy value 
+                       and description of input reference
+
+    """
+
+    metadata_dict = {}
+    metadata_dict['energy'] = energy
+    if reference == 'Scofield':
+        metadata_dict['reference'] = 'J.H. Scofield, Theoretical photoionization cross sections from 1 to 1500 keV'
+        metadata_dict['link'] = 'https://doi.org/10.2172/4545040'
+    elif reference == 'Yeh':
+        metadata_dict['reference'] = 'Yeh, J.J. and Lindau, I. (1985) Atomic Data and Nuclear Data Tables 32 pp 1-155'
+        metadata_dict['link'] = 'https://doi.org/10.1016/0092-640X(85)90016-6'
+    else:
+        metadata_dict('Wrong reference')
+    return metadata_dict
+
+
+def get_cross_section_from_csv(elements, energy, reference):
+    """
+    Args:
+        elements(string list): element name list
+                               for Scofiled data such as ['Z__1_H_','Z_13_Al',....]
+                               for Yeh data such as ['1_H','13_Al',...]
+
+        energy(float): energy value  
+        reference(str): 'Scofield' or 'Yeh'
+
+        Note: 1.'Scofield' for J. H. Scofield (1973)
+                Lawrence Livermore National Laboratory Report No. UCRL-51326              
+              2.'Yeh' for Yeh, J.J. and Lindau, I. (1985) 
+                Atomic Data and Nuclear Data Tables 32 pp 1-155   
+
+    Returns:
+        result(dict): containing energy value, reference information, 
+                      and orbital cross sections dict of input elements
+
+    """
+
+    result = {}
+    metadata = get_metadata(energy, reference)
+    result.update(metadata)
+
+    for element in elements:
+
+        if reference == 'Scofield':
+            filename = 'Scofield_csv_database.tar.gz'
+            filepath = 'Scofield_csv_database/{element1}.csv'
+        else:
+            filename = 'Yeh_Lindau_1985_Xsection_CSV_Database.tar.gz'
+            filepath = 'Yeh_Lindau_1985_Xsection_CSV_Database/{element1}.csv'
+
+        filepath = filepath.format(element1=element)
+        data = read_csv_file(filename, filepath)
+
+        cross_sections = _cross_sections_from_csv_data(energy, data, reference)
+        result[element] = cross_sections
+
+    return result