Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import cross sections from csv #41

Draft
wants to merge 28 commits into
base: master
Choose a base branch
from
Draft
Changes from 19 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0a0270c
add test for process_pdos
Jun 23, 2021
d5c8552
fix the imports
Yaxuan-Lii Jun 29, 2021
553738f
removeed irrelevant files
Yaxuan-Lii Jul 1, 2021
5300d62
import is fixed
Yaxuan-Lii Jul 1, 2021
dab091b
try to remove irrelevant fils second time
Yaxuan-Lii Jul 1, 2021
d69d61b
remove irrelevant files second time
Yaxuan-Lii Jul 1, 2021
7984c15
delet irrelevant files
Yaxuan-Lii Jul 1, 2021
66b1c6f
remove irrelevant files
Yaxuan-Lii Jul 1, 2021
4fdafa7
delete .DS_Store
Yaxuan-Lii Jul 1, 2021
dca2d49
Modified test_process_pdos.py
Yaxuan-Lii Jul 2, 2021
eb079f6
add test.py
Yaxuan-Lii Jul 2, 2021
8190183
Restore some files that were accidentally deleted
ajjackson Jul 2, 2021
946efaa
use flake8 to optimise format
Yaxuan-Lii Jul 5, 2021
cd0f3c0
import cross-sections from CSV archives
Yaxuan-Lii Jul 15, 2021
8f48e5e
form modify
Yaxuan-Lii Jul 17, 2021
a959150
modify follow the comments
Yaxuan-Lii Jul 21, 2021
067c289
make corrections
Yaxuan-Lii Jul 21, 2021
a621ab0
make correction
Yaxuan-Lii Jul 21, 2021
13911d0
Revert changes to galore/__init__.py
ajjackson Jul 21, 2021
4fb993d
modified as comments
Yaxuan-Lii Aug 2, 2021
cd4dc0e
merge the change of galore/__init__.py
Yaxuan-Lii Aug 2, 2021
5c9c31c
add cli to install data and get cross sections
Yaxuan-Lii Aug 9, 2021
b138c4b
merge get_cross_sections_from_csv into get_cross_sections
Yaxuan-Lii Aug 10, 2021
bb8a526
modify and add new test
Yaxuan-Lii Aug 24, 2021
5cdb67b
add a IF statement
Yaxuan-Lii Aug 31, 2021
39651d5
modify as comments
Yaxuan-Lii Sep 9, 2021
a2441b5
modify as comments
Yaxuan-Lii Sep 9, 2021
f59b50a
mistakes fix
Yaxuan-Lii Sep 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 258 additions & 0 deletions galore/cross_sections.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,261 @@ def _eval_fit(energy, coeffs):
orb: _eval_fit(energy, np_fromstr(coeffs))
for orb, coeffs in orbitals_fits}})
return el_cross_sections



import tarfile
import numpy as np


def read_csv_file(tar_file_name, file_path):
"""
Generate data from csv archive without decompression

Args:
tar_file_name (str): path to tarfile of CSV data
file_path(str): path to individual CSV file within tarfile

Returns:
dict: containing 'headers', 'electron_counts'
(lists of str and int respectively) and 'data_table',
a 2-D nested list of floats. Missing data is represented as None.

"""

# Open zipfile
with tarfile.open(tar_file_name) as tf:
with tf.extractfile(file_path) as hello:
# get data as string
data = hello.read().decode()
# string to list
data_string = data.split('\r\n')

# get number of colunm headers
colunm_headers = [colunm_header for colunm_header in data_string[0].split(
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
',') if colunm_header != '']
lenth = len(colunm_headers)

# build main matrix
main_matrix = []
rows = range(len(data_string))
for row in rows:
data_each_row = data_string[row].split(',')[0:lenth]
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
main_matrix.append(data_each_row)

# build cross sections table
empty_value = main_matrix[-2]
# remove empty values
midterm = [row for row in main_matrix if row != empty_value]
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
data_table = midterm[1:-2]
electron_counts_list = midterm[-2]
electron_counts = [i for i in electron_counts_list if i != ''][1:]
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
# build result dict
result_dict = {}
result_dict['headers'] = colunm_headers
result_dict['electron_counts'] = electron_counts
result_dict['data_table'] = data_table
ajjackson marked this conversation as resolved.
Show resolved Hide resolved

return result_dict


def get_avg_orbital_cross_sections(subshells_cross_sections, electrons_number):
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
"""
Obtain average cross sections of subshell of each obital when process Scofield data.

Args:
subshell_cross_sections(np.array): subshells cross sections of a certain obital
correspond to a given input energy.
electrons_number(np.array): corresponding electrons number of subshells

ajjackson marked this conversation as resolved.
Show resolved Hide resolved
Note: subshells are like ('2p1/2', '2p3/2', '3p1/2', '3p3/2',...) for obital p

Returns:
avg_orbital_cross_sections(np.array): average cross sections of obitals

Note: the above array are like ('2p', '3p',...) for obital p
"""

subshells_cross_section_arrays = np.array_split(
subshells_cross_sections, 2)
electrons_numbers_arrays = np.array_split(electrons_number, 2)

subshells_cross_sections_sum = np.array(
list(map(sum, subshells_cross_section_arrays)))

subshells_electrons_number_sum = np.array(
list(map(sum, electrons_numbers_arrays)))

avg_orbital_cross_sections = np.true_divide(
subshells_cross_sections_sum, subshells_electrons_number_sum)

return avg_orbital_cross_sections


def _cross_sections_from_csv_data(energy, data, reference):
"""
Get cross sections from data dict
For known sources, data is based on tabulation of Yeh/Lindau (1985).[1]
Otherwise, energies in keV from 1-1500 are used with log-log polynomial
parametrisation of data from Scofield.[2]

References:
1. Yeh, J.J. and Lindau, I. (1985)
Atomic Data and Nuclear Data Tables 32 pp 1-155
2. J. H. Scofield (1973) Lawrence Livermore National Laboratory
Report No. UCRL-51326

Args:
energy(float): energy value
data(dict): data from read_csv_file()
reference(str): 'Scofield' or 'Yeh'

Returns:
orbitals_cross_sections_dict: containing orbitals 's', 'p', 'd', 'f' and
maximum cross sections of each orbital.
Missing data is represented as None.

"""

# replace '' in data table with NaN
for row in range(len(data['data_table'])):
data['data_table'][row] = [
float('NaN') if x == '' else x for x in data['data_table'][row]]

# change the data_table and electron_counts to float arrays
data['data_table'] = np.array(data['data_table']).astype(float).T
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
data['electron_counts'] = np.array(data['electron_counts']).astype(float)

# Build a dict_for_calculation which keys are 'PhotonEnergy' and orbitals like '1s1/2', '2s1/2', '2p1/2', '2p3/2', '3s1/2', '3p1/2', '3p3/2'...
# and values are connected cross sections and number of electrons of each orbital
# This is for calculating the max cross sections of 's', 'p', 'd', 'f' orbitals
new_lenth = len(data['electron_counts'])
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
cross_sections_by_orbitals = data['data_table'][-new_lenth:]
orbital_headers = data['headers'][-new_lenth:]
ajjackson marked this conversation as resolved.
Show resolved Hide resolved

cross_sections_and_electron_numbers = np.concatenate(
(cross_sections_by_orbitals.T, [data['electron_counts']]), axis=0).T
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
dict_for_calculation = dict(
zip(orbital_headers, cross_sections_and_electron_numbers))
energy_array = data['data_table'][0]
dict_for_calculation['PhotonEnergy'] = energy_array

# match the import energy
index = np.where(dict_for_calculation['PhotonEnergy'] == energy)[0]
ajjackson marked this conversation as resolved.
Show resolved Hide resolved

# build result dict
orbitals_cross_sections_dict = {}

# result for s orbital
s_cross_sections = np.array([dict_for_calculation[key]
for key in dict_for_calculation if 's' in key]).T[index]
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
electrons_number = np.array([dict_for_calculation[key]
for key in dict_for_calculation if 's' in key]).T[-1]
# get unit cross sections
unit_cross_sections = np.true_divide(s_cross_sections, electrons_number)
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
# get max cross section of obital s
max_cross_section = np.max(np.nan_to_num(unit_cross_sections))
orbitals_cross_sections_dict['s'] = max_cross_section
ajjackson marked this conversation as resolved.
Show resolved Hide resolved

# result for 'p', 'd', 'f' orbitals
orbitals = ['p', 'd', 'f']
for orbital in orbitals:
interm_matrix = np.array([dict_for_calculation[key]
for key in dict_for_calculation if orbital in key]).T

if np.shape(interm_matrix) != (0,):
if reference == 'Scofield':
subshells_cross_sections = interm_matrix[index][0]

electrons_number = interm_matrix[-1]

result = get_avg_orbital_cross_sections(
subshells_cross_sections, electrons_number)

# get max cross section of this obital
max_cross_section = np.max(np.nan_to_num(result))
orbitals_cross_sections_dict[orbital] = max_cross_section

elif reference == 'Yeh':
obital_cross_sections = interm_matrix[index][0]
electrons_number = interm_matrix[-1]
unit_cross_sections = np.true_divide(
obital_cross_sections, electrons_number)
# get max cross section of this obital
max_cross_section = np.max(np.nan_to_num(unit_cross_sections))
orbitals_cross_sections_dict[orbital] = max_cross_section
ajjackson marked this conversation as resolved.
Show resolved Hide resolved

return orbitals_cross_sections_dict


def get_metadata(energy, reference):
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
"""
Args:
energy(float): energy value
reference(str): 'Scofield' or 'Yeh'

Note: 1.'Scofield' for J. H. Scofield (1973)
Lawrence Livermore National Laboratory Report No. UCRL-51326
2.'Yeh' for Yeh, J.J. and Lindau, I. (1985)
Atomic Data and Nuclear Data Tables 32 pp 1-155

Returns:
metadata_dict: containing the input energy value
and description of input reference

"""

metadata_dict = {}
metadata_dict['energy'] = energy
if reference == 'Scofield':
metadata_dict['reference'] = 'J.H. Scofield, Theoretical photoionization cross sections from 1 to 1500 keV'
metadata_dict['link'] = 'https://doi.org/10.2172/4545040'
elif reference == 'Yeh':
metadata_dict['reference'] = 'Yeh, J.J. and Lindau, I. (1985) Atomic Data and Nuclear Data Tables 32 pp 1-155'
metadata_dict['link'] = 'https://doi.org/10.1016/0092-640X(85)90016-6'
else:
metadata_dict('Wrong reference')
return metadata_dict


def get_cross_section_from_csv(elements, energy, reference):
"""
Args:
elements(string list): element name list
for Scofiled data such as ['Z__1_H_','Z_13_Al',....]
for Yeh data such as ['1_H','13_Al',...]

energy(float): energy value
reference(str): 'Scofield' or 'Yeh'

Note: 1.'Scofield' for J. H. Scofield (1973)
Lawrence Livermore National Laboratory Report No. UCRL-51326
2.'Yeh' for Yeh, J.J. and Lindau, I. (1985)
Atomic Data and Nuclear Data Tables 32 pp 1-155

Returns:
result(dict): containing energy value, reference information,
and orbital cross sections dict of input elements

"""

result = {}
metadata = get_metadata(energy, reference)
result.update(metadata)

for element in elements:

if reference == 'Scofield':
ajjackson marked this conversation as resolved.
Show resolved Hide resolved
filename = 'Scofield_csv_database.tar.gz'
filepath = 'Scofield_csv_database/{element1}.csv'
else:
filename = 'Yeh_Lindau_1985_Xsection_CSV_Database.tar.gz'
filepath = 'Yeh_Lindau_1985_Xsection_CSV_Database/{element1}.csv'

filepath = filepath.format(element1=element)
data = read_csv_file(filename, filepath)
ajjackson marked this conversation as resolved.
Show resolved Hide resolved

cross_sections = _cross_sections_from_csv_data(energy, data, reference)
result[element] = cross_sections
ajjackson marked this conversation as resolved.
Show resolved Hide resolved

return result