Skip to content

Commit

Permalink
fs counter handler easier to maintain
Browse files (browse the repository at this point in the history)
  • Loading branch information
arcangelo7 committed Aug 11, 2024
1 parent f4bbfd6 commit bed8f98
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 237 deletions.
152 changes: 40 additions & 112 deletions oc_ocdm/counter_handler/filesystem_counter_handler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -109,11 +109,11 @@ def _set_numbers(self, file_path: str, updates: Dict[int, int]) -> None:

# Ensure the lines list is long enough
while len(lines) < max_line_number + 1:
lines.append(" \n") # Default counter value
lines.append("\n") # Default counter value

# Apply updates
for line_number, new_value in updates.items():
lines[line_number-1] = str(new_value).rstrip() + " \n"
lines[line_number-1] = str(new_value).rstrip() + "\n"

# Write updated lines back to file
with open(file_path, 'w') as file:
Expand All @@ -139,7 +139,7 @@ def read_counter(self, entity_short_name: str, prov_short_name: str = "", identi
file_path: str = self._get_prov_path(entity_short_name, supplier_prefix)
else:
file_path: str = self._get_info_path(entity_short_name, supplier_prefix)
return self._read_number(file_path, identifier)[0]
return self._read_number(file_path, identifier)

def increment_counter(self, entity_short_name: str, prov_short_name: str = "", identifier: int = 1, supplier_prefix: str = "") -> int:
"""
Expand Down Expand Up @@ -181,54 +181,44 @@ def __initialize_file_if_not_existing(self, file_path: str):
os.makedirs(os.path.dirname(file_path))

if not os.path.isfile(file_path):
with open(file_path, 'wb') as file:
first_line: str = self._trailing_char * (self._initial_line_len - 1) + '\n'
file.write(first_line.encode('ascii'))
with open(file_path, 'w') as file:
file.write("\n")

def _read_number(self, file_path: str, line_number: int) -> Tuple[int, int]:
def _read_number(self, file_path: str, line_number: int) -> int:
if line_number <= 0:
raise ValueError("line_number must be a positive non-zero integer number!")

self.__initialize_file_if_not_existing(file_path)

cur_number: int = 0
cur_line_len: int = 0
try:
with open(file_path, 'rb') as file:
cur_line_len = self._get_line_len(file)
line_offset = (line_number - 1) * cur_line_len
file.seek(line_offset)
line = file.readline(cur_line_len).decode('ascii')
cur_number = int(line.rstrip(self._trailing_char + '\n'))
except ValueError:
with open(file_path, 'r') as file:
for i, line in enumerate(file, 1):
if i == line_number:
line = line.strip()
if line:
cur_number = int(line)
break
else:
print(file_path)
print(f"WARNING: Line {line_number} not found in file")
except ValueError as e:
print(f"ValueError: {e}")
cur_number = 0
except Exception as e:
print(e)

return cur_number, cur_line_len
print(f"Unexpected error: {e}")
return cur_number

def _add_number(self, file_path: str, line_number: int = 1) -> int:
if line_number <= 0:
raise ValueError("line_number must be a positive non-zero integer number!")

self.__initialize_file_if_not_existing(file_path)

cur_number, cur_line_len = self._read_number(file_path, line_number)
cur_number += 1

cur_number_len: int = len(str(cur_number)) + 1
if cur_number_len > cur_line_len:
self._increase_line_len(file_path, new_length=cur_number_len)
cur_line_len = cur_number_len

with open(file_path, 'r+b') as file:
line_offset: int = (line_number - 1) * cur_line_len
file.seek(line_offset)
line: str = str(cur_number).ljust(cur_line_len - 1, self._trailing_char) + '\n'
file.write(line.encode('ascii'))
file.seek(-cur_line_len, os.SEEK_CUR)
self._fix_previous_lines(file, cur_line_len)
return cur_number
current_value = self._read_number(file_path, line_number)
new_value = current_value + 1
self._set_number(new_value, file_path, line_number)
return new_value

def _set_number(self, new_value: int, file_path: str, line_number: int = 1) -> None:
if new_value < 0:
Expand All @@ -239,83 +229,21 @@ def _set_number(self, new_value: int, file_path: str, line_number: int = 1) -> N

self.__initialize_file_if_not_existing(file_path)

cur_line_len = self._read_number(file_path, line_number)[1]

cur_number_len: int = len(str(new_value)) + 1
if cur_number_len > cur_line_len:
self._increase_line_len(file_path, new_length=cur_number_len)
cur_line_len = cur_number_len

with open(file_path, 'r+b') as file:
line_offset: int = (line_number - 1) * cur_line_len
file.seek(line_offset)
line: str = str(new_value).ljust(cur_line_len - 1, self._trailing_char) + '\n'
file.write(line.encode('ascii'))
file.seek(-cur_line_len, os.SEEK_CUR)
self._fix_previous_lines(file, cur_line_len)

@staticmethod
def _get_line_len(file: BinaryIO) -> int:
cur_char: str = file.read(1).decode("ascii")
count: int = 1
while cur_char is not None and len(cur_char) == 1 and cur_char != "\0":
cur_char = file.read(1).decode("ascii")
count += 1
if cur_char == "\n":
break

# Undo I/O pointer updates
file.seek(0)

if cur_char is None:
raise EOFError("Reached end-of-file without encountering a line separator!")
elif cur_char == "\0":
raise ValueError("Encountered a NULL byte!")
else:
return count

def _increase_line_len(self, file_path: str, new_length: int = 0) -> None:
if new_length <= 0:
raise ValueError("new_length must be a positive non-zero integer number!")

with open(file_path, 'rb') as cur_file:
if self._get_line_len(cur_file) >= new_length:
raise ValueError("Current line length is greater than new_length!")

fh, abs_path = mkstemp()
with os.fdopen(fh, 'wb') as new_file:
with open(file_path, 'rt', encoding='ascii') as old_file:
for line in old_file:
number: str = line.rstrip(self._trailing_char + '\n')
new_line: str = str(number).ljust(new_length - 1, self._trailing_char) + '\n'
new_file.write(new_line.encode('ascii'))

# Copy the file permissions from the old file to the new file
copymode(file_path, abs_path)

# Replace original file
os.remove(file_path)
move(abs_path, file_path)

@staticmethod
def _is_a_valid_line(buf: bytes) -> bool:
string: str = buf.decode("ascii")
return (string[-1] == "\n") and ("\0" not in string[:-1])

def _fix_previous_lines(self, file: BinaryIO, line_len: int) -> None:
if line_len < self._initial_line_len:
raise ValueError("line_len should be at least %d!" % self._initial_line_len)

while file.tell() >= line_len:
file.seek(-line_len, os.SEEK_CUR)
buf: bytes = file.read(line_len)
if self._is_a_valid_line(buf) or len(buf) < line_len:
break
else:
file.seek(-line_len, os.SEEK_CUR)
fixed_line: str = (self._trailing_char * (line_len - 1)) + "\n"
file.write(fixed_line.encode("ascii"))
file.seek(-line_len, os.SEEK_CUR)
lines = []
with open(file_path, 'r') as file:
lines = file.readlines()

# Ensure the file has enough lines
while len(lines) < line_number:
lines.append("\n")

# Update the specific line
lines[line_number - 1] = f"{new_value}\n"

# Write back to the file
with open(file_path, 'w') as file:
file.writelines(lines)


def set_metadata_counter(self, new_value: int, entity_short_name: str, dataset_name: str) -> None:
"""
Expand Down Expand Up @@ -361,7 +289,7 @@ def read_metadata_counter(self, entity_short_name: str, dataset_name: str) -> in
raise ValueError("entity_short_name is not a known metadata short name!")

file_path: str = self._get_metadata_path(entity_short_name, dataset_name)
return self._read_number(file_path, 1)[0]
return self._read_number(file_path, 1)

def increment_metadata_counter(self, entity_short_name: str, dataset_name: str) -> int:
"""
Expand Down
4 changes: 2 additions & 2 deletions oc_ocdm/reader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -164,9 +164,9 @@ def graph_validation(self, graph: Graph, closed: bool = False) -> Graph:
return valid_graph

@staticmethod
def import_entities_from_graph(g_set: GraphSet, results: List[Dict], resp_agent: str,
def import_entities_from_graph(g_set: GraphSet, results: List[Dict]|Graph, resp_agent: str,
enable_validation: bool = False, closed: bool = False) -> List[GraphEntity]:
graph = build_graph_from_results(results)
graph = build_graph_from_results(results) if isinstance(results, list) else results
if enable_validation:
reader = Reader()
graph = reader.graph_validation(graph, closed)
Expand Down
142 changes: 29 additions & 113 deletions oc_ocdm/test/counter_handler/test_filesystem_counter_handler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,141 +29,48 @@ def setUpClass(cls) -> None:
if not os.path.exists(os.path.dirname(cls.file_path)):
os.makedirs(os.path.dirname(cls.file_path))

if not os.path.isfile(cls.file_path):
with open(cls.file_path, 'wb') as file:
first_line = cls.counter_handler._trailing_char * (cls.counter_handler._initial_line_len - 1) + '\n'
file.write(first_line.encode('ascii'))

def setUp(self):
# Reset test file content:
with open(self.file_path, 'wb') as file:
first_line = self.counter_handler._trailing_char * (self.counter_handler._initial_line_len - 1) + '\n'
file.write(first_line.encode('ascii'))

def test_get_line_len(self):
with open(self.file_path, 'rb') as test_file:
line_len = self.counter_handler._get_line_len(test_file)
self.assertIsNotNone(line_len)
self.assertEqual(line_len, self.counter_handler._initial_line_len)
self.assertEqual(test_file.tell(), 0)

def test_increase_line_len(self):
increment = 1
result = self.counter_handler._increase_line_len(self.file_path,
self.counter_handler._initial_line_len + increment)
self.assertIsNone(result)
with open(self.file_path, 'rt', encoding='ascii') as test_file:
for line in test_file:
self.assertEqual(len(line), self.counter_handler._initial_line_len + increment)

self.assertRaises(ValueError, self.counter_handler._increase_line_len, self.file_path, -1)
self.assertRaises(ValueError, self.counter_handler._increase_line_len, self.file_path,
self.counter_handler._initial_line_len - 1)

def test_is_a_valid_line(self):
with self.subTest("line is 'abc \\n'"):
line = 'abc \n'.encode('ascii')
result = self.counter_handler._is_a_valid_line(line)
self.assertIsNotNone(result)
self.assertTrue(result)
with self.subTest("line is 'a\\0c \\n'"):
line = 'a\0c \n'.encode('ascii')
result = self.counter_handler._is_a_valid_line(line)
self.assertIsNotNone(result)
self.assertFalse(result)
with self.subTest("line is 'abc'"):
line = 'abc'.encode('ascii')
result = self.counter_handler._is_a_valid_line(line)
self.assertIsNotNone(result)
self.assertFalse(result)
with self.subTest("line is 'a\\0c'"):
line = 'a\\0c'.encode('ascii')
result = self.counter_handler._is_a_valid_line(line)
self.assertIsNotNone(result)
self.assertFalse(result)

def test_fix_previous_lines(self):
with open(self.file_path, 'wb') as test_file:
num_lines = 10
for i in range(0, num_lines):
line = '\0' * self.counter_handler._initial_line_len
test_file.write(line.encode('ascii'))
last_line = '1'.ljust(self.counter_handler._initial_line_len - 1, self.counter_handler._trailing_char) + '\n'
test_file.write(last_line.encode('ascii'))

with open(self.file_path, 'r+b') as test_file:
test_file.seek(self.counter_handler._initial_line_len * num_lines)
result = self.counter_handler._fix_previous_lines(test_file, self.counter_handler._initial_line_len)
self.assertIsNone(result)

with open(self.file_path, 'rt', encoding='ascii') as test_file:
count = 0
for line in test_file:
count += 1
if count >= num_lines:
break
self.assertTrue(self.counter_handler._is_a_valid_line(line.encode('ascii')))
with open(self.file_path, 'w') as file:
file.write("\n")

def test_set_number(self):
number = 18
with open(self.file_path, 'r+b') as test_file:
num_of_line = 35
test_file.seek(self.counter_handler._initial_line_len * (num_of_line - 1))
line = str(number).ljust(self.counter_handler._initial_line_len - 1, self.counter_handler._trailing_char) + '\n'
test_file.write(line.encode('ascii'))

new_number = 205
result = self.counter_handler._set_number(new_number, self.file_path, num_of_line)
self.assertIsNone(result)
with open(self.file_path, 'rt', encoding='ascii') as test_file:
count = 0
for line in test_file:
count += 1
if count >= num_of_line:
self.assertEqual(int(line), new_number)
break
self.assertTrue(self.counter_handler._is_a_valid_line(line.encode('ascii')))

num_of_line = 35
self.counter_handler._set_number(number, self.file_path, num_of_line)
read_number = self.counter_handler._read_number(self.file_path, num_of_line)
self.assertEqual(read_number, number)
self.assertRaises(ValueError, self.counter_handler._set_number, -1, self.file_path, 1)
self.assertRaises(ValueError, self.counter_handler._set_number, 1, self.file_path, -1)

def test_read_number(self):
number = 18
with open(self.file_path, 'r+b') as test_file:
num_of_line = 35
test_file.seek(self.counter_handler._initial_line_len * (num_of_line - 1))
line = str(number).ljust(self.counter_handler._initial_line_len - 1, self.counter_handler._trailing_char) + '\n'
test_file.write(line.encode('ascii'))

read_number, line_len = self.counter_handler._read_number(self.file_path, num_of_line)
self.assertIsNotNone(read_number)
self.assertIsNotNone(line_len)
num_of_line = 35
self.counter_handler._set_number(number, self.file_path, num_of_line)

read_number = self.counter_handler._read_number(self.file_path, num_of_line)
self.assertEqual(read_number, number)
self.assertEqual(line_len, self.counter_handler._initial_line_len)

self.assertRaises(ValueError, self.counter_handler._read_number, self.file_path, -1)

def test_add_number(self):
number = 18
with open(self.file_path, 'r+b') as test_file:
num_of_line = 35
test_file.seek(self.counter_handler._initial_line_len * (num_of_line - 1))
line = str(number).ljust(self.counter_handler._initial_line_len - 1, self.counter_handler._trailing_char) + '\n'
test_file.write(line.encode('ascii'))
num_of_line = 35
self.counter_handler._set_number(number, self.file_path, num_of_line)

read_number = self.counter_handler._add_number(self.file_path, num_of_line)
self.assertIsNotNone(read_number)
self.assertEqual(read_number, number + 1)
with open(self.file_path, 'rt', encoding='ascii') as test_file:
count = 0
for line in test_file:
count += 1
if count >= num_of_line:
break
self.assertTrue(self.counter_handler._is_a_valid_line(line.encode('ascii')))

self.assertRaises(ValueError, self.counter_handler._add_number, self.file_path, -1)

def test_set_counters_batch(self):
updates = {("br", "se"): {1: 10, 2: 20, 3: 30}}
self.counter_handler.set_counters_batch(updates, "")

for line_number, expected_value in updates[("br", "se")].items():
read_value = self.counter_handler.read_counter("br", "se", line_number)
self.assertEqual(read_value, expected_value)

def test_read_metadata_counter(self):
dataset_name: str = "http://dataset/"
self.assertRaises(ValueError, self.counter_handler.read_metadata_counter, "xyz", dataset_name)
Expand All @@ -174,6 +81,15 @@ def test_increment_metadata_counter(self):
self.assertRaises(ValueError, self.counter_handler.increment_metadata_counter, "xyz", dataset_name)
self.assertRaises(ValueError, self.counter_handler.increment_metadata_counter, "di", None)

def test_set_metadata_counter(self):
dataset_name = "http://dataset/"
entity_short_name = "di"
value = 42

self.counter_handler.set_metadata_counter(value, entity_short_name, dataset_name)
read_value = self.counter_handler.read_metadata_counter(entity_short_name, dataset_name)
self.assertEqual(read_value, value)


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit bed8f98

Please sign in to comment.