diff --git a/cliboa/scenario/transform/file.py b/cliboa/scenario/transform/file.py index af3d4e10..736c3094 100644 --- a/cliboa/scenario/transform/file.py +++ b/cliboa/scenario/transform/file.py @@ -422,6 +422,7 @@ def __init__(self): super().__init__() self._divide_rows = None self._header = False + self._suffix_pattern = ".%d" def divide_rows(self, divide_rows): self._divide_rows = divide_rows @@ -429,6 +430,9 @@ def divide_rows(self, divide_rows): def header(self, header): self._header = header + def suffix_pattern(self, suffix_pattern): + self._suffix_pattern = suffix_pattern + def execute(self, *args): valid = EssentialParameters( self.__class__.__name__, @@ -459,7 +463,7 @@ def execute(self, *args): self._header_row = i.readline() row = self._ifile_reader(file) - newfilename = px + nameonly + ".%s" + ext + newfilename = px + nameonly + self._suffix_pattern + ext if self._dest_dir: os.makedirs(self._dest_dir, exist_ok=True) @@ -469,7 +473,7 @@ def execute(self, *args): has_left = True index = 1 while has_left: - ofile_path = os.path.join(dest_dir, newfilename % str(index)) + ofile_path = os.path.join(dest_dir, newfilename % index) has_left = self._ofile_generator(ofile_path, row) index = index + 1 diff --git a/cliboa/test/scenario/transform/test_file.py b/cliboa/test/scenario/transform/test_file.py index b24574e5..2001fd82 100644 --- a/cliboa/test/scenario/transform/test_file.py +++ b/cliboa/test/scenario/transform/test_file.py @@ -653,6 +653,99 @@ def test_execute_ok_4(self): else: break + def test_execute_ok_5(self): + file1 = os.path.join(self._data_dir, "test.txt") + with open(file1, mode="w", encoding="utf-8") as f: + f.write("idx\n") + for i in range(100): + f.write("%s\n" % str(i)) + + instance = FileDivide() + Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__)) + Helper.set_property(instance, "src_dir", self._data_dir) + Helper.set_property(instance, "src_pattern", r"test\.txt") + Helper.set_property(instance, "dest_dir", 
self._out_dir) + Helper.set_property(instance, "divide_rows", 10) + Helper.set_property(instance, "header", True) + Helper.set_property(instance, "suffix_pattern", "_%d") + instance.execute() + + row_index = 0 + for i in range(1, 11): + file = os.path.join(self._out_dir, "test_%s.txt" % i) + assert os.path.exists(file) + with open(file, "r", encoding="utf-8", newline="") as f: + line = f.readline() + assert line == "idx\n" + while line: + line = f.readline() + if line: + assert str(row_index) == line.splitlines()[0] + row_index += 1 + + def test_execute_ok_6(self): + file1 = os.path.join(self._data_dir, "test.txt") + with open(file1, mode="w", encoding="utf-8") as f: + f.write("idx\n") + for i in range(100): + f.write("%s\n" % str(i)) + + instance = FileDivide() + Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__)) + Helper.set_property(instance, "src_dir", self._data_dir) + Helper.set_property(instance, "src_pattern", r"test\.txt") + Helper.set_property(instance, "dest_dir", self._out_dir) + Helper.set_property(instance, "divide_rows", 10) + Helper.set_property(instance, "header", True) + Helper.set_property(instance, "suffix_pattern", ".%02d") + instance.execute() + + row_index = 0 + for i in range(1, 11): + if i < 10: + file = os.path.join(self._out_dir, "test.0%s.txt" % i) + else: + file = os.path.join(self._out_dir, "test.%s.txt" % i) + assert os.path.exists(file) + with open(file, "r", encoding="utf-8", newline="") as f: + line = f.readline() + assert line == "idx\n" + while line: + line = f.readline() + if line: + assert str(row_index) == line.splitlines()[0] + row_index += 1 + + def test_execute_ok_7(self): + file1 = os.path.join(self._data_dir, "test.txt") + with open(file1, mode="w", encoding="utf-8") as f: + f.write("idx\n") + for i in range(100): + f.write("%s\n" % str(i)) + + instance = FileDivide() + Helper.set_property(instance, "logger", LisboaLog.get_logger(__name__)) + Helper.set_property(instance, "src_dir", self._data_dir) + 
Helper.set_property(instance, "src_pattern", r"test\.txt") + Helper.set_property(instance, "dest_dir", self._out_dir) + Helper.set_property(instance, "divide_rows", 1) + Helper.set_property(instance, "header", True) + Helper.set_property(instance, "suffix_pattern", ".%1d") + instance.execute() + + row_index = 0 + for i in range(1, 101): + file = os.path.join(self._out_dir, "test.%s.txt" % i) + assert os.path.exists(file) + with open(file, "r", encoding="utf-8", newline="") as f: + line = f.readline() + assert line == "idx\n" + while line: + line = f.readline() + if line: + assert str(row_index) == line.splitlines()[0] + row_index += 1 + class TestFileRename(TestFileTransform): def test_execute_ok(self): diff --git a/docs/modules/file_divide.md b/docs/modules/file_divide.md index 46f3aafc..fb6d1879 100644 --- a/docs/modules/file_divide.md +++ b/docs/modules/file_divide.md @@ -6,17 +6,18 @@ Either way index number of divided count will be added for the suffix of the new Ex. foo.txt -> [ foo.1.txt, foo.2.txt, foo.3.txt ... ] # Parameters -|Parameters|Explanation|Required|Default|Remarks| -|----------|-----------|--------|-------|-------| -|src_dir|Path of the directory which target files are placed.|Yes|None|| -|src_pattern|Regex which is to find target files.|Yes|None|| -|dest_dir|Path of the directory which is for output files.|No|None|If this parameter is not set, the file is created in the same directory as the processing file. 
If a non-existent directory path is specified, the directory is automatically created.| -|divide_rows|Number of the rows of individual files after divided|Yes|None|| -|header|Whether if header is added to the divided files|No|False|If True, Original file's header will be added to the all divided files.| -|encoding|Character encoding|No|utf-8||| -|nonfile_error|Whether an error is thrown when files are not found in src_dir.|No|False|| - -# Examples +| Parameters | Explanation | Required | Default | Remarks | +|----------------|-----------------------------------------------------------------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| src_dir | Path of the directory which target files are placed. | Yes | None | | +| src_pattern | Regex which is to find target files. | Yes | None | | +| dest_dir | Path of the directory which is for output files. | No | None | If this parameter is not set, the file is created in the same directory as the processing file. If a non-existent directory path is specified, the directory is automatically created. | +| divide_rows | Number of the rows of individual files after divided. | Yes | None | | +| header | Whether if header is added to the divided files. | No | False | If True, Original file's header will be added to the all divided files. | +| encoding | Character encoding. | No | utf-8 | | +| nonfile_error | Whether an error is thrown when files are not found in src_dir. | No | False | | +| suffix_pattern | The pattern of symbols to use as a suffix when splitting files. 
| No | .%d | | + +# Example1 ``` scenario: - step: @@ -51,3 +52,40 @@ id, name id, name 5, five ``` + +# Example2 +``` +scenario: +- step: + class: FileDivide + arguments: + src_dir: /in + src_pattern: test\.csv + dest_dir: /out + divide_rows: 2 + header: True + suffix_pattern: _%02d + +Input: /in/test.csv +id, name +1, one +2, two +3, three +4, four +5, five + +Output: +/out/test_01.csv +id, name +1, one +2, two + +/out/test_02.csv +id, name +3, three +4, four + +/out/test_03.csv +id, name +5, five +```