Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow changing the suffix in the FileDivide class #453

Merged
merged 1 commit into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cliboa/scenario/transform/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,13 +422,17 @@ def __init__(self):
super().__init__()
self._divide_rows = None
self._header = False
self._suffix_pattern = ".%d"

def divide_rows(self, divide_rows):
    """Store the number of rows each divided file should contain."""
    self._divide_rows = divide_rows

def header(self, header):
    """Store whether the source file's header row is added to the divided files."""
    self._header = header

def suffix_pattern(self, suffix_pattern):
    """Store the printf-style pattern used to build each divided file's index suffix."""
    self._suffix_pattern = suffix_pattern

def execute(self, *args):
valid = EssentialParameters(
self.__class__.__name__,
Expand Down Expand Up @@ -459,7 +463,7 @@ def execute(self, *args):
self._header_row = i.readline()

row = self._ifile_reader(file)
newfilename = px + nameonly + ".%s" + ext
newfilename = px + nameonly + self._suffix_pattern + ext

if self._dest_dir:
os.makedirs(self._dest_dir, exist_ok=True)
Expand All @@ -469,7 +473,7 @@ def execute(self, *args):
has_left = True
index = 1
while has_left:
ofile_path = os.path.join(dest_dir, newfilename % str(index))
ofile_path = os.path.join(dest_dir, newfilename % index)
has_left = self._ofile_generator(ofile_path, row)
index = index + 1

Expand Down
93 changes: 93 additions & 0 deletions cliboa/test/scenario/transform/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,99 @@ def test_execute_ok_4(self):
else:
break

def test_execute_ok_5(self):
    """Divide a 100-row file into ten files named with the "_%d" suffix pattern."""
    src_path = os.path.join(self._data_dir, "test.txt")
    with open(src_path, mode="w", encoding="utf-8") as src:
        src.write("idx\n")
        src.writelines("%s\n" % str(n) for n in range(100))

    instance = FileDivide()
    properties = (
        ("logger", LisboaLog.get_logger(__name__)),
        ("src_dir", self._data_dir),
        ("src_pattern", r"test\.txt"),
        ("dest_dir", self._out_dir),
        ("divide_rows", 10),
        ("header", True),
        ("suffix_pattern", "_%d"),
    )
    for key, value in properties:
        Helper.set_property(instance, key, value)
    instance.execute()

    # The expected row value keeps counting across the divided files.
    expected_row = 0
    for file_no in range(1, 11):
        divided = os.path.join(self._out_dir, "test_%s.txt" % file_no)
        assert os.path.exists(divided)
        with open(divided, "r", encoding="utf-8", newline="") as f:
            # Every divided file must start with the original header.
            assert f.readline() == "idx\n"
            for line in iter(f.readline, ""):
                assert str(expected_row) == line.splitlines()[0]
                expected_row += 1

def test_execute_ok_6(self):
    """Divide a 100-row file into ten files named with the zero-padded ".%02d" pattern."""
    src_path = os.path.join(self._data_dir, "test.txt")
    with open(src_path, mode="w", encoding="utf-8") as src:
        src.write("idx\n")
        src.writelines("%s\n" % str(n) for n in range(100))

    instance = FileDivide()
    properties = (
        ("logger", LisboaLog.get_logger(__name__)),
        ("src_dir", self._data_dir),
        ("src_pattern", r"test\.txt"),
        ("dest_dir", self._out_dir),
        ("divide_rows", 10),
        ("header", True),
        ("suffix_pattern", ".%02d"),
    )
    for key, value in properties:
        Helper.set_property(instance, key, value)
    instance.execute()

    # The expected row value keeps counting across the divided files.
    expected_row = 0
    for file_no in range(1, 11):
        # Single-digit indexes are expected to be zero-padded to two digits.
        name_format = "test.0%s.txt" if file_no < 10 else "test.%s.txt"
        divided = os.path.join(self._out_dir, name_format % file_no)
        assert os.path.exists(divided)
        with open(divided, "r", encoding="utf-8", newline="") as f:
            # Every divided file must start with the original header.
            assert f.readline() == "idx\n"
            for line in iter(f.readline, ""):
                assert str(expected_row) == line.splitlines()[0]
                expected_row += 1

def test_execute_ok_7(self):
    """Divide a 100-row file into one-row files named with the ".%1d" pattern."""
    src_path = os.path.join(self._data_dir, "test.txt")
    with open(src_path, mode="w", encoding="utf-8") as src:
        src.write("idx\n")
        src.writelines("%s\n" % str(n) for n in range(100))

    instance = FileDivide()
    properties = (
        ("logger", LisboaLog.get_logger(__name__)),
        ("src_dir", self._data_dir),
        ("src_pattern", r"test\.txt"),
        ("dest_dir", self._out_dir),
        ("divide_rows", 1),
        ("header", True),
        ("suffix_pattern", ".%1d"),
    )
    for key, value in properties:
        Helper.set_property(instance, key, value)
    instance.execute()

    # The expected row value keeps counting across the divided files.
    expected_row = 0
    for file_no in range(1, 101):
        divided = os.path.join(self._out_dir, "test.%s.txt" % file_no)
        assert os.path.exists(divided)
        with open(divided, "r", encoding="utf-8", newline="") as f:
            # Every divided file must start with the original header.
            assert f.readline() == "idx\n"
            for line in iter(f.readline, ""):
                assert str(expected_row) == line.splitlines()[0]
                expected_row += 1


class TestFileRename(TestFileTransform):
def test_execute_ok(self):
Expand Down
60 changes: 49 additions & 11 deletions docs/modules/file_divide.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@ Either way index number of divided count will be added for the suffix of the new
Ex. foo.txt -> [ foo.1.txt, foo.2.txt, foo.3.txt ... ]

# Parameters
|Parameters|Explanation|Required|Default|Remarks|
|----------|-----------|--------|-------|-------|
|src_dir|Path of the directory which target files are placed.|Yes|None||
|src_pattern|Regex which is to find target files.|Yes|None||
|dest_dir|Path of the directory which is for output files.|No|None|If this parameter is not set, the file is created in the same directory as the processing file. If a non-existent directory path is specified, the directory is automatically created.|
|divide_rows|Number of the rows of individual files after divided|Yes|None||
|header|Whether if header is added to the divided files|No|False|If True, Original file's header will be added to the all divided files.|
|encoding|Character encoding|No|utf-8|||
|nonfile_error|Whether an error is thrown when files are not found in src_dir.|No|False||

# Examples
| Parameters | Explanation | Required | Default | Remarks |
|----------------|-----------------------------------------------------------------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| src_dir | Path of the directory in which the target files are placed. | Yes | None | |
| src_pattern | Regular expression used to find the target files. | Yes | None | |
| dest_dir | Path of the directory for the output files. | No | None | If this parameter is not set, the files are created in the same directory as the file being processed. If a non-existent directory path is specified, the directory is created automatically. |
| divide_rows | Number of rows in each file after division. | Yes | None | |
| header | Whether the header is added to the divided files. | No | False | If True, the original file's header is added to every divided file. |
| encoding | Character encoding. | No | utf-8 | |
| nonfile_error | Whether an error is thrown when files are not found in src_dir. | No | False | |
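| suffix_pattern | printf-style pattern for the index suffix of each divided file. | No | .%d | The pattern is appended to the file name before its extension and is formatted with the file index (starting at 1), so it must contain exactly one integer placeholder. Ex. `_%02d` turns foo.txt into foo_01.txt, foo_02.txt, ... |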

# Example1
```
scenario:
- step:
Expand Down Expand Up @@ -51,3 +52,40 @@ id, name
id, name
5, five
```

# Example2
```
scenario:
- step:
class: FileDivide
arguments:
src_dir: /in
src_pattern: test\.csv
dest_dir: /out
divide_rows: 2
header: True
suffix_pattern: _%02d

Input: /in/test.csv
id, name
1, one
2, two
3, three
4, four
5, five

Output:
/out/test_01.csv
id, name
1, one
2, two

/out/test_02.csv
id, name
3, three
4, four

/out/test_03.csv
id, name
5, five
```
Loading