diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d635a22b..37c21ab9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,6 +21,8 @@ jobs: - run: pip install . - run: pip uninstall -y montepy - run: pip install --user dist/*.whl + # run scripts + - run: change_to_ascii -h - run: pip uninstall -y montepy - run: pip install --user dist/*.tar.gz - run: pip install --user montepy[test] diff --git a/doc/source/api/montepy.input_parser.input_file.rst b/doc/source/api/montepy.input_parser.input_file.rst new file mode 100644 index 00000000..03cccada --- /dev/null +++ b/doc/source/api/montepy.input_parser.input_file.rst @@ -0,0 +1,9 @@ +montepy.input\_parser.input\_file module +======================================== + + +.. automodule:: montepy.input_parser.input_file + :members: + :undoc-members: + :show-inheritance: + :private-members: _convert_to_int, _convert_to_enum diff --git a/doc/source/api/montepy.input_parser.rst b/doc/source/api/montepy.input_parser.rst index 5ffd32e2..72bcd1b4 100644 --- a/doc/source/api/montepy.input_parser.rst +++ b/doc/source/api/montepy.input_parser.rst @@ -16,6 +16,7 @@ Submodules montepy.input_parser.block_type montepy.input_parser.cell_parser montepy.input_parser.data_parser + montepy.input_parser.input_file montepy.input_parser.input_reader montepy.input_parser.input_syntax_reader montepy.input_parser.mcnp_input diff --git a/doc/source/faq.rst b/doc/source/faq.rst new file mode 100644 index 00000000..b075d03b --- /dev/null +++ b/doc/source/faq.rst @@ -0,0 +1,70 @@ +Frequently Asked Questions +========================== + +Or more likely Frequent Error Debugging. + +Encoding Errors: UnicodeDecodeError +----------------------------------- + +If you received the error below while opening a file in MontePy, +there is likely a non-ASCII character in your input file. +You can read more about :ref:`Character Encoding here <encoding_background>`. + +To solve this problem you can: + +1. 
Try another encoding such as ``'utf8'`` or ``'cp1252'``. Pass it as an argument to :func:`~montepy.input_parser.input_reader.read_input`. +2. Remove all non-ASCII characters with :ref:`the change_to_ascii utility ` + +.. code-block:: python + + --------------------------------------------------------------------------- + UnicodeDecodeError Traceback (most recent call last) + Cell In[2], line 1 + ----> 1 problem = montepy.read_input("tests/inputs/bad_encoding.imcnp") + + File ~/dev/montepy/montepy/input_parser/input_reader.py:35, in read_input(input_file, mcnp_version, encoding) + 33 problem = mcnp_problem.MCNP_Problem(input_file) + 34 problem.mcnp_version = mcnp_version + ---> 35 problem.parse_input(encoding=encoding) + 36 return problem + + File ~/dev/montepy/montepy/mcnp_problem.py:262, in MCNP_Problem.parse_input(self, check_input, encoding) + 253 OBJ_MATCHER = { + 254 block_type.BlockType.CELL: (Cell, self._cells), + 255 block_type.BlockType.SURFACE: ( + (...) + 259 block_type.BlockType.DATA: (parse_data, self._data_inputs), + 260 } + 261 try: + --> 262 for i, input in enumerate( + 263 input_syntax_reader.read_input_syntax( + 264 self._input_file, self.mcnp_version, encoding=encoding + 265 ) + 266 ): + 267 self._original_inputs.append(input) + 268 if i == 0 and isinstance(input, mcnp_input.Message): + + File ~/dev/montepy/montepy/input_parser/input_syntax_reader.py:48, in read_input_syntax(input_file, mcnp_version, encoding) + 46 reading_queue = deque() + 47 with input_file.open("r", encoding=encoding) as fh: + ---> 48 yield from read_front_matters(fh, mcnp_version) + 49 yield from read_data(fh, mcnp_version) + + File ~/dev/montepy/montepy/input_parser/input_syntax_reader.py:79, in read_front_matters(fh, mcnp_version) + 77 lines = [] + 78 raw_lines = [] + ---> 79 for i, line in enumerate(fh): + 80 if i == 0 and line.upper().startswith("MESSAGE:"): + 81 is_in_message_block = True + + File ~/dev/montepy/montepy/input_parser/input_file.py:95, in 
MCNP_InputFile.__iter__(self) + 94 def __iter__(self): + ---> 95 for lineno, line in enumerate(self._fh): + 96 self._lineno = lineno + 1 + 97 yield line + + File ~/mambaforge/lib/python3.10/encodings/ascii.py:26, in IncrementalDecoder.decode(self, input, final) + 25 def decode(self, input, final=False): + ---> 26 return codecs.ascii_decode(input, self.errors)[0] + + UnicodeDecodeError: 'ascii' codec can't decode byte 0xff in position 159: ordinal not in range(128) diff --git a/doc/source/index.rst b/doc/source/index.rst index be171a76..9998c239 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -12,8 +12,12 @@ Welcome to MontePy's documentation! starting + utilities + tricks + faq + developing api/modules diff --git a/doc/source/utilities.rst b/doc/source/utilities.rst new file mode 100644 index 00000000..4d42e3ad --- /dev/null +++ b/doc/source/utilities.rst @@ -0,0 +1,115 @@ +Utility Scripts +=============== + +Package Level Execution Options +------------------------------- +.. code-block:: console + + usage: montepy [-h] [-c [input_file ...]] + + Tool for editing and working with MCNP input files. + + options: + -h, --help show this help message and exit + -c [input_file ...], --check [input_file ...] + Check the given input file(s) for errors. Accepts globs, and multiple arguments. + +Checking Input Files for Errors +------------------------------- +MontePy can be used to check MCNP input files for errors. +MontePy will check for: + +* general syntax errors +* syntax errors for all MCNP Objects supported (e.g., cells, surfaces, materials, etc.) +* Bad references to other objects, when the referring object is supported. +* Bad mode options + +It will print all errors it found in the input to the terminal. + +To use this run: + +.. code-block:: console + + python -m montepy -c [files] + +.. _convert_ascii: + +Converting Encoding to ASCII +---------------------------- + +.. 
_ascii_command: + +Command Line Options +++++++++++++++++++++ +.. code-block:: console + + usage: Change_to_ascii [-h] [-d | -w] in_file out_file + + Change the encoding of a file to strict ASCII. Everything not compliant will be removed. + + positional arguments: + in_file The input file to convert + out_file The input file to convert + + options: + -h, --help show this help message and exit + -d, --delete Delete any non-ascii characters. This is the default. + -w, --whitespace Replace non-ascii characters with a space. + + +.. _encoding_background: + +Background +++++++++++ +`Character encoding `_ is the process of representing all characters as numbers, +so they may be used by a computer. +It is the bane of almost all programmers. + +The `American Standard Code for Information Interchange (ASCII) `_ is one of the oldest, +and simplest encoding standards. +It uses one byte per character, +and only goes from 0 – 127. +This has some issues, being very American-centric, +and also only allowing 128 characters, +52 of them being the English alphabet. +One solution to this was `"Extended ASCII" `_, +which used the final bit, and allowed the encoding system +to include 0 – 255. +There isn't one "Extended ASCII", +but one of the most popular encodings is Windows CP-1252. +This isn't great. + +The most commonly used encoding now is `UTF-8 `_, or "unicode". +UTF-8 can support almost any printable character in any language, including emojis. +The complexity is that each character is a variable-length of bytes. +This means that older software, like fortran, may get confused by it. + +As far as I can tell MCNP does not document what encoding it uses. +ASCII is the most conservative bet, +so MontePy by default tries to read input files in strict ASCII. 
import argparse
import sys

# Largest code point that is still part of ASCII (0x7F). Anything above it
# cannot be encoded as strict ASCII and must be deleted or replaced.
_ASCII_MAX = 127


def define_args(args):
    """
    Parses the arguments from the command line.

    ``-d`` and ``-w`` are mutually exclusive; argparse exits with an error
    if both are given.

    :param args: the arguments from the command line.
    :type args: list
    :returns: the parsed arguments (with argparse)
    :rtype: argparse.Namespace
    """
    parser = argparse.ArgumentParser(
        prog="Change_to_ascii",
        description="Change the encoding of a file to strict ASCII. Everything not compliant will be removed.",
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-d",
        "--delete",
        dest="delete",
        action="store_true",
        help="Delete any non-ascii characters. This is the default.",
    )
    group.add_argument(
        "-w",
        "--whitespace",
        dest="whitespace",
        action="store_true",
        help="Replace non-ascii characters with a space.",
    )
    # nargs=1 is kept so the parsed values stay one-element lists, which is
    # what strip_characters() indexes into.
    parser.add_argument("in_file", nargs=1, help="The input file to convert")
    parser.add_argument(
        "out_file", nargs=1, help="The path to write the converted file to"
    )
    return parser.parse_args(args)


def strip_characters(args):
    """
    Strips non-ascii characters from the input file, and writes out the output file.

    Each line is decoded as UTF-8; bytes that are not valid UTF-8 become the
    Unicode replacement character (U+FFFD). Every character above code point
    127 (including those replacement characters) is then either deleted or
    replaced by a single space.

    :param args: the parsed command line arguments. ``in_file`` and ``out_file``
        are one-element lists; ``whitespace`` selects replacement with a space,
        otherwise characters are deleted.
    :type args: argparse.Namespace
    """
    # deleting is the default; "-d" only makes that choice explicit
    replacer = " " if args.whitespace else ""
    with open(args.in_file[0], "rb") as in_fh, open(args.out_file[0], "wb") as out_fh:
        for line in in_fh:
            text = line.decode(encoding="utf8", errors="replace")
            try:
                # fast path: the line is already pure ASCII
                encoded = text.encode(encoding="ascii", errors="strict")
            except UnicodeEncodeError:
                # Code point 128 is NOT ASCII, so the boundary test must be
                # "> 127"; letting 128 through makes the strict encode fail.
                cleaned = "".join(
                    char if ord(char) <= _ASCII_MAX else replacer for char in text
                )
                encoded = cleaned.encode(encoding="ascii", errors="strict")
            out_fh.write(encoded)


def main(args=None):
    """
    Main runner function.

    :param args: The arguments passed from the command line.
        If ``None``, ``sys.argv[1:]`` is used.
    :type args: list
    """
    if args is None:
        args = sys.argv[1:]
    strip_characters(define_args(args))


if __name__ == "__main__":
    main(sys.argv[1:])
""" +ASCII_CEILING = 127 +""" +The maximum allowed code point allowed by ASCII. + +Source: `Wikipedia `_ +""" + def get_max_line_length(mcnp_version=DEFAULT_VERSION): """ diff --git a/montepy/input_parser/input_file.py b/montepy/input_parser/input_file.py index c69bf8af..0ef736ce 100644 --- a/montepy/input_parser/input_file.py +++ b/montepy/input_parser/input_file.py @@ -1,5 +1,6 @@ # Copyright 2024, Battelle Energy Alliance, LLC All Rights Reserved. import itertools as it +from montepy.constants import ASCII_CEILING from montepy.utilities import * @@ -20,6 +21,8 @@ def __init__(self, path, parent_file=None): self._path = path self._parent_file = parent_file self._lineno = 1 + self._replace_with_space = False + self._mode = None self._fh = None @make_prop_pointer("_path") @@ -57,7 +60,7 @@ def lineno(self): """ pass - def open(self, mode): + def open(self, mode, encoding="ascii", replace=True): """ Opens the underlying file, and returns self. @@ -65,11 +68,29 @@ def open(self, mode): For this reason, a ``close`` functional is intentionally not provided. + + .. Note:: + For different encoding schemes see the available list + `here `_. + + CP1252 is commonly referred to as "extended-ASCII". + You may have success with this encoding for working with special characters. + :param mode: the mode to open the file in :type mode: str + :param encoding: The encoding scheme to use. 
@staticmethod
def _clean_line(line):
    """
    Converts raw bytes read from the file into a clean ASCII string.

    Every byte above ``ASCII_CEILING`` is swapped for a space so the
    ASCII decode cannot fail, and carriage-return based line endings
    (CRLF and lone CR) are normalized to a line feed.

    :param line: the raw bytes to clean.
    :type line: bytes
    :returns: the decoded text containing only ASCII characters.
    :rtype: str
    """
    # ASCII_CEILING is the highest *valid* ASCII code point, so the test is
    # inclusive; a strict "<" would needlessly blank out byte 0x7F.
    new_line = bytes(code if code <= ASCII_CEILING else ord(" ") for code in line)
    text = new_line.decode("ascii")
    # replace CRLF first so it collapses to one line feed rather than two
    return text.replace("\r\n", "\n").replace("\r", "\n")
:type input_file: str :param mcnp_version: The version of MCNP that the input is intended for. :type mcnp_version: tuple :returns: The MCNP_Problem instance representing this file. + :param replace: replace all non-ASCII characters with a space (0x20) + :type replace: bool :rtype: MCNP_Problem :raises UnsupportedFeature: If an input format is used that MontePy does not support. :raises MalformedInputError: If an input has a broken syntax. @@ -23,5 +26,5 @@ def read_input(input_file, mcnp_version=DEFAULT_VERSION): """ problem = mcnp_problem.MCNP_Problem(input_file) problem.mcnp_version = mcnp_version - problem.parse_input() + problem.parse_input(replace=replace) return problem diff --git a/montepy/input_parser/input_syntax_reader.py b/montepy/input_parser/input_syntax_reader.py index 540164a0..e6cf1b9e 100644 --- a/montepy/input_parser/input_syntax_reader.py +++ b/montepy/input_parser/input_syntax_reader.py @@ -15,7 +15,7 @@ reading_queue = [] -def read_input_syntax(input_file, mcnp_version=DEFAULT_VERSION): +def read_input_syntax(input_file, mcnp_version=DEFAULT_VERSION, replace=True): """ Creates a generator function to return a new MCNP input for every new one that is encountered. @@ -30,12 +30,14 @@ def read_input_syntax(input_file, mcnp_version=DEFAULT_VERSION): :type input_file: MCNP_InputFile :param mcnp_version: The version of MCNP that the input is intended for. 
:type mcnp_version: tuple + :param replace: replace all non-ASCII characters with a space (0x20) + :type replace: bool :returns: a generator of MCNP_Object objects :rtype: generator """ global reading_queue reading_queue = deque() - with input_file.open("r") as fh: + with input_file.open("r", replace=replace) as fh: yield from read_front_matters(fh, mcnp_version) yield from read_data(fh, mcnp_version) @@ -186,7 +188,7 @@ def flush_input(): ): yield from flush_input() # die if it is a vertical syntax format - if "#" in line[0:BLANK_SPACE_CONTINUE]: + if "#" in line[0:BLANK_SPACE_CONTINUE] and not line_is_comment: raise errors.UnsupportedFeature("Vertical Input format is not allowed") # cut line down to allowed length old_line = line diff --git a/montepy/mcnp_problem.py b/montepy/mcnp_problem.py index 7e4d8f3e..16b8426e 100644 --- a/montepy/mcnp_problem.py +++ b/montepy/mcnp_problem.py @@ -232,12 +232,14 @@ def transforms(self): """ return self._transforms - def parse_input(self, check_input=False): + def parse_input(self, check_input=False, replace=True): """ Semantically parses the MCNP file provided to the constructor. :param check_input: If true, will try to find all errors with input and collect them as warnings to log. 
:type check_input: bool + :param replace: replace all non-ASCII characters with a space (0x20) + :type replace: bool """ trailing_comment = None last_obj = None @@ -252,7 +254,7 @@ def parse_input(self, check_input=False): try: for i, input in enumerate( input_syntax_reader.read_input_syntax( - self._input_file, self.mcnp_version + self._input_file, self.mcnp_version, replace=replace ) ): self._original_inputs.append(input) diff --git a/pyproject.toml b/pyproject.toml index 6dd48dad..122589bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,9 @@ Repository = "https://github.com/idaholab/montepy.git" Documentation = "https://idaholab.github.io/MontePy/index.html" "Bug Tracker" = "https://github.com/idaholab/MontePy/issues" +[project.scripts] +"change_to_ascii" = "montepy._scripts.change_to_ascii:main" + [build-system] requires = ["setuptools >= 61.0.0"] build-backend = "setuptools.build_meta" diff --git a/tests/constants.py b/tests/constants.py index 27875fa8..7398f7e4 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -21,4 +21,11 @@ "testReadRec2.imcnp", "testReadRec3.imcnp", "testReadTarget.imcnp", + "bad_encoding.imcnp", + "unicode.imcnp", +} + +BAD_ENCODING_FILES = { + "bad_encoding.imcnp", + "unicode.imcnp", } diff --git a/tests/inputs/bad_encoding.imcnp b/tests/inputs/bad_encoding.imcnp new file mode 100644 index 00000000..7094afc7 --- /dev/null +++ b/tests/inputs/bad_encoding.imcnp @@ -0,0 +1,52 @@ +MESSAGE: this is a message +it should show up at the beginning +foo +Note: this file in encoded in "extended ascii": "CP-1252" + +MCNP Test Model for MOAA +C cells +c +1 1 20 + -1000 $ dollar comment + imp:n,p=1 U=350 trcl=5 +2 2 8 + -1005 + imp:n=1 + imp:p=0.5 +3 3 -1 + 1000 1005 -1010 + imp:n,p=1 +99 0 + 1010 + imp:n,p=0 +5 0 + #99 + imp:n,p=3 fill=350 (1 0 0 ) +c foo end comment + +C surfaces +1000 SO 1 +1005 RCC 0 1.5 -0.5 0 0 1 0.25 +1010 SO 3 + +C data +C materials +C UO2 5 atpt enriched +m1 92235.80c 5 & +92238.80c 95 +C Iron +m2 
26054.80c 5.85 + 26056.80c 91.75 + 26057.80c 2.12 + 26058.80c 0.28 +C water +C foo +m3 1001.80c 2 + 8016.80c 1 +MT3 lwtr.23t h-zr.20t h/zr.28t +C execution +ksrc 0 0 0 +kcode 100000 1.000 50 1050 +phys:p j 1 2j 1 +mode n p +vol NO 2J 1 1.5 J diff --git a/tests/inputs/test.imcnp b/tests/inputs/test.imcnp index 4d9b70e3..fd0d4cbe 100644 --- a/tests/inputs/test.imcnp +++ b/tests/inputs/test.imcnp @@ -4,6 +4,7 @@ foo MCNP Test Model for MOAA C cells +c # hidden vertical Do not touch c 1 1 20 -1000 $ dollar comment diff --git a/tests/inputs/unicode.imcnp b/tests/inputs/unicode.imcnp new file mode 100644 index 00000000..7b7f09ba --- /dev/null +++ b/tests/inputs/unicode.imcnp @@ -0,0 +1,52 @@ +MESSAGE: this is a message +it should show up at the beginning +foo + +MCNP Test Model for MOAA +C cells 🔴⚪🐍 MontePy is great +c +1 1 20 + -1000 $ dollar comment + imp:n,p=1 U=350 trcl=5 +2 2 8 + -1005 + imp:n=1 + imp:p=0.5 +3 3 -1 + 1000 1005 -1010 + imp:n,p=1 +99 0 + 1010 + imp:n,p=0 +5 0 + #99 + imp:n,p=3 fill=350 (1 0 0 ) +c foo end comment + +C surfaces +1000 SO 1 +1005 RCC 0 1.5 -0.5 0 0 1 0.25 +1010 SO 3 + +C data +C materials +C UO2 5 atpt enriched +m1 92235.80c 5 & +92238.80c 95 +C Iron +m2 26054.80c 5.85 + 26056.80c 91.75 + 26057.80c 2.12 + 26058.80c 0.28 +C water +C foo +m3 1001.80c 2 + 8016.80c 1 +MT3 lwtr.23t h-zr.20t h/zr.28t +C execution +ksrc 0 0 0 +kcode 100000 1.000 50 1050 +phys:p j 1 2j 1 +mode n p +vol NO 2J 1 1.5 J + diff --git a/tests/test_integration.py b/tests/test_integration.py index a43836b5..7da34589 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -342,6 +342,7 @@ def test_cell_card_pass_through(self): # test input pass-through answer = [ "C cells", + "c # hidden vertical Do not touch", "c", "1 1 20", " -1000 $ dollar comment", @@ -354,20 +355,20 @@ def test_cell_card_pass_through(self): cell = new_prob.cells[1] output = cell.format_for_mcnp_input((6, 2, 0)) print(output) - self.assertEqual(int(output[3].split("$")[0]), -5) + 
import itertools
import os
import subprocess
import sys
from unittest import TestCase

from tests import constants

INPUT_DIR = os.path.join("tests", "inputs")
SCRIPT_PATH = os.path.join("montepy", "_scripts", "change_to_ascii.py")
# Largest byte value / code point that is still valid ASCII.
ASCII_MAX = 127


class TestChangeAsciiScript(TestCase):
    """
    Runs the ``change_to_ascii`` script on every file in
    ``constants.BAD_ENCODING_FILES`` and compares its output, line by line,
    against an expectation computed independently here.

    Paths are relative, so the tests must be run from the repository root.
    """

    @classmethod
    def setUpClass(cls):
        """Converts each bad-encoding input once with ``-w`` and once with ``-d``."""
        new_files = {}
        for input_file in constants.BAD_ENCODING_FILES:
            new_files[input_file] = {}
            for flag in ("-w", "-d"):
                new_file = f"{input_file}{flag}.imcnp"
                result = cls.run_script(
                    [flag, os.path.join(INPUT_DIR, input_file), new_file]
                )
                # fail here, with a clear cause, rather than later on a
                # missing or half-written output file
                result.check_returncode()
                new_files[input_file][flag] = new_file
        cls.files = new_files

    @classmethod
    def tearDownClass(cls):
        """Removes every converted file created by :meth:`setUpClass`."""
        for group in cls.files.values():
            for file_name in group.values():
                try:
                    os.remove(file_name)
                except FileNotFoundError:
                    pass

    @staticmethod
    def run_script(args):
        """
        Runs the script in a subprocess.

        :param args: the command line arguments to give the script.
        :type args: list
        :returns: the finished process.
        :rtype: subprocess.CompletedProcess
        """
        # sys.executable guarantees the interpreter running the tests is used,
        # not whatever "python" happens to resolve to on PATH.
        return subprocess.run([sys.executable, SCRIPT_PATH] + args)

    def test_delete_bad(self):
        for in_file, outputs in self.files.items():
            with open(os.path.join(INPUT_DIR, in_file), "rb") as in_fh, open(
                outputs["-d"], "rb"
            ) as out_fh:
                # zip_longest so that a truncated output file is detected
                for in_line, out_line in itertools.zip_longest(
                    in_fh, out_fh, fillvalue=b""
                ):
                    # Deleting is byte-wise: every byte of a multi-byte or
                    # invalid sequence is above ASCII_MAX.
                    expected = bytes(byte for byte in in_line if byte <= ASCII_MAX)
                    self.assertEqual(expected, out_line)

    def test_whitespace_bad(self):
        for in_file, outputs in self.files.items():
            with open(os.path.join(INPUT_DIR, in_file), "rb") as in_fh, open(
                outputs["-w"], "rb"
            ) as out_fh:
                for in_line, out_line in itertools.zip_longest(
                    in_fh, out_fh, fillvalue=b""
                ):
                    try:
                        # valid UTF-8: one space per non-ASCII *character*
                        chars = in_line.decode("utf8")
                    except UnicodeError:
                        # otherwise go byte by byte; latin-1 maps each byte
                        # to the code point of the same value
                        chars = in_line.decode("latin-1")
                    expected = "".join(
                        char if ord(char) <= ASCII_MAX else " " for char in chars
                    )
                    self.assertEqual(expected, out_line.decode("ascii"))

    def test_bad_arguments(self):
        # -w and -d are mutually exclusive. The input and output paths are
        # passed as two separate, otherwise valid arguments so the non-zero
        # exit can only come from the conflicting flags.
        out_file = "foo.imcnp"
        try:
            ret_code = self.run_script(
                [
                    "-w",
                    "-d",
                    os.path.join(INPUT_DIR, "bad_encoding.imcnp"),
                    out_file,
                ]
            )
            self.assertNotEqual(ret_code.returncode, 0)
        finally:
            # only exists if the script wrongly accepted the arguments
            try:
                os.remove(out_file)
            except FileNotFoundError:
                pass