From d870eb32305ff07aa789dd0dbb90c6c309a22c67 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Tue, 18 Feb 2020 19:39:12 +0100 Subject: [PATCH] Enforce utf8 when opening files in python scripts (#16376) (#16384) Since the migration to Python 3 (#14798), these scripts fail with Unicode errors (such as UnicodeDecodeError) in environments not configured with Unicode locales. We saw this in the new Jenkins pipelines, and it can be reproduced at least on Linux by running make check with LANG=C. (cherry picked from commit c2f6358c656b59990f7fba8db0ae76454acc1d9a) --- dev-tools/generate_notice.py | 15 +++++---------- filebeat/scripts/docs_collector.py | 10 +++++----- libbeat/scripts/unpack_dashboards.py | 4 ++-- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/dev-tools/generate_notice.py b/dev-tools/generate_notice.py index 3fd42b320ff..a4c57ae4be6 100644 --- a/dev-tools/generate_notice.py +++ b/dev-tools/generate_notice.py @@ -15,13 +15,8 @@ def read_file(filename): print("File not found {}".format(filename)) return "" - try: - with open(filename, 'r') as f: - return f.read() - except UnicodeDecodeError: - # try latin-1 - with open(filename, 'r', encoding="ISO-8859-1") as f: - return f.read() + with open(filename, 'r', encoding='utf_8') as f: + return f.read() def get_library_path(license): @@ -37,7 +32,7 @@ def get_library_path(license): def read_versions(vendor): libs = [] - with open(os.path.join(vendor, "vendor.json")) as f: + with open(os.path.join(vendor, "vendor.json"), encoding='utf_8') as f: govendor = json.load(f) for package in govendor["package"]: libs.append(package) @@ -222,11 +217,11 @@ def get_url(repo): def create_notice(filename, beat, copyright, vendor_dirs, csvfile, overrides=None): dependencies = gather_dependencies(vendor_dirs, overrides=overrides) if not csvfile: - with open(filename, "w+") as f: + with open(filename, "w+", encoding='utf_8') as f: write_notice_file(f, beat, copyright, dependencies) print("Available at {}".format(filename)) else: - with
open(csvfile, "wb") as f: + with open(csvfile, "w", encoding='utf_8', newline='') as f: csvwriter = csv.writer(f) write_csv_file(csvwriter, dependencies) print("Available at {}".format(csvfile)) diff --git a/filebeat/scripts/docs_collector.py b/filebeat/scripts/docs_collector.py index 2f3b4d61f6f..3173146f4e0 100644 --- a/filebeat/scripts/docs_collector.py +++ b/filebeat/scripts/docs_collector.py @@ -38,13 +38,13 @@ def collect(beat_name): module_file = generated_note module_file += "[[filebeat-module-" + module + "]]\n" - with open(module_doc) as f: + with open(module_doc, encoding='utf_8') as f: module_file += f.read() beat_path = os.path.join(module_dir, "_meta") # Load title from fields.yml - with open(beat_path + "/fields.yml") as f: + with open(beat_path + "/fields.yml", encoding='utf_8') as f: fields = yaml.load(f.read()) title = fields[0]["title"] @@ -61,8 +61,8 @@ def collect(beat_name): """ # Write module docs - with open(os.path.abspath("docs") + "/modules/" + - module + ".asciidoc", 'w') as f: + docs_path = os.path.join(os.path.abspath("docs"), "modules", module + ".asciidoc") + with open(docs_path, 'w', encoding='utf_8') as f: f.write(module_file) module_list_output = generated_note @@ -76,7 +76,7 @@ def collect(beat_name): module_list_output += "include::modules/" + m + ".asciidoc[]\n" # Write module link list - with open(os.path.abspath("docs") + "/modules_list.asciidoc", 'w') as f: + with open(os.path.abspath("docs") + "/modules_list.asciidoc", 'w', encoding='utf_8') as f: f.write(module_list_output) diff --git a/libbeat/scripts/unpack_dashboards.py b/libbeat/scripts/unpack_dashboards.py index 194dececf1e..b0adbe09b91 100644 --- a/libbeat/scripts/unpack_dashboards.py +++ b/libbeat/scripts/unpack_dashboards.py @@ -27,7 +27,7 @@ def transform_data(data, method): def transform_file(path, method): - with open(path) as f: + with open(path, encoding='utf_8') as f: data = json.load(f) transform_data(data, method) @@ -52,5 +52,5 @@ def transform_file(path, method):
data = transform_file(path, method) new_data = json.dumps(data, sort_keys=True, indent=4) - with open(path, 'w') as f: + with open(path, 'w', encoding='utf_8') as f: f.write(new_data)