From 38bcef630f8cb2f3ad06e8b5031350d632271719 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Mon, 18 Nov 2024 02:20:44 -0800 Subject: [PATCH] Parse Starlark files as raw bytes for Bzlmod As long as Bazel internally represents strings as raw bytes "encoded" in Latin-1, the same must be true for all Starlark files that may contain file system paths. Also includes changes to the Python test setup: * `ScratchFile` now always writes files as UTF-8 * `RunProgram` encodes and decodes stdin/stderr/stdout as UTF-8 * `download` no longer leaks a file Closes #24217. PiperOrigin-RevId: 697550082 Change-Id: If7f3fc7ddace2cda5e1f8e48a65406aa54f2a6d8 --- .../lib/bazel/bzlmod/CompiledModuleFile.java | 3 +- .../lib/bazel/bzlmod/VendorFileFunction.java | 2 +- .../build/lib/skyframe/RepoFileFunction.java | 2 +- src/test/py/bazel/bzlmod/bazel_module_test.py | 69 +++++++++++++++++++ src/test/py/bazel/bzlmod/test_utils.py | 4 +- src/test/py/bazel/test_base.py | 17 ++--- 6 files changed, 80 insertions(+), 17 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java index 2cdcacf79bd31d..14c67ca65a6457 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/CompiledModuleFile.java @@ -63,7 +63,8 @@ public static CompiledModuleFile parseAndCompile( ExtendedEventHandler eventHandler) throws ExternalDepsException { StarlarkFile starlarkFile = - StarlarkFile.parse(ParserInput.fromUTF8(moduleFile.getContent(), moduleFile.getLocation())); + StarlarkFile.parse( + ParserInput.fromLatin1(moduleFile.getContent(), moduleFile.getLocation())); if (!starlarkFile.ok()) { Event.replayEventsOn(eventHandler, starlarkFile.errors()); throw ExternalDepsException.withMessage( diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java index 50f1102ed801bb..07eb48edd23d00 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java @@ -156,7 +156,7 @@ private static StarlarkFile readAndParseVendorFile(Path path, Environment env) new IOException("error reading VENDOR.bazel file", e), Transience.TRANSIENT); } StarlarkFile starlarkFile = - StarlarkFile.parse(ParserInput.fromUTF8(contents, path.getPathString())); + StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString())); if (!starlarkFile.ok()) { Event.replayEventsOn(env.getListener(), starlarkFile.errors()); throw new VendorFileFunctionException( diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java b/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java index 9e0283faa990f0..cbafb02610079a 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/RepoFileFunction.java @@ -108,7 +108,7 @@ private static StarlarkFile readAndParseRepoFile(Path path, Environment env) new IOException("error reading REPO.bazel file at " + path, e), Transience.TRANSIENT); } StarlarkFile starlarkFile = - StarlarkFile.parse(ParserInput.fromUTF8(contents, path.getPathString())); + StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString())); if (!starlarkFile.ok()) { Event.replayEventsOn(env.getListener(), starlarkFile.errors()); throw new RepoFileFunctionException( diff --git a/src/test/py/bazel/bzlmod/bazel_module_test.py b/src/test/py/bazel/bzlmod/bazel_module_test.py index bd931b575ca2fe..19a49a03d896d7 100644 --- a/src/test/py/bazel/bzlmod/bazel_module_test.py +++ b/src/test/py/bazel/bzlmod/bazel_module_test.py @@ -18,6 +18,7 @@ import pathlib import shutil import subprocess +import sys import tempfile from absl.testing import absltest from src.test.py.bazel import test_base @@ -1109,6 +1110,74 @@ def testRegression22754(self): self.ScratchFile('testdata/WORKSPACE') self.RunBazel(['build', ':all']) + def testUnicodePaths(self): + if sys.getfilesystemencoding() != 'utf-8': + self.skipTest('Test requires UTF-8 by default (Python 3.7+)') + + unicode_dir = 'äöüÄÖÜß' + self.ScratchFile(unicode_dir + '/MODULE.bazel', ['module(name = "module")']) + self.ScratchFile( + unicode_dir + '/BUILD', + [ + 'filegroup(name = "choose_me")', + ], + ) + self.writeMainProjectFiles() + self.ScratchFile( + 'MODULE.bazel', + [ + 'bazel_dep(name = "module")', + 'local_path_override(', + ' module_name = "module",', + ' path = "%s",' % unicode_dir, + ')', + ], + ) + self.RunBazel(['build', '@module//:choose_me']) + + def testUnicodeTags(self): + unicode_str = 'äöüÄÖÜß' + self.ScratchFile( + 'MODULE.bazel', + [ + 'ext = use_extension("extensions.bzl", "ext")', + 'ext.tag(attr = "%s")' % unicode_str, + 'use_repo(ext, "ext")', + ], + ) + self.ScratchFile('BUILD') + self.ScratchFile( + 'extensions.bzl', + [ + 'def repo_rule_impl(ctx):', + ' ctx.file("BUILD")', + ' print("DATA: " + ctx.attr.tag)', + 'repo_rule = repository_rule(', + ' implementation = repo_rule_impl,', + ' attrs = {', + ' "tag": attr.string(),', + ' },', + ')', + 'def ext_impl(module_ctx):', + ' repo_rule(', + ' name = "ext",', + ' tag = module_ctx.modules[0].tags.tag[0].attr,', + ' )', + 'tag = tag_class(', + ' attrs = {', + ' "attr": attr.string(),', + ' },', + ')', + 'ext = module_extension( implementation = ext_impl,', + ' tag_classes = {', + ' "tag": tag,', + ' },', + ')', + ], + ) + _, _, stderr = self.RunBazel(['build', '@ext//:all']) + self.assertIn('DATA: ' + unicode_str, '\n'.join(stderr)) + if __name__ == '__main__': absltest.main() diff --git a/src/test/py/bazel/bzlmod/test_utils.py b/src/test/py/bazel/bzlmod/test_utils.py index 907398b71d4df8..99c8e7def10adf 100644 --- a/src/test/py/bazel/bzlmod/test_utils.py +++ b/src/test/py/bazel/bzlmod/test_utils.py @@ -30,8 +30,8 @@ def download(url): """Download a file and return its content in bytes.""" - response = urllib.request.urlopen(url) - return response.read() + with urllib.request.urlopen(url) as response: + return response.read() def read(path): diff --git a/src/test/py/bazel/test_base.py b/src/test/py/bazel/test_base.py index d13a245f674cd0..1b77baf567332d 100644 --- a/src/test/py/bazel/test_base.py +++ b/src/test/py/bazel/test_base.py @@ -16,7 +16,6 @@ """Bazel Python integration test framework.""" -import locale import os import shutil import socket @@ -313,7 +312,7 @@ def ScratchFile(self, path, lines=None, executable=False): if os.path.exists(abspath) and not os.path.isfile(abspath): raise IOError('"%s" (%s) exists and is not a file' % (path, abspath)) self.ScratchDir(os.path.dirname(path)) - with open(abspath, 'w') as f: + with open(abspath, 'w', encoding='utf-8') as f: if lines: for l in lines: f.write(l) @@ -445,8 +444,7 @@ def StopRemoteWorker(self): self._worker_stdout.seek(0) stdout_lines = [ - l.decode(locale.getpreferredencoding()).strip() - for l in self._worker_stdout.readlines() + l.decode('utf-8').strip() for l in self._worker_stdout.readlines() ] if stdout_lines: print('Local remote worker stdout') @@ -455,8 +453,7 @@ def StopRemoteWorker(self): self._worker_stderr.seek(0) stderr_lines = [ - l.decode(locale.getpreferredencoding()).strip() - for l in self._worker_stderr.readlines() + l.decode('utf-8').strip() for l in self._worker_stderr.readlines() ] if stderr_lines: print('Local remote worker stderr') @@ -509,17 +506,13 @@ def RunProgram( stdout.seek(0) stdout_lines = [ - l.decode(locale.getpreferredencoding()).rstrip() - if rstrip - else l.decode(locale.getpreferredencoding()).strip() + l.decode('utf-8').rstrip() if rstrip else l.decode('utf-8').strip() for l in stdout.readlines() ] stderr.seek(0) stderr_lines = [ - l.decode(locale.getpreferredencoding()).rstrip() - if rstrip - else l.decode(locale.getpreferredencoding()).strip() + l.decode('utf-8').rstrip() if rstrip else l.decode('utf-8').strip() for l in stderr.readlines() ]