Skip to content

Commit

Permalink
Parse Starlark files as raw bytes for Bzlmod
Browse files Browse the repository at this point in the history
As long as Bazel internally represents strings as raw bytes "encoded" in Latin-1, the same must be true for all Starlark files that may contain file system paths.

Also includes changes to the Python test setup:
* `ScratchFile` now always writes files as UTF-8
* `RunProgram` encodes and decodes stdin/stderr/stdout as UTF-8
* `download` now closes the response returned by `urlopen` instead of leaking it

Closes bazelbuild#24217.

PiperOrigin-RevId: 697550082
Change-Id: If7f3fc7ddace2cda5e1f8e48a65406aa54f2a6d8
  • Loading branch information
fmeum authored and bazel-io committed Nov 18, 2024
1 parent e18aad1 commit 38bcef6
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ public static CompiledModuleFile parseAndCompile(
ExtendedEventHandler eventHandler)
throws ExternalDepsException {
StarlarkFile starlarkFile =
StarlarkFile.parse(ParserInput.fromUTF8(moduleFile.getContent(), moduleFile.getLocation()));
StarlarkFile.parse(
ParserInput.fromLatin1(moduleFile.getContent(), moduleFile.getLocation()));
if (!starlarkFile.ok()) {
Event.replayEventsOn(eventHandler, starlarkFile.errors());
throw ExternalDepsException.withMessage(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ private static StarlarkFile readAndParseVendorFile(Path path, Environment env)
new IOException("error reading VENDOR.bazel file", e), Transience.TRANSIENT);
}
StarlarkFile starlarkFile =
StarlarkFile.parse(ParserInput.fromUTF8(contents, path.getPathString()));
StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString()));
if (!starlarkFile.ok()) {
Event.replayEventsOn(env.getListener(), starlarkFile.errors());
throw new VendorFileFunctionException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ private static StarlarkFile readAndParseRepoFile(Path path, Environment env)
new IOException("error reading REPO.bazel file at " + path, e), Transience.TRANSIENT);
}
StarlarkFile starlarkFile =
StarlarkFile.parse(ParserInput.fromUTF8(contents, path.getPathString()));
StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString()));
if (!starlarkFile.ok()) {
Event.replayEventsOn(env.getListener(), starlarkFile.errors());
throw new RepoFileFunctionException(
Expand Down
69 changes: 69 additions & 0 deletions src/test/py/bazel/bzlmod/bazel_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pathlib
import shutil
import subprocess
import sys
import tempfile
from absl.testing import absltest
from src.test.py.bazel import test_base
Expand Down Expand Up @@ -1109,6 +1110,74 @@ def testRegression22754(self):
self.ScratchFile('testdata/WORKSPACE')
self.RunBazel(['build', ':all'])

def testUnicodePaths(self):
  """Builds a target from a module that lives in a non-ASCII directory.

  A dependency module is written into a directory whose name consists of
  non-ASCII characters, the root MODULE.bazel redirects the dependency to
  that directory with local_path_override, and a target of the dependency
  is then built.
  """
  # The test is skipped unless Python reports UTF-8 as the file system
  # encoding.
  fs_is_utf8 = sys.getfilesystemencoding() == 'utf-8'
  if not fs_is_utf8:
    self.skipTest('Test requires UTF-8 by default (Python 3.7+)')

  unicode_dir = 'äöüÄÖÜß'

  # Contents of the dependency module inside the non-ASCII directory.
  dep_module_lines = ['module(name = "module")']
  dep_build_lines = [
      'filegroup(name = "choose_me")',
  ]
  self.ScratchFile(unicode_dir + '/MODULE.bazel', dep_module_lines)
  self.ScratchFile(unicode_dir + '/BUILD', dep_build_lines)

  # The root MODULE.bazel is written only after writeMainProjectFiles();
  # presumably so that it replaces whatever that helper wrote (verify
  # against the helper's definition).
  self.writeMainProjectFiles()
  root_module_lines = [
      'bazel_dep(name = "module")',
      'local_path_override(',
      ' module_name = "module",',
      ' path = "%s",' % unicode_dir,
      ')',
  ]
  self.ScratchFile('MODULE.bazel', root_module_lines)

  self.RunBazel(['build', '@module//:choose_me'])

def testUnicodeTags(self):
  """Passes a non-ASCII tag attribute through a module extension.

  The root MODULE.bazel hands a non-ASCII string to an extension tag, the
  extension forwards it to a repository rule, and the rule prints it. The
  test asserts that the printed string shows up unchanged in Bazel's
  stderr.
  """
  unicode_str = 'äöüÄÖÜß'

  root_module_lines = [
      'ext = use_extension("extensions.bzl", "ext")',
      'ext.tag(attr = "%s")' % unicode_str,
      'use_repo(ext, "ext")',
  ]
  # Starlark source of the extension: the repo rule prints the tag value it
  # receives from the extension implementation.
  extension_lines = [
      'def repo_rule_impl(ctx):',
      ' ctx.file("BUILD")',
      ' print("DATA: " + ctx.attr.tag)',
      'repo_rule = repository_rule(',
      ' implementation = repo_rule_impl,',
      ' attrs = {',
      ' "tag": attr.string(),',
      ' },',
      ')',
      'def ext_impl(module_ctx):',
      ' repo_rule(',
      ' name = "ext",',
      ' tag = module_ctx.modules[0].tags.tag[0].attr,',
      ' )',
      'tag = tag_class(',
      ' attrs = {',
      ' "attr": attr.string(),',
      ' },',
      ')',
      'ext = module_extension( implementation = ext_impl,',
      ' tag_classes = {',
      ' "tag": tag,',
      ' },',
      ')',
  ]

  self.ScratchFile('MODULE.bazel', root_module_lines)
  self.ScratchFile('BUILD')
  self.ScratchFile('extensions.bzl', extension_lines)

  _, _, stderr_lines = self.RunBazel(['build', '@ext//:all'])
  expected_output = 'DATA: ' + unicode_str
  self.assertIn(expected_output, '\n'.join(stderr_lines))


if __name__ == '__main__':
absltest.main()
4 changes: 2 additions & 2 deletions src/test/py/bazel/bzlmod/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@

def download(url):
  """Download a file and return its content in bytes.

  Args:
    url: URL to fetch; passed unchanged to urllib.request.urlopen.

  Returns:
    The complete response body as bytes.
  """
  # The context manager closes the response as soon as the body has been
  # read, so the underlying handle is not left open after the call.
  with urllib.request.urlopen(url) as response:
    return response.read()


def read(path):
Expand Down
17 changes: 5 additions & 12 deletions src/test/py/bazel/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

"""Bazel Python integration test framework."""

import locale
import os
import shutil
import socket
Expand Down Expand Up @@ -313,7 +312,7 @@ def ScratchFile(self, path, lines=None, executable=False):
if os.path.exists(abspath) and not os.path.isfile(abspath):
raise IOError('"%s" (%s) exists and is not a file' % (path, abspath))
self.ScratchDir(os.path.dirname(path))
with open(abspath, 'w') as f:
with open(abspath, 'w', encoding='utf-8') as f:
if lines:
for l in lines:
f.write(l)
Expand Down Expand Up @@ -445,8 +444,7 @@ def StopRemoteWorker(self):

self._worker_stdout.seek(0)
stdout_lines = [
l.decode(locale.getpreferredencoding()).strip()
for l in self._worker_stdout.readlines()
l.decode('utf-8').strip() for l in self._worker_stdout.readlines()
]
if stdout_lines:
print('Local remote worker stdout')
Expand All @@ -455,8 +453,7 @@ def StopRemoteWorker(self):

self._worker_stderr.seek(0)
stderr_lines = [
l.decode(locale.getpreferredencoding()).strip()
for l in self._worker_stderr.readlines()
l.decode('utf-8').strip() for l in self._worker_stderr.readlines()
]
if stderr_lines:
print('Local remote worker stderr')
Expand Down Expand Up @@ -509,17 +506,13 @@ def RunProgram(

stdout.seek(0)
stdout_lines = [
l.decode(locale.getpreferredencoding()).rstrip()
if rstrip
else l.decode(locale.getpreferredencoding()).strip()
l.decode('utf-8').rstrip() if rstrip else l.decode('utf-8').strip()
for l in stdout.readlines()
]

stderr.seek(0)
stderr_lines = [
l.decode(locale.getpreferredencoding()).rstrip()
if rstrip
else l.decode(locale.getpreferredencoding()).strip()
l.decode('utf-8').rstrip() if rstrip else l.decode('utf-8').strip()
for l in stderr.readlines()
]

Expand Down

0 comments on commit 38bcef6

Please sign in to comment.