Skip to content

Commit

Permalink
[8.0.0] Parse Starlark files as raw bytes for Bzlmod (#24362)
Browse files Browse the repository at this point in the history
As long as Bazel internally represents strings as raw bytes "encoded" in
Latin-1, the same must be true for all Starlark files that may contain
file system paths.

Also includes changes to the Python test setup:
* `ScratchFile` now always writes files as UTF-8
* `RunProgram` encodes and decodes stdin/stderr/stdout as UTF-8
* `download` no longer leaks a file

Closes #24217.

PiperOrigin-RevId: 697550082
Change-Id: If7f3fc7ddace2cda5e1f8e48a65406aa54f2a6d8

Commit
fba8603

Co-authored-by: Fabian Meumertzheim <[email protected]>
  • Loading branch information
bazel-io and fmeum authored Nov 18, 2024
1 parent fd33ecc commit eb0c276
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ public static CompiledModuleFile parseAndCompile(
ExtendedEventHandler eventHandler)
throws ExternalDepsException {
StarlarkFile starlarkFile =
StarlarkFile.parse(ParserInput.fromUTF8(moduleFile.getContent(), moduleFile.getLocation()));
StarlarkFile.parse(
ParserInput.fromLatin1(moduleFile.getContent(), moduleFile.getLocation()));
if (!starlarkFile.ok()) {
Event.replayEventsOn(eventHandler, starlarkFile.errors());
throw ExternalDepsException.withMessage(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ private static StarlarkFile readAndParseVendorFile(Path path, Environment env)
new IOException("error reading VENDOR.bazel file", e), Transience.TRANSIENT);
}
StarlarkFile starlarkFile =
StarlarkFile.parse(ParserInput.fromUTF8(contents, path.getPathString()));
StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString()));
if (!starlarkFile.ok()) {
Event.replayEventsOn(env.getListener(), starlarkFile.errors());
throw new VendorFileFunctionException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ private static StarlarkFile readAndParseRepoFile(Path path, Environment env)
new IOException("error reading REPO.bazel file at " + path, e), Transience.TRANSIENT);
}
StarlarkFile starlarkFile =
StarlarkFile.parse(ParserInput.fromUTF8(contents, path.getPathString()));
StarlarkFile.parse(ParserInput.fromLatin1(contents, path.getPathString()));
if (!starlarkFile.ok()) {
Event.replayEventsOn(env.getListener(), starlarkFile.errors());
throw new RepoFileFunctionException(
Expand Down
69 changes: 69 additions & 0 deletions src/test/py/bazel/bzlmod/bazel_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pathlib
import shutil
import subprocess
import sys
import tempfile
from absl.testing import absltest
from src.test.py.bazel import test_base
Expand Down Expand Up @@ -1109,6 +1110,74 @@ def testRegression22754(self):
self.ScratchFile('testdata/WORKSPACE')
self.RunBazel(['build', ':all'])

def testUnicodePaths(self):
if sys.getfilesystemencoding() != 'utf-8':
self.skipTest('Test requires UTF-8 by default (Python 3.7+)')

unicode_dir = 'äöüÄÖÜß'
self.ScratchFile(unicode_dir + '/MODULE.bazel', ['module(name = "module")'])
self.ScratchFile(
unicode_dir + '/BUILD',
[
'filegroup(name = "choose_me")',
],
)
self.writeMainProjectFiles()
self.ScratchFile(
'MODULE.bazel',
[
'bazel_dep(name = "module")',
'local_path_override(',
' module_name = "module",',
' path = "%s",' % unicode_dir,
')',
],
)
self.RunBazel(['build', '@module//:choose_me'])

def testUnicodeTags(self):
unicode_str = 'äöüÄÖÜß'
self.ScratchFile(
'MODULE.bazel',
[
'ext = use_extension("extensions.bzl", "ext")',
'ext.tag(attr = "%s")' % unicode_str,
'use_repo(ext, "ext")',
],
)
self.ScratchFile('BUILD')
self.ScratchFile(
'extensions.bzl',
[
'def repo_rule_impl(ctx):',
' ctx.file("BUILD")',
' print("DATA: " + ctx.attr.tag)',
'repo_rule = repository_rule(',
' implementation = repo_rule_impl,',
' attrs = {',
' "tag": attr.string(),',
' },',
')',
'def ext_impl(module_ctx):',
' repo_rule(',
' name = "ext",',
' tag = module_ctx.modules[0].tags.tag[0].attr,',
' )',
'tag = tag_class(',
' attrs = {',
' "attr": attr.string(),',
' },',
')',
'ext = module_extension( implementation = ext_impl,',
' tag_classes = {',
' "tag": tag,',
' },',
')',
],
)
_, _, stderr = self.RunBazel(['build', '@ext//:all'])
self.assertIn('DATA: ' + unicode_str, '\n'.join(stderr))


if __name__ == '__main__':
absltest.main()
4 changes: 2 additions & 2 deletions src/test/py/bazel/bzlmod/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@

def download(url):
"""Download a file and return its content in bytes."""
response = urllib.request.urlopen(url)
return response.read()
with urllib.request.urlopen(url) as response:
return response.read()


def read(path):
Expand Down
17 changes: 5 additions & 12 deletions src/test/py/bazel/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

"""Bazel Python integration test framework."""

import locale
import os
import shutil
import socket
Expand Down Expand Up @@ -313,7 +312,7 @@ def ScratchFile(self, path, lines=None, executable=False):
if os.path.exists(abspath) and not os.path.isfile(abspath):
raise IOError('"%s" (%s) exists and is not a file' % (path, abspath))
self.ScratchDir(os.path.dirname(path))
with open(abspath, 'w') as f:
with open(abspath, 'w', encoding='utf-8') as f:
if lines:
for l in lines:
f.write(l)
Expand Down Expand Up @@ -445,8 +444,7 @@ def StopRemoteWorker(self):

self._worker_stdout.seek(0)
stdout_lines = [
l.decode(locale.getpreferredencoding()).strip()
for l in self._worker_stdout.readlines()
l.decode('utf-8').strip() for l in self._worker_stdout.readlines()
]
if stdout_lines:
print('Local remote worker stdout')
Expand All @@ -455,8 +453,7 @@ def StopRemoteWorker(self):

self._worker_stderr.seek(0)
stderr_lines = [
l.decode(locale.getpreferredencoding()).strip()
for l in self._worker_stderr.readlines()
l.decode('utf-8').strip() for l in self._worker_stderr.readlines()
]
if stderr_lines:
print('Local remote worker stderr')
Expand Down Expand Up @@ -509,17 +506,13 @@ def RunProgram(

stdout.seek(0)
stdout_lines = [
l.decode(locale.getpreferredencoding()).rstrip()
if rstrip
else l.decode(locale.getpreferredencoding()).strip()
l.decode('utf-8').rstrip() if rstrip else l.decode('utf-8').strip()
for l in stdout.readlines()
]

stderr.seek(0)
stderr_lines = [
l.decode(locale.getpreferredencoding()).rstrip()
if rstrip
else l.decode(locale.getpreferredencoding()).strip()
l.decode('utf-8').rstrip() if rstrip else l.decode('utf-8').strip()
for l in stderr.readlines()
]

Expand Down

0 comments on commit eb0c276

Please sign in to comment.