Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bazel: rewrite MD5 .note.gnu.build-id with truncated git SHA1. #767

Merged
merged 9 commits into from
Apr 17, 2017
13 changes: 3 additions & 10 deletions BUILD
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
package(default_visibility = ["//visibility:public"])

load("//bazel:envoy_build_system.bzl", "envoy_cc_library")

genrule(
name = "envoy_version",
srcs = glob([".git/**"]),
outs = ["version_generated.cc"],
cmd = "touch $@ && $(location tools/gen_git_sha.sh) $$(dirname $(location tools/gen_git_sha.sh)) $@",
cmd = "touch $@ && $(location //tools:gen_git_sha.sh) " +
"$$(dirname $(location //tools:gen_git_sha.sh)) $@",
local = 1,
tools = ["tools/gen_git_sha.sh"],
)

envoy_cc_library(
name = "version_generated",
srcs = ["version_generated.cc"],
deps = ["//source/common/common:version_includes"],
tools = ["//tools:gen_git_sha.sh"],
)
38 changes: 37 additions & 1 deletion bazel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,49 @@ tools/bazel-test-gdb //test/common/http:async_client_impl_test

# Additional Envoy build and test options

In general, there are 3 [compilation
modes](https://bazel.build/versions/master/docs/bazel-user-manual.html#flag--compilation_mode)
that Bazel supports:

* `fastbuild`: `-O0`, aimed at developer speed (default).
* `opt`: `-O2 -DNDEBUG`, for production builds and performance benchmarking.
* `dbg`: `-O0 -ggdb3`, debug symbols.

You can use the `-c <compilation_mode>` flag to control this, e.g.

```
bazel build -c opt //source/exe:envoy-static
```

Debug symbols can also be explicitly added to any build type with `--define
debug_symbols=yes`, e.g.

```
bazel build -c opt --define debug_symbols=yes //source/exe:envoy-static
```

To build and run tests with the compiler's address sanitizer (ASAN) enabled:

```
bazel test -c dbg --config=asan //test/...
```

The ASAN failure stack traces include numbers as a results of running ASAN with a `dbg` build above.
The ASAN failure stack traces include line numbers as a result of running ASAN with a `dbg` build above.

# Release builds

Release builds should be built in `opt` mode, processed with `strip` and have a
`.note.gnu.build-id` section with the Git SHA1 at which the build took place.
They should also ignore any local `.bazelrc` for reproducibility. This can be
achieved with:

```
bazel --bazelrc=/dev/null build -c opt //source/exe:envoy-static.stripped.stamped
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I read correctly below, //source/exe:envoy-static.stamped is valid, right? (If I want a stamped, but not stripped build)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep.

```

One caveat to note is that the Git SHA1 is truncated to 16 bytes today as a
result of the workaround in place for
https://github.com/bazelbuild/bazel/issues/2805.

# Adding or maintaining Envoy build rules

Expand Down
33 changes: 33 additions & 0 deletions bazel/envoy_build_system.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,34 @@ def envoy_cc_library(name,
alwayslink = 1,
)

def _git_stamped_genrule(name):
    # Workaround for https://github.com/bazelbuild/bazel/issues/2805: copy the
    # target and binary-rewrite the copy, replacing the linker produced MD5
    # hash with the (truncated) git SHA1 hash from version_generated.cc.
    version_src = "//source/version_generated:version_generated.cc"
    rewriter = "//tools:git_sha_rewriter.py"
    steps = [
        "cp $(location " + name + ") $@",
        # The rewriter modifies the copy in place, so it must be writable.
        "chmod u+w $@",
        "$(location " + rewriter + ") $(location " + version_src + ") $@",
    ]
    native.genrule(
        name = name + "_stamped",
        srcs = [name, version_src],
        outs = [name + ".stamped"],
        cmd = " && ".join(steps),
        tools = [rewriter],
    )

# Envoy C++ binary targets should be specified with this function.
def envoy_cc_binary(name,
srcs = [],
data = [],
visibility = None,
repository = "",
deps = []):
# Implicit .stamped targets to obtain builds with the (truncated) git SHA1.
_git_stamped_genrule(name)
_git_stamped_genrule(name + ".stripped")
native.cc_binary(
name = name,
srcs = srcs,
Expand All @@ -96,12 +117,24 @@ def envoy_cc_binary(name,
linkopts = [
"-pthread",
"-lrt",
# Force MD5 hash in build. This is part of the workaround for
# https://github.com/bazelbuild/bazel/issues/2805. Bazel actually
# does this by itself prior to
# https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882.
# Ironically, forcing it here so that in future releases we will
# have the same behavior. When everyone is using an updated version
# of Bazel, we can use linkopts to set the git SHA1 directly in the
# --build-id and avoid doing the following.
'-Wl,--build-id=md5',
'-Wl,--hash-style=gnu',
"-static-libstdc++",
"-static-libgcc",
],
linkstatic = 1,
visibility = visibility,
malloc = tcmalloc_external_dep(repository),
# See above comment on MD5 hash.
stamp = 0,
deps = deps + [
repository + "//source/precompiled:precompiled_includes",
],
Expand Down
2 changes: 1 addition & 1 deletion source/common/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,6 @@ envoy_cc_library(
srcs = ["version.cc"],
deps = [
":version_includes",
"//:version_generated",
"//source/version_generated",
],
)
4 changes: 2 additions & 2 deletions source/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ envoy_cc_library(
hdrs = ["options_impl.h"],
external_deps = ["tclap"],
deps = [
"//:version_generated",
"//include/envoy/server:options_interface",
"//source/common/common:macros",
"//source/common/common:version_lib",
"//source/version_generated",
],
)

Expand All @@ -104,7 +104,6 @@ envoy_cc_library(
":connection_handler_lib",
":test_hooks_lib",
":worker_lib",
"//:version_generated",
"//include/envoy/common:optional",
"//include/envoy/event:dispatcher_interface",
"//include/envoy/event:signal_interface",
Expand All @@ -130,6 +129,7 @@ envoy_cc_library(
"//source/common/stats:statsd_lib",
"//source/common/thread_local:thread_local_lib",
"//source/server/http:admin_lib",
"//source/version_generated",
],
)

Expand Down
2 changes: 1 addition & 1 deletion source/server/http/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ envoy_cc_library(
srcs = ["admin.cc"],
hdrs = ["admin.h"],
deps = [
"//:version_generated",
"//include/envoy/filesystem:filesystem_interface",
"//include/envoy/http:filter_interface",
"//include/envoy/network:listen_socket_interface",
Expand Down Expand Up @@ -39,6 +38,7 @@ envoy_cc_library(
"//source/common/router:config_lib",
"//source/common/upstream:host_utility_lib",
"//source/server/config/network:http_connection_manager_lib",
"//source/version_generated",
],
)

Expand Down
9 changes: 9 additions & 0 deletions source/version_generated/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package(default_visibility = ["//visibility:public"])

load("//bazel:envoy_build_system.bzl", "envoy_cc_library")

# Library built from version_generated.cc, which is referenced by label from
# the root package (//:version_generated.cc) rather than living in this
# directory.
envoy_cc_library(
name = "version_generated",
srcs = ["//:version_generated.cc"],
deps = ["//source/common/common:version_includes"],
)
1 change: 1 addition & 0 deletions test/run_envoy_bazel_coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ set -e
# Run all tests under bazel coverage.
"${BAZEL_COVERAGE}" coverage //test/coverage:coverage_tests ${BAZEL_BUILD_OPTIONS} \
--cache_test_results=no --instrumentation_filter="" \
--test_output=all \
--coverage_support=@bazel_tools//tools/coverage:coverage_support

# Cleanup any artifacts from previous coverage runs.
Expand Down
6 changes: 6 additions & 0 deletions tools/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package(default_visibility = ["//visibility:public"])

# Export the tool scripts so that they can be referenced by label
# (//tools:gen_git_sha.sh, //tools:git_sha_rewriter.py) from other packages.
exports_files([
"gen_git_sha.sh",
"git_sha_rewriter.py",
])
111 changes: 111 additions & 0 deletions tools/git_sha_rewriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python

# This tool takes an ELF binary that has been built with
# -Wl,--build-id=md5 -Wl,--hash-style=gnu (as done by Bazel versions prior to
# 0.5, i.e. before
# https://github.com/bazelbuild/bazel/commit/724706ba4836c3366fc85b40ed50ccf92f4c3882),
# and replaces the MD5 compiler hash with a truncated git SHA1 hash found in
# Envoy's version_generated.cc.
#
# This is useful to folks who want the build commit in the .note.gnu.build-id
# section rather than the compiler hash of inputs. Please note that the hash is
# a 16 byte truncated git SHA1, rather than a complete 20 byte git SHA1.
# This is a workaround to https://github.com/bazelbuild/bazel/issues/2805.

import binascii
import re
import subprocess as sp
import sys

# This is what the part of .note.gnu.build-id prior to the MD5 hash looks like.
# Each entry is a single byte value; the list is compared element-by-element
# against the bytes read from the binary at the note's file offset.
EXPECTED_BUILD_ID_NOTE_PREFIX = [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your bazel hacking skills continue to astound me. 😉

# The "name" of the note is 4 bytes long.
0x04,
0x00,
0x00,
0x00,
# The "description" of the note is 16 bytes (this is where the hash lives).
0x10,
0x00,
0x00,
0x00,
# The "type" of the note.
0x03,
0x00,
0x00,
0x00,
# 'G', 'N', 'U', '\0' (name)
0x47,
0x4e,
0x55,
0x00,
]
# We're expecting an MD5 hash, 16 bytes.
MD5_HASH_LEN = 16
# Total size of the note section we expect: the fixed prefix above followed by
# the hash. This is checked against the section size reported by readelf.
EXPECTED_BUILD_ID_NOTE_LENGTH = len(EXPECTED_BUILD_ID_NOTE_PREFIX) + MD5_HASH_LEN


class RewriterException(Exception):
    """Raised when an input to the rewriter is not in the expected form.

    This covers a version_generated.cc without a recognizable GIT_SHA, and
    readelf output or a .note.gnu.build-id section that does not match what
    the tool expects.
    """


# Extract the git SHA1 hex string from version_generated.cc.
def ExtractGitSha(path):
    """Return the git SHA recorded in the generated version source file.

    Args:
      path: path to version_generated.cc.

    Returns:
      The word characters found between the quotes of the first
      GIT_SHA("...") occurrence in the file.

    Raises:
      RewriterException: if no GIT_SHA("...") occurrence is found.
    """
    with open(path, 'r') as f:
        contents = f.read()
    # Raw string so that \( and \w reach the regex engine untouched.
    sr = re.search(r'GIT_SHA\("(\w+)"', contents, flags=re.MULTILINE)
    if not sr:
        raise RewriterException('Bad version_generated.cc: %s' % contents)
    return sr.group(1)


# Scrape the offset of .note.gnu.build-id via readelf from the binary. Also
# verify the note section is what we expect.
def ExtractBuildIdNoteOffset(path):
    """Return the file offset of the .note.gnu.build-id section in a binary.

    Args:
      path: path to the ELF binary to inspect.

    Returns:
      The section's file offset, as an integer.

    Raises:
      RewriterException: if readelf cannot be run or fails, its output is not
        in the expected format, the note section has an unexpected size, or
        the note does not start with EXPECTED_BUILD_ID_NOTE_PREFIX.
    """
    try:
        # Pass an argument list rather than a shell string so that paths with
        # spaces or shell metacharacters are handled safely.
        # universal_newlines gives us text output to run the regexes on.
        readelf_output = sp.check_output(
            ['readelf', '-SW', path], universal_newlines=True)
    except (sp.CalledProcessError, OSError) as e:
        # Report the failing command's own output (absent for OSError, e.g.
        # when readelf is not installed).
        raise RewriterException('%s %s' % (e, getattr(e, 'output', '')))
    # Sanity check the ordering of fields from readelf.
    if not re.search(r'Name\s+Type\s+Address\s+Off\s+Size\s', readelf_output):
        raise RewriterException('Invalid readelf output: %s' % readelf_output)
    sr = re.search(r'\.note\.gnu\.build-id\s+NOTE\s+\w+\s+(\w+)\s(\w+)\s',
                   readelf_output)
    if not sr:
        raise RewriterException(
            'Unable to parse .note.gnu.build-id note: %s' % readelf_output)
    raw_note_offset, raw_note_size = sr.groups()
    # readelf prints both fields in hexadecimal.
    if int(raw_note_size, 16) != EXPECTED_BUILD_ID_NOTE_LENGTH:
        raise RewriterException(
            'Incorrect .note.gnu.build-id note size: %s' % readelf_output)
    note_offset = int(raw_note_offset, 16)
    with open(path, 'rb') as f:
        f.seek(note_offset)
        # bytearray yields integer byte values on both Python 2 and 3.
        note_prefix = list(
            bytearray(f.read(len(EXPECTED_BUILD_ID_NOTE_PREFIX))))
    if note_prefix != EXPECTED_BUILD_ID_NOTE_PREFIX:
        raise RewriterException(
            'Unexpected .note.gnu.build-id prefix in %s: %s' % (path,
                                                                note_prefix))
    return note_offset


# Inplace binary rewriting of the 16 byte .note.gnu.build-id description with
# the truncated hash.
def RewriteBinary(path, offset, git5_sha1):
    """Overwrite the build ID hash in a binary with a truncated git SHA1.

    Args:
      path: path to the binary to modify in place.
      offset: file offset of the .note.gnu.build-id section; the hash is
        written immediately after the EXPECTED_BUILD_ID_NOTE_PREFIX bytes.
      git5_sha1: git SHA1 as a hex string; only its first 2 * MD5_HASH_LEN
        hex digits are used.

    Raises:
      RewriterException: if git5_sha1 has fewer than 2 * MD5_HASH_LEN hex
        digits, which would otherwise overwrite only part of the build ID.
    """
    hex_digits = 2 * MD5_HASH_LEN
    if len(git5_sha1) < hex_digits:
        raise RewriterException(
            'Git SHA1 %s is shorter than %d hex digits' % (git5_sha1,
                                                           hex_digits))
    truncated_hash = git5_sha1[:hex_digits]
    # Decode before touching the file so invalid hex fails without side effects.
    hash_bytes = binascii.unhexlify(truncated_hash)
    # Single parenthesized argument: prints identically as a Python 2 print
    # statement and as a Python 3 print() call.
    print('Writing %s truncated to %s at offset 0x%x in %s' % (git5_sha1,
                                                               truncated_hash,
                                                               offset, path))
    with open(path, 'r+b') as f:
        f.seek(offset + len(EXPECTED_BUILD_ID_NOTE_PREFIX))
        f.write(hash_bytes)


# Script entry point: read the git SHA from version_generated.cc (first
# argument) and write it into the build ID note of the binary (second
# argument).
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: %s <path to version_generated.cc> <Envoy binary path>' %
              sys.argv[0])
        sys.exit(1)
    git_sha = ExtractGitSha(sys.argv[1])
    envoy_bin_path = sys.argv[2]
    build_id_note_offset = ExtractBuildIdNoteOffset(envoy_bin_path)
    RewriteBinary(envoy_bin_path, build_id_note_offset, git_sha)