Bug 1470127 - Move binary checks to a standalone script. r=froydnj

We perform, on the binaries we build, a series of checks that are
implemented as half-baked make commands, invoked after linking them.

- check libstdc++ symbol versions to ensure binary compatibility with
  a baseline.
- check glibc symbol versions to ensure binary compatibility with a
  baseline.
- check that target binaries don't contain text relocations.
- check that libmozglue is linked before libc on android.
- on libxul, check that NSModules are laid out correctly.
- on libxul, check that there is more than one PT_LOAD segment.

Those checks happen to work where they matter, but their setup is
unreliable. For example, the checks for symbol versions are supposed to
work for libclang-plugin on cross osx builds, but in fact, don't,
because the readelf path doesn't exist, and the command doesn't fail in
that case.

So move them all to a standalone script, performing the checks more
thoroughly (especially the NSModules one, where we now also check that
they are all adjacent), and more verbosely.

--HG--
extra : rebase_source : 7072e622e95f363d4a6c3a8e272d3445d998b592
This commit is contained in:
Mike Hommey 2018-06-21 18:13:03 +09:00
parent 7db339076c
commit 354a9e27a0
4 changed files with 318 additions and 69 deletions

View File

@ -416,58 +416,6 @@ ifdef MOZ_DEBUG
JAVAC_FLAGS += -g
endif
# $(call CHECK_SYMBOLS,lib,PREFIX,dep_name,test)
# Checks that the given `lib` doesn't contain dependency on symbols with a
# version starting with `PREFIX`_ and matching the `test`. `dep_name` is only
# used for the error message.
# `test` is an awk expression using the information in the variable `v` which
# contains a list of version items ([major, minor, ...]).
# The symbols come from `$(TOOLCHAIN_PREFIX)readelf -sW`, whose 8th column is
# matched against `@PREFIX_`. Every offending symbol is printed after a
# single TEST-UNEXPECTED-FAIL header line, and awk then exits with status 1.
# NOTE(review): the header always names `check_stdcxx`, including when this
# macro is invoked through CHECK_GLIBC.
define CHECK_SYMBOLS
@$(TOOLCHAIN_PREFIX)readelf -sW $(1) | \
awk '$$8 ~ /@$(2)_/ { \
split($$8,a,"@"); \
split(a[2],b,"_"); \
split(b[2],v,"."); \
if ($(4)) { \
if (!found) { \
print "TEST-UNEXPECTED-FAIL | check_stdcxx | We do not want these $(3) symbol versions to be used:" \
} \
print " ",$$8; \
found=1 \
} \
} \
END { \
if (found) { \
exit(1) \
} \
}'
endef
# The symbol version checks are only defined when at least one of
# MOZ_LIBSTDCXX_TARGET_VERSION and MOZ_LIBSTDCXX_HOST_VERSION is non-empty.
ifneq (,$(MOZ_LIBSTDCXX_TARGET_VERSION)$(MOZ_LIBSTDCXX_HOST_VERSION))
# $(call CHECK_STDCXX,lib): flags GLIBCXX symbol versions with a major above 3,
# or of the form 3.4.x with x above 16.
CHECK_STDCXX = $(call CHECK_SYMBOLS,$(1),GLIBCXX,libstdc++,v[1] > 3 || (v[1] == 3 && v[2] == 4 && v[3] > 16))
# $(call CHECK_GLIBC,lib): flags GLIBC symbol versions with a major above 2,
# or of the form 2.x with x above 12.
CHECK_GLIBC = $(call CHECK_SYMBOLS,$(1),GLIBC,libc,v[1] > 2 || (v[1] == 2 && v[2] > 12))
endif
# $(call CHECK_TEXTREL,binary), defined unless OS_TARGET is WINNT or Darwin:
# prints a TEST-UNEXPECTED-FAIL line when `readelf -d` mentions TEXTREL.
# The trailing `|| true` means this command line itself never fails the
# recipe, whether or not a text relocation was found.
ifeq (,$(filter $(OS_TARGET),WINNT Darwin))
CHECK_TEXTREL = @$(TOOLCHAIN_PREFIX)readelf -d $(1) | grep TEXTREL > /dev/null && echo 'TEST-UNEXPECTED-FAIL | check_textrel | We do not want text relocations in libraries and programs' || true
endif
ifeq ($(MOZ_WIDGET_TOOLKIT),android)
# While this is very unlikely (libc being added by the compiler at the end
# of the linker command line), if libmozglue.so ends up after libc.so, all
# hell breaks loose, so better safe than sorry, and check it's actually the
# case.
# $(call CHECK_MOZGLUE_ORDER,binary): numbers the NEEDED entries of
# `readelf -d` in order of appearance and exits with status 1 when
# libmozglue.so is listed and libc.so has a lower number than it (an absent
# libc.so compares as lower too).
CHECK_MOZGLUE_ORDER = @$(TOOLCHAIN_PREFIX)readelf -d $(1) | grep NEEDED | awk '{ libs[$$NF] = ++n } END { if (libs["[libmozglue.so]"] && libs["[libc.so]"] < libs["[libmozglue.so]"]) { print "libmozglue.so must be linked before libc.so"; exit 1 } }'
endif
# $(call CHECK_BINARY,binary)
# Runs every binary check on `binary`, one command line per check. Several of
# the checks are only defined under conditionals, and LOCAL_CHECKS is not
# defined in this file; calling an undefined variable expands to nothing.
define CHECK_BINARY
$(call CHECK_GLIBC,$(1))
$(call CHECK_STDCXX,$(1))
$(call CHECK_TEXTREL,$(1))
$(call LOCAL_CHECKS,$(1))
$(call CHECK_MOZGLUE_ORDER,$(1))
endef
# autoconf.mk sets OBJ_SUFFIX to an error to avoid use before including
# this file
OBJ_SUFFIX := $(_OBJ_SUFFIX)

View File

@ -565,7 +565,7 @@ ifdef MOZ_PROFILE_GENERATE
endif
else # !WINNT || GNU_CC
$(call EXPAND_CC_OR_CXX,$@) -o $@ $(COMPUTED_CXX_LDFLAGS) $(PGO_CFLAGS) $($(notdir $@)_$(OBJS_VAR_SUFFIX)) $(RESFILE) $(WIN32_EXE_LDFLAGS) $(LDFLAGS) $(STATIC_LIBS) $(MOZ_PROGRAM_LDFLAGS) $(SHARED_LIBS) $(OS_LIBS)
$(call CHECK_BINARY,$@)
$(call py_action,check_binary,--target $@)
endif # WINNT && !GNU_CC
ifdef ENABLE_STRIP
@ -601,7 +601,7 @@ else
endif # HOST_CPP_PROG_LINK
endif
ifndef CROSS_COMPILE
$(call CHECK_STDCXX,$@)
$(call py_action,check_binary,--host $@)
endif
#
@ -624,7 +624,7 @@ ifdef MSMANIFEST_TOOL
endif # MSVC with manifest tool
else
$(call EXPAND_CC_OR_CXX,$@) $(COMPUTED_CXX_LDFLAGS) $(PGO_CFLAGS) -o $@ $($@_$(OBJS_VAR_SUFFIX)) $(WIN32_EXE_LDFLAGS) $(LDFLAGS) $(STATIC_LIBS) $(MOZ_PROGRAM_LDFLAGS) $(SHARED_LIBS) $(OS_LIBS)
$(call CHECK_BINARY,$@)
$(call py_action,check_binary,--target $@)
endif # WINNT && !GNU_CC
ifdef ENABLE_STRIP
@ -646,7 +646,7 @@ else
endif
endif
ifndef CROSS_COMPILE
$(call CHECK_STDCXX,$@)
$(call py_action,check_binary,--host $@)
endif
$(LIBRARY): $(OBJS) $(STATIC_LIBS) $(EXTRA_DEPS) $(GLOBAL_DEPS)
@ -681,7 +681,7 @@ ifndef INCREMENTAL_LINKER
$(RM) $@
endif
$(MKSHLIB) $($@_$(OBJS_VAR_SUFFIX)) $(RESFILE) $(LDFLAGS) $(STATIC_LIBS) $(RUST_STATIC_LIB_FOR_SHARED_LIB) $(SHARED_LIBS) $(EXTRA_DSO_LDOPTS) $(MOZ_GLUE_LDFLAGS) $(OS_LIBS)
$(call CHECK_BINARY,$@)
$(call py_action,check_binary,--target $@)
ifeq (_WINNT,$(GNU_CC)_$(OS_ARCH))
ifdef MSMANIFEST_TOOL

View File

@ -0,0 +1,313 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import print_function, unicode_literals
import argparse
import os
import subprocess
import sys
from distutils.version import StrictVersion as Version
import buildconfig
from mozbuild.util import memoize
from mozpack.executables import (
get_type,
ELF,
MACHO,
)
# Newest symbol versions a checked binary may depend on: check_stdcxx and
# check_glibc reject undefined symbols whose version is greater than these.
STDCXX_MAX_VERSION = Version('3.4.16')
GLIBC_MAX_VERSION = Version('2.12')
# Description of the host side of the build: tools are looked up by their
# plain names. The checks compare against this exact object (`target is
# HOST`), so it must stay a single shared dict.
HOST = {
'MOZ_LIBSTDCXX_VERSION':
buildconfig.substs.get('MOZ_LIBSTDCXX_HOST_VERSION'),
'platform': buildconfig.substs['HOST_OS_ARCH'],
'readelf': 'readelf',
'nm': 'nm',
}
# Description of the target side of the build: tool names are prefixed with
# TOOLCHAIN_PREFIX when that substitution is set.
TARGET = {
'MOZ_LIBSTDCXX_VERSION':
buildconfig.substs.get('MOZ_LIBSTDCXX_TARGET_VERSION'),
'platform': buildconfig.substs['OS_TARGET'],
'readelf': '{}readelf'.format(
buildconfig.substs.get('TOOLCHAIN_PREFIX', '')),
'nm': '{}nm'.format(buildconfig.substs.get('TOOLCHAIN_PREFIX', '')),
}
# Size assumed for an NSModule symbol when the symbol dump reports a size of
# zero (see check_nsmodules).
if buildconfig.substs.get('MOZ_ASAN'):
    GUESSED_NSMODULE_SIZE = 64
else:
    GUESSED_NSMODULE_SIZE = (
        8 if buildconfig.substs.get('HAVE_64BIT_BUILD') else 4)

# Several checks ask for the type of the same binary; cache the answer.
get_type = memoize(get_type)
@memoize
def get_output(*cmd):
    """Run `cmd` with LC_ALL=C and return its output as a list of lines.

    The result is memoized per command line, so the different checks can
    share one invocation of the same tool on the same binary.
    """
    env = os.environ.copy()
    env[b'LC_ALL'] = b'C'
    output = subprocess.check_output(cmd, env=env)
    return output.splitlines()
class Skip(RuntimeError):
    """Raised by a check that does not apply to the given target/binary."""
class Empty(RuntimeError):
    """Raised by at_least_one() when the wrapped iterable had no item."""
def at_least_one(iter):
    """Yield every item of `iter`; raise Empty at the end if there was none."""
    seen = 0
    for seen, item in enumerate(iter, 1):
        yield item
    if seen == 0:
        raise Empty()
def iter_readelf_symbols(target, binary):
    """Yield one dict per symbol listed by `readelf -sW` for `binary`.

    Only lines with at least 8 fields, the first being `<digits>:`, are
    treated as symbol entries. Each dict has the keys addr, size, type,
    binding, visibility, index, name and version; version is None when the
    name carries no `@version` suffix.
    """
    for line in get_output(target['readelf'], '-sW', binary):
        fields = line.split()
        if len(fields) < 8:
            continue
        number = fields[0]
        if not (number.endswith(':') and number[:-1].isdigit()):
            continue
        addr, size, sym_type, binding, visibility, index, name = fields[1:8]
        version = None
        if '@' in name:
            # Names look like `sym@VER` or `sym@@VER`: split on the last `@`
            # and drop any that is left at the end of the name.
            name, version = name.rsplit('@', 1)
            name = name.rstrip('@')
        yield {
            'addr': int(addr, 16),
            # readelf output may contain decimal values or hexadecimal
            # values prefixed with 0x for the size. Let python autodetect.
            'size': int(size, 0),
            'type': sym_type,
            'binding': binding,
            'visibility': visibility,
            'index': index,
            'name': name,
            'version': version,
        }
def iter_readelf_dynamic(target, binary):
    """Yield a (tag, value) pair per entry printed by `readelf -d`.

    Entry lines are those whose first field starts with `0x`. The tag is the
    second field without its surrounding parentheses, and the value is the
    rest of the line.
    """
    for line in get_output(target['readelf'], '-d', binary):
        data = line.split(None, 2)
        if len(data) < 2 or not data[0].startswith('0x'):
            continue
        tag = data[1].rstrip(')').lstrip('(')
        # Tolerate an entry that has a tag but nothing in the value column
        # (indexing data[2] unconditionally would raise IndexError): report
        # it with an empty value rather than dropping it.
        value = data[2] if len(data) > 2 else ''
        yield tag, value
def check_dep_versions(target, binary, lib, prefix, max_version):
    """Check that `binary` needs no `prefix`_ symbol newer than `max_version`.

    Only undefined symbols (index `UND`) whose version starts with
    `prefix` + '_' are considered. `lib` is only used in the error message.

    Raises Skip when `binary` is not an ELF file, and RuntimeError when no
    symbol could be read or when unwanted symbol versions are used.
    """
    if get_type(binary) != ELF:
        raise Skip()
    version_prefix = prefix + '_'
    too_new = []
    try:
        for sym in at_least_one(iter_readelf_symbols(target, binary)):
            ver = sym['version']
            if sym['index'] != 'UND' or not ver:
                continue
            if not ver.startswith(version_prefix):
                continue
            if Version(ver[len(version_prefix):]) > max_version:
                too_new.append(sym)
    except Empty:
        raise RuntimeError('Could not parse readelf output?')
    if too_new:
        report = [
            'We do not want these {} symbol versions to be used:'.format(lib)
        ]
        report.extend(
            ' {} ({})'.format(s['name'], s['version']) for s in too_new)
        raise RuntimeError('\n'.join(report))
def check_stdcxx(target, binary):
    """Check the GLIBCXX symbol versions used against STDCXX_MAX_VERSION."""
    check_dep_versions(target, binary, lib='libstdc++', prefix='GLIBCXX',
                       max_version=STDCXX_MAX_VERSION)
def check_glibc(target, binary):
    """Check the GLIBC symbol versions used against GLIBC_MAX_VERSION."""
    check_dep_versions(target, binary, lib='libc', prefix='GLIBC',
                       max_version=GLIBC_MAX_VERSION)
def check_textrel(target, binary):
    """Check that a target ELF binary contains no text relocations.

    Raises Skip for host binaries and non-ELF files, and RuntimeError when
    the dynamic section has a TEXTREL entry or a FLAGS entry mentioning
    TEXTREL, or when no dynamic entry could be read at all.
    """
    if target is HOST or get_type(binary) != ELF:
        raise Skip()
    try:
        entries = at_least_one(iter_readelf_dynamic(target, binary))
        has_textrel = any(
            tag == 'TEXTREL' or (tag == 'FLAGS' and 'TEXTREL' in value)
            for tag, value in entries)
    except Empty:
        raise RuntimeError('Could not parse readelf output?')
    if has_textrel:
        raise RuntimeError(
            'We do not want text relocations in libraries and programs'
        )
def ishex(s):
    """Return whether `s` can be parsed by int() as a base 16 number."""
    try:
        int(s, 16)
    except ValueError:
        return False
    else:
        return True
def is_libxul(binary):
    """Return whether the file name of `binary` contains `xul`, in any case."""
    return 'xul' in os.path.basename(binary).lower()
def check_nsmodules(target, binary):
    """Check how the NSModule symbols are laid out in libxul.

    The symbols are listed with `dumpbin -exports` when _MSC_VER is set,
    and with `nm -gP` otherwise. Once sorted by address they must be
    adjacent, start with start_kPStaticModules_NSModule and end with
    end_kPStaticModules_NSModule.

    Raises Skip for host binaries and for binaries that are not libxul, and
    RuntimeError when no NSModule is found or the layout is wrong.
    """
    if target is HOST or not is_libxul(binary):
        raise Skip()

    symbols = []
    if buildconfig.substs.get('_MSC_VER'):
        for line in get_output('dumpbin', '-exports', binary):
            fields = line.split(None, 3)
            if len(fields) != 4:
                continue
            # Presumably the ordinal, hint, RVA and name columns of the
            # export table -- to be confirmed against dumpbin's output.
            ordinal, hint, rva, rest = fields
            if not (ordinal.isdigit() and ishex(hint) and ishex(rva)):
                continue
            # - Some symbols in the table can be aliases, and appear as
            #   `foo = bar`.
            # - The MSVC mangling has some type info following `@@`
            # - Any namespacing that can happen on the symbol appears as a
            #   suffix, after a `@`.
            name = rest.split(' = ')[0].split('@@')[0].split('@')[0]
            if name.endswith('_NSModule'):
                symbols.append((int(rva, 16), 0, name.lstrip('?')))
    else:
        for line in get_output(target['nm'], '-gP', binary):
            fields = line.split()
            if len(fields) != 4:
                continue
            sym, _, addr, size = fields
            # NSModules symbols end with _NSModule or _NSModuleE when
            # C++-mangled.
            if sym.endswith(('_NSModule', '_NSModuleE')):
                symbols.append((int(addr, 16), int(size, 16), sym))

    if not symbols:
        raise RuntimeError('Could not find NSModules')

    def print_symbols(symbols):
        for addr, size, sym in symbols:
            print('%x %d %s' % (addr, size, sym))

    symbols.sort()
    expected_addr = None
    for addr, size, sym in symbols:
        if expected_addr is not None and expected_addr != addr:
            print_symbols(symbols)
            raise RuntimeError('NSModules are not adjacent')
        # On mac, nm doesn't actually print anything other than 0 for the
        # size. So take our best guess. On Windows, dumpbin doesn't give us
        # any size at all.
        expected_addr = addr + (size or GUESSED_NSMODULE_SIZE)

    first = symbols[0][2]
    last = symbols[-1][2]
    # On some platforms, there are extra underscores on symbol names.
    well_ordered = (
        first.lstrip('_') == 'start_kPStaticModules_NSModule' and
        last.lstrip('_') == 'end_kPStaticModules_NSModule')
    if not well_ordered:
        print_symbols(symbols)
        raise RuntimeError('NSModules are not ordered appropriately')
def check_pt_load(target, binary):
    """Check that libxul has more than one LOAD line in `readelf -l`.

    Raises Skip for host binaries, non-ELF files and binaries other than
    libxul, and RuntimeError when at most one LOAD line is found.
    """
    if target is HOST or get_type(binary) != ELF or not is_libxul(binary):
        raise Skip()
    load_count = sum(
        1 for line in get_output(target['readelf'], '-l', binary)
        if line.split()[:1] == ['LOAD'])
    if load_count <= 1:
        raise RuntimeError('Expected more than one PT_LOAD segment')
def check_mozglue_order(target, binary):
    """Check that libmozglue.so is needed before libc.so on Android.

    Raises Skip for host binaries and for targets whose platform is not
    Android, and RuntimeError when libc.so is not needed at all, when it
    comes before libmozglue.so, or when no dynamic entry could be read.
    """
    if target is HOST or target['platform'] != 'Android':
        raise Skip()
    # While this is very unlikely (libc being added by the compiler at the end
    # of the linker command line), if libmozglue.so ends up after libc.so, all
    # hell breaks loose, so better safe than sorry, and check it's actually the
    # case.
    mozglue = libc = None
    try:
        entries = at_least_one(iter_readelf_dynamic(target, binary))
        for position, (tag, value) in enumerate(entries):
            if tag != 'NEEDED':
                continue
            if '[libmozglue.so]' in value:
                mozglue = position
            elif '[libc.so]' in value:
                libc = position
    except Empty:
        raise RuntimeError('Could not parse readelf output?')
    if libc is None:
        raise RuntimeError('libc.so is not linked?')
    if mozglue is not None and libc < mozglue:
        raise RuntimeError('libmozglue.so must be linked before libc.so')
def checks(target, binary):
    """Run every applicable check on `binary` and report the results.

    `target` is HOST or TARGET. A failure is printed on stderr as a
    TEST-UNEXPECTED-FAIL line; a success is printed as a TEST-PASS line,
    but only when MOZ_AUTOMATION is set. Checks that raise Skip are silent.

    Returns 1 if any check failed, 0 otherwise.
    """
    # The clang-plugin is built as target but is really a host binary.
    # Cheat and pretend we were passed the right argument.
    if 'clang-plugin' in binary:
        target = HOST
    # Not named `checks`, which would shadow this very function.
    to_run = []
    if target['MOZ_LIBSTDCXX_VERSION']:
        to_run.append(check_stdcxx)
        to_run.append(check_glibc)
    to_run.append(check_textrel)
    to_run.append(check_nsmodules)
    to_run.append(check_pt_load)
    to_run.append(check_mozglue_order)
    retcode = 0
    basename = os.path.basename(binary)
    for c in to_run:
        name = c.__name__
        try:
            c(target, binary)
            if buildconfig.substs.get('MOZ_AUTOMATION'):
                print('TEST-PASS | {} | {}'.format(name, basename))
        except Skip:
            # Skip derives from RuntimeError, so it has to be handled first.
            # The check does not apply to this binary: nothing to report.
            pass
        except RuntimeError as e:
            # Format the exception itself rather than `e.message`, which is
            # deprecated since Python 2.6 and does not exist on Python 3.
            print('TEST-UNEXPECTED-FAIL | {} | {} | {}'
                  .format(name, basename, e),
                  file=sys.stderr)
            retcode = 1
    return retcode
def main(args):
    """Parse the command line in `args` and run the checks on one binary.

    Exactly one of --host and --target must be given, followed by the path
    of the binary. Returns the exit code: the result of checks(), or 1 when
    neither or both of the flags were given.
    """
    parser = argparse.ArgumentParser(description='Check built binaries')
    parser.add_argument(
        '--host', action='store_true',
        help='Perform checks for a host binary')
    parser.add_argument(
        '--target', action='store_true',
        help='Perform checks for a target binary')
    parser.add_argument(
        'binary', metavar='PATH',
        help='Location of the binary to check')
    options = parser.parse_args(args)

    # Both flags are booleans: they are equal when none or both were given.
    if options.host == options.target:
        print('Exactly one of --host or --target must be given',
              file=sys.stderr)
        return 1

    return checks(HOST if options.host else TARGET, options.binary)
# When run as a script, the result of main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main(sys.argv[1:]))

View File

@ -14,15 +14,3 @@ PP_TARGETS += LIBXUL_AUTOLOAD
LIBXUL_AUTOLOAD = $(topsrcdir)/toolkit/library/libxul.so-gdb.py.in
LIBXUL_AUTOLOAD_FLAGS := -Dtopsrcdir=$(abspath $(topsrcdir))
endif
# $(call get_first_and_last,binary) prints the names of the first and the
# last `_NSModule` symbols of the binary: sorted on the third column of
# `dumpbin -exports` when _MSC_VER is defined, and on whole `nm -g` lines
# otherwise (lines containing the word `refptr` being ignored in that case).
ifdef _MSC_VER
get_first_and_last = dumpbin -exports $1 | grep _NSModule@@ | sort -k 3 | sed -n 's/^.*?\([^@]*\)@@.*$$/\1/;1p;$$p'
else
get_first_and_last = $(TOOLCHAIN_PREFIX)nm -g $1 | grep _NSModule$$ | grep -vw refptr | sort | sed -n 's/^.* _*\([^ ]*\)$$/\1/;1p;$$p'
endif
# LOCAL_CHECKS exits with status 1 unless those two symbols are
# start_kPStaticModules_NSModule and end_kPStaticModules_NSModule.
LOCAL_CHECKS = test "$$($(get_first_and_last) | xargs echo)" != "start_kPStaticModules_NSModule end_kPStaticModules_NSModule" && echo "NSModules are not ordered appropriately" && exit 1 || exit 0
# On Linux, a second command is appended to count the LOAD lines of
# `readelf -l`.
# NOTE(review): the first command always ends with `exit`, so the command
# appended after `;` looks unreachable -- confirm.
# NOTE(review): `$1` inside the awk program is expanded by make like any other
# `$1` here, instead of reaching awk as its first field; `$$1` was presumably
# intended -- confirm.
ifeq (Linux,$(OS_ARCH))
LOCAL_CHECKS += ; test "$$($(TOOLCHAIN_PREFIX)readelf -l $1 | awk '$1 == "LOAD" { t += 1 } END { print t }')" -le 1 && echo "Only one PT_LOAD segment" && exit 1 || exit 0
endif