Bug 1636251: Patch Sentry events to ensure a raw username isn't sent to Sentry r=rstewart

To avoid sending identifying information, common absolute paths are patched with placeholder values. For example, devs
may place their Firefox repository within their home dir, so absolute paths are doctored to be prefixed with
"<topsrcdir>" instead.

Additionally, any paths including the user's home directory are patched to instead be a relative path from "~".

Differential Revision: https://phabricator.services.mozilla.com/D78962
This commit is contained in:
Mitchell Hentges 2020-06-10 19:50:57 +00:00
parent 47c53b361d
commit 2b514aa148
3 changed files with 63 additions and 10 deletions

View File

@ -376,7 +376,7 @@ def bootstrap(topsrcdir, mozilla_dir=None):
state_dir],
stdout=devnull, stderr=devnull)
def populate_context(context, key=None):
def populate_context(key=None):
if key is None:
return
if key == 'state_dir':

View File

@ -151,7 +151,7 @@ class ContextWrapper(object):
return getattr(object.__getattribute__(self, '_context'), key)
except AttributeError as e:
try:
ret = object.__getattribute__(self, '_handler')(self, key)
ret = object.__getattribute__(self, '_handler')(key)
except (AttributeError, TypeError):
# TypeError is in case the handler comes from old code not
# taking a key argument.
@ -320,7 +320,8 @@ To see more help for a specific command, run:
Returns the integer exit code that should be used. 0 means success. All
other values indicate failure.
"""
register_sentry()
topsrcdir = self.populate_context_handler('topdir')
register_sentry(topsrcdir)
# If no encoding is defined, we default to UTF-8 because without this
# Python 2.7 will assume the default encoding of ASCII. This will blow
@ -401,7 +402,6 @@ To see more help for a specific command, run:
commands=Registrar)
if self.populate_context_handler:
self.populate_context_handler(context)
context = ContextWrapper(context, self.populate_context_handler)
parser = self.get_argument_parser(context)

View File

@ -6,18 +6,20 @@ from __future__ import absolute_import
import os
import re
import sys
from os.path import expanduser
import mozpack.path as mozpath
import sentry_sdk
from six.moves.configparser import SafeConfigParser, NoOptionError
from mozboot.util import get_state_dir
from six import string_types
from six.moves.configparser import SafeConfigParser, NoOptionError
# https://sentry.prod.mozaws.net/operations/mach/
_SENTRY_DSN = "https://8228c9aff64949c2ba4a2154dc515f55@sentry.prod.mozaws.net/525"
def register_sentry():
def register_sentry(topsrcdir):
cfg_file = os.path.join(get_state_dir(), 'machrc')
config = SafeConfigParser()
@ -32,10 +34,17 @@ def register_sentry():
if not telemetry_enabled:
return
sentry_sdk.init(_SENTRY_DSN, before_send=_settle_mach_module_id)
sentry_sdk.init(_SENTRY_DSN,
before_send=lambda event, _: _process_event(event, topsrcdir))
def _settle_mach_module_id(sentry_event, exception):
def _process_event(sentry_event, topsrcdir):
    """Run a Sentry event through every event-rewriting step, in order.

    The event is first handed to ``_settle_mach_module_id`` and the result
    is then handed to ``_patch_absolute_paths``. Both steps receive
    ``topsrcdir`` as their second argument.

    Returns whatever the final step returns.
    """
    settled_event = _settle_mach_module_id(sentry_event, topsrcdir)
    return _patch_absolute_paths(settled_event, topsrcdir)
def _settle_mach_module_id(sentry_event, _):
# Sentry groups issues according to the stack frames and their associated
# "module" properties. However, one of the modules is being reported
# like "mach.commands.26a828ef5164403eaff4305ab4cb0fab" (with a generated id).
@ -54,6 +63,50 @@ def _settle_mach_module_id(sentry_event, exception):
return sentry_event
def _resolve_topobjdir():
    """Return the directory two levels above ``sys.prefix``.

    The result is collapsed with ``os.path.normpath`` and then passed
    through ``mozpath.normsep``.
    NOTE(review): this presumably relies on the running Python living in a
    virtualenv nested two directories below the object directory - confirm.
    """
    prefix_parent = os.path.dirname(sys.prefix)
    collapsed = os.path.normpath(os.path.join(prefix_parent, ".."))
    return mozpath.normsep(collapsed)
def _patch_absolute_paths(sentry_event, topsrcdir):
    """Replace well-known absolute paths in a Sentry event with placeholders.

    As discussed here (https://bugzilla.mozilla.org/show_bug.cgi?id=1636251#c28),
    usernames are removed from the event on a best-effort basis. The most
    likely place for a username to show up is inside an absolute path, such
    as "/home/mitch/dev/firefox/mach".

    The state dir, the object dir and the source dir are replaced with
    "<statedir>", "<topobjdir>" and "<topsrcdir>". Any remaining occurrence
    of the user's home directory is replaced with "~". These are blanket
    substring replacements over every string (and string dict key) in the
    event, so an unrelated value that happens to contain one of the paths
    is rewritten as well.

    Args:
        sentry_event: the event to scrub; dicts, lists and strings are
            walked recursively, anything else is left untouched.
        topsrcdir: absolute path of the source directory. A falsy value
            (``None`` or ``""``) is skipped.

    Returns:
        The scrubbed event. Dicts are patched in place (and returned),
        lists are rebuilt.
    """
    def recursive_patch(value, needle, replacement):
        if isinstance(value, list):
            return [recursive_patch(v, needle, replacement) for v in value]
        if isinstance(value, dict):
            # Snapshot the keys: entries are popped and re-inserted under
            # their patched name while we walk the dict.
            for key in list(value.keys()):
                next_value = value.pop(key)
                # Only string keys can carry a path; calling ".replace" on
                # any other key type would raise.
                if isinstance(key, string_types):
                    key = key.replace(needle, replacement)
                value[key] = recursive_patch(next_value, needle, replacement)
            return value
        if isinstance(value, string_types):
            return value.replace(needle, replacement)
        return value

    home_dir = expanduser("~")
    for (needle, replacement) in (
        (get_state_dir(), "<statedir>"),
        (_resolve_topobjdir(), "<topobjdir>"),
        (topsrcdir, "<topsrcdir>"),
        (home_dir, "~"),
        # Sentry converts "vars" to their "representations". When paths are in local
        # variables on Windows, "C:\Users\MozillaUser\Desktop" becomes
        # "'C:\\Users\\MozillaUser\\Desktop'". To still catch this case, we "repr"
        # the home directory and scrub the beginning and end quotes, then
        # find-and-replace on that.
        (repr(home_dir)[1:-1], "~"),
    ):
        # An empty needle would make str.replace insert the placeholder
        # between every character, and a None needle would raise. Skip any
        # path that could not be resolved instead of corrupting the event.
        if not needle:
            continue
        sentry_event = recursive_patch(sentry_event, needle, replacement)
    return sentry_event
def report_exception(exception):
    """Hand ``exception`` to the Sentry SDK for reporting.

    sentry_sdk won't report the exception if ``sentry_sdk.init(...)``
    hasn't been called.
    """
    sentry_sdk.capture_exception(exception)