From cc75cb97eb7563382501dc12d7783a69c69b701b Mon Sep 17 00:00:00 2001 From: Sebastian Hengst Date: Mon, 28 Nov 2016 14:19:47 +0100 Subject: [PATCH] Backed out changeset 626c5d74b74c (bug 1315976) for breaking Mac L10n nightlies. r=backout a=backout MozReview-Commit-ID: 8mie8tWYrcz --- .../compare_locales/__init__.py | 2 +- .../compare_locales/commands.py | 151 +++-- .../compare_locales/compare.py | 44 +- .../compare-locales/compare_locales/parser.py | 591 ++++++++---------- .../compare_locales/tests/__init__.py | 19 +- .../compare_locales/tests/test_checks.py | 12 +- .../compare_locales/tests/test_defines.py | 95 --- .../compare_locales/tests/test_dtd.py | 69 +- .../compare_locales/tests/test_ini.py | 78 +-- .../compare_locales/tests/test_merge.py | 12 +- .../compare_locales/tests/test_parser.py | 44 -- .../compare_locales/tests/test_properties.py | 63 +- .../compare_locales/webapps.py | 2 +- 13 files changed, 434 insertions(+), 748 deletions(-) delete mode 100644 python/compare-locales/compare_locales/tests/test_defines.py delete mode 100644 python/compare-locales/compare_locales/tests/test_parser.py diff --git a/python/compare-locales/compare_locales/__init__.py b/python/compare-locales/compare_locales/__init__.py index eb1d9a0ff04d..bad265e4fe3f 100644 --- a/python/compare-locales/compare_locales/__init__.py +++ b/python/compare-locales/compare_locales/__init__.py @@ -1 +1 @@ -version = "1.2.1" +version = "1.1" diff --git a/python/compare-locales/compare_locales/commands.py b/python/compare-locales/compare_locales/commands.py index 4b8d5b0e3537..61b58ec4b2ec 100644 --- a/python/compare-locales/compare_locales/commands.py +++ b/python/compare-locales/compare_locales/commands.py @@ -5,9 +5,8 @@ 'Commands exposed to commandlines' import logging -from argparse import ArgumentParser +from optparse import OptionParser, make_option -from compare_locales import version from compare_locales.paths import EnumerateApp from compare_locales.compare import compareApp, 
compareDirs from compare_locales.webapps import compare_web_app @@ -18,35 +17,37 @@ class BaseCommand(object): This handles command line parsing, and general sugar for setuptools entry_points. """ - - def __init__(self): - self.parser = None - - def get_parser(self): - """Get an ArgumentParser, with class docstring as description. - """ - parser = ArgumentParser(description=self.__doc__) - parser.add_argument('--version', action='version', - version='%(prog)s ' + version) - parser.add_argument('-v', '--verbose', action='count', dest='v', - default=0, help='Make more noise') - parser.add_argument('-q', '--quiet', action='count', dest='q', - default=0, help='Make less noise') - parser.add_argument('-m', '--merge', - help='''Use this directory to stage merged files, -use {ab_CD} to specify a different directory for each locale''') - return parser - - def add_data_argument(self, parser): - parser.add_argument('--data', choices=['text', 'exhibit', 'json'], - default='text', - help='''Choose data and format (one of text, + options = [ + make_option('-v', '--verbose', action='count', dest='v', default=0, + help='Make more noise'), + make_option('-q', '--quiet', action='count', dest='q', default=0, + help='Make less noise'), + make_option('-m', '--merge', + help='''Use this directory to stage merged files, +use {ab_CD} to specify a different directory for each locale'''), + ] + data_option = make_option('--data', choices=['text', 'exhibit', 'json'], + default='text', + help='''Choose data and format (one of text, exhibit, json); text: (default) Show which files miss which strings, together with warnings and errors. Also prints a summary; json: Serialize the internal tree, useful for tools. Also always succeeds; exhibit: Serialize the summary data in a json useful for Exhibit ''') + def __init__(self): + self.parser = None + + def get_parser(self): + """Get an OptionParser, with class docstring as usage, and + self.options. 
+ """ + parser = OptionParser() + parser.set_usage(self.__doc__) + for option in self.options: + parser.add_option(option) + return parser + @classmethod def call(cls): """Entry_point for setuptools. @@ -59,15 +60,15 @@ data in a json useful for Exhibit def handle_(self): """The instance part of the classmethod call.""" self.parser = self.get_parser() - args = self.parser.parse_args() + (options, args) = self.parser.parse_args() # log as verbose or quiet as we want, warn by default logging.basicConfig() logging.getLogger().setLevel(logging.WARNING - - (args.v - args.q) * 10) - observer = self.handle(args) - print observer.serialize(type=args.data).encode('utf-8', 'replace') + (options.v - options.q)*10) + observer = self.handle(args, options) + print observer.serialize(type=options.data).encode('utf-8', 'replace') - def handle(self, args): + def handle(self, args, options): """Subclasses need to implement this method for the actual command handling. """ @@ -75,42 +76,39 @@ data in a json useful for Exhibit class CompareLocales(BaseCommand): - """Check the localization status of a gecko application. + """usage: %prog [options] l10n.ini l10n_base_dir [locale ...] + +Check the localization status of a gecko application. The first argument is a path to the l10n.ini file for the application, followed by the base directory of the localization repositories. Then you pass in the list of locale codes you want to compare. 
If there are not locales given, the list of locales will be taken from the all-locales file of the application\'s l10n.ini.""" - def get_parser(self): - parser = super(CompareLocales, self).get_parser() - parser.add_argument('ini_file', metavar='l10n.ini', - help='INI file for the project') - parser.add_argument('l10n_base_dir', metavar='l10n-base-dir', - help='Parent directory of localizations') - parser.add_argument('locales', nargs='*', metavar='locale-code', - help='Locale code and top-level directory of ' - 'each localization') - parser.add_argument('--clobber-merge', action="store_true", - default=False, dest='clobber', - help="""WARNING: DATALOSS. + options = BaseCommand.options + [ + make_option('--clobber-merge', action="store_true", default=False, + dest='clobber', + help="""WARNING: DATALOSS. Use this option with care. If specified, the merge directory will be clobbered for each module. That means, the subdirectory will be completely removed, any files that were there are lost. -Be careful to specify the right merge directory when using this option.""") - parser.add_argument('-r', '--reference', default='en-US', - dest='reference', - help='Explicitly set the reference ' - 'localization. [default: en-US]') - self.add_data_argument(parser) - return parser +Be careful to specify the right merge directory when using this option."""), + make_option('-r', '--reference', default='en-US', dest='reference', + help='Explicitly set the reference ' + 'localization. 
[default: en-US]'), + BaseCommand.data_option + ] - def handle(self, args): - app = EnumerateApp(args.ini_file, args.l10n_base_dir, args.locales) - app.reference = args.reference + def handle(self, args, options): + if len(args) < 2: + self.parser.error('Need to pass in list of languages') + inipath, l10nbase = args[:2] + locales = args[2:] + app = EnumerateApp(inipath, l10nbase, locales) + app.reference = options.reference try: - observer = compareApp(app, merge_stage=args.merge, - clobber=args.clobber) + observer = compareApp(app, merge_stage=options.merge, + clobber=options.clobber) except (OSError, IOError), exc: print "FAIL: " + str(exc) self.parser.exit(2) @@ -118,38 +116,39 @@ Be careful to specify the right merge directory when using this option.""") class CompareDirs(BaseCommand): - """Check the localization status of a directory tree. + """usage: %prog [options] reference localization + +Check the localization status of a directory tree. The first argument is a path to the reference data,the second is the localization to be tested.""" - def get_parser(self): - parser = super(CompareDirs, self).get_parser() - parser.add_argument('reference') - parser.add_argument('localization') - self.add_data_argument(parser) - return parser + options = BaseCommand.options + [ + BaseCommand.data_option + ] - def handle(self, args): - observer = compareDirs(args.reference, args.localization, - merge_stage=args.merge) + def handle(self, args, options): + if len(args) != 2: + self.parser.error('Reference and localizatino required') + reference, locale = args + observer = compareDirs(reference, locale, merge_stage=options.merge) return observer class CompareWebApp(BaseCommand): - """Check the localization status of a gaia-style web app. + """usage: %prog [options] webapp [locale locale] + +Check the localization status of a gaia-style web app. The first argument is the directory of the web app. Following arguments explicitly state the locales to test. 
If none are given, test all locales in manifest.webapp or files.""" - def get_parser(self): - parser = super(CompareWebApp, self).get_parser() - parser.add_argument('webapp') - parser.add_argument('locales', nargs='*', metavar='locale-code', - help='Locale code and top-level directory of ' - 'each localization') - self.add_data_argument(parser) - return parser + options = BaseCommand.options[:-1] + [ + BaseCommand.data_option] - def handle(self, args): - observer = compare_web_app(args.webapp, args.locales) + def handle(self, args, options): + if len(args) < 1: + self.parser.error('Webapp directory required') + basedir = args[0] + locales = args[1:] + observer = compare_web_app(basedir, locales) return observer diff --git a/python/compare-locales/compare_locales/compare.py b/python/compare-locales/compare_locales/compare.py index e7bc41546f23..4f71c46f8f4c 100644 --- a/python/compare-locales/compare_locales/compare.py +++ b/python/compare-locales/compare_locales/compare.py @@ -383,13 +383,13 @@ class ContentComparer: self.merge_stage = merge_stage def merge(self, ref_entities, ref_map, ref_file, l10n_file, missing, - skips, ctx, canMerge, encoding): + skips, p): outfile = os.path.join(self.merge_stage, l10n_file.module, l10n_file.file) outdir = os.path.dirname(outfile) if not os.path.isdir(outdir): os.makedirs(outdir) - if not canMerge: + if not p.canMerge: shutil.copyfile(ref_file.fullpath, outfile) print "copied reference to " + outfile return @@ -402,16 +402,16 @@ class ContentComparer: if not isinstance(skip, parser.Junk)]) if skips: # we need to skip a few errornous blocks in the input, copy by hand - f = codecs.open(outfile, 'wb', encoding) + f = codecs.open(outfile, 'wb', p.encoding) offset = 0 for skip in skips: chunk = skip.span - f.write(ctx.contents[offset:chunk[0]]) + f.write(p.contents[offset:chunk[0]]) offset = chunk[1] - f.write(ctx.contents[offset:]) + f.write(p.contents[offset:]) else: shutil.copyfile(l10n_file.fullpath, outfile) - f = 
codecs.open(outfile, 'ab', encoding) + f = codecs.open(outfile, 'ab', p.encoding) print "adding to " + outfile def ensureNewline(s): @@ -458,10 +458,20 @@ class ContentComparer: try: p.readContents(l10n.getContents()) l10n_entities, l10n_map = p.parse() - l10n_ctx = p.ctx except Exception, e: self.notify('error', l10n, str(e)) return + lines = [] + + def _getLine(offset): + if not lines: + lines.append(0) + for m in self.nl.finditer(p.contents): + lines.append(m.end()) + for i in xrange(len(lines), 0, -1): + if offset >= lines[i - 1]: + return (i, offset - lines[i - 1]) + return (1, offset) l10n_list = l10n_map.keys() l10n_list.sort() @@ -491,10 +501,9 @@ class ContentComparer: if isinstance(l10n_entities[l10n_map[item_or_pair]], parser.Junk): junk = l10n_entities[l10n_map[item_or_pair]] - params = (junk.val,) + junk.position() + junk.position(-1) + params = (junk.val,) + junk.span self.notify('error', l10n, - 'Unparsed content "%s" from line %d colum %d' - ' to line %d column %d' % params) + 'Unparsed content "%s" at %d-%d' % params) if self.merge_stage is not None: skips.append(junk) elif self.notify('obsoleteEntity', l10n, @@ -519,17 +528,17 @@ class ContentComparer: for tp, pos, msg, cat in checker.check(refent, l10nent): # compute real src position, if first line, # col needs adjustment + _l, _offset = _getLine(l10nent.val_span[0]) if isinstance(pos, tuple): - _l, col = l10nent.value_position() # line, column if pos[0] == 1: - col = col + pos[1] + col = pos[1] + _offset else: col = pos[1] - _l += pos[0] - 1 + _l += pos[0] - 1 else: - _l, col = l10nent.value_position(pos) - # skip error entities when merging + _l, col = _getLine(l10nent.val_span[0] + pos) + # skip error entities when merging if tp == 'error' and self.merge_stage is not None: skips.append(l10nent) self.notify(tp, l10n, @@ -539,10 +548,7 @@ class ContentComparer: if missing: self.notify('missing', l10n, missing) if self.merge_stage is not None and (missings or skips): - self.merge( - ref[0], 
ref[1], ref_file, - l10n, missings, skips, l10n_ctx, - p.canMerge, p.encoding) + self.merge(ref[0], ref[1], ref_file, l10n, missings, skips, p) if report: self.notify('report', l10n, report) if obsolete: diff --git a/python/compare-locales/compare_locales/parser.py b/python/compare-locales/compare_locales/parser.py index 0f95aafdec64..a97cf201be00 100644 --- a/python/compare-locales/compare_locales/parser.py +++ b/python/compare-locales/compare_locales/parser.py @@ -3,93 +3,76 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. import re -import bisect import codecs import logging +from HTMLParser import HTMLParser __constructors = [] -class EntityBase(object): +class Entity(object): ''' Abstraction layer for a localizable entity. Currently supported are grammars of the form: 1: pre white space - 2: entity definition - 3: entity key (name) - 4: entity value - 5: post white space + 2: pre comments + 3: entity definition + 4: entity key (name) + 5: entity value + 6: post comment (and white space) in the same line (dtd only) <--[1] - + <--[2] + - <-------[2]---------> + <-------[3]---------><------[6]------> ''' - def __init__(self, ctx, pp, pre_comment, - span, pre_ws_span, def_span, + def __init__(self, contents, pp, + span, pre_ws_span, pre_comment_span, def_span, key_span, val_span, post_span): - self.ctx = ctx + self.contents = contents self.span = span self.pre_ws_span = pre_ws_span + self.pre_comment_span = pre_comment_span self.def_span = def_span self.key_span = key_span self.val_span = val_span self.post_span = post_span self.pp = pp - self.pre_comment = pre_comment pass - def position(self, offset=0): - """Get the 1-based line and column of the character - with given offset into the Entity. - - If offset is negative, return the end of the Entity. 
- """ - if offset < 0: - pos = self.span[1] - else: - pos = self.span[0] + offset - return self.ctx.lines(pos)[0] - - def value_position(self, offset=0): - """Get the 1-based line and column of the character - with given offset into the value. - - If offset is negative, return the end of the value. - """ - if offset < 0: - pos = self.val_span[1] - else: - pos = self.val_span[0] + offset - return self.ctx.lines(pos)[0] - # getter helpers def get_all(self): - return self.ctx.contents[self.span[0]:self.span[1]] + return self.contents[self.span[0]:self.span[1]] def get_pre_ws(self): - return self.ctx.contents[self.pre_ws_span[0]:self.pre_ws_span[1]] + return self.contents[self.pre_ws_span[0]:self.pre_ws_span[1]] + + def get_pre_comment(self): + return self.contents[self.pre_comment_span[0]: + self.pre_comment_span[1]] def get_def(self): - return self.ctx.contents[self.def_span[0]:self.def_span[1]] + return self.contents[self.def_span[0]:self.def_span[1]] def get_key(self): - return self.ctx.contents[self.key_span[0]:self.key_span[1]] + return self.contents[self.key_span[0]:self.key_span[1]] def get_val(self): - return self.pp(self.ctx.contents[self.val_span[0]:self.val_span[1]]) + return self.pp(self.contents[self.val_span[0]:self.val_span[1]]) def get_raw_val(self): - return self.ctx.contents[self.val_span[0]:self.val_span[1]] + return self.contents[self.val_span[0]:self.val_span[1]] def get_post(self): - return self.ctx.contents[self.post_span[0]:self.post_span[1]] + return self.contents[self.post_span[0]:self.post_span[1]] # getters all = property(get_all) pre_ws = property(get_pre_ws) + pre_comment = property(get_pre_comment) definition = property(get_def) key = property(get_key) val = property(get_val) @@ -100,32 +83,6 @@ class EntityBase(object): return self.key -class Entity(EntityBase): - pass - - -class Comment(EntityBase): - def __init__(self, ctx, span, pre_ws_span, def_span, - post_span): - self.ctx = ctx - self.span = span - self.pre_ws_span = pre_ws_span 
- self.def_span = def_span - self.post_span = post_span - self.pp = lambda v: v - - @property - def key(self): - return None - - @property - def val(self): - return None - - def __repr__(self): - return self.all - - class Junk(object): ''' An almost-Entity, representing junk data that we didn't parse. @@ -134,28 +91,16 @@ class Junk(object): ''' junkid = 0 - def __init__(self, ctx, span): - self.ctx = ctx + def __init__(self, contents, span): + self.contents = contents self.span = span - self.pre_ws = self.definition = self.post = '' + self.pre_ws = self.pre_comment = self.definition = self.post = '' self.__class__.junkid += 1 self.key = '_junk_%d_%d-%d' % (self.__class__.junkid, span[0], span[1]) - def position(self, offset=0): - """Get the 1-based line and column of the character - with given offset into the Entity. - - If offset is negative, return the end of the Entity. - """ - if offset < 0: - pos = self.span[1] - else: - pos = self.span[0] + offset - return self.ctx.lines(pos)[0] - # getter helpers def get_all(self): - return self.ctx.contents[self.span[0]:self.span[1]] + return self.contents[self.span[0]:self.span[1]] # getters all = property(get_all) @@ -165,65 +110,26 @@ class Junk(object): return self.key -class Whitespace(EntityBase): - '''Entity-like object representing an empty file with whitespace, - if allowed - ''' - def __init__(self, ctx, span): - self.ctx = ctx - self.key_span = self.val_span = self.span = span - self.def_span = self.pre_ws_span = (span[0], span[0]) - self.post_span = (span[1], span[1]) - self.pp = lambda v: v - - def __repr__(self): - return self.raw_val - - class Parser: canMerge = True - tail = re.compile('\s+\Z') - - class Context(object): - "Fixture for content and line numbers" - def __init__(self, contents): - self.contents = contents - self._lines = None - - def lines(self, *positions): - # return line and column tuples, 1-based - if self._lines is None: - nl = re.compile('\n', re.M) - self._lines = [m.end() - for m in 
nl.finditer(self.contents)] - line_nrs = [bisect.bisect(self._lines, p) for p in positions] - # compute columns - pos_ = [ - (1 + line, 1 + p - (self._lines[line-1] if line else 0)) - for line, p in zip(line_nrs, positions)] - return pos_ def __init__(self): if not hasattr(self, 'encoding'): self.encoding = 'utf-8' - self.ctx = None - self.last_comment = None + pass def readFile(self, file): - with open(file, 'rU') as f: - try: - self.readContents(f.read()) - except UnicodeDecodeError, e: - (logging.getLogger('locales') - .error("Can't read file: " + file + '; ' + str(e))) + f = codecs.open(file, 'r', self.encoding) + try: + self.contents = f.read() + except UnicodeDecodeError, e: + (logging.getLogger('locales') + .error("Can't read file: " + file + '; ' + str(e))) + self.contents = u'' + f.close() def readContents(self, contents): - '''Read contents and create parsing context. - - contents are in native encoding, but with normalized line endings. - ''' - (contents, length) = codecs.getdecoder(self.encoding)(contents) - self.ctx = Parser.Context(contents) + (self.contents, length) = codecs.getdecoder(self.encoding)(contents) def parse(self): l = [] @@ -237,57 +143,52 @@ class Parser: return val def __iter__(self): - return self.walk(onlyEntities=True) - - def walk(self, onlyEntities=False): - if not self.ctx: - # loading file failed, or we just didn't load anything - return - ctx = self.ctx - contents = ctx.contents + contents = self.contents offset = 0 - entity, offset = self.getEntity(ctx, offset) + self.header, offset = self.getHeader(contents, offset) + self.footer = '' + entity, offset = self.getEntity(contents, offset) while entity: - if (not onlyEntities or - type(entity) is Entity or - type(entity) is Junk): - yield entity - entity, offset = self.getEntity(ctx, offset) + yield entity + entity, offset = self.getEntity(contents, offset) + f = self.reFooter.match(contents, offset) + if f: + self.footer = f.group() + offset = f.end() if len(contents) > offset: 
- yield Junk(ctx, (offset, len(contents))) + yield Junk(contents, (offset, len(contents))) + pass - def getEntity(self, ctx, offset): - m = self.reKey.match(ctx.contents, offset) + def getHeader(self, contents, offset): + header = '' + h = self.reHeader.match(contents) + if h: + header = h.group() + offset = h.end() + return (header, offset) + + def getEntity(self, contents, offset): + m = self.reKey.match(contents, offset) if m: offset = m.end() - entity = self.createEntity(ctx, m) + entity = self.createEntity(contents, m) return (entity, offset) - m = self.reComment.match(ctx.contents, offset) + # first check if footer has a non-empty match, + # 'cause then we don't find junk + m = self.reFooter.match(contents, offset) + if m and m.end() > offset: + return (None, offset) + m = self.reKey.search(contents, offset) if m: - offset = m.end() - self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)]) - return (self.last_comment, offset) - return self.getTrailing(ctx, offset, self.reKey, self.reComment) + # we didn't match, but search, so there's junk between offset + # and start. 
We'll match() on the next turn + junkend = m.start() + return (Junk(contents, (offset, junkend)), junkend) + return (None, offset) - def getTrailing(self, ctx, offset, *expressions): - junkend = None - for exp in expressions: - m = exp.search(ctx.contents, offset) - if m: - junkend = min(junkend, m.start()) if junkend else m.start() - if junkend is None: - if self.tail.match(ctx.contents, offset): - white_end = len(ctx.contents) - return (Whitespace(ctx, (offset, white_end)), white_end) - else: - return (None, offset) - return (Junk(ctx, (offset, junkend)), junkend) - - def createEntity(self, ctx, m): - pre_comment = unicode(self.last_comment) if self.last_comment else '' - self.last_comment = '' - return Entity(ctx, self.postProcessValue, pre_comment, - *[m.span(i) for i in xrange(6)]) + def createEntity(self, contents, m): + return Entity(contents, self.postProcessValue, + *[m.span(i) for i in xrange(7)]) def getParser(path): @@ -329,20 +230,22 @@ class DTDParser(Parser): # [#x0300-#x036F] | [#x203F-#x2040] NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040' Name = '[' + NameStartChar + '][' + NameChar + ']*' - reKey = re.compile('(?:(?P
\s*)(?P' + Name +
+    reKey = re.compile('(?:(?P
\s*)(?P(?:' + XmlComment +
+                       '\s*)*)(?P' + Name +
                        ')\s+(?P\"[^\"]*\"|\'[^\']*\'?)\s*>)'
-                       '(?P\s*)?)',
-                       re.DOTALL | re.M)
+                       '(?P[ \t]*(?:' + XmlComment + '\s*)*\n?)?)',
+                       re.DOTALL)
     # add BOM to DTDs, details in bug 435002
-    reHeader = re.compile(u'^\ufeff')
-    reComment = re.compile('(\s*)()(\s*)' % CharMinusDash,
-                           re.S)
-    rePE = re.compile(u'(?:(\s*)'
-                      u'(\s*%' + Name +
-                      u';)([ \t]*(?:' + XmlComment + u'\s*)*\n?)?)')
+    reHeader = re.compile(u'^\ufeff?'
+                          u'(\s*)?', re.S)
+    reFooter = re.compile('\s*(\s*)*$')
+    rePE = re.compile('(?:(\s*)((?:' + XmlComment + '\s*)*)'
+                      '(\s*%' + Name +
+                      ';)([ \t]*(?:' + XmlComment + '\s*)*\n?)?)')
 
-    def getEntity(self, ctx, offset):
+    def getEntity(self, contents, offset):
         '''
         Overload Parser.getEntity to special-case ParsedEntities.
         Just check for a parsed entity if that method claims junk.
@@ -350,26 +253,20 @@ class DTDParser(Parser):
         
         %foo;
         '''
-        if offset is 0 and self.reHeader.match(ctx.contents):
-            offset += 1
-        entity, inneroffset = Parser.getEntity(self, ctx, offset)
+        entity, inneroffset = Parser.getEntity(self, contents, offset)
         if (entity and isinstance(entity, Junk)) or entity is None:
-            m = self.rePE.match(ctx.contents, offset)
+            m = self.rePE.match(contents, offset)
             if m:
                 inneroffset = m.end()
-                self.last_comment = ''
-                entity = Entity(ctx, self.postProcessValue, '',
-                                *[m.span(i) for i in xrange(6)])
+                entity = Entity(contents, self.postProcessValue,
+                                *[m.span(i) for i in xrange(7)])
         return (entity, inneroffset)
 
-    def createEntity(self, ctx, m):
+    def createEntity(self, contents, m):
         valspan = m.span('val')
         valspan = (valspan[0]+1, valspan[1]-1)
-        pre_comment = unicode(self.last_comment) if self.last_comment else ''
-        self.last_comment = ''
-        return Entity(ctx, self.postProcessValue, pre_comment,
-                      m.span(),
-                      m.span('pre'),
+        return Entity(contents, self.postProcessValue, m.span(),
+                      m.span('pre'), m.span('precomment'),
                       m.span('entity'), m.span('key'), valspan,
                       m.span('post'))
 
@@ -381,30 +278,30 @@ class PropertiesParser(Parser):
 
     def __init__(self):
         self.reKey = re.compile('^(\s*)'
+                                '((?:[#!].*?\n\s*)*)'
                                 '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
-        self.reComment = re.compile('(\s*)(((?:[#!][^\n]*\n?)+))', re.M)
+        self.reHeader = re.compile('^\s*([#!].*\s*)+')
+        self.reFooter = re.compile('\s*([#!].*\s*)*$')
         self._escapedEnd = re.compile(r'\\+$')
-        self._trailingWS = re.compile(r'\s*(?:\n|\Z)', re.M)
+        self._trailingWS = re.compile(r'[ \t]*$')
         Parser.__init__(self)
 
-    def getEntity(self, ctx, offset):
+    def getHeader(self, contents, offset):
+        header = ''
+        h = self.reHeader.match(contents, offset)
+        if h:
+            candidate = h.group()
+            if 'http://mozilla.org/MPL/2.0/' in candidate or \
+                    'LICENSE BLOCK' in candidate:
+                header = candidate
+                offset = h.end()
+        return (header, offset)
+
+    def getEntity(self, contents, offset):
         # overwritten to parse values line by line
-        contents = ctx.contents
-        m = self.reComment.match(contents, offset)
-        if m:
-            spans = [m.span(i) for i in xrange(3)]
-            start_trailing = offset = m.end()
-            while offset < len(contents):
-                m = self._trailingWS.match(contents, offset)
-                if not m:
-                    break
-                offset = m.end()
-            spans.append((start_trailing, offset))
-            self.last_comment = Comment(ctx, *spans)
-            return (self.last_comment, offset)
         m = self.reKey.match(contents, offset)
         if m:
-            startline = offset = m.end()
+            offset = m.end()
             while True:
                 endval = nextline = contents.find('\n', offset)
                 if nextline == -1:
@@ -418,24 +315,26 @@ class PropertiesParser(Parser):
                 # backslashes at end of line, if 2*n, not escaped
                 if len(_e.group()) % 2 == 0:
                     break
-                startline = offset
             # strip trailing whitespace
-            ws = self._trailingWS.search(contents, startline)
+            ws = self._trailingWS.search(contents, m.end(), offset)
             if ws:
-                endval = ws.start()
-                offset = ws.end()
-            pre_comment = (unicode(self.last_comment) if self.last_comment
-                           else '')
-            self.last_comment = ''
-            entity = Entity(ctx, self.postProcessValue, pre_comment,
+                endval -= ws.end() - ws.start()
+            entity = Entity(contents, self.postProcessValue,
                             (m.start(), offset),   # full span
                             m.span(1),  # leading whitespan
-                            (m.start(2), offset),   # entity def span
-                            m.span(2),   # key span
+                            m.span(2),  # leading comment span
+                            (m.start(3), offset),   # entity def span
+                            m.span(3),   # key span
                             (m.end(), endval),   # value span
                             (offset, offset))  # post comment span, empty
             return (entity, offset)
-        return self.getTrailing(ctx, offset, self.reKey, self.reComment)
+        m = self.reKey.search(contents, offset)
+        if m:
+            # we didn't match, but search, so there's junk between offset
+            # and start. We'll match() on the next turn
+            junkend = m.start()
+            return (Junk(contents, (offset, junkend)), junkend)
+        return (None, offset)
 
     def postProcessValue(self, val):
 
@@ -450,77 +349,18 @@ class PropertiesParser(Parser):
         return val
 
 
-class DefinesInstruction(EntityBase):
-    '''Entity-like object representing processing instructions in inc files
-    '''
-    def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
-        self.ctx = ctx
-        self.span = span
-        self.pre_ws_span = pre_ws_span
-        self.def_span = def_span
-        self.key_span = self.val_span = val_span
-        self.post_span = post_span
-        self.pp = lambda v: v
-
-    def __repr__(self):
-        return self.raw_val
-
-
 class DefinesParser(Parser):
     # can't merge, #unfilter needs to be the last item, which we don't support
     canMerge = False
-    tail = re.compile(r'(?!)')  # never match
 
     def __init__(self):
-        self.reComment = re.compile(
-            '((?:[ \t]*\n)*)'
-            '((?:^# .*?(?:\n|\Z))+)'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
-        self.reKey = re.compile('((?:[ \t]*\n)*)'
-                                '(#define[ \t]+(\w+)(?:[ \t](.*?))?(?:\n|\Z))'
-                                '((?:[ \t]*(?:\n|\Z))*)',
+        self.reKey = re.compile('^(\s*)((?:^#(?!define\s).*\s*)*)'
+                                '(#define[ \t]+(\w+)[ \t]+(.*?))([ \t]*$\n?)',
                                 re.M)
-        self.rePI = re.compile('((?:[ \t]*\n)*)'
-                               '(#(\w+)[ \t]+(.*?)(?:\n|\Z))'
-                               '((?:[ \t]*(?:\n|\Z))*)',
-                               re.M)
+        self.reHeader = re.compile('^\s*(#(?!define\s).*\s*)*')
+        self.reFooter = re.compile('\s*(#(?!define\s).*\s*)*$', re.M)
         Parser.__init__(self)
 
-    def getEntity(self, ctx, offset):
-        contents = ctx.contents
-        m = self.reComment.match(contents, offset)
-        if m:
-            offset = m.end()
-            self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])
-            return (self.last_comment, offset)
-        m = self.reKey.match(contents, offset)
-        if m:
-            offset = m.end()
-            return (self.createEntity(ctx, m), offset)
-        m = self.rePI.match(contents, offset)
-        if m:
-            offset = m.end()
-            return (DefinesInstruction(ctx, *[m.span(i) for i in xrange(5)]),
-                    offset)
-        return self.getTrailing(ctx, offset,
-                                self.reComment, self.reKey, self.rePI)
-
-
-class IniSection(EntityBase):
-    '''Entity-like object representing sections in ini files
-    '''
-    def __init__(self, ctx, span, pre_ws_span, def_span, val_span, post_span):
-        self.ctx = ctx
-        self.span = span
-        self.pre_ws_span = pre_ws_span
-        self.def_span = def_span
-        self.key_span = self.val_span = val_span
-        self.post_span = post_span
-        self.pp = lambda v: v
-
-    def __repr__(self):
-        return self.raw_val
-
 
 class IniParser(Parser):
     '''
@@ -533,40 +373,149 @@ class IniParser(Parser):
     ...
     '''
     def __init__(self):
-        self.reComment = re.compile(
-            '((?:[ \t]*\n)*)'
-            '((?:^[;#].*?(?:\n|\Z))+)'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
-        self.reSection = re.compile(
-            '((?:[ \t]*\n)*)'
-            '(\[(.*?)\])'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
-        self.reKey = re.compile(
-            '((?:[ \t]*\n)*)'
-            '((.+?)=(.*))'
-            '((?:[ \t]*(?:\n|\Z))*)', re.M)
+        self.reHeader = re.compile('^((?:\s*|[;#].*)\n)*\[.+?\]\n', re.M)
+        self.reKey = re.compile('(\s*)((?:[;#].*\n\s*)*)((.+?)=(.*))(\n?)')
+        self.reFooter = re.compile('\s*([;#].*\s*)*$')
         Parser.__init__(self)
 
-    def getEntity(self, ctx, offset):
-        contents = ctx.contents
-        m = self.reComment.match(contents, offset)
-        if m:
-            offset = m.end()
-            self.last_comment = Comment(ctx, *[m.span(i) for i in xrange(4)])
-            return (self.last_comment, offset)
-        m = self.reSection.match(contents, offset)
-        if m:
-            offset = m.end()
-            return (IniSection(ctx, *[m.span(i) for i in xrange(5)]), offset)
-        m = self.reKey.match(contents, offset)
-        if m:
-            offset = m.end()
-            return (self.createEntity(ctx, m), offset)
-        return self.getTrailing(ctx, offset,
-                                self.reComment, self.reSection, self.reKey)
+
+DECL, COMMENT, START, END, CONTENT = range(5)  # values for Token._type
+
+
+class BookmarksParserInner(HTMLParser):
+    '''Tokenizer turning bookmarks.html markup into a flat token list.'''
+
+    class Token(object):
+        '''Base token; str() gives the markup text it stands for.'''
+        _type = None
+        content = ''
+
+        def __str__(self):
+            return self.content
+
+    class DeclToken(Token):
+        '''Markup declaration, content is the text inside <!...>.'''
+        _type = DECL
+
+        def __init__(self, decl):
+            self.content = decl
+
+        def __str__(self):
+            return '<!%s>' % self.content
+
+    class CommentToken(Token):
+        '''Comment, content is the text inside <!--...-->.'''
+        _type = COMMENT
+
+        def __init__(self, comment):
+            self.content = comment
+
+        def __str__(self):
+            return '<!--%s-->' % self.content
+
+    class StartToken(Token):
+        '''Start tag with its attributes; content is the raw tag text.'''
+        _type = START
+
+        def __init__(self, tag, attrs, content):
+            self.tag = tag
+            self.attrs = dict(attrs)
+            self.content = content
+
+    class EndToken(Token):
+        '''End tag; str() renders it with an upper-cased tag name.'''
+        _type = END
+
+        def __init__(self, tag):
+            self.tag = tag
+
+        def __str__(self):
+            return '</%s>' % self.tag.upper()
+
+    class ContentToken(Token):
+        '''Text between tags, including unresolved references.'''
+        _type = CONTENT
+
+        def __init__(self, content):
+            self.content = content
+
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.tokens = []
+
+    def parse(self, contents):
+        self.tokens = []
+        self.feed(contents)
+        self.close()
+        return self.tokens
+
+    # Called for markup declarations such as the doctype
+    def handle_decl(self, decl):
+        self.tokens.append(self.DeclToken(decl))
+
+    # Called for each comment
+    def handle_comment(self, comment):
+        self.tokens.append(self.CommentToken(comment))
+
+    # Called for each start tag, the raw tag text becomes the content
+    def handle_starttag(self, tag, attrs):
+        self.tokens.append(self.StartToken(tag, attrs,
+                                           self.get_starttag_text()))
+
+    # Called when text data is encountered
+    def handle_data(self, data):
+        if self.tokens and self.tokens[-1]._type == CONTENT:
+            self.tokens[-1].content += data
+        else:
+            self.tokens.append(self.ContentToken(data))
+
+    # Character references are kept in their source form as text
+    def handle_charref(self, data):
+        self.handle_data('&#%s;' % data)
+
+    # Entity references are kept in their source form as text
+    def handle_entityref(self, data):
+        self.handle_data('&%s;' % data)
+
+    # Called for each end tag
+    def handle_endtag(self, tag):
+        self.tokens.append(self.EndToken(tag))
+
+
+class BookmarksParser(Parser):
+    '''Expose bookmarks.html attributes and texts as key/value entities.'''
+    canMerge = False
+
+    class BMEntity(object):
+        '''Plain key/value pair for one attribute or one text.'''
+        def __init__(self, key, val):
+            self.key = key
+            self.val = val
+
+    def __iter__(self):
+        '''Yield BMEntity objects keyed by the dotted path of open tags.'''
+        p = BookmarksParserInner()
+        tks = p.parse(self.contents)
+        k = []  # stack of tag names: START pushes, END pops the last one
+        for i, t in enumerate(tks):
+            if t._type == START:
+                k.append(t.tag)
+                path = '.'.join(k)
+                for attrname in sorted(t.attrs):
+                    yield self.BMEntity(path + '.@' + attrname,
+                                        t.attrs[attrname])
+                # Text directly following the start tag is its value.
+                if i + 1 < len(tks) and tks[i + 1]._type == CONTENT:
+                    v = tks[i + 1].content.strip()
+                    if v:
+                        yield self.BMEntity(path, v)
+            elif t._type == END and k:
+                # An end tag seen while the stack is empty is ignored.
+                k.pop()
 
 
 __constructors = [('\\.dtd$', DTDParser()),
                   ('\\.properties$', PropertiesParser()),
                   ('\\.ini$', IniParser()),
-                  ('\\.inc$', DefinesParser())]
+                  ('\\.inc$', DefinesParser()),
+                  ('bookmarks\\.html$', BookmarksParser())]
diff --git a/python/compare-locales/compare_locales/tests/__init__.py b/python/compare-locales/compare_locales/tests/__init__.py
index 8cfdd9dbbe09..8808d78f4c0f 100644
--- a/python/compare-locales/compare_locales/tests/__init__.py
+++ b/python/compare-locales/compare_locales/tests/__init__.py
@@ -9,7 +9,7 @@ from itertools import izip_longest
 from pkg_resources import resource_string
 import re
 
-from compare_locales import parser
+from compare_locales.parser import getParser
 
 
 class ParserTestMixin():
@@ -20,7 +20,7 @@ class ParserTestMixin():
     def setUp(self):
         '''Create a parser for this test.
         '''
-        self.parser = parser.getParser(self.filename)
+        self.parser = getParser(self.filename)
 
     def tearDown(self):
         'tear down this test'
@@ -38,13 +38,12 @@ class ParserTestMixin():
         of reference keys and values.
         '''
         self.parser.readContents(content)
-        entities = list(self.parser.walk())
+        entities = [entity for entity in self.parser]
         for entity, ref in izip_longest(entities, refs):
-            self.assertTrue(entity, 'excess reference entity ' + unicode(ref))
-            self.assertTrue(ref, 'excess parsed entity ' + unicode(entity))
-            if isinstance(entity, parser.Entity):
-                self.assertEqual(entity.key, ref[0])
-                self.assertEqual(entity.val, ref[1])
+            self.assertTrue(entity, 'excess reference entity')
+            self.assertTrue(ref, 'excess parsed entity')
+            self.assertEqual(entity.val, ref[1])
+            if ref[0].startswith('_junk'):
+                self.assertTrue(re.match(ref[0], entity.key))
             else:
-                self.assertEqual(type(entity).__name__, ref[0])
-                self.assertIn(ref[1], entity.all)
+                self.assertEqual(entity.key, ref[0])
diff --git a/python/compare-locales/compare_locales/tests/test_checks.py b/python/compare-locales/compare_locales/tests/test_checks.py
index 3871d9d3ee6e..b995d43f9000 100644
--- a/python/compare-locales/compare_locales/tests/test_checks.py
+++ b/python/compare-locales/compare_locales/tests/test_checks.py
@@ -6,7 +6,7 @@
 import unittest
 
 from compare_locales.checks import getChecker
-from compare_locales.parser import getParser, Parser, Entity
+from compare_locales.parser import getParser, Entity
 from compare_locales.paths import File
 
 
@@ -239,16 +239,14 @@ class TestAndroid(unittest.TestCase):
                u"\\u0022, or put string in apostrophes."
 
     def getEntity(self, v):
-        ctx = Parser.Context(v)
-        return Entity(ctx, lambda s: s, '', (0, len(v)), (), (), (),
+        return Entity(v, lambda s: s, (0, len(v)), (), (0, 0), (), (),
                       (0, len(v)), ())
 
     def getDTDEntity(self, v):
         v = v.replace('"', '&quot;')
-        ctx = Parser.Context('<!ENTITY foo "%s">' % v)
-        return Entity(ctx,
-                      lambda s: s, '',
-                      (0, len(v) + 16), (), (), (9, 12),
+        return Entity('<!ENTITY foo "%s">' % v,
+                      lambda s: s,
+                      (0, len(v) + 16), (), (0, 0), (), (9, 12),
                       (14, len(v) + 14), ())
 
     def test_android_dtd(self):
diff --git a/python/compare-locales/compare_locales/tests/test_defines.py b/python/compare-locales/compare_locales/tests/test_defines.py
deleted file mode 100644
index 03a864dad282..000000000000
--- a/python/compare-locales/compare_locales/tests/test_defines.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# -*- coding: utf-8 -*-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-import unittest
-
-from compare_locales.tests import ParserTestMixin
-
-
-mpl2 = '''\
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this file,
-# You can obtain one at http://mozilla.org/MPL/2.0/.
-'''
-
-
-class TestDefinesParser(ParserTestMixin, unittest.TestCase):
-
-    filename = 'defines.inc'
-
-    def testBrowser(self):
-        self._test(mpl2 + '''#filter emptyLines
-
-#define MOZ_LANGPACK_CREATOR mozilla.org
-
-# If non-English locales wish to credit multiple contributors, uncomment this
-# variable definition and use the format specified.
-# #define MOZ_LANGPACK_CONTRIBUTORS Joe Solon
-
-#unfilter emptyLines
-
-''', (
-            ('Comment', mpl2),
-            ('DefinesInstruction', 'filter emptyLines'),
-            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
-            ('Comment', '#define'),
-            ('DefinesInstruction', 'unfilter emptyLines')))
-
-    def testBrowserWithContributors(self):
-        self._test(mpl2 + '''#filter emptyLines
-
-#define MOZ_LANGPACK_CREATOR mozilla.org
-
-# If non-English locales wish to credit multiple contributors, uncomment this
-# variable definition and use the format specified.
-#define MOZ_LANGPACK_CONTRIBUTORS Joe Solon
-
-#unfilter emptyLines
-
-''', (
-            ('Comment', mpl2),
-            ('DefinesInstruction', 'filter emptyLines'),
-            ('MOZ_LANGPACK_CREATOR', 'mozilla.org'),
-            ('Comment', 'non-English'),
-            ('MOZ_LANGPACK_CONTRIBUTORS',
-             'Joe Solon'),
-            ('DefinesInstruction', 'unfilter emptyLines')))
-
-    def testCommentWithNonAsciiCharacters(self):
-        self._test(mpl2 + '''#filter emptyLines
-
-# e.g. #define seamonkey_l10n 
SeaMonkey v češtině -#define seamonkey_l10n_long - -#unfilter emptyLines - -''', ( - ('Comment', mpl2), - ('DefinesInstruction', 'filter emptyLines'), - ('Comment', u'češtině'), - ('seamonkey_l10n_long', ''), - ('DefinesInstruction', 'unfilter emptyLines'))) - - def testToolkit(self): - self._test('''#define MOZ_LANG_TITLE English (US) -''', ( - ('MOZ_LANG_TITLE', 'English (US)'),)) - - def testToolkitEmpty(self): - self._test('', tuple()) - - def test_empty_file(self): - '''Test that empty files generate errors - - defines.inc are interesting that way, as their - content is added to the generated file. - ''' - self._test('\n', (('Junk', '\n'),)) - self._test('\n\n', (('Junk', '\n\n'),)) - self._test(' \n\n', (('Junk', ' \n\n'),)) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/compare-locales/compare_locales/tests/test_dtd.py b/python/compare-locales/compare_locales/tests/test_dtd.py index 3c257b2dd731..87ddcde30135 100644 --- a/python/compare-locales/compare_locales/tests/test_dtd.py +++ b/python/compare-locales/compare_locales/tests/test_dtd.py @@ -8,7 +8,7 @@ import unittest import re -from compare_locales import parser +from compare_locales.parser import getParser from compare_locales.tests import ParserTestMixin @@ -30,9 +30,9 @@ class TestDTD(ParserTestMixin, unittest.TestCase): ''' quoteRef = ( ('good.one', 'one'), - ('Junk', ''), + ('_junk_\\d_25-56$', ''), ('good.two', 'two'), - ('Junk', ''), + ('_junk_\\d_82-119$', ''), ('good.three', 'three'), ('good.four', 'good \' quote'), ('good.five', 'good \'quoted\' word'),) @@ -62,68 +62,25 @@ class TestDTD(ParserTestMixin, unittest.TestCase): --> ''', - (('first', 'string'), ('second', 'string'), - ('Comment', 'out'))) + (('first', 'string'), ('second', 'string'))) def test_license_header(self): - p = parser.getParser('foo.dtd') + p = getParser('foo.dtd') p.readContents(self.resource('triple-license.dtd')) - entities = list(p.walk()) - self.assert_(isinstance(entities[0], parser.Comment)) - 
self.assertIn('MPL', entities[0].all) - e = entities[1] - self.assert_(isinstance(e, parser.Entity)) - self.assertEqual(e.key, 'foo') - self.assertEqual(e.val, 'value') - self.assertEqual(len(entities), 2) + for e in p: + self.assertEqual(e.key, 'foo') + self.assertEqual(e.val, 'value') + self.assert_('MPL' in p.header) p.readContents('''\ ''') - entities = list(p.walk()) - self.assert_(isinstance(entities[0], parser.Comment)) - self.assertIn('MPL', entities[0].all) - e = entities[1] - self.assert_(isinstance(e, parser.Entity)) - self.assertEqual(e.key, 'foo') - self.assertEqual(e.val, 'value') - self.assertEqual(len(entities), 2) - - def testBOM(self): - self._test(u'\ufeff'.encode('utf-8'), - (('foo.label', 'stuff'),)) - - def test_trailing_whitespace(self): - self._test('\n \n', - (('foo.label', 'stuff'),)) - - def test_unicode_comment(self): - self._test('', - (('Comment', u'\u53d6'),)) - - def test_empty_file(self): - self._test('', tuple()) - self._test('\n', (('Whitespace', '\n'),)) - self._test('\n\n', (('Whitespace', '\n\n'),)) - self._test(' \n\n', (('Whitespace', ' \n\n'),)) - - def test_positions(self): - self.parser.readContents('''\ - - -''') - one, two = list(self.parser) - self.assertEqual(one.position(), (1, 1)) - self.assertEqual(one.value_position(), (1, 16)) - self.assertEqual(one.position(-1), (2, 1)) - self.assertEqual(two.position(), (2, 1)) - self.assertEqual(two.value_position(), (2, 16)) - self.assertEqual(two.value_position(-1), (3, 14)) - self.assertEqual(two.value_position(10), (3, 5)) - + for e in p: + self.assertEqual(e.key, 'foo') + self.assertEqual(e.val, 'value') + self.assert_('MPL' in p.header) if __name__ == '__main__': unittest.main() diff --git a/python/compare-locales/compare_locales/tests/test_ini.py b/python/compare-locales/compare_locales/tests/test_ini.py index 1f10dd9ece34..4c8cc03e1595 100644 --- a/python/compare-locales/compare_locales/tests/test_ini.py +++ b/python/compare-locales/compare_locales/tests/test_ini.py @@ 
-23,30 +23,23 @@ class TestIniParser(ParserTestMixin, unittest.TestCase): self._test('''; This file is in the UTF-8 encoding [Strings] TitleText=Some Title -''', ( - ('Comment', 'UTF-8 encoding'), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'),)) +''', (('TitleText', 'Some Title'),)) + self.assert_('UTF-8' in self.parser.header) def testMPL2_Space_UTF(self): self._test(mpl2 + ''' ; This file is in the UTF-8 encoding [Strings] TitleText=Some Title -''', ( - ('Comment', mpl2), - ('Comment', 'UTF-8'), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'),)) +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) def testMPL2_Space(self): self._test(mpl2 + ''' [Strings] TitleText=Some Title -''', ( - ('Comment', mpl2), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'),)) +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) def testMPL2_MultiSpace(self): self._test(mpl2 + '''\ @@ -55,33 +48,26 @@ TitleText=Some Title [Strings] TitleText=Some Title -''', ( - ('Comment', mpl2), - ('Comment', 'more comments'), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'),)) +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) def testMPL2_JunkBeforeCategory(self): self._test(mpl2 + '''\ Junk [Strings] TitleText=Some Title -''', ( - ('Comment', mpl2), - ('Junk', 'Junk'), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'))) +''', (('_junk_\\d+_0-213$', mpl2 + '''\ +Junk +[Strings]'''), ('TitleText', 'Some Title'))) + self.assert_('MPL' not in self.parser.header) def test_TrailingComment(self): self._test(mpl2 + ''' [Strings] TitleText=Some Title ;Stray trailing comment -''', ( - ('Comment', mpl2), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'), - ('Comment', 'Stray trailing'))) +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) def test_SpacedTrailingComments(self): self._test(mpl2 + ''' @@ -91,11 +77,8 @@ 
TitleText=Some Title ;Stray trailing comment ;Second stray comment -''', ( - ('Comment', mpl2), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'), - ('Comment', 'Second stray comment'))) +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) def test_TrailingCommentsAndJunk(self): self._test(mpl2 + ''' @@ -106,13 +89,14 @@ TitleText=Some Title Junk ;Second stray comment -''', ( - ('Comment', mpl2), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'), - ('Comment', 'Stray trailing'), - ('Junk', 'Junk'), - ('Comment', 'Second stray comment'))) +''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-284$', '''\ + +;Stray trailing comment +Junk +;Second stray comment + +'''))) + self.assert_('MPL' in self.parser.header) def test_JunkInbetweenEntries(self): self._test(mpl2 + ''' @@ -122,18 +106,10 @@ TitleText=Some Title Junk Good=other string -''', ( - ('Comment', mpl2), - ('IniSection', 'Strings'), - ('TitleText', 'Some Title'), - ('Junk', 'Junk'), - ('Good', 'other string'))) +''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-236$', '''\ - def test_empty_file(self): - self._test('', tuple()) - self._test('\n', (('Whitespace', '\n'),)) - self._test('\n\n', (('Whitespace', '\n\n'),)) - self._test(' \n\n', (('Whitespace', ' \n\n'),)) +Junk'''), ('Good', 'other string'))) + self.assert_('MPL' in self.parser.header) if __name__ == '__main__': unittest.main() diff --git a/python/compare-locales/compare_locales/tests/test_merge.py b/python/compare-locales/compare_locales/tests/test_merge.py index dda0d671c8d4..c006edbb5d2d 100644 --- a/python/compare-locales/compare_locales/tests/test_merge.py +++ b/python/compare-locales/compare_locales/tests/test_merge.py @@ -13,6 +13,7 @@ from compare_locales.compare import ContentComparer class ContentMixin(object): + maxDiff = None # we got big dictionaries to compare extension = None # OVERLOAD def reference(self, content): @@ -28,7 +29,6 @@ class TestProperties(unittest.TestCase, 
ContentMixin): extension = '.properties' def setUp(self): - self.maxDiff = None self.tmp = mkdtemp() os.mkdir(os.path.join(self.tmp, "merge")) @@ -98,8 +98,7 @@ eff = effVal""") self.reference("""foo = fooVal bar = %d barVal eff = effVal""") - self.localized("""\ -bar = %S lBar + self.localized("""bar = %S lBar eff = leffVal """) cc = ContentComparer() @@ -117,7 +116,7 @@ eff = leffVal ('l10n.properties', {'value': { 'error': [u'argument 1 `S` should be `d` ' - u'at line 1, column 7 for bar'], + u'at line 1, column 6 for bar'], 'missingEntity': [u'foo']}} ) ]} @@ -161,7 +160,6 @@ class TestDTD(unittest.TestCase, ContentMixin): extension = '.dtd' def setUp(self): - self.maxDiff = None self.tmp = mkdtemp() os.mkdir(os.path.join(self.tmp, "merge")) @@ -250,9 +248,7 @@ class TestDTD(unittest.TestCase, ContentMixin): ('l10n.dtd', {'value': { 'error': [u'Unparsed content "" ' - u'from line 2 colum 1 to ' - u'line 2 column 22'], + u'\'gimmick\'>" at 23-44'], 'missingEntity': [u'bar']}} ) ]} diff --git a/python/compare-locales/compare_locales/tests/test_parser.py b/python/compare-locales/compare_locales/tests/test_parser.py deleted file mode 100644 index b8f632382ede..000000000000 --- a/python/compare-locales/compare_locales/tests/test_parser.py +++ /dev/null @@ -1,44 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -import unittest - -from compare_locales import parser - - -class TestParserContext(unittest.TestCase): - def test_lines(self): - "Test that Parser.Context.lines returns 1-based tuples" - ctx = parser.Parser.Context('''first line -second line -third line -''') - self.assertEqual( - ctx.lines(0, 1), - [(1, 1), (1, 2)] - ) - self.assertEqual( - ctx.lines(len('first line')), - [(1, len('first line') + 1)] - ) - self.assertEqual( - ctx.lines(len('first line') + 1), - [(2, 1)] - ) - self.assertEqual( - ctx.lines(len(ctx.contents)), - [(4, 1)] - ) - - def test_empty_parser(self): - p = parser.Parser() - entities, _map = p.parse() - self.assertListEqual( - entities, - [] - ) - self.assertDictEqual( - _map, - {} - ) diff --git a/python/compare-locales/compare_locales/tests/test_properties.py b/python/compare-locales/compare_locales/tests/test_properties.py index 62cd4b213a99..331a1a57c7c9 100644 --- a/python/compare-locales/compare_locales/tests/test_properties.py +++ b/python/compare-locales/compare_locales/tests/test_properties.py @@ -24,7 +24,7 @@ and still has another line coming ('one_line', 'This is one line'), ('two_line', u'This is the first of two lines'), ('one_line_trailing', u'This line ends in \\'), - ('Junk', 'and has junk\n'), + ('_junk_\\d+_113-126$', 'and has junk\n'), ('two_lines_triple', 'This line is one of two and ends in \\' 'and still has another line coming'))) @@ -63,7 +63,8 @@ and an end''', (('bar', 'one line with a # part that looks like a comment ' # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
foo=value -''', (('Comment', 'MPL'), ('foo', 'value'))) +''', (('foo', 'value'),)) + self.assert_('MPL' in self.parser.header) def test_escapes(self): self.parser.readContents(r''' @@ -87,64 +88,8 @@ second = string # #commented out -''', (('first', 'string'), ('second', 'string'), - ('Comment', 'commented out'))) +''', (('first', 'string'), ('second', 'string'))) - def test_trailing_newlines(self): - self._test('''\ -foo = bar - -\x20\x20 - ''', (('foo', 'bar'),)) - - def test_just_comments(self): - self._test('''\ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -# LOCALIZATION NOTE These strings are used inside the Promise debugger -# which is available as a panel in the Debugger. -''', (('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE'))) - - def test_just_comments_without_trailing_newline(self): - self._test('''\ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -# LOCALIZATION NOTE These strings are used inside the Promise debugger -# which is available as a panel in the Debugger.''', ( - ('Comment', 'MPL'), ('Comment', 'LOCALIZATION NOTE'))) - - def test_trailing_comment_and_newlines(self): - self._test('''\ -# LOCALIZATION NOTE These strings are used inside the Promise debugger -# which is available as a panel in the Debugger. 
- - - -''', (('Comment', 'LOCALIZATION NOTE'),)) - - def test_empty_file(self): - self._test('', tuple()) - self._test('\n', (('Whitespace', '\n'),)) - self._test('\n\n', (('Whitespace', '\n\n'),)) - self._test(' \n\n', (('Whitespace', ' \n\n'),)) - - def test_positions(self): - self.parser.readContents('''\ -one = value -two = other \\ -escaped value -''') - one, two = list(self.parser) - self.assertEqual(one.position(), (1, 1)) - self.assertEqual(one.value_position(), (1, 7)) - self.assertEqual(two.position(), (2, 1)) - self.assertEqual(two.value_position(), (2, 7)) - self.assertEqual(two.value_position(-1), (3, 14)) - self.assertEqual(two.value_position(10), (3, 3)) if __name__ == '__main__': unittest.main() diff --git a/python/compare-locales/compare_locales/webapps.py b/python/compare-locales/compare_locales/webapps.py index 620736320b05..42f5b56578aa 100644 --- a/python/compare-locales/compare_locales/webapps.py +++ b/python/compare-locales/compare_locales/webapps.py @@ -83,7 +83,7 @@ class Manifest(object): except (ValueError, IOError), e: if self.watcher: self.watcher.notify('error', self.file, str(e)) - return {} + return False return self.extract_manifest_strings(manifest) def extract_manifest_strings(self, manifest_fragment):