Bug 1065306 - Post: Handle utf-8 files in DotProperties. r=gps

This commit is contained in:
Nick Alexander 2014-10-01 23:23:30 -07:00
parent 81cf36b2a2
commit 860617fb40
4 changed files with 59 additions and 9 deletions

View File

@ -8,6 +8,7 @@
from __future__ import unicode_literals
import codecs
import re
import os
import sys
@ -31,7 +32,7 @@ class DotProperties:
Ignores empty lines and comment lines.'''
if isinstance(file, str_type):
f = open(file, 'rt')
f = codecs.open(file, 'r', 'utf-8')
else:
f = file

View File

@ -0,0 +1,12 @@
# A region.properties file with invalid unicode byte sequences. The
# sequences were cribbed from Markus Kuhn's "UTF-8 decoder capability
# and stress test", available at
# http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
# 3.5 Impossible bytes |
# |
# The following two bytes cannot appear in a correct UTF-8 string |
# |
# 3.5.1 fe = "þ" |
# 3.5.2 ff = "ÿ" |
# 3.5.3 fe fe ff ff = "þþÿÿ" |

View File

@ -0,0 +1,11 @@
# A region.properties file with unicode characters.
# Danish.
# #### ~~ Søren Munk Skrøder, sskroeder - 2009-05-30 @ #mozmae
# Korean.
A.title=한메일
# Russian.
list.0 = test
list.1 = Яндекс

View File

@ -1,15 +1,14 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import sys
import unittest
from StringIO import StringIO
import mozpack.path as mozpath
from mozbuild.dotproperties import (
DotProperties,
)
@ -18,10 +17,8 @@ from mozunit import (
main,
)
# Name (as a string, not a type object) of the text type for the running
# interpreter: 'str' when the major version is 3, 'unicode' otherwise.
if sys.version_info[0] == 3:
str_type = 'str'
else:
str_type = 'unicode'
# Location of the 'data' directory that sits next to this test module; the
# file-based tests load their .properties fixtures from it.
test_data_path = mozpath.abspath(mozpath.dirname(__file__))
test_data_path = mozpath.join(test_data_path, 'data')
class TestDotProperties(unittest.TestCase):
@ -84,6 +81,35 @@ B.url=url B
with self.assertRaises(ValueError):
p.get_dict('missing', required_keys=['key'])
def test_unicode(self):
    # Feed DotProperties an in-memory file object whose comments and values
    # contain Danish, Korean and Russian text, then check that the
    # non-ASCII values come back intact from both accessors.
    source_text = '''
# Danish.
# #### ~~ Søren Munk Skrøder, sskroeder - 2009-05-30 @ #mozmae
# Korean.
A.title=한메일
# Russian.
list.0 = test
list.1 = Яндекс
'''
    props = DotProperties(StringIO(source_text))

    # Dotted keys under the 'A' prefix are exposed as a dict.
    self.assertEqual(props.get_dict('A'), {'title': '한메일'})
    # Numbered keys under the 'list' prefix are exposed as a list.
    self.assertEqual(props.get_list('list'), ['test', 'Яндекс'])
def test_valid_unicode_from_file(self):
    # valid.properties carries the same entries as the inline text used by
    # test_unicode; going through a path specifically exercises the
    # file-opening branch of DotProperties.
    props = DotProperties(os.path.join(test_data_path, 'valid.properties'))

    title_dict = props.get_dict('A')
    self.assertEqual(title_dict, {'title': '한메일'})

    list_values = props.get_list('list')
    self.assertEqual(list_values, ['test', 'Яндекс'])
def test_bad_unicode_from_file(self):
    # The contents of bad.properties are not valid UTF-8; see the comments
    # in the file itself for details.
    #
    # The path is built before entering assertRaises so that only the
    # DotProperties constructor can satisfy the expected exception. If the
    # join were inside the context, a UnicodeDecodeError raised by path
    # handling (e.g. on Python 2, joining a non-ASCII byte-string directory
    # with a unicode literal) would make the test pass without the fixture
    # ever being read.
    bad_path = os.path.join(test_data_path, 'bad.properties')
    with self.assertRaises(UnicodeDecodeError):
        DotProperties(bad_path)
# When this module is executed directly, hand control to the main() helper
# imported from mozunit to run the tests.
if __name__ == '__main__':
main()