diff --git a/MANIFEST.in b/MANIFEST.in index 843ef4a..7db390c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ include g2p_en/homographs.en -include g2p_en/logdir/* +include g2p_en/checkpoint20.npz \ No newline at end of file diff --git a/README.md b/README.md index 0cceeea..1f3cffd 100644 --- a/README.md +++ b/README.md @@ -53,15 +53,20 @@ nltk package will be automatically downloaded at your first run. ## Usage - from g2p_en import g2p - - text = "I refuse to collect the refuse around here." - print(g2p(text)) - >>>['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'EH1', 'R', '.'] - - text = "I am an activationist." - print(g2p(text)) - >>>['AY1', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'AH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T'] + from g2p_en import G2p + + texts = ["I have $250 in my pocket.", # number -> spell-out + "popular pets, e.g. cats and dogs", # e.g. -> for example + "I refuse to collect the refuse around here.", # homograph + "I'm an activationist."] # newly coined word + g2p = G2p() + for text in texts: + out = g2p(text) + print(out) + >>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.'] + >>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z'] + >>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.'] + >>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.'] May, 2018. diff --git a/README.rst b/README.rst index cd7a01c..ad2a3a4 100644 --- a/README.rst +++ b/README.rst @@ -82,15 +82,20 @@ Usage :: - from g2p_en import g2p - - text = "I refuse to collect the refuse around here." - print(g2p(text)) - >>>['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'EH1', 'R', '.'] - - text = "I am an activationist." - print(g2p(text)) - >>>['AY1', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'AH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T'] + from g2p_en import G2p + + texts = ["I have $250 in my pocket.", # number -> spell-out + "popular pets, e.g. cats and dogs", # e.g. -> for example + "I refuse to collect the refuse around here.", # homograph + "I'm an activationist."] # newly coined word + g2p = G2p() + for text in texts: + out = g2p(text) + print(out) + >>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.'] + >>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z'] + >>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.'] + >>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.'] May, 2018. diff --git a/g2p_en.egg-info/PKG-INFO b/g2p_en.egg-info/PKG-INFO index 7435ad9..cd03da2 100644 --- a/g2p_en.egg-info/PKG-INFO +++ b/g2p_en.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: g2p-en -Version: 1.0.0 +Version: 2.0.1 Summary: A Simple Python Module for English Grapheme To Phoneme Conversion Home-page: https://github.com/Kyubyong/g2p Author: Kyubyong Park & Jongseok Kim @@ -10,6 +10,8 @@ Download-URL: https://github.com/Kyubyong/g2p/archive/1.0.0.tar.gz Description: g2p\_en: A Simple Python Module for English Grapheme To Phoneme Conversion ========================================================================== + [Update] * We removed TensorFlow from the dependencies. After all, it changes its APIs quite often, and we don't expect you to have a GPU. Instead, NumPy is used for inference. + This module is designed to convert English graphemes (spelling) to phonemes (pronunciation). It is considered essential in several tasks such as speech synthesis. Unlike many languages like Spanish or German @@ -51,18 +53,16 @@ Description: g2p\_en: A Simple Python Module for English Grapheme To Phoneme Con Environment ----------- - - python 2.x or 3.x + - python 3.x Dependencies ------------ - numpy >= 1.13.1 - - tensorflow >= 1.3.0 - nltk >= 3.2.4 - python -m nltk.downloader "averaged\_perceptron\_tagger" "cmudict" - inflect >= 0.3.1 - Distance >= 0.1.3 - - future >= 0.16.0 Installation ------------ @@ -91,24 +91,21 @@ Description: g2p\_en: A Simple Python Module for English Grapheme To Phoneme Con :: - from g2p_en import g2p + from g2p_en import G2p + + texts = ["I have $250 in my pocket.", # number -> spell-out + "popular pets, e.g. cats and dogs", # e.g. -> for example + "I refuse to collect the refuse around here.", # homograph + "I'm an activationist."] # newly coined word + g2p = G2p() + for text in texts: + out = g2p(text) + print(out) + >>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.'] + >>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z'] + >>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.'] + >>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.'] - text = "I refuse to collect the refuse around here." - print(g2p(text)) - >>>[u'AY1', ' ', u'R', u'IH0', u'F', u'Y', u'UW1', u'Z', ' ', u'T', u'UW1', ' ', u'K', u'AH0', u'L', u'EH1', u'K', u'T', ' ', u'DH', u'AH0', ' ', u'R', u'EH1', u'F', u'Y', u'UW2', u'Z', ' ', u'ER0', u'AW1', u'N', u'D', ' ', u'HH', u'EH1', u'R'] - - text = "I am an activationist." - print(g2p(text)) - >>>[u'AY1', u'M', ' ', u'AE1', u'N', ' ', u'AE2', u'K', u'T', u'AH0', u'V', u'EY1', u'SH', u'AH0', u'N', u'IH0', u'S', u'T'] - - If you need to convert lots of texts, you can use the global tf session. - - :: - - import g2p_en as g2p - - with g2p.Session(): - phs = [g2p.g2p(text) for text in texts] May, 2018. diff --git a/g2p_en.egg-info/SOURCES.txt b/g2p_en.egg-info/SOURCES.txt index a10c2f8..d69385c 100644 --- a/g2p_en.egg-info/SOURCES.txt +++ b/g2p_en.egg-info/SOURCES.txt @@ -1,19 +1,14 @@ MANIFEST.in -README.md README.rst setup.cfg setup.py g2p_en/__init__.py +g2p_en/checkpoint20.npz g2p_en/expand.py g2p_en/g2p.py g2p_en/homographs.en -g2p_en/train.py g2p_en.egg-info/PKG-INFO g2p_en.egg-info/SOURCES.txt g2p_en.egg-info/dependency_links.txt g2p_en.egg-info/requires.txt -g2p_en.egg-info/top_level.txt -g2p_en/logdir/checkpoint -g2p_en/logdir/model_epoch_14_gs_27956.data-00000-of-00001 -g2p_en/logdir/model_epoch_14_gs_27956.index -g2p_en/logdir/model_epoch_14_gs_27956.meta \ No newline at end of file +g2p_en.egg-info/top_level.txt \ No newline at end of file diff --git a/g2p_en.egg-info/requires.txt b/g2p_en.egg-info/requires.txt index 33d6eca..7a7542a 100644 --- a/g2p_en.egg-info/requires.txt +++ b/g2p_en.egg-info/requires.txt @@ -1,6 +1,4 @@ numpy>=1.13.1 -tensorflow>=1.3.0 nltk>=3.2.4 inflect>=0.3.1 distance>=0.1.3 -future>=0.16.0 diff --git a/g2p_en/__init__.py b/g2p_en/__init__.py index f3bd26b..4d9ce97 100644 --- a/g2p_en/__init__.py +++ b/g2p_en/__init__.py @@ -1 +1 @@ -from .g2p import g2p, Session +from .g2p import G2p diff --git a/g2p_en/g2p.py b/g2p_en/g2p.py index 1bc7544..4d95f9d 100644 --- a/g2p_en/g2p.py +++ b/g2p_en/g2p.py @@ -14,7 +14,7 @@ import re import os import unicodedata from builtins import str as unicode -from expand import normalize_numbers +from .expand import normalize_numbers try: nltk.data.find('taggers/averaged_perceptron_tagger.zip') @@ -71,7 +71,7 @@ class G2p(object): self.homograph2features = construct_homograph_dictionary() def load_variables(self): - self.variables = np.load('checkpoint20.npz') + self.variables = np.load(os.path.join(dirname,'checkpoint20.npz')) self.enc_emb = self.variables["enc_emb"] # (29, 64). (len(graphemes), emb) self.enc_w_ih = self.variables["enc_w_ih"] # (3*128, 64) self.enc_w_hh = self.variables["enc_w_hh"] # (3*128, 128) diff --git a/setup.py b/setup.py index c765f59..82d114a 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ with open(path.join(here, 'README.rst'), encoding='utf-8') as f: setup( name = 'g2p_en', packages = ['g2p_en'], # this must be the same as the name above - version = '2.0.0', + version = '2.0.1', description = 'A Simple Python Module for English Grapheme To Phoneme Conversion', long_description=long_description, author = 'Kyubyong Park & Jongseok Kim',