mirror of
https://github.com/Mintplex-Labs/g2p-en.git
synced 2026-07-01 18:25:51 -04:00
Update package information for the pip (PyPI) upload
This commit is contained in:
+1
-1
@@ -1,2 +1,2 @@
|
||||
include g2p_en/homographs.en
|
||||
include g2p_en/logdir/*
|
||||
include g2p_en/checkpoint20.npz
|
||||
@@ -53,15 +53,20 @@ nltk package will be automatically downloaded at your first run.
|
||||
|
||||
## Usage
|
||||
|
||||
from g2p_en import g2p
|
||||
|
||||
text = "I refuse to collect the refuse around here."
|
||||
print(g2p(text))
|
||||
>>>['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'EH1', 'R', '.']
|
||||
|
||||
text = "I am an activationist."
|
||||
print(g2p(text))
|
||||
>>>['AY1', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'AH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T']
|
||||
from g2p_en import G2p
|
||||
|
||||
texts = ["I have $250 in my pocket.", # number -> spell-out
|
||||
"popular pets, e.g. cats and dogs", # e.g. -> for example
|
||||
"I refuse to collect the refuse around here.", # homograph
|
||||
"I'm an activationist."] # newly coined word
|
||||
g2p = G2p()
|
||||
for text in texts:
|
||||
out = g2p(text)
|
||||
print(out)
|
||||
>>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.']
|
||||
>>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z']
|
||||
>>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.']
|
||||
>>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.']
|
||||
|
||||
|
||||
May, 2018.
|
||||
|
||||
+14
-9
@@ -82,15 +82,20 @@ Usage
|
||||
|
||||
::
|
||||
|
||||
from g2p_en import g2p
|
||||
|
||||
text = "I refuse to collect the refuse around here."
|
||||
print(g2p(text))
|
||||
>>>['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'EH1', 'R', '.']
|
||||
|
||||
text = "I am an activationist."
|
||||
print(g2p(text))
|
||||
>>>['AY1', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'AH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T']
|
||||
from g2p_en import G2p
|
||||
|
||||
texts = ["I have $250 in my pocket.", # number -> spell-out
|
||||
"popular pets, e.g. cats and dogs", # e.g. -> for example
|
||||
"I refuse to collect the refuse around here.", # homograph
|
||||
"I'm an activationist."] # newly coined word
|
||||
g2p = G2p()
|
||||
for text in texts:
|
||||
out = g2p(text)
|
||||
print(out)
|
||||
>>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.']
|
||||
>>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z']
|
||||
>>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.']
|
||||
>>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.']
|
||||
|
||||
|
||||
May, 2018.
|
||||
|
||||
+18
-21
@@ -1,6 +1,6 @@
|
||||
Metadata-Version: 1.1
|
||||
Name: g2p-en
|
||||
Version: 1.0.0
|
||||
Version: 2.0.1
|
||||
Summary: A Simple Python Module for English Grapheme To Phoneme Conversion
|
||||
Home-page: https://github.com/Kyubyong/g2p
|
||||
Author: Kyubyong Park & Jongseok Kim
|
||||
@@ -10,6 +10,8 @@ Download-URL: https://github.com/Kyubyong/g2p/archive/1.0.0.tar.gz
|
||||
Description: g2p\_en: A Simple Python Module for English Grapheme To Phoneme Conversion
|
||||
==========================================================================
|
||||
|
||||
[Update] * We removed TensorFlow from the dependencies. After all, it changes its APIs quite often, and we don't expect you to have a GPU. Instead, NumPy is used for inference.
|
||||
|
||||
This module is designed to convert English graphemes (spelling) to
|
||||
phonemes (pronunciation). It is considered essential in several tasks
|
||||
such as speech synthesis. Unlike many languages like Spanish or German
|
||||
@@ -51,18 +53,16 @@ Description: g2p\_en: A Simple Python Module for English Grapheme To Phoneme Con
|
||||
Environment
|
||||
-----------
|
||||
|
||||
- python 2.x or 3.x
|
||||
- python 3.x
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
||||
- numpy >= 1.13.1
|
||||
- tensorflow >= 1.3.0
|
||||
- nltk >= 3.2.4
|
||||
- python -m nltk.downloader "averaged\_perceptron\_tagger" "cmudict"
|
||||
- inflect >= 0.3.1
|
||||
- Distance >= 0.1.3
|
||||
- future >= 0.16.0
|
||||
|
||||
Installation
|
||||
------------
|
||||
@@ -91,24 +91,21 @@ Description: g2p\_en: A Simple Python Module for English Grapheme To Phoneme Con
|
||||
|
||||
::
|
||||
|
||||
from g2p_en import g2p
|
||||
from g2p_en import G2p
|
||||
|
||||
texts = ["I have $250 in my pocket.", # number -> spell-out
|
||||
"popular pets, e.g. cats and dogs", # e.g. -> for example
|
||||
"I refuse to collect the refuse around here.", # homograph
|
||||
"I'm an activationist."] # newly coined word
|
||||
g2p = G2p()
|
||||
for text in texts:
|
||||
out = g2p(text)
|
||||
print(out)
|
||||
>>> ['AY1', ' ', 'HH', 'AE1', 'V', ' ', 'T', 'UW1', ' ', 'HH', 'AH1', 'N', 'D', 'R', 'AH0', 'D', ' ', 'F', 'IH1', 'F', 'T', 'IY0', ' ', 'D', 'AA1', 'L', 'ER0', 'Z', ' ', 'IH0', 'N', ' ', 'M', 'AY1', ' ', 'P', 'AA1', 'K', 'AH0', 'T', ' ', '.']
|
||||
>>> ['P', 'AA1', 'P', 'Y', 'AH0', 'L', 'ER0', ' ', 'P', 'EH1', 'T', 'S', ' ', ',', ' ', 'F', 'AO1', 'R', ' ', 'IH0', 'G', 'Z', 'AE1', 'M', 'P', 'AH0', 'L', ' ', 'K', 'AE1', 'T', 'S', ' ', 'AH0', 'N', 'D', ' ', 'D', 'AA1', 'G', 'Z']
|
||||
>>> ['AY1', ' ', 'R', 'IH0', 'F', 'Y', 'UW1', 'Z', ' ', 'T', 'UW1', ' ', 'K', 'AH0', 'L', 'EH1', 'K', 'T', ' ', 'DH', 'AH0', ' ', 'R', 'EH1', 'F', 'Y', 'UW2', 'Z', ' ', 'ER0', 'AW1', 'N', 'D', ' ', 'HH', 'IY1', 'R', ' ', '.']
|
||||
>>> ['AY1', ' ', 'AH0', 'M', ' ', 'AE1', 'N', ' ', 'AE2', 'K', 'T', 'IH0', 'V', 'EY1', 'SH', 'AH0', 'N', 'IH0', 'S', 'T', ' ', '.']
|
||||
|
||||
text = "I refuse to collect the refuse around here."
|
||||
print(g2p(text))
|
||||
>>>[u'AY1', ' ', u'R', u'IH0', u'F', u'Y', u'UW1', u'Z', ' ', u'T', u'UW1', ' ', u'K', u'AH0', u'L', u'EH1', u'K', u'T', ' ', u'DH', u'AH0', ' ', u'R', u'EH1', u'F', u'Y', u'UW2', u'Z', ' ', u'ER0', u'AW1', u'N', u'D', ' ', u'HH', u'EH1', u'R']
|
||||
|
||||
text = "I am an activationist."
|
||||
print(g2p(text))
|
||||
>>>[u'AY1', u'M', ' ', u'AE1', u'N', ' ', u'AE2', u'K', u'T', u'AH0', u'V', u'EY1', u'SH', u'AH0', u'N', u'IH0', u'S', u'T']
|
||||
|
||||
If you need to convert many texts, you can reuse a single global TensorFlow session.
|
||||
|
||||
::
|
||||
|
||||
import g2p_en as g2p
|
||||
|
||||
with g2p.Session():
|
||||
phs = [g2p.g2p(text) for text in texts]
|
||||
|
||||
May, 2018.
|
||||
|
||||
|
||||
@@ -1,19 +1,14 @@
|
||||
MANIFEST.in
|
||||
README.md
|
||||
README.rst
|
||||
setup.cfg
|
||||
setup.py
|
||||
g2p_en/__init__.py
|
||||
g2p_en/checkpoint20.npz
|
||||
g2p_en/expand.py
|
||||
g2p_en/g2p.py
|
||||
g2p_en/homographs.en
|
||||
g2p_en/train.py
|
||||
g2p_en.egg-info/PKG-INFO
|
||||
g2p_en.egg-info/SOURCES.txt
|
||||
g2p_en.egg-info/dependency_links.txt
|
||||
g2p_en.egg-info/requires.txt
|
||||
g2p_en.egg-info/top_level.txt
|
||||
g2p_en/logdir/checkpoint
|
||||
g2p_en/logdir/model_epoch_14_gs_27956.data-00000-of-00001
|
||||
g2p_en/logdir/model_epoch_14_gs_27956.index
|
||||
g2p_en/logdir/model_epoch_14_gs_27956.meta
|
||||
g2p_en.egg-info/top_level.txt
|
||||
@@ -1,6 +1,4 @@
|
||||
numpy>=1.13.1
|
||||
tensorflow>=1.3.0
|
||||
nltk>=3.2.4
|
||||
inflect>=0.3.1
|
||||
distance>=0.1.3
|
||||
future>=0.16.0
|
||||
|
||||
+1
-1
@@ -1 +1 @@
|
||||
from .g2p import g2p, Session
|
||||
from .g2p import G2p
|
||||
|
||||
+2
-2
@@ -14,7 +14,7 @@ import re
|
||||
import os
|
||||
import unicodedata
|
||||
from builtins import str as unicode
|
||||
from expand import normalize_numbers
|
||||
from .expand import normalize_numbers
|
||||
|
||||
try:
|
||||
nltk.data.find('taggers/averaged_perceptron_tagger.zip')
|
||||
@@ -71,7 +71,7 @@ class G2p(object):
|
||||
self.homograph2features = construct_homograph_dictionary()
|
||||
|
||||
def load_variables(self):
|
||||
self.variables = np.load('checkpoint20.npz')
|
||||
self.variables = np.load(os.path.join(dirname,'checkpoint20.npz'))
|
||||
self.enc_emb = self.variables["enc_emb"] # (29, 64). (len(graphemes), emb)
|
||||
self.enc_w_ih = self.variables["enc_w_ih"] # (3*128, 64)
|
||||
self.enc_w_hh = self.variables["enc_w_hh"] # (3*128, 128)
|
||||
|
||||
@@ -12,7 +12,7 @@ with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
|
||||
setup(
|
||||
name = 'g2p_en',
|
||||
packages = ['g2p_en'], # this must be the same as the name above
|
||||
version = '2.0.0',
|
||||
version = '2.0.1',
|
||||
description = 'A Simple Python Module for English Grapheme To Phoneme Conversion',
|
||||
long_description=long_description,
|
||||
author = 'Kyubyong Park & Jongseok Kim',
|
||||
|
||||
Reference in New Issue
Block a user