mirror of
https://github.com/Mintplex-Labs/piper-tts-web.git
synced 2026-07-01 20:04:04 -04:00
v1.0.0 release
This commit is contained in:
@@ -22,3 +22,5 @@ dist-ssr
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
test-results
|
||||
playwright-report
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
[](https://github.com/diffusion-studio/ffmpeg-js/graphs/commit-activity)
|
||||
[](https://diffusion.studio)
|
||||
[](https://discord.gg/n3mpzfejAb)
|
||||
[](https://github.com/diffusion-studio/ffmpeg-js/blob/main/LICENSE)
|
||||
[](https://typescriptlang.org)
|
||||
|
||||
# Use VITS models in the browser powered by the [ONNX Runtime](https://onnxruntime.ai/)
|
||||
|
||||
A big shout-out goes to [Rhasspy Piper](https://github.com/rhasspy/piper), who open-sourced all the currently available models (MIT License) and to [@jozefchutka](https://github.com/jozefchutka) who came up with the wasm build steps.
|
||||
|
||||
## Usage
|
||||
First of all, you need to install the library:
|
||||
```bash
|
||||
npm i --save @diffusionstudio/vits-web
|
||||
```
|
||||
|
||||
Then you're able to import the library like this (ES only)
|
||||
```typescript
|
||||
import * as tts from '@diffusionstudio/vits-web';
|
||||
|
||||
// Hint: onnxruntime-web is a peer dependency
|
||||
```
|
||||
|
||||
Now you can start synthesizing speech!
|
||||
```typescript
|
||||
const wav = await tts.predict({
|
||||
text: "Text to speech in the browser is amazing!",
|
||||
voiceId: 'en_US-hfc_female-medium',
|
||||
});
|
||||
|
||||
// available in Web Worker
|
||||
|
||||
const audio = new Audio();
|
||||
audio.src = URL.createObjectURL(wav);
|
||||
audio.play();
|
||||
```
|
||||
|
||||
On the first run of the `predict` function, the model is downloaded and then stored in your [Origin private file system](https://developer.mozilla.org/en-US/docs/Web/API/File_System_API/Origin_private_file_system). You can also download it manually in advance *(recommended)*, as follows:
|
||||
```typescript
|
||||
await tts.download('en_US-hfc_female-medium', (progress) => {
|
||||
console.log(`Downloading ${progress.url} - ${Math.round(progress.loaded * 100 / progress.total)}%`);
|
||||
});
|
||||
```
|
||||
|
||||
The predict function also accepts a download progress callback as the second argument (`tts.predict(..., console.log)`). <br>
|
||||
|
||||
If you want to know which models have already been stored, do the following
|
||||
```typescript
|
||||
console.log(await tts.stored());
|
||||
|
||||
// will log ['en_US-hfc_female-medium']
|
||||
```
|
||||
|
||||
You can remove models from opfs by calling
|
||||
```typescript
|
||||
await tts.remove('en_US-hfc_female-medium');
|
||||
|
||||
// alternatively delete all
|
||||
|
||||
await tts.flush();
|
||||
```
|
||||
|
||||
And last but not least use this snippet if you would like to retrieve all available voices:
|
||||
```typescript
|
||||
console.log(await tts.voices());
|
||||
|
||||
// Hint: the key can be used as voiceId
|
||||
```
|
||||
|
||||
### **That's it!** Happy coding :)
|
||||
Generated
+101
-18
@@ -8,13 +8,14 @@
|
||||
"name": "@diffusionstudio/vits-web",
|
||||
"version": "1.0.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"onnxruntime-web": "^1.18.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@playwright/test": "^1.35.1",
|
||||
"typescript": "^5.2.2",
|
||||
"vite": "^5.3.1",
|
||||
"vite-plugin-dts": "^3.9.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"onnxruntime-web": "^1.18.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/parser": {
|
||||
@@ -513,35 +514,56 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/@playwright/test": {
|
||||
"version": "1.45.1",
|
||||
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.45.1.tgz",
|
||||
"integrity": "sha512-Wo1bWTzQvGA7LyKGIZc8nFSTFf2TkthGIFBR+QVNilvwouGzFd4PYukZe3rvf5PSqjHi1+1NyKSDZKcQWETzaA==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"playwright": "1.45.1"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/aspromise": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/base64": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
|
||||
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/codegen": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
|
||||
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/eventemitter": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
|
||||
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/fetch": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
||||
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.0"
|
||||
@@ -551,31 +573,36 @@
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
|
||||
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/inquire": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
|
||||
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/path": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
|
||||
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/pool": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
|
||||
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@protobufjs/utf8": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
||||
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
|
||||
"license": "BSD-3-Clause"
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/@rollup/pluginutils": {
|
||||
"version": "5.1.0",
|
||||
@@ -909,6 +936,7 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.9.tgz",
|
||||
"integrity": "sha512-06OCtnTXtWOZBJlRApleWndH4JsRVs1pDCc8dLSQp+7PpUpX3ePdHyeNSFTeSe7FtKyQkrlPvHwJOW3SLd8Oyg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
@@ -1200,7 +1228,8 @@
|
||||
"version": "1.12.0",
|
||||
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
|
||||
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==",
|
||||
"license": "SEE LICENSE IN LICENSE.txt"
|
||||
"license": "SEE LICENSE IN LICENSE.txt",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/fs-extra": {
|
||||
"version": "7.0.1",
|
||||
@@ -1253,7 +1282,8 @@
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
|
||||
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
|
||||
"license": "ISC"
|
||||
"license": "ISC",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/has-flag": {
|
||||
"version": "4.0.0",
|
||||
@@ -1370,7 +1400,8 @@
|
||||
"version": "5.2.3",
|
||||
"resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
|
||||
"integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==",
|
||||
"license": "Apache-2.0"
|
||||
"license": "Apache-2.0",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/lru-cache": {
|
||||
"version": "6.0.0",
|
||||
@@ -1445,13 +1476,15 @@
|
||||
"version": "1.18.0",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz",
|
||||
"integrity": "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q==",
|
||||
"license": "MIT"
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/onnxruntime-web": {
|
||||
"version": "1.18.0",
|
||||
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.18.0.tgz",
|
||||
"integrity": "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"flatbuffers": "^1.12.0",
|
||||
"guid-typescript": "^1.0.9",
|
||||
@@ -1499,7 +1532,55 @@
|
||||
"version": "1.3.6",
|
||||
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
|
||||
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
|
||||
"license": "MIT"
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/playwright": {
|
||||
"version": "1.45.1",
|
||||
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.1.tgz",
|
||||
"integrity": "sha512-Hjrgae4kpSQBr98nhCj3IScxVeVUixqj+5oyif8TdIn2opTCPEzqAqNMeK42i3cWDCVu9MI+ZsGWw+gVR4ISBg==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"playwright-core": "1.45.1"
|
||||
},
|
||||
"bin": {
|
||||
"playwright": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "2.3.2"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright-core": {
|
||||
"version": "1.45.1",
|
||||
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.45.1.tgz",
|
||||
"integrity": "sha512-LF4CUUtrUu2TCpDw4mcrAIuYrEjVDfT1cHbJMfwnE2+1b8PZcFzPNgvZCvq2JfQ4aTjRCCHw5EJ2tmr2NSzdPg==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"playwright-core": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/playwright/node_modules/fsevents": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
|
||||
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/postcss": {
|
||||
"version": "8.4.39",
|
||||
@@ -1536,6 +1617,7 @@
|
||||
"integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
@@ -1731,7 +1813,8 @@
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"license": "MIT"
|
||||
"license": "MIT",
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/universalify": {
|
||||
"version": "0.1.2",
|
||||
|
||||
+5
-3
@@ -44,14 +44,16 @@
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "rm -r -f ./dist && tsc && vite build",
|
||||
"preview": "vite preview"
|
||||
"preview": "vite preview",
|
||||
"test": "npx playwright test --project=chromium"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.2.2",
|
||||
"vite": "^5.3.1",
|
||||
"vite-plugin-dts": "^3.9.1"
|
||||
"vite-plugin-dts": "^3.9.1",
|
||||
"@playwright/test": "^1.35.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"peerDependencies": {
|
||||
"onnxruntime-web": "^1.18.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
import { defineConfig, devices } from '@playwright/test';
|
||||
|
||||
/**
|
||||
* Read environment variables from file.
|
||||
* https://github.com/motdotla/dotenv
|
||||
*/
|
||||
// require('dotenv').config();
|
||||
|
||||
/**
|
||||
* See https://playwright.dev/docs/test-configuration.
|
||||
*/
|
||||
export default defineConfig({
|
||||
webServer: {
|
||||
command: 'npm run dev',
|
||||
url: 'http://localhost:5173/',
|
||||
},
|
||||
timeout: 5 * 60 * 1000,
|
||||
testDir: './src',
|
||||
/* Do not run the tests inside a single file in parallel */
|
||||
fullyParallel: false,
|
||||
/* Fail the build on CI if you accidentally left test.only in the source code. */
|
||||
forbidOnly: !!process.env.CI,
|
||||
/* Retry on CI only */
|
||||
retries: process.env.CI ? 2 : 0,
|
||||
/* Always use a single worker, so tests never run in parallel (locally or on CI). */
|
||||
workers: 1,
|
||||
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
|
||||
reporter: 'html',
|
||||
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
|
||||
use: {
|
||||
/* Base URL to use in actions like `await page.goto('/')`. */
|
||||
// baseURL: 'http://127.0.0.1:3000',
|
||||
|
||||
/* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
|
||||
trace: 'on-first-retry',
|
||||
},
|
||||
|
||||
/* Configure projects for major browsers */
|
||||
projects: [
|
||||
{
|
||||
name: 'chromium',
|
||||
use: { ...devices['Desktop Chrome'] },
|
||||
},
|
||||
|
||||
// {
|
||||
// name: 'firefox',
|
||||
// use: { ...devices['Desktop Firefox'] },
|
||||
// },
|
||||
|
||||
// {
|
||||
// name: 'webkit',
|
||||
// use: { ...devices['Desktop Safari'] },
|
||||
// },
|
||||
],
|
||||
});
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,3 +1,5 @@
|
||||
export * from './inference';
|
||||
export * from './storage';
|
||||
export * from './voices';
|
||||
export * from './types';
|
||||
export * from './fixtures';
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
import { test, expect, Page } from '@playwright/test';
|
||||
import * as tts from '.';
|
||||
|
||||
test.describe.configure({ mode: 'serial' });
|
||||
|
||||
let page: Page;
|
||||
|
||||
test.describe('The inference methods', () => {
|
||||
test.beforeAll(async ({ browser }) => {
|
||||
page = await browser.newPage();
|
||||
await page.goto('http://localhost:5173/');
|
||||
});
|
||||
|
||||
test.afterEach(async () => {
|
||||
await page.evaluate(async () => {
|
||||
// @ts-ignore
|
||||
await (await navigator.storage.getDirectory()).remove({ recursive: true });
|
||||
});
|
||||
})
|
||||
|
||||
test('should be able to generate a voice and download models', async () => {
|
||||
|
||||
let stored = await page.evaluate(async () => {
|
||||
return await tts.stored();
|
||||
});
|
||||
// make sure opfs is empty
|
||||
expect(stored.length).toBe(0);
|
||||
|
||||
// load model from huggingface
|
||||
|
||||
const result = await page.evaluate(async () => {
|
||||
const calls: tts.Progress[] = [];
|
||||
|
||||
const fn: tts.ProgressCallback = (progress) => {
|
||||
calls.push(progress);
|
||||
}
|
||||
|
||||
const audio = await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn);
|
||||
const arrayBuffer = await audio.arrayBuffer();
|
||||
const { size, type } = audio;
|
||||
|
||||
return {
|
||||
calls,
|
||||
size,
|
||||
type,
|
||||
byteLength: arrayBuffer.byteLength,
|
||||
}
|
||||
});
|
||||
|
||||
// check progress
|
||||
expect(result.calls.length).toBeGreaterThan(10);
|
||||
expect(result.calls[10].url).toMatch('en_US-danny-low');
|
||||
expect(typeof result.calls[10].total == 'number').toBe(true);
|
||||
expect(typeof result.calls[10].loaded == 'number').toBe(true);
|
||||
|
||||
expect(result.byteLength).toBeGreaterThan(1e3);
|
||||
expect(result.size).toBeGreaterThan(1e3);
|
||||
expect(result.type).toBe('audio/x-wav');
|
||||
|
||||
stored = await page.evaluate(async () => {
|
||||
return await tts.stored();
|
||||
});
|
||||
// make sure the downloaded model is now stored in opfs
|
||||
expect(stored.length).toBe(1);
|
||||
|
||||
// load model from memory
|
||||
// use the same model again
|
||||
const calls = await page.evaluate(async () => {
|
||||
const calls: tts.Progress[] = [];
|
||||
|
||||
const fn: tts.ProgressCallback = (progress) => {
|
||||
calls.push(progress);
|
||||
}
|
||||
|
||||
await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn);
|
||||
return calls
|
||||
});
|
||||
|
||||
expect(calls.length).toBe(0);
|
||||
});
|
||||
});
|
||||
+69
-25
@@ -1,37 +1,81 @@
|
||||
import { InferenceConfg, MessageData, ProgressCallback } from "./types";
|
||||
import Worker from './worker.ts?worker'
|
||||
import { InferenceConfg, ProgressCallback } from "./types";
|
||||
import { HF_BASE, ONNX_BASE, PATH_MAP, WASM_BASE } from './fixtures';
|
||||
import { readBlob, writeBlob } from './opfs';
|
||||
import { fetchBlob } from './http.js';
|
||||
import { pcm2wav } from './audio';
|
||||
|
||||
/**
|
||||
* Run text to speech inference. Fetches the model
|
||||
* first, if it has not been saved to opfs yet.
|
||||
*/
|
||||
export async function predict(config: InferenceConfg, callback?: ProgressCallback): Promise<Blob> {
|
||||
const worker = new Worker()
|
||||
// @ts-ignore
|
||||
const { createPiperPhonemize } = await import('./piper.js');
|
||||
const ort = await import('onnxruntime-web');
|
||||
|
||||
worker.postMessage({ type: 'init', ...config });
|
||||
const path = PATH_MAP[config.voiceId];
|
||||
const input = JSON.stringify([{ text: config.text.trim() }])
|
||||
|
||||
return await new Promise<Blob>((resolve, reject) => {
|
||||
function eventHandler(event: MessageEvent<MessageData>) {
|
||||
const data = event.data;
|
||||
const piperPhonemizeWasm = (await createBlobUrl(`${WASM_BASE}.wasm`)).url;
|
||||
const piperPhonemizeData = (await createBlobUrl(`${WASM_BASE}.data`)).url;
|
||||
|
||||
if (data.type == 'output') {
|
||||
worker.terminate();
|
||||
resolve(data.file);
|
||||
}
|
||||
if (data.type == 'stderr') {
|
||||
worker.terminate();
|
||||
reject(data.message);
|
||||
}
|
||||
if (data.type == 'fetch') {
|
||||
const { loaded, total, url } = data;
|
||||
callback?.({ loaded, total, url });
|
||||
}
|
||||
worker.onerror = () => {
|
||||
worker.terminate();
|
||||
reject()
|
||||
}
|
||||
}
|
||||
ort.env.wasm.numThreads = navigator.hardwareConcurrency;
|
||||
ort.env.wasm.wasmPaths = ONNX_BASE;
|
||||
|
||||
worker.addEventListener('message', eventHandler)
|
||||
const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob;
|
||||
const modelConfig = JSON.parse(await modelConfigBlob.text());
|
||||
|
||||
const phonemeIds: string[] = await new Promise(async resolve => {
|
||||
const module = await createPiperPhonemize({
|
||||
print: (data: any) => {
|
||||
resolve(JSON.parse(data).phoneme_ids);
|
||||
},
|
||||
printErr: (message: any) => {
|
||||
throw new Error(message);
|
||||
},
|
||||
locateFile: (url: string) => {
|
||||
if (url.endsWith(".wasm")) return piperPhonemizeWasm;
|
||||
if (url.endsWith(".data")) return piperPhonemizeData;
|
||||
return url;
|
||||
}
|
||||
});
|
||||
|
||||
module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]);
|
||||
});
|
||||
|
||||
const speakerId = 0;
|
||||
const sampleRate = modelConfig.audio.sample_rate;
|
||||
const noiseScale = modelConfig.inference.noise_scale;
|
||||
const lengthScale = modelConfig.inference.length_scale;
|
||||
const noiseW = modelConfig.inference.noise_w;
|
||||
|
||||
const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`, callback)).url;
|
||||
const session = await ort.InferenceSession.create(modelBlob);
|
||||
const feeds = {
|
||||
input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]),
|
||||
input_lengths: new ort.Tensor("int64", [phonemeIds.length]),
|
||||
scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW])
|
||||
}
|
||||
if (Object.keys(modelConfig.speaker_id_map).length) {
|
||||
Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) })
|
||||
}
|
||||
|
||||
const { output: { data: pcm } } = await session.run(feeds);
|
||||
|
||||
return new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" });
|
||||
}
|
||||
|
||||
async function createBlobUrl(url: string, callback?: ProgressCallback) {
|
||||
let blob: Blob | undefined = await readBlob(url);
|
||||
|
||||
if (!blob) {
|
||||
blob = await fetchBlob(url, callback);
|
||||
await writeBlob(url, blob);
|
||||
}
|
||||
|
||||
return {
|
||||
url: URL.createObjectURL(blob),
|
||||
blob
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import * as tts from './index';
|
||||
|
||||
Object.assign(window, { tts });
|
||||
|
||||
document.querySelector('#app')!.innerHTML = `
|
||||
<button id="btn" type="button">Predict</button>
|
||||
`
|
||||
|
||||
+7
-3
@@ -12,7 +12,9 @@ export async function writeBlob(url: string, blob: Blob): Promise<void> {
|
||||
const writable = await file.createWritable();
|
||||
await writable.write(blob);
|
||||
await writable.close();
|
||||
} catch (_) { }
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
}
|
||||
}
|
||||
|
||||
export async function removeBlob(url: string) {
|
||||
@@ -21,8 +23,10 @@ export async function removeBlob(url: string) {
|
||||
const dir = await root.getDirectoryHandle('piper');
|
||||
const path = url.split('/').at(-1)!;
|
||||
const file = await dir.getFileHandle(path); // @ts-ignore
|
||||
file.remove();
|
||||
} catch (_) { }
|
||||
await file.remove();
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
}
|
||||
}
|
||||
|
||||
export async function readBlob(url: string): Promise<Blob | undefined> {
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
import { test, expect, Page } from '@playwright/test';
|
||||
import * as tts from '.';
|
||||
import { PATH_MAP } from './fixtures';
|
||||
|
||||
test.describe.configure({ mode: 'serial' });
|
||||
|
||||
let page: Page;
|
||||
|
||||
test.describe('The storage methods', () => {
|
||||
test.beforeAll(async ({ browser }) => {
|
||||
page = await browser.newPage();
|
||||
await page.goto('http://localhost:5173/');
|
||||
});
|
||||
|
||||
test.afterEach(async () => {
|
||||
await page.evaluate(async () => {
|
||||
// @ts-ignore
|
||||
await (await navigator.storage.getDirectory()).remove({ recursive: true });
|
||||
});
|
||||
})
|
||||
|
||||
test('should be able to download new voices', async () => {
|
||||
let stored = await page.evaluate(async () => {
|
||||
return await tts.stored();
|
||||
});
|
||||
// make sure opfs is empty
|
||||
expect(stored.length).toBe(0);
|
||||
|
||||
let calls = await page.evaluate(async () => {
|
||||
const calls: tts.Progress[] = [];
|
||||
|
||||
const fn: tts.ProgressCallback = (progress) => {
|
||||
calls.push(progress);
|
||||
}
|
||||
|
||||
await tts.download('en_US-amy-low', fn);
|
||||
|
||||
return calls;
|
||||
});
|
||||
|
||||
// check progress
|
||||
expect(calls.length).toBeGreaterThan(10);
|
||||
expect(calls[10].url).toMatch('en_US-amy-low');
|
||||
expect(typeof calls[10].total == 'number').toBe(true);
|
||||
expect(typeof calls[10].loaded == 'number').toBe(true);
|
||||
|
||||
// check stored file
|
||||
stored = await page.evaluate(async () => {
|
||||
return await tts.stored();
|
||||
});
|
||||
expect(stored.length).toBe(1);
|
||||
expect(stored[0]).toBe('en_US-amy-low');
|
||||
});
|
||||
|
||||
test('should be able to delete selected voices', async () => {
|
||||
let stored = await page.evaluate(async () => {
|
||||
return await tts.stored();
|
||||
});
|
||||
expect(stored.length).toBe(0);
|
||||
|
||||
await page.evaluate(async (pathmap) => {
|
||||
const root = await navigator.storage.getDirectory();
|
||||
const dir = await root.getDirectoryHandle('piper', { create: true });
|
||||
|
||||
const voice0 = pathmap['de_DE-eva_k-x_low'].split('/').at(-1)!;
|
||||
const voice1 = pathmap['ca_ES-upc_ona-medium'].split('/').at(-1)!;
|
||||
|
||||
await dir.getFileHandle(voice0, { create: true });
|
||||
await dir.getFileHandle(voice0 + '.json', { create: true });
|
||||
|
||||
await dir.getFileHandle(voice1, { create: true });
|
||||
await dir.getFileHandle(voice1 + '.json', { create: true });
|
||||
}, PATH_MAP);
|
||||
|
||||
|
||||
stored = await page.evaluate(async () => {
|
||||
return await tts.stored();
|
||||
});
|
||||
expect(stored.length).toBe(2);
|
||||
|
||||
await page.evaluate(async () => {
|
||||
await tts.remove('de_DE-eva_k-x_low');
|
||||
});
|
||||
|
||||
stored = await page.evaluate(async () => {
|
||||
return await tts.stored();
|
||||
});
|
||||
expect(stored.length).toBe(1);
|
||||
expect(stored[0]).toBe('ca_ES-upc_ona-medium');
|
||||
});
|
||||
});
|
||||
+8
-8
@@ -12,7 +12,7 @@ export async function download(voiceId: VoiceId, callback?: ProgressCallback): P
|
||||
const urls = [`${HF_BASE}/${path}`, `${HF_BASE}/${path}.json`]
|
||||
|
||||
await Promise.all(urls.map(async (url) => {
|
||||
writeBlob(url, await fetchBlob(url, callback));
|
||||
writeBlob(url, await fetchBlob(url, url.endsWith('.onnx') ? callback : undefined));
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -51,11 +51,11 @@ export async function stored(): Promise<VoiceId[]> {
|
||||
* Delete the models directory
|
||||
*/
|
||||
export async function flush() {
|
||||
const root = await navigator.storage.getDirectory();
|
||||
const dir = await root.getDirectoryHandle('piper', {
|
||||
create: true,
|
||||
});
|
||||
|
||||
// @ts-ignore
|
||||
await dir.remove({ recursive: true });
|
||||
try {
|
||||
const root = await navigator.storage.getDirectory();
|
||||
const dir = await root.getDirectoryHandle('piper'); // @ts-ignore
|
||||
await dir.remove({ recursive: true });
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
}
|
||||
}
|
||||
|
||||
+2
-15
@@ -26,18 +26,7 @@ export type Voice = {
|
||||
aliases: string[]
|
||||
}
|
||||
|
||||
export type ErrorMessage = {
|
||||
type: "stderr";
|
||||
message: string;
|
||||
}
|
||||
|
||||
export type OutputMessage = {
|
||||
type: "output";
|
||||
file: Blob;
|
||||
}
|
||||
|
||||
export type FetchMessage = {
|
||||
type: "fetch";
|
||||
export type Progress = {
|
||||
url: string;
|
||||
total: number;
|
||||
loaded: number;
|
||||
@@ -48,6 +37,4 @@ export type InferenceConfg = {
|
||||
voiceId: VoiceId
|
||||
};
|
||||
|
||||
export type MessageData = ErrorMessage | OutputMessage | FetchMessage;
|
||||
|
||||
export type ProgressCallback = (progress: Omit<FetchMessage, 'type'>) => void;
|
||||
export type ProgressCallback = (progress: Progress) => void;
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
import { test, expect, Page } from '@playwright/test';
|
||||
import * as tts from '.';
|
||||
|
||||
test.describe.configure({ mode: 'serial' });
|
||||
|
||||
let page: Page;
|
||||
|
||||
test.describe('The voices method', () => {
|
||||
test.beforeAll(async ({ browser }) => {
|
||||
page = await browser.newPage();
|
||||
await page.goto('http://localhost:5173/');
|
||||
});
|
||||
|
||||
test.afterEach(async () => {
|
||||
await page.evaluate(async () => {
|
||||
// @ts-ignore
|
||||
await (await navigator.storage.getDirectory()).remove({ recursive: true });
|
||||
});
|
||||
});
|
||||
|
||||
test('should be able to fetch more than one hundred voices', async () => {
|
||||
const voices = await page.evaluate(async () => {
|
||||
return await tts.voices()
|
||||
});
|
||||
expect(voices.length).toBeGreaterThan(100);
|
||||
|
||||
for (const voice of voices) {
|
||||
expect(typeof voice.key == 'string').toBe(true)
|
||||
expect(voice.key.length).toBeGreaterThan(0);
|
||||
|
||||
expect(typeof voice.name == 'string').toBe(true)
|
||||
expect(voice.name.length).toBeGreaterThan(0);
|
||||
|
||||
expect(typeof voice.language.code == 'string').toBe(true)
|
||||
expect(voice.language.code.length).toBeGreaterThan(0);
|
||||
|
||||
expect(typeof voice.quality == 'string').toBe(true)
|
||||
expect(voice.quality.length).toBeGreaterThan(0);
|
||||
|
||||
expect(typeof voice.num_speakers == 'number').toBe(true);
|
||||
|
||||
expect(Object.keys(voice.files).length).toBe(3);
|
||||
}
|
||||
})
|
||||
});
|
||||
@@ -1,95 +0,0 @@
|
||||
import * as ort from 'onnxruntime-web';
|
||||
// @ts-ignore
|
||||
import { createPiperPhonemize } from './piper.js';
|
||||
import { ErrorMessage, FetchMessage, InferenceConfg, OutputMessage } from './types';
|
||||
import { HF_BASE, ONNX_BASE, PATH_MAP } from './fixtures';
|
||||
import { readBlob, writeBlob } from './opfs';
|
||||
import { fetchBlob } from './http.js';
|
||||
import { pcm2wav } from './audio';
|
||||
|
||||
type MessageData = InferenceConfg & { type?: 'init' }
|
||||
|
||||
const WASM_URL = new URL('/piper.wasm', import.meta.url).href;
|
||||
const DATA_URL = new URL('/piper.data', import.meta.url).href;
|
||||
|
||||
async function handleMessage(event: MessageEvent<MessageData>) {
|
||||
const data = event.data;
|
||||
|
||||
if (data?.type != 'init') return;
|
||||
|
||||
const path = PATH_MAP[data.voiceId];
|
||||
const input = JSON.stringify([{ text: data.text.trim() }])
|
||||
|
||||
const piperPhonemizeWasm = (await createBlobUrl(WASM_URL)).url;
|
||||
const piperPhonemizeData = (await createBlobUrl(DATA_URL)).url;
|
||||
|
||||
ort.env.wasm.numThreads = navigator.hardwareConcurrency;
|
||||
ort.env.wasm.wasmPaths = ONNX_BASE;
|
||||
|
||||
const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob;
|
||||
const modelConfig = JSON.parse(await modelConfigBlob.text());
|
||||
|
||||
const phonemeIds: string[] = await new Promise(async resolve => {
|
||||
const module = await createPiperPhonemize({
|
||||
print: (data: any) => {
|
||||
resolve(JSON.parse(data).phoneme_ids);
|
||||
},
|
||||
printErr: (message: any) => {
|
||||
self.postMessage({ type: "stderr", message } satisfies ErrorMessage);
|
||||
},
|
||||
locateFile: (url: string) => {
|
||||
if (url.endsWith(".wasm")) return piperPhonemizeWasm;
|
||||
if (url.endsWith(".data")) return piperPhonemizeData;
|
||||
return url;
|
||||
}
|
||||
});
|
||||
|
||||
module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]);
|
||||
});
|
||||
|
||||
const speakerId = 0;
|
||||
const sampleRate = modelConfig.audio.sample_rate;
|
||||
const noiseScale = modelConfig.inference.noise_scale;
|
||||
const lengthScale = modelConfig.inference.length_scale;
|
||||
const noiseW = modelConfig.inference.noise_w;
|
||||
|
||||
const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`)).url;
|
||||
const session = await ort.InferenceSession.create(modelBlob);
|
||||
const feeds = {
|
||||
input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]),
|
||||
input_lengths: new ort.Tensor("int64", [phonemeIds.length]),
|
||||
scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW])
|
||||
}
|
||||
if (Object.keys(modelConfig.speaker_id_map).length) {
|
||||
Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) })
|
||||
}
|
||||
|
||||
const { output: { data: pcm } } = await session.run(feeds);
|
||||
|
||||
const file = new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" });
|
||||
|
||||
self.postMessage({ type: "output", file } satisfies OutputMessage);
|
||||
}
|
||||
|
||||
async function createBlobUrl(url: string) {
|
||||
let blob: Blob | undefined = await readBlob(url);
|
||||
|
||||
if (!blob) {
|
||||
blob = await fetchBlob(url, (data) => {
|
||||
if (url.match('https://huggingface.co')) {
|
||||
self.postMessage({
|
||||
...data,
|
||||
type: "fetch"
|
||||
} satisfies FetchMessage)
|
||||
}
|
||||
});
|
||||
await writeBlob(url, blob);
|
||||
}
|
||||
|
||||
return {
|
||||
url: URL.createObjectURL(blob),
|
||||
blob
|
||||
};
|
||||
}
|
||||
|
||||
self.addEventListener("message", handleMessage);
|
||||
@@ -1,29 +0,0 @@
|
||||
import path from 'path';
|
||||
import { defineConfig } from 'vite';
|
||||
import dts from 'vite-plugin-dts';
|
||||
|
||||
export default defineConfig(({ command }) => {
|
||||
let publicDir = true;
|
||||
if (command === 'build') {
|
||||
publicDir = false;
|
||||
}
|
||||
|
||||
return {
|
||||
publicDir,
|
||||
build: {
|
||||
lib: {
|
||||
entry: path.resolve(__dirname, 'src/index.ts'),
|
||||
name: 'vits-web',
|
||||
formats: ['es'],
|
||||
fileName: 'vits-web'
|
||||
},
|
||||
},
|
||||
plugins: [dts()],
|
||||
server: {
|
||||
headers: {
|
||||
'Cross-Origin-Embedder-Policy': 'require-corp',
|
||||
'Cross-Origin-Opener-Policy': 'same-origin',
|
||||
},
|
||||
},
|
||||
}
|
||||
});
|
||||
@@ -0,0 +1,26 @@
|
||||
import path from 'path';
|
||||
import { defineConfig } from 'vite';
|
||||
import dts from 'vite-plugin-dts';
|
||||
|
||||
export default defineConfig({
|
||||
build: {
|
||||
lib: {
|
||||
entry: path.resolve(__dirname, 'src/index.ts'),
|
||||
name: 'vits-web',
|
||||
formats: ['es']
|
||||
},
|
||||
rollupOptions: {
|
||||
external: [
|
||||
'**/*.spec.ts',
|
||||
'onnxruntime-web'
|
||||
],
|
||||
},
|
||||
},
|
||||
plugins: [dts({ exclude: "**/*.spec.ts" })],
|
||||
server: {
|
||||
headers: {
|
||||
'Cross-Origin-Embedder-Policy': 'require-corp',
|
||||
'Cross-Origin-Opener-Policy': 'same-origin',
|
||||
},
|
||||
},
|
||||
});
|
||||
Reference in New Issue
Block a user