v1.0.0 release

This commit is contained in:
konstantin-paulus
2024-07-06 15:54:25 +02:00
parent 77fa8c1257
commit 5b558c114b
19 changed files with 566 additions and 196 deletions
+2
View File
@@ -22,3 +22,5 @@ dist-ssr
*.njsproj
*.sln
*.sw?
test-results
playwright-report
+70
View File
@@ -0,0 +1,70 @@
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/diffusion-studio/vits-web/graphs/commit-activity)
[![Website shields.io](https://img.shields.io/website-up-down-green-red/http/shields.io.svg)](https://diffusion.studio)
[![Discord](https://badgen.net/badge/icon/discord?icon=discord&label)](https://discord.gg/n3mpzfejAb)
[![GitHub license](https://badgen.net/github/license/diffusion-studio/vits-web)](https://github.com/diffusion-studio/vits-web/blob/main/LICENSE)
[![TypeScript](https://badgen.net/badge/icon/typescript?icon=typescript&label)](https://typescriptlang.org)
# Use VITS models in the browser powered by the [ONNX Runtime](https://onnxruntime.ai/)
A big shout-out goes to [Rhasspy Piper](https://github.com/rhasspy/piper), who open-sourced all the currently available models (MIT License) and to [@jozefchutka](https://github.com/jozefchutka) who came up with the wasm build steps.
## Usage
First of all, you need to install the library:
```bash
npm i --save @diffusionstudio/vits-web
```
Then import the library as follows (ES modules only):
```typescript
import * as tts from '@diffusionstudio/vits-web';
// Hint: onnxruntime-web is a peer dependency
```
Now you can start synthesizing speech!
```typescript
const wav = await tts.predict({
text: "Text to speech in the browser is amazing!",
voiceId: 'en_US-hfc_female-medium',
});
// available in Web Worker
const audio = new Audio();
audio.src = URL.createObjectURL(wav);
audio.play();
```
The first call to the predict function downloads the model, which is then stored in your [Origin private file system](https://developer.mozilla.org/en-US/docs/Web/API/File_System_API/Origin_private_file_system). You can also download the model manually in advance *(recommended)*, as follows:
```typescript
await tts.download('en_US-hfc_female-medium', (progress) => {
console.log(`Downloading ${progress.url} - ${Math.round(progress.loaded * 100 / progress.total)}%`);
});
```
The predict function also accepts a download progress callback as the second argument (`tts.predict(..., console.log)`). <br>
If you want to know which models have already been stored, do the following:
```typescript
console.log(await tts.stored());
// will log ['en_US-hfc_female-medium']
```
You can remove models from the OPFS by calling:
```typescript
await tts.remove('en_US-hfc_female-medium');
// alternatively delete all
await tts.flush();
```
Last but not least, use this snippet to retrieve all available voices:
```typescript
console.log(await tts.voices());
// Hint: the key can be used as voiceId
```
### **That's it!** Happy coding :)
+101 -18
View File
@@ -8,13 +8,14 @@
"name": "@diffusionstudio/vits-web",
"version": "1.0.0",
"license": "MIT",
"dependencies": {
"onnxruntime-web": "^1.18.0"
},
"devDependencies": {
"@playwright/test": "^1.35.1",
"typescript": "^5.2.2",
"vite": "^5.3.1",
"vite-plugin-dts": "^3.9.1"
},
"peerDependencies": {
"onnxruntime-web": "^1.18.0"
}
},
"node_modules/@babel/parser": {
@@ -513,35 +514,56 @@
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/@playwright/test": {
"version": "1.45.1",
"resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.45.1.tgz",
"integrity": "sha512-Wo1bWTzQvGA7LyKGIZc8nFSTFf2TkthGIFBR+QVNilvwouGzFd4PYukZe3rvf5PSqjHi1+1NyKSDZKcQWETzaA==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright": "1.45.1"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/base64": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/codegen": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/eventemitter": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/fetch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
"license": "BSD-3-Clause",
"peer": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.1",
"@protobufjs/inquire": "^1.1.0"
@@ -551,31 +573,36 @@
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/inquire": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/path": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/pool": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@protobufjs/utf8": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
"license": "BSD-3-Clause"
"license": "BSD-3-Clause",
"peer": true
},
"node_modules/@rollup/pluginutils": {
"version": "5.1.0",
@@ -909,6 +936,7 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.9.tgz",
"integrity": "sha512-06OCtnTXtWOZBJlRApleWndH4JsRVs1pDCc8dLSQp+7PpUpX3ePdHyeNSFTeSe7FtKyQkrlPvHwJOW3SLd8Oyg==",
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~5.26.4"
}
@@ -1200,7 +1228,8 @@
"version": "1.12.0",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==",
"license": "SEE LICENSE IN LICENSE.txt"
"license": "SEE LICENSE IN LICENSE.txt",
"peer": true
},
"node_modules/fs-extra": {
"version": "7.0.1",
@@ -1253,7 +1282,8 @@
"version": "1.0.9",
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
"license": "ISC"
"license": "ISC",
"peer": true
},
"node_modules/has-flag": {
"version": "4.0.0",
@@ -1370,7 +1400,8 @@
"version": "5.2.3",
"resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
"integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==",
"license": "Apache-2.0"
"license": "Apache-2.0",
"peer": true
},
"node_modules/lru-cache": {
"version": "6.0.0",
@@ -1445,13 +1476,15 @@
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz",
"integrity": "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q==",
"license": "MIT"
"license": "MIT",
"peer": true
},
"node_modules/onnxruntime-web": {
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.18.0.tgz",
"integrity": "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ==",
"license": "MIT",
"peer": true,
"dependencies": {
"flatbuffers": "^1.12.0",
"guid-typescript": "^1.0.9",
@@ -1499,7 +1532,55 @@
"version": "1.3.6",
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
"license": "MIT"
"license": "MIT",
"peer": true
},
"node_modules/playwright": {
"version": "1.45.1",
"resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.1.tgz",
"integrity": "sha512-Hjrgae4kpSQBr98nhCj3IScxVeVUixqj+5oyif8TdIn2opTCPEzqAqNMeK42i3cWDCVu9MI+ZsGWw+gVR4ISBg==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"playwright-core": "1.45.1"
},
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"fsevents": "2.3.2"
}
},
"node_modules/playwright-core": {
"version": "1.45.1",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.45.1.tgz",
"integrity": "sha512-LF4CUUtrUu2TCpDw4mcrAIuYrEjVDfT1cHbJMfwnE2+1b8PZcFzPNgvZCvq2JfQ4aTjRCCHw5EJ2tmr2NSzdPg==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"playwright-core": "cli.js"
},
"engines": {
"node": ">=18"
}
},
"node_modules/playwright/node_modules/fsevents": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
"integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/postcss": {
"version": "8.4.39",
@@ -1536,6 +1617,7 @@
"integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
"peer": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
@@ -1731,7 +1813,8 @@
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"license": "MIT"
"license": "MIT",
"peer": true
},
"node_modules/universalify": {
"version": "0.1.2",
+5 -3
View File
@@ -44,14 +44,16 @@
"scripts": {
"dev": "vite",
"build": "rm -r -f ./dist && tsc && vite build",
"preview": "vite preview"
"preview": "vite preview",
"test": "npx playwright test --project=chromium"
},
"devDependencies": {
"typescript": "^5.2.2",
"vite": "^5.3.1",
"vite-plugin-dts": "^3.9.1"
"vite-plugin-dts": "^3.9.1",
"@playwright/test": "^1.35.1"
},
"dependencies": {
"peerDependencies": {
"onnxruntime-web": "^1.18.0"
}
}
+55
View File
@@ -0,0 +1,55 @@
import { defineConfig, devices } from '@playwright/test';
/**
* Read environment variables from file.
* https://github.com/motdotla/dotenv
*/
// require('dotenv').config();
/** True when the `CI` environment variable is set to a non-empty value. */
const isCI = !!process.env.CI;

/** URL the dev server started by `npm run dev` is expected to answer on. */
const DEV_SERVER_URL = 'http://localhost:5173/';

/** Per-test timeout in milliseconds (five minutes). */
const TEST_TIMEOUT_MS = 5 * 60 * 1000;

/**
 * Playwright test-runner configuration.
 * See https://playwright.dev/docs/test-configuration.
 */
export default defineConfig({
  /* Launch the dev server before the tests and wait until the URL responds. */
  webServer: {
    command: 'npm run dev',
    url: DEV_SERVER_URL,
  },
  timeout: TEST_TIMEOUT_MS,
  /* Spec files are looked up inside the source directory. */
  testDir: './src',
  /* Tests inside a file are not run in parallel. */
  fullyParallel: false,
  /* Always use a single worker, locally as well as on CI. */
  workers: 1,
  /* Fail the build on CI if test.only was accidentally left in the source code. */
  forbidOnly: isCI,
  /* Retry failed tests on CI only. */
  retries: isCI ? 2 : 0,
  /* Reporter to use. See https://playwright.dev/docs/test-reporters */
  reporter: 'html',
  /* Shared settings for all projects. See https://playwright.dev/docs/api/class-testoptions. */
  use: {
    /* Base URL to use in actions like `await page.goto('/')`. */
    // baseURL: 'http://127.0.0.1:3000',
    /* Collect a trace when retrying a failed test. See https://playwright.dev/docs/trace-viewer */
    trace: 'on-first-retry',
  },
  /* Browser projects; only Chromium is enabled. */
  projects: [
    {
      name: 'chromium',
      use: { ...devices['Desktop Chrome'] },
    },
    // {
    //   name: 'firefox',
    //   use: { ...devices['Desktop Firefox'] },
    // },
    // {
    //   name: 'webkit',
    //   use: { ...devices['Desktop Safari'] },
    // },
  ],
});
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
+2
View File
@@ -1,3 +1,5 @@
export * from './inference';
export * from './storage';
export * from './voices';
export * from './types';
export * from './fixtures';
+81
View File
@@ -0,0 +1,81 @@
import { test, expect, Page } from '@playwright/test';
import * as tts from '.';
test.describe.configure({ mode: 'serial' });
let page: Page;
/**
 * End-to-end checks for `tts.predict`, executed against the dev server page.
 *
 * NOTE(review): the callbacks handed to `page.evaluate` run inside the browser
 * page, not in the test process; `tts` inside them presumably resolves to the
 * global that the page's entry script attaches to `window` — confirm.
 */
test.describe('The inference methods', () => {
// open one page that is shared by every test of this suite
test.beforeAll(async ({ browser }) => {
page = await browser.newPage();
await page.goto('http://localhost:5173/');
});
// wipe the origin private file system after each test so tests start without stored models
test.afterEach(async () => {
await page.evaluate(async () => {
// @ts-ignore
await (await navigator.storage.getDirectory()).remove({ recursive: true });
});
})
test('should be able to generate a voice and download models', async () => {
let stored = await page.evaluate(async () => {
return await tts.stored();
});
// make sure opfs is empty
expect(stored.length).toBe(0);
// first run: nothing is stored yet, so the model has to be downloaded
// (progress events are collected in the page and returned to the test)
const result = await page.evaluate(async () => {
const calls: tts.Progress[] = [];
const fn: tts.ProgressCallback = (progress) => {
calls.push(progress);
}
const audio = await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn);
const arrayBuffer = await audio.arrayBuffer();
const { size, type } = audio;
// only plain, serializable values are handed back to the test process
return {
calls,
size,
type,
byteLength: arrayBuffer.byteLength,
}
});
// check progress: more than ten events are expected, and a sample event must be well-formed
expect(result.calls.length).toBeGreaterThan(10);
expect(result.calls[10].url).toMatch('en_US-danny-low');
expect(typeof result.calls[10].total == 'number').toBe(true);
expect(typeof result.calls[10].loaded == 'number').toBe(true);
// check the generated audio: non-trivial size and wav mime type
expect(result.byteLength).toBeGreaterThan(1e3);
expect(result.size).toBeGreaterThan(1e3);
expect(result.type).toBe('audio/x-wav');
stored = await page.evaluate(async () => {
return await tts.stored();
});
// make sure the downloaded model is now listed as stored
expect(stored.length).toBe(1);
// second run: use the same model again; it should be served from storage
// instead of being downloaded, so no progress events are expected
const calls = await page.evaluate(async () => {
const calls: tts.Progress[] = [];
const fn: tts.ProgressCallback = (progress) => {
calls.push(progress);
}
await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn);
return calls
});
expect(calls.length).toBe(0);
});
});
+69 -25
View File
@@ -1,37 +1,81 @@
import { InferenceConfg, MessageData, ProgressCallback } from "./types";
import Worker from './worker.ts?worker'
import { InferenceConfg, ProgressCallback } from "./types";
import { HF_BASE, ONNX_BASE, PATH_MAP, WASM_BASE } from './fixtures';
import { readBlob, writeBlob } from './opfs';
import { fetchBlob } from './http.js';
import { pcm2wav } from './audio';
/**
* Run text to speech inference in new worker thread. Fetches the model
* first, if it has not yet been saved to opfs yet.
*/
export async function predict(config: InferenceConfg, callback?: ProgressCallback): Promise<Blob> {
const worker = new Worker()
// @ts-ignore
const { createPiperPhonemize } = await import('./piper.js');
const ort = await import('onnxruntime-web');
worker.postMessage({ type: 'init', ...config });
const path = PATH_MAP[config.voiceId];
const input = JSON.stringify([{ text: config.text.trim() }])
return await new Promise<Blob>((resolve, reject) => {
function eventHandler(event: MessageEvent<MessageData>) {
const data = event.data;
const piperPhonemizeWasm = (await createBlobUrl(`${WASM_BASE}.wasm`)).url;
const piperPhonemizeData = (await createBlobUrl(`${WASM_BASE}.data`)).url;
if (data.type == 'output') {
worker.terminate();
resolve(data.file);
}
if (data.type == 'stderr') {
worker.terminate();
reject(data.message);
}
if (data.type == 'fetch') {
const { loaded, total, url } = data;
callback?.({ loaded, total, url });
}
worker.onerror = () => {
worker.terminate();
reject()
}
}
ort.env.wasm.numThreads = navigator.hardwareConcurrency;
ort.env.wasm.wasmPaths = ONNX_BASE;
worker.addEventListener('message', eventHandler)
const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob;
const modelConfig = JSON.parse(await modelConfigBlob.text());
const phonemeIds: string[] = await new Promise(async resolve => {
const module = await createPiperPhonemize({
print: (data: any) => {
resolve(JSON.parse(data).phoneme_ids);
},
printErr: (message: any) => {
throw new Error(message);
},
locateFile: (url: string) => {
if (url.endsWith(".wasm")) return piperPhonemizeWasm;
if (url.endsWith(".data")) return piperPhonemizeData;
return url;
}
});
module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]);
});
const speakerId = 0;
const sampleRate = modelConfig.audio.sample_rate;
const noiseScale = modelConfig.inference.noise_scale;
const lengthScale = modelConfig.inference.length_scale;
const noiseW = modelConfig.inference.noise_w;
const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`, callback)).url;
const session = await ort.InferenceSession.create(modelBlob);
const feeds = {
input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]),
input_lengths: new ort.Tensor("int64", [phonemeIds.length]),
scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW])
}
if (Object.keys(modelConfig.speaker_id_map).length) {
Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) })
}
const { output: { data: pcm } } = await session.run(feeds);
return new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" });
}
/**
 * Resolve a remote resource to a blob and an object URL pointing at it.
 *
 * The blob is looked up via `readBlob` first; only when nothing is stored is
 * it fetched (reporting progress to `callback`) and written back via
 * `writeBlob`.
 *
 * @param url - Address of the resource, also used as the storage key.
 * @param callback - Optional download progress listener; only used on a fetch.
 * @returns The blob together with a freshly created object URL for it.
 */
async function createBlobUrl(url: string, callback?: ProgressCallback) {
  const cached = await readBlob(url);
  if (cached) {
    return { url: URL.createObjectURL(cached), blob: cached };
  }

  const downloaded = await fetchBlob(url, callback);
  await writeBlob(url, downloaded);

  return { url: URL.createObjectURL(downloaded), blob: downloaded };
}
+2
View File
@@ -1,5 +1,7 @@
import * as tts from './index';
Object.assign(window, { tts });
document.querySelector('#app')!.innerHTML = `
<button id="btn" type="button">Predict</button>
`
+7 -3
View File
@@ -12,7 +12,9 @@ export async function writeBlob(url: string, blob: Blob): Promise<void> {
const writable = await file.createWritable();
await writable.write(blob);
await writable.close();
} catch (_) { }
} catch (e) {
console.error(e)
}
}
export async function removeBlob(url: string) {
@@ -21,8 +23,10 @@ export async function removeBlob(url: string) {
const dir = await root.getDirectoryHandle('piper');
const path = url.split('/').at(-1)!;
const file = await dir.getFileHandle(path); // @ts-ignore
file.remove();
} catch (_) { }
await file.remove();
} catch (e) {
console.error(e)
}
}
export async function readBlob(url: string): Promise<Blob | undefined> {
+91
View File
@@ -0,0 +1,91 @@
import { test, expect, Page } from '@playwright/test';
import * as tts from '.';
import { PATH_MAP } from './fixtures';
test.describe.configure({ mode: 'serial' });
let page: Page;
/**
 * End-to-end checks for `tts.download`, `tts.stored` and `tts.remove`,
 * executed against the dev server page.
 *
 * NOTE(review): the callbacks handed to `page.evaluate` run inside the browser
 * page, not in the test process; `tts` inside them presumably resolves to the
 * global that the page's entry script attaches to `window` — confirm.
 */
test.describe('The storage methods', () => {
// open one page that is shared by every test of this suite
test.beforeAll(async ({ browser }) => {
page = await browser.newPage();
await page.goto('http://localhost:5173/');
});
// wipe the origin private file system after each test so tests start without stored models
test.afterEach(async () => {
await page.evaluate(async () => {
// @ts-ignore
await (await navigator.storage.getDirectory()).remove({ recursive: true });
});
})
test('should be able to download new voices', async () => {
let stored = await page.evaluate(async () => {
return await tts.stored();
});
// make sure opfs is empty
expect(stored.length).toBe(0);
// download a voice and collect every progress event inside the page
let calls = await page.evaluate(async () => {
const calls: tts.Progress[] = [];
const fn: tts.ProgressCallback = (progress) => {
calls.push(progress);
}
await tts.download('en_US-amy-low', fn);
return calls;
});
// check progress: more than ten events are expected, and a sample event must be well-formed
expect(calls.length).toBeGreaterThan(10);
expect(calls[10].url).toMatch('en_US-amy-low');
expect(typeof calls[10].total == 'number').toBe(true);
expect(typeof calls[10].loaded == 'number').toBe(true);
// check stored file
stored = await page.evaluate(async () => {
return await tts.stored();
});
expect(stored.length).toBe(1);
expect(stored[0]).toBe('en_US-amy-low');
});
test('should be able to delete selected voices', async () => {
let stored = await page.evaluate(async () => {
return await tts.stored();
});
expect(stored.length).toBe(0);
// seed the 'piper' directory with empty placeholder files (model + '.json')
// for two voices instead of downloading them; PATH_MAP is passed in as an
// argument because the callback runs inside the page
await page.evaluate(async (pathmap) => {
const root = await navigator.storage.getDirectory();
const dir = await root.getDirectoryHandle('piper', { create: true });
const voice0 = pathmap['de_DE-eva_k-x_low'].split('/').at(-1)!;
const voice1 = pathmap['ca_ES-upc_ona-medium'].split('/').at(-1)!;
await dir.getFileHandle(voice0, { create: true });
await dir.getFileHandle(voice0 + '.json', { create: true });
await dir.getFileHandle(voice1, { create: true });
await dir.getFileHandle(voice1 + '.json', { create: true });
}, PATH_MAP);
// both seeded voices must be reported as stored
stored = await page.evaluate(async () => {
return await tts.stored();
});
expect(stored.length).toBe(2);
// remove one voice and verify that only the other one is left
await page.evaluate(async () => {
await tts.remove('de_DE-eva_k-x_low');
});
stored = await page.evaluate(async () => {
return await tts.stored();
});
expect(stored.length).toBe(1);
expect(stored[0]).toBe('ca_ES-upc_ona-medium');
});
});
+8 -8
View File
@@ -12,7 +12,7 @@ export async function download(voiceId: VoiceId, callback?: ProgressCallback): P
const urls = [`${HF_BASE}/${path}`, `${HF_BASE}/${path}.json`]
await Promise.all(urls.map(async (url) => {
writeBlob(url, await fetchBlob(url, callback));
writeBlob(url, await fetchBlob(url, url.endsWith('.onnx') ? callback : undefined));
}));
}
@@ -51,11 +51,11 @@ export async function stored(): Promise<VoiceId[]> {
* Delete the models directory
*/
export async function flush() {
const root = await navigator.storage.getDirectory();
const dir = await root.getDirectoryHandle('piper', {
create: true,
});
// @ts-ignore
await dir.remove({ recursive: true });
try {
const root = await navigator.storage.getDirectory();
const dir = await root.getDirectoryHandle('piper'); // @ts-ignore
await dir.remove({ recursive: true });
} catch (e) {
console.error(e)
}
}
+2 -15
View File
@@ -26,18 +26,7 @@ export type Voice = {
aliases: string[]
}
export type ErrorMessage = {
type: "stderr";
message: string;
}
export type OutputMessage = {
type: "output";
file: Blob;
}
export type FetchMessage = {
type: "fetch";
export type Progress = {
url: string;
total: number;
loaded: number;
@@ -48,6 +37,4 @@ export type InferenceConfg = {
voiceId: VoiceId
};
export type MessageData = ErrorMessage | OutputMessage | FetchMessage;
export type ProgressCallback = (progress: Omit<FetchMessage, 'type'>) => void;
export type ProgressCallback = (progress: Progress) => void;
+45
View File
@@ -0,0 +1,45 @@
import { test, expect, Page } from '@playwright/test';
import * as tts from '.';
test.describe.configure({ mode: 'serial' });
let page: Page;
/**
 * End-to-end check for `tts.voices`, executed against the dev server page.
 *
 * NOTE(review): the callback handed to `page.evaluate` runs inside the browser
 * page, not in the test process; `tts` inside it presumably resolves to the
 * global that the page's entry script attaches to `window` — confirm.
 */
test.describe('The voices method', () => {
// open one page that is shared by every test of this suite
test.beforeAll(async ({ browser }) => {
page = await browser.newPage();
await page.goto('http://localhost:5173/');
});
// wipe the origin private file system after each test
test.afterEach(async () => {
await page.evaluate(async () => {
// @ts-ignore
await (await navigator.storage.getDirectory()).remove({ recursive: true });
});
});
test('should be able to fetch more than one hundred voices', async () => {
const voices = await page.evaluate(async () => {
return await tts.voices()
});
expect(voices.length).toBeGreaterThan(100);
// every entry must carry non-empty string metadata, a numeric speaker count
// and exactly three associated files
for (const voice of voices) {
expect(typeof voice.key == 'string').toBe(true)
expect(voice.key.length).toBeGreaterThan(0);
expect(typeof voice.name == 'string').toBe(true)
expect(voice.name.length).toBeGreaterThan(0);
expect(typeof voice.language.code == 'string').toBe(true)
expect(voice.language.code.length).toBeGreaterThan(0);
expect(typeof voice.quality == 'string').toBe(true)
expect(voice.quality.length).toBeGreaterThan(0);
expect(typeof voice.num_speakers == 'number').toBe(true);
expect(Object.keys(voice.files).length).toBe(3);
}
})
});
-95
View File
@@ -1,95 +0,0 @@
import * as ort from 'onnxruntime-web';
// @ts-ignore
import { createPiperPhonemize } from './piper.js';
import { ErrorMessage, FetchMessage, InferenceConfg, OutputMessage } from './types';
import { HF_BASE, ONNX_BASE, PATH_MAP } from './fixtures';
import { readBlob, writeBlob } from './opfs';
import { fetchBlob } from './http.js';
import { pcm2wav } from './audio';
type MessageData = InferenceConfg & { type?: 'init' }
const WASM_URL = new URL('/piper.wasm', import.meta.url).href;
const DATA_URL = new URL('/piper.data', import.meta.url).href;
/**
 * Worker message handler: runs one text-to-speech inference for an 'init'
 * message and posts the resulting wav file back as an 'output' message.
 * Messages of any other type are ignored.
 *
 * @param event - Message whose data carries the voice id and the text.
 */
async function handleMessage(event: MessageEvent<MessageData>) {
const data = event.data;
if (data?.type != 'init') return;
const path = PATH_MAP[data.voiceId];
// the phonemizer receives its input as a JSON array of { text } objects
const input = JSON.stringify([{ text: data.text.trim() }])
// object URLs for the phonemizer's wasm binary and data file (see locateFile below)
const piperPhonemizeWasm = (await createBlobUrl(WASM_URL)).url;
const piperPhonemizeData = (await createBlobUrl(DATA_URL)).url;
ort.env.wasm.numThreads = navigator.hardwareConcurrency;
ort.env.wasm.wasmPaths = ONNX_BASE;
// the voice's JSON config sits next to the model file at '<model path>.json'
const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob;
const modelConfig = JSON.parse(await modelConfigBlob.text());
// Run the phonemizer; the promise resolves with the phoneme ids parsed from
// what the module prints.
// NOTE(review): the promise has no reject path, so it stays pending if the
// module never prints; errors are only forwarded as 'stderr' messages.
// NOTE(review): annotated as string[] although the values are fed into an
// int64 tensor below — presumably they are numeric ids; confirm.
const phonemeIds: string[] = await new Promise(async resolve => {
const module = await createPiperPhonemize({
print: (data: any) => {
resolve(JSON.parse(data).phoneme_ids);
},
printErr: (message: any) => {
self.postMessage({ type: "stderr", message } satisfies ErrorMessage);
},
// redirect the module's requests for its .wasm/.data files to the object URLs
locateFile: (url: string) => {
if (url.endsWith(".wasm")) return piperPhonemizeWasm;
if (url.endsWith(".data")) return piperPhonemizeData;
return url;
}
});
module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]);
});
// the speaker id is fixed to 0; the remaining parameters come from the voice config
const speakerId = 0;
const sampleRate = modelConfig.audio.sample_rate;
const noiseScale = modelConfig.inference.noise_scale;
const lengthScale = modelConfig.inference.length_scale;
const noiseW = modelConfig.inference.noise_w;
const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`)).url;
const session = await ort.InferenceSession.create(modelBlob);
const feeds = {
input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]),
input_lengths: new ort.Tensor("int64", [phonemeIds.length]),
scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW])
}
// the 'sid' input is only added when the config's speaker_id_map is non-empty
if (Object.keys(modelConfig.speaker_id_map).length) {
Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) })
}
const { output: { data: pcm } } = await session.run(feeds);
// wrap the model output as a wav blob (channel argument 1, sample rate from the config)
const file = new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" });
self.postMessage({ type: "output", file } satisfies OutputMessage);
}
/**
 * Resolve a resource to a blob and an object URL pointing at it.
 *
 * The blob is looked up via `readBlob` first; only when nothing is stored is
 * it fetched and written back via `writeBlob`. While fetching, progress is
 * forwarded to the main thread as 'fetch' messages, but only for URLs that
 * match 'https://huggingface.co'.
 *
 * @param url - Address of the resource, also used as the storage key.
 * @returns The blob together with a freshly created object URL for it.
 */
async function createBlobUrl(url: string) {
  const cached = await readBlob(url);
  if (cached) {
    return { url: URL.createObjectURL(cached), blob: cached };
  }

  const downloaded = await fetchBlob(url, (data) => {
    // progress of other downloads is not reported
    if (!url.match('https://huggingface.co')) return;
    self.postMessage({ ...data, type: "fetch" } satisfies FetchMessage);
  });
  await writeBlob(url, downloaded);

  return { url: URL.createObjectURL(downloaded), blob: downloaded };
}
self.addEventListener("message", handleMessage);
-29
View File
@@ -1,29 +0,0 @@
import path from 'path';
import { defineConfig } from 'vite';
import dts from 'vite-plugin-dts';
/**
 * Vite configuration for the library build and the local dev server.
 *
 * `publicDir` is switched off for the `build` command and left enabled for
 * every other command.
 */
export default defineConfig(({ command }) => ({
  publicDir: command !== 'build',
  build: {
    // library mode: a single ES module bundle generated from src/index.ts
    lib: {
      entry: path.resolve(__dirname, 'src/index.ts'),
      name: 'vits-web',
      formats: ['es'],
      fileName: 'vits-web'
    },
  },
  // emit .d.ts type declarations alongside the bundle
  plugins: [dts()],
  server: {
    // cross-origin isolation headers sent by the dev server
    headers: {
      'Cross-Origin-Embedder-Policy': 'require-corp',
      'Cross-Origin-Opener-Policy': 'same-origin',
    },
  },
}));
+26
View File
@@ -0,0 +1,26 @@
import path from 'path';
import { defineConfig } from 'vite';
import dts from 'vite-plugin-dts';
/**
 * Vite configuration for the library build and the local dev server.
 */
export default defineConfig({
  build: {
    // library mode: a single ES module bundle generated from src/index.ts
    lib: {
      entry: path.resolve(__dirname, 'src/index.ts'),
      name: 'vits-web',
      formats: ['es']
    },
    rollupOptions: {
      external: [
        // Rollup compares string entries with module ids literally (no glob
        // expansion), so spec files have to be matched with a RegExp.
        /\.spec\.ts$/,
        // peer dependency: must be provided by the consumer, never bundled
        'onnxruntime-web'
      ],
    },
  },
  // emit .d.ts type declarations, leaving out the spec files
  plugins: [dts({ exclude: "**/*.spec.ts" })],
  server: {
    // cross-origin isolation headers sent by the dev server
    headers: {
      'Cross-Origin-Embedder-Policy': 'require-corp',
      'Cross-Origin-Opener-Policy': 'same-origin',
    },
  },
});