diff --git a/.gitignore b/.gitignore
index a547bf3..2afd27a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,5 @@ dist-ssr
*.njsproj
*.sln
*.sw?
+test-results
+playwright-report
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..100b93a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,70 @@
+[Commit activity](https://github.com/diffusion-studio/ffmpeg-js/graphs/commit-activity)
+[Website](https://diffusion.studio)
+[Discord](https://discord.gg/n3mpzfejAb)
+[License](https://github.com/diffusion-studio/ffmpeg-js/blob/main/LICENSE)
+[TypeScript](https://typescriptlang.org)
+
+# Use VITS models in the browser powered by the [ONNX Runtime](https://onnxruntime.ai/)
+
+A big shout-out goes to [Rhasspy Piper](https://github.com/rhasspy/piper), who open-sourced all the currently available models (MIT License) and to [@jozefchutka](https://github.com/jozefchutka) who came up with the wasm build steps.
+
+## Usage
+First of all, you need to install the library:
+```bash
+npm i --save @diffusionstudio/vits-web
+```
+
+Then you can import the library like this (ES modules only):
+```typescript
+import * as tts from '@diffusionstudio/vits-web';
+
+// Hint: onnxruntime-web is a peer dependency
+```
+
+Now you can start synthesizing speech!
+```typescript
+const wav = await tts.predict({
+ text: "Text to speech in the browser is amazing!",
+ voiceId: 'en_US-hfc_female-medium',
+});
+
+// Hint: predict() can also be called from inside a Web Worker
+
+const audio = new Audio();
+audio.src = URL.createObjectURL(wav);
+audio.play();
+```
+
+The first time you run `predict` with a voice, the model is downloaded and then stored in your [Origin private file system](https://developer.mozilla.org/en-US/docs/Web/API/File_System_API/Origin_private_file_system). You can also download it manually in advance *(recommended)*, as follows:
+```typescript
+await tts.download('en_US-hfc_female-medium', (progress) => {
+ console.log(`Downloading ${progress.url} - ${Math.round(progress.loaded * 100 / progress.total)}%`);
+});
+```
+
+The predict function also accepts a download progress callback as the second argument (`tts.predict(..., console.log)`).
+
+If you want to know which models have already been stored, do the following:
+```typescript
+console.log(await tts.stored());
+
+// will log ['en_US-hfc_female-medium']
+```
+
+You can remove models from OPFS by calling:
+```typescript
+await tts.remove('en_US-hfc_female-medium');
+
+// alternatively delete all
+
+await tts.flush();
+```
+
+And last but not least, use this snippet if you would like to retrieve all available voices:
+```typescript
+console.log(await tts.voices());
+
+// Hint: the key can be used as voiceId
+```
+
+### **That's it!** Happy coding :)
diff --git a/package-lock.json b/package-lock.json
index 22fe2e8..a64ed11 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,13 +8,14 @@
"name": "@diffusionstudio/vits-web",
"version": "1.0.0",
"license": "MIT",
- "dependencies": {
- "onnxruntime-web": "^1.18.0"
- },
"devDependencies": {
+ "@playwright/test": "^1.35.1",
"typescript": "^5.2.2",
"vite": "^5.3.1",
"vite-plugin-dts": "^3.9.1"
+ },
+ "peerDependencies": {
+ "onnxruntime-web": "^1.18.0"
}
},
"node_modules/@babel/parser": {
@@ -513,35 +514,56 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/@playwright/test": {
+ "version": "1.45.1",
+ "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.45.1.tgz",
+ "integrity": "sha512-Wo1bWTzQvGA7LyKGIZc8nFSTFf2TkthGIFBR+QVNilvwouGzFd4PYukZe3rvf5PSqjHi1+1NyKSDZKcQWETzaA==",
+ "dev": true,
+ "license": "Apache-2.0",
+ "dependencies": {
+ "playwright": "1.45.1"
+ },
+ "bin": {
+ "playwright": "cli.js"
+ },
+ "engines": {
+ "node": ">=18"
+ }
+ },
"node_modules/@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/base64": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/codegen": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/eventemitter": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/fetch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
"license": "BSD-3-Clause",
+ "peer": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.1",
"@protobufjs/inquire": "^1.1.0"
@@ -551,31 +573,36 @@
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/inquire": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/path": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/pool": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@protobufjs/utf8": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
- "license": "BSD-3-Clause"
+ "license": "BSD-3-Clause",
+ "peer": true
},
"node_modules/@rollup/pluginutils": {
"version": "5.1.0",
@@ -909,6 +936,7 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.9.tgz",
"integrity": "sha512-06OCtnTXtWOZBJlRApleWndH4JsRVs1pDCc8dLSQp+7PpUpX3ePdHyeNSFTeSe7FtKyQkrlPvHwJOW3SLd8Oyg==",
"license": "MIT",
+ "peer": true,
"dependencies": {
"undici-types": "~5.26.4"
}
@@ -1200,7 +1228,8 @@
"version": "1.12.0",
"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz",
"integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==",
- "license": "SEE LICENSE IN LICENSE.txt"
+ "license": "SEE LICENSE IN LICENSE.txt",
+ "peer": true
},
"node_modules/fs-extra": {
"version": "7.0.1",
@@ -1253,7 +1282,8 @@
"version": "1.0.9",
"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
- "license": "ISC"
+ "license": "ISC",
+ "peer": true
},
"node_modules/has-flag": {
"version": "4.0.0",
@@ -1370,7 +1400,8 @@
"version": "5.2.3",
"resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
"integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==",
- "license": "Apache-2.0"
+ "license": "Apache-2.0",
+ "peer": true
},
"node_modules/lru-cache": {
"version": "6.0.0",
@@ -1445,13 +1476,15 @@
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz",
"integrity": "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q==",
- "license": "MIT"
+ "license": "MIT",
+ "peer": true
},
"node_modules/onnxruntime-web": {
"version": "1.18.0",
"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.18.0.tgz",
"integrity": "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ==",
"license": "MIT",
+ "peer": true,
"dependencies": {
"flatbuffers": "^1.12.0",
"guid-typescript": "^1.0.9",
@@ -1499,7 +1532,55 @@
"version": "1.3.6",
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
- "license": "MIT"
+ "license": "MIT",
+ "peer": true
+ },
+ "node_modules/playwright": {
+ "version": "1.45.1",
+ "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.1.tgz",
+ "integrity": "sha512-Hjrgae4kpSQBr98nhCj3IScxVeVUixqj+5oyif8TdIn2opTCPEzqAqNMeK42i3cWDCVu9MI+ZsGWw+gVR4ISBg==",
+ "dev": true,
+ "license": "Apache-2.0",
+ "dependencies": {
+ "playwright-core": "1.45.1"
+ },
+ "bin": {
+ "playwright": "cli.js"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "optionalDependencies": {
+ "fsevents": "2.3.2"
+ }
+ },
+ "node_modules/playwright-core": {
+ "version": "1.45.1",
+ "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.45.1.tgz",
+ "integrity": "sha512-LF4CUUtrUu2TCpDw4mcrAIuYrEjVDfT1cHbJMfwnE2+1b8PZcFzPNgvZCvq2JfQ4aTjRCCHw5EJ2tmr2NSzdPg==",
+ "dev": true,
+ "license": "Apache-2.0",
+ "bin": {
+ "playwright-core": "cli.js"
+ },
+ "engines": {
+ "node": ">=18"
+ }
+ },
+ "node_modules/playwright/node_modules/fsevents": {
+ "version": "2.3.2",
+ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
+ "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+ "dev": true,
+ "hasInstallScript": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "darwin"
+ ],
+ "engines": {
+ "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+ }
},
"node_modules/postcss": {
"version": "8.4.39",
@@ -1536,6 +1617,7 @@
"integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
+ "peer": true,
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
@@ -1731,7 +1813,8 @@
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
- "license": "MIT"
+ "license": "MIT",
+ "peer": true
},
"node_modules/universalify": {
"version": "0.1.2",
diff --git a/package.json b/package.json
index 20ce519..684937d 100644
--- a/package.json
+++ b/package.json
@@ -44,14 +44,16 @@
"scripts": {
"dev": "vite",
"build": "rm -r -f ./dist && tsc && vite build",
- "preview": "vite preview"
+ "preview": "vite preview",
+ "test": "npx playwright test --project=chromium"
},
"devDependencies": {
"typescript": "^5.2.2",
"vite": "^5.3.1",
- "vite-plugin-dts": "^3.9.1"
+ "vite-plugin-dts": "^3.9.1",
+ "@playwright/test": "^1.35.1"
},
- "dependencies": {
+ "peerDependencies": {
"onnxruntime-web": "^1.18.0"
}
}
\ No newline at end of file
diff --git a/playwright.config.ts b/playwright.config.ts
new file mode 100644
index 0000000..7aafb3c
--- /dev/null
+++ b/playwright.config.ts
@@ -0,0 +1,55 @@
+import { defineConfig, devices } from '@playwright/test';
+
+/**
+ * Read environment variables from file.
+ * https://github.com/motdotla/dotenv
+ */
+// require('dotenv').config();
+
+/**
+ * Playwright configuration, see https://playwright.dev/docs/test-configuration.
+ */
+export default defineConfig({
+ webServer: {
+ command: 'npm run dev',
+ url: 'http://localhost:5173/',
+ reuseExistingServer: !process.env.CI, // locally, reuse a dev server that is already running instead of failing
+ },
+ timeout: 5 * 60 * 1000, // 5 minutes per test; the specs download voice models
+ testDir: './src',
+ /* Do not run the tests inside a file in parallel */
+ fullyParallel: false,
+ /* Fail the build on CI if you accidentally left test.only in the source code. */
+ forbidOnly: !!process.env.CI,
+ /* Retry on CI only */
+ retries: process.env.CI ? 2 : 0,
+ /* Always use a single worker, locally and on CI. */
+ workers: 1,
+ /* Reporter to use. See https://playwright.dev/docs/test-reporters */
+ reporter: 'html',
+ /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
+ use: {
+ /* Base URL to use in actions like `await page.goto('/')`. */
+ // baseURL: 'http://127.0.0.1:3000',
+
+ /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */
+ trace: 'on-first-retry',
+ },
+
+ /* Configure projects for major browsers */
+ projects: [
+ {
+ name: 'chromium',
+ use: { ...devices['Desktop Chrome'] },
+ },
+ // {
+ // name: 'firefox',
+ // use: { ...devices['Desktop Firefox'] },
+ // },
+
+ // {
+ // name: 'webkit',
+ // use: { ...devices['Desktop Safari'] },
+ // },
+ ],
+});
\ No newline at end of file
diff --git a/public/piper.data b/public/piper.data
deleted file mode 100644
index b193ff2..0000000
Binary files a/public/piper.data and /dev/null differ
diff --git a/public/piper.wasm b/public/piper.wasm
deleted file mode 100644
index a5cb3b5..0000000
Binary files a/public/piper.wasm and /dev/null differ
diff --git a/src/index.ts b/src/index.ts
index b000e6c..39f87fc 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,3 +1,5 @@
export * from './inference';
export * from './storage';
export * from './voices';
+export * from './types';
+export * from './fixtures';
diff --git a/src/inference.spec.ts b/src/inference.spec.ts
new file mode 100644
index 0000000..620ac7f
--- /dev/null
+++ b/src/inference.spec.ts
@@ -0,0 +1,81 @@
+import { test, expect, Page } from '@playwright/test';
+import * as tts from '.';
+
+test.describe.configure({ mode: 'serial' });
+
+let page: Page;
+
+test.describe('The inference methods', () => {
+ test.beforeAll(async ({ browser }) => {
+ page = await browser.newPage();
+ await page.goto('http://localhost:5173/');
+ });
+
+ test.afterEach(async () => { // wipe the whole opfs after each test
+ await page.evaluate(async () => {
+ // @ts-ignore
+ await (await navigator.storage.getDirectory()).remove({ recursive: true });
+ });
+ })
+
+ test('should be able to generate a voice and download models', async () => {
+
+ let stored = await page.evaluate(async () => {
+ return await tts.stored();
+ });
+ // make sure opfs is empty
+ expect(stored.length).toBe(0);
+
+ // first run: the model is not in opfs yet, so it is loaded from huggingface
+
+ const result = await page.evaluate(async () => {
+ const calls: tts.Progress[] = [];
+
+ const fn: tts.ProgressCallback = (progress) => {
+ calls.push(progress);
+ }
+
+ const audio = await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn);
+ const arrayBuffer = await audio.arrayBuffer();
+ const { size, type } = audio;
+
+ return {
+ calls,
+ size,
+ type,
+ byteLength: arrayBuffer.byteLength,
+ }
+ });
+
+ // check that download progress was reported for the voice model
+ expect(result.calls.length).toBeGreaterThan(10);
+ expect(result.calls[10].url).toMatch('en_US-danny-low');
+ expect(typeof result.calls[10].total == 'number').toBe(true);
+ expect(typeof result.calls[10].loaded == 'number').toBe(true);
+
+ expect(result.byteLength).toBeGreaterThan(1e3);
+ expect(result.size).toBeGreaterThan(1e3);
+ expect(result.type).toBe('audio/x-wav');
+
+ stored = await page.evaluate(async () => {
+ return await tts.stored();
+ });
+ // make sure the model has been stored in opfs
+ expect(stored.length).toBe(1);
+
+ // second run: load the model from opfs instead of downloading it
+ // use the same model again
+ const calls = await page.evaluate(async () => {
+ const calls: tts.Progress[] = [];
+
+ const fn: tts.ProgressCallback = (progress) => {
+ calls.push(progress);
+ }
+
+ await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn);
+ return calls
+ });
+
+ expect(calls.length).toBe(0); // no progress events, i.e. nothing was downloaded
+ });
+});
diff --git a/src/inference.ts b/src/inference.ts
index 320456c..4e60236 100644
--- a/src/inference.ts
+++ b/src/inference.ts
@@ -1,37 +1,81 @@
-import { InferenceConfg, MessageData, ProgressCallback } from "./types";
-import Worker from './worker.ts?worker'
+import { InferenceConfg, ProgressCallback } from "./types";
+import { HF_BASE, ONNX_BASE, PATH_MAP, WASM_BASE } from './fixtures';
+import { readBlob, writeBlob } from './opfs';
+import { fetchBlob } from './http.js';
+import { pcm2wav } from './audio';
-/**
- * Run text to speech inference in new worker thread. Fetches the model
- * first, if it has not yet been saved to opfs yet.
- */
+/**
+ * Run text to speech inference on the calling thread (no worker is spawned)
+ * and return the synthesized speech as a wav blob.
+ *
+ * The phonemizer assets, the voice config and the voice model are read from
+ * opfs; whatever is not stored there yet is fetched first and written to opfs.
+ *
+ * @param config text to synthesize and the id of the voice to use
+ * @param callback optional progress callback, only passed on to the download of the voice model
+ * @returns a blob of type `audio/x-wav`
+ * @throws if loading or running the phonemizer fails or it reports an error on stderr
+ */
export async function predict(config: InferenceConfg, callback?: ProgressCallback): Promise {
- const worker = new Worker()
+ // @ts-ignore
+ const { createPiperPhonemize } = await import('./piper.js');
+ const ort = await import('onnxruntime-web');
- worker.postMessage({ type: 'init', ...config });
+ const path = PATH_MAP[config.voiceId];
+ const input = JSON.stringify([{ text: config.text.trim() }])
- return await new Promise((resolve, reject) => {
- function eventHandler(event: MessageEvent) {
- const data = event.data;
+ const piperPhonemizeWasm = (await createBlobUrl(`${WASM_BASE}.wasm`)).url;
+ const piperPhonemizeData = (await createBlobUrl(`${WASM_BASE}.data`)).url;
- if (data.type == 'output') {
- worker.terminate();
- resolve(data.file);
- }
- if (data.type == 'stderr') {
- worker.terminate();
- reject(data.message);
- }
- if (data.type == 'fetch') {
- const { loaded, total, url } = data;
- callback?.({ loaded, total, url });
- }
- worker.onerror = () => {
- worker.terminate();
- reject()
- }
- }
+ ort.env.wasm.numThreads = navigator.hardwareConcurrency;
+ ort.env.wasm.wasmPaths = ONNX_BASE;
- worker.addEventListener('message', eventHandler)
+ const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob;
+ const modelConfig = JSON.parse(await modelConfigBlob.text());
+
+ // the phonemizer prints its result as JSON on stdout; wrap that callback API in a promise
+ const phonemeIds: string[] = await new Promise(async (resolve, reject) => {
+ try {
+ const module = await createPiperPhonemize({
+ print: (data: any) => {
+ resolve(JSON.parse(data).phoneme_ids);
+ },
+ printErr: (message: any) => {
+ // reject instead of throwing: an exception thrown in this callback never settles the promise
+ reject(new Error(message));
+ },
+ locateFile: (url: string) => {
+ if (url.endsWith(".wasm")) return piperPhonemizeWasm;
+ if (url.endsWith(".data")) return piperPhonemizeData;
+ return url;
+ }
+ });
+
+ module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]);
+ } catch (e) {
+ // loading or running the phonemizer failed; surface it to the caller of predict
+ reject(e);
+ }
});
+
+ const speakerId = 0;
+ const sampleRate = modelConfig.audio.sample_rate;
+ const noiseScale = modelConfig.inference.noise_scale;
+ const lengthScale = modelConfig.inference.length_scale;
+ const noiseW = modelConfig.inference.noise_w;
+
+ const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`, callback)).url;
+ const session = await ort.InferenceSession.create(modelBlob);
+ const feeds = {
+ input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]),
+ input_lengths: new ort.Tensor("int64", [phonemeIds.length]),
+ scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW])
+ }
+ // only add the speaker id when the voice config lists speakers
+ if (Object.keys(modelConfig.speaker_id_map).length) {
+ Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) })
+ }
+
+ const { output: { data: pcm } } = await session.run(feeds);
+
+ return new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" });
}
+
+/**
+ * Resolve `url` to a blob, preferring the copy stored in opfs. On a miss the
+ * blob is fetched (reporting progress to `callback`) and written to opfs.
+ * Returns the blob together with a fresh object URL pointing at it; the
+ * object URL is not revoked here.
+ */
+async function createBlobUrl(url: string, callback?: ProgressCallback) {
+ const cached: Blob | undefined = await readBlob(url);
+ if (cached) {
+ return { url: URL.createObjectURL(cached), blob: cached };
+ }
+
+ const fetched = await fetchBlob(url, callback);
+ await writeBlob(url, fetched);
+
+ return { url: URL.createObjectURL(fetched), blob: fetched };
+}
+
diff --git a/src/main.ts b/src/main.ts
index 651902f..0ff49e9 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -1,5 +1,7 @@
import * as tts from './index';
+Object.assign(window, { tts });
+
document.querySelector('#app')!.innerHTML = `
`
diff --git a/src/opfs.ts b/src/opfs.ts
index 511064d..4e61f51 100644
--- a/src/opfs.ts
+++ b/src/opfs.ts
@@ -12,7 +12,9 @@ export async function writeBlob(url: string, blob: Blob): Promise {
const writable = await file.createWritable();
await writable.write(blob);
await writable.close();
- } catch (_) { }
+ } catch (e) {
+ console.error(e)
+ }
}
export async function removeBlob(url: string) {
@@ -21,8 +23,10 @@ export async function removeBlob(url: string) {
const dir = await root.getDirectoryHandle('piper');
const path = url.split('/').at(-1)!;
const file = await dir.getFileHandle(path); // @ts-ignore
- file.remove();
- } catch (_) { }
+ await file.remove();
+ } catch (e) {
+ console.error(e)
+ }
}
export async function readBlob(url: string): Promise {
diff --git a/src/storage.spec.ts b/src/storage.spec.ts
new file mode 100644
index 0000000..2145311
--- /dev/null
+++ b/src/storage.spec.ts
@@ -0,0 +1,91 @@
+import { test, expect, Page } from '@playwright/test';
+import * as tts from '.';
+import { PATH_MAP } from './fixtures';
+
+test.describe.configure({ mode: 'serial' });
+
+let page: Page;
+
+test.describe('The storage methods', () => {
+ test.beforeAll(async ({ browser }) => {
+ page = await browser.newPage();
+ await page.goto('http://localhost:5173/');
+ });
+
+ test.afterEach(async () => { // wipe the whole opfs after each test
+ await page.evaluate(async () => {
+ // @ts-ignore
+ await (await navigator.storage.getDirectory()).remove({ recursive: true });
+ });
+ })
+
+ test('should be able to download new voices', async () => {
+ let stored = await page.evaluate(async () => {
+ return await tts.stored();
+ });
+ // make sure opfs is empty
+ expect(stored.length).toBe(0);
+
+ let calls = await page.evaluate(async () => {
+ const calls: tts.Progress[] = [];
+
+ const fn: tts.ProgressCallback = (progress) => {
+ calls.push(progress);
+ }
+
+ await tts.download('en_US-amy-low', fn);
+
+ return calls;
+ });
+
+ // check that download progress was reported for the voice model
+ expect(calls.length).toBeGreaterThan(10);
+ expect(calls[10].url).toMatch('en_US-amy-low');
+ expect(typeof calls[10].total == 'number').toBe(true);
+ expect(typeof calls[10].loaded == 'number').toBe(true);
+
+ // check that the downloaded voice is now listed as stored
+ stored = await page.evaluate(async () => {
+ return await tts.stored();
+ });
+ expect(stored.length).toBe(1);
+ expect(stored[0]).toBe('en_US-amy-low');
+ });
+
+ test('should be able to delete selected voices', async () => {
+ let stored = await page.evaluate(async () => {
+ return await tts.stored();
+ });
+ expect(stored.length).toBe(0);
+
+ await page.evaluate(async (pathmap) => { // create empty model and config files for two voices directly in opfs
+ const root = await navigator.storage.getDirectory();
+ const dir = await root.getDirectoryHandle('piper', { create: true });
+
+ const voice0 = pathmap['de_DE-eva_k-x_low'].split('/').at(-1)!;
+ const voice1 = pathmap['ca_ES-upc_ona-medium'].split('/').at(-1)!;
+
+ await dir.getFileHandle(voice0, { create: true });
+ await dir.getFileHandle(voice0 + '.json', { create: true });
+
+ await dir.getFileHandle(voice1, { create: true });
+ await dir.getFileHandle(voice1 + '.json', { create: true });
+ }, PATH_MAP);
+
+
+ stored = await page.evaluate(async () => {
+ return await tts.stored();
+ });
+ expect(stored.length).toBe(2);
+
+ await page.evaluate(async () => {
+ await tts.remove('de_DE-eva_k-x_low');
+ });
+
+ stored = await page.evaluate(async () => {
+ return await tts.stored();
+ });
+ expect(stored.length).toBe(1); // only the voice that was not removed is left
+ expect(stored[0]).toBe('ca_ES-upc_ona-medium');
+ });
+});
diff --git a/src/storage.ts b/src/storage.ts
index abb285e..8584cc4 100644
--- a/src/storage.ts
+++ b/src/storage.ts
@@ -12,7 +12,7 @@ export async function download(voiceId: VoiceId, callback?: ProgressCallback): P
const urls = [`${HF_BASE}/${path}`, `${HF_BASE}/${path}.json`]
await Promise.all(urls.map(async (url) => {
- writeBlob(url, await fetchBlob(url, callback));
+ // await the write so that download() only resolves once the file is stored in opfs;
+ // progress is reported for the .onnx model only, not for its .json config
+ await writeBlob(url, await fetchBlob(url, url.endsWith('.onnx') ? callback : undefined));
}));
}
@@ -51,11 +51,11 @@ export async function stored(): Promise {
* Delete the models directory
*/
export async function flush() {
- const root = await navigator.storage.getDirectory();
- const dir = await root.getDirectoryHandle('piper', {
- create: true,
- });
-
- // @ts-ignore
- await dir.remove({ recursive: true });
+ try {
+ const opfsRoot = await navigator.storage.getDirectory();
+ // no `create` option: looking up a missing directory throws, which is logged below
+ const modelsDir = await opfsRoot.getDirectoryHandle('piper');
+ // @ts-ignore
+ await modelsDir.remove({ recursive: true });
+ } catch (error) {
+ // best effort: log the failure instead of propagating it
+ console.error(error)
+ }
}
diff --git a/src/types.ts b/src/types.ts
index 56ab816..9da5b5d 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -26,18 +26,7 @@ export type Voice = {
aliases: string[]
}
-export type ErrorMessage = {
- type: "stderr";
- message: string;
-}
-
-export type OutputMessage = {
- type: "output";
- file: Blob;
-}
-
-export type FetchMessage = {
- type: "fetch";
+export type Progress = {
url: string;
total: number;
loaded: number;
@@ -48,6 +37,4 @@ export type InferenceConfg = {
voiceId: VoiceId
};
-export type MessageData = ErrorMessage | OutputMessage | FetchMessage;
-
-export type ProgressCallback = (progress: Omit) => void;
+export type ProgressCallback = (progress: Progress) => void;
diff --git a/src/voices.spec.ts b/src/voices.spec.ts
new file mode 100644
index 0000000..99af360
--- /dev/null
+++ b/src/voices.spec.ts
@@ -0,0 +1,45 @@
+import { test, expect, Page } from '@playwright/test';
+import * as tts from '.';
+
+test.describe.configure({ mode: 'serial' });
+
+let page: Page;
+
+test.describe('The voices method', () => {
+ test.beforeAll(async ({ browser }) => {
+ page = await browser.newPage();
+ await page.goto('http://localhost:5173/');
+ });
+
+ test.afterEach(async () => { // wipe the whole opfs after each test
+ await page.evaluate(async () => {
+ // @ts-ignore
+ await (await navigator.storage.getDirectory()).remove({ recursive: true });
+ });
+ });
+
+ test('should be able to fetch more than one hundred voices', async () => {
+ const voices = await page.evaluate(async () => {
+ return await tts.voices()
+ });
+ expect(voices.length).toBeGreaterThan(100);
+
+ for (const voice of voices) { // every voice must carry non-empty metadata fields
+ expect(typeof voice.key == 'string').toBe(true)
+ expect(voice.key.length).toBeGreaterThan(0);
+
+ expect(typeof voice.name == 'string').toBe(true)
+ expect(voice.name.length).toBeGreaterThan(0);
+
+ expect(typeof voice.language.code == 'string').toBe(true)
+ expect(voice.language.code.length).toBeGreaterThan(0);
+
+ expect(typeof voice.quality == 'string').toBe(true)
+ expect(voice.quality.length).toBeGreaterThan(0);
+
+ expect(typeof voice.num_speakers == 'number').toBe(true);
+
+ expect(Object.keys(voice.files).length).toBe(3);
+ }
+ })
+});
diff --git a/src/worker.ts b/src/worker.ts
deleted file mode 100644
index 53fbbd7..0000000
--- a/src/worker.ts
+++ /dev/null
@@ -1,95 +0,0 @@
-import * as ort from 'onnxruntime-web';
-// @ts-ignore
-import { createPiperPhonemize } from './piper.js';
-import { ErrorMessage, FetchMessage, InferenceConfg, OutputMessage } from './types';
-import { HF_BASE, ONNX_BASE, PATH_MAP } from './fixtures';
-import { readBlob, writeBlob } from './opfs';
-import { fetchBlob } from './http.js';
-import { pcm2wav } from './audio';
-
-type MessageData = InferenceConfg & { type?: 'init' }
-
-const WASM_URL = new URL('/piper.wasm', import.meta.url).href;
-const DATA_URL = new URL('/piper.data', import.meta.url).href;
-
-async function handleMessage(event: MessageEvent) {
- const data = event.data;
-
- if (data?.type != 'init') return;
-
- const path = PATH_MAP[data.voiceId];
- const input = JSON.stringify([{ text: data.text.trim() }])
-
- const piperPhonemizeWasm = (await createBlobUrl(WASM_URL)).url;
- const piperPhonemizeData = (await createBlobUrl(DATA_URL)).url;
-
- ort.env.wasm.numThreads = navigator.hardwareConcurrency;
- ort.env.wasm.wasmPaths = ONNX_BASE;
-
- const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob;
- const modelConfig = JSON.parse(await modelConfigBlob.text());
-
- const phonemeIds: string[] = await new Promise(async resolve => {
- const module = await createPiperPhonemize({
- print: (data: any) => {
- resolve(JSON.parse(data).phoneme_ids);
- },
- printErr: (message: any) => {
- self.postMessage({ type: "stderr", message } satisfies ErrorMessage);
- },
- locateFile: (url: string) => {
- if (url.endsWith(".wasm")) return piperPhonemizeWasm;
- if (url.endsWith(".data")) return piperPhonemizeData;
- return url;
- }
- });
-
- module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]);
- });
-
- const speakerId = 0;
- const sampleRate = modelConfig.audio.sample_rate;
- const noiseScale = modelConfig.inference.noise_scale;
- const lengthScale = modelConfig.inference.length_scale;
- const noiseW = modelConfig.inference.noise_w;
-
- const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`)).url;
- const session = await ort.InferenceSession.create(modelBlob);
- const feeds = {
- input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]),
- input_lengths: new ort.Tensor("int64", [phonemeIds.length]),
- scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW])
- }
- if (Object.keys(modelConfig.speaker_id_map).length) {
- Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) })
- }
-
- const { output: { data: pcm } } = await session.run(feeds);
-
- const file = new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" });
-
- self.postMessage({ type: "output", file } satisfies OutputMessage);
-}
-
-async function createBlobUrl(url: string) {
- let blob: Blob | undefined = await readBlob(url);
-
- if (!blob) {
- blob = await fetchBlob(url, (data) => {
- if (url.match('https://huggingface.co')) {
- self.postMessage({
- ...data,
- type: "fetch"
- } satisfies FetchMessage)
- }
- });
- await writeBlob(url, blob);
- }
-
- return {
- url: URL.createObjectURL(blob),
- blob
- };
-}
-
-self.addEventListener("message", handleMessage);
diff --git a/vite.config.js b/vite.config.js
deleted file mode 100644
index e0735ab..0000000
--- a/vite.config.js
+++ /dev/null
@@ -1,29 +0,0 @@
-import path from 'path';
-import { defineConfig } from 'vite';
-import dts from 'vite-plugin-dts';
-
-export default defineConfig(({ command }) => {
- let publicDir = true;
- if (command === 'build') {
- publicDir = false;
- }
-
- return {
- publicDir,
- build: {
- lib: {
- entry: path.resolve(__dirname, 'src/index.ts'),
- name: 'vits-web',
- formats: ['es'],
- fileName: 'vits-web'
- },
- },
- plugins: [dts()],
- server: {
- headers: {
- 'Cross-Origin-Embedder-Policy': 'require-corp',
- 'Cross-Origin-Opener-Policy': 'same-origin',
- },
- },
- }
-});
\ No newline at end of file
diff --git a/vite.config.ts b/vite.config.ts
new file mode 100644
index 0000000..8a54d3c
--- /dev/null
+++ b/vite.config.ts
@@ -0,0 +1,26 @@
+import path from 'path';
+import { defineConfig } from 'vite';
+import dts from 'vite-plugin-dts';
+
+export default defineConfig({ // library build (ES module only) plus dev-server settings
+ build: {
+ lib: {
+ entry: path.resolve(__dirname, 'src/index.ts'),
+ name: 'vits-web',
+ formats: ['es']
+ },
+ rollupOptions: {
+ external: [
+ /\.spec\.ts$/, // a RegExp, because rollup treats strings as exact module ids, not globs
+ 'onnxruntime-web' // peer dependency, must not be bundled
+ ],
+ },
+ },
+ plugins: [dts({ exclude: "**/*.spec.ts" })],
+ server: {
+ headers: { // cross-origin isolation; presumably needed for multi-threaded wasm - TODO confirm
+ 'Cross-Origin-Embedder-Policy': 'require-corp',
+ 'Cross-Origin-Opener-Policy': 'same-origin',
+ },
+ },
+});
\ No newline at end of file