diff --git a/.gitignore b/.gitignore index a547bf3..2afd27a 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ dist-ssr *.njsproj *.sln *.sw? +test-results +playwright-report diff --git a/README.md b/README.md new file mode 100644 index 0000000..100b93a --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/diffusion-studio/ffmpeg-js/graphs/commit-activity) +[![Website shields.io](https://img.shields.io/website-up-down-green-red/http/shields.io.svg)](https://diffusion.studio) +[![Discord](https://badgen.net/badge/icon/discord?icon=discord&label)](https://discord.gg/n3mpzfejAb) +[![GitHub license](https://badgen.net/github/license/Naereen/Strapdown.js)](https://github.com/diffusion-studio/ffmpeg-js/blob/main/LICENSE) +[![TypeScript](https://badgen.net/badge/icon/typescript?icon=typescript&label)](https://typescriptlang.org) + +# Use VITS models in the browser powered by the [ONNX Runtime](https://onnxruntime.ai/) + +A big shout-out goes to [Rhasspy Piper](https://github.com/rhasspy/piper), who open-sourced all the currently available models (MIT License) and to [@jozefchutka](https://github.com/jozefchutka) who came up with the wasm build steps. + +## Usage +First of all, you need to install the library: +```bash +npm i --save @diffusionstudio/vits-web +``` + +Then you're able to import the library like this (ES only) +```typescript +import * as tts from '@diffusionstudio/vits-web'; + +// Hint: onnxruntime-web is a peer dependency +``` + +Now you can start synthesizing speech! 
+```typescript +const wav = await tts.predict({ + text: "Text to speech in the browser is amazing!", + voiceId: 'en_US-hfc_female-medium', +}); + +// available in Web Worker + +const audio = new Audio(); +audio.src = URL.createObjectURL(wav); +audio.play(); +``` + +With the initial run of the predict function you will download the model which will then be stored in your [Origin private file system](https://developer.mozilla.org/en-US/docs/Web/API/File_System_API/Origin_private_file_system). You can also do this manually in advance *(recommended)*, as follows: +```typescript +await tts.download('en_US-hfc_female-medium', (progress) => { + console.log(`Downloading ${progress.url} - ${Math.round(progress.loaded * 100 / progress.total)}%`); +}); +``` + +The predict function also accepts a download progress callback as the second argument (`tts.predict(..., console.log)`).
+ +If you want to know which models have already been stored, do the following +```typescript +console.log(await tts.stored()); + +// will log ['en_US-hfc_female-medium'] +``` + +You can remove models from opfs by calling +```typescript +await tts.remove('en_US-hfc_female-medium'); + +// alternatively delete all + +await tts.flush(); +``` + +And last but not least use this snippet if you would like to retrieve all available voices: +```typescript +console.log(await tts.voices()); + +// Hint: the key can be used as voiceId +``` + +### **That's it!** Happy coding :) diff --git a/package-lock.json b/package-lock.json index 22fe2e8..a64ed11 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,13 +8,14 @@ "name": "@diffusionstudio/vits-web", "version": "1.0.0", "license": "MIT", - "dependencies": { - "onnxruntime-web": "^1.18.0" - }, "devDependencies": { + "@playwright/test": "^1.35.1", "typescript": "^5.2.2", "vite": "^5.3.1", "vite-plugin-dts": "^3.9.1" + }, + "peerDependencies": { + "onnxruntime-web": "^1.18.0" } }, "node_modules/@babel/parser": { @@ -513,35 +514,56 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/@playwright/test": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.45.1.tgz", + "integrity": "sha512-Wo1bWTzQvGA7LyKGIZc8nFSTFf2TkthGIFBR+QVNilvwouGzFd4PYukZe3rvf5PSqjHi1+1NyKSDZKcQWETzaA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.45.1" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/base64": { "version": "1.1.2", "resolved": 
"https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/codegen": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz", "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/eventemitter": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/fetch": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", "license": "BSD-3-Clause", + "peer": true, "dependencies": { "@protobufjs/aspromise": "^1.1.1", "@protobufjs/inquire": "^1.1.0" @@ -551,31 +573,36 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/inquire": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz", "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/path": { "version": "1.1.2", "resolved": 
"https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/pool": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@protobufjs/utf8": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz", "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==", - "license": "BSD-3-Clause" + "license": "BSD-3-Clause", + "peer": true }, "node_modules/@rollup/pluginutils": { "version": "5.1.0", @@ -909,6 +936,7 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.9.tgz", "integrity": "sha512-06OCtnTXtWOZBJlRApleWndH4JsRVs1pDCc8dLSQp+7PpUpX3ePdHyeNSFTeSe7FtKyQkrlPvHwJOW3SLd8Oyg==", "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~5.26.4" } @@ -1200,7 +1228,8 @@ "version": "1.12.0", "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz", "integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==", - "license": "SEE LICENSE IN LICENSE.txt" + "license": "SEE LICENSE IN LICENSE.txt", + "peer": true }, "node_modules/fs-extra": { "version": "7.0.1", @@ -1253,7 +1282,8 @@ "version": "1.0.9", "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz", "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==", - "license": "ISC" + "license": "ISC", + "peer": true }, "node_modules/has-flag": { "version": "4.0.0", @@ -1370,7 +1400,8 @@ "version": "5.2.3", 
"resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz", "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==", - "license": "Apache-2.0" + "license": "Apache-2.0", + "peer": true }, "node_modules/lru-cache": { "version": "6.0.0", @@ -1445,13 +1476,15 @@ "version": "1.18.0", "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz", "integrity": "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/onnxruntime-web": { "version": "1.18.0", "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.18.0.tgz", "integrity": "sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ==", "license": "MIT", + "peer": true, "dependencies": { "flatbuffers": "^1.12.0", "guid-typescript": "^1.0.9", @@ -1499,7 +1532,55 @@ "version": "1.3.6", "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", - "license": "MIT" + "license": "MIT", + "peer": true + }, + "node_modules/playwright": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.45.1.tgz", + "integrity": "sha512-Hjrgae4kpSQBr98nhCj3IScxVeVUixqj+5oyif8TdIn2opTCPEzqAqNMeK42i3cWDCVu9MI+ZsGWw+gVR4ISBg==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.45.1" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.45.1.tgz", + "integrity": "sha512-LF4CUUtrUu2TCpDw4mcrAIuYrEjVDfT1cHbJMfwnE2+1b8PZcFzPNgvZCvq2JfQ4aTjRCCHw5EJ2tmr2NSzdPg==", + 
"dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/playwright/node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } }, "node_modules/postcss": { "version": "8.4.39", @@ -1536,6 +1617,7 @@ "integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==", "hasInstallScript": true, "license": "BSD-3-Clause", + "peer": true, "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", @@ -1731,7 +1813,8 @@ "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/universalify": { "version": "0.1.2", diff --git a/package.json b/package.json index 20ce519..684937d 100644 --- a/package.json +++ b/package.json @@ -44,14 +44,16 @@ "scripts": { "dev": "vite", "build": "rm -r -f ./dist && tsc && vite build", - "preview": "vite preview" + "preview": "vite preview", + "test": "npx playwright test --project=chromium" }, "devDependencies": { "typescript": "^5.2.2", "vite": "^5.3.1", - "vite-plugin-dts": "^3.9.1" + "vite-plugin-dts": "^3.9.1", + "@playwright/test": "^1.35.1" }, - "dependencies": { + "peerDependencies": { "onnxruntime-web": "^1.18.0" } } \ No newline at end of file diff --git a/playwright.config.ts b/playwright.config.ts new file mode 100644 index 0000000..7aafb3c --- /dev/null +++ b/playwright.config.ts @@ -0,0 +1,55 @@ +import { 
defineConfig, devices } from '@playwright/test'; + +/** + * Read environment variables from file. + * https://github.com/motdotla/dotenv + */ +// require('dotenv').config(); + +/** + * See https://playwright.dev/docs/test-configuration. + */ +export default defineConfig({ + webServer: { + command: 'npm run dev', + url: 'http://localhost:5173/', + }, + timeout: 5 * 60 * 1000, + testDir: './src', + /* Do not run tests within a single file in parallel */ + fullyParallel: false, + /* Fail the build on CI if you accidentally left test.only in the source code. */ + forbidOnly: !!process.env.CI, + /* Retry on CI only */ + retries: process.env.CI ? 2 : 0, + /* Always use a single worker, locally and on CI. */ + workers: 1, + /* Reporter to use. See https://playwright.dev/docs/test-reporters */ + reporter: 'html', + /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ + use: { + /* Base URL to use in actions like `await page.goto('/')`. */ + // baseURL: 'http://127.0.0.1:3000', + + /* Collect trace when retrying the failed test. 
See https://playwright.dev/docs/trace-viewer */ + trace: 'on-first-retry', + }, + + /* Configure projects for major browsers */ + projects: [ + { + name: 'chromium', + use: { ...devices['Desktop Chrome'] }, + }, + + // { + // name: 'firefox', + // use: { ...devices['Desktop Firefox'] }, + // }, + + // { + // name: 'webkit', + // use: { ...devices['Desktop Safari'] }, + // }, + ], +}); \ No newline at end of file diff --git a/public/piper.data b/public/piper.data deleted file mode 100644 index b193ff2..0000000 Binary files a/public/piper.data and /dev/null differ diff --git a/public/piper.wasm b/public/piper.wasm deleted file mode 100644 index a5cb3b5..0000000 Binary files a/public/piper.wasm and /dev/null differ diff --git a/src/index.ts b/src/index.ts index b000e6c..39f87fc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,5 @@ export * from './inference'; export * from './storage'; export * from './voices'; +export * from './types'; +export * from './fixtures'; diff --git a/src/inference.spec.ts b/src/inference.spec.ts new file mode 100644 index 0000000..620ac7f --- /dev/null +++ b/src/inference.spec.ts @@ -0,0 +1,81 @@ +import { test, expect, Page } from '@playwright/test'; +import * as tts from '.'; + +test.describe.configure({ mode: 'serial' }); + +let page: Page; + +test.describe('The inference methods', () => { + test.beforeAll(async ({ browser }) => { + page = await browser.newPage(); + await page.goto('http://localhost:5173/'); + }); + + test.afterEach(async () => { + await page.evaluate(async () => { + // @ts-ignore + await (await navigator.storage.getDirectory()).remove({ recursive: true }); + }); + }) + + test('should be able to generate a voice and download models', async () => { + + let stored = await page.evaluate(async () => { + return await tts.stored(); + }); + // make sure opfs is empty + expect(stored.length).toBe(0); + + // load model from huggingface + + const result = await page.evaluate(async () => { + const calls: tts.Progress[] = []; 
+ + const fn: tts.ProgressCallback = (progress) => { + calls.push(progress); + } + + const audio = await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn); + const arrayBuffer = await audio.arrayBuffer(); + const { size, type } = audio; + + return { + calls, + size, + type, + byteLength: arrayBuffer.byteLength, + } + }); + + // check progress + expect(result.calls.length).toBeGreaterThan(10); + expect(result.calls[10].url).toMatch('en_US-danny-low'); + expect(typeof result.calls[10].total == 'number').toBe(true); + expect(typeof result.calls[10].loaded == 'number').toBe(true); + + expect(result.byteLength).toBeGreaterThan(1e3); + expect(result.size).toBeGreaterThan(1e3); + expect(result.type).toBe('audio/x-wav'); + + stored = await page.evaluate(async () => { + return await tts.stored(); + }); + // make sure the voice is now stored in opfs + expect(stored.length).toBe(1); + + // load model from memory + // use the same model again + const calls = await page.evaluate(async () => { + const calls: tts.Progress[] = []; + + const fn: tts.ProgressCallback = (progress) => { + calls.push(progress); + } + + await tts.predict({ text: 'Hello World', voiceId: 'en_US-danny-low' }, fn); + return calls + }); + + expect(calls.length).toBe(0); + }); +}); diff --git a/src/inference.ts b/src/inference.ts index 320456c..4e60236 100644 --- a/src/inference.ts +++ b/src/inference.ts @@ -1,37 +1,81 @@ -import { InferenceConfg, MessageData, ProgressCallback } from "./types"; -import Worker from './worker.ts?worker' +import { InferenceConfg, ProgressCallback } from "./types"; +import { HF_BASE, ONNX_BASE, PATH_MAP, WASM_BASE } from './fixtures'; +import { readBlob, writeBlob } from './opfs'; +import { fetchBlob } from './http.js'; +import { pcm2wav } from './audio'; /** * Run text to speech inference in new worker thread. Fetches the model * first, if it has not yet been saved to opfs yet.
*/ export async function predict(config: InferenceConfg, callback?: ProgressCallback): Promise { - const worker = new Worker() + // @ts-ignore + const { createPiperPhonemize } = await import('./piper.js'); + const ort = await import('onnxruntime-web'); - worker.postMessage({ type: 'init', ...config }); + const path = PATH_MAP[config.voiceId]; + const input = JSON.stringify([{ text: config.text.trim() }]) - return await new Promise((resolve, reject) => { - function eventHandler(event: MessageEvent) { - const data = event.data; + const piperPhonemizeWasm = (await createBlobUrl(`${WASM_BASE}.wasm`)).url; + const piperPhonemizeData = (await createBlobUrl(`${WASM_BASE}.data`)).url; - if (data.type == 'output') { - worker.terminate(); - resolve(data.file); - } - if (data.type == 'stderr') { - worker.terminate(); - reject(data.message); - } - if (data.type == 'fetch') { - const { loaded, total, url } = data; - callback?.({ loaded, total, url }); - } - worker.onerror = () => { - worker.terminate(); - reject() - } - } + ort.env.wasm.numThreads = navigator.hardwareConcurrency; + ort.env.wasm.wasmPaths = ONNX_BASE; - worker.addEventListener('message', eventHandler) + const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob; + const modelConfig = JSON.parse(await modelConfigBlob.text()); + + const phonemeIds: string[] = await new Promise(async resolve => { + const module = await createPiperPhonemize({ + print: (data: any) => { + resolve(JSON.parse(data).phoneme_ids); + }, + printErr: (message: any) => { + throw new Error(message); + }, + locateFile: (url: string) => { + if (url.endsWith(".wasm")) return piperPhonemizeWasm; + if (url.endsWith(".data")) return piperPhonemizeData; + return url; + } + }); + + module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]); }); + + const speakerId = 0; + const sampleRate = modelConfig.audio.sample_rate; + const noiseScale = modelConfig.inference.noise_scale; + const 
lengthScale = modelConfig.inference.length_scale; + const noiseW = modelConfig.inference.noise_w; + + const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`, callback)).url; + const session = await ort.InferenceSession.create(modelBlob); + const feeds = { + input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]), + input_lengths: new ort.Tensor("int64", [phonemeIds.length]), + scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW]) + } + if (Object.keys(modelConfig.speaker_id_map).length) { + Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) }) + } + + const { output: { data: pcm } } = await session.run(feeds); + + return new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" }); } + +async function createBlobUrl(url: string, callback?: ProgressCallback) { + let blob: Blob | undefined = await readBlob(url); + + if (!blob) { + blob = await fetchBlob(url, callback); + await writeBlob(url, blob); + } + + return { + url: URL.createObjectURL(blob), + blob + }; +} + diff --git a/src/main.ts b/src/main.ts index 651902f..0ff49e9 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,5 +1,7 @@ import * as tts from './index'; +Object.assign(window, { tts }); + document.querySelector('#app')!.innerHTML = ` ` diff --git a/src/opfs.ts b/src/opfs.ts index 511064d..4e61f51 100644 --- a/src/opfs.ts +++ b/src/opfs.ts @@ -12,7 +12,9 @@ export async function writeBlob(url: string, blob: Blob): Promise { const writable = await file.createWritable(); await writable.write(blob); await writable.close(); - } catch (_) { } + } catch (e) { + console.error(e) + } } export async function removeBlob(url: string) { @@ -21,8 +23,10 @@ export async function removeBlob(url: string) { const dir = await root.getDirectoryHandle('piper'); const path = url.split('/').at(-1)!; const file = await dir.getFileHandle(path); // @ts-ignore - file.remove(); - } catch (_) { } + await file.remove(); + } catch (e) { + console.error(e) + } } export async 
function readBlob(url: string): Promise { diff --git a/src/storage.spec.ts b/src/storage.spec.ts new file mode 100644 index 0000000..2145311 --- /dev/null +++ b/src/storage.spec.ts @@ -0,0 +1,91 @@ +import { test, expect, Page } from '@playwright/test'; +import * as tts from '.'; +import { PATH_MAP } from './fixtures'; + +test.describe.configure({ mode: 'serial' }); + +let page: Page; + +test.describe('The storage methods', () => { + test.beforeAll(async ({ browser }) => { + page = await browser.newPage(); + await page.goto('http://localhost:5173/'); + }); + + test.afterEach(async () => { + await page.evaluate(async () => { + // @ts-ignore + await (await navigator.storage.getDirectory()).remove({ recursive: true }); + }); + }) + + test('should be able to download new voices', async () => { + let stored = await page.evaluate(async () => { + return await tts.stored(); + }); + // make sure opfs is empty + expect(stored.length).toBe(0); + + let calls = await page.evaluate(async () => { + const calls: tts.Progress[] = []; + + const fn: tts.ProgressCallback = (progress) => { + calls.push(progress); + } + + await tts.download('en_US-amy-low', fn); + + return calls; + }); + + // check progress + expect(calls.length).toBeGreaterThan(10); + expect(calls[10].url).toMatch('en_US-amy-low'); + expect(typeof calls[10].total == 'number').toBe(true); + expect(typeof calls[10].loaded == 'number').toBe(true); + + // check stored file + stored = await page.evaluate(async () => { + return await tts.stored(); + }); + expect(stored.length).toBe(1); + expect(stored[0]).toBe('en_US-amy-low'); + }); + + test('should be able to delete selected voices', async () => { + let stored = await page.evaluate(async () => { + return await tts.stored(); + }); + expect(stored.length).toBe(0); + + await page.evaluate(async (pathmap) => { + const root = await navigator.storage.getDirectory(); + const dir = await root.getDirectoryHandle('piper', { create: true }); + + const voice0 = 
pathmap['de_DE-eva_k-x_low'].split('/').at(-1)!; + const voice1 = pathmap['ca_ES-upc_ona-medium'].split('/').at(-1)!; + + await dir.getFileHandle(voice0, { create: true }); + await dir.getFileHandle(voice0 + '.json', { create: true }); + + await dir.getFileHandle(voice1, { create: true }); + await dir.getFileHandle(voice1 + '.json', { create: true }); + }, PATH_MAP); + + + stored = await page.evaluate(async () => { + return await tts.stored(); + }); + expect(stored.length).toBe(2); + + await page.evaluate(async () => { + await tts.remove('de_DE-eva_k-x_low'); + }); + + stored = await page.evaluate(async () => { + return await tts.stored(); + }); + expect(stored.length).toBe(1); + expect(stored[0]).toBe('ca_ES-upc_ona-medium'); + }); +}); diff --git a/src/storage.ts b/src/storage.ts index abb285e..8584cc4 100644 --- a/src/storage.ts +++ b/src/storage.ts @@ -12,7 +12,7 @@ export async function download(voiceId: VoiceId, callback?: ProgressCallback): P const urls = [`${HF_BASE}/${path}`, `${HF_BASE}/${path}.json`] await Promise.all(urls.map(async (url) => { - writeBlob(url, await fetchBlob(url, callback)); + writeBlob(url, await fetchBlob(url, url.endsWith('.onnx') ? 
callback : undefined)); })); } @@ -51,11 +51,11 @@ export async function stored(): Promise { * Delete the models directory */ export async function flush() { - const root = await navigator.storage.getDirectory(); - const dir = await root.getDirectoryHandle('piper', { - create: true, - }); - - // @ts-ignore - await dir.remove({ recursive: true }); + try { + const root = await navigator.storage.getDirectory(); + const dir = await root.getDirectoryHandle('piper'); // @ts-ignore + await dir.remove({ recursive: true }); + } catch (e) { + console.error(e) + } } diff --git a/src/types.ts b/src/types.ts index 56ab816..9da5b5d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -26,18 +26,7 @@ export type Voice = { aliases: string[] } -export type ErrorMessage = { - type: "stderr"; - message: string; -} - -export type OutputMessage = { - type: "output"; - file: Blob; -} - -export type FetchMessage = { - type: "fetch"; +export type Progress = { url: string; total: number; loaded: number; @@ -48,6 +37,4 @@ export type InferenceConfg = { voiceId: VoiceId }; -export type MessageData = ErrorMessage | OutputMessage | FetchMessage; - -export type ProgressCallback = (progress: Omit) => void; +export type ProgressCallback = (progress: Progress) => void; diff --git a/src/voices.spec.ts b/src/voices.spec.ts new file mode 100644 index 0000000..99af360 --- /dev/null +++ b/src/voices.spec.ts @@ -0,0 +1,45 @@ +import { test, expect, Page } from '@playwright/test'; +import * as tts from '.'; + +test.describe.configure({ mode: 'serial' }); + +let page: Page; + +test.describe('The voices method', () => { + test.beforeAll(async ({ browser }) => { + page = await browser.newPage(); + await page.goto('http://localhost:5173/'); + }); + + test.afterEach(async () => { + await page.evaluate(async () => { + // @ts-ignore + await (await navigator.storage.getDirectory()).remove({ recursive: true }); + }); + }); + + test('should be able to fetch more than one hundred voices', async () => { + const voices = 
await page.evaluate(async () => { + return await tts.voices() + }); + expect(voices.length).toBeGreaterThan(100); + + for (const voice of voices) { + expect(typeof voice.key == 'string').toBe(true) + expect(voice.key.length).toBeGreaterThan(0); + + expect(typeof voice.name == 'string').toBe(true) + expect(voice.name.length).toBeGreaterThan(0); + + expect(typeof voice.language.code == 'string').toBe(true) + expect(voice.language.code.length).toBeGreaterThan(0); + + expect(typeof voice.quality == 'string').toBe(true) + expect(voice.quality.length).toBeGreaterThan(0); + + expect(typeof voice.num_speakers == 'number').toBe(true); + + expect(Object.keys(voice.files).length).toBe(3); + } + }) +}); diff --git a/src/worker.ts b/src/worker.ts deleted file mode 100644 index 53fbbd7..0000000 --- a/src/worker.ts +++ /dev/null @@ -1,95 +0,0 @@ -import * as ort from 'onnxruntime-web'; -// @ts-ignore -import { createPiperPhonemize } from './piper.js'; -import { ErrorMessage, FetchMessage, InferenceConfg, OutputMessage } from './types'; -import { HF_BASE, ONNX_BASE, PATH_MAP } from './fixtures'; -import { readBlob, writeBlob } from './opfs'; -import { fetchBlob } from './http.js'; -import { pcm2wav } from './audio'; - -type MessageData = InferenceConfg & { type?: 'init' } - -const WASM_URL = new URL('/piper.wasm', import.meta.url).href; -const DATA_URL = new URL('/piper.data', import.meta.url).href; - -async function handleMessage(event: MessageEvent) { - const data = event.data; - - if (data?.type != 'init') return; - - const path = PATH_MAP[data.voiceId]; - const input = JSON.stringify([{ text: data.text.trim() }]) - - const piperPhonemizeWasm = (await createBlobUrl(WASM_URL)).url; - const piperPhonemizeData = (await createBlobUrl(DATA_URL)).url; - - ort.env.wasm.numThreads = navigator.hardwareConcurrency; - ort.env.wasm.wasmPaths = ONNX_BASE; - - const modelConfigBlob = (await createBlobUrl(`${HF_BASE}/${path}.json`)).blob; - const modelConfig = JSON.parse(await 
modelConfigBlob.text()); - - const phonemeIds: string[] = await new Promise(async resolve => { - const module = await createPiperPhonemize({ - print: (data: any) => { - resolve(JSON.parse(data).phoneme_ids); - }, - printErr: (message: any) => { - self.postMessage({ type: "stderr", message } satisfies ErrorMessage); - }, - locateFile: (url: string) => { - if (url.endsWith(".wasm")) return piperPhonemizeWasm; - if (url.endsWith(".data")) return piperPhonemizeData; - return url; - } - }); - - module.callMain(["-l", modelConfig.espeak.voice, "--input", input, "--espeak_data", "/espeak-ng-data"]); - }); - - const speakerId = 0; - const sampleRate = modelConfig.audio.sample_rate; - const noiseScale = modelConfig.inference.noise_scale; - const lengthScale = modelConfig.inference.length_scale; - const noiseW = modelConfig.inference.noise_w; - - const modelBlob = (await createBlobUrl(`${HF_BASE}/${path}`)).url; - const session = await ort.InferenceSession.create(modelBlob); - const feeds = { - input: new ort.Tensor("int64", phonemeIds, [1, phonemeIds.length]), - input_lengths: new ort.Tensor("int64", [phonemeIds.length]), - scales: new ort.Tensor("float32", [noiseScale, lengthScale, noiseW]) - } - if (Object.keys(modelConfig.speaker_id_map).length) { - Object.assign(feeds, { sid: new ort.Tensor("int64", [speakerId]) }) - } - - const { output: { data: pcm } } = await session.run(feeds); - - const file = new Blob([pcm2wav(pcm as Float32Array, 1, sampleRate)], { type: "audio/x-wav" }); - - self.postMessage({ type: "output", file } satisfies OutputMessage); -} - -async function createBlobUrl(url: string) { - let blob: Blob | undefined = await readBlob(url); - - if (!blob) { - blob = await fetchBlob(url, (data) => { - if (url.match('https://huggingface.co')) { - self.postMessage({ - ...data, - type: "fetch" - } satisfies FetchMessage) - } - }); - await writeBlob(url, blob); - } - - return { - url: URL.createObjectURL(blob), - blob - }; -} - -self.addEventListener("message", 
handleMessage); diff --git a/vite.config.js b/vite.config.js deleted file mode 100644 index e0735ab..0000000 --- a/vite.config.js +++ /dev/null @@ -1,29 +0,0 @@ -import path from 'path'; -import { defineConfig } from 'vite'; -import dts from 'vite-plugin-dts'; - -export default defineConfig(({ command }) => { - let publicDir = true; - if (command === 'build') { - publicDir = false; - } - - return { - publicDir, - build: { - lib: { - entry: path.resolve(__dirname, 'src/index.ts'), - name: 'vits-web', - formats: ['es'], - fileName: 'vits-web' - }, - }, - plugins: [dts()], - server: { - headers: { - 'Cross-Origin-Embedder-Policy': 'require-corp', - 'Cross-Origin-Opener-Policy': 'same-origin', - }, - }, - } -}); \ No newline at end of file diff --git a/vite.config.ts b/vite.config.ts new file mode 100644 index 0000000..8a54d3c --- /dev/null +++ b/vite.config.ts @@ -0,0 +1,26 @@ +import path from 'path'; +import { defineConfig } from 'vite'; +import dts from 'vite-plugin-dts'; + +export default defineConfig({ + build: { + lib: { + entry: path.resolve(__dirname, 'src/index.ts'), + name: 'vits-web', + formats: ['es'] + }, + rollupOptions: { + external: [ + '**/*.spec.ts', + 'onnxruntime-web' + ], + }, + }, + plugins: [dts({ exclude: "**/*.spec.ts" })], + server: { + headers: { + 'Cross-Origin-Embedder-Policy': 'require-corp', + 'Cross-Origin-Opener-Policy': 'same-origin', + }, + }, +}); \ No newline at end of file