Initial public commit

This commit is contained in:
Keith Horwood
2024-09-30 11:59:00 -07:00
commit 2cb4b7816a
75 changed files with 38554 additions and 0 deletions
+8
View File
@@ -0,0 +1,8 @@
{
"parserOptions": {
"sourceType": "module"
},
"env": {
"es2022": true
}
}
+31
View File
@@ -0,0 +1,31 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# production
/build
# packaging
*.zip
*.tar.gz
*.tar
*.tgz
*.bla
# misc
.DS_Store
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*
+5
View File
@@ -0,0 +1,5 @@
{
"tabWidth": 2,
"useTabs": false,
"singleQuote": true
}
+21
View File
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 OpenAI
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+346
View File
@@ -0,0 +1,346 @@
# OpenAI Realtime Console
The OpenAI Realtime Console is intended as an inspector and interactive API reference
for the OpenAI Realtime API. It comes packaged with two utility libraries,
[openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta)
that acts as a **Reference Client** (for browser and Node.js) and
[`/src/lib/wavtools`](./src/lib/wavtools) which allows for simple Audio
management in the browser.
<img src="/readme/realtime-console.png" width="800" />
# Starting the console
This is a React project created using `create-react-app` that is bundled via Webpack.
Install it by extracting the contents of this package and using:
```shell
$ npm i
```
Start your server with:
```shell
$ npm start
```
It should be available via `localhost:3000`.
# Table of contents
1. [Using the console](#using-the-console)
1. [Using a server relay](#using-a-server-relay)
1. [Realtime API reference client](#realtime-api-reference-client)
1. [Sending streaming audio](#sending-streaming-audio)
1. [Adding and using tools](#adding-and-using-tools)
1. [Interrupting the model](#interrupting-the-model)
1. [Reference client events](#reference-client-events)
1. [Wavtools](#wavtools)
1. [WavRecorder quickstart](#wavrecorder-quickstart)
1. [WavStreamPlayer quickstart](#wavstreamplayer-quickstart)
1. [Acknowledgements and contact](#acknowledgements-and-contact)
# Using the console
The console requires an OpenAI API key (**user key** or **project key**) that has access to the
Realtime API. You'll be prompted on startup to enter it. It will be saved via `localStorage` and can be
changed at any time from the UI.
To start a session you'll need to **connect**. This will require microphone access.
You can then choose between **manual** (Push-to-talk) and **vad** (Voice Activity Detection)
conversation modes, and switch between them at any time.
There are two functions enabled:
- `get_weather`: Ask for the weather anywhere and the model will do its best to pinpoint the
location, show it on a map, and get the weather for that location. Note that it doesn't
have location access, and coordinates are "guessed" from the model's training data so
accuracy might not be perfect.
- `set_memory`: You can ask the model to remember information for you, and it will store it in
a JSON blob on the left.
You can freely interrupt the model at any time in push-to-talk or VAD mode.
## Using a server relay
If you would like to build a more robust implementation and play around with the client
on the server, we have included a Node.js [Relay Server](/relay-server/index.js). You
can run it with:
```shell
$ npm run relay
```
It will start automatically on `localhost:8081`. **You will need to create a `.env` file**
with `OPENAI_API_KEY=` set to your API key. Note that you should change the following code
in [`ConsolePage.tsx`](/src/pages/ConsolePage.tsx):
```
/**
* Change this if you want to connect to a local relay server!
* This will require you to set OPENAI_API_KEY= in a `.env` file
* You can run it with `npm run relay`, in parallel with `npm start`
*
* Simply switch the lines by commenting one out and uncommenting the other
*/
// const USE_LOCAL_RELAY_SERVER_URL: string | undefined = 'http://localhost:8081';
const USE_LOCAL_RELAY_SERVER_URL: string | undefined = void 0;
```
# Realtime API reference client
The latest reference client and documentation are available on GitHub at
[openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta).
You can use this client yourself in any React (front-end) or Node.js project.
For full documentation, refer to the GitHub repository, but you can use the
guide here as a primer to get started.
```javascript
import { RealtimeClient } from '/src/lib/realtime-api-beta/index.js';
const client = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY });
// Can set parameters ahead of connecting
client.updateSession({ instructions: 'You are a great, upbeat friend.' });
client.updateSession({ voice: 'alloy' });
client.updateSession({ turn_detection: 'server_vad' });
client.updateSession({ input_audio_transcription: { model: 'whisper-1' } });
// Set up event handling
client.on('conversation.updated', ({ item, delta }) => {
const items = client.conversation.getItems(); // can use this to render all items
/* includes all changes to conversations, delta may be populated */
});
// Connect to Realtime API
await client.connect();
// Send an item and trigger a generation
client.sendUserMessageContent([{ type: 'text', text: `How are you?` }]);
```
## Sending streaming audio
To send streaming audio, use the `.appendInputAudio()` method. If you're in `turn_detection: 'disabled'` mode,
then you need to use `.createResponse()` to tell the model to respond.
```javascript
// Send user audio, must be Int16Array or ArrayBuffer
// Default audio format is pcm16 with sample rate of 24,000 Hz
// This populates 1s of noise in 0.1s chunks
for (let i = 0; i < 10; i++) {
const data = new Int16Array(2400);
for (let n = 0; n < 2400; n++) {
const value = Math.floor((Math.random() * 2 - 1) * 0x8000);
data[n] = value;
}
client.appendInputAudio(data);
}
// Pending audio is committed and model is asked to generate
client.createResponse();
```
## Adding and using tools
Working with tools is easy. Just call `.addTool()` and set a callback as the second parameter.
The callback will be executed with the parameters for the tool, and the result will be automatically
sent back to the model.
```javascript
// We can add tools as well, with callbacks specified
client.addTool(
{
name: 'get_weather',
description:
'Retrieves the weather for a given lat, lng coordinate pair. Specify a label for the location.',
parameters: {
type: 'object',
properties: {
lat: {
type: 'number',
description: 'Latitude',
},
lng: {
type: 'number',
description: 'Longitude',
},
location: {
type: 'string',
description: 'Name of the location',
},
},
required: ['lat', 'lng', 'location'],
},
},
async ({ lat, lng, location }) => {
const result = await fetch(
`https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lng}&current=temperature_2m,wind_speed_10m`
);
const json = await result.json();
return json;
}
);
```
## Interrupting the model
You may want to manually interrupt the model, especially in `turn_detection: 'disabled'` mode.
To do this, we can use:
```javascript
// id is the id of the item currently being generated
// sampleCount is the number of audio samples that have been heard by the listener
client.cancelResponse(id, sampleCount);
```
This method will cause the model to immediately cease generation, but also truncate the
item being played by removing all audio after `sampleCount` and clearing the text
response. By using this method you can interrupt the model and prevent it from "remembering"
anything it has generated that is ahead of where the user's state is.
## Reference client events
There are five main client events for application control flow in `RealtimeClient`.
Note that this is only an overview of using the client. The full Realtime API
event specification is considerably larger; if you need more control, check out the GitHub repository:
[openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta).
```javascript
// errors like connection failures
client.on('error', (event) => {
// do thing
});
// in VAD mode, the user starts speaking
// we can use this to stop audio playback of a previous response if necessary
client.on('conversation.interrupted', () => {
/* do something */
});
// includes all changes to conversations
// delta may be populated
client.on('conversation.updated', ({ item, delta }) => {
// get all items, e.g. if you need to update a chat window
const items = client.conversation.getItems();
switch (item.type) {
case 'message':
// system, user, or assistant message (item.role)
break;
case 'function_call':
// always a function call from the model
break;
case 'function_call_output':
// always a response from the user / application
break;
}
if (delta) {
// Only one of the following will be populated for any given event
// delta.audio = Int16Array, audio added
// delta.transcript = string, transcript added
// delta.arguments = string, function arguments added
}
});
// only triggered after item added to conversation
client.on('conversation.item.appended', ({ item }) => {
/* item status can be 'in_progress' or 'completed' */
});
// only triggered after item completed in conversation
// will always be triggered after conversation.item.appended
client.on('conversation.item.completed', ({ item }) => {
/* item status will always be 'completed' */
});
```
# Wavtools
Wavtools contains easy management of PCM16 audio streams in the browser, both
recording and playing.
## WavRecorder Quickstart
```javascript
import { WavRecorder } from '/src/lib/wavtools/index.js';
const wavRecorder = new WavRecorder({ sampleRate: 24000 });
wavRecorder.getStatus(); // "ended"
// request permissions, connect microphone
await wavRecorder.begin();
wavRecorder.getStatus(); // "paused"
// Start recording
// This callback will be triggered in chunks of 8192 samples by default
// { mono, raw } are Int16Array (PCM16) mono & full channel data
await wavRecorder.record((data) => {
const { mono, raw } = data;
});
wavRecorder.getStatus(); // "recording"
// Stop recording
await wavRecorder.pause();
wavRecorder.getStatus(); // "paused"
// outputs "audio/wav" audio file
const audio = await wavRecorder.save();
// clears current audio buffer and starts recording
await wavRecorder.clear();
await wavRecorder.record();
// get data for visualization
const frequencyData = wavRecorder.getFrequencies();
// Stop recording, disconnects microphone, output file
await wavRecorder.pause();
const finalAudio = await wavRecorder.end();
// Listen for device change; e.g. if somebody disconnects a microphone
// deviceList is array of MediaDeviceInfo[] + `default` property
wavRecorder.listenForDeviceChange((deviceList) => {});
```
## WavStreamPlayer Quickstart
```javascript
import { WavStreamPlayer } from '/src/lib/wavtools/index.js';
const wavStreamPlayer = new WavStreamPlayer({ sampleRate: 24000 });
// Connect to audio output
await wavStreamPlayer.connect();
// Create 1s of empty PCM16 audio
const audio = new Int16Array(24000);
// Queue 3s of audio, will start playing immediately
wavStreamPlayer.add16BitPCM(audio, 'my-track');
wavStreamPlayer.add16BitPCM(audio, 'my-track');
wavStreamPlayer.add16BitPCM(audio, 'my-track');
// get data for visualization
const frequencyData = wavStreamPlayer.getFrequencies();
// Interrupt the audio (halt playback) at any time
// To restart, need to call .add16BitPCM() again
const trackOffset = await wavStreamPlayer.interrupt();
trackOffset.trackId; // "my-track"
trackOffset.offset; // sample number
trackOffset.currentTime; // time in track
```
# Acknowledgements and contact
Thanks for checking out the Realtime Console. We hope you have fun with the Realtime API.
Special thanks to the whole Realtime API team for making this possible. Please feel free
to reach out, ask questions, or give feedback by creating an issue on the repository.
You can also reach out and let us know what you think directly!
- OpenAI Developers / [@OpenAIDevs](https://x.com/OpenAIDevs)
- Jordan Sitkin / API / [@dustmason](https://x.com/dustmason)
- Mark Hudnall / API / [@landakram](https://x.com/landakram)
- Peter Bakkum / API / [@pbbakkum](https://x.com/pbbakkum)
- Atty Eleti / API / [@athyuttamre](https://x.com/athyuttamre)
- Karolis Kosas / Design / [@karoliskosas](https://x.com/karoliskosas)
- Keith Horwood / API + DX / [@keithwhor](https://x.com/keithwhor)
+32473
View File
File diff suppressed because it is too large Load Diff
+57
View File
@@ -0,0 +1,57 @@
{
"name": "realtime-js",
"version": "0.1.0",
"type": "module",
"private": true,
"dependencies": {
"@testing-library/jest-dom": "^5.17.0",
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"@types/jest": "^27.5.2",
"@types/leaflet": "^1.9.12",
"@types/node": "^16.18.108",
"@types/react": "^18.3.5",
"@types/react-dom": "^18.3.0",
"leaflet": "^1.9.4",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-feather": "^2.0.10",
"react-leaflet": "^4.2.1",
"react-scripts": "^5.0.1",
"sass": "^1.78.0",
"save": "^2.9.0",
"typescript": "^4.9.5",
"web-vitals": "^2.1.4",
"ws": "^8.18.0"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject",
"zip": "zip -r realtime-api-console.zip . -x 'node_modules' 'node_modules/*' 'node_modules/**' '.git' '.git/*' '.git/**' '.DS_Store' '*/.DS_Store' 'package-lock.json' '*.zip' '*.tar.gz' '*.tar' '*.bla'",
"relay": "nodemon ./relay-server/index.js"
},
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
},
"devDependencies": {
"@babel/plugin-proposal-private-property-in-object": "^7.21.11",
"nodemon": "^3.1.7"
}
}
+11
View File
@@ -0,0 +1,11 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="16" height="16" rx="4" fill="#715FDE"/>
<g clip-path="url(#clip0_14261_2043)">
<path d="M12.985 6.93776C13.1163 6.54363 13.1618 6.12606 13.1186 5.71294C13.0753 5.29981 12.9444 4.9007 12.7344 4.5423C12.4231 4.0002 11.9477 3.571 11.3767 3.31662C10.8057 3.06226 10.1687 2.99587 9.55745 3.12705C9.28178 2.81637 8.94294 2.56816 8.56356 2.39904C8.18418 2.22991 7.77308 2.14378 7.35774 2.14639C6.73286 2.14488 6.12364 2.34185 5.61791 2.70891C5.11218 3.07596 4.7361 3.59413 4.54385 4.18872C4.13677 4.27205 3.75219 4.44138 3.41585 4.68537C3.07951 4.92937 2.79918 5.24239 2.59362 5.6035C2.27988 6.14412 2.14596 6.77037 2.21117 7.39203C2.27638 8.01366 2.53734 8.5985 2.95643 9.06222C2.82521 9.45635 2.77969 9.87392 2.82291 10.287C2.86614 10.7002 2.99711 11.0993 3.20707 11.4577C3.51835 11.9998 3.99378 12.4289 4.56478 12.6833C5.13577 12.9377 5.77283 13.0041 6.38401 12.8729C6.65969 13.1836 6.99856 13.4318 7.3779 13.6009C7.75728 13.7701 8.16838 13.8562 8.58373 13.8536C9.20893 13.8552 9.81847 13.6582 10.3244 13.2909C10.8303 12.9236 11.2065 12.405 11.3985 11.8101C11.8056 11.7268 12.1902 11.5575 12.5265 11.3135C12.8629 11.0695 13.1432 10.7564 13.3488 10.3953C13.6621 9.85475 13.7957 9.22864 13.7304 8.60725C13.665 7.98585 13.404 7.40128 12.985 6.93776ZM8.58466 13.0883C8.07144 13.089 7.57435 12.9092 7.18025 12.5805C7.19802 12.5708 7.22919 12.5537 7.2495 12.5413L9.58055 11.1948C9.63905 11.1615 9.68764 11.1133 9.7213 11.055C9.75498 10.9967 9.77252 10.9304 9.77211 10.8632V7.57683L10.7574 8.14575C10.7625 8.14832 10.767 8.15213 10.7704 8.15684C10.7737 8.16155 10.7758 8.16703 10.7765 8.17273V10.8943C10.7758 11.4756 10.5447 12.0329 10.1339 12.4441C9.72302 12.8554 9.16593 13.087 8.58466 13.0883ZM3.87085 11.075C3.61379 10.6309 3.52114 10.1104 3.60912 9.60483C3.62643 9.61522 3.65667 9.63369 3.67836 9.64616L6.0094 10.9926C6.0675 11.0266 6.13359 11.0445 6.20088 11.0445C6.26814 11.0445 6.33422 11.0266 6.39232 10.9926L9.23828 9.34935V10.4872C9.2386 10.493 9.23749 10.4988 9.23503 10.5041C9.23257 10.5093 9.22886 10.5139 9.2242 10.5174L6.86776 11.878C6.36373 12.1683 5.7651 12.2467 5.20327 
12.0962C4.64142 11.9457 4.16224 11.5784 3.87085 11.075ZM3.25762 5.98616C3.51357 5.5414 3.91783 5.20089 4.3996 5.0242C4.3996 5.04427 4.39845 5.07983 4.39845 5.10451V7.79745C4.39804 7.86471 4.41555 7.93086 4.44918 7.98913C4.48281 8.04738 4.53133 8.09561 4.58978 8.12889L7.43574 9.77195L6.45048 10.3408C6.44562 10.3441 6.44003 10.346 6.43423 10.3465C6.42844 10.347 6.42258 10.3461 6.41723 10.3439L4.06056 8.98215C3.55741 8.69075 3.19033 8.21175 3.03981 7.65012C2.88929 7.08852 2.96762 6.49013 3.25762 5.98616ZM11.3526 7.86992L8.50663 6.22665L9.49192 5.65797C9.49678 5.65478 9.50237 5.65282 9.50816 5.65229C9.51396 5.65177 9.51978 5.6527 9.52514 5.65499L11.8818 7.01552C12.2429 7.22409 12.537 7.53117 12.7299 7.90086C12.9227 8.27055 13.0062 8.6875 12.9706 9.10294C12.9351 9.51837 12.7819 9.91507 12.529 10.2466C12.2761 10.5781 11.934 10.8307 11.5428 10.9748C11.5428 10.9545 11.5428 10.919 11.5428 10.8943V8.20136C11.5433 8.13421 11.526 8.06813 11.4926 8.00988C11.4592 7.95164 11.4109 7.90335 11.3526 7.86992ZM12.3333 6.39398C12.316 6.38336 12.2857 6.36512 12.264 6.35268L9.93297 5.0062C9.87487 4.9723 9.80881 4.95442 9.74152 4.95442C9.67426 4.95442 9.60818 4.9723 9.55008 5.0062L6.70412 6.64949V5.51166C6.70379 5.50583 6.70491 5.50004 6.70737 5.49477C6.70982 5.4895 6.71354 5.48491 6.71819 5.48142L9.07464 4.12201C9.43564 3.91385 9.84847 3.8128 10.2648 3.83067C10.6812 3.84854 11.0838 3.98461 11.4256 4.22294C11.7674 4.46128 12.0343 4.79202 12.1951 5.17651C12.3558 5.56098 12.4038 5.98326 12.3333 6.39398ZM6.16845 8.42201L5.18293 7.85309C5.17777 7.85052 5.1733 7.84671 5.16996 7.842C5.16662 7.83729 5.16449 7.83182 5.16378 7.82608V5.10451C5.16405 4.68768 5.28305 4.27955 5.50684 3.92788C5.73065 3.57622 6.05 3.29558 6.4275 3.11881C6.805 2.94205 7.22503 2.87647 7.63845 2.92976C8.05189 2.98304 8.44157 3.15299 8.76191 3.4197C8.74415 3.42939 8.71321 3.44647 8.69266 3.45894L6.36162 4.80541C6.30311 4.83866 6.25456 4.88689 6.22087 4.94514C6.18721 5.00339 6.16968 5.06956 6.17006 5.13682L6.16845 
8.42201ZM6.70365 7.26802L7.9712 6.53593L9.23872 7.26755V8.73126L7.9712 9.46291L6.70365 8.73126V7.26802Z" fill="white"/>
</g>
<defs>
<clipPath id="clip0_14261_2043">
<rect width="12" height="12" fill="white" transform="translate(2 2)"/>
</clipPath>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 4.3 KiB

+40
View File
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%PUBLIC_URL%/favicon.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Realtime Console</title>
<!-- Fonts -->
<link
href="https://fonts.googleapis.com/css2?family=Roboto+Mono:ital,wght@0,100..700;1,100..700&display=swap"
rel="stylesheet"
/>
<!-- Leaflet / OpenStreetMap -->
<link
rel="stylesheet"
href="https://unpkg.com/leaflet@1.6.0/dist/leaflet.css"
integrity="sha512-xwE/Az9zrjBIphAcBb3F6JVqxf46+CDLwfLMHloNu6KEQCAWi6HcDUbeOfBIptF7tcCzusKFjFw2yuvEpDL9wQ=="
crossorigin=""
/>
<script
src="https://unpkg.com/leaflet@1.6.0/dist/leaflet.js"
integrity="sha512-gZwIG9x3wUXg2hdXF6+rVkLF/0Vi9U8D2Ntg4Ga5I5BZpVkVxlJWbSQtXPSiUTtC0TjtGOmxa1AJPuV0CPthew=="
crossorigin=""
></script>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.
You can add webfonts, meta tags, or analytics to this file.
The build step will place the bundled scripts into the <body> tag.
To begin the development, run `npm start` or `yarn start`.
To create a production bundle, use `npm run build` or `yarn build`.
-->
</body>
</html>
+1
View File
@@ -0,0 +1 @@
<svg viewBox="0 0 320 320" xmlns="http://www.w3.org/2000/svg"><path d="m297.06 130.97c7.26-21.79 4.76-45.66-6.85-65.48-17.46-30.4-52.56-46.04-86.84-38.68-15.25-17.18-37.16-26.95-60.13-26.81-35.04-.08-66.13 22.48-76.91 55.82-22.51 4.61-41.94 18.7-53.31 38.67-17.59 30.32-13.58 68.54 9.92 94.54-7.26 21.79-4.76 45.66 6.85 65.48 17.46 30.4 52.56 46.04 86.84 38.68 15.24 17.18 37.16 26.95 60.13 26.8 35.06.09 66.16-22.49 76.94-55.86 22.51-4.61 41.94-18.7 53.31-38.67 17.57-30.32 13.55-68.51-9.94-94.51zm-120.28 168.11c-14.03.02-27.62-4.89-38.39-13.88.49-.26 1.34-.73 1.89-1.07l63.72-36.8c3.26-1.85 5.26-5.32 5.24-9.07v-89.83l26.93 15.55c.29.14.48.42.52.74v74.39c-.04 33.08-26.83 59.9-59.91 59.97zm-128.84-55.03c-7.03-12.14-9.56-26.37-7.15-40.18.47.28 1.3.79 1.89 1.13l63.72 36.8c3.23 1.89 7.23 1.89 10.47 0l77.79-44.92v31.1c.02.32-.13.63-.38.83l-64.41 37.19c-28.69 16.52-65.33 6.7-81.92-21.95zm-16.77-139.09c7-12.16 18.05-21.46 31.21-26.29 0 .55-.03 1.52-.03 2.2v73.61c-.02 3.74 1.98 7.21 5.23 9.06l77.79 44.91-26.93 15.55c-.27.18-.61.21-.91.08l-64.42-37.22c-28.63-16.58-38.45-53.21-21.95-81.89zm221.26 51.49-77.79-44.92 26.93-15.54c.27-.18.61-.21.91-.08l64.42 37.19c28.68 16.57 38.51 53.26 21.94 81.94-7.01 12.14-18.05 21.44-31.2 26.28v-75.81c.03-3.74-1.96-7.2-5.2-9.06zm26.8-40.34c-.47-.29-1.3-.79-1.89-1.13l-63.72-36.8c-3.23-1.89-7.23-1.89-10.47 0l-77.79 44.92v-31.1c-.02-.32.13-.63.38-.83l64.41-37.16c28.69-16.55 65.37-6.7 81.91 22 6.99 12.12 9.52 26.31 7.15 40.1zm-168.51 55.43-26.94-15.55c-.29-.14-.48-.42-.52-.74v-74.39c.02-33.12 26.89-59.96 60.01-59.94 14.01 0 27.57 4.92 38.34 13.88-.49.26-1.33.73-1.89 1.07l-63.72 36.8c-3.26 1.85-5.26 5.31-5.24 9.06l-.04 89.79zm14.63-31.54 34.65-20.01 34.65 20v40.01l-34.65 20-34.65-20z"/></svg>

After

Width:  |  Height:  |  Size: 1.7 KiB

+3
View File
@@ -0,0 +1,3 @@
# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:
Binary file not shown.

After

Width:  |  Height:  |  Size: 758 KiB

+18
View File
@@ -0,0 +1,18 @@
import { RealtimeRelay } from './lib/relay.js';
import dotenv from 'dotenv';
// Load settings from the .env file (dotenv's `override` option is enabled).
dotenv.config({ override: true });

const { OPENAI_API_KEY } = process.env;

// Without an API key the relay cannot authenticate upstream, so exit early.
if (!OPENAI_API_KEY) {
  const missingKeyMessage =
    `Environment variable "OPENAI_API_KEY" is required.\n` +
    `Please set it in your .env file.`;
  console.error(missingKeyMessage);
  process.exit(1);
}

// Use 8081 whenever PORT is unset, not parseable as a number, or 0.
const FALLBACK_PORT = 8081;
const PORT = parseInt(process.env.PORT) || FALLBACK_PORT;

new RealtimeRelay(OPENAI_API_KEY).listen(PORT);
+84
View File
@@ -0,0 +1,84 @@
import { WebSocketServer } from 'ws';
import { RealtimeClient } from '../../src/lib/realtime-api-beta/index.js';
/**
 * WebSocket relay between browser clients and the OpenAI Realtime API.
 *
 * Each incoming browser connection gets its own `RealtimeClient`, created with
 * the relay's API key. Events are forwarded in both directions as JSON.
 */
export class RealtimeRelay {
  /**
   * @param {string} apiKey API key handed to every `RealtimeClient` the relay creates
   */
  constructor(apiKey) {
    this.apiKey = apiKey;
    // NOTE(review): never read or written elsewhere in this class.
    this.sockets = new WeakMap();
    this.wss = null;
  }

  /**
   * Starts the WebSocket server and begins accepting browser connections.
   * @param {number} port Port to listen on
   */
  listen(port) {
    this.wss = new WebSocketServer({ port });
    this.wss.on('connection', this.connectionHandler.bind(this));
    this.log(`Listening on ws://localhost:${port}`);
  }

  /**
   * Handles one browser connection: validates the request path, opens an
   * upstream client, and wires up relaying in both directions.
   *
   * Messages that arrive from the browser before the upstream connection is
   * ready are queued and flushed, in order, once it is.
   *
   * @param {import('ws').WebSocket} ws Browser-side socket
   * @param {import('http').IncomingMessage} req HTTP upgrade request
   * @returns {Promise<void>}
   */
  async connectionHandler(ws, req) {
    if (!req.url) {
      this.log('No URL provided, closing connection.');
      ws.close();
      return;
    }

    const url = new URL(req.url, `http://${req.headers.host}`);
    const pathname = url.pathname;

    // Only the root path is served by the relay.
    if (pathname !== '/') {
      this.log(`Invalid pathname: "${pathname}"`);
      ws.close();
      return;
    }

    // Instantiate new client
    this.log(`Connecting with key "${this.apiKey.slice(0, 3)}..."`);
    const client = new RealtimeClient({ apiKey: this.apiKey });

    // Relay: OpenAI Realtime API Event -> Browser Event
    client.realtime.on('server.*', (event) => {
      this.log(`Relaying "${event.type}" to Client`);
      ws.send(JSON.stringify(event));
    });
    client.realtime.on('close', () => ws.close());

    // Relay: Browser Event -> OpenAI Realtime API Event
    // We need to queue data waiting for the OpenAI connection
    const messageQueue = [];
    const messageHandler = (data) => {
      try {
        const event = JSON.parse(data);
        this.log(`Relaying "${event.type}" to OpenAI`);
        client.realtime.send(event.type, event);
      } catch (e) {
        console.error(e.message);
        this.log(`Error parsing event from client: ${data}`);
      }
    };
    ws.on('message', (data) => {
      if (!client.isConnected()) {
        messageQueue.push(data);
      } else {
        messageHandler(data);
      }
    });

    // Remember whether the browser left, so a connect() that is still pending
    // can be torn down as soon as it completes (see below).
    let browserClosed = false;
    ws.on('close', () => {
      browserClosed = true;
      client.disconnect();
    });

    // Connect to OpenAI Realtime API
    try {
      this.log(`Connecting to OpenAI...`);
      await client.connect();
    } catch (e) {
      this.log(`Error connecting to OpenAI: ${e.message}`);
      ws.close();
      return;
    }

    // If the browser socket closed while connect() was pending, the
    // disconnect() issued by the 'close' handler ran before the upstream
    // connection existed. Disconnect again so the new connection is not leaked.
    if (browserClosed) {
      this.log(`Client disconnected before OpenAI connection was ready.`);
      client.disconnect();
      return;
    }

    this.log(`Connected to OpenAI successfully!`);
    while (messageQueue.length) {
      messageHandler(messageQueue.shift());
    }
  }

  /**
   * Writes to the console with a `[RealtimeRelay]` prefix.
   * @param {...any} args Values forwarded to `console.log`
   */
  log(...args) {
    console.log(`[RealtimeRelay]`, ...args);
  }
}
+5
View File
@@ -0,0 +1,5 @@
// Root application wrapper (the element rendered with data-component="App").
// Fills its parent; position: relative makes it the containing block for
// absolutely positioned descendants.
[data-component='App'] {
height: 100%;
width: 100%;
position: relative;
}
+12
View File
@@ -0,0 +1,12 @@
import { ConsolePage } from './pages/ConsolePage';
import './App.scss';
/**
 * Root component: renders the console page inside the
 * `data-component="App"` wrapper element.
 */
function App() {
  const page = <ConsolePage />;
  return <div data-component="App">{page}</div>;
}
export default App;
+9
View File
@@ -0,0 +1,9 @@
// Map wrapper (the element rendered with data-component="Map").
// Absolutely positioned and sized to 100% of its containing block.
[data-component='Map'] {
position: absolute;
width: 100%;
height: 100%;
// Stretch Leaflet's own container element to fill the wrapper
.leaflet-container {
height: 100%;
width: 100%;
}
}
+35
View File
@@ -0,0 +1,35 @@
import { useEffect } from 'react';
import { MapContainer, TileLayer, Marker, Popup, useMap } from 'react-leaflet';
import { LatLngTuple } from 'leaflet';
import './Map.scss';
/**
 * Renderless helper that keeps the surrounding Leaflet map centred on `center`
 * at the given `zoom`. Must be rendered inside a react-leaflet map, because it
 * uses `useMap()`.
 *
 * The view is updated in an effect rather than during render, so rendering
 * stays free of side effects. The effect depends on the individual coordinates
 * rather than the tuple's identity, so the map is only re-centred when the
 * target actually changes, not on every re-render of the parent.
 */
function ChangeView({ center, zoom }: { center: LatLngTuple; zoom: number }) {
  const map = useMap();
  const [lat, lng] = center;

  useEffect(() => {
    map.setView([lat, lng], zoom);
  }, [map, lat, lng, zoom]);

  return null;
}
/**
 * OpenStreetMap-backed map with a single marker at `center`.
 *
 * Scroll-wheel zoom, the zoom control and the attribution control are all
 * switched off. The marker's popup shows `location`.
 *
 * @param center Coordinates used for both the map view and the marker
 * @param location Popup text; defaults to "My Location"
 */
export function Map({
  center,
  location = 'My Location',
}: {
  center: LatLngTuple;
  location?: string;
}) {
  // One zoom level shared by the initial view and by ChangeView
  const zoomLevel = 11;
  const tileUrl = 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png';

  const marker = (
    <Marker position={center}>
      <Popup>{location}</Popup>
    </Marker>
  );

  return (
    <div data-component="Map">
      <MapContainer
        center={center}
        zoom={zoomLevel}
        scrollWheelZoom={false}
        zoomControl={false}
        attributionControl={false}
      >
        <ChangeView center={center} zoom={zoomLevel} />
        <TileLayer url={tileUrl} />
        {marker}
      </MapContainer>
    </div>
  );
}
+82
View File
@@ -0,0 +1,82 @@
// Styles for the Button component (the element rendered with
// data-component="Button"). The base rule is the "regular" look; the
// button-style-* modifier classes override its colours.
[data-component='Button'] {
display: flex;
align-items: center;
gap: 8px;
font-family: 'Roboto Mono', monospace;
font-size: 12px;
font-optical-sizing: auto;
font-weight: 400;
font-style: normal;
border: none;
background-color: #ececf1;
color: #101010;
border-radius: 1000px;
padding: 8px 24px;
min-height: 42px;
transition: transform 0.1s ease-in-out, background-color 0.1s ease-in-out;
// Dark variant
&.button-style-action {
background-color: #101010;
color: #ececf1;
&:hover:not([disabled]) {
background-color: #404040;
}
}
// Red variant; its hover rule repeats the base red, so the generic grey
// hover colour further down does not apply to it
&.button-style-alert {
background-color: #f00;
color: #ececf1;
&:hover:not([disabled]) {
background-color: #f00;
}
}
// Fully transparent background
&.button-style-flush {
background-color: rgba(255, 255, 255, 0);
}
&[disabled] {
color: #999;
}
// Pointer cursor, hover and press feedback only apply to enabled buttons
&:not([disabled]) {
cursor: pointer;
}
&:hover:not([disabled]) {
background-color: #d8d8d8;
}
&:active:not([disabled]) {
transform: translateY(1px);
}
// Icon wrapper; negative margins pull the icon towards the nearer button edge
.icon {
display: flex;
&.icon-start {
margin-left: -8px;
}
&.icon-end {
margin-right: -8px;
}
svg {
width: 16px;
height: 16px;
}
}
// Icon colour modifiers (set on the button, applied to the icon wrapper)
&.icon-red .icon {
color: #cc0000;
}
&.icon-green .icon {
color: #009900;
}
&.icon-grey .icon {
color: #909090;
}
// Fill SVG icons with the current text colour
&.icon-fill {
svg {
fill: currentColor;
}
}
}
+50
View File
@@ -0,0 +1,50 @@
import React from 'react';
import './Button.scss';
import { Icon } from 'react-feather';
/**
 * Props for {@link Button}. Every standard `<button>` attribute is accepted
 * and forwarded to the underlying element.
 */
interface ButtonProps extends React.ButtonHTMLAttributes<HTMLButtonElement> {
  /** Text shown inside the button. */
  label?: string;
  /** Optional react-feather icon component. */
  icon?: Icon;
  /** Whether the icon is rendered before or after the label. */
  iconPosition?: 'start' | 'end';
  /** Adds the `icon-<color>` class to the button. */
  iconColor?: 'red' | 'green' | 'grey';
  /** Adds the `icon-fill` class to the button. */
  iconFill?: boolean;
  /** Visual variant; emitted as the `button-style-<variant>` class. */
  buttonStyle?: 'regular' | 'action' | 'alert' | 'flush';
}

/**
 * Pill-shaped button with an optional leading or trailing icon.
 *
 * The component computes its own modifier classes (`icon-*`,
 * `button-style-*`). A caller-supplied `className` is appended to those
 * classes instead of replacing them, so custom classes no longer strip the
 * button's styling. All remaining props are spread onto the `<button>`.
 */
export function Button({
  label = 'Okay',
  icon = void 0,
  iconPosition = 'start',
  iconColor = void 0,
  iconFill = false,
  buttonStyle = 'regular',
  className,
  ...rest
}: ButtonProps) {
  const StartIcon = iconPosition === 'start' ? icon : null;
  const EndIcon = iconPosition === 'end' ? icon : null;

  const classList: string[] = [];
  if (iconColor) {
    classList.push(`icon-${iconColor}`);
  }
  if (iconFill) {
    classList.push(`icon-fill`);
  }
  classList.push(`button-style-${buttonStyle}`);
  // Keep the caller's classes alongside the computed ones
  if (className) {
    classList.push(className);
  }

  return (
    <button data-component="Button" className={classList.join(' ')} {...rest}>
      {StartIcon && (
        <span className="icon icon-start">
          <StartIcon />
        </span>
      )}
      <span className="label">{label}</span>
      {EndIcon && (
        <span className="icon icon-end">
          <EndIcon />
        </span>
      )}
    </button>
  );
}
+54
View File
@@ -0,0 +1,54 @@
// Styles for the Toggle component (the element rendered with
// data-component="Toggle"): a pill with two labels and a dark sliding
// background behind the active one.
[data-component='Toggle'] {
position: relative;
display: flex;
align-items: center;
gap: 8px;
cursor: pointer;
overflow: hidden;
background-color: #ececf1;
color: #101010;
height: 40px;
border-radius: 1000px;
&:hover {
background-color: #d8d8d8;
}
// Labels sit above the sliding background (z-index 2 vs 1)
div.label {
position: relative;
color: #666;
transition: color 0.1s ease-in-out;
padding: 0px 16px;
z-index: 2;
user-select: none;
}
div.label.right {
margin-left: -8px;
}
// Sliding highlight; the Toggle component sets its inline `left` and `width`
// from the active label's measurements
.toggle-background {
background-color: #101010;
position: absolute;
top: 0px;
left: 0px;
width: auto;
bottom: 0px;
z-index: 1;
border-radius: 1000px;
transition: left 0.1s ease-in-out, width 0.1s ease-in-out;
}
// The active label turns white: right when enabled, left when disabled
&[data-enabled='true'] {
div.label.right {
color: #fff;
}
}
&[data-enabled='false'] {
div.label.left {
color: #fff;
}
}
}
+66
View File
@@ -0,0 +1,66 @@
import { useState, useEffect, useRef } from 'react';
import './Toggle.scss';
/**
 * Two-position pill toggle with a sliding highlight behind the active label.
 *
 * @param defaultValue Initial state. A boolean is used as-is. A string enables
 *   the toggle only when it is found in `values` at an index greater than 0.
 * @param values Values reported to `onChange`, indexed as [off, on]
 * @param labels Texts for the left (off) and right (on) sides
 * @param onChange Called on every click with the new state and `values[0 | 1]`
 */
export function Toggle({
  defaultValue = false,
  values,
  labels,
  onChange = () => {},
}: {
  defaultValue?: string | boolean;
  values?: string[];
  labels?: string[];
  onChange?: (isEnabled: boolean, value: string) => void;
}) {
  // A string default that is missing, or sits at index 0, means "off"
  const initiallyEnabled =
    typeof defaultValue === 'string'
      ? (values || []).indexOf(defaultValue) > 0
      : defaultValue;

  const leftRef = useRef<HTMLDivElement>(null);
  const rightRef = useRef<HTMLDivElement>(null);
  const bgRef = useRef<HTMLDivElement>(null);
  const [value, setValue] = useState<boolean>(initiallyEnabled);

  const toggleValue = () => {
    const next = !value;
    setValue(next);
    onChange(next, (values || [])[next ? 1 : 0]);
  };

  // Move and resize the highlight to sit behind the active label
  useEffect(() => {
    const leftEl = leftRef.current;
    const rightEl = rightRef.current;
    const bgEl = bgRef.current;
    if (!leftEl || !rightEl || !bgEl) {
      return;
    }
    const activeEl = value ? rightEl : leftEl;
    // An empty string clears the inline value, so the stylesheet's `left` applies
    bgEl.style.left = value ? rightEl.offsetLeft + 'px' : '';
    bgEl.style.width = activeEl.offsetWidth + 'px';
  }, [value]);

  return (
    <div
      data-component="Toggle"
      onClick={toggleValue}
      data-enabled={String(value)}
    >
      {labels && (
        <>
          <div className="label left" ref={leftRef}>
            {labels[0]}
          </div>
          <div className="label right" ref={rightRef}>
            {labels[1]}
          </div>
        </>
      )}
      <div className="toggle-background" ref={bgRef}></div>
    </div>
  );
}
+21
View File
@@ -0,0 +1,21 @@
/* Page-level reset: html and body fill the viewport with no margin or padding. */
html,
body {
padding: 0px;
margin: 0px;
position: relative;
width: 100%;
height: 100%;
/* NOTE(review): the HTML template only loads the Roboto Mono web font, so
   'Assistant' resolves to the sans-serif fallback unless it is installed
   locally. Confirm whether this is intended. */
font-family: 'Assistant', sans-serif;
font-optical-sizing: auto;
font-weight: 400;
font-style: normal;
color: #18181b;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
/* React mount point: fills the body. */
#root {
position: relative;
width: 100%;
height: 100%;
}
+19
View File
@@ -0,0 +1,19 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import App from './App';
import reportWebVitals from './reportWebVitals';
// Mount the application on the element with id "root"
const container = document.getElementById('root') as HTMLElement;
const root = ReactDOM.createRoot(container);

const app = (
  <React.StrictMode>
    <App />
  </React.StrictMode>
);
root.render(app);

// To start measuring performance, pass a function to reportWebVitals to log
// results (for example: reportWebVitals(console.log)) or send them to an
// analytics endpoint. Learn more: https://bit.ly/CRA-vitals
reportWebVitals();
+6
View File
@@ -0,0 +1,6 @@
import { RealtimeAPI } from './lib/api.js';
import { RealtimeConversation } from './lib/conversation.js';
import { RealtimeClient } from './lib/client.js';
import { RealtimeUtils } from './lib/utils.js';
export { RealtimeAPI, RealtimeConversation, RealtimeClient, RealtimeUtils };
//# sourceMappingURL=index.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.js"],"names":[],"mappings":"4BAC4B,cAAc;qCACL,uBAAuB;+BAC7B,iBAAiB;8BAHlB,gBAAgB"}
+61
View File
@@ -0,0 +1,61 @@
/**
* Type declarations for RealtimeAPI, the WebSocket connection wrapper.
* Inherits its event handling (on / off / dispatch) from RealtimeEventHandler.
*/
export class RealtimeAPI extends RealtimeEventHandler {
/**
* Create a new RealtimeAPI instance
* @param {{url?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
* @returns {RealtimeAPI}
*/
constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug }?: {
url?: string;
apiKey?: string;
dangerouslyAllowAPIKeyInBrowser?: boolean;
debug?: boolean;
});
/** URL used when no `url` setting is supplied */
defaultUrl: string;
/** URL this instance connects to */
url: string;
/** API key supplied in the settings (the implementation stores null when none is given) */
apiKey: string;
/** When true, `log()` writes to the console */
debug: boolean;
/** The active WebSocket, or null while disconnected */
ws: any;
/**
* Tells us whether or not the WebSocket is connected
* @returns {boolean}
*/
isConnected(): boolean;
/**
* Writes WebSocket logs to console
* @param {...any} args
* @returns {true}
*/
log(...args: any[]): true;
/**
* Connects to Realtime API Websocket Server
* @param {{model?: string}} [settings]
* @returns {Promise<true>}
*/
connect({ model }?: {
model?: string;
}): Promise<true>;
/**
* Disconnects from Realtime API server
* @param {WebSocket} [ws] When given, the socket is only closed if it is the active one
* @returns {true}
*/
disconnect(ws?: WebSocket): true;
/**
* Receives an event from WebSocket and dispatches as "server.{eventName}" and "server.*" events
* @param {string} eventName
* @param {{[key: string]: any}} event
* @returns {true}
*/
receive(eventName: string, event: {
[key: string]: any;
}): true;
/**
* Sends an event to WebSocket and dispatches as "client.{eventName}" and "client.*" events
* @param {string} eventName
* @param {{[key: string]: any}} [data] Event payload; its properties are merged into the sent event
* @returns {true}
*/
send(eventName: string, data: any): true;
}
import { RealtimeEventHandler } from './event_handler.js';
//# sourceMappingURL=api.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../../lib/api.js"],"names":[],"mappings":"AAGA;IACE;;;;OAIG;IACH,sEAHW;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,+BAA+B,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAiBrG;IAZC,mBAAoD;IACpD,YAAiC;IACjC,eAA4B;IAC5B,eAAoB;IACpB,QAAc;IAUhB;;;OAGG;IACH,eAFa,OAAO,CAInB;IAED;;;;OAIG;IACH,aAHe,GAAG,EAAA,GACL,IAAI,CAehB;IAED;;;;OAIG;IACH,oBAHW;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAC,GACd,OAAO,CAAC,IAAI,CAAC,CAkGzB;IAED;;;;OAIG;IACH,gBAHW,SAAS,GACP,IAAI,CAQhB;IAED;;;;;OAKG;IACH,mBAJW,MAAM,SACN;QAAC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAC,GAClB,IAAI,CAOhB;IAED;;;;;OAKG;IACH,gBAJW,MAAM,cAEJ,IAAI,CAoBhB;CACF;qCA/MoC,oBAAoB"}
+461
View File
@@ -0,0 +1,461 @@
/**
* Valid audio formats
* @typedef {"pcm16"|"g711-ulaw"|"g711-alaw"} AudioFormatType
*/
/**
* @typedef {Object} AudioTranscriptionType
* @property {boolean} [enabled]
* @property {"whisper-1"} model
*/
/**
* @typedef {Object} TurnDetectionServerVadType
* @property {"server_vad"} type
* @property {number} [threshold]
* @property {number} [prefix_padding_ms]
* @property {number} [silence_duration_ms]
*/
/**
* Tool definitions
* @typedef {Object} ToolDefinitionType
* @property {"function"} [type]
* @property {string} name
* @property {string} description
* @property {{[key: string]: any}} parameters
*/
/**
* @typedef {Object} SessionResourceType
* @property {string} [model]
* @property {string[]} [modalities]
* @property {string} [instructions]
* @property {"alloy"|"shimmer"|"echo"} [voice]
* @property {AudioFormatType} [input_audio_format]
* @property {AudioFormatType} [output_audio_format]
* @property {AudioTranscriptionType|null} [input_audio_transcription]
* @property {TurnDetectionServerVadType|null} [turn_detection]
* @property {ToolDefinitionType[]} [tools]
* @property {"auto"|"none"|"required"|{type:"function",name:string}} [tool_choice]
* @property {number} [temperature]
* @property {number|"inf"} [max_response_output_tokens]
*/
/**
* @typedef {"in_progress"|"completed"|"incomplete"} ItemStatusType
*/
/**
* @typedef {Object} InputTextContentType
* @property {"input_text"} type
* @property {string} text
*/
/**
* @typedef {Object} InputAudioContentType
* @property {"input_audio"} type
* @property {string} [audio] base64-encoded audio data
* @property {string|null} [transcript]
*/
/**
* @typedef {Object} TextContentType
* @property {"text"} type
* @property {string} text
*/
/**
* @typedef {Object} AudioContentType
* @property {"audio"} type
* @property {string} [audio] base64-encoded audio data
* @property {string|null} [transcript]
*/
/**
* @typedef {Object} SystemItemType
* @property {string|null} [previous_item_id]
* @property {"message"} type
* @property {ItemStatusType} status
* @property {"system"} role
* @property {Array<InputTextContentType>} content
*/
/**
* @typedef {Object} UserItemType
* @property {string|null} [previous_item_id]
* @property {"message"} type
* @property {ItemStatusType} status
* @property {"user"} role
* @property {Array<InputTextContentType|InputAudioContentType>} content
*/
/**
* @typedef {Object} AssistantItemType
* @property {string|null} [previous_item_id]
* @property {"message"} type
* @property {ItemStatusType} status
* @property {"assistant"} role
* @property {Array<TextContentType|AudioContentType>} content
*/
/**
* @typedef {Object} FunctionCallItemType
* @property {string|null} [previous_item_id]
* @property {"function_call"} type
* @property {ItemStatusType} status
* @property {string} call_id
* @property {string} name
* @property {string} arguments
*/
/**
* @typedef {Object} FunctionCallOutputItemType
* @property {string|null} [previous_item_id]
* @property {"function_call_output"} type
* @property {string} call_id
* @property {string} output
*/
/**
* @typedef {Object} FormattedToolType
* @property {"function"} type
* @property {string} name
* @property {string} call_id
* @property {string} arguments
*/
/**
* @typedef {Object} FormattedPropertyType
* @property {Int16Array} [audio]
* @property {string} [text]
* @property {string} [transcript]
* @property {FormattedToolType} [tool]
* @property {string} [output]
* @property {any} [file]
*/
/**
* @typedef {Object} FormattedItemType
* @property {string} id
* @property {string} object
* @property {"user"|"assistant"|"system"} [role]
* @property {FormattedPropertyType} formatted
*/
/**
* @typedef {SystemItemType|UserItemType|AssistantItemType|FunctionCallItemType|FunctionCallOutputItemType} BaseItemType
*/
/**
* @typedef {FormattedItemType & BaseItemType} ItemType
*/
/**
* @typedef {Object} IncompleteResponseStatusType
* @property {"incomplete"} type
* @property {"interruption"|"max_output_tokens"|"content_filter"} reason
*/
/**
* @typedef {Object} FailedResponseStatusType
* @property {"failed"} type
* @property {{code: string, message: string}|null} error
*/
/**
* @typedef {Object} UsageType
* @property {number} total_tokens
* @property {number} input_tokens
* @property {number} output_tokens
*/
/**
* @typedef {Object} ResponseResourceType
* @property {"in_progress"|"completed"|"incomplete"|"cancelled"|"failed"} status
* @property {IncompleteResponseStatusType|FailedResponseStatusType|null} status_details
* @property {ItemType[]} output
* @property {UsageType|null} usage
*/
/**
* RealtimeClient Class
* @class
*/
export class RealtimeClient extends RealtimeEventHandler {
/**
* Create a new RealtimeClient instance
* @param {{url?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
*/
constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug }?: {
url?: string;
apiKey?: string;
dangerouslyAllowAPIKeyInBrowser?: boolean;
debug?: boolean;
});
/** Default values for every session setting */
defaultSessionConfig: {
modalities: string[];
instructions: string;
voice: string;
input_audio_format: string;
output_audio_format: string;
input_audio_transcription: any;
turn_detection: any;
tools: any[];
tool_choice: string;
temperature: number;
max_response_output_tokens: number;
};
/** Active session settings */
sessionConfig: {};
/** List of transcription model descriptors */
transcriptionModels: {
model: string;
}[];
/** Default settings for "server_vad" turn detection */
defaultServerVadConfig: {
type: string;
threshold: number;
prefix_padding_ms: number;
silence_duration_ms: number;
};
/** RealtimeAPI instance used by this client */
realtime: RealtimeAPI;
/** RealtimeConversation instance used by this client */
conversation: RealtimeConversation;
/**
* Resets sessionConfig and conversationConfig to default
* @private
* @returns {true}
*/
private _resetConfig;
/** NOTE(review): presumably set once the server's session.created event arrives — confirm in client.js */
sessionCreated: boolean;
/** Registered tools — NOTE(review): presumably keyed by tool name; confirm in client.js */
tools: {};
/** Buffered user input audio */
inputAudioBuffer: any;
/**
* Sets up event handlers for a fully-functional application control flow
* @private
* @returns {true}
*/
private _addAPIEventHandlers;
/**
* Tells us whether the realtime socket is connected and the session has started
* @returns {boolean}
*/
isConnected(): boolean;
/**
* Resets the client instance entirely: disconnects and clears active config
* @returns {true}
*/
reset(): true;
/**
* Connects to the Realtime WebSocket API
* Updates session config and conversation config
* @returns {Promise<true>}
*/
connect(): Promise<true>;
/**
* Waits for a session.created event to be executed before proceeding
* @returns {Promise<true>}
*/
waitForSessionCreated(): Promise<true>;
/**
* Disconnects from the Realtime API and clears the conversation history
* @returns {void}
*/
disconnect(): void;
/**
* Gets the active turn detection mode
* @returns {"server_vad"|null}
*/
getTurnDetectionType(): "server_vad" | null;
/**
* Add a tool and handler
* @param {ToolDefinitionType} definition
* @param {function} handler
* @returns {{definition: ToolDefinitionType, handler: function}}
*/
addTool(definition: ToolDefinitionType, handler: Function): {
definition: ToolDefinitionType;
handler: Function;
};
/**
* Removes a tool
* @param {string} name
* @returns {true}
*/
removeTool(name: string): true;
/**
* Deletes an item
* @param {string} id
* @returns {true}
*/
deleteItem(id: string): true;
/**
* Updates session configuration
* If the client is not yet connected, will save details and instantiate upon connection
* @param {SessionResourceType} [sessionConfig]
* @returns {boolean}
*/
updateSession({ modalities, instructions, voice, input_audio_format, output_audio_format, input_audio_transcription, turn_detection, tools, tool_choice, temperature, max_response_output_tokens, }?: SessionResourceType): boolean;
/**
* Sends user message content and generates a response
* @param {Array<InputTextContentType|InputAudioContentType>} [content]
* @returns {true}
*/
sendUserMessageContent(content?: Array<InputTextContentType | InputAudioContentType>): true;
/**
* Appends user audio to the existing audio buffer
* @param {Int16Array|ArrayBuffer} arrayBuffer
* @returns {true}
*/
appendInputAudio(arrayBuffer: Int16Array | ArrayBuffer): true;
/**
* Forces a model response generation
* @returns {true}
*/
createResponse(): true;
/**
* Cancels the ongoing server generation and truncates ongoing generation, if applicable
* If no id provided, will simply call `cancel_generation` command
* @param {string} id The id of the message to cancel
* @param {number} [sampleCount] The number of samples to truncate past for the ongoing generation
* @returns {{item: (AssistantItemType | null)}}
*/
cancelResponse(id: string, sampleCount?: number): {
item: (AssistantItemType | null);
};
/**
* Utility for waiting for the next `conversation.item.appended` event to be triggered by the server
* @returns {Promise<{item: ItemType}>}
*/
waitForNextItem(): Promise<{
item: ItemType;
}>;
/**
* Utility for waiting for the next `conversation.item.completed` event to be triggered by the server
* @returns {Promise<{item: ItemType}>}
*/
waitForNextCompletedItem(): Promise<{
item: ItemType;
}>;
}
/**
* Valid audio formats
*/
export type AudioFormatType = "pcm16" | "g711-ulaw" | "g711-alaw";
export type AudioTranscriptionType = {
enabled?: boolean;
model: "whisper-1";
};
export type TurnDetectionServerVadType = {
type: "server_vad";
threshold?: number;
prefix_padding_ms?: number;
silence_duration_ms?: number;
};
/**
* Tool definitions
*/
export type ToolDefinitionType = {
type?: "function";
name: string;
description: string;
parameters: {
[key: string]: any;
};
};
export type SessionResourceType = {
model?: string;
modalities?: string[];
instructions?: string;
voice?: "alloy" | "shimmer" | "echo";
input_audio_format?: AudioFormatType;
output_audio_format?: AudioFormatType;
input_audio_transcription?: AudioTranscriptionType | null;
turn_detection?: TurnDetectionServerVadType | null;
tools?: ToolDefinitionType[];
tool_choice?: "auto" | "none" | "required" | {
type: "function";
name: string;
};
temperature?: number;
max_response_output_tokens?: number | "inf";
};
export type ItemStatusType = "in_progress" | "completed" | "incomplete";
export type InputTextContentType = {
type: "input_text";
text: string;
};
export type InputAudioContentType = {
type: "input_audio";
/**
* base64-encoded audio data
*/
audio?: string;
transcript?: string | null;
};
export type TextContentType = {
type: "text";
text: string;
};
export type AudioContentType = {
type: "audio";
/**
* base64-encoded audio data
*/
audio?: string;
transcript?: string | null;
};
export type SystemItemType = {
previous_item_id?: string | null;
type: "message";
status: ItemStatusType;
role: "system";
content: Array<InputTextContentType>;
};
/**
 * A "message" conversation item whose role is "user".
 * Its content parts are input text and/or input audio.
 */
export type UserItemType = {
    previous_item_id?: string | null;
    type: "message";
    status: ItemStatusType;
    // Previously "system" (identical to SystemItemType), which made a
    // user-role item impossible to express once intersected with
    // FormattedItemType in ItemType.
    role: "user";
    content: Array<InputTextContentType | InputAudioContentType>;
};
export type AssistantItemType = {
previous_item_id?: string | null;
type: "message";
status: ItemStatusType;
role: "assistant";
content: Array<TextContentType | AudioContentType>;
};
export type FunctionCallItemType = {
previous_item_id?: string | null;
type: "function_call";
status: ItemStatusType;
call_id: string;
name: string;
arguments: string;
};
export type FunctionCallOutputItemType = {
previous_item_id?: string | null;
type: "function_call_output";
call_id: string;
output: string;
};
export type FormattedToolType = {
type: "function";
name: string;
call_id: string;
arguments: string;
};
export type FormattedPropertyType = {
audio?: Int16Array;
text?: string;
transcript?: string;
tool?: FormattedToolType;
output?: string;
file?: any;
};
export type FormattedItemType = {
id: string;
object: string;
role?: "user" | "assistant" | "system";
formatted: FormattedPropertyType;
};
export type BaseItemType = SystemItemType | UserItemType | AssistantItemType | FunctionCallItemType | FunctionCallOutputItemType;
export type ItemType = FormattedItemType & BaseItemType;
export type IncompleteResponseStatusType = {
type: "incomplete";
reason: "interruption" | "max_output_tokens" | "content_filter";
};
export type FailedResponseStatusType = {
type: "failed";
error: {
code: string;
message: string;
} | null;
};
export type UsageType = {
total_tokens: number;
input_tokens: number;
output_tokens: number;
};
export type ResponseResourceType = {
status: "in_progress" | "completed" | "incomplete" | "cancelled" | "failed";
status_details: IncompleteResponseStatusType | FailedResponseStatusType | null;
output: ItemType[];
usage: UsageType | null;
};
import { RealtimeEventHandler } from './event_handler.js';
import { RealtimeAPI } from './api.js';
import { RealtimeConversation } from './conversation.js';
//# sourceMappingURL=client.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../lib/client.js"],"names":[],"mappings":"AAOA;;;GAGG;AAEH;;;;GAIG;AAEH;;;;;;GAMG;AAEH;;;;;;;GAOG;AAEH;;;;;;;;;;;;;;GAcG;AAEH;;GAEG;AAEH;;;;GAIG;AAEH;;;;;GAKG;AAEH;;;;GAIG;AAEH;;;;;GAKG;AAEH;;;;;;;GAOG;AAEH;;;;;;;GAOG;AAEH;;;;;;;GAOG;AAEH;;;;;;;;GAQG;AAEH;;;;;;GAMG;AAEH;;;;;;GAMG;AAEH;;;;;;;;GAQG;AAEH;;;;;;GAMG;AAEH;;GAEG;AAEH;;GAEG;AAEH;;;;GAIG;AAEH;;;;GAIG;AAEH;;;;;GAKG;AAEH;;;;;;GAMG;AAEH;;;GAGG;AACH;IACE;;;OAGG;IACH,sEAFW;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,+BAA+B,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAsCrG;IAlCC;;;;;;;;;;;;MAYC;IACD,kBAAuB;IACvB;;QAIC;IACD;;;;;MAKC;IACD,sBAKE;IACF,mCAA8C;IAKhD;;;;OAIG;IACH,qBAMC;IALC,wBAA2B;IAC3B,UAAe;IAEf,sBAAyC;IAI3C;;;;OAIG;IACH,6BAiHC;IAED;;;OAGG;IACH,eAFa,OAAO,CAInB;IAED;;;OAGG;IACH,SAFa,IAAI,CAShB;IAED;;;;OAIG;IACH,WAFa,OAAO,CAAC,IAAI,CAAC,CASzB;IAED;;;OAGG;IACH,yBAFa,OAAO,CAAC,IAAI,CAAC,CAUzB;IAED;;OAEG;IACH,mBAIC;IAED;;;OAGG;IACH,wBAFa,YAAY,GAAC,IAAI,CAI7B;IAED;;;;;OAKG;IACH,oBAJW,kBAAkB,sBAEhB;QAAC,UAAU,EAAE,kBAAkB,CAAC;QAAC,OAAO,WAAU;KAAC,CAkB/D;IAED;;;;OAIG;IACH,iBAHW,MAAM,GACJ,IAAI,CAQhB;IAED;;;;OAIG;IACH,eAHW,MAAM,GACJ,IAAI,CAKhB;IAED;;;;OAIG;IACH,sMAFW,mBAAmB,WA4D7B;IAED;;;;OAIG;IACH,iCAHW,KAAK,CAAC,oBAAoB,GAAC,qBAAqB,CAAC,GAC/C,IAAI,CAqBhB;IAED;;;;OAIG;IACH,8BAHW,UAAU,GAAC,WAAW,GACpB,IAAI,CAahB;IAED;;;OAGG;IACH,kBAFa,IAAI,CAahB;IAED;;;;;;OAMG;IACH,mBAJW,MAAM,gBACN,MAAM,GACJ;QAAC,IAAI,EAAE,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAAA;KAAC,CAgC9C;IAED;;;OAGG;IACH,mBAFa,OAAO,CAAC;QAAC,IAAI,EAAE,QAAQ,CAAA;KAAC,CAAC,CAYrC;IAED;;;OAGG;IACH,4BAFa,OAAO,CAAC;QAAC,IAAI,EAAE,QAAQ,CAAA;KAAC,CAAC,CAYrC;CACF;;;;8BA3pBY,OAAO,GAAC,WAAW,GAAC,WAAW;;cAK9B,OAAO;WACP,WAAW;;;UAKX,YAAY;gBACZ,MAAM;wBACN,MAAM;0BACN,MAAM;;;;;;WAMN,UAAU;UACV,MAAM;iBACN,MAAM;gBACN;QAAC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAC;;;YAKpB,MAAM;iBACN,MAAM,EAAE;mBACR,MAAM;YACN,OAAO,GAAC,SAAS,GAAC,MAAM;yBACxB,eAAe;0BACf,eAAe;gCACf,sBAAsB,GAAC,IAAI;qBAC3B,0BAA0B,GAAC,IA
AI;YAC/B,kBAAkB,EAAE;kBACpB,MAAM,GAAC,MAAM,GAAC,UAAU,GAAC;QAAC,IAAI,EAAC,UAAU,CAAC;QAAA,IAAI,EAAC,MAAM,CAAA;KAAC;kBACtD,MAAM;iCACN,MAAM,GAAC,KAAK;;6BAIb,aAAa,GAAC,WAAW,GAAC,YAAY;;UAKrC,YAAY;UACZ,MAAM;;;UAKN,aAAa;;;;YACb,MAAM;iBACN,MAAM,GAAC,IAAI;;;UAKX,MAAM;UACN,MAAM;;;UAKN,OAAO;;;;YACP,MAAM;iBACN,MAAM,GAAC,IAAI;;;uBAKX,MAAM,GAAC,IAAI;UACX,SAAS;YACT,cAAc;UACd,QAAQ;aACR,KAAK,CAAC,oBAAoB,CAAC;;;uBAK3B,MAAM,GAAC,IAAI;UACX,SAAS;YACT,cAAc;UACd,QAAQ;aACR,KAAK,CAAC,oBAAoB,GAAC,qBAAqB,CAAC;;;uBAKjD,MAAM,GAAC,IAAI;UACX,SAAS;YACT,cAAc;UACd,WAAW;aACX,KAAK,CAAC,eAAe,GAAC,gBAAgB,CAAC;;;uBAKvC,MAAM,GAAC,IAAI;UACX,eAAe;YACf,cAAc;aACd,MAAM;UACN,MAAM;eACN,MAAM;;;uBAKN,MAAM,GAAC,IAAI;UACX,sBAAsB;aACtB,MAAM;YACN,MAAM;;;UAKN,UAAU;UACV,MAAM;aACN,MAAM;eACN,MAAM;;;YAKN,UAAU;WACV,MAAM;iBACN,MAAM;WACN,iBAAiB;aACjB,MAAM;WACN,GAAG;;;QAKH,MAAM;YACN,MAAM;WACN,MAAM,GAAC,WAAW,GAAC,QAAQ;eAC3B,qBAAqB;;2BAItB,cAAc,GAAC,YAAY,GAAC,iBAAiB,GAAC,oBAAoB,GAAC,0BAA0B;uBAI7F,iBAAiB,GAAG,YAAY;;UAK/B,YAAY;YACZ,cAAc,GAAC,mBAAmB,GAAC,gBAAgB;;;UAKnD,QAAQ;WACR;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAC,GAAC,IAAI;;;kBAKpC,MAAM;kBACN,MAAM;mBACN,MAAM;;;YAKN,aAAa,GAAC,WAAW,GAAC,YAAY,GAAC,WAAW,GAAC,QAAQ;oBAC3D,4BAA4B,GAAC,wBAAwB,GAAC,IAAI;YAC1D,QAAQ,EAAE;WACV,SAAS,GAAC,IAAI;;qCAxLS,oBAAoB;4BAC7B,UAAU;qCACD,mBAAmB"}
+126
View File
@@ -0,0 +1,126 @@
/**
* Contains text and audio information about a item
* Can also be used as a delta
* @typedef {Object} ItemContentDeltaType
* @property {string} [text]
* @property {Int16Array} [audio]
* @property {string} [arguments]
* @property {string} [transcript]
*/
/**
* RealtimeConversation holds conversation history
* and performs event validation for RealtimeAPI
* @class
*/
export class RealtimeConversation {
    /** Default audio frequency used by the conversation */
    defaultFrequency: number;
    /**
     * Handlers keyed by server event type.
     * Each one takes the raw event and returns the affected item together
     * with the delta (if any) that the event contributed to it.
     */
    EventProcessors: {
        'conversation.item.created': (event: any) => {
            item: any;
            delta: any;
        };
        'conversation.item.truncated': (event: any) => {
            item: any;
            delta: any;
        };
        'conversation.item.deleted': (event: any) => {
            item: any;
            delta: any;
        };
        'conversation.item.input_audio_transcription.completed': (event: any) => {
            item: any;
            delta: {
                transcript: any;
            };
        };
        'input_audio_buffer.speech_started': (event: any) => {
            item: any;
            delta: any;
        };
        'input_audio_buffer.speech_stopped': (event: any, inputAudioBuffer: any) => {
            item: any;
            delta: any;
        };
        'response.created': (event: any) => {
            item: any;
            delta: any;
        };
        'response.output_item.added': (event: any) => {
            item: any;
            delta: any;
        };
        'response.output_item.done': (event: any) => {
            item: any;
            delta: any;
        };
        'response.content_part.added': (event: any) => {
            item: any;
            delta: any;
        };
        'response.audio_transcript.delta': (event: any) => {
            item: any;
            delta: {
                transcript: any;
            };
        };
        'response.audio.delta': (event: any) => {
            item: any;
            delta: {
                audio: Int16Array;
            };
        };
        'response.function_call_arguments.delta': (event: any) => {
            item: any;
            delta: {
                arguments: any;
            };
        };
    };
    /** Input audio queued via `queueInputAudio` */
    queuedInputAudio: Int16Array;
    /**
     * Clears the conversation history and resets to default
     * @returns {true}
     */
    clear(): true;
    itemLookup: {};
    items: any[];
    responseLookup: {};
    responses: any[];
    queuedSpeechItems: {};
    queuedTranscriptItems: {};
    /**
     * Queue input audio for manual speech event
     * @param {Int16Array} inputAudio
     * @returns {Int16Array}
     */
    queueInputAudio(inputAudio: Int16Array): Int16Array;
    /**
     * Process an event from the WebSocket server and compose items
     * @param {Object} event
     * @param {...any} args
     * @returns {{item: import('./client.js').ItemType | null, delta: ItemContentDeltaType | null}}
     */
    // The return type was previously the undeclared name `item`, an artifact
    // of a malformed @returns tag; it is the same { item, delta } pair that
    // the EventProcessors entries return.
    processEvent(event: any, ...args: any[]): {
        item: import("./client.js").ItemType | null;
        delta: ItemContentDeltaType | null;
    };
    /**
     * Retrieves a item by id
     * @param {string} id
     * @returns {import('./client.js').ItemType}
     */
    getItem(id: string): import("./client.js").ItemType;
    /**
     * Retrieves all items in the conversation
     * @returns {import('./client.js').ItemType[]}
     */
    getItems(): import("./client.js").ItemType[];
}
/**
* Contains text and audio information about a item
* Can also be used as a delta
*/
export type ItemContentDeltaType = {
text?: string;
audio?: Int16Array;
arguments?: string;
transcript?: string;
};
//# sourceMappingURL=conversation.d.ts.map
@@ -0,0 +1 @@
{"version":3,"file":"conversation.d.ts","sourceRoot":"","sources":["../../lib/conversation.js"],"names":[],"mappings":"AAEA;;;;;;;;GAQG;AAEH;;;;GAIG;AACH;IACE,yBAA0B;IAE1B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MA6ME;IAxKQ,6BAA4B;IAkLtC;;;OAGG;IACH,SAFa,IAAI,CAWhB;IARC,eAAoB;IACpB,aAAe;IACf,mBAAwB;IACxB,iBAAmB;IACnB,sBAA2B;IAC3B,0BAA+B;IAKjC;;;;OAIG;IACH,4BAHW,UAAU,GACR,UAAU,CAKtB;IAED;;;;;OAKG;IACH,kCAHe,GAAG,EAAA,GACL,IAAI,CAkBhB;IAED;;;;OAIG;IACH,YAHW,MAAM,GACJ,OAAO,aAAa,EAAE,QAAQ,CAI1C;IAED;;;OAGG;IACH,YAFa,OAAO,aAAa,EAAE,QAAQ,EAAE,CAI5C;CACF;;;;;;WAtSa,MAAM;YACN,UAAU;gBACV,MAAM;iBACN,MAAM"}
+56
View File
@@ -0,0 +1,56 @@
/**
* EventHandler callback
* @typedef {(event: {[key: string]: any}) => void} EventHandlerCallbackType
*/
/**
* Inherited class for RealtimeAPI and RealtimeClient
* Adds basic event handling
* @class
*/
export class RealtimeEventHandler {
    /** Registered `.on()` handlers — NOTE(review): presumably keyed by event name; confirm in event_handler.js */
    eventHandlers: {};
    /** Registered `.onNext()` handlers — NOTE(review): presumably keyed by event name; confirm in event_handler.js */
    nextEventHandlers: {};
    /**
     * Clears all event handlers
     * @returns {true}
     */
    clearEventHandlers(): true;
    /**
     * Listen to specific events
     * @param {string} eventName The name of the event to listen to
     * @param {EventHandlerCallbackType} callback Code to execute on event
     * @returns {EventHandlerCallbackType}
     */
    on(eventName: string, callback: EventHandlerCallbackType): EventHandlerCallbackType;
    /**
     * Listen for the next event of a specified type
     * @param {string} eventName The name of the event to listen to
     * @param {EventHandlerCallbackType} callback Code to execute on event
     * @returns {EventHandlerCallbackType}
     */
    onNext(eventName: string, callback: EventHandlerCallbackType): EventHandlerCallbackType;
    /**
     * Turns off event listening for specific events
     * Calling without a callback will remove all listeners for the event
     * @param {string} eventName
     * @param {EventHandlerCallbackType} [callback]
     * @returns {true}
     */
    off(eventName: string, callback?: EventHandlerCallbackType): true;
    /**
     * Turns off event listening for the next event of a specific type
     * Calling without a callback will remove all listeners for the next event
     * @param {string} eventName
     * @param {EventHandlerCallbackType} [callback]
     * @returns {true}
     */
    offNext(eventName: string, callback?: EventHandlerCallbackType): true;
    /**
     * Executes all events in the order they were added, with .on() event handlers executing before .onNext() handlers
     * @param {string} eventName
     * @param {any} event
     * @returns {true}
     */
    dispatch(eventName: string, event: any): true;
}
/**
 * EventHandler callback.
 * Declared here because every listener method above references this type,
 * but the declaration file did not define it.
 */
export type EventHandlerCallbackType = (event: {
    [key: string]: any;
}) => void;
//# sourceMappingURL=event_handler.d.ts.map
@@ -0,0 +1 @@
{"version":3,"file":"event_handler.d.ts","sourceRoot":"","sources":["../../lib/event_handler.js"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;;GAIG;AACH;IAMI,kBAAuB;IACvB,sBAA2B;IAG7B;;;OAGG;IACH,sBAFa,IAAI,CAMhB;IAED;;;;;OAKG;IACH,cAJW,MAAM,YACN,wBAAwB,GACtB,wBAAwB,CAMpC;IAED;;;;;OAKG;IACH,kBAJW,MAAM,YACN,wBAAwB,GACtB,wBAAwB,CAKpC;IAED;;;;;;OAMG;IACH,eAJW,MAAM,aACN,wBAAwB,GACtB,IAAI,CAgBhB;IAED;;;;;;OAMG;IACH,mBAJW,MAAM,aACN,wBAAwB,GACtB,IAAI,CAgBhB;IAED;;;;;OAKG;IACH,oBAJW,MAAM,SACN,GAAG,GACD,IAAI,CAahB;CACF"}
+39
View File
@@ -0,0 +1,39 @@
/**
* Basic utilities for the RealtimeAPI
* @class
*/
export class RealtimeUtils {
// All helpers are static, so they are called as `RealtimeUtils.method(...)`.
/**
* Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
* @param {Float32Array} float32Array
* @returns {ArrayBuffer}
*/
static floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer;
/**
* Converts a base64 string to an ArrayBuffer
* @param {string} base64
* @returns {ArrayBuffer}
*/
static base64ToArrayBuffer(base64: string): ArrayBuffer;
/**
* Converts an ArrayBuffer, Int16Array or Float32Array to a base64 string
* @param {ArrayBuffer|Int16Array|Float32Array} arrayBuffer
* @returns {string}
*/
static arrayBufferToBase64(arrayBuffer: ArrayBuffer | Int16Array | Float32Array): string;
/**
* Merge two Int16Arrays from Int16Arrays or ArrayBuffers
* @param {ArrayBuffer|Int16Array} left
* @param {ArrayBuffer|Int16Array} right
* @returns {Int16Array}
*/
static mergeInt16Arrays(left: ArrayBuffer | Int16Array, right: ArrayBuffer | Int16Array): Int16Array;
/**
* Generates an id to send with events and messages
* (RealtimeAPI.send uses `RealtimeUtils.generateId('evt_')` for each outgoing event_id)
* @param {string} prefix
* @param {number} [length]
* @returns {string}
*/
static generateId(prefix: string, length?: number): string;
}
//# sourceMappingURL=utils.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../lib/utils.js"],"names":[],"mappings":"AAGA;;;GAGG;AACH;IACE;;;;OAIG;IACH,qCAHW,YAAY,GACV,WAAW,CAWvB;IAED;;;;OAIG;IACH,mCAHW,MAAM,GACJ,WAAW,CAUvB;IAED;;;;OAIG;IACH,wCAHW,WAAW,GAAC,UAAU,GAAC,YAAY,GACjC,MAAM,CAgBlB;IAED;;;;;OAKG;IACH,8BAJW,WAAW,GAAC,UAAU,SACtB,WAAW,GAAC,UAAU,GACpB,UAAU,CAoBtB;IAED;;;;;OAKG;IACH,0BAJW,MAAM,WACN,MAAM,GACJ,MAAM,CAUlB;CACF"}
+6
View File
@@ -0,0 +1,6 @@
import { RealtimeUtils } from './lib/utils.js';
import { RealtimeAPI } from './lib/api.js';
import { RealtimeConversation } from './lib/conversation.js';
import { RealtimeClient } from './lib/client.js';
export { RealtimeAPI, RealtimeConversation, RealtimeClient, RealtimeUtils };
+208
View File
@@ -0,0 +1,208 @@
import { RealtimeEventHandler } from './event_handler.js';
import { RealtimeUtils } from './utils.js';
export class RealtimeAPI extends RealtimeEventHandler {
  /**
   * Create a new RealtimeAPI instance
   * @param {{url?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
   * @returns {RealtimeAPI}
   * @throws {Error} If an apiKey is given where a global WebSocket exists and
   *   `dangerouslyAllowAPIKeyInBrowser` is not set
   */
  constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug } = {}) {
    super();
    this.defaultUrl = 'wss://api.openai.com/v1/realtime';
    this.url = url || this.defaultUrl;
    this.apiKey = apiKey || null;
    this.debug = !!debug;
    this.ws = null;
    // A global WebSocket is taken to mean "running in a browser", where
    // embedding an API key requires an explicit opt-in.
    if (globalThis.WebSocket && this.apiKey) {
      if (!dangerouslyAllowAPIKeyInBrowser) {
        throw new Error(
          `Can not provide API key in the browser without "dangerouslyAllowAPIKeyInBrowser" set to true`
        );
      }
    }
  }

  /**
   * Tells us whether or not the WebSocket is connected
   * @returns {boolean}
   */
  isConnected() {
    return !!this.ws;
  }

  /**
   * Writes WebSocket logs to console
   * Does nothing (and serializes nothing) unless `debug` is enabled
   * @param {...any} args
   * @returns {true}
   */
  log(...args) {
    // Bail out before formatting: events are logged on every send/receive,
    // so stringifying them is wasted work when debug output is off.
    if (!this.debug) {
      return true;
    }
    const date = new Date().toISOString();
    const logs = [`[Websocket/${date}]`].concat(args).map((arg) => {
      if (typeof arg === 'object' && arg !== null) {
        return JSON.stringify(arg, null, 2);
      } else {
        return arg;
      }
    });
    console.log(...logs);
    return true;
  }

  /**
   * Connects to Realtime API Websocket Server
   * @param {{model?: string}} [settings]
   * @returns {Promise<true>}
   * @throws {Error} If already connected; the returned promise rejects if
   *   the socket errors before opening
   */
  async connect({ model } = { model: 'gpt-4o-realtime-preview-2024-10-01' }) {
    if (!this.apiKey && this.url === this.defaultUrl) {
      console.warn(`No apiKey provided for connection to "${this.url}"`);
    }
    if (this.isConnected()) {
      throw new Error(`Already connected`);
    }
    // Both environments connect to the same endpoint: the configured URL
    // plus the requested model.
    const endpoint = `${this.url}${model ? `?model=${model}` : ''}`;
    if (globalThis.WebSocket) {
      /**
       * Web browser
       */
      if (this.apiKey) {
        console.warn(
          'Warning: Connecting using API key in the browser, this is not recommended'
        );
      }
      const WebSocket = globalThis.WebSocket;
      // Browsers cannot set request headers on a WebSocket, so the
      // credentials and beta flag travel as subprotocols.
      const ws = new WebSocket(endpoint, [
        'realtime',
        `openai-insecure-api-key.${this.apiKey}`,
        'openai-beta.realtime-v1',
      ]);
      ws.addEventListener('message', (event) => {
        const message = JSON.parse(event.data);
        this.receive(message.type, message);
      });
      return new Promise((resolve, reject) => {
        const connectionErrorHandler = () => {
          this.disconnect(ws);
          reject(new Error(`Could not connect to "${this.url}"`));
        };
        ws.addEventListener('error', connectionErrorHandler);
        ws.addEventListener('open', () => {
          this.log(`Connected to "${this.url}"`);
          // Once open, errors no longer reject connect(); they surface as
          // "close" events instead.
          ws.removeEventListener('error', connectionErrorHandler);
          ws.addEventListener('error', () => {
            this.disconnect(ws);
            this.log(`Error, disconnected from "${this.url}"`);
            this.dispatch('close', { error: true });
          });
          ws.addEventListener('close', () => {
            this.disconnect(ws);
            this.log(`Disconnected from "${this.url}"`);
            this.dispatch('close', { error: false });
          });
          this.ws = ws;
          resolve(true);
        });
      });
    } else {
      /**
       * Node.js
       */
      // The module name is kept in a variable (with webpackIgnore) so
      // bundlers do not try to resolve "ws" for browser builds.
      const moduleName = 'ws';
      const wsModule = await import(/* webpackIgnore: true */ moduleName);
      const WebSocket = wsModule.default;
      // Previously this branch hard-coded the default URL and model, so a
      // custom `url` or `model` was silently ignored under Node.js.
      const ws = new WebSocket(endpoint, [], {
        finishRequest: (request) => {
          // Auth
          request.setHeader('Authorization', `Bearer ${this.apiKey}`);
          request.setHeader('OpenAI-Beta', 'realtime=v1');
          request.end();
        },
      });
      ws.on('message', (data) => {
        const message = JSON.parse(data);
        this.receive(message.type, message);
      });
      return new Promise((resolve, reject) => {
        const connectionErrorHandler = () => {
          this.disconnect(ws);
          reject(new Error(`Could not connect to "${this.url}"`));
        };
        ws.on('error', connectionErrorHandler);
        ws.on('open', () => {
          this.log(`Connected to "${this.url}"`);
          // Once open, errors no longer reject connect(); they surface as
          // "close" events instead.
          ws.removeListener('error', connectionErrorHandler);
          ws.on('error', () => {
            this.disconnect(ws);
            this.log(`Error, disconnected from "${this.url}"`);
            this.dispatch('close', { error: true });
          });
          ws.on('close', () => {
            this.disconnect(ws);
            this.log(`Disconnected from "${this.url}"`);
            this.dispatch('close', { error: false });
          });
          this.ws = ws;
          resolve(true);
        });
      });
    }
  }

  /**
   * Disconnects from Realtime API server
   * @param {WebSocket} [ws] When given, the connection is only closed if this
   *   is the active socket (a stale socket is left alone)
   * @returns {true}
   */
  disconnect(ws) {
    if (!ws || this.ws === ws) {
      this.ws && this.ws.close();
      this.ws = null;
    }
    // Always return true as documented; previously a stale socket fell
    // through and returned undefined.
    return true;
  }

  /**
   * Receives an event from WebSocket and dispatches as "server.{eventName}" and "server.*" events
   * @param {string} eventName
   * @param {{[key: string]: any}} event
   * @returns {true}
   */
  receive(eventName, event) {
    this.log(`received:`, eventName, event);
    this.dispatch(`server.${eventName}`, event);
    this.dispatch('server.*', event);
    return true;
  }

  /**
   * Sends an event to WebSocket and dispatches as "client.{eventName}" and "client.*" events
   * @param {string} eventName
   * @param {{[key: string]: any}} [data] Event payload; its properties are merged into the sent event
   * @returns {true}
   * @throws {Error} If not connected, or if `data` is not an object
   */
  send(eventName, data) {
    if (!this.isConnected()) {
      throw new Error(`RealtimeAPI is not connected`);
    }
    data = data || {};
    if (typeof data !== 'object') {
      throw new Error(`data must be an object`);
    }
    // `data` is spread last, so it may override event_id and type
    const event = {
      event_id: RealtimeUtils.generateId('evt_'),
      type: eventName,
      ...data,
    };
    this.dispatch(`client.${eventName}`, event);
    this.dispatch('client.*', event);
    this.log(`sent:`, eventName, event);
    this.ws.send(JSON.stringify(event));
    return true;
  }
}
+677
View File
@@ -0,0 +1,677 @@
import { RealtimeEventHandler } from './event_handler.js';
import { RealtimeAPI } from './api.js';
import { RealtimeConversation } from './conversation.js';
import { RealtimeUtils } from './utils.js';
/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of `t` fires
 * @param {number} t Delay passed to setTimeout
 * @returns {Promise<void>}
 */
const sleep = (t) =>
  new Promise((resolve) => {
    setTimeout(() => resolve(), t);
  });
/**
* Valid audio formats
* @typedef {"pcm16"|"g711-ulaw"|"g711-alaw"} AudioFormatType
*/
/**
* @typedef {Object} AudioTranscriptionType
* @property {boolean} [enabled]
* @property {"whisper-1"} model
*/
/**
* @typedef {Object} TurnDetectionServerVadType
* @property {"server_vad"} type
* @property {number} [threshold]
* @property {number} [prefix_padding_ms]
* @property {number} [silence_duration_ms]
*/
/**
* Tool definitions
* @typedef {Object} ToolDefinitionType
* @property {"function"} [type]
* @property {string} name
* @property {string} description
* @property {{[key: string]: any}} parameters
*/
/**
* @typedef {Object} SessionResourceType
* @property {string} [model]
* @property {string[]} [modalities]
* @property {string} [instructions]
* @property {"alloy"|"shimmer"|"echo"} [voice]
* @property {AudioFormatType} [input_audio_format]
* @property {AudioFormatType} [output_audio_format]
* @property {AudioTranscriptionType|null} [input_audio_transcription]
* @property {TurnDetectionServerVadType|null} [turn_detection]
* @property {ToolDefinitionType[]} [tools]
* @property {"auto"|"none"|"required"|{type:"function",name:string}} [tool_choice]
* @property {number} [temperature]
* @property {number|"inf"} [max_response_output_tokens]
*/
/**
* @typedef {"in_progress"|"completed"|"incomplete"} ItemStatusType
*/
/**
* @typedef {Object} InputTextContentType
* @property {"input_text"} type
* @property {string} text
*/
/**
* @typedef {Object} InputAudioContentType
* @property {"input_audio"} type
* @property {string} [audio] base64-encoded audio data
* @property {string|null} [transcript]
*/
/**
* @typedef {Object} TextContentType
* @property {"text"} type
* @property {string} text
*/
/**
* @typedef {Object} AudioContentType
* @property {"audio"} type
* @property {string} [audio] base64-encoded audio data
* @property {string|null} [transcript]
*/
/**
* @typedef {Object} SystemItemType
* @property {string|null} [previous_item_id]
* @property {"message"} type
* @property {ItemStatusType} status
* @property {"system"} role
* @property {Array<InputTextContentType>} content
*/
/**
* @typedef {Object} UserItemType
* @property {string|null} [previous_item_id]
* @property {"message"} type
* @property {ItemStatusType} status
* @property {"user"} role
* @property {Array<InputTextContentType|InputAudioContentType>} content
*/
/**
* @typedef {Object} AssistantItemType
* @property {string|null} [previous_item_id]
* @property {"message"} type
* @property {ItemStatusType} status
* @property {"assistant"} role
* @property {Array<TextContentType|AudioContentType>} content
*/
/**
* @typedef {Object} FunctionCallItemType
* @property {string|null} [previous_item_id]
* @property {"function_call"} type
* @property {ItemStatusType} status
* @property {string} call_id
* @property {string} name
* @property {string} arguments
*/
/**
* @typedef {Object} FunctionCallOutputItemType
* @property {string|null} [previous_item_id]
* @property {"function_call_output"} type
* @property {string} call_id
* @property {string} output
*/
/**
* @typedef {Object} FormattedToolType
* @property {"function"} type
* @property {string} name
* @property {string} call_id
* @property {string} arguments
*/
/**
* @typedef {Object} FormattedPropertyType
* @property {Int16Array} [audio]
* @property {string} [text]
* @property {string} [transcript]
* @property {FormattedToolType} [tool]
* @property {string} [output]
* @property {any} [file]
*/
/**
* @typedef {Object} FormattedItemType
* @property {string} id
* @property {string} object
* @property {"user"|"assistant"|"system"} [role]
* @property {FormattedPropertyType} formatted
*/
/**
* @typedef {SystemItemType|UserItemType|AssistantItemType|FunctionCallItemType|FunctionCallOutputItemType} BaseItemType
*/
/**
* @typedef {FormattedItemType & BaseItemType} ItemType
*/
/**
* @typedef {Object} IncompleteResponseStatusType
* @property {"incomplete"} type
* @property {"interruption"|"max_output_tokens"|"content_filter"} reason
*/
/**
* @typedef {Object} FailedResponseStatusType
* @property {"failed"} type
* @property {{code: string, message: string}|null} error
*/
/**
* @typedef {Object} UsageType
* @property {number} total_tokens
* @property {number} input_tokens
* @property {number} output_tokens
*/
/**
* @typedef {Object} ResponseResourceType
* @property {"in_progress"|"completed"|"incomplete"|"cancelled"|"failed"} status
* @property {IncompleteResponseStatusType|FailedResponseStatusType|null} status_details
* @property {ItemType[]} output
* @property {UsageType|null} usage
*/
/**
* RealtimeClient Class
* @class
*/
export class RealtimeClient extends RealtimeEventHandler {
  /**
   * Create a new RealtimeClient instance
   * @param {{url?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
   *   Passed through unchanged to the underlying RealtimeAPI instance
   */
  constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug } = {}) {
    super();
    this.defaultSessionConfig = {
      modalities: ['text', 'audio'],
      instructions: '',
      voice: 'alloy',
      input_audio_format: 'pcm16',
      output_audio_format: 'pcm16',
      input_audio_transcription: null,
      turn_detection: null,
      tools: [],
      tool_choice: 'auto',
      temperature: 0.8,
      max_response_output_tokens: 4096,
    };
    this.sessionConfig = {};
    this.transcriptionModels = [
      {
        model: 'whisper-1',
      },
    ];
    this.defaultServerVadConfig = {
      type: 'server_vad',
      threshold: 0.5, // 0.0 to 1.0,
      prefix_padding_ms: 300, // How much audio to include in the audio stream before the speech starts.
      silence_duration_ms: 200, // How long to wait to mark the speech as stopped.
    };
    this.realtime = new RealtimeAPI({
      url,
      apiKey,
      dangerouslyAllowAPIKeyInBrowser,
      debug,
    });
    this.conversation = new RealtimeConversation();
    this._resetConfig();
    this._addAPIEventHandlers();
  }

  /**
   * Resets sessionConfig (deep copy of the defaults), registered tools,
   * the session-created flag and the local input audio buffer
   * @private
   * @returns {true}
   */
  _resetConfig() {
    this.sessionCreated = false;
    this.tools = {};
    this.sessionConfig = JSON.parse(JSON.stringify(this.defaultSessionConfig));
    this.inputAudioBuffer = new Int16Array(0);
    return true;
  }

  /**
   * Sets up event handlers for a fully-functional application control flow:
   * re-emits raw client/server events as `realtime.event`, keeps the
   * RealtimeConversation in sync, and runs registered tools when the model calls them
   * @private
   * @returns {true}
   */
  _addAPIEventHandlers() {
    // Event Logging handlers
    this.realtime.on('client.*', (event) => {
      const realtimeEvent = {
        time: new Date().toISOString(),
        source: 'client',
        event: event,
      };
      this.dispatch('realtime.event', realtimeEvent);
    });
    this.realtime.on('server.*', (event) => {
      const realtimeEvent = {
        time: new Date().toISOString(),
        source: 'server',
        event: event,
      };
      this.dispatch('realtime.event', realtimeEvent);
    });

    // Handles session created event, can optionally wait for it
    this.realtime.on(
      'server.session.created',
      () => (this.sessionCreated = true)
    );

    // Setup for application control flow
    const handler = (event, ...args) => {
      const { item, delta } = this.conversation.processEvent(event, ...args);
      return { item, delta };
    };
    const handlerWithDispatch = (event, ...args) => {
      const { item, delta } = handler(event, ...args);
      if (item) {
        // FIXME: If statement is only here because item.input_audio_transcription.completed
        //        can fire before `item.created`, resulting in empty item.
        //        This happens in VAD mode with empty audio
        this.dispatch('conversation.updated', { item, delta });
      }
      return { item, delta };
    };

    // Runs the registered handler for a function call and reports the result
    // (or the failure) back to the server, then asks for a new response.
    const callTool = async (tool) => {
      let output;
      try {
        const jsonArguments = JSON.parse(tool.arguments);
        const toolConfig = this.tools[tool.name];
        if (!toolConfig) {
          throw new Error(`Tool "${tool.name}" has not been added`);
        }
        output = JSON.stringify(await toolConfig.handler(jsonArguments));
      } catch (e) {
        // Handlers may throw non-Error values; fall back to their string form.
        output = JSON.stringify({ error: e?.message ?? String(e) });
      }
      // The handler is awaited, so the socket may be gone by now. send() throws
      // when disconnected, which would surface as an unhandled rejection because
      // nothing awaits callTool — drop the result instead.
      if (!this.realtime.isConnected()) {
        return;
      }
      this.realtime.send('conversation.item.create', {
        item: {
          type: 'function_call_output',
          call_id: tool.call_id,
          output,
        },
      });
      this.createResponse();
    };

    // Handlers to update internal conversation state
    this.realtime.on('server.response.created', handler);
    this.realtime.on('server.response.output_item.added', handler);
    this.realtime.on('server.response.content_part.added', handler);
    this.realtime.on('server.input_audio_buffer.speech_started', (event) => {
      handler(event);
      this.dispatch('conversation.interrupted');
    });
    this.realtime.on('server.input_audio_buffer.speech_stopped', (event) =>
      handler(event, this.inputAudioBuffer)
    );

    // Handlers to update application state
    this.realtime.on('server.conversation.item.created', (event) => {
      const { item } = handlerWithDispatch(event);
      this.dispatch('conversation.item.appended', { item });
      if (item.status === 'completed') {
        this.dispatch('conversation.item.completed', { item });
      }
    });
    this.realtime.on('server.conversation.item.truncated', handlerWithDispatch);
    this.realtime.on('server.conversation.item.deleted', handlerWithDispatch);
    this.realtime.on(
      'server.conversation.item.input_audio_transcription.completed',
      handlerWithDispatch
    );
    this.realtime.on(
      'server.response.audio_transcript.delta',
      handlerWithDispatch
    );
    this.realtime.on('server.response.audio.delta', handlerWithDispatch);
    this.realtime.on(
      'server.response.function_call_arguments.delta',
      handlerWithDispatch
    );
    this.realtime.on('server.response.output_item.done', async (event) => {
      const { item } = handlerWithDispatch(event);
      if (item.status === 'completed') {
        this.dispatch('conversation.item.completed', { item });
      }
      if (item.formatted.tool) {
        // Intentionally not awaited: the tool runs in the background.
        callTool(item.formatted.tool);
      }
    });
    return true;
  }

  /**
   * Tells us whether the realtime socket is connected.
   * Delegates to RealtimeAPI; the `sessionCreated` flag is not consulted here
   * (use waitForSessionCreated() for that).
   * @returns {boolean}
   */
  isConnected() {
    return this.realtime.isConnected();
  }

  /**
   * Resets the client instance entirely: disconnects, drops every event
   * handler (including user-registered ones), clears active config and
   * re-installs the internal API handlers
   * @returns {true}
   */
  reset() {
    this.disconnect();
    this.clearEventHandlers();
    this.realtime.clearEventHandlers();
    this._resetConfig();
    this._addAPIEventHandlers();
    return true;
  }

  /**
   * Connects to the Realtime WebSocket API
   * and pushes the current session config to the server
   * @returns {Promise<true>}
   * @throws {Error} If already connected
   */
  async connect() {
    if (this.isConnected()) {
      throw new Error(`Already connected, use .disconnect() first`);
    }
    await this.realtime.connect();
    this.updateSession();
    return true;
  }

  /**
   * Waits for a session.created event to be executed before proceeding
   * @returns {Promise<true>}
   * @throws {Error} If not connected when called
   */
  async waitForSessionCreated() {
    if (!this.isConnected()) {
      throw new Error(`Not connected, use .connect() first`);
    }
    // Polls the flag set by the 'server.session.created' handler.
    while (!this.sessionCreated) {
      await sleep(1);
    }
    return true;
  }

  /**
   * Disconnects from the Realtime API and clears the conversation history
   */
  disconnect() {
    this.sessionCreated = false;
    this.conversation.clear();
    this.realtime.isConnected() && this.realtime.disconnect();
  }

  /**
   * Gets the active turn detection mode
   * @returns {"server_vad"|null}
   */
  getTurnDetectionType() {
    return this.sessionConfig.turn_detection?.type || null;
  }

  /**
   * Add a tool and handler, then push the updated tool list via updateSession()
   * @param {ToolDefinitionType} definition
   * @param {function} handler Called with the parsed JSON arguments of the function call
   * @returns {{definition: ToolDefinitionType, handler: function}}
   * @throws {Error} If the name is missing or already registered, or handler is not a function
   */
  addTool(definition, handler) {
    if (!definition?.name) {
      throw new Error(`Missing tool name in definition`);
    }
    const name = definition?.name;
    if (this.tools[name]) {
      throw new Error(
        `Tool "${name}" already added. Please use .removeTool("${name}") before trying to add again.`
      );
    }
    if (typeof handler !== 'function') {
      throw new Error(`Tool "${name}" handler must be a function`);
    }
    this.tools[name] = { definition, handler };
    this.updateSession();
    return this.tools[name];
  }

  /**
   * Removes a tool from the local registry
   * @param {string} name
   * @returns {true}
   * @throws {Error} If no tool with that name is registered
   */
  removeTool(name) {
    if (!this.tools[name]) {
      throw new Error(`Tool "${name}" does not exist, can not be removed.`);
    }
    // NOTE(review): unlike addTool(), this does not call updateSession(), so the
    // removal only reaches the server with the next session update — confirm intended.
    delete this.tools[name];
    return true;
  }

  /**
   * Deletes an item
   * @param {string} id
   * @returns {true}
   */
  deleteItem(id) {
    this.realtime.send('conversation.item.delete', { item_id: id });
    return true;
  }

  /**
   * Updates session configuration
   * If the client is not yet connected, will save details and instantiate upon connection
   * @param {SessionResourceType} [sessionConfig] Only fields that are not `undefined` are applied
   * @returns {true}
   * @throws {Error} If a definition in `tools` reuses the name of a tool registered via addTool()
   */
  updateSession({
    modalities = void 0,
    instructions = void 0,
    voice = void 0,
    input_audio_format = void 0,
    output_audio_format = void 0,
    input_audio_transcription = void 0,
    turn_detection = void 0,
    tools = void 0,
    tool_choice = void 0,
    temperature = void 0,
    max_response_output_tokens = void 0,
  } = {}) {
    // Copy over every field the caller actually provided; `null` is a real value
    // (e.g. turn_detection: null), only `undefined` means "leave unchanged".
    const requested = {
      modalities,
      instructions,
      voice,
      input_audio_format,
      output_audio_format,
      input_audio_transcription,
      turn_detection,
      tools,
      tool_choice,
      temperature,
      max_response_output_tokens,
    };
    for (const [key, value] of Object.entries(requested)) {
      if (value !== void 0) {
        this.sessionConfig[key] = value;
      }
    }

    // Load tools from tool definitions + already loaded tools
    const definedTools = (tools || []).map((toolDefinition) => {
      const definition = {
        type: 'function',
        ...toolDefinition,
      };
      if (this.tools[definition?.name]) {
        throw new Error(`Tool "${definition?.name}" has already been defined`);
      }
      return definition;
    });
    const registeredTools = Object.keys(this.tools).map((key) => {
      return {
        type: 'function',
        ...this.tools[key].definition,
      };
    });
    const session = { ...this.sessionConfig };
    session.tools = [...definedTools, ...registeredTools];
    if (this.realtime.isConnected()) {
      this.realtime.send('session.update', { session });
    }
    return true;
  }

  /**
   * Sends user message content and generates a response.
   * Raw `input_audio` payloads (ArrayBuffer / Int16Array) are base64-encoded
   * on a copy; the caller's content objects are left untouched.
   * @param {Array<InputTextContentType|InputAudioContentType>} content
   * @returns {true}
   */
  sendUserMessageContent(content = []) {
    if (content.length) {
      const outgoing = content.map((c) => {
        const hasRawAudio =
          c.type === 'input_audio' &&
          (c.audio instanceof ArrayBuffer || c.audio instanceof Int16Array);
        return hasRawAudio
          ? { ...c, audio: RealtimeUtils.arrayBufferToBase64(c.audio) }
          : c;
      });
      this.realtime.send('conversation.item.create', {
        item: {
          type: 'message',
          role: 'user',
          content: outgoing,
        },
      });
    }
    // A response is requested even when no content was supplied.
    this.createResponse();
    return true;
  }

  /**
   * Appends user audio to the existing audio buffer
   * (sent to the server and mirrored into the local inputAudioBuffer)
   * @param {Int16Array|ArrayBuffer} arrayBuffer
   * @returns {true}
   */
  appendInputAudio(arrayBuffer) {
    if (arrayBuffer.byteLength > 0) {
      this.realtime.send('input_audio_buffer.append', {
        audio: RealtimeUtils.arrayBufferToBase64(arrayBuffer),
      });
      this.inputAudioBuffer = RealtimeUtils.mergeInt16Arrays(
        this.inputAudioBuffer,
        arrayBuffer
      );
    }
    return true;
  }

  /**
   * Forces a model response generation.
   * When turn detection is off and audio has been buffered locally, the
   * buffer is committed first and handed to the conversation for the next user item.
   * @returns {true}
   */
  createResponse() {
    if (
      this.getTurnDetectionType() === null &&
      this.inputAudioBuffer.byteLength > 0
    ) {
      this.realtime.send('input_audio_buffer.commit');
      this.conversation.queueInputAudio(this.inputAudioBuffer);
      this.inputAudioBuffer = new Int16Array(0);
    }
    this.realtime.send('response.create');
    return true;
  }

  /**
   * Cancels the ongoing server generation and truncates ongoing generation, if applicable
   * If no id provided, will simply send `response.cancel`
   * @param {string} id The id of the message to cancel
   * @param {number} [sampleCount] The number of samples to truncate past for the ongoing generation
   * @returns {{item: (AssistantItemType | null)}}
   * @throws {Error} If the item is unknown, is not an assistant message, or has no audio content
   */
  cancelResponse(id, sampleCount = 0) {
    if (!id) {
      this.realtime.send('response.cancel');
      return { item: null };
    }
    const item = this.conversation.getItem(id);
    if (!item) {
      throw new Error(`Could not find item "${id}"`);
    }
    if (item.type !== 'message') {
      throw new Error(`Can only cancelResponse messages with type "message"`);
    }
    if (item.role !== 'assistant') {
      throw new Error(`Can only cancelResponse messages with role "assistant"`);
    }
    this.realtime.send('response.cancel');
    const audioIndex = item.content.findIndex((c) => c.type === 'audio');
    if (audioIndex === -1) {
      throw new Error(`Could not find audio on item to cancel`);
    }
    this.realtime.send('conversation.item.truncate', {
      item_id: id,
      content_index: audioIndex,
      // Convert a sample offset into milliseconds at the conversation's sample rate.
      audio_end_ms: Math.floor(
        (sampleCount / this.conversation.defaultFrequency) * 1000
      ),
    });
    return { item };
  }

  /**
   * Utility for waiting for the next `conversation.item.appended` event to be triggered by the server
   * @returns {Promise<{item: ItemType}>}
   */
  async waitForNextItem() {
    // Resolve straight from the one-shot listener instead of polling a flag.
    const { item } = await new Promise((resolve) =>
      this.onNext('conversation.item.appended', resolve)
    );
    return { item };
  }

  /**
   * Utility for waiting for the next `conversation.item.completed` event to be triggered by the server
   * @returns {Promise<{item: ItemType}>}
   */
  async waitForNextCompletedItem() {
    // Resolve straight from the one-shot listener instead of polling a flag.
    const { item } = await new Promise((resolve) =>
      this.onNext('conversation.item.completed', resolve)
    );
    return { item };
  }
}
@@ -0,0 +1,301 @@
import { RealtimeUtils } from './utils.js';
/**
* Contains text and audio information about an item
* Can also be used as a delta
* @typedef {Object} ItemContentDeltaType
* @property {string} [text]
* @property {Int16Array} [audio]
* @property {string} [arguments]
* @property {string} [transcript]
*/
/**
* RealtimeConversation holds conversation history
* and performs event validation for RealtimeAPI
* @class
*/
export class RealtimeConversation {
  // Sample rate used to convert between milliseconds and Int16 sample indices
  defaultFrequency = 24_000; // 24,000 Hz

  // One processor per server event type. Each returns `{ item, delta }`, where
  // either may be null. Arrow functions keep `this` bound to the instance.
  EventProcessors = {
    'conversation.item.created': (event) => {
      const { item } = event;
      // deep copy values
      const newItem = JSON.parse(JSON.stringify(item));
      if (!this.itemLookup[newItem.id]) {
        this.itemLookup[newItem.id] = newItem;
        this.items.push(newItem);
      }
      newItem.formatted = {};
      newItem.formatted.audio = new Int16Array(0);
      newItem.formatted.text = '';
      newItem.formatted.transcript = '';
      // If we have a speech item, can populate audio
      if (this.queuedSpeechItems[newItem.id]) {
        newItem.formatted.audio = this.queuedSpeechItems[newItem.id].audio;
        delete this.queuedSpeechItems[newItem.id]; // free up some memory
      }
      // Populate formatted text if it comes out on creation
      if (newItem.content) {
        const textContent = newItem.content.filter((c) =>
          ['text', 'input_text'].includes(c.type),
        );
        for (const content of textContent) {
          newItem.formatted.text += content.text;
        }
      }
      // If we have a transcript item, can pre-populate transcript
      if (this.queuedTranscriptItems[newItem.id]) {
        // Read the entry queued for THIS item; the queue is keyed by item id
        newItem.formatted.transcript =
          this.queuedTranscriptItems[newItem.id].transcript;
        delete this.queuedTranscriptItems[newItem.id];
      }
      if (newItem.type === 'message') {
        if (newItem.role === 'user') {
          newItem.status = 'completed';
          if (this.queuedInputAudio) {
            newItem.formatted.audio = this.queuedInputAudio;
            this.queuedInputAudio = null;
          }
        } else {
          newItem.status = 'in_progress';
        }
      } else if (newItem.type === 'function_call') {
        newItem.formatted.tool = {
          type: 'function',
          name: newItem.name,
          call_id: newItem.call_id,
          arguments: '',
        };
        newItem.status = 'in_progress';
      } else if (newItem.type === 'function_call_output') {
        newItem.status = 'completed';
        newItem.formatted.output = newItem.output;
      }
      return { item: newItem, delta: null };
    },
    'conversation.item.truncated': (event) => {
      const { item_id, audio_end_ms } = event;
      const item = this.itemLookup[item_id];
      if (!item) {
        throw new Error(`item.truncated: Item "${item_id}" not found`);
      }
      // Milliseconds -> sample index at defaultFrequency
      const endIndex = Math.floor(
        (audio_end_ms * this.defaultFrequency) / 1000,
      );
      item.formatted.transcript = '';
      item.formatted.audio = item.formatted.audio.slice(0, endIndex);
      return { item, delta: null };
    },
    'conversation.item.deleted': (event) => {
      const { item_id } = event;
      const item = this.itemLookup[item_id];
      if (!item) {
        throw new Error(`item.deleted: Item "${item_id}" not found`);
      }
      delete this.itemLookup[item.id];
      const index = this.items.indexOf(item);
      if (index > -1) {
        this.items.splice(index, 1);
      }
      return { item, delta: null };
    },
    'conversation.item.input_audio_transcription.completed': (event) => {
      const { item_id, content_index, transcript } = event;
      const item = this.itemLookup[item_id];
      // We use a single space to represent an empty transcript for .formatted values
      // Otherwise it looks like no transcript provided
      const formattedTranscript = transcript || ' ';
      if (!item) {
        // We can receive transcripts in VAD mode before item.created
        // This happens specifically when audio is empty
        this.queuedTranscriptItems[item_id] = {
          transcript: formattedTranscript,
        };
        return { item: null, delta: null };
      } else {
        item.content[content_index].transcript = transcript;
        item.formatted.transcript = formattedTranscript;
        return { item, delta: { transcript } };
      }
    },
    'input_audio_buffer.speech_started': (event) => {
      const { item_id, audio_start_ms } = event;
      this.queuedSpeechItems[item_id] = { audio_start_ms };
      return { item: null, delta: null };
    },
    'input_audio_buffer.speech_stopped': (event, inputAudioBuffer) => {
      const { item_id, audio_end_ms } = event;
      // No speech_started recorded for this item (e.g. the queue was reset by
      // clear() in between): start an empty span instead of throwing a TypeError.
      if (!this.queuedSpeechItems[item_id]) {
        this.queuedSpeechItems[item_id] = { audio_start_ms: audio_end_ms };
      }
      const speech = this.queuedSpeechItems[item_id];
      speech.audio_end_ms = audio_end_ms;
      if (inputAudioBuffer) {
        const startIndex = Math.floor(
          (speech.audio_start_ms * this.defaultFrequency) / 1000,
        );
        const endIndex = Math.floor(
          (speech.audio_end_ms * this.defaultFrequency) / 1000,
        );
        speech.audio = inputAudioBuffer.slice(startIndex, endIndex);
      }
      return { item: null, delta: null };
    },
    'response.created': (event) => {
      const { response } = event;
      if (!this.responseLookup[response.id]) {
        this.responseLookup[response.id] = response;
        this.responses.push(response);
      }
      return { item: null, delta: null };
    },
    'response.output_item.added': (event) => {
      const { response_id, item } = event;
      const response = this.responseLookup[response_id];
      if (!response) {
        throw new Error(
          `response.output_item.added: Response "${response_id}" not found`,
        );
      }
      // Only the item id is recorded on the response
      response.output.push(item.id);
      return { item: null, delta: null };
    },
    'response.output_item.done': (event) => {
      const { item } = event;
      if (!item) {
        throw new Error(`response.output_item.done: Missing "item"`);
      }
      const foundItem = this.itemLookup[item.id];
      if (!foundItem) {
        throw new Error(
          `response.output_item.done: Item "${item.id}" not found`,
        );
      }
      foundItem.status = item.status;
      return { item: foundItem, delta: null };
    },
    'response.content_part.added': (event) => {
      const { item_id, part } = event;
      const item = this.itemLookup[item_id];
      if (!item) {
        throw new Error(
          `response.content_part.added: Item "${item_id}" not found`,
        );
      }
      item.content.push(part);
      return { item, delta: null };
    },
    'response.audio_transcript.delta': (event) => {
      const { item_id, content_index, delta } = event;
      const item = this.itemLookup[item_id];
      if (!item) {
        throw new Error(
          `response.audio_transcript.delta: Item "${item_id}" not found`,
        );
      }
      item.content[content_index].transcript += delta;
      item.formatted.transcript += delta;
      return { item, delta: { transcript: delta } };
    },
    'response.audio.delta': (event) => {
      const { item_id, content_index, delta } = event;
      const item = this.itemLookup[item_id];
      if (!item) {
        throw new Error(`response.audio.delta: Item "${item_id}" not found`);
      }
      // This never gets rendered, we care about the file data instead
      // item.content[content_index].audio += delta;
      const arrayBuffer = RealtimeUtils.base64ToArrayBuffer(delta);
      const appendValues = new Int16Array(arrayBuffer);
      item.formatted.audio = RealtimeUtils.mergeInt16Arrays(
        item.formatted.audio,
        appendValues,
      );
      return { item, delta: { audio: appendValues } };
    },
    'response.function_call_arguments.delta': (event) => {
      const { item_id, delta } = event;
      const item = this.itemLookup[item_id];
      if (!item) {
        throw new Error(
          `response.function_call_arguments.delta: Item "${item_id}" not found`,
        );
      }
      item.arguments += delta;
      item.formatted.tool.arguments += delta;
      return { item, delta: { arguments: delta } };
    },
  };

  /**
   * Create a new RealtimeConversation instance
   * @returns {RealtimeConversation}
   */
  constructor() {
    this.clear();
  }

  /**
   * Clears the conversation history and resets to default
   * (items, responses and every pending audio / transcript queue)
   * @returns {true}
   */
  clear() {
    this.itemLookup = {};
    this.items = [];
    this.responseLookup = {};
    this.responses = [];
    this.queuedSpeechItems = {};
    this.queuedTranscriptItems = {};
    this.queuedInputAudio = null;
    return true;
  }

  /**
   * Queue input audio for manual speech event
   * (attached to the next user message item that gets created)
   * @param {Int16Array} inputAudio
   * @returns {Int16Array}
   */
  queueInputAudio(inputAudio) {
    this.queuedInputAudio = inputAudio;
    return inputAudio;
  }

  /**
   * Process an event from the WebSocket server and compose items
   * @param {Object} event Must carry `event_id` and `type`
   * @param {...any} args Extra arguments forwarded to the event processor
   * @returns {{item: import('./client.js').ItemType | null, delta: ItemContentDeltaType | null}}
   * @throws {Error} If `event_id` or `type` is missing, or no processor exists for `type`
   */
  processEvent(event, ...args) {
    if (!event.event_id) {
      console.error(event);
      throw new Error(`Missing "event_id" on event`);
    }
    if (!event.type) {
      console.error(event);
      throw new Error(`Missing "type" on event`);
    }
    const eventProcessor = this.EventProcessors[event.type];
    if (!eventProcessor) {
      throw new Error(
        `Missing conversation event processor for "${event.type}"`,
      );
    }
    return eventProcessor.call(this, event, ...args);
  }

  /**
   * Retrieves an item by id
   * @param {string} id
   * @returns {import('./client.js').ItemType | null} null when the id is unknown
   */
  getItem(id) {
    return this.itemLookup[id] || null;
  }

  /**
   * Retrieves all items in the conversation
   * @returns {import('./client.js').ItemType[]} A shallow copy of the item list
   */
  getItems() {
    return this.items.slice();
  }
}
@@ -0,0 +1,118 @@
/**
* EventHandler callback
* @typedef {(event: {[key: string]: any}) => void} EventHandlerCallbackType
*/
/**
* Inherited class for RealtimeAPI and RealtimeClient
* Adds basic event handling
* @class
*/
export class RealtimeEventHandler {
  /**
   * Create a new RealtimeEventHandler instance
   * @returns {RealtimeEventHandler}
   */
  constructor() {
    // eventName -> persistent listeners registered with .on()
    this.eventHandlers = {};
    // eventName -> one-shot listeners registered with .onNext()
    this.nextEventHandlers = {};
  }

  /**
   * Clears all event handlers
   * @returns {true}
   */
  clearEventHandlers() {
    this.eventHandlers = {};
    this.nextEventHandlers = {};
    return true;
  }

  /**
   * Listen to specific events
   * @param {string} eventName The name of the event to listen to
   * @param {EventHandlerCallbackType} callback Code to execute on event
   * @returns {EventHandlerCallbackType} The callback, so it can later be passed to .off()
   */
  on(eventName, callback) {
    this.eventHandlers[eventName] = this.eventHandlers[eventName] || [];
    this.eventHandlers[eventName].push(callback);
    return callback;
  }

  /**
   * Listen for the next event of a specified type
   * @param {string} eventName The name of the event to listen to
   * @param {EventHandlerCallbackType} callback Code to execute on event
   * @returns {EventHandlerCallbackType} The callback, so it can later be passed to .offNext()
   */
  onNext(eventName, callback) {
    this.nextEventHandlers[eventName] = this.nextEventHandlers[eventName] || [];
    this.nextEventHandlers[eventName].push(callback);
    return callback;
  }

  /**
   * Turns off event listening for specific events
   * Calling without a callback will remove all listeners for the event
   * @param {string} eventName
   * @param {EventHandlerCallbackType} [callback]
   * @returns {true}
   * @throws {Error} If a callback is given but is not registered for the event
   */
  off(eventName, callback) {
    const handlers = this.eventHandlers[eventName] || [];
    if (callback) {
      const index = handlers.indexOf(callback);
      if (index === -1) {
        throw new Error(
          `Could not turn off specified event listener for "${eventName}": not found as a listener`,
        );
      }
      handlers.splice(index, 1);
    } else {
      delete this.eventHandlers[eventName];
    }
    return true;
  }

  /**
   * Turns off event listening for the next event of a specific type
   * Calling without a callback will remove all listeners for the next event
   * @param {string} eventName
   * @param {EventHandlerCallbackType} [callback]
   * @returns {true}
   * @throws {Error} If a callback is given but is not registered for the event
   */
  offNext(eventName, callback) {
    const nextHandlers = this.nextEventHandlers[eventName] || [];
    if (callback) {
      const index = nextHandlers.indexOf(callback);
      if (index === -1) {
        throw new Error(
          `Could not turn off specified next event listener for "${eventName}": not found as a listener`,
        );
      }
      nextHandlers.splice(index, 1);
    } else {
      delete this.nextEventHandlers[eventName];
    }
    return true;
  }

  /**
   * Executes all events in the order they were added, with .on() event handlers executing before .onNext() handlers
   * @param {string} eventName
   * @param {any} event
   * @returns {true}
   */
  dispatch(eventName, event) {
    // Iterate over snapshots so handlers may (un)register listeners while running.
    const handlers = [...(this.eventHandlers[eventName] || [])];
    for (const handler of handlers) {
      handler(event);
    }
    // The one-shot snapshot is taken after the persistent handlers ran, so an
    // .onNext() registered by one of them still fires during this dispatch.
    const nextHandlers = [...(this.nextEventHandlers[eventName] || [])];
    // Detach the one-shot list BEFORE invoking it: an .onNext() registered from
    // inside a one-shot handler then survives for the following dispatch, and the
    // listeners stay one-shot even if one of them throws.
    delete this.nextEventHandlers[eventName];
    for (const nextHandler of nextHandlers) {
      nextHandler(event);
    }
    return true;
  }
}
+102
View File
@@ -0,0 +1,102 @@
// Local bindings for the global base64 helpers
const { atob, btoa } = globalThis;
/**
* Basic utilities for the RealtimeAPI
* @class
*/
export class RealtimeUtils {
  /**
   * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
   * Samples are clamped to [-1, 1] and written as little-endian 16-bit integers
   * @param {Float32Array} float32Array
   * @returns {ArrayBuffer}
   */
  static floatTo16BitPCM(float32Array) {
    const buffer = new ArrayBuffer(float32Array.length * 2);
    const view = new DataView(buffer);
    let offset = 0;
    for (let i = 0; i < float32Array.length; i++, offset += 2) {
      const s = Math.max(-1, Math.min(1, float32Array[i]));
      // Asymmetric scaling: -1 maps to -32768, +1 maps to 32767
      view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
    }
    return buffer;
  }

  /**
   * Converts a base64 string to an ArrayBuffer
   * @param {string} base64
   * @returns {ArrayBuffer}
   */
  static base64ToArrayBuffer(base64) {
    const binaryString = atob(base64);
    const len = binaryString.length;
    const bytes = new Uint8Array(len);
    for (let i = 0; i < len; i++) {
      bytes[i] = binaryString.charCodeAt(i);
    }
    return bytes.buffer;
  }

  /**
   * Converts an ArrayBuffer, Int16Array or Float32Array to a base64 string
   * Float32Array input is first converted to 16-bit PCM
   * @param {ArrayBuffer|Int16Array|Float32Array} arrayBuffer
   * @returns {string}
   */
  static arrayBufferToBase64(arrayBuffer) {
    let bytes;
    if (arrayBuffer instanceof Float32Array) {
      bytes = new Uint8Array(RealtimeUtils.floatTo16BitPCM(arrayBuffer));
    } else if (arrayBuffer instanceof Int16Array) {
      // Honour byteOffset/byteLength: an Int16Array may be a view (subarray)
      // over a larger buffer, and only the viewed samples must be encoded.
      bytes = new Uint8Array(
        arrayBuffer.buffer,
        arrayBuffer.byteOffset,
        arrayBuffer.byteLength,
      );
    } else {
      bytes = new Uint8Array(arrayBuffer);
    }
    let binary = '';
    // Convert in chunks so that apply() is never handed an oversized argument list
    const chunkSize = 0x8000; // 32KB chunk size
    for (let i = 0; i < bytes.length; i += chunkSize) {
      const chunk = bytes.subarray(i, i + chunkSize);
      binary += String.fromCharCode.apply(null, chunk);
    }
    return btoa(binary);
  }

  /**
   * Merge two Int16Arrays from Int16Arrays or ArrayBuffers
   * @param {ArrayBuffer|Int16Array} left
   * @param {ArrayBuffer|Int16Array} right
   * @returns {Int16Array} A new array holding `left` followed by `right`
   * @throws {Error} If either input is neither an Int16Array nor an ArrayBuffer
   */
  static mergeInt16Arrays(left, right) {
    if (left instanceof ArrayBuffer) {
      left = new Int16Array(left);
    }
    if (right instanceof ArrayBuffer) {
      right = new Int16Array(right);
    }
    if (!(left instanceof Int16Array) || !(right instanceof Int16Array)) {
      throw new Error(`Both items must be Int16Array`);
    }
    const newValues = new Int16Array(left.length + right.length);
    newValues.set(left, 0);
    newValues.set(right, left.length);
    return newValues;
  }

  /**
   * Generates an id to send with events and messages
   * @param {string} prefix
   * @param {number} [length] Total length of the id, prefix included
   * @returns {string} The prefix followed by random characters; just the prefix
   *   when it is already `length` characters or longer
   */
  static generateId(prefix, length = 21) {
    // 58-character alphabet: digits and letters without the look-alikes 0, O, I and l
    const chars = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
    // Clamp at zero so an over-long prefix does not make Array() throw a RangeError
    const randomLength = Math.max(0, length - prefix.length);
    const str = Array(randomLength)
      .fill(0)
      .map((_) => chars[Math.floor(Math.random() * chars.length)])
      .join('');
    return `${prefix}${str}`;
  }
}
+6
View File
@@ -0,0 +1,6 @@
import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
import { WavPacker } from './lib/wav_packer.js';
import { WavStreamPlayer } from './lib/wav_stream_player.js';
import { WavRecorder } from './lib/wav_recorder.js';
export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
//# sourceMappingURL=index.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.js"],"names":[],"mappings":"8BAC8B,kCAAkC;0BADtC,qBAAqB;gCAEf,4BAA4B;4BAChC,uBAAuB"}
+70
View File
@@ -0,0 +1,70 @@
/**
* Output of AudioAnalysis for the frequency domain of the audio
* @typedef {Object} AudioAnalysisOutputType
* @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
* @property {number[]} frequencies Raw frequency bucket values
* @property {string[]} labels Labels for the frequency bucket values
*/
/**
* Analyzes audio for visual output
* @class
*/
export class AudioAnalysis {
/**
* Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
* returns human-readable formatting and labels
* @param {AnalyserNode} analyser Only read when `fftResult` is not supplied
* @param {number} sampleRate Sample rate in Hz; half of it is used as the top frequency
* @param {Float32Array} [fftResult] Pre-computed FFT frame in decibels; read from `analyser` when omitted
* @param {"frequency"|"music"|"voice"} [analysisType] default "frequency" (one value per FFT bin);
* "music" and "voice" aggregate bins into note buckets
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
static getFrequencies(analyser: AnalyserNode, sampleRate: number, fftResult?: Float32Array, analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
/**
* Creates a new AudioAnalysis instance for an HTMLAudioElement
* @param {HTMLAudioElement} audioElement
* @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
* @returns {AudioAnalysis}
*/
constructor(audioElement: HTMLAudioElement, audioBuffer?: AudioBuffer | null);
/** FFT frames cached from the offline render; stays empty when no `audioBuffer` was given */
fftResults: any[];
/** The audio element passed to the constructor */
audio: HTMLAudioElement;
/** OfflineAudioContext when an `audioBuffer` was given, otherwise an AudioContext */
context: any;
/** AnalyserNode created on `context` */
analyser: any;
/** Sample rate of the `audioBuffer` when given, otherwise of `context` */
sampleRate: any;
/** The `audioBuffer` passed to the constructor, or null */
audioBuffer: any;
/**
* Gets the current frequency domain data from the playing audio track
* When FFT frames were cached from an `audioBuffer`, the frame matching the
* element's playback position is used instead of a live analyser reading
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
/**
* Resume the internal AudioContext if it was suspended due to the lack of
* user interaction when the AudioAnalysis was instantiated.
* @returns {Promise<true>}
*/
resumeIfSuspended(): Promise<true>;
}
/**
* Output of AudioAnalysis for the frequency domain of the audio
*/
export type AudioAnalysisOutputType = {
/**
* Amplitude of each frequency bucket, normalized from the decibel range and clamped to [0, 1]
*/
values: Float32Array;
/**
* Raw frequency bucket values
*/
frequencies: number[];
/**
* Labels for the frequency bucket values
*/
labels: string[];
};
//# sourceMappingURL=audio_analysis.d.ts.map
@@ -0,0 +1 @@
{"version":3,"file":"audio_analysis.d.ts","sourceRoot":"","sources":["../../../lib/analysis/audio_analysis.js"],"names":[],"mappings":"AAOA;;;;;;GAMG;AAEH;;;GAGG;AACH;IACE;;;;;;;;;;OAUG;IACH,gCARW,YAAY,cACZ,MAAM,cACN,YAAY,iBACZ,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwDnC;IAED;;;;;OAKG;IACH,0BAJW,gBAAgB,gBAChB,WAAW,GAAC,IAAI,EAkE1B;IA9DC,kBAAoB;IA2ClB,wBAAyB;IACzB,aAAkC;IAClC,cAAwB;IACxB,gBAA4B;IAC5B,iBAA8B;IAiBlC;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwBnC;IAED;;;;OAIG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAOzB;CACF;;;;;;;;YA9La,YAAY;;;;iBACZ,MAAM,EAAE;;;;YACR,MAAM,EAAE"}
+9
View File
@@ -0,0 +1,9 @@
/**
* All note frequencies from 1st to 8th octave
* in format "A#8" (A#, 8th octave)
*/
/** Note frequencies in Hz, 12 per octave, ordered from the 1st to the 8th octave */
export const noteFrequencies: any[];
/** Labels index-aligned with `noteFrequencies`, e.g. "A#8" */
export const noteFrequencyLabels: any[];
/** Entries of `noteFrequencies` strictly between 32 and 2000 Hz */
export const voiceFrequencies: any[];
/** Labels index-aligned with `voiceFrequencies` */
export const voiceFrequencyLabels: any[];
//# sourceMappingURL=constants.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../../../lib/analysis/constants.js"],"names":[],"mappings":"AA6BA;;;GAGG;AACH,oCAAkC;AAClC,wCAAsC;AActC,qCAKG;AACH,yCAKG"}
+58
View File
@@ -0,0 +1,58 @@
/**
* Raw wav audio file contents
* @typedef {Object} WavPackerAudioType
* @property {Blob} blob
* @property {string} url
* @property {number} channelCount
* @property {number} sampleRate
* @property {number} duration
*/
/**
* Utility class for assembling PCM16 "audio/wav" data
* @class
*/
export class WavPacker {
/**
* Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
* Samples are clamped to [-1, 1] and written as little-endian 16-bit integers
* @param {Float32Array} float32Array
* @returns {ArrayBuffer}
*/
static floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer;
/**
* Concatenates two ArrayBuffers
* @param {ArrayBuffer} leftBuffer
* @param {ArrayBuffer} rightBuffer
* @returns {ArrayBuffer} A new buffer holding the left bytes followed by the right bytes
*/
static mergeBuffers(leftBuffer: ArrayBuffer, rightBuffer: ArrayBuffer): ArrayBuffer;
/**
* Packs a number into little-endian bytes for the wav header
* @private
* @param {number} size 0 = 1x Int16 (2 bytes), 1 = 2x Int16 (4 bytes)
* @param {number} arg value to pack
* @returns {Uint8Array}
*/
private _packData;
/**
* Packs audio into "audio/wav" Blob
* @param {number} sampleRate
* @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
* @returns {WavPackerAudioType}
* @throws {Error} When `bitsPerSample`, `channels` or `data` is missing
*/
pack(sampleRate: number, audio: {
bitsPerSample: number;
channels: Array<Float32Array>;
data: Int16Array;
}): WavPackerAudioType;
}
/**
* Raw wav audio file contents
*/
export type WavPackerAudioType = {
/** The packed header and sample data */
blob: Blob;
/** Object URL created for `blob` */
url: string;
/** Number of entries in the `channels` array given to `pack` */
channelCount: number;
/** The sample rate given to `pack` */
sampleRate: number;
/** Length in seconds, computed from the data byte length at 2 bytes per sample */
duration: number;
};
//# sourceMappingURL=wav_packer.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"wav_packer.d.ts","sourceRoot":"","sources":["../../lib/wav_packer.js"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;;GAGG;AACH;IACE;;;;OAIG;IACH,qCAHW,YAAY,GACV,WAAW,CAWvB;IAED;;;;;OAKG;IACH,gCAJW,WAAW,eACX,WAAW,GACT,WAAW,CASvB;IAED;;;;;;OAMG;IACH,kBAKC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,SACN;QAAC,aAAa,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;QAAC,IAAI,EAAE,UAAU,CAAA;KAAC,GACtE,kBAAkB,CA6C9B;CACF;;;;;UA3Ga,IAAI;SACJ,MAAM;kBACN,MAAM;gBACN,MAAM;cACN,MAAM"}
+167
View File
@@ -0,0 +1,167 @@
/**
* Decodes audio into a wav file
* @typedef {Object} DecodedAudioType
* @property {Blob} blob
* @property {string} url
* @property {Float32Array} values
* @property {AudioBuffer} audioBuffer
*/
/**
* Records live stream of user audio as PCM16 "audio/wav" data
* @class
*/
export class WavRecorder {
/**
* Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
* @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
* @param {number} sampleRate Sample rate of the decoding context, default 44100
* @param {number} fromSampleRate Sample rate of raw samples; required (minimum 3000) for
* Float32Array, Int16Array and number[] input, and must be left unset for Blob and ArrayBuffer input
* @returns {Promise<DecodedAudioType>}
*/
static decode(audioData: Blob | Float32Array | Int16Array | ArrayBuffer | number[], sampleRate?: number, fromSampleRate?: number): Promise<DecodedAudioType>;
/**
* Create a new WavRecorder instance
* @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
* Defaults: sampleRate 44100, outputToSpeakers false, debug false
* @returns {WavRecorder}
*/
constructor({ sampleRate, outputToSpeakers, debug, }?: {
sampleRate?: number;
outputToSpeakers?: boolean;
debug?: boolean;
});
/** Script source of the audio processor worklet */
scriptSrc: any;
/** Configured sample rate */
sampleRate: number;
/** Configured `outputToSpeakers` option */
outputToSpeakers: boolean;
/** When true, `log` produces output */
debug: boolean;
/** Currently registered device change listener; null until one is set */
_deviceChangeCallback: () => Promise<void>;
/** Starts out as an empty list */
_devices: any[];
/** Null until a recording session is set up */
stream: any;
/** Null until a recording session is set up; `getStatus` reports "ended" while it is unset */
processor: any;
/** Null until a recording session is set up */
source: any;
/** Null until a recording session is set up */
node: any;
/** True while audio is being recorded; `getStatus` reports "paused" when false */
recording: boolean;
/** Counter used to assign an id to each event sent to the AudioWorklet */
_lastEventId: number;
/** Payloads received from the AudioWorklet, keyed by event id */
eventReceipts: {};
/** Milliseconds to wait for an event receipt before failing, 5000 by default */
eventTimeout: number;
/** Chunk callback; a no-op until one is provided */
_chunkProcessor: () => void;
/** Buffered raw and mono audio; both start empty */
_chunkProcessorBuffer: {
raw: ArrayBuffer;
mono: ArrayBuffer;
};
/**
* Logs data in debug mode
* @param {...any} args
* @returns {true}
*/
log(...args: any[]): true;
/**
* Retrieves the current sampleRate for the recorder
* @returns {number}
*/
getSampleRate(): number;
/**
* Retrieves the current status of the recording
* "ended" when there is no processor, "paused" when there is one but
* recording is off, "recording" otherwise
* @returns {"ended"|"paused"|"recording"}
*/
getStatus(): "ended" | "paused" | "recording";
/**
* Sends an event to the AudioWorklet
* Resolves with the receipt stored for the event id; throws when none
* arrives within `eventTimeout`
* @private
* @param {string} name
* @param {{[key: string]: any}} data
* @param {AudioWorkletNode} [_processor]
* @returns {Promise<{[key: string]: any}>}
*/
private _event;
/**
* Sets device change callback, remove if callback provided is `null`
* @param {((devices: Array<MediaDeviceInfo & {default: boolean}>) => void)|null} callback
* @returns {true}
*/
listenForDeviceChange(callback: any): true;
/**
* Manually request permission to use the microphone
* @returns {Promise<true>}
*/
requestPermission(): Promise<true>;
/**
* List all eligible devices for recording, will request permission to use microphone
* @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
*/
listDevices(): Promise<Array<MediaDeviceInfo & {
default: boolean;
}>>;
/**
* Begins a recording session and requests microphone permissions if not already granted
* Microphone recording indicator will appear on browser tab but status will be "paused"
* @param {string} [deviceId] if no device provided, default device will be used
* @returns {Promise<true>}
*/
begin(deviceId?: string): Promise<true>;
analyser: any;
/**
* Gets the current frequency domain data from the recording track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType;
/**
* Pauses the recording
* Keeps microphone stream open but halts storage of audio
* @returns {Promise<true>}
*/
pause(): Promise<true>;
/**
* Start recording stream and storing to memory from the connected audio source
* @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
* @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio
* @returns {Promise<true>}
*/
record(chunkProcessor?: (data: {
mono: Int16Array;
raw: Int16Array;
}) => any, chunkSize?: number): Promise<true>;
_chunkProcessorSize: number;
/**
* Clears the audio buffer, empties stored recording
* @returns {Promise<true>}
*/
clear(): Promise<true>;
/**
* Reads the current audio stream data
* @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
*/
read(): Promise<{
meanValues: Float32Array;
channels: Array<Float32Array>;
}>;
/**
* Saves the current audio stream to a file
* @param {boolean} [force] Force saving while still recording
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
save(force?: boolean): Promise<import("./wav_packer.js").WavPackerAudioType>;
/**
* Ends the current recording session and saves the result
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
end(): Promise<import("./wav_packer.js").WavPackerAudioType>;
/**
* Performs a full cleanup of WavRecorder instance
* Stops actively listening via microphone and removes existing listeners
* @returns {Promise<true>}
*/
quit(): Promise<true>;
}
/**
* Decodes audio into a wav file
*/
export type DecodedAudioType = {
/** The audio as a Blob: the input Blob itself, or one built from the input data */
blob: Blob;
/** Object URL created for `blob` */
url: string;
/** Samples of the first channel of `audioBuffer` */
values: Float32Array;
/** The decoded audio */
audioBuffer: AudioBuffer;
};
//# sourceMappingURL=wav_recorder.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"wav_recorder.d.ts","sourceRoot":"","sources":["../../lib/wav_recorder.js"],"names":[],"mappings":"AAIA;;;;;;;GAOG;AAEH;;;GAGG;AACH;IAsCE;;;;;;OAMG;IACH,yBALW,IAAI,GAAC,YAAY,GAAC,UAAU,GAAC,WAAW,GAAC,MAAM,EAAE,eACjD,MAAM,mBACN,MAAM,GACJ,OAAO,CAAC,gBAAgB,CAAC,CAqErC;IA/GD;;;;OAIG;IACH,uDAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAiC5E;IAxBC,eAAkC;IAElC,mBAA4B;IAC5B,0BAAwC;IACxC,eAAoB;IACpB,2CAAiC;IACjC,gBAAkB;IAElB,YAAkB;IAClB,eAAqB;IACrB,YAAkB;IAClB,UAAgB;IAChB,mBAAsB;IAEtB,qBAAqB;IACrB,kBAAuB;IACvB,qBAAwB;IAExB,4BAA+B;IAE/B;;;MAGC;IA+EH;;;;OAIG;IACH,qBAFa,IAAI,CAOhB;IAED;;;OAGG;IACH,iBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,aAFa,OAAO,GAAC,QAAQ,GAAC,WAAW,CAUxC;IAED;;;;;;;OAOG;IACH,eAqBC;IAED;;;;OAIG;IACH,sCAFa,IAAI,CAmChB;IAED;;;OAGG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAoBzB;IAED;;;OAGG;IACH,eAFa,OAAO,CAAC,KAAK,CAAC,eAAe,GAAG;QAAC,OAAO,EAAE,OAAO,CAAA;KAAC,CAAC,CAAC,CA8BhE;IAED;;;;;OAKG;IACH,iBAHW,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAkFzB;IAHC,cAAwB;IAK1B;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAezB;IAED;;;;;OAKG;IACH,wBAJW,CAAC,IAAI,EAAE;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,GAAG,EAAE,UAAU,CAAA;KAAE,KAAK,GAAG,cACpD,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAoBzB;IATC,4BAAoC;IAWtC;;;OAGG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;IAED;;;OAGG;IACH,QAFa,OAAO,CAAC;QAAC,UAAU,EAAE,YAAY,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAA;KAAC,CAAC,CAS9E;IAED;;;;OAIG;IACH,aAHW,OAAO,GACL,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CAgBjE;IAED;;;OAGG;IACH,OAFa,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CA8BjE;IAED;;;;OAIG;IACH,QAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;CACF;;;;;UA1hBa,IAAI;SACJ,MAAM;YACN,YAAY;iBACZ,WAAW"}
+69
View File
@@ -0,0 +1,69 @@
/**
* Plays audio streams received in raw PCM16 chunks from the browser
* @class
*/
export class WavStreamPlayer {
/**
* Creates a new WavStreamPlayer instance
* @param {{sampleRate?: number}} options
* @returns {WavStreamPlayer}
*/
constructor({ sampleRate }?: {
sampleRate?: number;
});
scriptSrc: any;
sampleRate: number;
context: any;
stream: any;
analyser: any;
trackSampleOffsets: {};
interruptedTrackIds: {};
/**
* Connects the audio context and enables output to speakers
* @returns {Promise<true>}
*/
connect(): Promise<true>;
/**
* Gets the current frequency domain data from the playing track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType;
/**
* Starts audio streaming
* @private
* @returns {Promise<true>}
*/
private _start;
/**
* Adds 16BitPCM data to the currently playing audio stream
* You can add chunks beyond the current play point and they will be queued for play
* @param {ArrayBuffer|Int16Array} arrayBuffer
* @param {string} [trackId]
* @returns {Int16Array}
*/
add16BitPCM(arrayBuffer: ArrayBuffer | Int16Array, trackId?: string): Int16Array;
/**
* Gets the offset (sample count) of the currently playing stream
* @param {boolean} [interrupt]
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
getTrackSampleOffset(interrupt?: boolean): {
trackId: string | null;
offset: number;
currentTime: number;
};
/**
* Strips the current stream and returns the sample offset of the audio
* Takes no arguments
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
interrupt(): {
trackId: string | null;
offset: number;
currentTime: number;
};
}
//# sourceMappingURL=wav_stream_player.d.ts.map
+1
View File
@@ -0,0 +1 @@
{"version":3,"file":"wav_stream_player.d.ts","sourceRoot":"","sources":["../../lib/wav_stream_player.js"],"names":[],"mappings":"AAGA;;;GAGG;AACH;IACE;;;;OAIG;IACH,6BAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAC,EAW/B;IAPC,eAAmC;IACnC,mBAA4B;IAC5B,aAAmB;IACnB,YAAkB;IAClB,cAAoB;IACpB,uBAA4B;IAC5B,wBAA6B;IAG/B;;;OAGG;IACH,WAFa,OAAO,CAAC,IAAI,CAAC,CAkBzB;IAED;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,eAkBC;IAED;;;;;;OAMG;IACH,yBAJW,WAAW,GAAC,UAAU,YACtB,MAAM,GACJ,UAAU,CAqBtB;IAED;;;;OAIG;IACH,iCAHW,OAAO,GACL;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAqBvE;IAED;;;;OAIG;IACH,aAFa;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAIvE;CACF"}
@@ -0,0 +1,2 @@
// Script source that WavRecorder stores as its `scriptSrc`.
// NOTE(review): typed `any` here; presumably a string or URL — confirm in lib/worklets/audio_processor.js
export const AudioProcessorSrc: any;
//# sourceMappingURL=audio_processor.d.ts.map
@@ -0,0 +1 @@
{"version":3,"file":"audio_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/audio_processor.js"],"names":[],"mappings":"AAqNA,oCAAqC"}
@@ -0,0 +1,3 @@
export const StreamProcessorWorklet: "\nclass StreamProcessor extends AudioWorkletProcessor {\n constructor() {\n super();\n this.hasStarted = false;\n this.hasInterrupted = false;\n this.outputBuffers = [];\n this.bufferLength = 128;\n this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };\n this.writeOffset = 0;\n this.trackSampleOffsets = {};\n this.port.onmessage = (event) => {\n if (event.data) {\n const payload = event.data;\n if (payload.event === 'write') {\n const int16Array = payload.buffer;\n const float32Array = new Float32Array(int16Array.length);\n for (let i = 0; i < int16Array.length; i++) {\n float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32\n }\n this.writeData(float32Array, payload.trackId);\n } else if (\n payload.event === 'offset' ||\n payload.event === 'interrupt'\n ) {\n const requestId = payload.requestId;\n const trackId = this.write.trackId;\n const offset = this.trackSampleOffsets[trackId] || 0;\n this.port.postMessage({\n event: 'offset',\n requestId,\n trackId,\n offset,\n });\n if (payload.event === 'interrupt') {\n this.hasInterrupted = true;\n }\n } else {\n throw new Error(`Unhandled event \"${payload.event}\"`);\n }\n }\n };\n }\n\n writeData(float32Array, trackId = null) {\n let { buffer } = this.write;\n let offset = this.writeOffset;\n for (let i = 0; i < float32Array.length; i++) {\n buffer[offset++] = float32Array[i];\n if (offset >= buffer.length) {\n this.outputBuffers.push(this.write);\n this.write = { buffer: new Float32Array(this.bufferLength), trackId };\n buffer = this.write.buffer;\n offset = 0;\n }\n }\n this.writeOffset = offset;\n return true;\n }\n\n process(inputs, outputs, parameters) {\n const output = outputs[0];\n const outputChannelData = output[0];\n const outputBuffers = this.outputBuffers;\n if (this.hasInterrupted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else if (outputBuffers.length) {\n this.hasStarted = true;\n const { buffer, trackId 
} = outputBuffers.shift();\n for (let i = 0; i < outputChannelData.length; i++) {\n outputChannelData[i] = buffer[i] || 0;\n }\n if (trackId) {\n this.trackSampleOffsets[trackId] =\n this.trackSampleOffsets[trackId] || 0;\n this.trackSampleOffsets[trackId] += buffer.length;\n }\n return true;\n } else if (this.hasStarted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else {\n return true;\n }\n }\n}\n\nregisterProcessor('stream_processor', StreamProcessor);\n";
export const StreamProcessorSrc: any;
//# sourceMappingURL=stream_processor.d.ts.map
@@ -0,0 +1 @@
{"version":3,"file":"stream_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/stream_processor.js"],"names":[],"mappings":"AAAA,q4FAyFE;AAMF,qCAAsC"}
+6
View File
@@ -0,0 +1,6 @@
import { WavPacker } from './lib/wav_packer.js';
import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
import { WavStreamPlayer } from './lib/wav_stream_player.js';
import { WavRecorder } from './lib/wav_recorder.js';
export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
@@ -0,0 +1,203 @@
import {
noteFrequencies,
noteFrequencyLabels,
voiceFrequencies,
voiceFrequencyLabels,
} from './constants.js';
/**
* Output of AudioAnalysis for the frequency domain of the audio
* @typedef {Object} AudioAnalysisOutputType
* @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
* @property {number[]} frequencies Raw frequency bucket values
* @property {string[]} labels Labels for the frequency bucket values
*/
/**
* Analyzes audio for visual output
* @class
*/
export class AudioAnalysis {
/**
* Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
* returns human-readable formatting and labels
* @param {AnalyserNode} analyser
* @param {number} sampleRate
* @param {Float32Array} [fftResult]
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
static getFrequencies(
analyser,
sampleRate,
fftResult,
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30,
) {
if (!fftResult) {
fftResult = new Float32Array(analyser.frequencyBinCount);
analyser.getFloatFrequencyData(fftResult);
}
const nyquistFrequency = sampleRate / 2;
const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
let outputValues;
let frequencies;
let labels;
if (analysisType === 'music' || analysisType === 'voice') {
const useFrequencies =
analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
for (let i = 0; i < fftResult.length; i++) {
const frequency = i * frequencyStep;
const amplitude = fftResult[i];
for (let n = useFrequencies.length - 1; n >= 0; n--) {
if (frequency > useFrequencies[n]) {
aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
break;
}
}
}
outputValues = aggregateOutput;
frequencies =
analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
labels =
analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
} else {
outputValues = Array.from(fftResult);
frequencies = outputValues.map((_, i) => frequencyStep * i);
labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
}
// We normalize to {0, 1}
const normalizedOutput = outputValues.map((v) => {
return Math.max(
0,
Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
);
});
const values = new Float32Array(normalizedOutput);
return {
values,
frequencies,
labels,
};
}
/**
* Creates a new AudioAnalysis instance for an HTMLAudioElement
* @param {HTMLAudioElement} audioElement
* @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
* @returns {AudioAnalysis}
*/
constructor(audioElement, audioBuffer = null) {
this.fftResults = [];
if (audioBuffer) {
/**
* Modified from
* https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
*
* We do this to populate FFT values for the audio if provided an `audioBuffer`
* The reason to do this is that Safari fails when using `createMediaElementSource`
* This has a non-zero RAM cost so we only opt-in to run it on Safari, Chrome is better
*/
const { length, sampleRate } = audioBuffer;
const offlineAudioContext = new OfflineAudioContext({
length,
sampleRate,
});
const source = offlineAudioContext.createBufferSource();
source.buffer = audioBuffer;
const analyser = offlineAudioContext.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
source.connect(analyser);
// limit is :: 128 / sampleRate;
// but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM
const renderQuantumInSeconds = 1 / 60;
const durationInSeconds = length / sampleRate;
const analyze = (index) => {
const suspendTime = renderQuantumInSeconds * index;
if (suspendTime < durationInSeconds) {
offlineAudioContext.suspend(suspendTime).then(() => {
const fftResult = new Float32Array(analyser.frequencyBinCount);
analyser.getFloatFrequencyData(fftResult);
this.fftResults.push(fftResult);
analyze(index + 1);
});
}
if (index === 1) {
offlineAudioContext.startRendering();
} else {
offlineAudioContext.resume();
}
};
source.start(0);
analyze(1);
this.audio = audioElement;
this.context = offlineAudioContext;
this.analyser = analyser;
this.sampleRate = sampleRate;
this.audioBuffer = audioBuffer;
} else {
const audioContext = new AudioContext();
const track = audioContext.createMediaElementSource(audioElement);
const analyser = audioContext.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
track.connect(analyser);
analyser.connect(audioContext.destination);
this.audio = audioElement;
this.context = audioContext;
this.analyser = analyser;
this.sampleRate = this.context.sampleRate;
this.audioBuffer = null;
}
}
/**
* Gets the current frequency domain data from the playing audio track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
getFrequencies(
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30,
) {
let fftResult = null;
if (this.audioBuffer && this.fftResults.length) {
const pct = this.audio.currentTime / this.audio.duration;
const index = Math.min(
(pct * this.fftResults.length) | 0,
this.fftResults.length - 1,
);
fftResult = this.fftResults[index];
}
return AudioAnalysis.getFrequencies(
this.analyser,
this.sampleRate,
fftResult,
analysisType,
minDecibels,
maxDecibels,
);
}
/**
* Resume the internal AudioContext if it was suspended due to the lack of
* user interaction when the AudioAnalysis was instantiated.
* @returns {Promise<true>}
*/
async resumeIfSuspended() {
if (this.context.state === 'suspended') {
await this.context.resume();
}
return true;
}
}
// Also publish the class as a property of the global object.
// NOTE(review): presumably for consumers that do not use the ES module export — confirm
globalThis.AudioAnalysis = AudioAnalysis;
@@ -0,0 +1,60 @@
/**
* Constants for help with visualization
* Helps map frequency ranges from Fast Fourier Transform
* to human-interpretable ranges, notably music ranges and
* human vocal ranges.
*/
// Eighth octave frequencies
const octave8Frequencies = [
  4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
  6644.88, 7040.0, 7458.62, 7902.13,
];

// Note names, index-aligned with octave8Frequencies
const octave8FrequencyLabels = [
  'C', 'C#', 'D', 'D#', 'E', 'F',
  'F#', 'G', 'G#', 'A', 'A#', 'B',
];

/**
 * Every note from the 1st to the 8th octave, lowest first.
 * Frequencies are derived by halving the 8th-octave values once per octave
 * below the 8th; labels have the form "A#8" (A#, 8th octave).
 */
export const noteFrequencies = [];
export const noteFrequencyLabels = [];

for (let octave = 1; octave <= 8; octave++) {
  const divisor = Math.pow(2, 8 - octave);
  octave8Frequencies.forEach((topOctaveHz, noteIndex) => {
    noteFrequencies.push(topOctaveHz / divisor);
    noteFrequencyLabels.push(octave8FrequencyLabels[noteIndex] + octave);
  });
}

/**
 * Subset of the notes whose frequency lies strictly between 32 and 2000 Hz
 * 6 octave range: C1 to B6
 */
const voiceFrequencyRange = [32.0, 2000.0];

// Decides by position, so the same predicate filters both the frequency
// list and its index-aligned label list
const isVoiceNoteAt = (_, i) => {
  const hz = noteFrequencies[i];
  return hz > voiceFrequencyRange[0] && hz < voiceFrequencyRange[1];
};

export const voiceFrequencies = noteFrequencies.filter(isVoiceNoteAt);
export const voiceFrequencyLabels = noteFrequencyLabels.filter(isVoiceNoteAt);
+113
View File
@@ -0,0 +1,113 @@
/**
* Raw wav audio file contents
* @typedef {Object} WavPackerAudioType
* @property {Blob} blob
* @property {string} url
* @property {number} channelCount
* @property {number} sampleRate
* @property {number} duration
*/
/**
* Utility class for assembling PCM16 "audio/wav" data
* @class
*/
export class WavPacker {
/**
* Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
* @param {Float32Array} float32Array
* @returns {ArrayBuffer}
*/
static floatTo16BitPCM(float32Array) {
const buffer = new ArrayBuffer(float32Array.length * 2);
const view = new DataView(buffer);
let offset = 0;
for (let i = 0; i < float32Array.length; i++, offset += 2) {
let s = Math.max(-1, Math.min(1, float32Array[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
}
return buffer;
}
/**
* Concatenates two ArrayBuffers
* @param {ArrayBuffer} leftBuffer
* @param {ArrayBuffer} rightBuffer
* @returns {ArrayBuffer}
*/
static mergeBuffers(leftBuffer, rightBuffer) {
const tmpArray = new Uint8Array(
leftBuffer.byteLength + rightBuffer.byteLength
);
tmpArray.set(new Uint8Array(leftBuffer), 0);
tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength);
return tmpArray.buffer;
}
/**
* Packs data into an Int16 format
* @private
* @param {number} size 0 = 1x Int16, 1 = 2x Int16
* @param {number} arg value to pack
* @returns
*/
_packData(size, arg) {
return [
new Uint8Array([arg, arg >> 8]),
new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]),
][size];
}
/**
* Packs audio into "audio/wav" Blob
* @param {number} sampleRate
* @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
* @returns {WavPackerAudioType}
*/
pack(sampleRate, audio) {
if (!audio?.bitsPerSample) {
throw new Error(`Missing "bitsPerSample"`);
} else if (!audio?.channels) {
throw new Error(`Missing "channels"`);
} else if (!audio?.data) {
throw new Error(`Missing "data"`);
}
const { bitsPerSample, channels, data } = audio;
const output = [
// Header
'RIFF',
this._packData(
1,
4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */
), // Length
'WAVE',
// chunk 1
'fmt ', // Sub-chunk identifier
this._packData(1, 16), // Chunk length
this._packData(0, 1), // Audio format (1 is linear quantization)
this._packData(0, channels.length),
this._packData(1, sampleRate),
this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate
this._packData(0, (channels.length * bitsPerSample) / 8),
this._packData(0, bitsPerSample),
// chunk 2
'data', // Sub-chunk identifier
this._packData(
1,
(channels[0].length * channels.length * bitsPerSample) / 8
), // Chunk length
data,
];
const blob = new Blob(output, { type: 'audio/mpeg' });
const url = URL.createObjectURL(blob);
return {
blob,
url,
channelCount: channels.length,
sampleRate,
duration: data.byteLength / (channels.length * sampleRate * 2),
};
}
}
// Also publish the class as a property of the global object.
// NOTE(review): presumably for consumers that do not use the ES module export — confirm
globalThis.WavPacker = WavPacker;
+548
View File
@@ -0,0 +1,548 @@
import { AudioProcessorSrc } from './worklets/audio_processor.js';
import { AudioAnalysis } from './analysis/audio_analysis.js';
import { WavPacker } from './wav_packer.js';
/**
* Decodes audio into a wav file
* @typedef {Object} DecodedAudioType
* @property {Blob} blob
* @property {string} url
* @property {Float32Array} values
* @property {AudioBuffer} audioBuffer
*/
/**
* Records live stream of user audio as PCM16 "audio/wav" data
* @class
*/
export class WavRecorder {
/**
* Create a new WavRecorder instance
* @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
* @returns {WavRecorder}
*/
constructor({
sampleRate = 44100,
outputToSpeakers = false,
debug = false,
} = {}) {
// Script source
this.scriptSrc = AudioProcessorSrc;
// Config
this.sampleRate = sampleRate;
this.outputToSpeakers = outputToSpeakers;
// Coerced so any truthy or falsy option becomes a strict boolean
this.debug = !!debug;
// No device change listener is registered yet
this._deviceChangeCallback = null;
this._devices = [];
// State variables
// All null until a session is set up; getStatus() reports "ended" while
// `processor` is unset
this.stream = null;
this.processor = null;
this.source = null;
this.node = null;
this.recording = false;
// Event handling with AudioWorklet
// Each event gets the next id; _event() waits for a receipt under that id
// and gives up after `eventTimeout` milliseconds
this._lastEventId = 0;
this.eventReceipts = {};
this.eventTimeout = 5000;
// Process chunks of audio
// Starts as a no-op with no size threshold and empty buffers
this._chunkProcessor = () => {};
this._chunkProcessorSize = void 0;
this._chunkProcessorBuffer = {
raw: new ArrayBuffer(0),
mono: new ArrayBuffer(0),
};
}
/**
* Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
* @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
* @param {number} sampleRate
* @param {number} fromSampleRate
* @returns {Promise<DecodedAudioType>}
*/
static async decode(audioData, sampleRate = 44100, fromSampleRate = -1) {
const context = new AudioContext({ sampleRate });
let arrayBuffer;
let blob;
if (audioData instanceof Blob) {
if (fromSampleRate !== -1) {
throw new Error(
`Can not specify "fromSampleRate" when reading from Blob`,
);
}
blob = audioData;
arrayBuffer = await blob.arrayBuffer();
} else if (audioData instanceof ArrayBuffer) {
if (fromSampleRate !== -1) {
throw new Error(
`Can not specify "fromSampleRate" when reading from ArrayBuffer`,
);
}
arrayBuffer = audioData;
blob = new Blob([arrayBuffer], { type: 'audio/wav' });
} else {
let float32Array;
let data;
if (audioData instanceof Int16Array) {
data = audioData;
float32Array = new Float32Array(audioData.length);
for (let i = 0; i < audioData.length; i++) {
float32Array[i] = audioData[i] / 0x8000;
}
} else if (audioData instanceof Float32Array) {
float32Array = audioData;
} else if (audioData instanceof Array) {
float32Array = new Float32Array(audioData);
} else {
throw new Error(
`"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
);
}
if (fromSampleRate === -1) {
throw new Error(
`Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
);
} else if (fromSampleRate < 3000) {
throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
}
if (!data) {
data = WavPacker.floatTo16BitPCM(float32Array);
}
const audio = {
bitsPerSample: 16,
channels: [float32Array],
data,
};
const packer = new WavPacker();
const result = packer.pack(fromSampleRate, audio);
blob = result.blob;
arrayBuffer = await blob.arrayBuffer();
}
const audioBuffer = await context.decodeAudioData(arrayBuffer);
const values = audioBuffer.getChannelData(0);
const url = URL.createObjectURL(blob);
return {
blob,
url,
values,
audioBuffer,
};
}
/**
* Logs data in debug mode
* @param {...any} arguments
* @returns {true}
*/
log() {
if (this.debug) {
this.log(...arguments);
}
return true;
}
/**
* Retrieves the current sampleRate for the recorder
* @returns {number}
*/
getSampleRate() {
return this.sampleRate;
}
/**
* Retrieves the current status of the recording
* @returns {"ended"|"paused"|"recording"}
*/
getStatus() {
if (!this.processor) {
return 'ended';
} else if (!this.recording) {
return 'paused';
} else {
return 'recording';
}
}
/**
* Sends an event to the AudioWorklet
* @private
* @param {string} name
* @param {{[key: string]: any}} data
* @param {AudioWorkletNode} [_processor]
* @returns {Promise<{[key: string]: any}>}
*/
async _event(name, data = {}, _processor = null) {
_processor = _processor || this.processor;
if (!_processor) {
throw new Error('Can not send events without recording first');
}
const message = {
event: name,
id: this._lastEventId++,
data,
};
_processor.port.postMessage(message);
const t0 = new Date().valueOf();
while (!this.eventReceipts[message.id]) {
if (new Date().valueOf() - t0 > this.eventTimeout) {
throw new Error(`Timeout waiting for "${name}" event`);
}
await new Promise((res) => setTimeout(() => res(true), 1));
}
const payload = this.eventReceipts[message.id];
delete this.eventReceipts[message.id];
return payload;
}
/**
* Sets device change callback, remove if callback provided is `null`
* @param {(Array<MediaDeviceInfo & {default: boolean}>): void|null} callback
* @returns {true}
*/
listenForDeviceChange(callback) {
if (callback === null && this._deviceChangeCallback) {
navigator.mediaDevices.removeEventListener(
'devicechange',
this._deviceChangeCallback,
);
this._deviceChangeCallback = null;
} else if (callback !== null) {
// Basically a debounce; we only want this called once when devices change
// And we only want the most recent callback() to be executed
// if a few are operating at the same time
let lastId = 0;
let lastDevices = [];
const serializeDevices = (devices) =>
devices
.map((d) => d.deviceId)
.sort()
.join(',');
const cb = async () => {
let id = ++lastId;
const devices = await this.listDevices();
if (id === lastId) {
if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
lastDevices = devices;
callback(devices.slice());
}
}
};
navigator.mediaDevices.addEventListener('devicechange', cb);
cb();
this._deviceChangeCallback = cb;
}
return true;
}
/**
* Manually request permission to use the microphone
* @returns {Promise<true>}
*/
async requestPermission() {
const permissionStatus = await navigator.permissions.query({
name: 'microphone',
});
if (permissionStatus.state === 'denied') {
window.alert('You must grant microphone access to use this feature.');
} else if (permissionStatus.state === 'prompt') {
try {
const stream = await navigator.mediaDevices.getUserMedia({
audio: true,
});
const tracks = stream.getTracks();
tracks.forEach((track) => track.stop());
} catch (e) {
window.alert('You must grant microphone access to use this feature.');
}
}
return true;
}
/**
* List all eligible devices for recording, will request permission to use microphone
* @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
*/
async listDevices() {
if (
!navigator.mediaDevices ||
!('enumerateDevices' in navigator.mediaDevices)
) {
throw new Error('Could not request user devices');
}
await this.requestPermission();
const devices = await navigator.mediaDevices.enumerateDevices();
const audioDevices = devices.filter(
(device) => device.kind === 'audioinput',
);
const defaultDeviceIndex = audioDevices.findIndex(
(device) => device.deviceId === 'default',
);
const deviceList = [];
if (defaultDeviceIndex !== -1) {
let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
let existingIndex = audioDevices.findIndex(
(device) => device.groupId === defaultDevice.groupId,
);
if (existingIndex !== -1) {
defaultDevice = audioDevices.splice(existingIndex, 1)[0];
}
defaultDevice.default = true;
deviceList.push(defaultDevice);
}
return deviceList.concat(audioDevices);
}
/**
* Begins a recording session and requests microphone permissions if not already granted
* Microphone recording indicator will appear on browser tab but status will be "paused"
* @param {string} [deviceId] if no device provided, default device will be used
* @returns {Promise<true>}
*/
async begin(deviceId) {
if (this.processor) {
throw new Error(
`Already connected: please call .end() to start a new session`,
);
}
if (
!navigator.mediaDevices ||
!('getUserMedia' in navigator.mediaDevices)
) {
throw new Error('Could not request user media');
}
try {
const config = { audio: true };
if (deviceId) {
config.audio = { deviceId: { exact: deviceId } };
}
this.stream = await navigator.mediaDevices.getUserMedia(config);
} catch (err) {
throw new Error('Could not start media stream');
}
const context = new AudioContext({ sampleRate: this.sampleRate });
const source = context.createMediaStreamSource(this.stream);
// Load and execute the module script.
try {
await context.audioWorklet.addModule(this.scriptSrc);
} catch (e) {
console.error(e);
throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
}
const processor = new AudioWorkletNode(context, 'audio_processor');
processor.port.onmessage = (e) => {
const { event, id, data } = e.data;
if (event === 'receipt') {
this.eventReceipts[id] = data;
} else if (event === 'chunk') {
if (this._chunkProcessorSize) {
const buffer = this._chunkProcessorBuffer;
this._chunkProcessorBuffer = {
raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
};
if (
this._chunkProcessorBuffer.mono.byteLength >=
this._chunkProcessorSize
) {
this._chunkProcessor(this._chunkProcessorBuffer);
this._chunkProcessorBuffer = {
raw: new ArrayBuffer(0),
mono: new ArrayBuffer(0),
};
}
} else {
this._chunkProcessor(data);
}
}
};
const node = source.connect(processor);
const analyser = context.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
node.connect(analyser);
if (this.outputToSpeakers) {
// eslint-disable-next-line no-console
console.warn(
'Warning: Output to speakers may affect sound quality,\n' +
'especially due to system audio feedback preventative measures.\n' +
'use only for debugging',
);
analyser.connect(context.destination);
}
this.source = source;
this.node = node;
this.analyser = analyser;
this.processor = processor;
return true;
}
/**
* Gets the current frequency domain data from the recording track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30,
) {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
return AudioAnalysis.getFrequencies(
this.analyser,
this.sampleRate,
null,
analysisType,
minDecibels,
maxDecibels,
);
}
/**
* Pauses the recording
* Keeps microphone stream open but halts storage of audio
* @returns {Promise<true>}
*/
async pause() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
} else if (!this.recording) {
throw new Error('Already paused: please call .record() first');
}
if (this._chunkProcessorBuffer.raw.byteLength) {
this._chunkProcessor(this._chunkProcessorBuffer);
}
this.log('Pausing ...');
await this._event('stop');
this.recording = false;
return true;
}
/**
* Start recording stream and storing to memory from the connected audio source
* @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
* @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio
* @returns {Promise<true>}
*/
async record(chunkProcessor = () => {}, chunkSize = 8192) {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
} else if (this.recording) {
throw new Error('Already recording: please call .pause() first');
} else if (typeof chunkProcessor !== 'function') {
throw new Error(`chunkProcessor must be a function`);
}
this._chunkProcessor = chunkProcessor;
this._chunkProcessorSize = chunkSize;
this._chunkProcessorBuffer = {
raw: new ArrayBuffer(0),
mono: new ArrayBuffer(0),
};
this.log('Recording ...');
await this._event('start');
this.recording = true;
return true;
}
/**
* Clears the audio buffer, empties stored recording
* @returns {Promise<true>}
*/
async clear() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
await this._event('clear');
return true;
}
/**
* Reads the current audio stream data
* @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
*/
async read() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
this.log('Reading ...');
const result = await this._event('read');
return result;
}
/**
* Saves the current audio stream to a file
* @param {boolean} [force] Force saving while still recording
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
async save(force = false) {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
if (!force && this.recording) {
throw new Error(
'Currently recording: please call .pause() first, or call .save(true) to force',
);
}
this.log('Exporting ...');
const exportData = await this._event('export');
const packer = new WavPacker();
const result = packer.pack(this.sampleRate, exportData.audio);
return result;
}
/**
* Ends the current recording session and saves the result
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
async end() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
const _processor = this.processor;
this.log('Stopping ...');
await this._event('stop');
this.recording = false;
const tracks = this.stream.getTracks();
tracks.forEach((track) => track.stop());
this.log('Exporting ...');
const exportData = await this._event('export', {}, _processor);
this.processor.disconnect();
this.source.disconnect();
this.node.disconnect();
this.analyser.disconnect();
this.stream = null;
this.processor = null;
this.source = null;
this.node = null;
const packer = new WavPacker();
const result = packer.pack(this.sampleRate, exportData.audio);
return result;
}
/**
* Performs a full cleanup of WavRecorder instance
* Stops actively listening via microphone and removes existing listeners
* @returns {Promise<true>}
*/
async quit() {
this.listenForDeviceChange(null);
if (this.processor) {
await this.end();
}
return true;
}
}
// Also publish the class as a global (globalThis.WavRecorder)
globalThis.WavRecorder = WavRecorder;
+160
View File
@@ -0,0 +1,160 @@
import { StreamProcessorSrc } from './worklets/stream_processor.js';
import { AudioAnalysis } from './analysis/audio_analysis.js';
/**
* Plays audio streams received in raw PCM16 chunks from the browser
* @class
*/
export class WavStreamPlayer {
/**
* Creates a new WavStreamPlayer instance
* @param {{sampleRate?: number}} options
* @returns {WavStreamPlayer}
*/
constructor({ sampleRate = 44100 } = {}) {
this.scriptSrc = StreamProcessorSrc;
this.sampleRate = sampleRate;
this.context = null;
this.stream = null;
this.analyser = null;
this.trackSampleOffsets = {};
this.interruptedTrackIds = {};
}
/**
* Connects the audio context and enables output to speakers
* @returns {Promise<true>}
*/
async connect() {
this.context = new AudioContext({ sampleRate: this.sampleRate });
if (this.context.state === 'suspended') {
await this.context.resume();
}
try {
await this.context.audioWorklet.addModule(this.scriptSrc);
} catch (e) {
console.error(e);
throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
}
const analyser = this.context.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
this.analyser = analyser;
return true;
}
/**
* Gets the current frequency domain data from the playing track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30
) {
if (!this.analyser) {
throw new Error('Not connected, please call .connect() first');
}
return AudioAnalysis.getFrequencies(
this.analyser,
this.sampleRate,
null,
analysisType,
minDecibels,
maxDecibels
);
}
/**
* Starts audio streaming
* @private
* @returns {Promise<true>}
*/
_start() {
const streamNode = new AudioWorkletNode(this.context, 'stream_processor');
streamNode.connect(this.context.destination);
streamNode.port.onmessage = (e) => {
const { event } = e.data;
if (event === 'stop') {
streamNode.disconnect();
this.stream = null;
} else if (event === 'offset') {
const { requestId, trackId, offset } = e.data;
const currentTime = offset / this.sampleRate;
this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
}
};
this.analyser.disconnect();
streamNode.connect(this.analyser);
this.stream = streamNode;
return true;
}
/**
* Adds 16BitPCM data to the currently playing audio stream
* You can add chunks beyond the current play point and they will be queued for play
* @param {ArrayBuffer|Int16Array} arrayBuffer
* @param {string} [trackId]
* @returns {Int16Array}
*/
add16BitPCM(arrayBuffer, trackId = 'default') {
if (typeof trackId !== 'string') {
throw new Error(`trackId must be a string`);
} else if (this.interruptedTrackIds[trackId]) {
return;
}
if (!this.stream) {
this._start();
}
let buffer;
if (arrayBuffer instanceof Int16Array) {
buffer = arrayBuffer;
} else if (arrayBuffer instanceof ArrayBuffer) {
buffer = new Int16Array(arrayBuffer);
} else {
throw new Error(`argument must be Int16Array or ArrayBuffer`);
}
this.stream.port.postMessage({ event: 'write', buffer, trackId });
return buffer;
}
/**
* Gets the offset (sample count) of the currently playing stream
* @param {boolean} [interrupt]
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
async getTrackSampleOffset(interrupt = false) {
if (!this.stream) {
return null;
}
const requestId = crypto.randomUUID();
this.stream.port.postMessage({
event: interrupt ? 'interrupt' : 'offset',
requestId,
});
let trackSampleOffset;
while (!trackSampleOffset) {
trackSampleOffset = this.trackSampleOffsets[requestId];
await new Promise((r) => setTimeout(() => r(), 1));
}
const { trackId } = trackSampleOffset;
if (interrupt && trackId) {
this.interruptedTrackIds[trackId] = true;
}
return trackSampleOffset;
}
/**
* Strips the current stream and returns the sample offset of the audio
* @param {boolean} [interrupt]
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
async interrupt() {
return this.getTrackSampleOffset(true);
}
}
// Also publish the class as a global (globalThis.WavStreamPlayer)
globalThis.WavStreamPlayer = WavStreamPlayer;
@@ -0,0 +1,214 @@
/**
 * Source code of the recording worklet, kept as a string so this module can
 * wrap it in a Blob and expose it through an object URL.
 *
 * The script registers an AudioWorkletProcessor named 'audio_processor' that:
 * - answers 'start', 'stop', 'clear', 'export' and 'read' messages, always
 *   replying with a 'receipt' message carrying the same id
 * - copies its input to its output
 * - once a non-zero sample has been seen and recording is on, stores each
 *   input chunk and posts it as a 'chunk' message with 16-bit PCM `mono`
 *   (per-sample mean of the channels) and `raw` (interleaved) buffers
 *
 * NOTE: everything between the backticks is the worklet program text itself;
 * editing it, including its comments, changes the script that gets loaded.
 */
const AudioProcessorWorklet = `
class AudioProcessor extends AudioWorkletProcessor {
constructor() {
super();
this.port.onmessage = this.receive.bind(this);
this.initialize();
}
initialize() {
this.foundAudio = false;
this.recording = false;
this.chunks = [];
}
/**
* Concatenates sampled chunks into channels
* Format is chunk[Left[], Right[]]
*/
readChannelData(chunks, channel = -1, maxChannels = 9) {
let channelLimit;
if (channel !== -1) {
if (chunks[0] && chunks[0].length - 1 < channel) {
throw new Error(
\`Channel \${channel} out of range: max \${chunks[0].length}\`
);
}
channelLimit = channel + 1;
} else {
channel = 0;
channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels);
}
const channels = [];
for (let n = channel; n < channelLimit; n++) {
const length = chunks.reduce((sum, chunk) => {
return sum + chunk[n].length;
}, 0);
const buffers = chunks.map((chunk) => chunk[n]);
const result = new Float32Array(length);
let offset = 0;
for (let i = 0; i < buffers.length; i++) {
result.set(buffers[i], offset);
offset += buffers[i].length;
}
channels[n] = result;
}
return channels;
}
/**
* Combines parallel audio data into correct format,
* channels[Left[], Right[]] to float32Array[LRLRLRLR...]
*/
formatAudioData(channels) {
if (channels.length === 1) {
// Simple case is only one channel
const float32Array = channels[0].slice();
const meanValues = channels[0].slice();
return { float32Array, meanValues };
} else {
const float32Array = new Float32Array(
channels[0].length * channels.length
);
const meanValues = new Float32Array(channels[0].length);
for (let i = 0; i < channels[0].length; i++) {
const offset = i * channels.length;
let meanValue = 0;
for (let n = 0; n < channels.length; n++) {
float32Array[offset + n] = channels[n][i];
meanValue += channels[n][i];
}
meanValues[i] = meanValue / channels.length;
}
return { float32Array, meanValues };
}
}
/**
* Converts 32-bit float data to 16-bit integers
*/
floatTo16BitPCM(float32Array) {
const buffer = new ArrayBuffer(float32Array.length * 2);
const view = new DataView(buffer);
let offset = 0;
for (let i = 0; i < float32Array.length; i++, offset += 2) {
let s = Math.max(-1, Math.min(1, float32Array[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
}
return buffer;
}
/**
* Retrieves the most recent amplitude values from the audio stream
* @param {number} channel
*/
getValues(channel = -1) {
const channels = this.readChannelData(this.chunks, channel);
const { meanValues } = this.formatAudioData(channels);
return { meanValues, channels };
}
/**
* Exports chunks as an audio/wav file
*/
export() {
const channels = this.readChannelData(this.chunks);
const { float32Array, meanValues } = this.formatAudioData(channels);
const audioData = this.floatTo16BitPCM(float32Array);
return {
meanValues: meanValues,
audio: {
bitsPerSample: 16,
channels: channels,
data: audioData,
},
};
}
receive(e) {
const { event, id } = e.data;
let receiptData = {};
switch (event) {
case 'start':
this.recording = true;
break;
case 'stop':
this.recording = false;
break;
case 'clear':
this.initialize();
break;
case 'export':
receiptData = this.export();
break;
case 'read':
receiptData = this.getValues();
break;
default:
break;
}
// Always send back receipt
this.port.postMessage({ event: 'receipt', id, data: receiptData });
}
sendChunk(chunk) {
const channels = this.readChannelData([chunk]);
const { float32Array, meanValues } = this.formatAudioData(channels);
const rawAudioData = this.floatTo16BitPCM(float32Array);
const monoAudioData = this.floatTo16BitPCM(meanValues);
this.port.postMessage({
event: 'chunk',
data: {
mono: monoAudioData,
raw: rawAudioData,
},
});
}
process(inputList, outputList, parameters) {
// Copy input to output (e.g. speakers)
// Note that this creates choppy sounds with Mac products
const sourceLimit = Math.min(inputList.length, outputList.length);
for (let inputNum = 0; inputNum < sourceLimit; inputNum++) {
const input = inputList[inputNum];
const output = outputList[inputNum];
const channelCount = Math.min(input.length, output.length);
for (let channelNum = 0; channelNum < channelCount; channelNum++) {
input[channelNum].forEach((sample, i) => {
output[channelNum][i] = sample;
});
}
}
const inputs = inputList[0];
// There's latency at the beginning of a stream before recording starts
// Make sure we actually receive audio data before we start storing chunks
let sliceIndex = 0;
if (!this.foundAudio) {
for (const channel of inputs) {
sliceIndex = 0; // reset for each channel
if (this.foundAudio) {
break;
}
if (channel) {
for (const value of channel) {
if (value !== 0) {
// find only one non-zero entry in any channel
this.foundAudio = true;
break;
} else {
sliceIndex++;
}
}
}
}
}
if (inputs && inputs[0] && this.foundAudio && this.recording) {
// We need to copy the TypedArray, because the \`process\`
// internals will reuse the same buffer to hold each input
const chunk = inputs.map((input) => input.slice(sliceIndex));
this.chunks.push(chunk);
this.sendChunk(chunk);
}
return true;
}
}
registerProcessor('audio_processor', AudioProcessor);
`;
/**
 * Object URL of a JavaScript Blob holding the recording worklet source
 */
export const AudioProcessorSrc = URL.createObjectURL(
  new Blob([AudioProcessorWorklet], { type: 'application/javascript' }),
);
@@ -0,0 +1,96 @@
/**
 * Source code of the playback worklet, kept as a string so this module can
 * wrap it in a Blob and expose it through an object URL.
 *
 * The script registers an AudioWorkletProcessor named 'stream_processor' that:
 * - on 'write' converts the received Int16 samples to floats and queues them
 *   in fixed 128-sample buffers tagged with the track id
 * - on 'offset' / 'interrupt' replies with an 'offset' message for the track
 *   currently being written; 'interrupt' additionally makes the next
 *   process() call post 'stop' and end the processor
 * - in process() plays one queued buffer per call and posts 'stop' once
 *   playback has started and the queue runs empty
 *
 * NOTE(review): writeData() only queues a buffer once it is full, so a
 * trailing partial buffer appears to stay unplayed until more data is
 * written; confirm this is intended.
 *
 * NOTE: everything between the backticks is the worklet program text itself;
 * editing it, including its comments, changes the script that gets loaded.
 */
export const StreamProcessorWorklet = `
class StreamProcessor extends AudioWorkletProcessor {
constructor() {
super();
this.hasStarted = false;
this.hasInterrupted = false;
this.outputBuffers = [];
this.bufferLength = 128;
this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };
this.writeOffset = 0;
this.trackSampleOffsets = {};
this.port.onmessage = (event) => {
if (event.data) {
const payload = event.data;
if (payload.event === 'write') {
const int16Array = payload.buffer;
const float32Array = new Float32Array(int16Array.length);
for (let i = 0; i < int16Array.length; i++) {
float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32
}
this.writeData(float32Array, payload.trackId);
} else if (
payload.event === 'offset' ||
payload.event === 'interrupt'
) {
const requestId = payload.requestId;
const trackId = this.write.trackId;
const offset = this.trackSampleOffsets[trackId] || 0;
this.port.postMessage({
event: 'offset',
requestId,
trackId,
offset,
});
if (payload.event === 'interrupt') {
this.hasInterrupted = true;
}
} else {
throw new Error(\`Unhandled event "\${payload.event}"\`);
}
}
};
}
writeData(float32Array, trackId = null) {
let { buffer } = this.write;
let offset = this.writeOffset;
for (let i = 0; i < float32Array.length; i++) {
buffer[offset++] = float32Array[i];
if (offset >= buffer.length) {
this.outputBuffers.push(this.write);
this.write = { buffer: new Float32Array(this.bufferLength), trackId };
buffer = this.write.buffer;
offset = 0;
}
}
this.writeOffset = offset;
return true;
}
process(inputs, outputs, parameters) {
const output = outputs[0];
const outputChannelData = output[0];
const outputBuffers = this.outputBuffers;
if (this.hasInterrupted) {
this.port.postMessage({ event: 'stop' });
return false;
} else if (outputBuffers.length) {
this.hasStarted = true;
const { buffer, trackId } = outputBuffers.shift();
for (let i = 0; i < outputChannelData.length; i++) {
outputChannelData[i] = buffer[i] || 0;
}
if (trackId) {
this.trackSampleOffsets[trackId] =
this.trackSampleOffsets[trackId] || 0;
this.trackSampleOffsets[trackId] += buffer.length;
}
return true;
} else if (this.hasStarted) {
this.port.postMessage({ event: 'stop' });
return false;
} else {
return true;
}
}
}
registerProcessor('stream_processor', StreamProcessor);
`;
/**
 * Object URL of a JavaScript Blob holding the playback worklet source
 */
export const StreamProcessorSrc = URL.createObjectURL(
  new Blob([StreamProcessorWorklet], { type: 'application/javascript' }),
);
+1
View File
@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 841.9 595.3"><g fill="#61DAFB"><path d="M666.3 296.5c0-32.5-40.7-63.3-103.1-82.4 14.4-63.6 8-114.2-20.2-130.4-6.5-3.8-14.1-5.6-22.4-5.6v22.3c4.6 0 8.3.9 11.4 2.6 13.6 7.8 19.5 37.5 14.9 75.7-1.1 9.4-2.9 19.3-5.1 29.4-19.6-4.8-41-8.5-63.5-10.9-13.5-18.5-27.5-35.3-41.6-50 32.6-30.3 63.2-46.9 84-46.9V78c-27.5 0-63.5 19.6-99.9 53.6-36.4-33.8-72.4-53.2-99.9-53.2v22.3c20.7 0 51.4 16.5 84 46.6-14 14.7-28 31.4-41.3 49.9-22.6 2.4-44 6.1-63.6 11-2.3-10-4-19.7-5.2-29-4.7-38.2 1.1-67.9 14.6-75.8 3-1.8 6.9-2.6 11.5-2.6V78.5c-8.4 0-16 1.8-22.6 5.6-28.1 16.2-34.4 66.7-19.9 130.1-62.2 19.2-102.7 49.9-102.7 82.3 0 32.5 40.7 63.3 103.1 82.4-14.4 63.6-8 114.2 20.2 130.4 6.5 3.8 14.1 5.6 22.5 5.6 27.5 0 63.5-19.6 99.9-53.6 36.4 33.8 72.4 53.2 99.9 53.2 8.4 0 16-1.8 22.6-5.6 28.1-16.2 34.4-66.7 19.9-130.1 62-19.1 102.5-49.9 102.5-82.3zm-130.2-66.7c-3.7 12.9-8.3 26.2-13.5 39.5-4.1-8-8.4-16-13.1-24-4.6-8-9.5-15.8-14.4-23.4 14.2 2.1 27.9 4.7 41 7.9zm-45.8 106.5c-7.8 13.5-15.8 26.3-24.1 38.2-14.9 1.3-30 2-45.2 2-15.1 0-30.2-.7-45-1.9-8.3-11.9-16.4-24.6-24.2-38-7.6-13.1-14.5-26.4-20.8-39.8 6.2-13.4 13.2-26.8 20.7-39.9 7.8-13.5 15.8-26.3 24.1-38.2 14.9-1.3 30-2 45.2-2 15.1 0 30.2.7 45 1.9 8.3 11.9 16.4 24.6 24.2 38 7.6 13.1 14.5 26.4 20.8 39.8-6.3 13.4-13.2 26.8-20.7 39.9zm32.3-13c5.4 13.4 10 26.8 13.8 39.8-13.1 3.2-26.9 5.9-41.2 8 4.9-7.7 9.8-15.6 14.4-23.7 4.6-8 8.9-16.1 13-24.1zM421.2 430c-9.3-9.6-18.6-20.3-27.8-32 9 .4 18.2.7 27.5.7 9.4 0 18.7-.2 27.8-.7-9 11.7-18.3 22.4-27.5 32zm-74.4-58.9c-14.2-2.1-27.9-4.7-41-7.9 3.7-12.9 8.3-26.2 13.5-39.5 4.1 8 8.4 16 13.1 24 4.7 8 9.5 15.8 14.4 23.4zM420.7 163c9.3 9.6 18.6 20.3 27.8 32-9-.4-18.2-.7-27.5-.7-9.4 0-18.7.2-27.8.7 9-11.7 18.3-22.4 27.5-32zm-74 58.9c-4.9 7.7-9.8 15.6-14.4 23.7-4.6 8-8.9 16-13 24-5.4-13.4-10-26.8-13.8-39.8 13.1-3.1 26.9-5.8 41.2-7.9zm-90.5 125.2c-35.4-15.1-58.3-34.9-58.3-50.6 0-15.7 22.9-35.6 58.3-50.6 8.6-3.7 18-7 27.7-10.1 5.7 19.6 13.2 40 22.5 60.9-9.2 20.8-16.6 
41.1-22.2 60.6-9.9-3.1-19.3-6.5-28-10.2zM310 490c-13.6-7.8-19.5-37.5-14.9-75.7 1.1-9.4 2.9-19.3 5.1-29.4 19.6 4.8 41 8.5 63.5 10.9 13.5 18.5 27.5 35.3 41.6 50-32.6 30.3-63.2 46.9-84 46.9-4.5-.1-8.3-1-11.3-2.7zm237.2-76.2c4.7 38.2-1.1 67.9-14.6 75.8-3 1.8-6.9 2.6-11.5 2.6-20.7 0-51.4-16.5-84-46.6 14-14.7 28-31.4 41.3-49.9 22.6-2.4 44-6.1 63.6-11 2.3 10.1 4.1 19.8 5.2 29.1zm38.5-66.7c-8.6 3.7-18 7-27.7 10.1-5.7-19.6-13.2-40-22.5-60.9 9.2-20.8 16.6-41.1 22.2-60.6 9.9 3.1 19.3 6.5 28.1 10.2 35.4 15.1 58.3 34.9 58.3 50.6-.1 15.7-23 35.6-58.4 50.6zM320.8 78.4z"/><circle cx="420.9" cy="296.5" r="45.7"/><path d="M520.5 78.1z"/></g></svg>

After

Width:  |  Height:  |  Size: 2.6 KiB

+277
View File
@@ -0,0 +1,277 @@
// Styles for the ConsolePage component: a full-height flex column with a
// header (.content-top) and a main area (.content-main) that holds the event
// and conversation logs on the left and fixed-width panels on the right.
[data-component='ConsolePage'] {
  font-family: 'Roboto Mono', monospace;
  font-weight: 400;
  font-style: normal;
  font-size: 12px;
  height: 100%;
  display: flex;
  flex-direction: column;
  overflow: hidden;
  margin: 0px 8px;
  padding-bottom: 8px;

  & > div {
    flex-shrink: 0;
  }

  .spacer {
    flex-grow: 1;
  }

  // Header row
  .content-top {
    display: flex;
    align-items: center;
    padding: 8px 16px;
    min-height: 40px;

    .content-title {
      flex-grow: 1;
      display: flex;
      align-items: center;
      gap: 12px;

      img {
        width: 24px;
        height: 24px;
      }
    }
  }

  // Main area; the only direct child that is allowed to shrink
  .content-main {
    flex-grow: 1;
    flex-shrink: 1 !important;
    margin: 0px 16px;
    display: flex;
    overflow: hidden;
    margin-bottom: 24px;

    .content-block {
      position: relative;
      display: flex;
      flex-direction: column;
      max-height: 100%;
      width: 100%;

      .content-block-title {
        flex-shrink: 0;
        padding-top: 16px;
        padding-bottom: 4px;
        position: relative;
      }

      .content-block-body {
        color: #6e6e7f;
        position: relative;
        flex-grow: 1;
        padding: 8px 0px;
        padding-top: 4px;
        line-height: 1.2em;
        overflow: auto;

        &.full {
          padding: 0px;
        }
      }
    }

    // Right-hand column of rounded panels
    .content-right {
      width: 300px;
      flex-shrink: 0;
      display: flex;
      flex-direction: column;
      margin-left: 24px;
      gap: 24px;

      & > div {
        border-radius: 16px;
        flex-grow: 1;
        flex-shrink: 0;
        overflow: hidden;
        position: relative;

        // Pill-shaped title floating over the panel content
        .content-block-title {
          position: absolute;
          display: flex;
          align-items: center;
          top: 16px;
          left: 16px;
          padding: 4px 16px;
          background-color: #fff;
          border-radius: 1000px;
          min-height: 32px;
          z-index: 9999;
          white-space: pre;
          text-align: right;

          &.bottom {
            top: auto;
            bottom: 16px;
          }
        }
      }

      & > div.kv {
        height: 250px;
        max-height: 250px;
        white-space: pre;
        background-color: #ececf1;

        .content-block-body {
          padding: 16px;
          // Leaves room for the floating title pill
          margin-top: 56px;
        }
      }
    }

    // Left-hand column: events, conversation and action buttons
    .content-logs {
      flex-grow: 1;
      display: flex;
      flex-direction: column;
      overflow: hidden;

      & > div {
        flex-grow: 1;
      }

      & > .content-actions {
        flex-grow: 0;
        flex-shrink: 0;
        display: flex;
        align-items: center;
        justify-content: center;
        gap: 16px;
      }

      & > div.events {
        overflow: hidden;
      }

      .events {
        border-top: 1px solid #e7e7e7;
      }

      .conversation {
        display: flex;
        flex-shrink: 0;
        width: 100%;
        overflow: hidden;
        height: 200px;
        min-height: 0;
        max-height: 200px;
        border-top: 1px solid #e7e7e7;
      }
    }
  }

  // One entry in the conversation list
  .conversation-item {
    position: relative;
    display: flex;
    gap: 16px;
    margin-bottom: 16px;

    // The delete button is only shown while the item is hovered
    &:not(:hover) .close {
      display: none;
    }

    .close {
      position: absolute;
      top: 0px;
      right: -20px;
      background: #aaa;
      color: #fff;
      display: flex;
      border-radius: 16px;
      padding: 2px;
      cursor: pointer;

      &:hover {
        background: #696969;
      }

      svg {
        stroke-width: 3;
        width: 12px;
        height: 12px;
      }
    }

    .speaker {
      position: relative;
      text-align: left;
      gap: 16px;
      width: 80px;
      flex-shrink: 0;
      margin-right: 16px;

      &.user {
        color: #0099ff;
      }

      &.assistant {
        color: #009900;
      }
    }

    .speaker-content {
      color: #18181b;
    }
  }

  // One entry in the event log
  .event {
    border-radius: 3px;
    white-space: pre;
    display: flex;
    padding: 0px;
    gap: 16px;

    .event-timestamp {
      text-align: left;
      gap: 8px;
      padding: 4px 0px;
      width: 80px;
      flex-shrink: 0;
      margin-right: 16px;
    }

    .event-details {
      display: flex;
      flex-direction: column;
      color: #18181b;
      gap: 8px;

      .event-summary {
        // Declarations are kept ahead of nested rules: Sass deprecates
        // declarations that follow a nested rule (mixed-decls)
        padding: 4px 8px;
        margin: 0px -8px;
        cursor: pointer;
        display: flex;
        gap: 8px;
        align-items: center;

        &:hover {
          border-radius: 8px;
          background-color: #f0f0f0;
        }

        .event-source {
          flex-shrink: 0;
          display: flex;
          align-items: center;
          gap: 8px;

          &.client {
            color: #0099ff;
          }

          &.server {
            color: #009900;
          }

          &.error {
            color: #990000;
          }

          svg {
            stroke-width: 3;
            width: 12px;
            height: 12px;
          }
        }
      }
    }
  }

  // Audio visualization canvases pinned to the bottom-right of their container
  .visualization {
    position: absolute;
    display: flex;
    bottom: 4px;
    right: 8px;
    padding: 4px;
    border-radius: 16px;
    z-index: 10;
    gap: 2px;

    .visualization-entry {
      position: relative;
      display: flex;
      align-items: center;
      height: 40px;
      width: 100px;
      gap: 4px;

      &.client {
        color: #0099ff;
      }

      &.server {
        color: #009900;
      }

      canvas {
        width: 100%;
        height: 100%;
        color: currentColor;
      }
    }
  }
}
+698
View File
@@ -0,0 +1,698 @@
/**
 * Change this if you want to connect to a local relay server!
 * This will require you to set OPENAI_API_KEY= in a `.env` file
 * You can run it with `npm run relay`, in parallel with `npm start`
 *
 * To switch, comment out the active line below and uncomment the other one.
 * While this is left undefined, the page asks for an API key in the browser
 * and passes it to the client directly.
 */
// const USE_LOCAL_RELAY_SERVER_URL: string | undefined = 'http://localhost:8081';
const USE_LOCAL_RELAY_SERVER_URL: string | undefined = void 0;
import { useEffect, useRef, useCallback, useState } from 'react';
import { RealtimeClient } from '../lib/realtime-api-beta/index.js';
import { ItemType } from '../lib/realtime-api-beta/dist/lib/client.js';
import { WavRecorder, WavStreamPlayer } from '../lib/wavtools/index.js';
import { instructions } from '../utils/conversation_config.js';
import { WavRenderer } from '../utils/wav_renderer';
import { X, Edit, Zap, ArrowUp, ArrowDown } from 'react-feather';
import { Button } from '../components/button/Button';
import { Toggle } from '../components/toggle/Toggle';
import { Map } from '../components/Map';
import './ConsolePage.scss';
/**
 * Type for result from get_weather() function call
 * Also the shape of the `coords` and `marker` state held by the page
 */
interface Coordinates {
// Latitude; the page's default coords use decimal degrees (37.775593)
lat: number;
// Longitude; the page's default coords use decimal degrees (-122.418137)
lng: number;
// Optional label for the place; presumably a human-readable name, confirm against the get_weather() handler
location?: string;
}
/**
 * Type for all event logs
 */
interface RealtimeEvent {
// Timestamp of the event as a string; presumably ISO 8601 like startTimeRef, confirm against the producer
time: string;
// Which side the event came from
source: 'client' | 'server';
// Optional counter; NOTE(review): looks like a repeat count for collapsed duplicate events, confirm where it is set
count?: number;
// The event payload, with no fixed shape
event: { [key: string]: any };
}
export function ConsolePage() {
/**
* Ask user for API Key
* If we're using the local relay server, we don't need this
*/
const apiKey = USE_LOCAL_RELAY_SERVER_URL
? ''
: localStorage.getItem('tmp::voice_api_key') ||
prompt('OpenAI API Key') ||
'';
if (apiKey !== '') {
localStorage.setItem('tmp::voice_api_key', apiKey);
}
/**
* Instantiate:
* - WavRecorder (speech input)
* - WavStreamPlayer (speech output)
* - RealtimeClient (API client)
*/
const wavRecorderRef = useRef<WavRecorder>(
new WavRecorder({ sampleRate: 24000 })
);
const wavStreamPlayerRef = useRef<WavStreamPlayer>(
new WavStreamPlayer({ sampleRate: 24000 })
);
const clientRef = useRef<RealtimeClient>(
new RealtimeClient(
USE_LOCAL_RELAY_SERVER_URL
? { url: USE_LOCAL_RELAY_SERVER_URL }
: {
apiKey: apiKey,
dangerouslyAllowAPIKeyInBrowser: true,
}
)
);
/**
 * References for
 * - Rendering audio visualization (canvas)
 * - Autoscrolling event logs
 * - Timing delta for event log displays
 */
// Canvases for the microphone ("client") and playback ("server") visualizations
const clientCanvasRef = useRef<HTMLCanvasElement>(null);
const serverCanvasRef = useRef<HTMLCanvasElement>(null);
// Last scroll height seen for the events list, so it only scrolls when the height changes
const eventsScrollHeightRef = useRef(0);
const eventsScrollRef = useRef<HTMLDivElement>(null);
// ISO timestamp that log times are measured from; reset on each connect
const startTimeRef = useRef<string>(new Date().toISOString());
/**
 * All of our variables for displaying application state
 * - items are all conversation items (dialog)
 * - realtimeEvents are event logs, which can be expanded
 * - expandedEvents maps an event_id to true while its payload is shown
 * - isConnected, canPushToTalk, isRecording drive the action buttons
 * - memoryKv is for set_memory() function
 * - coords, marker are for get_weather() function
 */
const [items, setItems] = useState<ItemType[]>([]);
const [realtimeEvents, setRealtimeEvents] = useState<RealtimeEvent[]>([]);
const [expandedEvents, setExpandedEvents] = useState<{
[key: string]: boolean;
}>({});
const [isConnected, setIsConnected] = useState(false);
// Starts true: the turn-detection toggle defaults to manual (push-to-talk) mode
const [canPushToTalk, setCanPushToTalk] = useState(true);
const [isRecording, setIsRecording] = useState(false);
const [memoryKv, setMemoryKv] = useState<{ [key: string]: any }>({});
// Initial map center; the same values are restored on disconnect
const [coords, setCoords] = useState<Coordinates | null>({
lat: 37.775593,
lng: -122.418137,
});
// No marker until get_weather() has been called
const [marker, setMarker] = useState<Coordinates | null>(null);
/**
 * Format a log timestamp as `mm:ss.hh` (minutes, seconds, hundredths)
 * elapsed since the current start time
 */
const formatTime = useCallback((timestamp: string) => {
  const elapsedMs =
    new Date(timestamp).valueOf() - new Date(startTimeRef.current).valueOf();
  // Left-pad each field with zeros to at least two characters
  const twoDigits = (n: number) => String(n).padStart(2, '0');
  const minutes = Math.floor(elapsedMs / 60_000) % 60;
  const seconds = Math.floor(elapsedMs / 1000) % 60;
  const hundredths = Math.floor(elapsedMs / 10) % 100;
  return `${twoDigits(minutes)}:${twoDigits(seconds)}.${twoDigits(hundredths)}`;
}, []);
/**
 * Handler for clicking the API key button:
 * ask for a new key, store it, and reload the page
 */
const resetAPIKey = useCallback(() => {
  const enteredKey = prompt('OpenAI API Key');
  if (enteredKey === null) {
    // Nothing was entered; keep the current key
    return;
  }
  // Wipe everything stored for this origin before saving the new key
  localStorage.clear();
  localStorage.setItem('tmp::voice_api_key', enteredKey);
  window.location.reload();
}, []);
/**
 * Connect to conversation:
 * WavRecorder takes speech input, WavStreamPlayer plays output,
 * client is the API client
 */
const connectConversation = useCallback(async () => {
  const { current: client } = clientRef;
  const { current: recorder } = wavRecorderRef;
  const { current: player } = wavStreamPlayerRef;

  // Reset the log clock and UI state for the new session
  startTimeRef.current = new Date().toISOString();
  setIsConnected(true);
  setRealtimeEvents([]);
  setItems(client.conversation.getItems());

  // Microphone first, then audio output, then the realtime API
  await recorder.begin();
  await player.connect();
  await client.connect();

  // Open the conversation with a greeting
  client.sendUserMessageContent([{ type: 'input_text', text: 'Hello!' }]);

  // With server-side turn detection, stream microphone audio right away
  if (client.getTurnDetectionType() === 'server_vad') {
    await recorder.record((chunk) => client.appendInputAudio(chunk.mono));
  }
}, []);
/**
 * Disconnect and put the conversation state back to its defaults
 */
const disconnectConversation = useCallback(async () => {
  // Clear everything shown in the UI
  setIsConnected(false);
  setRealtimeEvents([]);
  setItems([]);
  setMemoryKv({});
  setCoords({ lat: 37.775593, lng: -122.418137 });
  setMarker(null);

  // Close the API connection, then stop audio capture and playback
  clientRef.current.disconnect();
  await wavRecorderRef.current.end();
  await wavStreamPlayerRef.current.interrupt();
}, []);
/**
 * Ask the client to delete a conversation item by id
 */
const deleteConversationItem = useCallback(async (id: string) => {
  clientRef.current.deleteItem(id);
}, []);
/**
 * In push-to-talk mode, start recording
 * Every recorded chunk is forwarded with .appendInputAudio()
 */
const startRecording = async () => {
  setIsRecording(true);
  const { current: client } = clientRef;
  const { current: recorder } = wavRecorderRef;
  const { current: player } = wavStreamPlayerRef;

  // Stop playback; if a track was interrupted, cancel that response at the
  // offset where playback stopped
  const interrupted = await player.interrupt();
  if (interrupted?.trackId) {
    await client.cancelResponse(interrupted.trackId, interrupted.offset);
  }

  await recorder.record((chunk) => client.appendInputAudio(chunk.mono));
};
/**
 * In push-to-talk mode, stop recording and request a response
 */
const stopRecording = async () => {
  setIsRecording(false);
  const { current: client } = clientRef;
  await wavRecorderRef.current.pause();
  client.createResponse();
};
/**
 * Switch between Manual <> VAD mode for communication
 * `value` is 'none' for manual mode or 'server_vad' for VAD mode
 */
const changeTurnEndType = async (value: string) => {
  const { current: client } = clientRef;
  const { current: recorder } = wavRecorderRef;
  const isManual = value === 'none';

  // Going manual: stop any recording that is in progress
  if (isManual && recorder.getStatus() === 'recording') {
    await recorder.pause();
  }

  client.updateSession({
    turn_detection: isManual ? null : { type: 'server_vad' },
  });

  // Going VAD while connected: start streaming microphone audio
  if (value === 'server_vad' && client.isConnected()) {
    await recorder.record((chunk) => client.appendInputAudio(chunk.mono));
  }

  setCanPushToTalk(isManual);
};
/**
 * Auto-scroll the event logs to the bottom when their height changes
 */
useEffect(() => {
  const eventsEl = eventsScrollRef.current;
  if (!eventsEl) {
    return;
  }
  const { scrollHeight } = eventsEl;
  // Same height as last time: leave the scroll position alone
  if (scrollHeight === eventsScrollHeightRef.current) {
    return;
  }
  eventsEl.scrollTop = scrollHeight;
  eventsScrollHeightRef.current = scrollHeight;
}, [realtimeEvents]);
/**
 * Auto-scroll the conversation logs
 * Every element marked with data-conversation-content is scrolled to its end
 */
useEffect(() => {
  document.body
    .querySelectorAll<HTMLDivElement>('[data-conversation-content]')
    .forEach((conversationEl) => {
      conversationEl.scrollTop = conversationEl.scrollHeight;
    });
}, [items]);
/**
 * Set up render loops for the visualization canvas
 * One animation-frame loop redraws both canvases until the effect is cleaned up
 */
useEffect(() => {
  let isLoaded = true;
  const wavRecorder = wavRecorderRef.current;
  const wavStreamPlayer = wavStreamPlayerRef.current;
  const clientCanvas = clientCanvasRef.current;
  const serverCanvas = serverCanvasRef.current;
  // 2d contexts are looked up on first use and then reused
  let clientCtx: CanvasRenderingContext2D | null = null;
  let serverCtx: CanvasRenderingContext2D | null = null;

  // Size the canvas if needed, clear it and draw one frame of bars.
  // Returns the context to reuse on the next frame.
  const paint = (
    canvas: HTMLCanvasElement | null,
    ctx: CanvasRenderingContext2D | null,
    readValues: () => Float32Array,
    color: string
  ): CanvasRenderingContext2D | null => {
    if (!canvas) {
      return ctx;
    }
    if (!canvas.width || !canvas.height) {
      canvas.width = canvas.offsetWidth;
      canvas.height = canvas.offsetHeight;
    }
    const activeCtx = ctx || canvas.getContext('2d');
    if (activeCtx) {
      activeCtx.clearRect(0, 0, canvas.width, canvas.height);
      WavRenderer.drawBars(canvas, activeCtx, readValues(), color, 10, 0, 8);
    }
    return activeCtx;
  };

  const render = () => {
    if (!isLoaded) {
      return;
    }
    // Microphone input: a single zero value is drawn while not recording
    clientCtx = paint(
      clientCanvas,
      clientCtx,
      () =>
        wavRecorder.recording
          ? wavRecorder.getFrequencies('voice').values
          : new Float32Array([0]),
      '#0099ff'
    );
    // Playback output: a single zero value is drawn while there is no analyser
    serverCtx = paint(
      serverCanvas,
      serverCtx,
      () =>
        wavStreamPlayer.analyser
          ? wavStreamPlayer.getFrequencies('voice').values
          : new Float32Array([0]),
      '#009900'
    );
    window.requestAnimationFrame(render);
  };
  render();

  return () => {
    // Makes the next scheduled frame a no-op, which ends the loop
    isLoaded = false;
  };
}, []);
/**
 * Core RealtimeClient and audio capture setup
 * Set all of our instructions, tools, events and more
 * Runs once on mount; the cleanup resets the client to its defaults
 */
useEffect(() => {
  // Get refs
  const wavStreamPlayer = wavStreamPlayerRef.current;
  const client = clientRef.current;

  // Set instructions
  client.updateSession({ instructions: instructions });
  // Set transcription, otherwise we don't get user transcriptions back
  client.updateSession({ input_audio_transcription: { model: 'whisper-1' } });

  // Add tools
  // set_memory: store a key/value pair in the memory panel state
  client.addTool(
    {
      name: 'set_memory',
      description: 'Saves important data about the user into memory.',
      parameters: {
        type: 'object',
        properties: {
          key: {
            type: 'string',
            description:
              'The key of the memory value. Always use lowercase and underscores, no other characters.',
          },
          value: {
            type: 'string',
            description: 'Value can be anything represented as a string',
          },
        },
        required: ['key', 'value'],
      },
    },
    async ({ key, value }: { [key: string]: any }) => {
      // Build a new object so the state value is never mutated
      setMemoryKv((memoryKv) => {
        const newKv = { ...memoryKv };
        newKv[key] = value;
        return newKv;
      });
      return { ok: true };
    }
  );
  // get_weather: move the map to the location and return the forecast JSON
  client.addTool(
    {
      name: 'get_weather',
      description:
        'Retrieves the weather for a given lat, lng coordinate pair. Specify a label for the location.',
      parameters: {
        type: 'object',
        properties: {
          lat: {
            type: 'number',
            description: 'Latitude',
          },
          lng: {
            type: 'number',
            description: 'Longitude',
          },
          location: {
            type: 'string',
            description: 'Name of the location',
          },
        },
        required: ['lat', 'lng', 'location'],
      },
    },
    async ({ lat, lng, location }: { [key: string]: any }) => {
      // Update the map before the request so it moves immediately
      setMarker({ lat, lng, location });
      setCoords({ lat, lng, location });
      const result = await fetch(
        `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lng}&current=temperature_2m,wind_speed_10m`
      );
      const json = await result.json();
      return json;
    }
  );

  // handle realtime events from client + server for event logging
  client.on('realtime.event', (realtimeEvent: RealtimeEvent) => {
    setRealtimeEvents((realtimeEvents) => {
      const lastEvent = realtimeEvents[realtimeEvents.length - 1];
      if (lastEvent?.event.type === realtimeEvent.event.type) {
        // if we receive multiple events in a row, aggregate them for display purposes.
        // The last entry is replaced by an updated copy rather than mutated:
        // state updaters must be pure, and React StrictMode runs them twice,
        // which would otherwise count every repeated event two times.
        const aggregated: RealtimeEvent = {
          ...lastEvent,
          count: (lastEvent.count || 0) + 1,
        };
        return [...realtimeEvents.slice(0, -1), aggregated];
      }
      return realtimeEvents.concat(realtimeEvent);
    });
  });
  client.on('error', (event: any) => console.error(event));
  // On interruption, stop playback and cancel the response at the point
  // where playback stopped
  client.on('conversation.interrupted', async () => {
    const trackSampleOffset = await wavStreamPlayer.interrupt();
    if (trackSampleOffset?.trackId) {
      const { trackId, offset } = trackSampleOffset;
      await client.cancelResponse(trackId, offset);
    }
  });
  client.on('conversation.updated', async ({ item, delta }: any) => {
    const items = client.conversation.getItems();
    // Queue newly received audio for playback, keyed by item id
    if (delta?.audio) {
      wavStreamPlayer.add16BitPCM(delta.audio, item.id);
    }
    // Once an item with audio is complete, decode it and attach the file
    // so the conversation view can render a player for it
    if (item.status === 'completed' && item.formatted.audio?.length) {
      const wavFile = await WavRecorder.decode(
        item.formatted.audio,
        24000,
        24000
      );
      item.formatted.file = wavFile;
    }
    setItems(items);
  });

  setItems(client.conversation.getItems());

  return () => {
    // cleanup; resets to defaults
    client.reset();
  };
}, []);
/**
 * Render the application
 * Layout: title bar with API key button, then the events log, conversation
 * log and action buttons on the left, and the tool panels on the right
 */
return (
  <div data-component="ConsolePage">
    <div className="content-top">
      <div className="content-title">
        {/* alt text so the logo is announced and the a11y lint rule passes */}
        <img src="/openai-logomark.svg" alt="OpenAI" />
        <span>realtime console</span>
      </div>
      <div className="content-api-key">
        {/* The key button is only shown when no relay server is configured */}
        {!USE_LOCAL_RELAY_SERVER_URL && (
          <Button
            icon={Edit}
            iconPosition="end"
            buttonStyle="flush"
            label={`api key: ${apiKey.slice(0, 3)}...`}
            onClick={() => resetAPIKey()}
          />
        )}
      </div>
    </div>
    <div className="content-main">
      <div className="content-logs">
        <div className="content-block events">
          <div className="visualization">
            <div className="visualization-entry client">
              <canvas ref={clientCanvasRef} />
            </div>
            <div className="visualization-entry server">
              <canvas ref={serverCanvasRef} />
            </div>
          </div>
          <div className="content-block-title">events</div>
          <div className="content-block-body" ref={eventsScrollRef}>
            {!realtimeEvents.length && `awaiting connection...`}
            {realtimeEvents.map((realtimeEvent) => {
              const count = realtimeEvent.count;
              // Work on a shallow copy so trimming never touches the logged event
              const event = { ...realtimeEvent.event };
              // Replace audio payloads with a short size summary
              if (event.type === 'input_audio_buffer.append') {
                event.audio = `[trimmed: ${event.audio.length} bytes]`;
              } else if (event.type === 'response.audio.delta') {
                event.delta = `[trimmed: ${event.delta.length} bytes]`;
              }
              return (
                <div className="event" key={event.event_id}>
                  <div className="event-timestamp">
                    {formatTime(realtimeEvent.time)}
                  </div>
                  <div className="event-details">
                    <div
                      className="event-summary"
                      onClick={() => {
                        // toggle event details
                        const id = event.event_id;
                        const expanded = { ...expandedEvents };
                        if (expanded[id]) {
                          delete expanded[id];
                        } else {
                          expanded[id] = true;
                        }
                        setExpandedEvents(expanded);
                      }}
                    >
                      <div
                        className={`event-source ${
                          event.type === 'error'
                            ? 'error'
                            : realtimeEvent.source
                        }`}
                      >
                        {realtimeEvent.source === 'client' ? (
                          <ArrowUp />
                        ) : (
                          <ArrowDown />
                        )}
                        <span>
                          {event.type === 'error'
                            ? 'error!'
                            : realtimeEvent.source}
                        </span>
                      </div>
                      <div className="event-type">
                        {event.type}
                        {count && ` (${count})`}
                      </div>
                    </div>
                    {!!expandedEvents[event.event_id] && (
                      <div className="event-payload">
                        {JSON.stringify(event, null, 2)}
                      </div>
                    )}
                  </div>
                </div>
              );
            })}
          </div>
        </div>
        <div className="content-block conversation">
          <div className="content-block-title">conversation</div>
          <div className="content-block-body" data-conversation-content>
            {!items.length && `awaiting connection...`}
            {items.map((conversationItem) => {
              return (
                <div className="conversation-item" key={conversationItem.id}>
                  <div className={`speaker ${conversationItem.role}`}>
                    <div>
                      {(
                        conversationItem.role || conversationItem.type
                      ).replaceAll('_', ' ')}
                    </div>
                    <div
                      className="close"
                      onClick={() =>
                        deleteConversationItem(conversationItem.id)
                      }
                    >
                      <X />
                    </div>
                  </div>
                  <div className={`speaker-content`}>
                    {/* tool response */}
                    {conversationItem.type === 'function_call_output' && (
                      <div>{conversationItem.formatted.output}</div>
                    )}
                    {/* tool call */}
                    {!!conversationItem.formatted.tool && (
                      <div>
                        {conversationItem.formatted.tool.name}(
                        {conversationItem.formatted.tool.arguments})
                      </div>
                    )}
                    {/* user message: transcript, then placeholder or text */}
                    {!conversationItem.formatted.tool &&
                      conversationItem.role === 'user' && (
                        <div>
                          {conversationItem.formatted.transcript ||
                            (conversationItem.formatted.audio?.length
                              ? '(awaiting transcript)'
                              : conversationItem.formatted.text ||
                                '(item sent)')}
                        </div>
                      )}
                    {/* assistant message */}
                    {!conversationItem.formatted.tool &&
                      conversationItem.role === 'assistant' && (
                        <div>
                          {conversationItem.formatted.transcript ||
                            conversationItem.formatted.text ||
                            '(truncated)'}
                        </div>
                      )}
                    {/* decoded audio, attached once the item is completed */}
                    {conversationItem.formatted.file && (
                      <audio
                        src={conversationItem.formatted.file.url}
                        controls
                      />
                    )}
                  </div>
                </div>
              );
            })}
          </div>
        </div>
        <div className="content-actions">
          <Toggle
            defaultValue={false}
            labels={['manual', 'vad']}
            values={['none', 'server_vad']}
            onChange={(_, value) => changeTurnEndType(value)}
          />
          <div className="spacer" />
          {isConnected && canPushToTalk && (
            <Button
              label={isRecording ? 'release to send' : 'push to talk'}
              buttonStyle={isRecording ? 'alert' : 'regular'}
              disabled={!isConnected || !canPushToTalk}
              onMouseDown={startRecording}
              onMouseUp={stopRecording}
            />
          )}
          <div className="spacer" />
          <Button
            label={isConnected ? 'disconnect' : 'connect'}
            iconPosition={isConnected ? 'end' : 'start'}
            icon={isConnected ? X : Zap}
            buttonStyle={isConnected ? 'regular' : 'action'}
            onClick={
              isConnected ? disconnectConversation : connectConversation
            }
          />
        </div>
      </div>
      <div className="content-right">
        <div className="content-block map">
          <div className="content-block-title">get_weather()</div>
          <div className="content-block-title bottom">
            {marker?.location || 'not yet retrieved'}
          </div>
          <div className="content-block-body full">
            {coords && (
              <Map
                center={[coords.lat, coords.lng]}
                location={coords.location}
              />
            )}
          </div>
        </div>
        <div className="content-block kv">
          <div className="content-block-title">set_memory()</div>
          <div className="content-block-body content-kv">
            {JSON.stringify(memoryKv, null, 2)}
          </div>
        </div>
      </div>
    </div>
  </div>
);
}
+1
View File
@@ -0,0 +1 @@
/// <reference types="react-scripts" />
+15
View File
@@ -0,0 +1,15 @@
import { ReportHandler } from 'web-vitals';
/**
 * Report web-vitals metrics to the given handler.
 * Does nothing unless a function is passed; the web-vitals module is only
 * loaded when there is a handler to report to.
 */
const reportWebVitals = (onPerfEntry?: ReportHandler) => {
  if (!onPerfEntry || !(onPerfEntry instanceof Function)) {
    return;
  }
  import('web-vitals').then((vitals) => {
    vitals.getCLS(onPerfEntry);
    vitals.getFID(onPerfEntry);
    vitals.getFCP(onPerfEntry);
    vitals.getLCP(onPerfEntry);
    vitals.getTTFB(onPerfEntry);
  });
};
export default reportWebVitals;
+5
View File
@@ -0,0 +1,5 @@
// jest-dom adds custom jest matchers for asserting on DOM nodes.
// allows you to do things like:
// expect(element).toHaveTextContent(/react/i)
// learn more: https://github.com/testing-library/jest-dom
import '@testing-library/jest-dom';
+16
View File
@@ -0,0 +1,16 @@
/**
 * Instruction text describing the assistant's persona and behavior in the
 * test playground. The text is runtime data: edit it deliberately, since
 * every character is part of what the assistant is told.
 */
export const instructions = `System settings:
Tool use: enabled.
Instructions:
- You are a conversational assistant in a test playground that helps developers learn about your abilities
- Your personality is upbeat and enthusiastic
- You speak quickly and are concise; avoid rambling and useless information
- You are capable of modifying your tone, energy level, talking speed, and emotional demeanor
- You are capable of many types of expression (including, but not limited to) whispering, speaking loud, being out of breath, weak, dying, full of power
- Let the speaker you're talking to play with your settings and have fun
- Be kind, helpful, and curteous
- It is okay to ask the speaker questions
- Use tools and functions you have available liberally, it is part of the training apparatus
- Be open to exploration and conversation
- Remember: this is just for fun and testing!
`;
+111
View File
@@ -0,0 +1,111 @@
/**
 * Memo store for normalizeArray: input array -> m -> downsamplePeaks -> result.
 * Keyed weakly so cached results go away with their input array.
 */
const dataMap = new WeakMap<
  Float32Array,
  Record<string, Record<string, number[]>>
>();
/**
 * Normalizes a Float32Array to Array(m): We use this to draw amplitudes on a graph
 * If we're rendering the same audio data, then we'll often be using
 * the same (data, m, downsamplePeaks) triplets so we give option to memoize
 *
 * - m <= data.length: downsample into m buckets of absolute values, taking
 *   the mean of each bucket, or its peak when downsamplePeaks is true
 * - m > data.length: upsample by linear interpolation between samples
 * - empty data: every output value is 0 (there is nothing to sample)
 *
 * @param data input samples
 * @param m number of output values
 * @param downsamplePeaks keep the peak of each bucket instead of its mean
 * @param memoize cache the result per (data, m, downsamplePeaks)
 * @returns array of m values; with memoize, the same array instance is
 * returned for repeated calls
 */
const normalizeArray = (
  data: Float32Array,
  m: number,
  downsamplePeaks: boolean = false,
  memoize: boolean = false
): number[] => {
  const dKey = downsamplePeaks.toString();
  let bucketCache: Record<string, number[]> | undefined;
  if (memoize) {
    const mKey = m.toString();
    let cache = dataMap.get(data);
    if (!cache) {
      cache = {};
      dataMap.set(data, cache);
    }
    bucketCache = cache[mKey] ?? (cache[mKey] = {});
    const cached = bucketCache[dKey];
    if (cached) {
      return cached;
    }
  }
  const n = data.length;
  const result = new Array<number>(m);
  if (n === 0) {
    // No samples: interpolating would read data[-1] and divide by zero,
    // producing undefined/NaN values, so report silence instead
    result.fill(0);
  } else if (m <= n) {
    // Downsampling
    result.fill(0);
    const count = new Array<number>(m).fill(0);
    for (let i = 0; i < n; i++) {
      const index = Math.floor(i * (m / n));
      if (downsamplePeaks) {
        // take highest result in the set
        result[index] = Math.max(result[index], Math.abs(data[i]));
      } else {
        result[index] += Math.abs(data[i]);
      }
      count[index]++;
    }
    if (!downsamplePeaks) {
      for (let i = 0; i < result.length; i++) {
        result[i] = result[i] / count[i];
      }
    }
  } else {
    // Upsampling: here m > n >= 1, so m - 1 is never zero
    for (let i = 0; i < m; i++) {
      const index = (i * (n - 1)) / (m - 1);
      const low = Math.floor(index);
      const high = Math.ceil(index);
      const t = index - low;
      if (high >= n) {
        result[i] = data[n - 1];
      } else {
        result[i] = data[low] * (1 - t) + data[high] * t;
      }
    }
  }
  if (bucketCache) {
    bucketCache[dKey] = result;
  }
  return result;
};
export const WavRenderer = {
  /**
   * Renders a point-in-time snapshot of an audio sample, usually frequency values
   * Draws nothing when the canvas is too narrow to fit a single bar.
   * @param canvas canvas whose width and height bound the drawing
   * @param ctx 2d context of that canvas
   * @param data values to draw; each bar height is |value| * canvas height, at least 1px
   * @param color fill style used for every bar
   * @param pointCount number of bars to render; 0 means as many as fit
   * @param barWidth width of bars in px; 0 means fill the available width
   * @param barSpacing spacing between bars in px
   * @param center vertically center the bars
   */
  drawBars: (
    canvas: HTMLCanvasElement,
    ctx: CanvasRenderingContext2D,
    data: Float32Array,
    color: string,
    pointCount: number = 0,
    barWidth: number = 0,
    barSpacing: number = 0,
    center: boolean = false
  ) => {
    // How many bars fit across the canvas, counting each bar as at least 1px wide
    const fittingBars =
      (canvas.width - barSpacing) / (Math.max(barWidth, 1) + barSpacing);
    pointCount = Math.floor(Math.min(pointCount, fittingBars));
    if (!pointCount) {
      pointCount = Math.floor(fittingBars);
    }
    // A canvas narrower than one bar slot (e.g. width 0 while hidden) gives a
    // zero or negative count; a negative array length would throw a
    // RangeError, so skip drawing instead
    if (!(pointCount > 0)) {
      return;
    }
    if (!barWidth) {
      barWidth = (canvas.width - barSpacing) / pointCount - barSpacing;
    }
    const points = normalizeArray(data, pointCount, true);
    ctx.fillStyle = color;
    for (let i = 0; i < pointCount; i++) {
      const amplitude = Math.abs(points[i]);
      const height = Math.max(1, amplitude * canvas.height);
      const x = barSpacing + i * (barWidth + barSpacing);
      const y = center ? (canvas.height - height) / 2 : canvas.height - height;
      ctx.fillRect(x, y, barWidth, height);
    }
  },
};
+20
View File
@@ -0,0 +1,20 @@
{
"compilerOptions": {
"target": "ES2020",
"lib": ["dom", "dom.iterable", "esnext", "ES2020"],
"allowJs": true,
"skipLibCheck": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"noEmit": true,
"jsx": "react-jsx"
},
"include": ["src", "src/lib"]
}