Compare commits

...

17 Commits

Author SHA1 Message Date
Bruce MacDonald 9c92b18d40 fix: release ReadableStream reader after iteration completes (#277)
The parseJSON function obtained a ReadableStreamDefaultReader but never called
releaseLock() when iteration finished. This caused Deno's test runner to detect a memory leak with streaming responses.
2026-02-18 14:06:18 -08:00
Jeffrey Morgan f23d7eeb6d examples: fix imagegen first step printing (#273) 2026-01-23 11:44:47 -08:00
Jeffrey Morgan ef411aa67e clean up examples readme (#272) 2026-01-22 22:55:05 -08:00
Jeffrey Morgan c8f3fb3b43 Add image generation support (#271) 2026-01-22 22:45:39 -08:00
lif f7827ba69c browser: export AbortableAsyncIterator type (#267)
Export AbortableAsyncIterator type from the browser module to allow
users to import this type when using ollama/browser.

Fixes #135

Signed-off-by: majiayu000 <1835304752@qq.com>
2026-01-05 14:31:24 -08:00
Jag Reehal 133f3623a1 Add min_p parameter to Options interface (#265)
Adds the min_p (minimum probability threshold) parameter to the Options
interface. This parameter is supported by the Ollama API but was missing
from the TypeScript definitions.

min_p works alongside top_p to control token selection during generation
by setting a minimum probability threshold relative to the most likely token.
Tokens with probabilities below this threshold are filtered out.

This addresses the missing parameter mentioned in issue #145.
2025-12-12 16:40:25 -08:00
Kaloyan Stoyanov a667d4d651 browser/interfaces: add VersionResponse type and add ollama.version() to README (#261) 2025-11-13 11:21:46 -08:00
Parth Sareen c3b668c453 browser/interfaces: add logprobs (#260) 2025-11-12 12:21:14 -08:00
Kaloyan Stoyanov 75baea068e browser: add method to retrieve server version (#259) 2025-11-12 12:10:11 -08:00
Bruce MacDonald 603df9fe59 Update publish.yaml 2025-10-30 13:02:49 -07:00
Bruce MacDonald b4acbee8a0 Revert "fix: regenerate package-lock.json with complete @swc/core platform entries (#257)"
This reverts commit 5b54730c8b.
2025-10-30 13:02:08 -07:00
Bruce MacDonald 5b54730c8b fix: regenerate package-lock.json with complete @swc/core platform entries (#257)
Fixes npm ci failure caused by stricter lockfile validation in newer npm versions.
The lockfile was missing node_modules entries for platform-specific optional dependencies.
2025-10-30 12:19:59 -07:00
Bruce MacDonald 5a132f678d fix: streaming chunk boundaries (#256)
TextDecoder.decode() without { stream: true } treats each chunk as
a complete sequence. When a multibyte UTF-8 character (e.g., 'ь' =
0xD1 0x8C) is split across chunks, the first chunk's incomplete bytes
are emitted as replacement characters instead of being buffered.
2025-10-30 12:04:25 -07:00
Sean Gallen 3b8db716b8 remove duplicate line in .npmignore (#254) 2025-10-22 21:43:30 -07:00
Adrian 9dc9716ece Update multi-tool.ts imports (#231)
Fixes `ReferenceError: Ollama is not defined` error
2025-10-16 09:54:29 -07:00
Eden Chan de292ee84f docs(readme): add Cloud Models JS usage and Cloud API example (#253)
add Cloud Models usage for JavaScript and Cloud API example

- Add local offload flow (signin, pull, run)
- Add direct cloud API usage with auth
- List supported cloud model IDs
- Keep examples minimal; match existing style
2025-10-16 09:44:33 -07:00
Parth Sareen 5f33c960f2 examples: rename browser tool to gpt-oss-browser-tools (#251) 2025-09-24 21:45:07 -07:00
14 changed files with 371 additions and 8 deletions
+1 -1
View File
@@ -13,7 +13,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: latest
node-version: '20'
registry-url: https://registry.npmjs.org
cache: npm
- run: npm ci
-1
View File
@@ -2,7 +2,6 @@ node_modules
build
.docs
.coverage
node_modules
package-lock.json
yarn.lock
.vscode
+82
View File
@@ -46,6 +46,73 @@ for await (const part of response) {
}
```
## Cloud Models
Run larger models by offloading to Ollama's cloud while keeping your local workflow.
[You can see models currently available on Ollama's cloud here.](https://ollama.com/search?c=cloud)
### Run via local Ollama
1) Sign in (one-time):
```
ollama signin
```
2) Pull a cloud model:
```
ollama pull gpt-oss:120b-cloud
```
3) Use as usual (offloads automatically):
```javascript
import { Ollama } from 'ollama'
const ollama = new Ollama()
const response = await ollama.chat({
model: 'gpt-oss:120b-cloud',
messages: [{ role: 'user', content: 'Explain quantum computing' }],
stream: true,
})
for await (const part of response) {
process.stdout.write(part.message.content)
}
```
### Cloud API (ollama.com)
Access cloud models directly by pointing the client at `https://ollama.com`.
1) Create an [API key](https://ollama.com/settings/keys), then set the `OLLAMA_API_KEY` environment variable:
```
export OLLAMA_API_KEY=your_api_key
```
2) Generate a response via the cloud API:
```javascript
import { Ollama } from 'ollama'
const ollama = new Ollama({
host: 'https://ollama.com',
headers: { Authorization: 'Bearer ' + process.env.OLLAMA_API_KEY },
})
const response = await ollama.chat({
model: 'gpt-oss:120b',
messages: [{ role: 'user', content: 'Explain quantum computing' }],
stream: true,
})
for await (const part of response) {
process.stdout.write(part.message.content)
}
```
## API
The Ollama JavaScript library's API is designed around the [Ollama REST API](https://github.com/jmorganca/ollama/blob/main/docs/api.md)
@@ -67,6 +134,8 @@ ollama.chat(request)
- `format` `<string>`: (Optional) Set the expected format of the response (`json`).
- `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
- `think` `<boolean | "high" | "medium" | "low">`: (Optional) Enable model thinking. Use `true`/`false` or specify a level. Requires model support.
- `logprobs` `<boolean>`: (Optional) Return log probabilities for tokens. Requires model support.
- `top_logprobs` `<number>`: (Optional) Number of top log probabilities to return per token when `logprobs` is enabled.
- `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
- `tools` `<Tool[]>`: (Optional) A list of tool calls the model may make.
- `options` `<Options>`: (Optional) Options to configure the runtime.
@@ -90,7 +159,12 @@ ollama.generate(request)
- `format` `<string>`: (Optional) Set the expected format of the response (`json`).
- `stream` `<boolean>`: (Optional) When true an `AsyncGenerator` is returned.
- `think` `<boolean | "high" | "medium" | "low">`: (Optional) Enable model thinking. Use `true`/`false` or specify a level. Requires model support.
- `logprobs` `<boolean>`: (Optional) Return log probabilities for tokens. Requires model support.
- `top_logprobs` `<number>`: (Optional) Number of top log probabilities to return per token when `logprobs` is enabled.
- `keep_alive` `<string | number>`: (Optional) How long to keep the model loaded. A number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc.)
- `width` `<number>`: (Optional, Experimental) Width of the generated image in pixels. For image generation models only.
- `height` `<number>`: (Optional, Experimental) Height of the generated image in pixels. For image generation models only.
- `steps` `<number>`: (Optional, Experimental) Number of diffusion steps. For image generation models only.
- `options` `<Options>`: (Optional) Options to configure the runtime.
- Returns: `<GenerateResponse>`
@@ -225,6 +299,14 @@ ollama.ps()
- Returns: `<ListResponse>`
### version
```javascript
ollama.version()
```
- Returns: `<VersionResponse>`
### abort
```javascript
+6
View File
@@ -8,3 +8,9 @@ To run the examples run:
```sh
npx tsx <folder-name>/<file-name>.ts
```
### Image Generation (Experimental)
> **Note:** Image generation is experimental and currently only available on macOS.
- [image-generation/image-generation.ts](image-generation/image-generation.ts)
@@ -0,0 +1,29 @@
// Image generation is experimental and currently only available on macOS
import ollama from 'ollama'
import { writeFileSync } from 'fs'
/**
 * Sends a streaming generate request for a fixed prompt and consumes the
 * stream: chunks with a `total` are printed as progress, and a chunk with
 * `image` data is base64-decoded and written to `output.png`.
 */
async function main() {
  const prompt = 'a sunset over mountains'
  console.log(`Prompt: ${prompt}`)

  const stream = await ollama.generate({ model: 'x/z-image-turbo', prompt, stream: true })

  for await (const chunk of stream) {
    if (chunk.image) {
      // This chunk carries the image: decode the base64 payload and save it.
      writeFileSync('output.png', Buffer.from(chunk.image, 'base64'))
      console.log('\nImage saved to output.png')
      continue
    }
    if (chunk.total) {
      // Progress chunk: the leading carriage return rewrites the current terminal line.
      const stepsDone = chunk.completed ?? 0
      process.stdout.write(`\rProgress: ${stepsDone}/${chunk.total}`)
    }
  }
}
main().catch(console.error)
+31
View File
@@ -0,0 +1,31 @@
import { Ollama, Logprob } from 'ollama';
/**
 * Prints a labelled listing of token log probabilities to the console.
 *
 * Each entry is printed as one line with its token (right-padded to 12
 * characters) and its log probability to three decimals, followed by one
 * line per alternative in `top_logprobs` when that list is present.
 *
 * @param entries - Log probability entries to print, in order.
 * @param label - Heading printed (after a blank line) above the listing.
 */
function printLogprobs(entries: Logprob[], label: string) {
  console.log(`\n${label}:`)
  entries.forEach(({ token, logprob, top_logprobs }) => {
    const paddedToken = token.padEnd(12)
    console.log(` token=${paddedToken} logprob=${logprob.toFixed(3)}`)

    // Entries without alternatives print only their own line.
    const alternatives = top_logprobs ?? []
    alternatives.forEach((candidate) => {
      console.log(` alt -> ${candidate.token.padEnd(12)} (${candidate.logprob.toFixed(3)})`)
    })
  })
}
/**
 * Sends one non-streaming chat request with `logprobs` enabled and three
 * `top_logprobs` per token, then prints the reply text and the returned log
 * probabilities (an empty list when the response has none).
 */
async function main() {
  const ollamaClient = new Ollama()
  console.log(`Using model: gemma3`)

  const { message, logprobs } = await ollamaClient.chat({
    model: 'gemma3',
    messages: [{ role: 'user', content: 'Say hello in one word.' }],
    logprobs: true,
    top_logprobs: 3,
  })

  console.log('Chat response:', message.content)
  printLogprobs(logprobs ?? [], 'chat logprobs')
}
main().catch((err) => {
console.error(err)
process.exitCode = 1
})
+1 -1
View File
@@ -1,4 +1,4 @@
import ollama from 'ollama';
import ollama, { Ollama } from 'ollama';
// Mock weather functions
function getTemperature(args: { city: string }): string {
@@ -1,6 +1,6 @@
import ollama, { Ollama } from 'ollama'
import type { Message } from 'ollama'
import { Browser } from './browser-tool-helpers'
import { Browser } from './gpt-oss-browser-tools-helpers'
async function main() {
if (!process.env.OLLAMA_API_KEY) {
+14
View File
@@ -28,6 +28,7 @@ import type {
WebSearchResponse,
WebFetchRequest,
WebFetchResponse,
VersionResponse,
} from './interfaces.js'
import { defaultHost } from './constant.js'
@@ -327,6 +328,17 @@ async encodeImage(image: Uint8Array | string): Promise<string> {
return (await response.json()) as ListResponse
}
/**
 * Fetches the server version from the `/api/version` endpoint of the configured host.
 * @returns {Promise<VersionResponse>} - The parsed JSON body of the response.
 */
async version(): Promise<VersionResponse> {
  const endpoint = `${this.config.host}/api/version`
  const res = await utils.get(this.fetch, endpoint, { headers: this.config.headers })
  const body = await res.json()
  return body as VersionResponse
}
/**
* Performs web search using the Ollama web search API
* @param request {WebSearchRequest} - The search request containing query and options
@@ -362,3 +374,5 @@ export default new Ollama()
// export all types from the main entry point so that packages importing types don't need to specify paths
export * from './interfaces.js'
export type { AbortableAsyncIterator }
+30 -1
View File
@@ -30,6 +30,7 @@ export interface Options {
num_predict: number
top_k: number
top_p: number
min_p: number
tfs_z: number
typical_p: number
repeat_last_n: number
@@ -57,6 +58,13 @@ export interface GenerateRequest {
images?: Uint8Array[] | string[]
keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
think?: boolean | 'high' | 'medium' | 'low'
logprobs?: boolean
top_logprobs?: number
// Experimental image generation parameters
width?: number
height?: number
steps?: number
options?: Partial<Options>
}
@@ -110,6 +118,8 @@ export interface ChatRequest {
keep_alive?: string | number // a number (seconds) or a string with a duration unit suffix ("300ms", "1.5h", "2h45m", etc)
tools?: Tool[]
think?: boolean | 'high' | 'medium' | 'low'
logprobs?: boolean
top_logprobs?: number
options?: Partial<Options>
}
@@ -174,11 +184,19 @@ export interface EmbeddingsRequest {
}
// response types
/** A token paired with its log probability. */
export interface TokenLogprob {
// The token text
token: string
// Log probability of this token
logprob: number
}
/** A token's log probability, optionally with a list of alternative tokens. */
export interface Logprob extends TokenLogprob {
// Alternative tokens with their log probabilities; presumably present only
// when the request set `top_logprobs` — TODO confirm against the server API
top_logprobs?: TokenLogprob[]
}
export interface GenerateResponse {
model: string
created_at: Date
response: string
response?: string
thinking?: string
done: boolean
done_reason: string
@@ -189,6 +207,12 @@ export interface GenerateResponse {
prompt_eval_duration: number
eval_count: number
eval_duration: number
logprobs?: Logprob[]
// Image generation response fields
image?: string // Base64-encoded generated image data
completed?: number // Number of completed steps (for streaming progress)
total?: number // Total number of steps (for streaming progress)
}
export interface ChatResponse {
@@ -203,6 +227,7 @@ export interface ChatResponse {
prompt_eval_duration: number
eval_count: number
eval_duration: number
logprobs?: Logprob[]
}
export interface EmbedResponse {
@@ -258,6 +283,10 @@ export interface ShowResponse {
projector_info?: Map<string, any>
}
/** Response body of the server version request (`/api/version`). */
export interface VersionResponse {
// Server version string; exact format not shown here
version: string
}
/** Response body that carries a list of models. */
export interface ListResponse {
// One entry per model
models: ModelResponse[]
}
+5 -1
View File
@@ -308,10 +308,11 @@ export const parseJSON = async function* <T = unknown>(
const { done, value: chunk } = await reader.read()
if (done) {
reader.releaseLock()
break
}
buffer += decoder.decode(chunk)
buffer += decoder.decode(chunk, { stream: true })
const parts = buffer.split('\n')
@@ -326,6 +327,9 @@ export const parseJSON = async function* <T = unknown>(
}
}
// Flush any remaining bytes from incomplete multibyte sequences
buffer += decoder.decode()
for (const part of buffer.split('\n').filter((p) => p !== '')) {
try {
yield JSON.parse(part)
+144
View File
@@ -0,0 +1,144 @@
import { describe, it, expect, vi } from 'vitest'
import { Ollama } from '../src/browser'
import type { ChatResponse, GenerateResponse } from '../src/interfaces'
import type { AbortableAsyncIterator } from '../src/browser'
describe('AbortableAsyncIterator type export', () => {
  it('should be importable from browser module', () => {
    // The real check happens at compile time: the parameter annotation below
    // only type-checks if the browser module exports AbortableAsyncIterator.
    const acceptIterator = (_iterator: AbortableAsyncIterator<ChatResponse> | null): void => {}
    acceptIterator(null)
    expect(true).toBe(true)
  })
})
// Checks that logprob options passed to generate() and chat() reach
// processStreamableRequest unchanged.
describe('Ollama logprob request fields', () => {
  it('forwards logprob settings in generate requests', async () => {
    const ollamaClient = new Ollama()
    // Replace processStreamableRequest with a stub that resolves to an empty object.
    const requestSpy = vi.spyOn(ollamaClient as any, 'processStreamableRequest')
    requestSpy.mockResolvedValue({} as GenerateResponse)

    await ollamaClient.generate({ model: 'dummy', prompt: 'Hello', logprobs: true, top_logprobs: 5 })

    const expectedPayload = expect.objectContaining({ logprobs: true, top_logprobs: 5 })
    expect(requestSpy).toHaveBeenCalledWith('generate', expectedPayload)
  })

  it('forwards logprob settings in chat requests', async () => {
    const ollamaClient = new Ollama()
    // Replace processStreamableRequest with a stub that resolves to an empty object.
    const requestSpy = vi.spyOn(ollamaClient as any, 'processStreamableRequest')
    requestSpy.mockResolvedValue({} as ChatResponse)

    await ollamaClient.chat({
      model: 'dummy',
      messages: [{ role: 'user', content: 'hi' }],
      logprobs: true,
      top_logprobs: 3,
    })

    const expectedPayload = expect.objectContaining({ logprobs: true, top_logprobs: 3 })
    expect(requestSpy).toHaveBeenCalledWith('chat', expectedPayload)
  })
})
// Checks that image-generation request parameters are forwarded by generate()
// and that the image/progress response fields are returned to the caller.
describe('Ollama image generation request fields', () => {
  it('forwards image generation parameters in generate requests', async () => {
    const ollamaClient = new Ollama()
    // Replace processStreamableRequest with a stub that resolves to an empty object.
    const requestSpy = vi.spyOn(ollamaClient as any, 'processStreamableRequest')
    requestSpy.mockResolvedValue({} as GenerateResponse)

    await ollamaClient.generate({
      model: 'dummy-image',
      prompt: 'a sunset over mountains',
      width: 1024,
      height: 768,
      steps: 20,
    })

    const expectedPayload = expect.objectContaining({
      model: 'dummy-image',
      prompt: 'a sunset over mountains',
      width: 1024,
      height: 768,
      steps: 20,
    })
    expect(requestSpy).toHaveBeenCalledWith('generate', expectedPayload)
  })

  it('handles image generation response with image field', async () => {
    // A finished response whose `image` field holds a base64 string.
    const finalChunk: GenerateResponse = {
      model: 'dummy-image',
      created_at: new Date(),
      done: true,
      done_reason: 'stop',
      context: [],
      total_duration: 1000,
      load_duration: 100,
      prompt_eval_count: 10,
      prompt_eval_duration: 50,
      eval_count: 0,
      eval_duration: 0,
      image: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==',
    }

    const ollamaClient = new Ollama()
    const requestSpy = vi.spyOn(ollamaClient as any, 'processStreamableRequest')
    requestSpy.mockResolvedValue(finalChunk)

    const { image, done } = await ollamaClient.generate({
      model: 'dummy-image',
      prompt: 'a sunset',
    })

    expect(image).toBeDefined()
    expect(done).toBe(true)
  })

  it('handles streaming progress fields for image generation', async () => {
    // An unfinished response carrying only step-progress counters.
    const progressChunk: GenerateResponse = {
      model: 'dummy-image',
      created_at: new Date(),
      done: false,
      done_reason: '',
      context: [],
      total_duration: 0,
      load_duration: 0,
      prompt_eval_count: 0,
      prompt_eval_duration: 0,
      eval_count: 0,
      eval_duration: 0,
      completed: 5,
      total: 20,
    }

    const ollamaClient = new Ollama()
    const requestSpy = vi.spyOn(ollamaClient as any, 'processStreamableRequest')
    requestSpy.mockResolvedValue(progressChunk)

    const { completed, total, done } = await ollamaClient.generate({
      model: 'dummy-image',
      prompt: 'a sunset',
    })

    expect(completed).toBe(5)
    expect(total).toBe(20)
    expect(done).toBe(false)
  })
})
+27 -2
View File
@@ -1,5 +1,5 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { get } from '../src/utils'
import { get, parseJSON } from '../src/utils'
describe('get Function Header Tests', () => {
const mockFetch = vi.fn();
@@ -79,4 +79,29 @@ describe('get Function Header Tests', () => {
headers: expect.objectContaining(defaultHeaders)
});
});
});
});
describe('parseJSON UTF-8 multibyte character handling', () => {
  it('should correctly decode multibyte UTF-8 characters split across chunk boundaries', async () => {
    const utf8 = new TextEncoder()

    // 'ь' is the two-byte UTF-8 sequence 0xD1 0x8C. The first chunk ends with
    // 0xD1 and the second starts with 0x8C, so the character straddles the
    // chunk boundary.
    const head = new Uint8Array([...utf8.encode('{"text":"использоват'), 0xd1])
    const tail = new Uint8Array([0x8c, ...utf8.encode('"}\n')])

    const body = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(head)
        controller.enqueue(tail)
        controller.close()
      },
    })

    const parsed = parseJSON<{ text: string }>(body)
    const first = await parsed.next()

    expect(first.value?.text).toBe('использовать')
  })
});