fix(llma): include Gemini cached tokens in cost calculation (#37651)

Radu Raicea
2025-09-05 12:02:30 -04:00
committed by GitHub
parent 9235f4abce
commit bd5225a307
5 changed files with 265 additions and 129 deletions

View File

@@ -8,35 +8,35 @@ exports[`processAiEvent() smoke test every model processes claude-2 1`] = `
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.0-flash 1`] = `
exports[`processAiEvent() smoke test every model processes gemini-2.0-flash-001 1`] = `
{
"$ai_input_cost_usd": 0.000015,
"$ai_output_cost_usd": 0.00000375,
"$ai_total_cost_usd": 0.00001875,
"$ai_input_cost_usd": 0.00001,
"$ai_output_cost_usd": 0.00002,
"$ai_total_cost_usd": 0.00003,
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.5-flash 1`] = `
{
"$ai_input_cost_usd": 15,
"$ai_output_cost_usd": 30,
"$ai_total_cost_usd": 45,
"$ai_input_cost_usd": 0.00003,
"$ai_output_cost_usd": 0.000125,
"$ai_total_cost_usd": 0.000155,
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.5-pro-preview 1`] = `
{
"$ai_input_cost_usd": 70,
"$ai_output_cost_usd": 35,
"$ai_total_cost_usd": 105,
"$ai_input_cost_usd": 0.000125,
"$ai_output_cost_usd": 0.0005,
"$ai_total_cost_usd": 0.000625,
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.5-pro-preview:large 1`] = `
{
"$ai_input_cost_usd": 70,
"$ai_output_cost_usd": 35,
"$ai_total_cost_usd": 105,
"$ai_input_cost_usd": 0.000125,
"$ai_output_cost_usd": 0.0005,
"$ai_total_cost_usd": 0.000625,
}
`;
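
The new snapshot values line up with the updated mock rates in the test file below, assuming the smoke-test event carries 100 input and 50 output tokens and no cached or reasoning tokens (the same shape used by the explicit gemini tests further down); the token counts are an assumption, the rates come from the mock:

// Rough check of the new snapshot values (not part of the test suite).
const inputTokens = 100
const outputTokens = 50
console.log(inputTokens * 1e-7, outputTokens * 4e-7)          // ≈ 0.00001, 0.00002  (gemini-2.0-flash-001)
console.log(inputTokens * 3e-7, outputTokens * 0.0000025)     // ≈ 0.00003, 0.000125 (gemini-2.5-flash)
console.log(inputTokens * 0.00000125, outputTokens * 0.00001) // ≈ 0.000125, 0.0005  (gemini-2.5-pro-preview)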

View File

@@ -19,19 +19,19 @@ jest.mock('./providers', () => {
'claude-2': { model: 'claude-2', cost: { prompt_token: 0.6, completion_token: 0.6 } },
'gemini-2.5-pro-preview': {
model: 'gemini-2.5-pro-preview',
cost: { prompt_token: 0.7, completion_token: 0.7 },
cost: { prompt_token: 0.00000125, completion_token: 0.00001, cache_read_token: 3.1e-7 },
},
'gemini-2.5-pro-preview:large': {
model: 'gemini-2.5-pro-preview:large',
cost: { prompt_token: 0.8, completion_token: 0.8 },
cost: { prompt_token: 0.0000025, completion_token: 0.000015, cache_read_token: 0.000000625 },
},
'gemini-2.5-flash': {
model: 'gemini-2.5-flash',
cost: { prompt_token: 0.15, completion_token: 0.6 },
cost: { prompt_token: 3e-7, completion_token: 0.0000025, cache_read_token: 7.5e-8 },
},
'gemini-2.0-flash': {
model: 'gemini-2.0-flash',
cost: { prompt_token: 0.00000015, completion_token: 0.000000075 },
'gemini-2.0-flash-001': {
model: 'gemini-2.0-flash-001',
cost: { prompt_token: 1e-7, completion_token: 4e-7, cache_read_token: 2.5e-8 },
},
'o1-mini': {
model: 'o1-mini',
@@ -411,13 +411,13 @@ describe('processAiEvent()', () => {
const result = processAiEvent(event)
// For gemini-2.5-flash: prompt_token = 0.15, completion_token = 0.6
// Input cost: 100 * 0.15 = 15
// Output cost: (50 + 200) * 0.6 = 250 * 0.6 = 150
// Total cost: 15 + 150 = 165
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(15, 2)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(150, 2)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(165, 2)
// For gemini-2.5-flash: prompt_token = 3e-7, completion_token = 0.0000025
// Input cost: 100 * 3e-7 = 0.00003
// Output cost: (50 + 200) * 0.0000025 = 250 * 0.0000025 = 0.000625
// Total cost: 0.00003 + 0.000625 = 0.000655
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00003, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.000625, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000655, 6)
})
it('handles undefined reasoning tokens for gemini-2.5-*', () => {
@@ -429,13 +429,13 @@ describe('processAiEvent()', () => {
const result = processAiEvent(event)
// For gemini-2.5-flash: prompt_token = 0.15, completion_token = 0.6
// Input cost: 100 * 0.15 = 15
// Output cost: (50 + 0) * 0.6 = 50 * 0.6 = 30 (undefined reasoning tokens treated as 0)
// Total cost: 15 + 30 = 45
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(15, 2)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(30, 2)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(45, 2)
// For gemini-2.5-flash: prompt_token = 3e-7, completion_token = 0.0000025
// Input cost: 100 * 3e-7 = 0.00003
// Output cost: (50 + 0) * 0.0000025 = 50 * 0.0000025 = 0.000125 (undefined reasoning tokens treated as 0)
// Total cost: 0.00003 + 0.000125 = 0.000155
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00003, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.000125, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000155, 6)
})
it('does not include reasoning tokens for gemini-2.0-*', () => {
@@ -447,13 +447,13 @@ describe('processAiEvent()', () => {
const result = processAiEvent(event)
// For gemini-2.0-flash: prompt_token = 0.00000015, completion_token = 0.000000075
// Input cost: 100 * 0.00000015 = 0.000015
// Output cost: 50 * 0.000000075 = 0.00000375 (reasoning tokens ignored)
// Total cost: 0.000015 + 0.00000375 = 0.00001875
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000015, 8)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.00000375, 8)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00001875, 8)
// Model will match gemini-2.0-flash-001: prompt_token = 1e-7, completion_token = 4e-7
// Input cost: 100 * 1e-7 = 0.00001
// Output cost: 50 * 4e-7 = 0.00002 (reasoning tokens ignored)
// Total cost: 0.00001 + 0.00002 = 0.00003
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00001, 7)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.00002, 7)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00003, 7)
})
it('does not include reasoning tokens for non gemini models', () => {
@@ -474,6 +474,121 @@ describe('processAiEvent()', () => {
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00033, 5)
})
})
describe('gemini cache handling', () => {
it('handles cache read tokens with correct cost calculation for gemini-2.5-pro-preview', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
const result = processAiEvent(event)
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 0.00000125) + (400 * 3.1e-7) = 0.00075 + 0.000124 = 0.000874
// Output cost: 50 * 0.00001 = 0.0005
// Total cost: 0.000874 + 0.0005 = 0.001374
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000874, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0005, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.001374, 6)
})
it('handles cache read tokens for gemini-2.5-pro-preview:large', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 250000 // > 200k triggers large model
event.properties!.$ai_cache_read_input_tokens = 100000
event.properties!.$ai_output_tokens = 500
const result = processAiEvent(event)
// Model should be switched to gemini-2.5-pro-preview:large
expect(result.properties!.$ai_model_cost_used).toBe('gemini-2.5-pro-preview:large')
// Regular tokens: 250000 - 100000 = 150000
// Input cost: (150000 * 0.0000025) + (100000 * 0.000000625) = 0.375 + 0.0625 = 0.4375
// Output cost: 500 * 0.000015 = 0.0075
// Total cost: 0.4375 + 0.0075 = 0.445
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.4375, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0075, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.445, 6)
})
it('handles cache read tokens for gemini-2.0-flash', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.0-flash'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
const result = processAiEvent(event)
// Model will match gemini-2.0-flash-001 from generated-providers.json
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 1e-7) + (400 * 2.5e-8) = 0.00006 + 0.00001 = 0.00007
// Output cost: 50 * 4e-7 = 0.00002
// Total cost: 0.00007 + 0.00002 = 0.00009
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00007, 7)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.00002, 7)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00009, 7)
})
it('handles zero cache tokens correctly for gemini', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 100
event.properties!.$ai_cache_read_input_tokens = 0
event.properties!.$ai_output_tokens = 50
const result = processAiEvent(event)
// Input cost: 100 * 0.00000125 = 0.000125
// Output cost: 50 * 0.00001 = 0.0005
// Total cost: 0.000125 + 0.0005 = 0.000625
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000125, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0005, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000625, 6)
})
it('handles combined cache and reasoning tokens for gemini-2.5-pro-preview', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
event.properties!.$ai_reasoning_tokens = 200
const result = processAiEvent(event)
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 0.00000125) + (400 * 3.1e-7) = 0.00075 + 0.000124 = 0.000874
// Output cost: (50 + 200) * 0.00001 = 250 * 0.00001 = 0.0025
// Total cost: 0.000874 + 0.0025 = 0.003374
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000874, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0025, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.003374, 6)
})
it('handles gemini-2.5-flash with cache from generated providers', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-flash'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
event.properties!.$ai_reasoning_tokens = 100
const result = processAiEvent(event)
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 3e-7) + (400 * 7.5e-8) = 0.00018 + 0.00003 = 0.00021
// Output cost: (50 + 100) * 0.0000025 = 150 * 0.0000025 = 0.000375
// Total cost: 0.00021 + 0.000375 = 0.000585
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00021, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.000375, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000585, 6)
})
})
})
describe('normalizeTraceProperties()', () => {

View File

@@ -103,6 +103,19 @@ const calculateInputCost = (event: PluginEvent, cost: ModelRow) => {
const totalCacheCost = bigDecimal.add(writeCost, cacheReadCost)
const uncachedCost = bigDecimal.multiply(cost.cost.prompt_token, inputTokens)
return bigDecimal.add(totalCacheCost, uncachedCost)
} else if (event.properties['$ai_provider'] && event.properties['$ai_provider'].toLowerCase() === 'gemini') {
const cacheReadTokens = event.properties['$ai_cache_read_input_tokens'] || 0
const inputTokens = event.properties['$ai_input_tokens'] || 0
const regularTokens = bigDecimal.subtract(inputTokens, cacheReadTokens)
// Use actual cache read cost if available, otherwise fall back to 0.25 multiplier
const cacheReadCost =
cost.cost.cache_read_token !== undefined
? bigDecimal.multiply(cost.cost.cache_read_token, cacheReadTokens)
: bigDecimal.multiply(bigDecimal.multiply(cost.cost.prompt_token, 0.25), cacheReadTokens)
const regularCost = bigDecimal.multiply(cost.cost.prompt_token, regularTokens)
return bigDecimal.add(cacheReadCost, regularCost)
}
return bigDecimal.multiply(cost.cost.prompt_token, event.properties['$ai_input_tokens'] || 0)
}
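
The added branch prices cached and uncached input tokens separately: cache reads are billed at the model's cache_read_token rate when one exists, otherwise at 25% of the prompt rate. A minimal standalone sketch of the same arithmetic, using plain floats and a hypothetical GeminiCost shape instead of the repo's ModelRow and bigDecimal:

// Sketch only; the production code above uses bigDecimal to avoid float error.
type GeminiCost = { prompt_token: number; completion_token: number; cache_read_token?: number }

function geminiInputCost(inputTokens: number, cacheReadTokens: number, cost: GeminiCost): number {
    // Tokens billed at the regular prompt rate (input token count includes cached reads).
    const regularTokens = inputTokens - cacheReadTokens
    // Prefer the explicit cache-read rate; fall back to 25% of the prompt rate.
    const cacheReadRate = cost.cache_read_token !== undefined ? cost.cache_read_token : cost.prompt_token * 0.25
    return regularTokens * cost.prompt_token + cacheReadTokens * cacheReadRate
}

// Matches the gemini-2.5-pro-preview cache test above:
// (1000 - 400) * 0.00000125 + 400 * 3.1e-7 = 0.00075 + 0.000124 = 0.000874
geminiInputCost(1000, 400, { prompt_token: 0.00000125, completion_token: 0.00001, cache_read_token: 3.1e-7 })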

View File

@@ -183,6 +183,20 @@
"cache_read_token": 3.75e-7
}
},
{
"model": "cogito-v2-preview-deepseek-671b",
"cost": {
"prompt_token": 0.00000125,
"completion_token": 0.00000125
}
},
{
"model": "cogito-v2-preview-llama-109b-moe",
"cost": {
"prompt_token": 1.8e-7,
"completion_token": 5.9e-7
}
},
{
"model": "command",
"cost": {
@@ -302,6 +316,13 @@
"completion_token": 8e-7
}
},
{
"model": "deepseek-chat-v3.1:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "deepseek-prover-v2",
"cost": {
@@ -326,8 +347,8 @@
{
"model": "deepseek-r1-0528-qwen3-8b",
"cost": {
"prompt_token": 1e-8,
"completion_token": 2e-8
"prompt_token": 1.703012e-8,
"completion_token": 6.81536e-8
}
},
{
@@ -365,13 +386,6 @@
"completion_token": 4e-8
}
},
{
"model": "deepseek-r1-distill-qwen-1.5b",
"cost": {
"prompt_token": 1.8e-7,
"completion_token": 1.8e-7
}
},
{
"model": "deepseek-r1-distill-qwen-14b",
"cost": {
@@ -421,13 +435,6 @@
"completion_token": 0
}
},
{
"model": "deepseek-v3-base",
"cost": {
"prompt_token": 1.999188e-7,
"completion_token": 8.00064e-7
}
},
{
"model": "deepseek-v3.1-base",
"cost": {
@@ -565,6 +572,20 @@
"cache_write_token": 3.833e-7
}
},
{
"model": "gemini-2.5-flash-image-preview",
"cost": {
"prompt_token": 3e-7,
"completion_token": 0.0000025
}
},
{
"model": "gemini-2.5-flash-image-preview:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "gemini-2.5-flash-lite",
"cost": {
@@ -750,16 +771,15 @@
{
"model": "glm-4.5",
"cost": {
"prompt_token": 1.999188e-7,
"completion_token": 8.00064e-7
"prompt_token": 3.2986602e-7,
"completion_token": 0.0000013201056
}
},
{
"model": "glm-4.5-air",
"cost": {
"prompt_token": 2e-7,
"completion_token": 0.0000011,
"cache_read_token": 3e-8
"prompt_token": 1.4e-7,
"completion_token": 8.6e-7
}
},
{
@@ -991,6 +1011,13 @@
"completion_token": 2.8e-7
}
},
{
"model": "gpt-oss-120b:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "gpt-oss-20b",
"cost": {
@@ -1059,6 +1086,14 @@
"cache_read_token": 7.5e-7
}
},
{
"model": "grok-code-fast-1",
"cost": {
"prompt_token": 2e-7,
"completion_token": 0.0000015,
"cache_read_token": 2e-8
}
},
{
"model": "grok-vision-beta",
"cost": {
@@ -1087,6 +1122,20 @@
"completion_token": 2.8e-7
}
},
{
"model": "hermes-4-405b",
"cost": {
"prompt_token": 1.999188e-7,
"completion_token": 8.00064e-7
}
},
{
"model": "hermes-4-70b",
"cost": {
"prompt_token": 9.329544e-8,
"completion_token": 3.733632e-7
}
},
{
"model": "hunyuan-a13b-instruct",
"cost": {
@@ -1115,13 +1164,6 @@
"completion_token": 0.00001
}
},
{
"model": "internvl3-14b",
"cost": {
"prompt_token": 2e-7,
"completion_token": 4e-7
}
},
{
"model": "jamba-large-1.7",
"cost": {
@@ -1136,6 +1178,13 @@
"completion_token": 4e-7
}
},
{
"model": "kimi-dev-72b",
"cost": {
"prompt_token": 2.9e-7,
"completion_token": 0.00000115
}
},
{
"model": "kimi-dev-72b:free",
"cost": {
@@ -1304,13 +1353,6 @@
"completion_token": 4.9e-8
}
},
{
"model": "llama-3.2-11b-vision-instruct:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "llama-3.2-1b-instruct",
"cost": {
@@ -1321,8 +1363,8 @@
{
"model": "llama-3.2-3b-instruct",
"cost": {
"prompt_token": 3e-9,
"completion_token": 6e-9
"prompt_token": 1.2e-8,
"completion_token": 2.4e-8
}
},
{
@@ -1335,8 +1377,8 @@
{
"model": "llama-3.2-90b-vision-instruct",
"cost": {
"prompt_token": 0.0000012,
"completion_token": 0.0000012
"prompt_token": 3.5e-7,
"completion_token": 4e-7
}
},
{
@@ -1416,13 +1458,6 @@
"completion_token": 1.8e-7
}
},
{
"model": "llama3.1-typhoon2-70b-instruct",
"cost": {
"prompt_token": 8.8e-7,
"completion_token": 8.8e-7
}
},
{
"model": "llemma_7b",
"cost": {
@@ -1601,8 +1636,8 @@
{
"model": "mistral-nemo",
"cost": {
"prompt_token": 7.5e-9,
"completion_token": 5e-8
"prompt_token": 1e-8,
"completion_token": 4.00032e-8
}
},
{
@@ -1731,13 +1766,6 @@
"completion_token": 0.00000175
}
},
{
"model": "nous-hermes-2-mixtral-8x7b-dpo",
"cost": {
"prompt_token": 6e-7,
"completion_token": 6e-7
}
},
{
"model": "nova-lite-v1",
"cost": {
@@ -1893,13 +1921,6 @@
"completion_token": 0.000006
}
},
{
"model": "qwen-2-72b-instruct",
"cost": {
"prompt_token": 9e-7,
"completion_token": 9e-7
}
},
{
"model": "qwen-2.5-72b-instruct",
"cost": {
@@ -2060,8 +2081,15 @@
{
"model": "qwen3-30b-a3b-instruct-2507",
"cost": {
"prompt_token": 1e-7,
"completion_token": 3e-7
"prompt_token": 5.18308e-8,
"completion_token": 2.07424e-7
}
},
{
"model": "qwen3-30b-a3b-thinking-2507",
"cost": {
"prompt_token": 7.13e-8,
"completion_token": 2.852e-7
}
},
{
@@ -2107,14 +2135,14 @@
}
},
{
"model": "qwen3-coder:free",
"model": "qwen3-coder-30b-a3b-instruct",
"cost": {
"prompt_token": 0,
"completion_token": 0
"prompt_token": 5.18308e-8,
"completion_token": 2.07424e-7
}
},
{
"model": "qwerky-72b:free",
"model": "qwen3-coder:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
@@ -2190,13 +2218,6 @@
"completion_token": 0.0000034
}
},
{
"model": "sarvam-m:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "shisa-v2-llama3.3-70b",
"cost": {

View File

@@ -81,25 +81,12 @@ export const manualCosts: ModelRow[] = [
},
},
// google gen ai
{
model: 'gemini-2.0-flash',
cost: {
prompt_token: 0.00000015,
completion_token: 0.000000075,
},
},
{
model: 'gemini-2.5-pro-preview',
cost: {
prompt_token: 0.00000125,
completion_token: 0.00001,
},
},
{
model: 'gemini-2.5-pro-preview:large',
cost: {
prompt_token: 0.0000025,
completion_token: 0.000015,
cache_read_token: 0.000000625,
},
},
// Other
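
With these manual gemini rows removed, gemini pricing resolves from generated-providers.json (the test above notes that 'gemini-2.0-flash' matches 'gemini-2.0-flash-001' there). For any row that still lacks a cache_read_token rate, the new branch in calculateInputCost falls back to 25% of the prompt rate; a hypothetical row, reusing the geminiInputCost sketch from earlier:

// Hypothetical rates, not from this diff: 400 cached of 1000 input tokens, no cache_read_token on the row.
geminiInputCost(1000, 400, { prompt_token: 0.000001, completion_token: 0.000002 })
// = 600 * 0.000001 + 400 * (0.000001 * 0.25) = 0.0006 + 0.0001 = 0.0007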