fix(llma): include Gemini cached tokens in cost calculation (#37651)

Radu Raicea
2025-09-05 12:02:30 -04:00
committed by GitHub
parent 9235f4abce
commit bd5225a307
5 changed files with 265 additions and 129 deletions

View File

@@ -8,35 +8,35 @@ exports[`processAiEvent() smoke test every model processes claude-2 1`] = `
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.0-flash 1`] = `
exports[`processAiEvent() smoke test every model processes gemini-2.0-flash-001 1`] = `
{
"$ai_input_cost_usd": 0.000015,
"$ai_output_cost_usd": 0.00000375,
"$ai_total_cost_usd": 0.00001875,
"$ai_input_cost_usd": 0.00001,
"$ai_output_cost_usd": 0.00002,
"$ai_total_cost_usd": 0.00003,
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.5-flash 1`] = `
{
"$ai_input_cost_usd": 15,
"$ai_output_cost_usd": 30,
"$ai_total_cost_usd": 45,
"$ai_input_cost_usd": 0.00003,
"$ai_output_cost_usd": 0.000125,
"$ai_total_cost_usd": 0.000155,
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.5-pro-preview 1`] = `
{
"$ai_input_cost_usd": 70,
"$ai_output_cost_usd": 35,
"$ai_total_cost_usd": 105,
"$ai_input_cost_usd": 0.000125,
"$ai_output_cost_usd": 0.0005,
"$ai_total_cost_usd": 0.000625,
}
`;
exports[`processAiEvent() smoke test every model processes gemini-2.5-pro-preview:large 1`] = `
{
"$ai_input_cost_usd": 70,
"$ai_output_cost_usd": 35,
"$ai_total_cost_usd": 105,
"$ai_input_cost_usd": 0.000125,
"$ai_output_cost_usd": 0.0005,
"$ai_total_cost_usd": 0.000625,
}
`;
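
The new snapshot values line up with the updated mock rates in the test file below, assuming the smoke-test event carries 100 input and 50 output tokens and no cached or reasoning tokens (the same shape used by the explicit gemini tests further down); the token counts are an assumption, the rates come from the mock:

// Rough check of the new snapshot values (not part of the test suite).
const inputTokens = 100
const outputTokens = 50
console.log(inputTokens * 1e-7, outputTokens * 4e-7)          // ≈ 0.00001, 0.00002  (gemini-2.0-flash-001)
console.log(inputTokens * 3e-7, outputTokens * 0.0000025)     // ≈ 0.00003, 0.000125 (gemini-2.5-flash)
console.log(inputTokens * 0.00000125, outputTokens * 0.00001) // ≈ 0.000125, 0.0005  (gemini-2.5-pro-preview)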

View File

@@ -19,19 +19,19 @@ jest.mock('./providers', () => {
'claude-2': { model: 'claude-2', cost: { prompt_token: 0.6, completion_token: 0.6 } },
'gemini-2.5-pro-preview': {
model: 'gemini-2.5-pro-preview',
cost: { prompt_token: 0.7, completion_token: 0.7 },
cost: { prompt_token: 0.00000125, completion_token: 0.00001, cache_read_token: 3.1e-7 },
},
'gemini-2.5-pro-preview:large': {
model: 'gemini-2.5-pro-preview:large',
cost: { prompt_token: 0.8, completion_token: 0.8 },
cost: { prompt_token: 0.0000025, completion_token: 0.000015, cache_read_token: 0.000000625 },
},
'gemini-2.5-flash': {
model: 'gemini-2.5-flash',
cost: { prompt_token: 0.15, completion_token: 0.6 },
cost: { prompt_token: 3e-7, completion_token: 0.0000025, cache_read_token: 7.5e-8 },
},
'gemini-2.0-flash': {
model: 'gemini-2.0-flash',
cost: { prompt_token: 0.00000015, completion_token: 0.000000075 },
'gemini-2.0-flash-001': {
model: 'gemini-2.0-flash-001',
cost: { prompt_token: 1e-7, completion_token: 4e-7, cache_read_token: 2.5e-8 },
},
'o1-mini': {
model: 'o1-mini',
@@ -411,13 +411,13 @@ describe('processAiEvent()', () => {
const result = processAiEvent(event)
// For gemini-2.5-flash: prompt_token = 0.15, completion_token = 0.6
// Input cost: 100 * 0.15 = 15
// Output cost: (50 + 200) * 0.6 = 250 * 0.6 = 150
// Total cost: 15 + 150 = 165
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(15, 2)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(150, 2)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(165, 2)
// For gemini-2.5-flash: prompt_token = 3e-7, completion_token = 0.0000025
// Input cost: 100 * 3e-7 = 0.00003
// Output cost: (50 + 200) * 0.0000025 = 250 * 0.0000025 = 0.000625
// Total cost: 0.00003 + 0.000625 = 0.000655
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00003, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.000625, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000655, 6)
})
it('handles undefined reasoning tokens for gemini-2.5-*', () => {
@@ -429,13 +429,13 @@ describe('processAiEvent()', () => {
const result = processAiEvent(event)
// For gemini-2.5-flash: prompt_token = 0.15, completion_token = 0.6
// Input cost: 100 * 0.15 = 15
// Output cost: (50 + 0) * 0.6 = 50 * 0.6 = 30 (undefined reasoning tokens treated as 0)
// Total cost: 15 + 30 = 45
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(15, 2)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(30, 2)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(45, 2)
// For gemini-2.5-flash: prompt_token = 3e-7, completion_token = 0.0000025
// Input cost: 100 * 3e-7 = 0.00003
// Output cost: (50 + 0) * 0.0000025 = 50 * 0.0000025 = 0.000125 (undefined reasoning tokens treated as 0)
// Total cost: 0.00003 + 0.000125 = 0.000155
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00003, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.000125, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000155, 6)
})
it('does not include reasoning tokens for gemini-2.0-*', () => {
@@ -447,13 +447,13 @@ describe('processAiEvent()', () => {
const result = processAiEvent(event)
// For gemini-2.0-flash: prompt_token = 0.00000015, completion_token = 0.000000075
// Input cost: 100 * 0.00000015 = 0.000015
// Output cost: 50 * 0.000000075 = 0.00000375 (reasoning tokens ignored)
// Total cost: 0.000015 + 0.00000375 = 0.00001875
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000015, 8)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.00000375, 8)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00001875, 8)
// Model will match gemini-2.0-flash-001: prompt_token = 1e-7, completion_token = 4e-7
// Input cost: 100 * 1e-7 = 0.00001
// Output cost: 50 * 4e-7 = 0.00002 (reasoning tokens ignored)
// Total cost: 0.00001 + 0.00002 = 0.00003
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00001, 7)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.00002, 7)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00003, 7)
})
it('does not include reasoning tokens for non gemini models', () => {
@@ -474,6 +474,121 @@ describe('processAiEvent()', () => {
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00033, 5)
})
})
describe('gemini cache handling', () => {
it('handles cache read tokens with correct cost calculation for gemini-2.5-pro-preview', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
const result = processAiEvent(event)
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 0.00000125) + (400 * 3.1e-7) = 0.00075 + 0.000124 = 0.000874
// Output cost: 50 * 0.00001 = 0.0005
// Total cost: 0.000874 + 0.0005 = 0.001374
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000874, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0005, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.001374, 6)
})
it('handles cache read tokens for gemini-2.5-pro-preview:large', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 250000 // > 200k triggers large model
event.properties!.$ai_cache_read_input_tokens = 100000
event.properties!.$ai_output_tokens = 500
const result = processAiEvent(event)
// Model should be switched to gemini-2.5-pro-preview:large
expect(result.properties!.$ai_model_cost_used).toBe('gemini-2.5-pro-preview:large')
// Regular tokens: 250000 - 100000 = 150000
// Input cost: (150000 * 0.0000025) + (100000 * 0.000000625) = 0.375 + 0.0625 = 0.4375
// Output cost: 500 * 0.000015 = 0.0075
// Total cost: 0.4375 + 0.0075 = 0.445
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.4375, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0075, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.445, 6)
})
it('handles cache read tokens for gemini-2.0-flash', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.0-flash'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
const result = processAiEvent(event)
// Model will match gemini-2.0-flash-001 from generated-providers.json
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 1e-7) + (400 * 2.5e-8) = 0.00006 + 0.00001 = 0.00007
// Output cost: 50 * 4e-7 = 0.00002
// Total cost: 0.00007 + 0.00002 = 0.00009
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00007, 7)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.00002, 7)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.00009, 7)
})
it('handles zero cache tokens correctly for gemini', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 100
event.properties!.$ai_cache_read_input_tokens = 0
event.properties!.$ai_output_tokens = 50
const result = processAiEvent(event)
// Input cost: 100 * 0.00000125 = 0.000125
// Output cost: 50 * 0.00001 = 0.0005
// Total cost: 0.000125 + 0.0005 = 0.000625
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000125, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0005, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000625, 6)
})
it('handles combined cache and reasoning tokens for gemini-2.5-pro-preview', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-pro-preview'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
event.properties!.$ai_reasoning_tokens = 200
const result = processAiEvent(event)
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 0.00000125) + (400 * 3.1e-7) = 0.00075 + 0.000124 = 0.000874
// Output cost: (50 + 200) * 0.00001 = 250 * 0.00001 = 0.0025
// Total cost: 0.000874 + 0.0025 = 0.003374
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.000874, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.0025, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.003374, 6)
})
it('handles gemini-2.5-flash with cache from generated providers', () => {
event.properties!.$ai_provider = 'gemini'
event.properties!.$ai_model = 'gemini-2.5-flash'
event.properties!.$ai_input_tokens = 1000
event.properties!.$ai_cache_read_input_tokens = 400
event.properties!.$ai_output_tokens = 50
event.properties!.$ai_reasoning_tokens = 100
const result = processAiEvent(event)
// Regular tokens: 1000 - 400 = 600
// Input cost: (600 * 3e-7) + (400 * 7.5e-8) = 0.00018 + 0.00003 = 0.00021
// Output cost: (50 + 100) * 0.0000025 = 150 * 0.0000025 = 0.000375
// Total cost: 0.00021 + 0.000375 = 0.000585
expect(result.properties!.$ai_input_cost_usd).toBeCloseTo(0.00021, 6)
expect(result.properties!.$ai_output_cost_usd).toBeCloseTo(0.000375, 6)
expect(result.properties!.$ai_total_cost_usd).toBeCloseTo(0.000585, 6)
})
})
})
describe('normalizeTraceProperties()', () => {

View File

@@ -103,6 +103,19 @@ const calculateInputCost = (event: PluginEvent, cost: ModelRow) => {
const totalCacheCost = bigDecimal.add(writeCost, cacheReadCost)
const uncachedCost = bigDecimal.multiply(cost.cost.prompt_token, inputTokens)
return bigDecimal.add(totalCacheCost, uncachedCost)
} else if (event.properties['$ai_provider'] && event.properties['$ai_provider'].toLowerCase() === 'gemini') {
const cacheReadTokens = event.properties['$ai_cache_read_input_tokens'] || 0
const inputTokens = event.properties['$ai_input_tokens'] || 0
const regularTokens = bigDecimal.subtract(inputTokens, cacheReadTokens)
// Use actual cache read cost if available, otherwise fall back to 0.25 multiplier
const cacheReadCost =
cost.cost.cache_read_token !== undefined
? bigDecimal.multiply(cost.cost.cache_read_token, cacheReadTokens)
: bigDecimal.multiply(bigDecimal.multiply(cost.cost.prompt_token, 0.25), cacheReadTokens)
const regularCost = bigDecimal.multiply(cost.cost.prompt_token, regularTokens)
return bigDecimal.add(cacheReadCost, regularCost)
}
return bigDecimal.multiply(cost.cost.prompt_token, event.properties['$ai_input_tokens'] || 0)
}
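
The added branch prices cached and uncached input tokens separately: cache reads are billed at the model's cache_read_token rate when one exists, otherwise at 25% of the prompt rate. A minimal standalone sketch of the same arithmetic, using plain floats and a hypothetical GeminiCost shape instead of the repo's ModelRow and bigDecimal:

// Sketch only; the production code above uses bigDecimal to avoid float error.
type GeminiCost = { prompt_token: number; completion_token: number; cache_read_token?: number }

function geminiInputCost(inputTokens: number, cacheReadTokens: number, cost: GeminiCost): number {
    // Tokens billed at the regular prompt rate (input token count includes cached reads).
    const regularTokens = inputTokens - cacheReadTokens
    // Prefer the explicit cache-read rate; fall back to 25% of the prompt rate.
    const cacheReadRate = cost.cache_read_token !== undefined ? cost.cache_read_token : cost.prompt_token * 0.25
    return regularTokens * cost.prompt_token + cacheReadTokens * cacheReadRate
}

// Matches the gemini-2.5-pro-preview cache test above:
// (1000 - 400) * 0.00000125 + 400 * 3.1e-7 = 0.00075 + 0.000124 = 0.000874
geminiInputCost(1000, 400, { prompt_token: 0.00000125, completion_token: 0.00001, cache_read_token: 3.1e-7 })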

View File

@@ -183,6 +183,20 @@
"cache_read_token": 3.75e-7
}
},
{
"model": "cogito-v2-preview-deepseek-671b",
"cost": {
"prompt_token": 0.00000125,
"completion_token": 0.00000125
}
},
{
"model": "cogito-v2-preview-llama-109b-moe",
"cost": {
"prompt_token": 1.8e-7,
"completion_token": 5.9e-7
}
},
{
"model": "command",
"cost": {
@@ -302,6 +316,13 @@
"completion_token": 8e-7
}
},
{
"model": "deepseek-chat-v3.1:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "deepseek-prover-v2",
"cost": {
@@ -326,8 +347,8 @@
{
"model": "deepseek-r1-0528-qwen3-8b",
"cost": {
"prompt_token": 1e-8,
"completion_token": 2e-8
"prompt_token": 1.703012e-8,
"completion_token": 6.81536e-8
}
},
{
@@ -365,13 +386,6 @@
"completion_token": 4e-8
}
},
{
"model": "deepseek-r1-distill-qwen-1.5b",
"cost": {
"prompt_token": 1.8e-7,
"completion_token": 1.8e-7
}
},
{
"model": "deepseek-r1-distill-qwen-14b",
"cost": {
@@ -421,13 +435,6 @@
"completion_token": 0
}
},
{
"model": "deepseek-v3-base",
"cost": {
"prompt_token": 1.999188e-7,
"completion_token": 8.00064e-7
}
},
{
"model": "deepseek-v3.1-base",
"cost": {
@@ -565,6 +572,20 @@
"cache_write_token": 3.833e-7
}
},
{
"model": "gemini-2.5-flash-image-preview",
"cost": {
"prompt_token": 3e-7,
"completion_token": 0.0000025
}
},
{
"model": "gemini-2.5-flash-image-preview:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "gemini-2.5-flash-lite",
"cost": {
@@ -750,16 +771,15 @@
{
"model": "glm-4.5",
"cost": {
"prompt_token": 1.999188e-7,
"completion_token": 8.00064e-7
"prompt_token": 3.2986602e-7,
"completion_token": 0.0000013201056
}
},
{
"model": "glm-4.5-air",
"cost": {
"prompt_token": 2e-7,
"completion_token": 0.0000011,
"cache_read_token": 3e-8
"prompt_token": 1.4e-7,
"completion_token": 8.6e-7
}
},
{
@@ -991,6 +1011,13 @@
"completion_token": 2.8e-7
}
},
{
"model": "gpt-oss-120b:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "gpt-oss-20b",
"cost": {
@@ -1059,6 +1086,14 @@
"cache_read_token": 7.5e-7
}
},
{
"model": "grok-code-fast-1",
"cost": {
"prompt_token": 2e-7,
"completion_token": 0.0000015,
"cache_read_token": 2e-8
}
},
{
"model": "grok-vision-beta",
"cost": {
@@ -1087,6 +1122,20 @@
"completion_token": 2.8e-7
}
},
{
"model": "hermes-4-405b",
"cost": {
"prompt_token": 1.999188e-7,
"completion_token": 8.00064e-7
}
},
{
"model": "hermes-4-70b",
"cost": {
"prompt_token": 9.329544e-8,
"completion_token": 3.733632e-7
}
},
{
"model": "hunyuan-a13b-instruct",
"cost": {
@@ -1115,13 +1164,6 @@
"completion_token": 0.00001
}
},
{
"model": "internvl3-14b",
"cost": {
"prompt_token": 2e-7,
"completion_token": 4e-7
}
},
{
"model": "jamba-large-1.7",
"cost": {
@@ -1136,6 +1178,13 @@
"completion_token": 4e-7
}
},
{
"model": "kimi-dev-72b",
"cost": {
"prompt_token": 2.9e-7,
"completion_token": 0.00000115
}
},
{
"model": "kimi-dev-72b:free",
"cost": {
@@ -1304,13 +1353,6 @@
"completion_token": 4.9e-8
}
},
{
"model": "llama-3.2-11b-vision-instruct:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "llama-3.2-1b-instruct",
"cost": {
@@ -1321,8 +1363,8 @@
{
"model": "llama-3.2-3b-instruct",
"cost": {
"prompt_token": 3e-9,
"completion_token": 6e-9
"prompt_token": 1.2e-8,
"completion_token": 2.4e-8
}
},
{
@@ -1335,8 +1377,8 @@
{
"model": "llama-3.2-90b-vision-instruct",
"cost": {
"prompt_token": 0.0000012,
"completion_token": 0.0000012
"prompt_token": 3.5e-7,
"completion_token": 4e-7
}
},
{
@@ -1416,13 +1458,6 @@
"completion_token": 1.8e-7
}
},
{
"model": "llama3.1-typhoon2-70b-instruct",
"cost": {
"prompt_token": 8.8e-7,
"completion_token": 8.8e-7
}
},
{
"model": "llemma_7b",
"cost": {
@@ -1601,8 +1636,8 @@
{
"model": "mistral-nemo",
"cost": {
"prompt_token": 7.5e-9,
"completion_token": 5e-8
"prompt_token": 1e-8,
"completion_token": 4.00032e-8
}
},
{
@@ -1731,13 +1766,6 @@
"completion_token": 0.00000175
}
},
{
"model": "nous-hermes-2-mixtral-8x7b-dpo",
"cost": {
"prompt_token": 6e-7,
"completion_token": 6e-7
}
},
{
"model": "nova-lite-v1",
"cost": {
@@ -1893,13 +1921,6 @@
"completion_token": 0.000006
}
},
{
"model": "qwen-2-72b-instruct",
"cost": {
"prompt_token": 9e-7,
"completion_token": 9e-7
}
},
{
"model": "qwen-2.5-72b-instruct",
"cost": {
@@ -2060,8 +2081,15 @@
{
"model": "qwen3-30b-a3b-instruct-2507",
"cost": {
"prompt_token": 1e-7,
"completion_token": 3e-7
"prompt_token": 5.18308e-8,
"completion_token": 2.07424e-7
}
},
{
"model": "qwen3-30b-a3b-thinking-2507",
"cost": {
"prompt_token": 7.13e-8,
"completion_token": 2.852e-7
}
},
{
@@ -2107,14 +2135,14 @@
}
},
{
"model": "qwen3-coder:free",
"model": "qwen3-coder-30b-a3b-instruct",
"cost": {
"prompt_token": 0,
"completion_token": 0
"prompt_token": 5.18308e-8,
"completion_token": 2.07424e-7
}
},
{
"model": "qwerky-72b:free",
"model": "qwen3-coder:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
@@ -2190,13 +2218,6 @@
"completion_token": 0.0000034
}
},
{
"model": "sarvam-m:free",
"cost": {
"prompt_token": 0,
"completion_token": 0
}
},
{
"model": "shisa-v2-llama3.3-70b",
"cost": {

View File

@@ -81,25 +81,12 @@ export const manualCosts: ModelRow[] = [
},
},
// google gen ai
{
model: 'gemini-2.0-flash',
cost: {
prompt_token: 0.00000015,
completion_token: 0.000000075,
},
},
{
model: 'gemini-2.5-pro-preview',
cost: {
prompt_token: 0.00000125,
completion_token: 0.00001,
},
},
{
model: 'gemini-2.5-pro-preview:large',
cost: {
prompt_token: 0.0000025,
completion_token: 0.000015,
cache_read_token: 0.000000625,
},
},
// Other
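
With these manual gemini rows removed, gemini pricing resolves from generated-providers.json (the test above notes that 'gemini-2.0-flash' matches 'gemini-2.0-flash-001' there). For any row that still lacks a cache_read_token rate, the new branch in calculateInputCost falls back to 25% of the prompt rate; a hypothetical row, reusing the geminiInputCost sketch from earlier:

// Hypothetical rates, not from this diff: 400 cached of 1000 input tokens, no cache_read_token on the row.
geminiInputCost(1000, 400, { prompt_token: 0.000001, completion_token: 0.000002 })
// = 600 * 0.000001 + 400 * (0.000001 * 0.25) = 0.0006 + 0.0001 = 0.0007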