Use ungated version of mistral tokenizer (#718)

2026-07-01 14:10:59 -04:00 · 2024-04-20 16:01:17 +02:00
parent 992f643e2a
commit 1a9964fb09
3 changed files with 3 additions and 3 deletions
@@ -2986,7 +2986,7 @@ export class PreTrainedTokenizer extends Callable {
     * ```javascript
     * import { AutoTokenizer } from "@xenova/transformers";
     * 
-     * const tokenizer = await AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1");
+     * const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");
     * 
     * const chat = [
     *   { "role": "user", "content": "Hello, how are you?" },
@@ -229,7 +229,7 @@ TOKENIZERS_WITH_CHAT_TEMPLATES = {
        'basic',
    ],

-    'mistralai/Mistral-7B-Instruct-v0.1': [
+    'Xenova/mistral-tokenizer-v1': [
        'basic',
    ],

@@ -334,7 +334,7 @@ describe('Extra decoding tests', () => {

 describe('Chat templates', () => {
    it('should generate a chat template', async () => {
-        const tokenizer = await AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1");
+        const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");

        const chat = [
            { "role": "user", "content": "Hello, how are you?" },