Support reading data from blob URI (#645)

* Make blob a valid URL

* Create function to detect the blob URI

* Change to `isValidUrl`

* Remove comment

Co-authored-by: Joshua Lochner <admin@xenova.com>

* Merge `isValidHttpUrl` into `isValidUrl`

* Correct implementation

* Update docs

* Add test

* Remove export for `isValidUrl`

* Test read blob via `getFile`

* Use `res.text()` instead of `res.body`

---------

Co-authored-by: Joshua Lochner <admin@xenova.com>
This commit is contained in:
Hans
2024-05-08 19:14:50 +08:00
committed by GitHub
parent 8bb8c5a33c
commit 880cd3ea65
2 changed files with 24 additions and 9 deletions
+12 -9
View File
@@ -151,23 +151,26 @@ class FileResponse {
}
/**
* Determines whether the given string is a valid HTTP or HTTPS URL.
* @param {string|URL} string The string to test for validity as an HTTP or HTTPS URL.
* Determines whether the given string is a valid URL.
* @param {string|URL} string The string to test for validity as a URL.
* @param {string[]} [protocols=null] A list of valid protocols. If specified, the protocol must be in this list.
* @param {string[]} [validHosts=null] A list of valid hostnames. If specified, the URL's hostname must be in this list.
* @returns {boolean} True if the string is a valid HTTP or HTTPS URL, false otherwise.
* @returns {boolean} True if the string is a valid URL, false otherwise.
*/
function isValidHttpUrl(string, validHosts = null) {
// https://stackoverflow.com/a/43467144
function isValidUrl(string, protocols = null, validHosts = null) {
let url;
try {
url = new URL(string);
} catch (_) {
return false;
}
if (protocols && !protocols.includes(url.protocol)) {
return false;
}
if (validHosts && !validHosts.includes(url.hostname)) {
return false;
}
return url.protocol === "http:" || url.protocol === "https:";
return true;
}
/**
@@ -178,7 +181,7 @@ function isValidHttpUrl(string, validHosts = null) {
*/
export async function getFile(urlOrPath) {
if (env.useFS && !isValidHttpUrl(urlOrPath)) {
if (env.useFS && !isValidUrl(urlOrPath, ['http:', 'https:', 'blob:'])) {
return new FileResponse(urlOrPath);
} else if (typeof process !== 'undefined' && process?.release?.name === 'node') {
@@ -189,7 +192,7 @@ export async function getFile(urlOrPath) {
headers.set('User-Agent', `transformers.js/${version}; is_ci/${IS_CI};`);
// Check whether we are making a request to the Hugging Face Hub.
const isHFURL = isValidHttpUrl(urlOrPath, ['huggingface.co', 'hf.co']);
const isHFURL = isValidUrl(urlOrPath, ['http:', 'https:'], ['huggingface.co', 'hf.co']);
if (isHFURL) {
// If an access token is present in the environment variables,
// we add it to the request headers.
@@ -433,7 +436,7 @@ export async function getModelFile(path_or_repo_id, filename, fatal = true, opti
if (env.allowLocalModels) {
// Accessing local models is enabled, so we try to get the file locally.
// If request is a valid HTTP URL, we skip the local file check. Otherwise, we try to get the file locally.
const isURL = isValidHttpUrl(requestURL);
const isURL = isValidUrl(requestURL, ['http:', 'https:']);
if (!isURL) {
try {
response = await getFile(localPath);
+12
View File
@@ -1,6 +1,7 @@
import { AutoProcessor } from '../src/transformers.js';
import { mel_filter_bank } from '../src/utils/audio.js';
import { getFile } from '../src/utils/hub.js';
import { MAX_TEST_EXECUTION_TIME } from './init.js';
@@ -42,4 +43,15 @@ describe('Utilities', () => {
}, MAX_TEST_EXECUTION_TIME);
});
describe('Hub utilities', () => {
    // Checks that `getFile` can read content addressed by an in-memory `blob:` object URL.
    it('Read data from blob', async () => {
        const blob = new Blob(['Hello, world!'], { type: 'text/plain' });
        const blobUrl = URL.createObjectURL(blob);
        try {
            const data = await getFile(blobUrl);
            expect(await data.text()).toBe('Hello, world!');
        } finally {
            // Object URLs keep their blob registered until explicitly revoked;
            // release it even when the assertion above fails.
            URL.revokeObjectURL(blobUrl);
        }
    });
});
});