mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 21:01:08 +00:00
Bug 1746631 - Implement integer gemm intrinsic functions. r=rhunt
- Implements 7 intrinsic functions - These intrinsics are only enabled for x86/x86-64 platform and for privileged extensions - These intrinsics should never be accessible to web-pages -- Added corresponding mochitest Differential Revision: https://phabricator.services.mozilla.com/D136430
This commit is contained in:
parent
1342f2782a
commit
dfde362b9e
3
dom/tests/mochitest/integer-gemm/mochitest.ini
Normal file
3
dom/tests/mochitest/integer-gemm/mochitest.ini
Normal file
@ -0,0 +1,3 @@
|
||||
[DEFAULT]
|
||||
|
||||
[test_unavailable_for_webpage.html]
|
@ -0,0 +1,29 @@
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=1746631
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Test for Mozilla integer gemm (1746631) -- Mozilla integer gemm shouldn't be available for web pages</title>
|
||||
<script src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1746631">Feature Test 1746631</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script type="text/javascript">
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
|
||||
const gemm = "mozIntGemm";
|
||||
is(gemm in WebAssembly, false, `"WebAssembly.${gemm}" shouldn't be defined for web pages`);
|
||||
|
||||
SimpleTest.finish();
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
@ -161,6 +161,7 @@ MOCHITEST_MANIFESTS += [
|
||||
"mochitest/gamepad/mochitest.ini",
|
||||
"mochitest/general/mochitest.ini",
|
||||
"mochitest/geolocation/mochitest.ini",
|
||||
"mochitest/integer-gemm/mochitest.ini",
|
||||
"mochitest/keyhandling/mochitest.ini",
|
||||
"mochitest/localstorage/mochitest.ini",
|
||||
"mochitest/orientation/mochitest.ini",
|
||||
|
@ -903,12 +903,10 @@ option(
|
||||
)
|
||||
|
||||
|
||||
@depends("--enable-wasm-moz-intgemm")
|
||||
def wasm_moz_intgemm(value):
|
||||
if not value:
|
||||
return
|
||||
|
||||
return True
|
||||
@depends("--enable-wasm-moz-intgemm", target)
|
||||
def wasm_moz_intgemm(value, target):
|
||||
if value and target.cpu in ("x86", "x86_64"):
|
||||
return True
|
||||
|
||||
|
||||
set_config("ENABLE_WASM_MOZ_INTGEMM", wasm_moz_intgemm)
|
||||
|
405
js/src/intgemm/IntegerGemmIntrinsic.cpp
Normal file
405
js/src/intgemm/IntegerGemmIntrinsic.cpp
Normal file
@ -0,0 +1,405 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: set ts=8 sts=2 et sw=2 tw=80:
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#include "intgemm/IntegerGemmIntrinsic.h"
|
||||
#include "mozilla/CheckedInt.h"
|
||||
#include <intgemm.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "js/HeapAPI.h"
|
||||
#include "vm/JSContext.h"
|
||||
#include "wasm/WasmInstance.h"
|
||||
#include "wasm/WasmLog.h"
|
||||
#include "vm/ArrayBufferObject-inl.h"
|
||||
|
||||
static constexpr uint32_t ARRAY_ALIGNMENT = 64;
|
||||
static constexpr uint32_t ROWS_A_MULTIPLIER = 1;
|
||||
static constexpr uint32_t COLUMNS_A_MULTIPLIER = 64;
|
||||
static constexpr uint32_t ROWS_B_MULTIPLIER = COLUMNS_A_MULTIPLIER;
|
||||
static constexpr uint32_t COLUMNS_B_MULTIPLIER = 8;
|
||||
static constexpr uint32_t SELECTED_COLUMNS_B_MULTIPLIER = 8;
|
||||
|
||||
// Reports the error identified by `errorNumber` on `cx`, resolving the message
// through js::GetErrorMessage. No extra format arguments are supplied.
void ReportGemmError(JSContext* cx, const unsigned errorNumber) {
  JS_ReportErrorNumberASCII(cx,
                            js::GetErrorMessage,
                            nullptr,
                            errorNumber);
}
|
||||
|
||||
size_t GetWasmRawBufferLength(const uint8_t* memBase) {
|
||||
const js::WasmArrayRawBuffer* rawBuf =
|
||||
js::WasmArrayRawBuffer::fromDataPtr(memBase);
|
||||
return rawBuf->byteLength();
|
||||
}
|
||||
|
||||
// Validates a single matrix dimension.
//
// `size` is accepted only when it is non-zero and an exact multiple of
// `sizeMultiplier`. On rejection a message is written through js::wasm::Log
// and false is returned; no JS error is reported here.
bool CheckMatrixDimension(JSContext* cx, uint32_t size,
                          uint32_t sizeMultiplier) {
  const bool isPositiveMultiple =
      (size != 0) && (size % sizeMultiplier == 0);
  if (isPositiveMultiple) {
    return true;
  }

  js::wasm::Log(
      cx, "Invalid dimension value:%" PRIu32 " (should be a multiple of %u)",
      size, sizeMultiplier);
  return false;
}
|
||||
|
||||
// Checks that the region starting at wasm offset `input` and spanning
// `inputSize` (a size in bytes, since it is compared against the buffer's byte
// length) lies inside a wasm memory of `wasmBufferSize` bytes.
//
// The end offset is computed with overflow-checked 64-bit arithmetic. The
// check fails when the sum overflows or when the end offset is greater than
// OR EQUAL to `wasmBufferSize`, i.e. a region that ends exactly at the end of
// the buffer is also rejected.
// NOTE(review): the `>=` looks conservative by one byte -- confirm whether that
// is intentional before relaxing it.
bool CheckMatrixBound(JSContext* cx, uint32_t input, uint64_t inputSize,
                      size_t wasmBufferSize) {
  mozilla::CheckedUint64 endOffset(inputSize);
  endOffset += input;

  const bool insideBuffer =
      endOffset.isValid() &&
      (endOffset.value() < static_cast<uint64_t>(wasmBufferSize));
  if (insideBuffer) {
    return true;
  }

  js::wasm::Log(cx, "Memory out of wasm bounds for matrix:%" PRIu32, input);
  return false;
}
|
||||
|
||||
// Checks that wasm offset `input` is a multiple of ARRAY_ALIGNMENT and that the
// region [input, input + inputSize) passes CheckMatrixBound().
//
// Logs through js::wasm::Log and returns false when the offset is misaligned;
// otherwise returns the result of the bound check.
bool CheckMatrixBoundAndAlignment(JSContext* cx, uint32_t input,
                                  uint64_t inputSize, size_t wasmBufferSize) {
  // Only the offset is tested for alignment, not the final pointer inside wasm
  // memory. That is sufficient as long as the assertion below holds.
  static_assert(js::gc::PageSize >= ARRAY_ALIGNMENT,
                "PageSize should be bigger than Alignment");

  const bool isAligned = (input % ARRAY_ALIGNMENT) == 0;
  if (!isAligned) {
    js::wasm::Log(
        cx, "Unaligned access for matrix:%" PRIu32 " (should be %u aligned)",
        input, ARRAY_ALIGNMENT);
    return false;
  }

  // Alignment is fine; the outcome now depends only on the bound check.
  return CheckMatrixBound(cx, input, inputSize, wasmBufferSize);
}
|
||||
|
||||
// Intrinsic entry point: quantizes and rearranges Input matrix B (floats) into
// the prepared int8 form by calling ::intgemm::Int8::PrepareB.
//
// `inputMatrixB` and `outputMatrixB` are offsets into the wasm memory whose
// data pointer is `memBase`. `zeroPoint` is accepted for interface reasons but
// is not used by this implementation.
//
// Returns 0 on success. On a validation failure a JS error is reported on the
// instance's context and -1 is returned (the signature's failure mode is
// FailOnNegI32).
int32_t js::intgemm::IntrI8PrepareB(wasm::Instance* instance,
                                    uint32_t inputMatrixB, float scale,
                                    float zeroPoint, uint32_t rowsB,
                                    uint32_t colsB, uint32_t outputMatrixB,
                                    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareB.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory Bound and Alignment checks for matrices.
  // sizeB is the element count. The output holds that many int8 values, but
  // the input holds that many floats, so its byte size is sizeof(float) times
  // larger. The multiplication is overflow-checked because sizeB itself can be
  // close to 2^64.
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  mozilla::CheckedUint64 inputBytesB(sizeB);
  inputBytesB *= static_cast<uint64_t>(sizeof(float));
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!inputBytesB.isValid() ||
      !CheckMatrixBoundAndAlignment(cx, inputMatrixB, inputBytesB.value(),
                                    wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: inputB:%x  rowsB:%" PRIu32 "  colsB:%" PRIu32
              "  outputB:%x  sizeB:%" PRIu64 "  wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixB, rowsB, colsB, outputMatrixB, sizeB,
              wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm) for PrepareB
  uint8_t* inputMatrixBPtr = &memBase[inputMatrixB];
  uint8_t* outputMatrixBPtr = &memBase[outputMatrixB];
  ::intgemm::Int8::PrepareB(reinterpret_cast<const float*>(inputMatrixBPtr),
                            reinterpret_cast<int8_t*>(outputMatrixBPtr),
                            scale,  // Quant Mult
                            rowsB, colsB);
  return 0;
}
|
||||
|
||||
// Intrinsic entry point: prepares B from a transposed float version of Input
// matrix B by calling ::intgemm::Int8::PrepareBTransposed.
//
// `inputMatrixBTransposed` and `outputMatrixB` are offsets into the wasm memory
// whose data pointer is `memBase`. `zeroPoint` is not used by this
// implementation.
//
// Returns 0 on success. On a validation failure a JS error is reported on the
// instance's context and -1 is returned (failure mode FailOnNegI32).
int32_t js::intgemm::IntrI8PrepareBFromTransposed(
    wasm::Instance* instance, uint32_t inputMatrixBTransposed, float scale,
    float zeroPoint, uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB,
    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareBFromTransposed.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory Bound and Alignment checks for all matrices.
  // The transposed input is read as rowsB * colsB floats, so it is checked
  // with its byte size (overflow-checked); the int8 output needs one byte per
  // element.
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  mozilla::CheckedUint64 inputBytesB(sizeB);
  inputBytesB *= static_cast<uint64_t>(sizeof(float));
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!inputBytesB.isValid() ||
      !CheckMatrixBoundAndAlignment(cx, inputMatrixBTransposed,
                                    inputBytesB.value(), wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: inputBT:%x  rowsB:%" PRIu32 "  colsB:%" PRIu32
              "  outputB:%x  sizeB:%" PRIu64 "  wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixBTransposed, rowsB, colsB, outputMatrixB,
              sizeB, wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm) for PrepareBTransposed
  uint8_t* inputMatrixBTransposedPtr = &memBase[inputMatrixBTransposed];
  uint8_t* outputMatrixBPtr = &memBase[outputMatrixB];
  ::intgemm::Int8::PrepareBTransposed(
      reinterpret_cast<const float*>(inputMatrixBTransposedPtr),
      reinterpret_cast<int8_t*>(outputMatrixBPtr),
      scale,  // Quant Mult
      rowsB, colsB);
  return 0;
}
|
||||
|
||||
// Intrinsic entry point: prepares B from an already quantized (int8) and
// transposed version of Input matrix B by calling
// ::intgemm::Int8::PrepareBQuantizedTransposed.
//
// `inputMatrixBQuantizedTransposed` and `outputMatrixB` are offsets into the
// wasm memory whose data pointer is `memBase`. Both buffers are accessed as
// int8 arrays of rowsB * colsB elements, so the element count is also the byte
// size used for the bound checks.
//
// Returns 0 on success. On a validation failure a JS error is reported on the
// instance's context and -1 is returned (failure mode FailOnNegI32).
int32_t js::intgemm::IntrI8PrepareBFromQuantizedTransposed(
    wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed,
    uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareBFromQuantizedTransposed.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory Bound and Alignment checks for all matrices
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBQuantizedTransposed, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) {
    // The value printed here is sizeB; the label previously said "sizeA".
    wasm::Log(cx,
              "%s: inputBQT:%x  rowsB:%" PRIu32 "  colsB:%" PRIu32
              "  outputB:%x  sizeB:%" PRIu64 "  wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixBQuantizedTransposed, rowsB, colsB,
              outputMatrixB, sizeB, wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm)
  uint8_t* inputMatrixBQuantizedTransposedPtr =
      &memBase[inputMatrixBQuantizedTransposed];
  uint8_t* outputMatrixBPtr = &memBase[outputMatrixB];
  ::intgemm::Int8::PrepareBQuantizedTransposed(
      reinterpret_cast<const int8_t*>(inputMatrixBQuantizedTransposedPtr),
      reinterpret_cast<int8_t*>(outputMatrixBPtr), rowsB, colsB);
  return 0;
}
|
||||
|
||||
// Intrinsic entry point: quantizes Input matrix A (floats) into the prepared
// int8 form by calling ::intgemm::Int8Shift::PrepareA.
//
// `inputMatrixA` and `outputMatrixA` are offsets into the wasm memory whose
// data pointer is `memBase`. `zeroPoint` is not used by this implementation.
//
// Returns 0 on success. On a validation failure a JS error is reported on the
// instance's context and -1 is returned (failure mode FailOnNegI32).
int32_t js::intgemm::IntrI8PrepareA(wasm::Instance* instance,
                                    uint32_t inputMatrixA, float scale,
                                    float zeroPoint, uint32_t rowsA,
                                    uint32_t colsA, uint32_t outputMatrixA,
                                    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareA.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsA, COLUMNS_A_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsA:%" PRIu32 " colsA:%" PRIu32, __FUNCTION__, rowsA,
              colsA);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory Bound and Alignment checks for all matrices.
  // The input is read as rowsA * colsA floats, so its extent in bytes is
  // sizeof(float) times the element count (overflow-checked). The int8 output
  // needs one byte per element.
  uint64_t sizeA = (uint64_t)rowsA * (uint64_t)colsA;
  mozilla::CheckedUint64 inputBytesA(sizeA);
  inputBytesA *= static_cast<uint64_t>(sizeof(float));
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!inputBytesA.isValid() ||
      !CheckMatrixBoundAndAlignment(cx, inputMatrixA, inputBytesA.value(),
                                    wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixA, sizeA, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: inputA:%x  rowsA:%" PRIu32 "  colsA:%" PRIu32
              "  outputA:%x  sizeA:%" PRIu64 "  wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixA, rowsA, colsA, outputMatrixA, sizeA,
              wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm)
  uint8_t* inputMatrixAPtr = &memBase[inputMatrixA];
  uint8_t* outputMatrixAPtr = &memBase[outputMatrixA];
  ::intgemm::Int8Shift::PrepareA(
      reinterpret_cast<const float*>(inputMatrixAPtr),
      reinterpret_cast<int8_t*>(outputMatrixAPtr), scale, rowsA, colsA);
  return 0;
}
|
||||
|
||||
// Intrinsic entry point: prepares the bias for the multiply step by calling
// ::intgemm::Int8Shift::PrepareBias with an UnquantizeAndAddBiasAndWrite
// callback.
//
// `inputMatrixBPrepared`, `inputBias` and `output` are offsets into the wasm
// memory whose data pointer is `memBase`. The prepared B matrix is accessed as
// int8; the bias input and the output are accessed as float arrays.
// `zeroPointA` and `zeroPointB` are not used by this implementation.
//
// Returns 0 on success. On a validation failure a JS error is reported on the
// instance's context and -1 is returned (failure mode FailOnNegI32).
int32_t js::intgemm::IntrI8PrepareBias(
    wasm::Instance* instance, uint32_t inputMatrixBPrepared, float scaleA,
    float zeroPointA, float scaleB, float zeroPointB, uint32_t rowsB,
    uint32_t colsB, uint32_t inputBias, uint32_t output, uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareBias.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory Bound checks for all matrices.
  // The bias input and the output are arrays of colsB floats, so they must be
  // bound-checked with their size in bytes. colsB is 32-bit, so the product
  // below cannot overflow 64 bits.
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  uint64_t biasBytes = (uint64_t)colsB * (uint64_t)sizeof(float);
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBound(cx, inputBias, biasBytes, wasmBufferSize) ||
      !CheckMatrixBound(cx, output, biasBytes, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: preparedB:%x  rowsB:%" PRIu32 "  colsB:%" PRIu32
              "  inputBias:%x  outputBias:%x  sizeB:%" PRIu64
              "  wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixBPrepared, rowsB, colsB, inputBias,
              output, sizeB, wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm)
  uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared];
  uint8_t* inputBiasPtr = &memBase[inputBias];
  uint8_t* outputPtr = &memBase[output];
  float unquantFactor =
      (-1) * ((127.0f / scaleA) * (127.0f / scaleB)) / (127.0f);
  ::intgemm::Int8Shift::PrepareBias(
      reinterpret_cast<const int8_t*>(inputMatrixBPreparedPtr), rowsB, colsB,
      ::intgemm::callbacks::UnquantizeAndAddBiasAndWrite(
          unquantFactor, reinterpret_cast<const float*>(inputBiasPtr),
          reinterpret_cast<float*>(outputPtr)));
  return 0;
}
|
||||
|
||||
// Intrinsic entry point: multiplies prepared A by prepared B and adds the
// prepared bias by calling ::intgemm::Int8Shift::Multiply with an
// UnquantizeAndAddBiasAndWrite callback.
//
// `inputMatrixAPrepared`, `inputMatrixBPrepared`, `inputBiasPrepared` and
// `output` are offsets into the wasm memory whose data pointer is `memBase`.
// A and B are accessed as int8 arrays; the bias and the output are accessed as
// float arrays. `zeroPointA` and `zeroPointB` are not used by this
// implementation.
//
// Returns 0 on success. On a validation failure a JS error is reported on the
// instance's context and -1 is returned (failure mode FailOnNegI32).
int32_t js::intgemm::IntrI8MultiplyAndAddBias(
    wasm::Instance* instance, uint32_t inputMatrixAPrepared, float scaleA,
    float zeroPointA, uint32_t inputMatrixBPrepared, float scaleB,
    float zeroPointB, uint32_t inputBiasPrepared, float unquantMultiplier,
    uint32_t rowsA, uint32_t width, uint32_t colsB, uint32_t output,
    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8MultiplyAndAddBias.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) ||
      !CheckMatrixDimension(cx, width, COLUMNS_A_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsA:%" PRIu32 " width:%" PRIu32 " colsB:%" PRIu32,
              __FUNCTION__, rowsA, width, colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory Bound checks for all matrices.
  // sizeA/sizeB/sizeBias/sizeOutput are element counts. A and B are int8, so
  // the count is also their byte size. The bias and the output are float
  // arrays, so their byte sizes are sizeof(float) times larger; the output
  // byte size is overflow-checked because rowsA * colsB can approach 2^64.
  uint64_t sizeA = (uint64_t)rowsA * (uint64_t)width;
  uint64_t sizeB = (uint64_t)width * (uint64_t)colsB;
  uint64_t sizeBias = (uint64_t)colsB;
  uint64_t sizeOutput = (uint64_t)rowsA * (uint64_t)colsB;
  uint64_t biasBytes = sizeBias * (uint64_t)sizeof(float);
  mozilla::CheckedUint64 outputBytes(sizeOutput);
  outputBytes *= static_cast<uint64_t>(sizeof(float));
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!outputBytes.isValid() ||
      !CheckMatrixBoundAndAlignment(cx, inputMatrixAPrepared, sizeA,
                                    wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBound(cx, inputBiasPrepared, biasBytes, wasmBufferSize) ||
      !CheckMatrixBound(cx, output, outputBytes.value(), wasmBufferSize)) {
    wasm::Log(cx,
              "%s: preparedA:%x  preparedB:%x  preparedBias:%x  rowsA:%" PRIu32
              "  width:%" PRIu32 "  colsB:%" PRIu32
              "  output:%x  sizeA:%" PRIu64 "  sizeB:%" PRIu64
              "  sizeBias:%" PRIu64 "  sizeOutput:%" PRIu64,
              __FUNCTION__, inputMatrixAPrepared, inputMatrixBPrepared,
              inputBiasPrepared, rowsA, width, colsB, output, sizeA, sizeB,
              sizeBias, sizeOutput);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm)
  uint8_t* inputMatrixAPreparedPtr = &memBase[inputMatrixAPrepared];
  uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared];
  uint8_t* inputBiasPreparedPtr = &memBase[inputBiasPrepared];
  uint8_t* outputPtr = &memBase[output];
  float unquantFactor = unquantMultiplier / (scaleA * scaleB);
  ::intgemm::Int8Shift::Multiply(
      reinterpret_cast<const int8_t*>(inputMatrixAPreparedPtr),
      reinterpret_cast<const int8_t*>(inputMatrixBPreparedPtr), rowsA, width,
      colsB,
      ::intgemm::callbacks::UnquantizeAndAddBiasAndWrite(
          unquantFactor, reinterpret_cast<const float*>(inputBiasPreparedPtr),
          reinterpret_cast<float*>(outputPtr)));
  return 0;
}
|
||||
|
||||
// Intrinsic entry point: selects columns of a prepared B matrix by calling
// ::intgemm::Int8::SelectColumnsB.
//
// `inputMatrixBPrepared`, `colIndexList` and `output` are offsets into the
// wasm memory whose data pointer is `memBase`. The prepared B matrix and the
// output are accessed as int8 arrays; the column index list is accessed as an
// array of `sizeColIndexList` uint32_t values.
//
// Returns 0 on success. On a validation failure a JS error is reported on the
// instance's context and -1 is returned (failure mode FailOnNegI32).
//
// NOTE(review): the index values stored in the list are not compared against
// `colsB` here; presumably an index >= colsB makes intgemm read outside the
// prepared B matrix -- confirm and consider validating them.
int32_t js::intgemm::IntrI8SelectColumnsOfB(wasm::Instance* instance,
                                            uint32_t inputMatrixBPrepared,
                                            uint32_t rowsB, uint32_t colsB,
                                            uint32_t colIndexList,
                                            uint32_t sizeColIndexList,
                                            uint32_t output, uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8SelectColumnsOfB.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, sizeColIndexList,
                            SELECTED_COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx,
              "%s: rowsB:%" PRIu32 "  colsB:%" PRIu32
              "  sizeColIndexList:%" PRIu32,
              __FUNCTION__, rowsB, colsB, sizeColIndexList);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory Bound checks for all matrices.
  // The index list holds sizeColIndexList uint32_t entries, so it must be
  // bound-checked with its size in bytes. sizeColIndexList is 32-bit, so the
  // product below cannot overflow 64 bits.
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  uint64_t sizeOutput = (uint64_t)rowsB * (uint64_t)sizeColIndexList;
  uint64_t colIndexListBytes =
      (uint64_t)sizeColIndexList * (uint64_t)sizeof(uint32_t);
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBound(cx, colIndexList, colIndexListBytes, wasmBufferSize) ||
      !CheckMatrixBound(cx, output, sizeOutput, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: preparedB:%x  rowsB:%" PRIu32 "  colsB:%" PRIu32
              "  colList:%x  sizeColList:%" PRIu32 "  output:%x  sizeB:%" PRIu64
              "  sizeOutput:%" PRIu64,
              __FUNCTION__, inputMatrixBPrepared, rowsB, colsB, colIndexList,
              sizeColIndexList, output, sizeB, sizeOutput);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm)
  uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared];
  uint8_t* colIndexListPtr = &memBase[colIndexList];
  uint8_t* outputPtr = &memBase[output];
  const uint32_t* colIndexBegin =
      reinterpret_cast<const uint32_t*>(colIndexListPtr);
  ::intgemm::Int8::SelectColumnsB(
      reinterpret_cast<const int8_t*>(inputMatrixBPreparedPtr),
      reinterpret_cast<int8_t*>(outputPtr), rowsB, colIndexBegin,
      colIndexBegin + sizeColIndexList);
  return 0;
}
|
358
js/src/intgemm/IntegerGemmIntrinsic.h
Normal file
358
js/src/intgemm/IntegerGemmIntrinsic.h
Normal file
@ -0,0 +1,358 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
||||
* vim: set ts=8 sts=2 et sw=2 tw=80:
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#ifndef intgemm_IntegerGemmIntrinsic_h
|
||||
#define intgemm_IntegerGemmIntrinsic_h
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace js {
|
||||
namespace wasm {
|
||||
class Instance;
|
||||
}
|
||||
|
||||
namespace intgemm {
|
||||
|
||||
/* Interface for integer matrix multiplication followed by addition of bias.
|
||||
*
|
||||
* C = A * B + Bias
|
||||
*
|
||||
* Input matrix A:
|
||||
* - A 2-D matrix that typically represents activations as floating point
|
||||
* values
|
||||
* - no. of rows should be a positive integer
|
||||
* - no. of columns should be a positive integral multiple of 64
|
||||
* - is represented as array (contiguous memory locations) in row-major format
|
||||
*
|
||||
* Input matrix B:
|
||||
* - A 2-D matrix that typically represents fixed model parameters as
|
||||
* floating point values
|
||||
* - no. of rows should be:
|
||||
* -- equal to no. of columns of Input matrix A
|
||||
* -- a positive integral multiple of 64
|
||||
* - no. of columns should be a positive integral multiple of 8
|
||||
* - is represented as array (contiguous memory locations) in row-major format
|
||||
*
|
||||
* Please note that it is also possible to pass Input matrix B in 2 more forms:
|
||||
* - One that is already a quantized and transposed version of Input matrix B
|
||||
* - Other that is already a transposed version of Input matrix B
|
||||
*
|
||||
* Input Bias:
|
||||
* - is an array (contiguous memory locations) that represents bias
|
||||
* - size of the array should be equal to the no. of columns of Input matrix B
|
||||
*
|
||||
* Output matrix C:
|
||||
* - is a 2-D matrix that represents the result (= A * B + Bias)
|
||||
* - no. of rows = no. of rows of Input matrix A
|
||||
* - no. of columns = no. of columns of Input matrix B (in
|
||||
* untransposed form)
|
||||
* - is represented as array (contiguous memory locations) in row-major format
|
||||
*
|
||||
* Please note that most of the functions in this interface might have
|
||||
* architecture specific implementations.
|
||||
*
|
||||
* Conventions followed for the interface:
|
||||
* - Unless explicitly mentioned, Input matrix B refers to an unquantized
|
||||
* (i.e. float values) and non-transposed version
|
||||
* - no. of rows of Input matrix A = `rowsA`
|
||||
* - no. of columns of Input matrix A (`colsA`) = no. of rows of Input matrix B
|
||||
* (`rowsB`) = `width`
|
||||
* - no. of columns of Input matrix B = `colsB`
|
||||
*/
|
||||
|
||||
/* Prepare B for the Matrix Multiply function from Input matrix B.
|
||||
*
|
||||
* Quantization is performed on the input.
|
||||
* The final prepared B is in CPU-dependent format and can be used as an input
|
||||
* to matrix multiply function (`int8_multiply_and_add_bias`).
|
||||
*
|
||||
* Please note that this interface might have architecture specific
|
||||
* implementation.
|
||||
*
|
||||
* @param[in] inputMatrixB An array representing the Input matrix B in
|
||||
* row-major format.
|
||||
* Size of the array = `rowsB` * `colsB`.
|
||||
* Shape of the matrix: (`rowsB`, `colsB`)
|
||||
* @param[in] scale The scaling factor (for quantization)
|
||||
* @param[in] zeroPoint The zero point (for quantization)
|
||||
* @param[in] rowsB No. of rows of Input matrix B. It should be
|
||||
* a positive integer and a multiple of 64.
|
||||
* @param[in] colsB No. of columns of Input matrix B. It should
|
||||
* be a positive integer and a multiple of 8.
|
||||
* @param[out] outputMatrixB An array representing the prepared B matrix.
|
||||
* Size of the array = `rowsB` * `colsB`.
|
||||
*
|
||||
* This function implements the intrinsic:
|
||||
* int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32,
|
||||
* colsB: i32, outputMatrixB: i32) which implements the function:
|
||||
* int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint,
|
||||
* uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
|
||||
*/
|
||||
int32_t IntrI8PrepareB(wasm::Instance* instance, uint32_t inputMatrixB,
|
||||
float scale, float zeroPoint, uint32_t rowsB,
|
||||
uint32_t colsB, uint32_t outputMatrixB,
|
||||
uint8_t* memBase);
|
||||
|
||||
/* Prepare B for the Matrix Multiply function from transposed version of Input
|
||||
* matrix B.
|
||||
*
|
||||
* Quantization is performed on floating values of input.
|
||||
* The final prepared B is in CPU-dependent format and can be used as an input
|
||||
* to matrix multiply function (`int8_multiply_and_add_bias`).
|
||||
*
|
||||
* Please note that this interface might have architecture specific
|
||||
* implementation.
|
||||
*
|
||||
* @param[in] inputMatrixBTransposed An array representing transposed version
|
||||
* of Input matrix B.
|
||||
* It is in column-major format.
|
||||
* Size of the array = `rowsB` * `colsB`.
|
||||
* Shape of the matrix: (`colsB`, `rowsB`)
|
||||
* @param[in] scale The scaling factor (for quantization)
|
||||
* @param[in] zeroPoint The zero point (for quantization)
|
||||
* @param[in] rowsB No. of rows of Input matrix B. It should
|
||||
* be a positive integer and a multiple of
|
||||
* 64.
|
||||
* @param[in] colsB No. of columns of Input matrix B. It
|
||||
* should be a positive integer and a
|
||||
* multiple of 8.
|
||||
* @param[out] outputMatrixB An array representing the prepared B
|
||||
* matrix. Size of array = `rowsB`*`colsB`
|
||||
*
|
||||
* This function implements the intrinsic:
|
||||
* int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32,
|
||||
* zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements
|
||||
* the function: int8_prepare_b_from_transposed(const float*
|
||||
* inputMatrixBTransposed, float scale, float zeroPoint, uint32_t rowsB,
|
||||
* uint32_t colsB, int8_t* outputMatrixB)
|
||||
*/
|
||||
int32_t IntrI8PrepareBFromTransposed(wasm::Instance* instance,
|
||||
uint32_t inputMatrixBTransposed,
|
||||
float scale, float zeroPoint,
|
||||
uint32_t rowsB, uint32_t colsB,
|
||||
uint32_t outputMatrixB, uint8_t* memBase);
|
||||
|
||||
/* Prepare B for the Matrix Multiply function from a quantized and transposed
|
||||
* version of Input matrix B which is also in a CPU-independent format.
|
||||
*
|
||||
* The final prepared B is in CPU-dependent format and can be used as an input
|
||||
* to matrix multiply function (`int8_multiply_and_add_bias`).
|
||||
*
|
||||
* This function is useful while using the quantized models that are stored in a
|
||||
* CPU-independent format on the disk.
|
||||
*
|
||||
* @param[in] inputMatrixBQuantizedTransposed An array representing the
|
||||
* quantized and transposed
|
||||
* version of Input matrix B.
|
||||
* It is in column-major format.
|
||||
* Size of array =
|
||||
* `rowsB`*`colsB`
|
||||
* Shape of the matrix:
|
||||
* (`colsB`,`rowsB`)
|
||||
* @param[in] rowsB No. of rows of Input matrix B.
|
||||
* Should be a positive integer
|
||||
* and a multiple of 64.
|
||||
* @param[in] colsB No. of columns of Input matrix
|
||||
* B. Should be a positive
|
||||
* integer and a multiple of 8
|
||||
* @param[out] outputMatrixB An array representing the
|
||||
* prepared B matrix.
|
||||
* Size: `rowsB` * `colsB`.
|
||||
*
|
||||
* This function implements the intrinsic:
|
||||
* int8_prepare_b_from_quantized_transposed(inputMatrixBQuantizedTransposed:
|
||||
* i32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements the
|
||||
* function: int8_prepare_b_from_quantized_transposed(const int8_t*
|
||||
* inputMatrixBQuantizedTransposed, uint32_t rowsB, uint32_t colsB, int8_t*
|
||||
* outputMatrixB)
|
||||
*/
|
||||
int32_t IntrI8PrepareBFromQuantizedTransposed(
|
||||
wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed,
|
||||
uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase);
|
||||
|
||||
/* Prepare A for the Matrix Multiply function from Input matrix A.
|
||||
*
|
||||
* It performs quantization on floating values of input.
|
||||
* The final prepared A might be architecture dependent. e.g. On some
|
||||
* architectures like x86, it might be unsigned (achieved by adding 127 to
|
||||
* quantized values) while on others like Arm, it might be signed. The final
|
||||
* prepared A can be used as an input to matrix multiply function
|
||||
* (`int8_multiply_and_add_bias`).
|
||||
*
|
||||
* Please note that this interface might have architecture specific
|
||||
* implementation.
|
||||
*
|
||||
* @param[in] inputMatrixA An array representing the Input matrix A in
|
||||
* row-major format.
|
||||
* Size of the array = `rowsA` * `colsA`.
|
||||
* Shape of the matrix: (`rowsA`, `colsA`)
|
||||
* @param[in] scale The scaling factor (for quantization)
|
||||
* @param[in] zeroPoint The zero point (for quantization)
|
||||
* @param[in] rowsA No. of rows of Input matrix A. It should be a
|
||||
* positive integer.
|
||||
* @param[in] colsA No. of columns of Input matrix A. It should be a
|
||||
* positive integer and a multiple of 64.
|
||||
* @param[out] outputMatrixA An array representing the prepared A matrix.
|
||||
* Size of the array = `rowsA` * `colsA`.
|
||||
*
|
||||
* This function implements the intrinsic:
|
||||
* int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32,
|
||||
* colsA: i32, outputMatrixA: i32) which implements the function:
|
||||
* int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint,
|
||||
* uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA)
|
||||
*/
|
||||
int32_t IntrI8PrepareA(wasm::Instance* instance, uint32_t inputMatrixA,
|
||||
float scale, float zeroPoint, uint32_t rowsA,
|
||||
uint32_t colsA, uint32_t outputMatrixA,
|
||||
uint8_t* memBase);
|
||||
|
||||
/* Prepares bias for the Matrix Multiply function.
|
||||
*
|
||||
* It uses the prepared B (which must be obtained by using any of the
|
||||
* int8_prepare_b* functions) and a bias input to prepare the final bias.
|
||||
*
|
||||
* The final bias can be used as an input to matrix multiply function
|
||||
* (`int8_multiply_and_add_bias`).
|
||||
*
|
||||
* @param[in] inputMatrixBPrepared An array representing the prepared B
|
||||
* matrix. Size of array = `rowsB`*`colsB`.
|
||||
* @param[in] scaleA The scaling factor (for quantization) of A
|
||||
* @param[in] zeroPointA The zero point (for quantization) of A
|
||||
* @param[in] scaleB The scaling factor (for quantization) of B
|
||||
* @param[in] zeroPointB The zero point (for quantization) of B
|
||||
* @param[in] rowsB No. of rows of Input matrix B (unquantized
|
||||
* & non-transposed). It should be a positive
|
||||
* integer and a multiple of 64.
|
||||
* @param[in] colsB No. of columns of Input matrix B
|
||||
* (unquantized & non-transposed). It should
|
||||
* be a positive integer and a multiple of 8.
|
||||
* @param[in] inputBias An array representing the input bias. Size
|
||||
* of array = `colsB`
|
||||
* @param[out] output An array representing the final prepared
|
||||
* bias. Size of the array = `colsB`
|
||||
*
|
||||
* This function implements the intrinsic:
|
||||
* int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32,
|
||||
* scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32, inputBias: i32, output:
|
||||
* i32) which implements the function: int8_prepare_bias(const int8_t*
|
||||
* inputMatrixBPrepared, float scaleA, float zeroPointA, float scaleB, float
|
||||
* zeroPointB, uint32_t rowsB, uint32_t colsB, const float* inputBias, float*
|
||||
* output)
|
||||
*/
|
||||
int32_t IntrI8PrepareBias(wasm::Instance* instance,
|
||||
uint32_t inputMatrixBPrepared, float scaleA,
|
||||
float zeroPointA, float scaleB, float zeroPointB,
|
||||
uint32_t rowsB, uint32_t colsB, uint32_t inputBias,
|
||||
uint32_t output, uint8_t* memBase);
|
||||
|
||||
/* Perform multiplication of 2 matrices followed by adding a bias.
|
||||
*
|
||||
* i.e. Output = inputMatrixAPrepared * inputMatrixBPrepared + inputBiasPrepared
|
||||
*
|
||||
* The inputs inputMatrixAPrepared, inputMatrixBPrepared and inputBiasPrepared
|
||||
* of this function must be obtained by using `int8_prepare_a`, one of the
|
||||
* `int8_prepare_b*` and `int8_prepare_bias` functions respectively.
|
||||
*
|
||||
* Please note that this interface might have architecture specific
|
||||
* implementation.
|
||||
*
|
||||
* @param[in] inputMatrixAPrepared An array representing the prepared A
|
||||
* matrix. This must be obtained by using
|
||||
* `int8_prepare_a` function. Size of the
|
||||
* array = `rowsA` * `width`.
|
||||
* @param[in] scaleA The scaling factor (for quantization) of A
|
||||
* @param[in] zeroPointA The zero point (for quantization) of A
|
||||
* @param[in] inputMatrixBPrepared An array representing the prepared B
|
||||
* matrix. This must be obtained by using
|
||||
* one of `int8_prepare_b*` functions.
|
||||
* Size of the array = `width` * `colsB`.
|
||||
* @param[in] scaleB The scaling factor (for quantization) of B
|
||||
* @param[in] zeroPointB The zero point (for quantization) of B
|
||||
* @param[in] inputBiasPrepared An array representing the prepared bias.
|
||||
* This must be obtained by using
|
||||
* `int8_prepare_bias` function.
|
||||
* Size of the array = `colsB`
|
||||
* @param[in] unquantMultiplier A value that will be multiplied to the
|
||||
* final unquantization factor that is
|
||||
* prepared from `scaleA` and `scaleB`.
|
||||
* @param[in] rowsA No. of rows of Input matrix A. It should
|
||||
* be a positive integer.
|
||||
* @param[in] width No. of columns of Input matrix A (same as
|
||||
* no. of rows of Input matrix B). It
|
||||
* should be a positive integer and a
|
||||
* multiple of 64.
|
||||
* @param[in] colsB No. of columns of Input matrix B. Should
|
||||
* be a multiple of 8.
|
||||
* @param[out] output An array representing the result matrix
|
||||
* in row-major format.
|
||||
* Size of the array = `rowsA` * `colsB`.
|
||||
*
|
||||
* This function implements the intrinsic:
|
||||
* int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32,
|
||||
* zeroPointA: f32, inputMatrixBPrepared: i32, scaleB: f32, zeroPointB: f32,
|
||||
* inputBiasPrepared: i32, unquantMultiplier: f32,
|
||||
* rowsA: i32, width: i32, colsB: i32, output: i32)
|
||||
* which implements the function:
|
||||
* int8_multiply_and_add_bias(const int8_t* inputMatrixAPrepared, float
|
||||
* scaleA, float zeroPointA, const int8_t* inputMatrixBPrepared, float scaleB,
|
||||
* float zeroPointB, const float* inputBiasPrepared, float unquantMultiplier,
|
||||
* uint32_t rowsA, uint32_t width, uint32_t colsB, float*
|
||||
* output)
|
||||
*/
|
||||
int32_t IntrI8MultiplyAndAddBias(wasm::Instance* instance,
|
||||
uint32_t inputMatrixAPrepared, float scaleA,
|
||||
float zeroPointA,
|
||||
uint32_t inputMatrixBPrepared, float scaleB,
|
||||
float zeroPointB, uint32_t inputBiasPrepared,
|
||||
float unquantMultiplier, uint32_t rowsA,
|
||||
uint32_t width, uint32_t colsB,
|
||||
uint32_t output, uint8_t* memBase);
|
||||
|
||||
/* Select a subset of columns of prepared B.
|
||||
*
|
||||
* Indices of the columns to be selected are specified by an array.
|
||||
*
|
||||
* @param[in] inputMatrixBPrepared An array representing the prepared B
|
||||
* matrix. This must be obtained by using
|
||||
* one of the `int8_prepare_b*` functions.
|
||||
* Size of the array = `rowsB` * `colsB`.
|
||||
* @param[in] rowsB No. of rows of Input matrix B. It should
|
||||
* be a positive integer and a multiple
|
||||
* of 64.
|
||||
* @param[in] colsB No. of columns of Input matrix B. It
|
||||
* should be a positive integer and a
|
||||
* multiple of 8.
|
||||
* @param[in] colIndexList An array of column indices to be selected
|
||||
* from prepared B. All indices of the array
|
||||
* should be valid
|
||||
* i.e. 0 <= colIndexList[N] < colsB
|
||||
* where N = 0, 1, ..., (`sizeColIndexList` - 1)
|
||||
* @param[in] sizeColIndexList Size of the `colIndexList` array. It
|
||||
* should be a positive integer and a
|
||||
* multiple of 8.
|
||||
* @param[out] output An array representing the selected columns
|
||||
* of prepared B.
|
||||
* Size = `rowsB` * `sizeColIndexList`.
|
||||
*
|
||||
* This function implements the intrinsic:
|
||||
* int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32, colsB: i32,
|
||||
* colIndexList: i32, sizeColIndexList: i32, output: i32) which implements the
|
||||
* function: int8_select_columns_of_b(const int8_t* inputMatrixBPrepared,
|
||||
* uint32_t rowsB, uint32_t colsB, const uint32_t* colIndexList, const uint32_t
|
||||
* sizeColIndexList, int8_t* output)
|
||||
*/
|
||||
int32_t IntrI8SelectColumnsOfB(wasm::Instance* instance,
|
||||
uint32_t inputMatrixBPrepared, uint32_t rowsB,
|
||||
uint32_t colsB, uint32_t colIndexList,
|
||||
uint32_t sizeColIndexList, uint32_t output,
|
||||
uint8_t* memBase);
|
||||
|
||||
} // namespace intgemm
|
||||
} // namespace js
|
||||
|
||||
#endif // intgemm_IntegerGemmIntrinsic_h
|
@ -4,17 +4,23 @@
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
include("../js-config.mozbuild")
|
||||
include("../js-cxxflags.mozbuild")
|
||||
|
||||
FINAL_LIBRARY = "js"
|
||||
|
||||
with Files("*"):
|
||||
BUG_COMPONENT = ("Core", "JavaScript: WebAssembly")
|
||||
|
||||
LOCAL_INCLUDES += [
|
||||
"!..",
|
||||
"..",
|
||||
"/third_party/intgemm/intgemm",
|
||||
]
|
||||
|
||||
SOURCES += [
|
||||
"/third_party/intgemm/intgemm/intgemm.cc",
|
||||
"IntegerGemmIntrinsic.cpp",
|
||||
]
|
||||
|
||||
GeneratedFile(
|
||||
|
@ -19,6 +19,7 @@
|
||||
#ifndef wasm_builtins_h
|
||||
#define wasm_builtins_h
|
||||
|
||||
#include "intgemm/IntegerGemmIntrinsic.h"
|
||||
#include "jit/IonTypes.h"
|
||||
#include "wasm/WasmIntrinsicGenerated.h"
|
||||
|
||||
|
@ -15,3 +15,187 @@
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
#if defined(ENABLE_WASM_MOZ_INTGEMM)
|
||||
|
||||
# Intrinsics for integer matrix multiplication followed by addition of bias.
|
||||
# Please refer to @TOPSRCDIR/js/src/intgemm/IntegerGemmIntrinsic.h for more details on these intrinsics.
|
||||
|
||||
|
||||
# Prepare B for the Matrix Multiply intrinsic from Input matrix B.
|
||||
#
|
||||
# Quantization is performed on the input.
|
||||
# The final prepared B is in CPU-dependent format and can be used as an input to matrix multiply
|
||||
# intrinsic (`int8_multiply_and_add_bias`).
|
||||
#
|
||||
# int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint, uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
|
||||
# int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32)
|
||||
- op: I8PrepareB
|
||||
symbolic_address:
|
||||
name: IntrI8PrepareB
|
||||
type: Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General
|
||||
entry: intgemm::IntrI8PrepareB
|
||||
export: int8_prepare_b
|
||||
params:
|
||||
- I32
|
||||
- F32
|
||||
- F32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
|
||||
# Prepare B for the Matrix Multiply intrinsic from transposed version of Input matrix B.
|
||||
#
|
||||
# Quantization is performed on floating values of input.
|
||||
# The final prepared B is in CPU-dependent format and can be used as an input to matrix multiply
|
||||
# intrinsic (`int8_multiply_and_add_bias`).
|
||||
#
|
||||
# int8_prepare_b_from_transposed(const float* inputMatrixBTransposed, float scale, float zeroPoint, uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
|
||||
# int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32, zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32)
|
||||
- op: I8PrepareBFromTransposed
|
||||
symbolic_address:
|
||||
name: IntrI8PrepareBFromTransposed
|
||||
type: Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General
|
||||
entry: intgemm::IntrI8PrepareBFromTransposed
|
||||
export: int8_prepare_b_from_transposed
|
||||
params:
|
||||
- I32
|
||||
- F32
|
||||
- F32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
|
||||
# Prepare B for the Matrix Multiply intrinsic from a quantized and transposed version of Input
|
||||
# matrix B which is also in a CPU-independent format.
|
||||
#
|
||||
# The final prepared B is in CPU-dependent format and can be used as an input to matrix multiply
|
||||
# intrinsic (`int8_multiply_and_add_bias`).
|
||||
#
|
||||
# int8_prepare_b_from_quantized_transposed(const int8_t* inputMatrixBQuantizedTransposed, uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
|
||||
# int8_prepare_b_from_quantized_transposed(inputMatrixBQuantizedTransposed: i32, rowsB: i32, colsB: i32, outputMatrixB: i32)
|
||||
- op: I8PrepareBFromQuantizedTransposed
|
||||
symbolic_address:
|
||||
name: IntrI8PrepareBFromQuantizedTransposed
|
||||
type: Args_Int32_GeneralInt32Int32Int32Int32General
|
||||
entry: intgemm::IntrI8PrepareBFromQuantizedTransposed
|
||||
export: int8_prepare_b_from_quantized_transposed
|
||||
params:
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
|
||||
# Prepare A for the Matrix Multiply intrinsic from Input matrix A.
|
||||
#
|
||||
# It performs quantization on floating values of input.
|
||||
# The final prepared A might be architecture dependent. e.g. On some architectures like x86, it
|
||||
# might be unsigned (achieved by adding 127 to quantized values) while on others like Arm, it might
|
||||
# be signed.
|
||||
# The final prepared A can be used as an input to matrix multiply intrinsic
|
||||
# (`int8_multiply_and_add_bias`).
|
||||
#
|
||||
# int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint, uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA)
|
||||
# int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32, colsA: i32, outputMatrixA: i32)
|
||||
- op: I8PrepareA
|
||||
symbolic_address:
|
||||
name: IntrI8PrepareA
|
||||
type: Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General
|
||||
entry: intgemm::IntrI8PrepareA
|
||||
export: int8_prepare_a
|
||||
params:
|
||||
- I32
|
||||
- F32
|
||||
- F32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
|
||||
# Prepares bias for the Matrix Multiply intrinsic.
|
||||
#
|
||||
# It uses the prepared B (which must be obtained by using any of the `int8_prepare_b*` intrinsics) and
|
||||
# a bias input to prepare the final bias.
|
||||
#
|
||||
# The final bias can be used as an input to matrix multiply intrinsic (`int8_multiply_and_add_bias`).
|
||||
#
|
||||
# int8_prepare_bias(const int8_t* inputMatrixBPrepared, float scaleA, float zeroPointA, float scaleB, float zeroPointB, uint32_t rowsB, uint32_t colsB, const float* inputBias, float* output)
|
||||
# int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32, scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32, inputBias: i32, output: i32)
|
||||
- op: I8PrepareBias
|
||||
symbolic_address:
|
||||
name: IntrI8PrepareBias
|
||||
type: Args_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General
|
||||
entry: intgemm::IntrI8PrepareBias
|
||||
export: int8_prepare_bias
|
||||
params:
|
||||
- I32
|
||||
- F32
|
||||
- F32
|
||||
- F32
|
||||
- F32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
|
||||
# Perform multiplication of 2 matrices followed by adding a bias.
|
||||
#
|
||||
# i.e. Output = inputMatrixAPrepared * inputMatrixBPrepared + inputBiasPrepared
|
||||
#
|
||||
# The inputs of this intrinsic must be obtained by using `int8_prepare_a`,
|
||||
# one of the `int8_prepare_b*` and `int8_prepare_bias` intrinsics respectively.
|
||||
#
|
||||
# int8_multiply_and_add_bias(const int8_t* inputMatrixAPrepared, float scaleA, float zeroPointA,
|
||||
# const int8_t* inputMatrixBPrepared, float scaleB, float zeroPointB,
|
||||
# const float* inputBiasPrepared, float unquantMultiplier,
|
||||
# uint32_t rowsA, uint32_t width, uint32_t colsB, float* output)
|
||||
# int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32, zeroPointA: f32,
|
||||
# inputMatrixBPrepared: i32, scaleB: f32, zeroPointB: f32,
|
||||
# inputBiasPrepared: i32, unquantMultiplier: f32,
|
||||
# rowsA: i32, width: i32, colsB: i32, output: i32)
|
||||
- op: I8MultiplyAndAddBias
|
||||
symbolic_address:
|
||||
name: IntrI8MultiplyAndAddBias
|
||||
type: Args_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General
|
||||
entry: intgemm::IntrI8MultiplyAndAddBias
|
||||
export: int8_multiply_and_add_bias
|
||||
params:
|
||||
- I32
|
||||
- F32
|
||||
- F32
|
||||
- I32
|
||||
- F32
|
||||
- F32
|
||||
- I32
|
||||
- F32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
|
||||
# Select a subset of columns of prepared B.
|
||||
#
|
||||
# Indices of the columns to be selected are specified by an array.
|
||||
#
|
||||
# int8_select_columns_of_b(const int8_t* inputMatrixBPrepared, uint32_t rowsB, uint32_t colsB, const uint32_t* colIndexList, const uint32_t sizeColIndexList, int8_t* output)
|
||||
# int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32, colsB: i32, colIndexList: i32, sizeColIndexList: i32, output: i32)
|
||||
- op: I8SelectColumnsOfB
|
||||
symbolic_address:
|
||||
name: IntrI8SelectColumnsOfB
|
||||
type: Args_Int32_GeneralInt32Int32Int32Int32Int32Int32General
|
||||
entry: intgemm::IntrI8SelectColumnsOfB
|
||||
export: int8_select_columns_of_b
|
||||
params:
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
- I32
|
||||
|
||||
#endif // ENABLE_WASM_MOZ_INTGEMM
|
||||
|
@ -5321,8 +5321,15 @@ static bool WebAssembly_mozIntGemm(JSContext* cx, unsigned argc, Value* vp) {
|
||||
CallArgs args = CallArgsFromVp(argc, vp);
|
||||
|
||||
RootedWasmModuleObject module(cx);
|
||||
if (!wasm::CompileIntrinsicModule(cx, mozilla::Span<IntrinsicOp>(),
|
||||
Shareable::True, &module)) {
|
||||
wasm::IntrinsicOp ops[] = {
|
||||
wasm::IntrinsicOp::I8PrepareB,
|
||||
wasm::IntrinsicOp::I8PrepareBFromTransposed,
|
||||
wasm::IntrinsicOp::I8PrepareBFromQuantizedTransposed,
|
||||
wasm::IntrinsicOp::I8PrepareA,
|
||||
wasm::IntrinsicOp::I8PrepareBias,
|
||||
wasm::IntrinsicOp::I8MultiplyAndAddBias,
|
||||
wasm::IntrinsicOp::I8SelectColumnsOfB};
|
||||
if (!wasm::CompileIntrinsicModule(cx, ops, Shareable::False, &module)) {
|
||||
ReportOutOfMemory(cx);
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user