Bug 1746631 - Implement integer gemm intrinsic functions. r=rhunt

- Implements 7 intrinsic functions
 - These intrinsics are only enabled for x86/x86-64 platform and for
   privileged extensions
 - These intrinsics should never be accessible to web-pages
   -- Added corresponding mochitest

Differential Revision: https://phabricator.services.mozilla.com/D136430
This commit is contained in:
Abhishek Aggarwal 2022-02-04 14:28:29 +00:00
parent 1342f2782a
commit dfde362b9e
10 changed files with 1000 additions and 8 deletions

View File

@ -0,0 +1,3 @@
[DEFAULT]
[test_unavailable_for_webpage.html]

View File

@ -0,0 +1,29 @@
<!DOCTYPE HTML>
<html>
<!--
https://bugzilla.mozilla.org/show_bug.cgi?id=1746631
Verifies that the privileged mozIntGemm extension is NOT exposed on the
WebAssembly namespace for ordinary web content.
-->
<head>
<meta charset="utf-8">
<title>Test for Mozilla integer gemm (1746631) -- Mozilla integer gemm shouldn't be available for web pages</title>
<script src="/tests/SimpleTest/SimpleTest.js"></script>
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
</head>
<body>
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1746631">Feature Test 1746631</a>
<p id="display"></p>
<div id="content" style="display: none">
</div>
<pre id="test">
<script type="text/javascript">
SimpleTest.waitForExplicitFinish();
// mozIntGemm is restricted to privileged extensions; a content page must
// never see it as a property of the WebAssembly namespace object.
const gemm = "mozIntGemm";
is(gemm in WebAssembly, false, `"WebAssembly.${gemm}" shouldn't be defined for web pages`);
SimpleTest.finish();
</script>
</pre>
</body>
</html>

View File

@ -161,6 +161,7 @@ MOCHITEST_MANIFESTS += [
"mochitest/gamepad/mochitest.ini",
"mochitest/general/mochitest.ini",
"mochitest/geolocation/mochitest.ini",
"mochitest/integer-gemm/mochitest.ini",
"mochitest/keyhandling/mochitest.ini",
"mochitest/localstorage/mochitest.ini",
"mochitest/orientation/mochitest.ini",

View File

@ -903,12 +903,10 @@ option(
)
@depends("--enable-wasm-moz-intgemm")
def wasm_moz_intgemm(value):
if not value:
return
return True
@depends("--enable-wasm-moz-intgemm", target)
def wasm_moz_intgemm(value, target):
if value and target.cpu in ("x86", "x86_64"):
return True
set_config("ENABLE_WASM_MOZ_INTGEMM", wasm_moz_intgemm)

View File

@ -0,0 +1,405 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
#include "intgemm/IntegerGemmIntrinsic.h"
#include "mozilla/CheckedInt.h"
#include <intgemm.h>
#include <utility>
#include "js/HeapAPI.h"
#include "vm/JSContext.h"
#include "wasm/WasmInstance.h"
#include "wasm/WasmLog.h"
#include "vm/ArrayBufferObject-inl.h"
static constexpr uint32_t ARRAY_ALIGNMENT = 64;
static constexpr uint32_t ROWS_A_MULTIPLIER = 1;
static constexpr uint32_t COLUMNS_A_MULTIPLIER = 64;
static constexpr uint32_t ROWS_B_MULTIPLIER = COLUMNS_A_MULTIPLIER;
static constexpr uint32_t COLUMNS_B_MULTIPLIER = 8;
static constexpr uint32_t SELECTED_COLUMNS_B_MULTIPLIER = 8;
// Report a wasm error (e.g. out-of-bounds or unreachable) on the given
// context. File-local helper; `static` gives it internal linkage so it
// cannot collide with symbols in other translation units.
static void ReportGemmError(JSContext* cx, const unsigned errorNumber) {
  JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, errorNumber);
}
// Return the byte length of the wasm memory that `memBase` points into.
// `memBase` must be the data pointer of a js::WasmArrayRawBuffer.
// File-local helper; `static` for internal linkage.
static size_t GetWasmRawBufferLength(const uint8_t* memBase) {
  const js::WasmArrayRawBuffer* rawBuf =
      js::WasmArrayRawBuffer::fromDataPtr(memBase);
  return rawBuf->byteLength();
}
// Check that a matrix dimension is a positive integral multiple of
// `sizeMultiplier`. Logs a diagnostic and returns false on failure.
// File-local helper; `static` for internal linkage.
static bool CheckMatrixDimension(JSContext* cx, uint32_t size,
                                 uint32_t sizeMultiplier) {
  // A valid size is a positive integral multiple of the multiplier.
  if ((size == 0) || (size % sizeMultiplier != 0)) {
    js::wasm::Log(
        cx, "Invalid dimension value:%" PRIu32 " (should be a multiple of %u)",
        size, sizeMultiplier);
    return false;
  }
  return true;
}
// Check that the span [input, input + inputSize) lies inside the wasm memory
// of size `wasmBufferSize`. `input` is an offset into wasm memory and
// `inputSize` the span length. Logs and returns false on failure.
// File-local helper; `static` for internal linkage.
//
// NOTE(review): the `>=` comparison also rejects a span that ends exactly at
// the end of wasm memory (input + inputSize == wasmBufferSize). Confirm
// whether that extra conservatism is intentional.
static bool CheckMatrixBound(JSContext* cx, uint32_t input, uint64_t inputSize,
                             size_t wasmBufferSize) {
  mozilla::CheckedUint64 inputUpperLimit(inputSize);
  inputUpperLimit += input;
  // Bound check fails if the size overflows or it spans outside wasm memory.
  if (!inputUpperLimit.isValid() ||
      (inputUpperLimit.value() >= (uint64_t)wasmBufferSize)) {
    js::wasm::Log(cx, "Memory out of wasm bounds for matrix:%" PRIu32, input);
    return false;
  }
  return true;
}
// Check that `input` (an offset into wasm memory) is ARRAY_ALIGNMENT-aligned
// and that [input, input + inputSize) lies inside the wasm memory. Logs and
// returns false on failure. File-local helper; `static` for internal linkage.
static bool CheckMatrixBoundAndAlignment(JSContext* cx, uint32_t input,
                                         uint64_t inputSize,
                                         size_t wasmBufferSize) {
  // Alignment check: It is sufficient to check alignment for the offset rather
  // than for the actual pointer within wasm memory (as long as following
  // assert is satisfied: wasm memory bases are at least page-aligned).
  static_assert(js::gc::PageSize >= ARRAY_ALIGNMENT,
                "PageSize should be bigger than Alignment");
  if (input % ARRAY_ALIGNMENT != 0) {
    js::wasm::Log(
        cx, "Unaligned access for matrix:%" PRIu32 " (should be %u aligned)",
        input, ARRAY_ALIGNMENT);
    return false;
  }

  // Check Bound
  return CheckMatrixBound(cx, input, inputSize, wasmBufferSize);
}
/* Implementation of the `int8_prepare_b` intrinsic.
 *
 * Validates matrix dimensions and wasm-memory offsets, then calls the
 * third-party intgemm library to quantize and reorder Input matrix B into
 * the CPU-dependent prepared-B format.
 *
 * Returns 0 on success; returns -1 with a pending exception on failure
 * (per FailureMode::FailOnNegI32).
 *
 * Note: `zeroPoint` is accepted for API symmetry but is not used below.
 */
int32_t js::intgemm::IntrI8PrepareB(wasm::Instance* instance,
                                    uint32_t inputMatrixB, float scale,
                                    float zeroPoint, uint32_t rowsB,
                                    uint32_t colsB, uint32_t outputMatrixB,
                                    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareB.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory bound and alignment checks for matrices.
  // NOTE(review): sizeB is an element count; the input matrix holds float
  // elements (4 bytes each) while the output holds int8 — confirm whether
  // the input bound check should instead be in bytes (4 * sizeB).
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixB, sizeB, wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: inputB:%x rowsB:%" PRIu32 " colsB:%" PRIu32
              " outputB:%x sizeB:%" PRIu64 " wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixB, rowsB, colsB, outputMatrixB, sizeB,
              wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm) for PrepareB
  uint8_t* inputMatrixBPtr = &memBase[inputMatrixB];
  uint8_t* outputMatrixBPtr = &memBase[outputMatrixB];
  ::intgemm::Int8::PrepareB((const float*)inputMatrixBPtr,
                            (int8_t*)outputMatrixBPtr,
                            (float)scale,  // Quant Mult
                            rowsB, colsB);
  return 0;
}
/* Implementation of the `int8_prepare_b_from_transposed` intrinsic.
 *
 * Validates matrix dimensions and wasm-memory offsets, then calls the
 * intgemm library to quantize and reorder the transposed Input matrix B
 * into the CPU-dependent prepared-B format.
 *
 * Returns 0 on success; returns -1 with a pending exception on failure
 * (per FailureMode::FailOnNegI32).
 *
 * Note: `zeroPoint` is accepted for API symmetry but is not used below.
 */
int32_t js::intgemm::IntrI8PrepareBFromTransposed(
    wasm::Instance* instance, uint32_t inputMatrixBTransposed, float scale,
    float zeroPoint, uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB,
    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareBFromTransposed.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory bound checks for all matrices.
  // NOTE(review): sizeB is an element count; input elements are 4-byte
  // floats — confirm whether the input bound check should be in bytes.
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBTransposed, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: inputBT:%x rowsB:%" PRIu32 " colsB:%" PRIu32
              " outputB:%x sizeB:%" PRIu64 " wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixBTransposed, rowsB, colsB, outputMatrixB,
              sizeB, wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm) for PrepareBTransposed
  uint8_t* inputMatrixBTransposedPtr = &memBase[inputMatrixBTransposed];
  uint8_t* outputMatrixBPtr = &memBase[outputMatrixB];
  ::intgemm::Int8::PrepareBTransposed((const float*)inputMatrixBTransposedPtr,
                                      (int8_t*)outputMatrixBPtr,
                                      (float)scale,  // Quant Mult
                                      rowsB, colsB);
  return 0;
}
/* Implementation of the `int8_prepare_b_from_quantized_transposed` intrinsic.
 *
 * Validates matrix dimensions and wasm-memory offsets, then calls the
 * intgemm library to reorder an already-quantized, transposed Input matrix B
 * into the CPU-dependent prepared-B format.
 *
 * Returns 0 on success; returns -1 with a pending exception on failure
 * (per FailureMode::FailOnNegI32).
 */
int32_t js::intgemm::IntrI8PrepareBFromQuantizedTransposed(
    wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed,
    uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareBFromQuantizedTransposed.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory bound checks for all matrices
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBQuantizedTransposed, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) {
    // Fixed log label: the value logged here is sizeB, not sizeA (the
    // previous "sizeA" label was a copy-paste from IntrI8PrepareA).
    wasm::Log(cx,
              "%s: inputBQT:%x rowsB:%" PRIu32 " colsB:%" PRIu32
              " outputB:%x sizeB:%" PRIu64 " wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixBQuantizedTransposed, rowsB, colsB,
              outputMatrixB, sizeB, wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm)
  uint8_t* inputMatrixBQuantizedTransposedPtr =
      &memBase[inputMatrixBQuantizedTransposed];
  uint8_t* outputMatrixBPtr = &memBase[outputMatrixB];
  ::intgemm::Int8::PrepareBQuantizedTransposed(
      (const int8_t*)inputMatrixBQuantizedTransposedPtr,
      (int8_t*)outputMatrixBPtr, rowsB, colsB);
  return 0;
}
/* Implementation of the `int8_prepare_a` intrinsic.
 *
 * Validates matrix dimensions and wasm-memory offsets, then calls the
 * intgemm library to quantize Input matrix A into the (possibly
 * architecture-dependent) prepared-A format. Uses Int8Shift, i.e. the
 * shifted/unsigned-A variant of the intgemm API.
 *
 * Returns 0 on success; returns -1 with a pending exception on failure
 * (per FailureMode::FailOnNegI32).
 *
 * Note: `zeroPoint` is accepted for API symmetry but is not used below.
 */
int32_t js::intgemm::IntrI8PrepareA(wasm::Instance* instance,
                                    uint32_t inputMatrixA, float scale,
                                    float zeroPoint, uint32_t rowsA,
                                    uint32_t colsA, uint32_t outputMatrixA,
                                    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareA.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsA, COLUMNS_A_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsA:%" PRIu32 " colsA:%" PRIu32, __FUNCTION__, rowsA,
              colsA);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory bound checks for all matrices.
  // NOTE(review): sizeA is an element count; input elements are 4-byte
  // floats — confirm whether the input bound check should be in bytes.
  uint64_t sizeA = (uint64_t)rowsA * (uint64_t)colsA;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixA, sizeA, wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, outputMatrixA, sizeA, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: inputA:%x rowsA:%" PRIu32 " colsA:%" PRIu32
              " outputA:%x sizeA:%" PRIu64 " wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixA, rowsA, colsA, outputMatrixA, sizeA,
              wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm)
  uint8_t* inputMatrixAPtr = &memBase[inputMatrixA];
  uint8_t* outputMatrixAPtr = &memBase[outputMatrixA];
  ::intgemm::Int8Shift::PrepareA((const float*)inputMatrixAPtr,
                                 (int8_t*)outputMatrixAPtr, scale, rowsA,
                                 colsA);
  return 0;
}
/* Implementation of the `int8_prepare_bias` intrinsic.
 *
 * Validates dimensions and wasm-memory offsets, then computes the prepared
 * bias from prepared B and the input bias. The unquantization factor
 * compensates for the +127 shift applied to prepared A by the Int8Shift
 * variant (see the intgemm library's shifted-multiply scheme).
 *
 * Returns 0 on success; returns -1 with a pending exception on failure
 * (per FailureMode::FailOnNegI32).
 *
 * Note: `zeroPointA`/`zeroPointB` are accepted for API symmetry but are not
 * used below.
 */
int32_t js::intgemm::IntrI8PrepareBias(
    wasm::Instance* instance, uint32_t inputMatrixBPrepared, float scaleA,
    float zeroPointA, float scaleB, float zeroPointB, uint32_t rowsB,
    uint32_t colsB, uint32_t inputBias, uint32_t output, uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8PrepareBias.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB,
              colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory bound checks for all matrices. The bias and output arrays each
  // hold `colsB` elements; only prepared B requires 64-byte alignment.
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  uint64_t sizeBias = colsB;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBound(cx, inputBias, sizeBias, wasmBufferSize) ||
      !CheckMatrixBound(cx, output, sizeBias, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: preparedB:%x rowsB:%" PRIu32 " colsB:%" PRIu32
              " inputBias:%x outputBias:%x sizeB:%" PRIu64
              " wasmBufferSize:%zu",
              __FUNCTION__, inputMatrixBPrepared, rowsB, colsB, inputBias,
              output, sizeB, wasmBufferSize);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm).
  // unquantFactor = -(127/scaleA * 127/scaleB) / 127: undoes both
  // quantization scales and the a+127 shift in one multiply-add callback.
  uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared];
  uint8_t* inputBiasPtr = &memBase[inputBias];
  uint8_t* outputPtr = &memBase[output];
  float unquantFactor =
      (-1) * ((127.0f / scaleA) * (127.0f / scaleB)) / (127.0f);
  ::intgemm::Int8Shift::PrepareBias(
      (const int8_t*)inputMatrixBPreparedPtr, rowsB, colsB,
      ::intgemm::callbacks::UnquantizeAndAddBiasAndWrite(
          unquantFactor, (const float*)inputBiasPtr, (float*)outputPtr));
  return 0;
}
/* Implementation of the `int8_multiply_and_add_bias` intrinsic.
 *
 * Computes output = preparedA * preparedB + preparedBias, writing float
 * results into wasm memory. All three inputs must have been produced by the
 * corresponding int8_prepare_* intrinsics.
 *
 * Returns 0 on success; returns -1 with a pending exception on failure
 * (per FailureMode::FailOnNegI32).
 *
 * Note: `zeroPointA`/`zeroPointB` are accepted for API symmetry but are not
 * used below; `unquantMultiplier` is folded into the unquantization factor.
 */
int32_t js::intgemm::IntrI8MultiplyAndAddBias(
    wasm::Instance* instance, uint32_t inputMatrixAPrepared, float scaleA,
    float zeroPointA, uint32_t inputMatrixBPrepared, float scaleB,
    float zeroPointB, uint32_t inputBiasPrepared, float unquantMultiplier,
    uint32_t rowsA, uint32_t width, uint32_t colsB, uint32_t output,
    uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8MultiplyAndAddBias.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices. `width` is the shared inner dimension
  // (colsA == rowsB) and must be a multiple of 64.
  if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) ||
      !CheckMatrixDimension(cx, width, COLUMNS_A_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx, "%s: rowsA:%" PRIu32 " width:%" PRIu32 " colsB:%" PRIu32,
              __FUNCTION__, rowsA, width, colsB);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory bound checks for all matrices. Prepared A/B need 64-byte
  // alignment; the bias and output arrays only need to be in bounds.
  // NOTE(review): sizeBias/sizeOutput are element counts of 4-byte floats —
  // confirm whether these bound checks should be in bytes.
  uint64_t sizeA = (uint64_t)rowsA * (uint64_t)width;
  uint64_t sizeB = (uint64_t)width * (uint64_t)colsB;
  uint64_t sizeBias = (uint64_t)colsB;
  uint64_t sizeOutput = (uint64_t)rowsA * (uint64_t)colsB;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixAPrepared, sizeA,
                                    wasmBufferSize) ||
      !CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBound(cx, inputBiasPrepared, sizeBias, wasmBufferSize) ||
      !CheckMatrixBound(cx, output, sizeOutput, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: preparedA:%x preparedB:%x preparedBias:%x rowsA:%" PRIu32
              " width:%" PRIu32 " colsB:%" PRIu32
              " output:%x sizeA:%" PRIu64 " sizeB:%" PRIu64
              " sizeBias:%" PRIu64 " sizeOutput:%" PRIu64,
              __FUNCTION__, inputMatrixAPrepared, inputMatrixBPrepared,
              inputBiasPrepared, rowsA, width, colsB, output, sizeA, sizeB,
              sizeBias, sizeOutput);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm).
  // unquantFactor undoes both quantization scales, scaled by the caller's
  // multiplier; the callback unquantizes, adds bias, and writes the result.
  uint8_t* inputMatrixAPreparedPtr = &memBase[inputMatrixAPrepared];
  uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared];
  uint8_t* inputBiasPreparedPtr = &memBase[inputBiasPrepared];
  uint8_t* outputPtr = &memBase[output];
  float unquantFactor = unquantMultiplier / (scaleA * scaleB);
  ::intgemm::Int8Shift::Multiply(
      (const int8_t*)inputMatrixAPreparedPtr,
      (const int8_t*)inputMatrixBPreparedPtr, rowsA, width, colsB,
      ::intgemm::callbacks::UnquantizeAndAddBiasAndWrite(
          unquantFactor, (const float*)inputBiasPreparedPtr,
          (float*)outputPtr));
  return 0;
}
/* Implementation of the `int8_select_columns_of_b` intrinsic.
 *
 * Copies the columns of prepared B named by `colIndexList` (an array of
 * `sizeColIndexList` uint32 indices in wasm memory) into `output`.
 *
 * Returns 0 on success; returns -1 with a pending exception on failure
 * (per FailureMode::FailOnNegI32).
 */
int32_t js::intgemm::IntrI8SelectColumnsOfB(wasm::Instance* instance,
                                            uint32_t inputMatrixBPrepared,
                                            uint32_t rowsB, uint32_t colsB,
                                            uint32_t colIndexList,
                                            uint32_t sizeColIndexList,
                                            uint32_t output,
                                            uint8_t* memBase) {
  MOZ_ASSERT(wasm::SASigIntrI8SelectColumnsOfB.failureMode ==
             wasm::FailureMode::FailOnNegI32);
  JSContext* cx = instance->tlsData()->cx;

  // Size checks for matrices
  if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER) ||
      !CheckMatrixDimension(cx, sizeColIndexList,
                            SELECTED_COLUMNS_B_MULTIPLIER)) {
    wasm::Log(cx,
              "%s: rowsB:%" PRIu32 " colsB:%" PRIu32
              " sizeColIndexList:%" PRIu32,
              __FUNCTION__, rowsB, colsB, sizeColIndexList);
    ReportGemmError(cx, JSMSG_WASM_UNREACHABLE);
    return -1;
  }

  // Memory bound checks for all matrices.
  // NOTE(review): the colIndexList bound check uses the element count of a
  // uint32 array (4 bytes per element) — confirm whether it should be in
  // bytes. Individual index values are not range-checked here; presumably
  // the intgemm library requires 0 <= index < colsB — verify.
  uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB;
  uint64_t sizeOutput = (uint64_t)rowsB * (uint64_t)sizeColIndexList;
  size_t wasmBufferSize = GetWasmRawBufferLength(memBase);
  if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB,
                                    wasmBufferSize) ||
      !CheckMatrixBound(cx, colIndexList, sizeColIndexList, wasmBufferSize) ||
      !CheckMatrixBound(cx, output, sizeOutput, wasmBufferSize)) {
    wasm::Log(cx,
              "%s: preparedB:%x rowsB:%" PRIu32 " colsB:%" PRIu32
              " colList:%x sizeColList:%" PRIu32 " output:%x sizeB:%" PRIu64
              " sizeOutput:%" PRIu64,
              __FUNCTION__, inputMatrixBPrepared, rowsB, colsB, colIndexList,
              sizeColIndexList, output, sizeB, sizeOutput);
    ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS);
    return -1;
  }

  // Actual call to the 3rd party library (intgemm); the index list is passed
  // as a [begin, end) iterator pair.
  uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared];
  uint8_t* colIndexListPtr = &memBase[colIndexList];
  uint8_t* outputPtr = &memBase[output];
  ::intgemm::Int8::SelectColumnsB(
      (const int8_t*)inputMatrixBPreparedPtr, (int8_t*)outputPtr, rowsB,
      (const uint32_t*)colIndexListPtr,
      (const uint32_t*)colIndexListPtr + sizeColIndexList);
  return 0;
}

View File

@ -0,0 +1,358 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
#ifndef intgemm_IntegerGemmIntrinsic_h
#define intgemm_IntegerGemmIntrinsic_h
#include <stdint.h>
namespace js {
namespace wasm {
class Instance;
}
namespace intgemm {
/* Interface for integer matrix multiplication followed by addition of bias.
*
* C = A * B + Bias
*
* Input matrix A:
* - A 2-D matrix that typically represents activations as floating point
* values
* - no. of rows should be a positive integer
 *    - no. of columns should be a positive integral multiple of 64
* - is represented as array (contiguous memory locations) in row-major format
*
* Input matrix B:
* - A 2-D matrix that typically represents fixed model parameters as
* floating point values
* - no. of rows should be:
* -- equal to no. of columns of Input matrix A
 *       -- a positive integral multiple of 64
 *    - no. of columns should be a positive integral multiple of 8
* - is represented as array (contiguous memory locations) in row-major format
*
* Please note that it is also possible to pass Input matrix B in 2 more forms:
* - One that is already a quantized and transposed version of Input matrix B
* - Other that is already a transposed version of Input matrix B
*
* Input Bias:
* - is an array (contiguous memory locations) that represents bias
* - size of the array should be equal to the no. of columns of Input matrix B
*
* Output matrix C:
* - is a 2-D matrix that represents the result (= A * B + Bias)
* - no. of rows = no. of rows of Input matrix A
* - no. of columns = no. of columns of Input matrix B (in
* untransposed form)
* - is represented as array (contiguous memory locations) in row-major format
*
* Please note that most of the functions in this interface might have
* architecture specific implementations.
*
* Conventions followed for the interface:
* - Unless explicitly mentioned, Input matrix B refers to an unquantized
* (i.e. float values) and non-transposed version
* - no. of rows of Input matrix A = `rowsA`
* - no. of columns of Input matrix A (`colsA`) = no. of rows of Input matrix B
* (`rowsB`) = `width`
* - no. of columns of Input matrix B = `colsB`
*/
/* Prepare B for the Matrix Multiply function from Input matrix B.
*
* Quantization is performed on the input.
* The final prepared B is in CPU-dependent format and can be used as an input
* to matrix multiply function (`int8_multiply_and_add_bias`).
*
* Please note that this interface might have architecture specific
* implementation.
*
* @param[in] inputMatrixB An array representing the Input matrix B in
* row-major format.
* Size of the array = `rowsB` * `colsB`.
* Shape of the matrix: (`rowsB`, `colsB`)
* @param[in] scale The scaling factor (for quantization)
* @param[in] zeroPoint The zero point (for quantization)
* @param[in] rowsB No. of rows of Input matrix B. It should be
* a positive integer and a multiple of 64.
* @param[in] colsB No. of columns of Input matrix B. It should
* be a positive integer and a multiple of 8.
* @param[out] outputMatrixB An array representing the prepared B matrix.
* Size of the array = `rowsB` * `colsB`.
*
* This function implements the intrinsic:
* int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32,
* colsB: i32, outputMatrixB: i32) which implements the function:
* int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint,
* uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
*/
int32_t IntrI8PrepareB(wasm::Instance* instance, uint32_t inputMatrixB,
float scale, float zeroPoint, uint32_t rowsB,
uint32_t colsB, uint32_t outputMatrixB,
uint8_t* memBase);
/* Prepare B for the Matrix Multiply function from transposed version of Input
* matrix B.
*
* Quantization is performed on floating values of input.
* The final prepared B is in CPU-dependent format and can be used as an input
* to matrix multiply function (`int8_multiply_and_add_bias`).
*
* Please note that this interface might have architecture specific
* implementation.
*
* @param[in] inputMatrixBTransposed An array representing transposed version
* of Input matrix B.
* It is in column-major format.
* Size of the array = `rowsB` * `colsB`.
* Shape of the matrix: (`colsB`, `rowsB`)
* @param[in] scale The scaling factor (for quantization)
* @param[in] zeroPoint The zero point (for quantization)
* @param[in] rowsB No. of rows of Input matrix B. It should
* be a positive integer and a multiple of
* 64.
* @param[in] colsB No. of columns of Input matrix B. It
* should be a positive integer and a
* multiple of 8.
* @param[out] outputMatrixB An array representing the prepared B
* matrix. Size of array = `rowsB`*`colsB`
*
* This function implements the intrinsic:
* int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32,
* zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements
* the function: int8_prepare_b_from_transposed(const float*
* inputMatrixBTransposed, float scale, float zeroPoint, uint32_t rowsB,
* uint32_t colsB, int8_t* outputMatrixB)
*/
int32_t IntrI8PrepareBFromTransposed(wasm::Instance* instance,
uint32_t inputMatrixBTransposed,
float scale, float zeroPoint,
uint32_t rowsB, uint32_t colsB,
uint32_t outputMatrixB, uint8_t* memBase);
/* Prepare B for the Matrix Multiply function from a quantized and transposed
* version of Input matrix B which is also in a CPU-independent format.
*
* The final prepared B is in CPU-dependent format and can be used as an input
* to matrix multiply function (`int8_multiply_and_add_bias`).
*
* This function is useful while using the quantized models that are stored in a
* CPU-independent format on the disk.
*
* @param[in] inputMatrixBQuantizedTransposed An array representing the
* quantized and transposed
* version of Input matrix B.
* It is in column-major format.
* Size of array =
* `rowsB`*`colsB`
* Shape of the matrix:
* (`colsB`,`rowsB`)
* @param[in] rowsB No. of rows of Input matrix B.
* Should be a positive integer
* and a multiple of 64.
* @param[in] colsB No. of columns of Input matrix
* B. Should be a positive
* integer and a multiple of 8
* @param[out] outputMatrixB An array representing the
* prepared B matrix.
* Size: `rowsB` * `colsB`.
*
* This function implements the intrinsic:
* int8_prepare_b_from_quantized_transposed(inputMatrixBQuantizedTransposed:
* i32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements the
* function: int8_prepare_b_from_quantized_transposed(const int8_t*
* inputMatrixBQuantizedTransposed, uint32_t rowsB, uint32_t colsB, int8_t*
* outputMatrixB)
*/
int32_t IntrI8PrepareBFromQuantizedTransposed(
wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed,
uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase);
/* Prepare A for the Matrix Multiply function from Input matrix A.
*
* It performs quantization on floating values of input.
* The final prepared A might be architecture dependent. e.g. On some
* architectures like x86, it might be unsigned (achieved by adding 127 to
* quantized values) while on others like Arm, it might be signed. The final
* prepared A can be used as an input to matrix multiply function
* (`int8_multiply_and_add_bias`).
*
* Please note that this interface might have architecture specific
* implementation.
*
* @param[in] inputMatrixA An array representing the Input matrix A in
* row-major format.
* Size of the array = `rowsA` * `colsA`.
* Shape of the matrix: (`rowsA`, `colsA`)
* @param[in] scale The scaling factor (for quantization)
* @param[in] zeroPoint The zero point (for quantization)
* @param[in] rowsA No. of rows of Input matrix A. It should be a
* positive integer.
* @param[in] colsA No. of columns of Input matrix A. It should be a
* positive integer and a multiple of 64.
* @param[out] outputMatrixA An array representing the prepared A matrix.
* Size of the array = `rowsA` * `colsA`.
*
* This function implements the intrinsic:
* int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32,
* colsA: i32, outputMatrixA: i32) which implements the function:
* int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint,
* uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA)
*/
int32_t IntrI8PrepareA(wasm::Instance* instance, uint32_t inputMatrixA,
float scale, float zeroPoint, uint32_t rowsA,
uint32_t colsA, uint32_t outputMatrixA,
uint8_t* memBase);
/* Prepares bias for the Matrix Multiply function.
*
* It uses the prepared B (which must be obtained by using any of the
* int8_prepare_b* functions) and a bias input to prepare the final bias.
*
* The final bias can be used as an input to matrix multiply function
* (`int8_multiply_and_add_bias`).
*
* @param[in] inputMatrixBPrepared An array representing the prepared B
* matrix. Size of array = `rowsB`*`colsB`.
* @param[in] scaleA The scaling factor (for quantization) of A
* @param[in] zeroPointA The zero point (for quantization) of A
* @param[in] scaleB The scaling factor (for quantization) of B
* @param[in] zeroPointB The zero point (for quantization) of B
* @param[in] rowsB No. of rows of Input matrix B (unquantized
* & non-transposed). It should be a positive
* integer and a multiple of 64.
* @param[in] colsB No. of columns of Input matrix B
* (unquantized & non-transposed). It should
* be a positive integer and a multiple of 8.
* @param[in] inputBias An array representing the input bias. Size
* of array = `colsB`
* @param[out] output An array representing the final prepared
* bias. Size of the array = `colsB`
*
* This function implements the intrinsic:
* int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32,
* scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32, inputBias: i32, output:
* i32) which implements the function: int8_prepare_bias(const int8_t*
* inputMatrixBPrepared, float scaleA, float zeroPointA, float scaleB, float
* zeroPointB, uint32_t rowsB, uint32_t colsB, const float* inputBias, float*
* output)
*/
int32_t IntrI8PrepareBias(wasm::Instance* instance,
uint32_t inputMatrixBPrepared, float scaleA,
float zeroPointA, float scaleB, float zeroPointB,
uint32_t rowsB, uint32_t colsB, uint32_t inputBias,
uint32_t output, uint8_t* memBase);
/* Perform multiplication of 2 matrices followed by adding a bias.
*
* i.e Output = inputMatrixAPrepared * inputMatrixBPrepared + inputBiasPrepared
*
* The inputs inputMatrixAPrepared, inputMatrixBPrepared and inputBiasPrepared
* of this function must be obtained by using `int8_prepare_A`, one of the
* `int8_prepare_b*` and `int8_prepare_bias` functions respectively.
*
* Please note that this interface might have architecture specific
* implementation.
*
* @param[in] inputMatrixAPrepared An array representing the prepared A
* matrix. This must be obtained by using
* `int8_prepare_A` function. Size of the
* array = `rowsA` * `width`.
* @param[in] scaleA The scaling factor (quantization) of A
* @param[in] zeroPointA The zero point (for quantization) of A
* @param[in] inputMatrixBPrepared An array representing the prepared B
* matrix. This must be obtained by using
* one of `int8_prepare_b*` functions.
* Size of the array = `width` * `colsB`.
* @param[in] scaleB The scaling factor (quantization) of B
* @param[in] zeroPointB The zero point (for quantization) of B
* @param[in] inputBiasPrepared An array representing the prepared bias.
* This must be obtained by using
* `int8_prepare_bias` function.
* Size of the array = `colsB`
* @param[in] unquantMultiplier A value that will be multiplied to the
* final unquantization factor that is
* prepared from `scaleA` and `scaleB`.
* @param[in] rowsA No. of rows of Input matrix A. It should
* be a positive integer.
 * @param[in]   width                  No. of columns of Input matrix A (same
 *                                     as no. of rows of Input matrix B). It
 *                                     should be a positive integer and a
 *                                     multiple of 64.
* @param[in] colsB No. of columns of Input matrix B. Should
* be a multiple of 8.
* @param[out] output An array representing the result matrix
* in row-major format.
* Size of the array = `rowsA` * `colsB`.
*
* This function implements the intrinsic:
* int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32,
* zeroPointA: f32, inputMatrixBPrepared: i32, scaleB: f32, zeroPointB: f32,
* inputBiasPrepared: i32, unquantMultiplier: f32,
* rowsA: i32, width: i32, colsB: i32, output: i32)
* which implements the function:
* int8_multiply_and_add_bias(const int8_t* inputMatrixAPrepared, float
* scaleA, float zeroPointA, const int8_t* inputMatrixBPrepared, float scaleB,
* float zeroPointB, const float* inputBiasPrepared, float unquantMultiplier,
* uint32_t rowsA, uint32_t width, uint32_t colsB, float*
* output)
*/
int32_t IntrI8MultiplyAndAddBias(wasm::Instance* instance,
uint32_t inputMatrixAPrepared, float scaleA,
float zeroPointA,
uint32_t inputMatrixBPrepared, float scaleB,
float zeroPointB, uint32_t inputBiasPrepared,
float unquantMultiplier, uint32_t rowsA,
uint32_t width, uint32_t colsB,
uint32_t output, uint8_t* memBase);
/* Select a subset of columns of prepared B.
*
* Indices of the columns to be selected are specified by an array.
*
* @param[in] inputMatrixBPrepared An array representing the prepared B
* matrix. This must be obtained by using
* one of the `int8_prepare_b*` functions.
* Size of the array = `rowsB` * `colsB`.
* @param[in] rowsB No. of rows of Input matrix B. It should
* be a positive integer and a multiple
* of 64.
* @param[in] colsB No. of columns of Input matrix B. It
* should be a positive integer and a
* multiple of 8.
* @param[in] colIndexList An array of column indices to be selected
* from prepared B. All indices of the array
* should be valid
* i.e. 0 <= colIndexList[N] < colsB
* where N = 0, 1 ....(`sizeColIndexList`-1)
* @param[in] sizeColIndexList Size of the `colIndexList` array. It
* should be a positive integer and a
* multiple of 8.
* @param[out] output An array representing the selected columns
* of prepared B.
* Size = `rowsB` * `sizeColIndexList`.
*
* This function implements the intrinsic:
* int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32, colsB: i32,
* colIndexList: i32, sizeColIndexList: i32, output: i32) which implements the
* function: int8_select_columns_of_b(const int8_t* inputMatrixBPrepared,
* uint32_t rowsB, uint32_t colsB, const uint32_t* colIndexList, const uint32_t
* sizeColIndexList, int8_t* output)
*/
int32_t IntrI8SelectColumnsOfB(wasm::Instance* instance,
uint32_t inputMatrixBPrepared, uint32_t rowsB,
uint32_t colsB, uint32_t colIndexList,
uint32_t sizeColIndexList, uint32_t output,
uint8_t* memBase);
} // namespace intgemm
} // namespace js
#endif // intgemm_IntegerGemmIntrinsic_h

View File

@ -4,17 +4,23 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Shared SpiderMonkey build configuration and compiler flags.
include("../js-config.mozbuild")
include("../js-cxxflags.mozbuild")

# This directory is linked into the JS engine library.
FINAL_LIBRARY = "js"

with Files("*"):
    BUG_COMPONENT = ("Core", "JavaScript: WebAssembly")

LOCAL_INCLUDES += [
    # "!" prefix is objdir-relative in moz.build: picks up generated headers.
    "!..",
    # js/src itself.
    "..",
    # Headers of the vendored third-party intgemm library.
    "/third_party/intgemm/intgemm",
]

SOURCES += [
    # Third-party integer matrix multiply implementation.
    "/third_party/intgemm/intgemm/intgemm.cc",
    # Glue between the wasm intrinsics and the intgemm library.
    "IntegerGemmIntrinsic.cpp",
]
GeneratedFile(

View File

@ -19,6 +19,7 @@
#ifndef wasm_builtins_h
#define wasm_builtins_h
#include "intgemm/IntegerGemmIntrinsic.h"
#include "jit/IonTypes.h"
#include "wasm/WasmIntrinsicGenerated.h"

View File

@ -15,3 +15,187 @@
- I32
- I32
- I32
#if defined(ENABLE_WASM_MOZ_INTGEMM)
# Intrinsics for integer matrix multiplication followed by addition of bias.
# Please refer to @TOPSRCDIR/js/src/intgemm/IntegerGemmIntrinsic.h for more details on these intrinsics.
# Prepare B for the Matrix Multiply intrinsic from Input matrix B.
#
# Quantization is performed on the input.
# The final prepared B is in CPU-dependent format and can be used as an input to matrix multiply
# intrinsic (`int8_multiply_and_add_bias`).
#
# C++ signature:
#   int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint, uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
# Wasm signature:
#   int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32)
- op: I8PrepareB
  symbolic_address:
    name: IntrI8PrepareB
    type: Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General
  entry: intgemm::IntrI8PrepareB
  export: int8_prepare_b
  params:
    - I32  # inputMatrixB
    - F32  # scale
    - F32  # zeroPoint
    - I32  # rowsB
    - I32  # colsB
    - I32  # outputMatrixB
# Prepare B for the Matrix Multiply intrinsic from transposed version of Input matrix B.
#
# Quantization is performed on floating values of input.
# The final prepared B is in CPU-dependent format and can be used as an input to matrix multiply
# intrinsic (`int8_multiply_and_add_bias`).
#
# C++ signature:
#   int8_prepare_b_from_transposed(const float* inputMatrixBTransposed, float scale, float zeroPoint, uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
# Wasm signature:
#   int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32, zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32)
- op: I8PrepareBFromTransposed
  symbolic_address:
    name: IntrI8PrepareBFromTransposed
    type: Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General
  entry: intgemm::IntrI8PrepareBFromTransposed
  export: int8_prepare_b_from_transposed
  params:
    - I32  # inputMatrixBTransposed
    - F32  # scale
    - F32  # zeroPoint
    - I32  # rowsB
    - I32  # colsB
    - I32  # outputMatrixB
# Prepare B for the Matrix Multiply intrinsic from a quantized and transposed version of Input
# matrix B which is also in a CPU-independent format.
#
# The final prepared B is in CPU-dependent format and can be used as an input to matrix multiply
# intrinsic (`int8_multiply_and_add_bias`).
#
# C++ signature:
#   int8_prepare_b_from_quantized_transposed(const int8_t* inputMatrixBQuantizedTransposed, uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
# Wasm signature:
#   int8_prepare_b_from_quantized_transposed(inputMatrixBQuantizedTransposed: i32, rowsB: i32, colsB: i32, outputMatrixB: i32)
- op: I8PrepareBFromQuantizedTransposed
  symbolic_address:
    name: IntrI8PrepareBFromQuantizedTransposed
    type: Args_Int32_GeneralInt32Int32Int32Int32General
  entry: intgemm::IntrI8PrepareBFromQuantizedTransposed
  export: int8_prepare_b_from_quantized_transposed
  params:
    - I32  # inputMatrixBQuantizedTransposed
    - I32  # rowsB
    - I32  # colsB
    - I32  # outputMatrixB
# Prepare A for the Matrix Multiply intrinsic from Input matrix A.
#
# It performs quantization on floating values of input.
# The final prepared A might be architecture dependent. e.g. On some architectures like x86, it
# might be unsigned (achieved by adding 127 to quantized values) while on others like Arm, it might
# be signed.
# The final prepared A can be used as an input to matrix multiply intrinsic
# (`int8_multiply_and_add_bias`).
#
# C++ signature:
#   int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint, uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA)
# Wasm signature:
#   int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32, colsA: i32, outputMatrixA: i32)
- op: I8PrepareA
  symbolic_address:
    name: IntrI8PrepareA
    type: Args_Int32_GeneralInt32Float32Float32Int32Int32Int32General
  entry: intgemm::IntrI8PrepareA
  export: int8_prepare_a
  params:
    - I32  # inputMatrixA
    - F32  # scale
    - F32  # zeroPoint
    - I32  # rowsA
    - I32  # colsA
    - I32  # outputMatrixA
# Prepares bias for the Matrix Multiply intrinsic.
#
# It uses the prepared B (which must be obtained by using any of the `int8_prepare_b*` intrinsics) and
# a bias input to prepare the final bias.
#
# The final bias can be used as an input to matrix multiply intrinsic (`int8_multiply_and_add_bias`).
#
# C++ signature:
#   int8_prepare_bias(const int8_t* inputMatrixBPrepared, float scaleA, float zeroPointA, float scaleB, float zeroPointB, uint32_t rowsB, uint32_t colsB, const float* inputBias, float* output)
# Wasm signature:
#   int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32, scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32, inputBias: i32, output: i32)
- op: I8PrepareBias
  symbolic_address:
    name: IntrI8PrepareBias
    type: Args_Int32_GeneralInt32Float32Float32Float32Float32Int32Int32Int32Int32General
  entry: intgemm::IntrI8PrepareBias
  export: int8_prepare_bias
  params:
    - I32  # inputMatrixBPrepared
    - F32  # scaleA
    - F32  # zeroPointA
    - F32  # scaleB
    - F32  # zeroPointB
    - I32  # rowsB
    - I32  # colsB
    - I32  # inputBias
    - I32  # output
# Perform multiplication of 2 matrices followed by adding a bias.
#
# i.e Output = inputMatrixAPrepared * inputMatrixBPrepared + inputBiasPrepared
#
# The inputs of this intrinsic must be obtained by using `int8_prepare_a`,
# one of the `int8_prepare_b*` and `int8_prepare_bias` intrinsics respectively.
#
# C++ signature:
#   int8_multiply_and_add_bias(const int8_t* inputMatrixAPrepared, float scaleA, float zeroPointA,
#                              const int8_t* inputMatrixBPrepared, float scaleB, float zeroPointB,
#                              const float* inputBiasPrepared, float unquantMultiplier,
#                              uint32_t rowsA, uint32_t width, uint32_t colsB, float* output)
# Wasm signature:
#   int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32, zeroPointA: f32,
#                              inputMatrixBPrepared: i32, scaleB: f32, zeroPointB: f32,
#                              inputBiasPrepared: i32, unquantMultiplier: f32,
#                              rowsA: i32, width: i32, colsB: i32, output: i32)
- op: I8MultiplyAndAddBias
  symbolic_address:
    name: IntrI8MultiplyAndAddBias
    type: Args_Int32_GeneralInt32Float32Float32Int32Float32Float32Int32Float32Int32Int32Int32Int32General
  entry: intgemm::IntrI8MultiplyAndAddBias
  export: int8_multiply_and_add_bias
  params:
    - I32  # inputMatrixAPrepared
    - F32  # scaleA
    - F32  # zeroPointA
    - I32  # inputMatrixBPrepared
    - F32  # scaleB
    - F32  # zeroPointB
    - I32  # inputBiasPrepared
    - F32  # unquantMultiplier
    - I32  # rowsA
    - I32  # width
    - I32  # colsB
    - I32  # output
# Select a subset of columns of prepared B.
#
# Indices of the columns to be selected are specified by an array.
#
# C++ signature:
#   int8_select_columns_of_b(const int8_t* inputMatrixBPrepared, uint32_t rowsB, uint32_t colsB, const uint32_t* colIndexList, const uint32_t sizeColIndexList, int8_t* output)
# Wasm signature:
#   int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32, colsB: i32, colIndexList: i32, sizeColIndexList: i32, output: i32)
- op: I8SelectColumnsOfB
  symbolic_address:
    name: IntrI8SelectColumnsOfB
    type: Args_Int32_GeneralInt32Int32Int32Int32Int32Int32General
  entry: intgemm::IntrI8SelectColumnsOfB
  export: int8_select_columns_of_b
  params:
    - I32  # inputMatrixBPrepared
    - I32  # rowsB
    - I32  # colsB
    - I32  # colIndexList
    - I32  # sizeColIndexList
    - I32  # output
#endif // ENABLE_WASM_MOZ_INTGEMM

View File

@ -5321,8 +5321,15 @@ static bool WebAssembly_mozIntGemm(JSContext* cx, unsigned argc, Value* vp) {
CallArgs args = CallArgsFromVp(argc, vp);
RootedWasmModuleObject module(cx);
if (!wasm::CompileIntrinsicModule(cx, mozilla::Span<IntrinsicOp>(),
Shareable::True, &module)) {
wasm::IntrinsicOp ops[] = {
wasm::IntrinsicOp::I8PrepareB,
wasm::IntrinsicOp::I8PrepareBFromTransposed,
wasm::IntrinsicOp::I8PrepareBFromQuantizedTransposed,
wasm::IntrinsicOp::I8PrepareA,
wasm::IntrinsicOp::I8PrepareBias,
wasm::IntrinsicOp::I8MultiplyAndAddBias,
wasm::IntrinsicOp::I8SelectColumnsOfB};
if (!wasm::CompileIntrinsicModule(cx, ops, Shareable::False, &module)) {
ReportOutOfMemory(cx);
return false;
}