Bug 1253137 - Baldr: use length+bytes instead of c-strings (r=sunfish)

MozReview-Commit-ID: 1GYHSyxx6n1
2024-11-28 15:23:51 +00:00 · 2016-03-06 17:46:23 -06:00 · 2016-03-06 17:46:23 -06:00 · 6e1a0154ca
commit 6e1a0154ca
parent c4ff9aece7
4 changed files with 101 additions and 96 deletions
--- a/js/src/asmjs/Wasm.cpp
+++ b/js/src/asmjs/Wasm.cpp
@@ -247,7 +247,8 @@ DecodeCallIndirect(FunctionDecoder& f, ExprType* type)
 static bool
 DecodeConstI32(FunctionDecoder& f, ExprType* type)
 {
-    if (!f.d().readVarU32())
+    uint32_t _;
+    if (!f.d().readVarU32(&_))
        return f.fail("unable to read i32.const immediate");

    *type = ExprType::I32;
@@ -257,7 +258,8 @@ DecodeConstI32(FunctionDecoder& f, ExprType* type)
 static bool
 DecodeConstI64(FunctionDecoder& f, ExprType* type)
 {
-    if (!f.d().readVarU64())
+    uint64_t _;
+    if (!f.d().readVarU64(&_))
        return f.fail("unable to read i64.const immediate");

    *type = ExprType::I64;
@@ -833,24 +835,6 @@ DecodeExpr(FunctionDecoder& f, ExprType* type)
    return f.fail("bad expression code");
 }

-/*****************************************************************************/
-// dynamic link data
-
-struct ImportName
-{
-    UniqueChars module;
-    UniqueChars func;
-
-    ImportName(UniqueChars module, UniqueChars func)
-      : module(Move(module)), func(Move(func))
-    {}
-    ImportName(ImportName&& rhs)
-      : module(Move(rhs.module)), func(Move(rhs.func))
-    {}
-};
-
-typedef Vector<ImportName, 0, SystemAllocPolicy> ImportNameVector;
-
 /*****************************************************************************/
 // wasm decoding and generation

@@ -1042,6 +1026,21 @@ CheckTypeForJS(JSContext* cx, Decoder& d, const Sig& sig)
    return true;
 }

+struct ImportName
+{
+    Bytes module;
+    Bytes func;
+
+    ImportName(Bytes&& module, Bytes&& func)
+      : module(Move(module)), func(Move(func))
+    {}
+    ImportName(ImportName&& rhs)
+      : module(Move(rhs.module)), func(Move(rhs.func))
+    {}
+};
+
+typedef Vector<ImportName, 0, SystemAllocPolicy> ImportNameVector;
+
 static bool
 DecodeImport(JSContext* cx, Decoder& d, ModuleGeneratorData* init, ImportNameVector* importNames)
 {
@@ -1055,15 +1054,15 @@ DecodeImport(JSContext* cx, Decoder& d, ModuleGeneratorData* init, ImportNameVec
    if (!CheckTypeForJS(cx, d, *sig))
        return false;

-    UniqueChars moduleName = d.readCString();
-    if (!moduleName)
+    Bytes moduleName;
+    if (!d.readBytes(&moduleName))
        return Fail(cx, d, "expected import module name");

-    if (!*moduleName.get())
+    if (moduleName.empty())
        return Fail(cx, d, "module name cannot be empty");

-    UniqueChars funcName = d.readCString();
-    if (!funcName)
+    Bytes funcName;
+    if (!d.readBytes(&funcName))
        return Fail(cx, d, "expected import func name");

    return importNames->emplaceBack(Move(moduleName), Move(funcName));
@@ -1148,14 +1147,26 @@ DecodeMemory(JSContext* cx, Decoder& d, ModuleGenerator& mg, MutableHandle<Array
 typedef HashSet<const char*, CStringHasher> CStringSet;

 static UniqueChars
-DecodeFieldName(JSContext* cx, Decoder& d, CStringSet* dupSet)
+DecodeExportName(JSContext* cx, Decoder& d, CStringSet* dupSet)
 {
-    UniqueChars fieldName = d.readCString();
-    if (!fieldName) {
-        Fail(cx, d, "expected export external name string");
+    Bytes fieldBytes;
+    if (!d.readBytes(&fieldBytes)) {
+        Fail(cx, d, "expected export name");
        return nullptr;
    }

+    if (memchr(fieldBytes.begin(), 0, fieldBytes.length())) {
+        Fail(cx, d, "null in export names not yet supported");
+        return nullptr;
+    }
+
+    if (!fieldBytes.append(0))
+        return nullptr;
+
+    UniqueChars fieldName((char*)fieldBytes.extractRawBuffer());
+    if (!fieldName)
+        return nullptr;
+
    CStringSet::AddPtr p = dupSet->lookupForAdd(fieldName.get());
    if (p) {
        Fail(cx, d, "duplicate export");
@@ -1181,7 +1192,7 @@ DecodeFunctionExport(JSContext* cx, Decoder& d, ModuleGenerator& mg, CStringSet*
    if (!CheckTypeForJS(cx, d, mg.funcSig(funcIndex)))
        return false;

-    UniqueChars fieldName = DecodeFieldName(cx, d, dupSet);
+    UniqueChars fieldName = DecodeExportName(cx, d, dupSet);
    if (!fieldName)
        return false;

@@ -1340,7 +1351,7 @@ DecodeDataSegments(JSContext* cx, Decoder& d, Handle<ArrayBufferObject*> heap)
            return Fail(cx, d, "data segment does not fit in memory");

        const uint8_t* src;
-        if (!d.readRawData(numBytes, &src))
+        if (!d.readBytesRaw(numBytes, &src))
            return Fail(cx, d, "data segment shorter than declared");

        memcpy(heapBase + dstOffset, src, numBytes);
@@ -1452,9 +1463,9 @@ CheckCompilerSupport(JSContext* cx)
 }

 static bool
-GetProperty(JSContext* cx, HandleObject obj, const char* utf8Chars, MutableHandleValue v)
+GetProperty(JSContext* cx, HandleObject obj, const Bytes& bytes, MutableHandleValue v)
 {
-    JSAtom* atom = AtomizeUTF8Chars(cx, utf8Chars, strlen(utf8Chars));
+    JSAtom* atom = AtomizeUTF8Chars(cx, (char*)bytes.begin(), bytes.length());
    if (!atom)
        return false;

@@ -1471,15 +1482,15 @@ ImportFunctions(JSContext* cx, HandleObject importObj, const ImportNameVector& i

    for (const ImportName& name : importNames) {
        RootedValue v(cx);
-        if (!GetProperty(cx, importObj, name.module.get(), &v))
+        if (!GetProperty(cx, importObj, name.module, &v))
            return false;

-        if (*name.func.get()) {
+        if (!name.func.empty()) {
            if (!v.isObject())
                return Fail(cx, "import object field is not an Object");

            RootedObject obj(cx, &v.toObject());
-            if (!GetProperty(cx, obj, name.func.get(), &v))
+            if (!GetProperty(cx, obj, name.func, &v))
                return false;
        }

@@ -1524,6 +1535,7 @@ wasm::Eval(JSContext* cx, Handle<TypedArrayObject*> code, HandleObject importObj
    UniqueExportMap exportMap;
    Rooted<ArrayBufferObject*> heap(cx);
    Rooted<WasmModuleObject*> moduleObj(cx);
+
    if (!DecodeModule(cx, Move(file), bytes, length, &importNames, &exportMap, &heap, &moduleObj)) {
        if (!cx->isExceptionPending())
            ReportOutOfMemory(cx);
--- a/js/src/asmjs/WasmBinary.h
+++ b/js/src/asmjs/WasmBinary.h
@@ -381,20 +381,17 @@ class Encoder
            offset++;
        return offset - start + 1;
    }
-    static const uint32_t EnumSentinel = 0x3fff;

    template <class T>
    MOZ_WARN_UNUSED_RESULT bool writePatchableEnum(size_t* offset) {
-        static_assert(uint32_t(T::Limit) <= EnumSentinel, "reserve enough bits");
        *offset = bytes_.length();
-        return writeVarU32(EnumSentinel);
+        return writeVarU32(uint32_t(T::Limit));
    }

    template <class T>
    void patchEnum(size_t offset, T v) {
-        static_assert(uint32_t(T::Limit) <= UINT32_MAX, "fits");
        MOZ_ASSERT(uint32_t(v) < uint32_t(T::Limit));
-        return patchVarU32(offset, uint32_t(v), EnumSentinel);
+        return patchVarU32(offset, uint32_t(v), uint32_t(T::Limit));
    }

  public:
@@ -470,20 +467,18 @@ class Encoder
        patchEnum(offset, expr);
    }

-    // C-strings are written in UTF8 and null-terminated while raw data can
-    // contain nulls and instead has an explicit byte length.
+    // Byte ranges start with an LEB128 length followed by an arbitrary sequence
+    // of bytes. When used for strings, bytes are to be interpreted as utf8.

-    MOZ_WARN_UNUSED_RESULT bool writeCString(const char* cstr) {
-        return bytes_.append(reinterpret_cast<const uint8_t*>(cstr), strlen(cstr) + 1);
-    }
-    MOZ_WARN_UNUSED_RESULT bool writeRawData(const uint8_t* bytes, uint32_t numBytes) {
-        return bytes_.append(bytes, numBytes);
+    MOZ_WARN_UNUSED_RESULT bool writeBytes(const void* bytes, uint32_t numBytes) {
+        return writeVarU32(numBytes) &&
+               bytes_.append(reinterpret_cast<const uint8_t*>(bytes), numBytes);
    }

    // A "section" is a contiguous range of bytes that stores its own size so
    // that it may be trivially skipped without examining the contents. Sections
    // require backpatching since the size of the section is only known at the
-    // end while the size's uint32 must be stored at the beginning. Immediately
+    // end while the size's varU32 must be stored at the beginning. Immediately
    // after the section length is the string id of the section.

    template <size_t IdSizeWith0>
@@ -492,7 +487,7 @@ class Encoder
        MOZ_ASSERT(id[IdSize] == '\0');
        return writePatchableVarU32(offset) &&
               writeVarU32(IdSize) &&
-               writeRawData(reinterpret_cast<const uint8_t*>(id), IdSize);
+               bytes_.append(reinterpret_cast<const uint8_t*>(id), IdSize);
    }
    void finishSection(size_t offset) {
        return patchVarU32(offset, bytes_.length() - offset - varU32ByteLength(offset));
@@ -528,8 +523,7 @@ class Decoder
    MOZ_WARN_UNUSED_RESULT bool read(T* out) {
        if (bytesRemain() < sizeof(T))
            return false;
-        if (out)
-            memcpy((void*)out, cur_, sizeof(T));
+        memcpy((void*)out, cur_, sizeof(T));
        cur_ += sizeof(T);
        return true;
    }
@@ -540,8 +534,7 @@ class Decoder
        uint32_t u32;
        if (!readVarU32(&u32) || u32 >= uint32_t(T::Limit))
            return false;
-        if (out)
-            *out = T(u32);
+        *out = T(u32);
        return true;
    }

@@ -561,7 +554,7 @@ class Decoder
    }

    template <typename UInt>
-    MOZ_WARN_UNUSED_RESULT bool readVarU(UInt* out = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readVarU(UInt* out) {
        const unsigned numBits = sizeof(UInt) * CHAR_BIT;
        const unsigned remainderBits = numBits % 7;
        const unsigned numBitsInSevens = numBits - remainderBits;
@@ -572,8 +565,7 @@ class Decoder
            if (!readFixedU8(&byte))
                return false;
            if (!(byte & 0x80)) {
-                if (out)
-                    *out = u | UInt(byte) << shift;
+                *out = u | UInt(byte) << shift;
                return true;
            }
            u |= UInt(byte & 0x7F) << shift;
@@ -581,8 +573,7 @@ class Decoder
        } while (shift != numBitsInSevens);
        if (!readFixedU8(&byte) || (byte & (unsigned(-1) << remainderBits)))
            return false;
-        if (out)
-            *out = u | UInt(byte) << numBitsInSevens;
+        *out = u | UInt(byte) << numBitsInSevens;
        return true;
    }

@@ -619,31 +610,31 @@ class Decoder
    // Fixed-size encoding operations simply copy the literal bytes (without
    // attempting to align).

-    MOZ_WARN_UNUSED_RESULT bool readFixedU32(uint32_t* u = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readFixedU32(uint32_t* u) {
        return read<uint32_t>(u);
    }
-    MOZ_WARN_UNUSED_RESULT bool readFixedF32(float* f = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readFixedF32(float* f) {
        return read<float>(f);
    }
-    MOZ_WARN_UNUSED_RESULT bool readFixedF64(double* d = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readFixedF64(double* d) {
        return read<double>(d);
    }
-    MOZ_WARN_UNUSED_RESULT bool readFixedI32x4(I32x4* i32x4 = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readFixedI32x4(I32x4* i32x4) {
        return read<I32x4>(i32x4);
    }
-    MOZ_WARN_UNUSED_RESULT bool readFixedF32x4(F32x4* f32x4 = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readFixedF32x4(F32x4* f32x4) {
        return read<F32x4>(f32x4);
    }

    // Variable-length encodings that all use LEB128.

-    MOZ_WARN_UNUSED_RESULT bool readVarU32(uint32_t* out = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readVarU32(uint32_t* out) {
        return readVarU<uint32_t>(out);
    }
-    MOZ_WARN_UNUSED_RESULT bool readVarU64(uint64_t* out = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readVarU64(uint64_t* out) {
        return readVarU<uint64_t>(out);
    }
-    MOZ_WARN_UNUSED_RESULT bool readExpr(Expr* expr = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readExpr(Expr* expr) {
        return readEnum(expr);
    }
    MOZ_WARN_UNUSED_RESULT bool readValType(ValType* type) {
@@ -653,20 +644,21 @@ class Decoder
        return readEnum(type);
    }

-    // C-strings are written in UTF8 and null-terminated while raw data can
-    // contain nulls and instead has an explicit byte length.
+    // See writeBytes comment.

-    MOZ_WARN_UNUSED_RESULT UniqueChars readCString() {
-        const char* begin = reinterpret_cast<const char*>(cur_);
-        for (; cur_ != end_; cur_++) {
-            if (!*cur_) {
-                cur_++;
-                return UniqueChars(DuplicateString(begin));
-            }
-        }
-        return nullptr;
+    MOZ_WARN_UNUSED_RESULT bool readBytes(Bytes* bytes) {
+        uint32_t numBytes;
+        if (!readVarU32(&numBytes))
+            return false;
+        if (bytesRemain() < numBytes)
+            return false;
+        if (!bytes->resize(numBytes))
+            return false;
+        memcpy(bytes->begin(), cur_, numBytes);
+        cur_ += numBytes;
+        return true;
    }
-    MOZ_WARN_UNUSED_RESULT bool readRawData(uint32_t numBytes, const uint8_t** bytes = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readBytesRaw(uint32_t numBytes, const uint8_t** bytes) {
        if (bytes)
            *bytes = cur_;
        if (bytesRemain() < numBytes)
@@ -779,7 +771,7 @@ class Decoder
    // Temporary encoding forms which should be removed as part of the
    // conversion to wasm:

-    MOZ_WARN_UNUSED_RESULT bool readFixedU8(uint8_t* i = nullptr) {
+    MOZ_WARN_UNUSED_RESULT bool readFixedU8(uint8_t* i) {
        return read<uint8_t>(i);
    }
    uint8_t uncheckedReadFixedU8() {
--- a/js/src/asmjs/WasmText.cpp
+++ b/js/src/asmjs/WasmText.cpp
@@ -3782,11 +3782,11 @@ EncodeFunctionSignatures(Encoder& e, WasmAstModule& module)
 }

 static bool
-EncodeCString(Encoder& e, WasmName wasmName)
+EncodeBytes(Encoder& e, WasmName wasmName)
 {
    TwoByteChars range(wasmName.begin(), wasmName.length());
    UniqueChars utf8(JS::CharsToNewUTF8CharsZ(nullptr, range).c_str());
-    return utf8 && e.writeCString(utf8.get());
+    return utf8 && e.writeBytes(utf8.get(), strlen(utf8.get()));
 }

 static bool
@@ -3795,10 +3795,10 @@ EncodeImport(Encoder& e, WasmAstImport& imp)
    if (!e.writeVarU32(imp.sigIndex()))
        return false;

-    if (!EncodeCString(e, imp.module()))
+    if (!EncodeBytes(e, imp.module()))
        return false;

-    if (!EncodeCString(e, imp.func()))
+    if (!EncodeBytes(e, imp.func()))
        return false;

    return true;
@@ -3866,7 +3866,7 @@ EncodeFunctionExport(Encoder& e, WasmAstExport& exp)
    if (!e.writeVarU32(exp.func().index()))
        return false;

-    if (!EncodeCString(e, exp.name()))
+    if (!EncodeBytes(e, exp.name()))
        return false;

    return true;
@@ -3991,10 +3991,7 @@ EncodeDataSegment(Encoder& e, WasmAstSegment& segment)
        bytes.infallibleAppend(byte);
    }

-    if (!e.writeVarU32(bytes.length()))
-        return false;
-
-    if (!e.writeRawData(bytes.begin(), bytes.length()))
+    if (!e.writeBytes(bytes.begin(), bytes.length()))
        return false;

    return true;
--- a/js/src/jit-test/tests/wasm/binary.js
+++ b/js/src/jit-test/tests/wasm/binary.js
@@ -79,16 +79,20 @@ function cstring(name) {
 }

 function string(name) {
-    return name.split('').map(c => c.charCodeAt(0));
+    var nameBytes = name.split('').map(c => {
+        var code = c.charCodeAt(0);
+        assertEq(code < 128, true); // TODO
+        return code
+    });
+    return varU32(nameBytes.length).concat(nameBytes);
 }

 function moduleWithSections(sectionArray) {
    var bytes = moduleHeaderThen();
    for (let section of sectionArray) {
-        var nameLength = varU32(section.name.length);
-        bytes.push(...varU32(nameLength.length + section.name.length + section.body.length));
-        bytes.push(...nameLength);
-        bytes.push(...string(section.name));
+        var sectionName = string(section.name);
+        bytes.push(...varU32(sectionName.length + section.body.length));
+        bytes.push(...sectionName);
        bytes.push(...section.body);
    }
    return toU8(bytes);
@@ -133,8 +137,8 @@ function importSection(imports) {
    body.push(...varU32(imports.length));
    for (let imp of imports) {
        body.push(...varU32(imp.sigIndex));
-        body.push(...cstring(imp.module));
-        body.push(...cstring(imp.func));
+        body.push(...string(imp.module));
+        body.push(...string(imp.func));
    }
    return { name: importId, body };
 }