From de66ce97bf3dab1826bfc8783293118249dfbea8 Mon Sep 17 00:00:00 2001 From: Matthew Gaudet Date: Tue, 16 Oct 2018 15:43:37 -0400 Subject: [PATCH] Bug 1499544 - [Part 1] Foundational implementation of BytecodeIterator r=djvj This is an initial skeleton of an interface for bytecode. There are 6 major portions included in this patch. - RawBytecode: A typedef to expose jsbytecode* in a controlled manner, to allow easy future audits. - BytecodeLocation: This is an immutable view of a single program operation (a bytecode). There is some basic functionality included here for experimentation's sake. - BytecodeLocationOffset: An immutable view of a source location offset (and RawBytecodeLocationOffset, another typedef to ease future auditing). - BytecodeIterator: A forward iterator over BytecodeLocations, using program text order. - AllBytecodesIterable: A wrapper class that allows the use of a range-based for-loop over BytecodeLocations in a Script. - Modifications to JSScript to expose BytecodeLocations and answer some queries about containment. The interface is experimentally tested using JSScript::initScriptCounts as a simple playground for demonstrating the use of the interface. 
Differential Revision: https://phabricator.services.mozilla.com/D8918 --HG-- extra : rebase_source : 3d103ab8bc8cdcd48bc32779e38fbde35422e8f0 --- js/src/vm/BytecodeIterator-inl.h | 32 +++++++ js/src/vm/BytecodeIterator.h | 74 ++++++++++++++++ js/src/vm/BytecodeLocation-inl.h | 30 +++++++ js/src/vm/BytecodeLocation.h | 148 +++++++++++++++++++++++++++++++ js/src/vm/JSScript.cpp | 15 ++-- js/src/vm/JSScript.h | 23 +++++ 6 files changed, 317 insertions(+), 5 deletions(-) create mode 100644 js/src/vm/BytecodeIterator-inl.h create mode 100644 js/src/vm/BytecodeIterator.h create mode 100644 js/src/vm/BytecodeLocation-inl.h create mode 100644 js/src/vm/BytecodeLocation.h diff --git a/js/src/vm/BytecodeIterator-inl.h b/js/src/vm/BytecodeIterator-inl.h new file mode 100644 index 000000000000..3ba4e579a889 --- /dev/null +++ b/js/src/vm/BytecodeIterator-inl.h @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#ifndef vm_BytecodeIterator_inl_h
+#define vm_BytecodeIterator_inl_h
+
+#include "vm/BytecodeIterator.h"
+
+#include "vm/JSScript.h"
+
+namespace js {
+
+// |inline| so that including this header from several files does not violate the ODR.
+inline BytecodeIterator::BytecodeIterator(const JSScript* script)
+  : current_(script, script->code())
+{}
+
+// AllBytecodesIterable: begin() is at script->code(), end() is at script->codeEnd().
+inline BytecodeIterator
+AllBytecodesIterable::begin() {
+    return BytecodeIterator(script_);
+}
+
+inline BytecodeIterator
+AllBytecodesIterable::end() {
+    return BytecodeIterator(BytecodeLocation(script_, script_->codeEnd()));
+}
+} // namespace js
+#endif /* vm_BytecodeIterator_inl_h */
diff --git a/js/src/vm/BytecodeIterator.h b/js/src/vm/BytecodeIterator.h
new file mode 100644
index 000000000000..f5c3c80ce964
--- /dev/null
+++ b/js/src/vm/BytecodeIterator.h
@@ -0,0 +1,74 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef vm_BytecodeIterator_h
+#define vm_BytecodeIterator_h
+
+#include "vm/BytecodeLocation.h"
+
+namespace js {
+
+class BytecodeIterator
+{
+    BytecodeLocation current_;
+
+  public:
+    // Defined in BytecodeIterator-inl.h; starts at script->code().
+    explicit BytecodeIterator(const JSScript* script);
+
+    explicit BytecodeIterator(BytecodeLocation loc)
+      : current_(loc)
+    {}
+
+    bool operator==(const BytecodeIterator& other) const {
+        return other.current_ == current_;
+    }
+
+    bool operator!=(const BytecodeIterator& other) const {
+        return !(other.current_ == current_);
+    }
+
+    const BytecodeLocation& operator*() const {
+        return current_;
+    }
+
+    const BytecodeLocation* operator->() const {
+        return &current_;
+    }
+
+    // Pre-increment
+    BytecodeIterator& operator++() {
+        current_ = current_.next();
+        return *this;
+    }
+
+    // Post-increment: advance, but hand back the position held before the call.
+    BytecodeIterator operator++(int) {
+        BytecodeIterator previous(*this);
+        current_ = current_.next();
+        return previous;
+    }
+};
+
+// Given a JSScript, allow the construction of a range based for-loop
+// that will visit all script locations in that script.
+class AllBytecodesIterable
+{
+    const JSScript* script_;
+
+  public:
+    // begin() and end() are defined in BytecodeIterator-inl.h.
+    explicit AllBytecodesIterable(const JSScript* script)
+      : script_(script)
+    {}
+
+    BytecodeIterator begin();
+    BytecodeIterator end();
+};
+
+} // namespace js
+
+#endif /* vm_BytecodeIterator_h */
diff --git a/js/src/vm/BytecodeLocation-inl.h b/js/src/vm/BytecodeLocation-inl.h
new file mode 100644
index 000000000000..301561c91d87
--- /dev/null
+++ b/js/src/vm/BytecodeLocation-inl.h
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef vm_BytecodeLocation_inl_h
+#define vm_BytecodeLocation_inl_h
+
+#include "vm/BytecodeLocation.h"
+
+#include "vm/JSScript.h"
+
+namespace js {
+
+inline bool
+BytecodeLocation::isValid(const JSScript* script) const {
+    // Note: Don't create a new BytecodeLocation during the implementation of this, as it
+    // is used in the constructor, and will recurse forever.
+    return script->contains(*this) || toRawBytecode() == script->codeEnd();
+}
+
+inline bool
+BytecodeLocation::isInBounds(const JSScript* script) const {
+    return script->contains(*this);
+}
+
+} // namespace js
+
+#endif /* vm_BytecodeLocation_inl_h */
diff --git a/js/src/vm/BytecodeLocation.h b/js/src/vm/BytecodeLocation.h
new file mode 100644
index 000000000000..3a111f2ebe83
--- /dev/null
+++ b/js/src/vm/BytecodeLocation.h
@@ -0,0 +1,148 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef vm_BytecodeLocation_h
+#define vm_BytecodeLocation_h
+
+#include "js/TypeDecls.h"
+#include "vm/BytecodeUtil.h"
+
+namespace js {
+
+typedef uint32_t RawBytecodeLocationOffset;
+
+
+class BytecodeLocationOffset
+{
+    RawBytecodeLocationOffset rawOffset_;
+
+  public:
+    explicit BytecodeLocationOffset(RawBytecodeLocationOffset offset)
+      : rawOffset_(offset)
+    {}
+
+    RawBytecodeLocationOffset rawOffset() const {
+        return rawOffset_;
+    }
+
+};
+
+
+typedef jsbytecode* RawBytecode;
+
+// An immutable representation of a program location: a pointer to one
+// bytecode plus, in DEBUG builds only, the script it was created for.
+class BytecodeLocation
+{
+    RawBytecode rawBytecode_;
+#ifdef DEBUG
+    const JSScript* debugOnlyScript_;
+#endif
+
+    // Construct a new BytecodeLocation, while borrowing scriptIdentity
+    // from some other BytecodeLocation.
+    BytecodeLocation(const BytecodeLocation& loc, RawBytecode pc)
+      : rawBytecode_(pc)
+#ifdef DEBUG
+      , debugOnlyScript_(loc.debugOnlyScript_)
+#endif
+    {
+        MOZ_ASSERT(isValid());
+    }
+
+  public:
+    BytecodeLocation(const JSScript* script, RawBytecode pc)
+      : rawBytecode_(pc)
+#ifdef DEBUG
+      , debugOnlyScript_(script)
+#endif
+    {
+        MOZ_ASSERT(isValid());
+    }
+
+    RawBytecode toRawBytecode() const {
+        return rawBytecode_;
+    }
+
+    // Return true if this bytecode location is valid for the given script.
+    // This includes the location 1-past the end of the bytecode.
+    bool isValid(const JSScript* script) const;
+
+    // Return true if this bytecode location is within the bounds of the
+    // bytecode for a given script.
+    bool isInBounds(const JSScript* script) const;
+
+    bool operator==(const BytecodeLocation& other) const {
+        MOZ_ASSERT(this->debugOnlyScript_ == other.debugOnlyScript_);
+        return rawBytecode_ == other.rawBytecode_;
+    }
+
+    bool operator!=(const BytecodeLocation& other) const {
+        return !(other == *this);
+    }
+
+    bool operator<(const BytecodeLocation& other) const {
+        MOZ_ASSERT(this->debugOnlyScript_ == other.debugOnlyScript_);
+        return rawBytecode_ < other.rawBytecode_;
+    }
+
+    // It is traditional to represent the rest of the relational operators
+    // using operator<, so we don't need to assert for these.
+    bool operator>(const BytecodeLocation& other) const {
+        return other < *this;
+    }
+
+    bool operator<=(const BytecodeLocation& other) const {
+        return !(other < *this);
+    }
+
+    bool operator>=(const BytecodeLocation& other) const {
+        return !(*this < other);
+    }
+
+    // Return the next bytecode
+    BytecodeLocation next() const {
+        return BytecodeLocation(*this, rawBytecode_ + GetBytecodeLength(rawBytecode_));
+    }
+
+    // Add an offset. Const: this builds a new location and leaves |this| untouched.
+    BytecodeLocation operator+(const BytecodeLocationOffset& offset) const {
+        return BytecodeLocation(*this, rawBytecode_ + offset.rawOffset());
+    }
+
+    // Identity Checks (the bounds assertion lives in getOp()).
+    bool is(JSOp op) const {
+        return getOp() == op;
+    }
+
+    bool isJumpTarget() const {
+        return BytecodeIsJumpTarget(getOp());
+    }
+
+    // Accessors: reading the opcode dereferences the pc, so it must be in bounds.
+    JSOp getOp() const {
+        MOZ_ASSERT(isInBounds());
+        return JSOp(*rawBytecode_);
+    }
+
+#ifdef DEBUG
+    // To ease writing assertions
+    bool isValid() const {
+        return isValid(debugOnlyScript_);
+    }
+
+    bool isInBounds() const {
+        return isInBounds(debugOnlyScript_);
+    }
+#endif
+
+};
+
+
+} // namespace js
+
+
+#endif /* vm_BytecodeLocation_h */
diff --git a/js/src/vm/JSScript.cpp b/js/src/vm/JSScript.cpp
index 38c94dc46979..621efc742da2 100644
--- a/js/src/vm/JSScript.cpp
+++ b/js/src/vm/JSScript.cpp
@@ -47,6 +47,8 @@
 #include "util/StringBuffer.h"
 #include "util/Text.h"
 #include "vm/ArgumentsObject.h"
+#include "vm/BytecodeIterator.h"
+#include "vm/BytecodeLocation.h"
 #include "vm/BytecodeUtil.h"
 #include "vm/Compression.h"
 #include "vm/Debugger.h"
@@ -62,6 +64,8 @@
 #include "vtune/VTuneWrapper.h"
 
 #include "gc/Marking-inl.h"
+#include "vm/BytecodeIterator-inl.h"
+#include "vm/BytecodeLocation-inl.h"
 #include "vm/Compartment-inl.h"
 #include "vm/EnvironmentObject-inl.h"
 #include "vm/JSFunction-inl.h"
@@ -1129,11 +1133,12 @@ JSScript::initScriptCounts(JSContext* cx)
 
     // Record all pc which are the first instruction of a basic block.
mozilla::Vector<jsbytecode*, 16, SystemAllocPolicy> jumpTargets;
-    jsbytecode* mainPc = main();
-    jsbytecode* end = codeEnd();
-    for (jsbytecode* pc = code(); pc != end; pc = GetNextPc(pc)) {
-        if (BytecodeIsJumpTarget(JSOp(*pc)) || pc == mainPc) {
-            if (!jumpTargets.append(pc)) {
+    // The main entry point is recorded even when it is not itself a jump target.
+    js::BytecodeLocation mainLoc = mainLocation();
+    AllBytecodesIterable iterable(this);
+    for (const auto& loc : iterable) {
+        if (loc.isJumpTarget() || loc == mainLoc) {
+            if (!jumpTargets.append(loc.toRawBytecode())) {
                 ReportOutOfMemory(cx);
                 return false;
             }
diff --git a/js/src/vm/JSScript.h b/js/src/vm/JSScript.h
index 0a4ad7ab6835..c5324cfb67fa 100644
--- a/js/src/vm/JSScript.h
+++ b/js/src/vm/JSScript.h
@@ -33,6 +33,8 @@
 #include "js/UbiNode.h"
 #include "js/UniquePtr.h"
 #include "js/Utility.h"
+#include "vm/BytecodeIterator.h"
+#include "vm/BytecodeLocation.h"
 #include "vm/BytecodeUtil.h"
 #include "vm/JSAtom.h"
 #include "vm/NativeObject.h"
@@ -1748,6 +1750,15 @@ class JSScript : public js::gc::TenuredCell
         }
         return scriptData_->code();
     }
+
+    js::AllBytecodesIterable allLocations() const {
+        return js::AllBytecodesIterable(this);
+    }
+
+    js::BytecodeLocation location() const {
+        return js::BytecodeLocation(this, code());
+    }
+
     bool isUncompleted() const {
         // code() becomes non-null only if this script is complete.
         // See the comment in JSScript::fullyInitFromEmitter.
@@ -1771,6 +1782,10 @@ class JSScript : public js::gc::TenuredCell
         return pc >= code() && pc < codeEnd();
     }
 
+    bool contains(const js::BytecodeLocation& loc) const {
+        return containsPC(loc.toRawBytecode());
+    }
+
     size_t pcToOffset(const jsbytecode* pc) const {
         MOZ_ASSERT(containsPC(pc));
         return size_t(pc - code());
@@ -2359,6 +2374,14 @@ class JSScript : public js::gc::TenuredCell
         return code() + mainOffset();
     }
 
+    js::BytecodeLocation mainLocation() const {
+        return js::BytecodeLocation(this, main());
+    }
+
+    js::BytecodeLocation endLocation() const {
+        return js::BytecodeLocation(this, codeEnd());
+    }
+
     /*
      * computedSizeOfData() is the in-use size of all the data sections.
* sizeOfData() is the size of the block allocated to hold all the data