Bug 1499544 - [Part 1] Foundational implementation of BytecodeIterator r=djvj

This is an initial skeleton of an interface for bytecode. There are 6
major portions included in this patch.

- RawBytecode: A typedef to expose jsbytecode* in a controlled manner, to allow
  easy future audits.

- BytecodeLocation: This is an immutable view of a single program operation (a
  bytecode). Some basic functionality is included here for experimentation's
  sake.

- BytecodeLocationOffset: An immutable view of a source location offset, (and
  RawBytecodeLocationOffset, another typedef to ease future auditing)

- BytecodeIterator: A forward iterator over BytecodeLocations, using program
  text order.

- AllBytecodesIterable: A wrapper class that allows the use of a range-based
  for-loop over BytecodeLocations in a Script.

- Modifications to JSScript to expose BytecodeLocations and answer some queries
  about containment.

The interface is experimentally tested using JSScript::initScriptCounts as a
simple playground for demonstrating the use of the interface.

Differential Revision: https://phabricator.services.mozilla.com/D8918

--HG--
extra : rebase_source : 3d103ab8bc8cdcd48bc32779e38fbde35422e8f0
This commit is contained in:
Matthew Gaudet 2018-10-16 15:43:37 -04:00
parent a6e388e171
commit de66ce97bf
6 changed files with 317 additions and 5 deletions

View File

@ -0,0 +1,32 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef vm_BytecodeIterator_inl_h
#define vm_BytecodeIterator_inl_h
#include "vm/BytecodeIterator.h"
#include "vm/JSScript.h"
namespace js {
// Construct an iterator whose current location is script->code(), i.e. the
// pointer the script reports as the start of its bytecode.
//
// This definition lives in a header, so it must be inline: otherwise every
// translation unit that includes this file emits its own external definition
// of the constructor, which violates the one-definition rule and typically
// fails at link time with duplicate symbols.
inline
BytecodeIterator::BytecodeIterator(const JSScript* script)
  : current_(script, script->code())
{}
// AllBytecodesIterable
// Start of the range: an iterator built directly from the wrapped script.
inline BytecodeIterator
AllBytecodesIterable::begin()
{
    BytecodeIterator first(script_);
    return first;
}
// End of the range: an iterator sitting on the location at script_->codeEnd().
inline BytecodeIterator
AllBytecodesIterable::end()
{
    BytecodeLocation pastTheEnd(script_, script_->codeEnd());
    return BytecodeIterator(pastTheEnd);
}
}
#endif

View File

@ -0,0 +1,74 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef vm_BytecodeIterator_h
#define vm_BytecodeIterator_h
#include "vm/BytecodeLocation.h"
namespace js {
// An iterator over BytecodeLocations. Advancing the iterator replaces the
// current location with BytecodeLocation::next() of that location.
class BytecodeIterator
{
    // The location this iterator currently refers to.
    BytecodeLocation current_;

  public:
    // Construct an iterator for |script|; defined out of line.
    explicit BytecodeIterator(const JSScript* script);

    // Construct an iterator positioned at |loc|.
    explicit BytecodeIterator(BytecodeLocation loc)
      : current_(loc)
    {}

    // Two iterators compare equal when their current locations compare equal.
    bool operator==(const BytecodeIterator& other) const {
        return other.current_ == current_;
    }

    bool operator!=(const BytecodeIterator& other) const {
        return !(other.current_ == current_);
    }

    // Access the current location. The reference/pointer refers to storage
    // inside this iterator, so it is only meaningful while the iterator is
    // alive and has not been advanced.
    const BytecodeLocation& operator*() const {
        return current_;
    }

    const BytecodeLocation* operator->() const {
        return &current_;
    }

    // Pre-increment: advance, then return this (advanced) iterator.
    BytecodeIterator& operator++() {
        current_ = current_.next();
        return *this;
    }

    // Post-increment: advance, but return a copy of the iterator as it was
    // *before* advancing. Returning *this here would hand back the already
    // advanced position and make |it++| indistinguishable from |++it|.
    BytecodeIterator operator++(int) {
        BytecodeIterator previous(*this);
        current_ = current_.next();
        return previous;
    }
};
// Adapter that makes a JSScript usable as the range expression of a
// range-based for-loop: begin()/end() hand out BytecodeIterators so the loop
// visits the script's locations.
class AllBytecodesIterable
{
    // The script whose locations are iterated.
    const JSScript* script_;

  public:
    explicit AllBytecodesIterable(const JSScript* script) : script_(script) {}

    // Both are defined out of line.
    BytecodeIterator begin();
    BytecodeIterator end();
};
}
#endif

View File

@ -0,0 +1,30 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef vm_BytecodeLocation_inl_h
#define vm_BytecodeLocation_inl_h
#include "vm/BytecodeLocation.h"
#include "vm/JSScript.h"
namespace js {
// A location is valid for |script| when the script contains it, or when it
// is exactly the script's codeEnd() pointer.
inline bool
BytecodeLocation::isValid(const JSScript* script) const
{
    // Note: this must not construct a new BytecodeLocation. The constructors
    // assert isValid(), so doing that here would recurse forever.
    if (script->contains(*this)) {
        return true;
    }
    return toRawBytecode() == script->codeEnd();
}
// A location is in bounds for |script| exactly when the script reports that
// it contains the location.
inline bool
BytecodeLocation::isInBounds(const JSScript* script) const
{
    const bool inside = script->contains(*this);
    return inside;
}
}
#endif

View File

@ -0,0 +1,148 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef vm_BytecodeLocation_h
#define vm_BytecodeLocation_h
#include "js/TypeDecls.h"
#include "vm/BytecodeUtil.h"
namespace js {
// Alias for the underlying integer representation of an offset.
using RawBytecodeLocationOffset = uint32_t;

// Thin wrapper around a raw offset value. The value is fixed at construction
// and can only be read back through rawOffset().
class BytecodeLocationOffset
{
    RawBytecodeLocationOffset rawOffset_;

  public:
    explicit BytecodeLocationOffset(RawBytecodeLocationOffset offset) : rawOffset_(offset) {}

    // The wrapped raw offset, unchanged.
    RawBytecodeLocationOffset rawOffset() const { return rawOffset_; }
};
typedef jsbytecode* RawBytecode;

// An immutable representation of a program location.
//
// A BytecodeLocation wraps a raw bytecode pointer. In DEBUG builds it also
// remembers the script it was created for, so that assertions can check that
// the location is valid and that two locations being compared belong to the
// same script.
class BytecodeLocation
{
    RawBytecode rawBytecode_;
#ifdef DEBUG
    const JSScript* debugOnlyScript_;
#endif

    // Construct a new BytecodeLocation, while borrowing scriptIdentity
    // from some other BytecodeLocation.
    BytecodeLocation(const BytecodeLocation& loc, RawBytecode pc)
      : rawBytecode_(pc)
#ifdef DEBUG
      , debugOnlyScript_(loc.debugOnlyScript_)
#endif
    {
        MOZ_ASSERT(isValid());
    }

  public:
    // Construct the location of |pc| within |script|.
    BytecodeLocation(const JSScript* script, RawBytecode pc)
      : rawBytecode_(pc)
#ifdef DEBUG
      , debugOnlyScript_(script)
#endif
    {
        MOZ_ASSERT(isValid());
    }

    RawBytecode toRawBytecode() const {
        return rawBytecode_;
    }

    // Return true if this bytecode location is valid for the given script.
    // This includes the location 1-past the end of the bytecode.
    bool isValid(const JSScript* script) const;

    // Return true if this bytecode location is within the bounds of the
    // bytecode for a given script.
    bool isInBounds(const JSScript* script) const;

    // Equality and ordering compare the raw bytecode pointers. Comparing
    // locations that were created for different scripts is asserted against.
    bool operator==(const BytecodeLocation& other) const {
        MOZ_ASSERT(this->debugOnlyScript_ == other.debugOnlyScript_);
        return rawBytecode_ == other.rawBytecode_;
    }

    bool operator!=(const BytecodeLocation& other) const {
        return !(other == *this);
    }

    bool operator<(const BytecodeLocation& other) const {
        MOZ_ASSERT(this->debugOnlyScript_ == other.debugOnlyScript_);
        return rawBytecode_ < other.rawBytecode_;
    }

    // It is traditional to represent the rest of the relational operators
    // using operator<, so we don't need to assert for these.
    bool operator>(const BytecodeLocation& other) const {
        return other < *this;
    }

    bool operator<=(const BytecodeLocation& other) const {
        return !(other < *this);
    }

    bool operator>=(const BytecodeLocation& other) const {
        return !(*this < other);
    }

    // Return the next bytecode: the location GetBytecodeLength(current)
    // bytes after this one.
    BytecodeLocation next() const {
        return BytecodeLocation(*this, rawBytecode_ + GetBytecodeLength(rawBytecode_));
    }

    // Add an offset, producing a new location; this location is unchanged.
    // Const-qualified so it can be applied to const locations, such as the
    // const reference obtained by dereferencing a BytecodeIterator.
    BytecodeLocation operator+(const BytecodeLocationOffset& offset) const {
        return BytecodeLocation(*this, rawBytecode_ + offset.rawOffset());
    }

    // Identity Checks
    bool is(JSOp op) const {
        MOZ_ASSERT(isInBounds());
        return getOp() == op;
    }

    bool isJumpTarget() const {
        return BytecodeIsJumpTarget(getOp());
    }

    // Accessors:

    // Read the opcode stored at this location. This dereferences the raw
    // pointer, so the location must be in bounds: the 1-past-the-end location
    // is valid to hold and compare, but not to read from.
    JSOp getOp() const {
        MOZ_ASSERT(isInBounds());
        return JSOp(*rawBytecode_);
    }

#ifdef DEBUG
    // To ease writing assertions
    bool isValid() const {
        return isValid(debugOnlyScript_);
    }

    bool isInBounds() const {
        return isInBounds(debugOnlyScript_);
    }
#endif
};
}
#endif

View File

@ -47,6 +47,8 @@
#include "util/StringBuffer.h"
#include "util/Text.h"
#include "vm/ArgumentsObject.h"
#include "vm/BytecodeIterator.h"
#include "vm/BytecodeLocation.h"
#include "vm/BytecodeUtil.h"
#include "vm/Compression.h"
#include "vm/Debugger.h"
@ -62,6 +64,8 @@
#include "vtune/VTuneWrapper.h"
#include "gc/Marking-inl.h"
#include "vm/BytecodeIterator-inl.h"
#include "vm/BytecodeLocation-inl.h"
#include "vm/Compartment-inl.h"
#include "vm/EnvironmentObject-inl.h"
#include "vm/JSFunction-inl.h"
@ -1129,11 +1133,12 @@ JSScript::initScriptCounts(JSContext* cx)
// Record all pc which are the first instruction of a basic block.
mozilla::Vector<jsbytecode*, 16, SystemAllocPolicy> jumpTargets;
jsbytecode* mainPc = main();
jsbytecode* end = codeEnd();
for (jsbytecode* pc = code(); pc != end; pc = GetNextPc(pc)) {
if (BytecodeIsJumpTarget(JSOp(*pc)) || pc == mainPc) {
if (!jumpTargets.append(pc)) {
js::BytecodeLocation main = mainLocation();
AllBytecodesIterable iterable(this);
for (auto& loc : iterable) {
if (loc.isJumpTarget() || loc == main) {
if (!jumpTargets.append(loc.toRawBytecode())) {
ReportOutOfMemory(cx);
return false;
}

View File

@ -33,6 +33,8 @@
#include "js/UbiNode.h"
#include "js/UniquePtr.h"
#include "js/Utility.h"
#include "vm/BytecodeIterator.h"
#include "vm/BytecodeLocation.h"
#include "vm/BytecodeUtil.h"
#include "vm/JSAtom.h"
#include "vm/NativeObject.h"
@ -1748,6 +1750,15 @@ class JSScript : public js::gc::TenuredCell
}
return scriptData_->code();
}
// Return an iterable over this script's bytecode locations, suitable for a
// range-based for-loop. Const-qualified (the iterable only needs a
// const JSScript*) so it can also be called through a const script, in line
// with mainLocation() and endLocation().
js::AllBytecodesIterable allLocations() const {
    return js::AllBytecodesIterable(this);
}
// Return the BytecodeLocation for code(), the start of this script's
// bytecode. Const-qualified, like mainLocation() and endLocation(): nothing
// here modifies the script, and code() is already used from const members.
js::BytecodeLocation location() const {
    return js::BytecodeLocation(this, code());
}
bool isUncompleted() const {
// code() becomes non-null only if this script is complete.
// See the comment in JSScript::fullyInitFromEmitter.
@ -1771,6 +1782,10 @@ class JSScript : public js::gc::TenuredCell
return pc >= code() && pc < codeEnd();
}
// BytecodeLocation flavour of containsPC(): unwrap the raw pointer and defer
// to the pointer-based check.
bool contains(const js::BytecodeLocation& loc) const {
    jsbytecode* pc = loc.toRawBytecode();
    return containsPC(pc);
}
size_t pcToOffset(const jsbytecode* pc) const {
MOZ_ASSERT(containsPC(pc));
return size_t(pc - code());
@ -2359,6 +2374,14 @@ class JSScript : public js::gc::TenuredCell
return code() + mainOffset();
}
// The BytecodeLocation corresponding to main().
js::BytecodeLocation mainLocation() const {
    js::BytecodeLocation loc(this, main());
    return loc;
}
// The BytecodeLocation corresponding to codeEnd().
js::BytecodeLocation endLocation() const {
    js::BytecodeLocation loc(this, codeEnd());
    return loc;
}
/*
* computedSizeOfData() is the in-use size of all the data sections.
* sizeOfData() is the size of the block allocated to hold all the data