mirror of
https://github.com/open-goal/jak-project.git
synced 2024-11-23 22:29:53 +00:00
475 lines
15 KiB
C++
475 lines
15 KiB
C++
#include "PrettyPrinter2.h"
|
|
|
|
#include "common/common_types.h"
|
|
#include "common/util/Assert.h"
|
|
|
|
#include "third-party/fmt/core.h"
|
|
|
|
namespace pretty_print {
|
|
|
|
namespace v2 {
|
|
|
|
// Note: there's some recursive stuff, but we only recurse once per list depth.
|
|
// The previous issues we had with stack overflow only happened when there was a stack frame per
|
|
// element in a list.
|
|
|
|
// The main node type.
|
|
// unlike v1, this nests lists.
|
|
// these have pointers to parents, so generally not safe to copy.
|
|
struct Node {
|
|
Node() = default;
|
|
|
|
Node(const std::string& str) : kind(Kind::ATOM), atom_str(str) {}
|
|
|
|
Node(std::vector<Node>&& list, bool is_list)
|
|
: kind(is_list ? Kind::LIST : Kind::IMPROPER_LIST), child_nodes(std::move(list)) {}
|
|
enum class Kind : u8 { ATOM, LIST, IMPROPER_LIST, INVALID } kind = Kind::INVALID;
|
|
|
|
std::vector<Node> child_nodes;
|
|
std::string atom_str;
|
|
|
|
// number of quotes this is wrapped in.
|
|
u32 quoted = 0;
|
|
|
|
Node* parent = nullptr;
|
|
u32 my_depth = 0;
|
|
|
|
void link(Node* this_parent, std::vector<Node*>* bfs_order, u32 depth) {
|
|
parent = this_parent;
|
|
my_depth = depth;
|
|
bfs_order->push_back(this);
|
|
switch (kind) {
|
|
case Kind::ATOM:
|
|
break;
|
|
case Kind::LIST:
|
|
case Kind::IMPROPER_LIST:
|
|
ASSERT(!child_nodes.empty());
|
|
for (auto& child : child_nodes) {
|
|
child.link(this, bfs_order, depth + 1);
|
|
}
|
|
break;
|
|
default:
|
|
ASSERT(false);
|
|
}
|
|
}
|
|
|
|
bool needs_end_paren_newline() const {
|
|
if (break_list) {
|
|
return true;
|
|
}
|
|
|
|
if (!child_nodes.empty()) {
|
|
return child_nodes.back().needs_end_paren_newline();
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
std::string debug_to_string() const {
|
|
switch (kind) {
|
|
case Kind::ATOM:
|
|
return fmt::format("[atom {}]", atom_str);
|
|
case Kind::LIST:
|
|
return "[list]";
|
|
case Kind::IMPROPER_LIST:
|
|
return "[improper list]";
|
|
default:
|
|
ASSERT(false);
|
|
}
|
|
}
|
|
|
|
// how wide is this text? not including the indentation of this subtree.
|
|
u32 text_len = 0;
|
|
|
|
bool break_list = false;
|
|
u8 top_line_count = 0;
|
|
u8 sub_elt_indent = 0;
|
|
};
|
|
|
|
/*!
 * Convert a goos object into a Node tree.
 * - The empty list and the integer/float/char/symbol/string types become atoms.
 * - (quote x) becomes the node for x with its quote count bumped by one.
 * - Any other pair becomes a list node; it is an improper list if the chain of cdrs ends in
 *   something other than the empty list.
 * Throws std::runtime_error for arrays, lambdas, macros and environments.
 */
Node to_node(const goos::Object& obj) {
  switch (obj.type) {
    case goos::ObjectType::EMPTY_LIST:
      // printed as a single opaque "atom"
      return Node("()");

    case goos::ObjectType::INTEGER:
    case goos::ObjectType::FLOAT:
    case goos::ObjectType::CHAR:
    case goos::ObjectType::SYMBOL:
    case goos::ObjectType::STRING:
      // the pretty printer never looks inside these, it just uses their printed text.
      return Node(obj.print());

    case goos::ObjectType::PAIR: {
      // three possibilities: a quoted form, a proper list, or an improper list.
      auto& head = obj.as_pair()->car;
      if (head.is_symbol() && head.as_symbol()->name == "quote") {
        auto& tail = obj.as_pair()->cdr;
        // only exactly (quote <thing>) is treated as a quote.
        if (tail.is_pair() && tail.as_pair()->cdr.is_empty_list()) {
          Node quoted_node = to_node(tail.as_pair()->car);
          quoted_node.quoted++;
          return quoted_node;
        }
      }

      // not a quote: walk the chain of pairs, collecting one child per car.
      std::vector<Node> elements;
      const goos::Object* cursor = &obj;
      while (cursor->is_pair()) {
        elements.push_back(to_node(cursor->as_pair()->car));
        cursor = &cursor->as_pair()->cdr;
        if (cursor->is_empty_list()) {
          // chain ended in the empty list, so it's a proper list.
          return Node(std::move(elements), true);
        }
      }
      // chain ended in a non-pair, non-empty-list object: keep it as the final child of an
      // improper list.
      elements.push_back(to_node(*cursor));
      return Node(std::move(elements), false);
    }

    // these are unsupported by the pretty printer.
    case goos::ObjectType::ARRAY:  // todo, we should probably handle arrays.
    case goos::ObjectType::LAMBDA:
    case goos::ObjectType::MACRO:
    case goos::ObjectType::ENVIRONMENT:
      throw std::runtime_error("tried to pretty print a goos object kind which is not supported.");

    default:
      ASSERT(false);
  }
}
|
|
|
|
/*!
 * Recompute text_len for every node in bfs_order.
 * The vector is walked back to front so that the children of a list are visited before the
 * list that reads their text_len.
 * - atom: length of its text plus its quotes.
 * - unbroken list: open paren + quotes, plus each child's length + 1 (space or close paren).
 * - broken list: the widest line, either the first line (open paren, quotes and the top-line
 *   children separated by single spaces) or any later child at sub_elt_indent.
 */
void recompute_lengths(const std::vector<Node*>& bfs_order) {
  for (size_t idx = bfs_order.size(); idx-- > 0;) {
    Node* cur = bfs_order[idx];

    if (cur->kind == Node::Kind::ATOM) {
      cur->text_len = cur->atom_str.length() + cur->quoted;
      continue;
    }

    if (cur->kind != Node::Kind::LIST && cur->kind != Node::Kind::IMPROPER_LIST) {
      ASSERT(false);
      continue;
    }

    const auto& kids = cur->child_nodes;

    if (!cur->break_list) {
      // everything on one line
      u32 flat_len = 1 + cur->quoted;  // open paren + quotes
      for (const auto& kid : kids) {
        flat_len += (kid.text_len + 1);  // space or close paren.
      }
      cur->text_len = flat_len;
      continue;
    }

    // broken list: start with the first line.
    const int top_count = std::min(int(kids.size()), int(cur->top_line_count));
    int widest = 1 + cur->quoted;  // open paren + quotes
    for (int k = 0; k < top_count; k++) {
      if (k > 0) {
        widest++;  // single space between elements, none after the last one
      }
      widest += kids[k].text_len;
    }

    // then every element that gets its own line below
    for (size_t k = top_count; k < kids.size(); k++) {
      const int line_len = cur->sub_elt_indent + kids[k].text_len;
      widest = std::max(widest, line_len);
    }

    cur->text_len = widest;
  }
}
|
|
|
|
/*!
|
|
* Note: this has special cases for how to insert breaks.
|
|
* These rules will be used if the printer decides it should break up the list.
|
|
* If you want to force a form to always be broken up, see insert_required_breaks
|
|
*/
|
|
void break_list(Node* node) {
|
|
ASSERT(!node->break_list);
|
|
node->break_list = true;
|
|
node->sub_elt_indent = 2;
|
|
node->top_line_count = 1;
|
|
|
|
const std::unordered_set<std::string> sameline_splitters = {
|
|
"if", "<", ">", "<=", ">=", "set!", "=", "!=", "+",
|
|
"-", "*", "/", "the", "->", "and", "or", "logand", "logior",
|
|
"logxor", "+!", "*!", "logtest?", "not", "zero?", "nonzero?"};
|
|
|
|
if (node->child_nodes.at(0).kind == Node::Kind::LIST) {
|
|
// ((foo
|
|
// bar
|
|
node->sub_elt_indent = 1;
|
|
} else if (node->child_nodes.at(0).kind == Node::Kind::ATOM) {
|
|
auto& name = node->child_nodes[0].atom_str;
|
|
if (name == "defun" || name == "defun-debug" || name == "defbehavior" || name == "defstate") {
|
|
// things with three things in the top line: (defun <name> <args>
|
|
node->top_line_count = 3;
|
|
} else if (name == "defskelgroup") {
|
|
// things with 5 things in the top line: (defskelgroup <name> <art> jgeo janim
|
|
node->top_line_count = 5;
|
|
node->sub_elt_indent += name.size();
|
|
} else if (name == "process-new") {
|
|
// things with 3 things in the top line
|
|
node->top_line_count = 3;
|
|
node->sub_elt_indent += name.size();
|
|
} else if (name == "ja" || name == "ja-no-eval") {
|
|
node->top_line_count = 3;
|
|
node->sub_elt_indent += name.size();
|
|
} else if (name == "defmethod") {
|
|
// things with 4 things in the top line: (defmethod <method> <type> <args>
|
|
node->top_line_count = 4;
|
|
} else if (name == "until" || name == "while" || name == "dotimes" || name == "countdown" ||
|
|
name == "when" || name == "behavior" || name == "lambda" || name == "defpart" ||
|
|
name == "define") {
|
|
node->top_line_count = 2;
|
|
} else if (name == "let" || name == "let*" || name == "rlet") {
|
|
// special case for things like let.
|
|
node->top_line_count = 2; // (let <defs>
|
|
if (node->child_nodes.size() > 1 && node->child_nodes[1].child_nodes.size() > 1 &&
|
|
!node->child_nodes[1].break_list) {
|
|
// and break the defs.
|
|
break_list(&node->child_nodes[1]);
|
|
}
|
|
} else if (sameline_splitters.count(name) > 0) {
|
|
// if has a special indent rule:
|
|
node->top_line_count = 2;
|
|
node->sub_elt_indent += name.size();
|
|
} else if (name == "cond") {
|
|
// cond should always be broken up
|
|
for (size_t i = 1; i < node->child_nodes.size(); i++) {
|
|
auto& cond_body = node->child_nodes[i];
|
|
if (cond_body.kind == Node::Kind::LIST && !cond_body.break_list) {
|
|
break_list(&cond_body);
|
|
}
|
|
}
|
|
} else if (name == "case") {
|
|
// case gets a second thing on top, plus break up everything.
|
|
node->top_line_count = 2;
|
|
for (size_t i = 2; i < node->child_nodes.size(); i++) {
|
|
auto& cond_body = node->child_nodes[i];
|
|
if (cond_body.kind == Node::Kind::LIST && !cond_body.break_list) {
|
|
break_list(&cond_body);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Node* child = node;
|
|
for (Node* p = node->parent; p; p = p->parent) {
|
|
if (!p->break_list && &p->child_nodes.back() != child) {
|
|
break_list(p);
|
|
}
|
|
child = p;
|
|
}
|
|
}
|
|
|
|
/*!
 * Force-break every proper list whose first element is an atom naming one of the forms that
 * should always be split across lines, regardless of how long it is.
 * Lists that are already broken (possibly as a side effect of an earlier break_list call) are
 * skipped.
 */
void insert_required_breaks(const std::vector<Node*>& bfs_order) {
  const std::unordered_set<std::string> always_break = {
      "when",    "defun-debug", "countdown", "case",     "defun",   "defmethod", "let",
      "until",   "while",       "if",        "dotimes",  "cond",    "else",      "defbehavior",
      "with-pp", "rlet",        "defstate",  "behavior", "defpart", "loop"};

  for (Node* candidate : bfs_order) {
    if (candidate->break_list || candidate->kind != Node::Kind::LIST) {
      continue;
    }

    const Node& head = candidate->child_nodes.at(0);
    if (head.kind != Node::Kind::ATOM) {
      continue;
    }

    if (always_break.find(head.atom_str) != always_break.end()) {
      break_list(candidate);
    }
  }
}
|
|
|
|
/*!
 * Run one pass of the breaking algorithm and return how many lists this pass chose to break.
 *
 * our approach is to go in reverse order and find the first list node that is:
 *  - too long
 *  - not already split.
 * the "magic" of v2 is:
 *  the "too long" check above ignores the sublist.
 *
 * After the first break, the pass keeps going only until it reaches a node that is shallower
 * than the first broken node. Lengths are recomputed before returning.
 */
int run_algorithm(const std::vector<Node*>& bfs_order, int line_length) {
  int broken_count = 0;
  // depth of the first node broken in this pass, once there is one.
  std::optional<s32> first_break_depth;

  for (size_t idx = bfs_order.size(); idx-- > 0;) {
    Node* candidate = bfs_order[idx];

    if (first_break_depth.has_value() && candidate->my_depth < *first_break_depth) {
      break;
    }

    if (candidate->kind == Node::Kind::ATOM) {
      continue;
    }
    if ((int)candidate->text_len <= line_length) {
      continue;
    }
    if (candidate->break_list) {
      continue;
    }

    break_list(candidate);
    broken_count++;
    if (!first_break_depth.has_value()) {
      first_break_depth = candidate->my_depth;
    }
  }

  recompute_lengths(bfs_order);
  return broken_count;
}
|
|
|
|
/*!
 * After some text has been appended to str starting at index s0, compute the new "extra"
 * offset to use for whatever is appended next.
 *
 * Scans backward from the end of str:
 *  - if index s0 is reached first, no newline was found after it: the offset grows by the
 *    number of characters appended, giving ei + (str.length() - s0).
 *  - if a newline is found first, the offset restarts from that line: str.length() minus the
 *    index of the newline.
 * If neither is found (s0 is not a valid index into str), the first formula is used.
 *
 * str must not be empty (asserted). ei is the extra offset before the text was appended.
 */
int compute_extra_offset(const std::string& str, int s0, int ei) {
  ASSERT(!str.empty());
  const int len = static_cast<int>(str.length());
  for (size_t i = str.length(); i-- > 0;) {
    if (static_cast<int>(i) == s0) {
      return ei + len - s0;
    }
    // look at the character, not the index: comparing i itself to '\n' only ever matched
    // index 10, so real newlines were never detected.
    if (str[i] == '\n') {
      return len - static_cast<int>(i);
    }
  }
  return ei + len - s0;
}
|
|
|
|
void append_node_to_string(const Node* node,
|
|
std::string& str,
|
|
int init_indent_level,
|
|
int next_indent_level) {
|
|
for (int i = 0; i < init_indent_level; i++) {
|
|
str.push_back(' ');
|
|
}
|
|
for (u32 i = 0; i < node->quoted; i++) {
|
|
str.push_back('\'');
|
|
}
|
|
switch (node->kind) {
|
|
case Node::Kind::ATOM:
|
|
str.append(node->atom_str);
|
|
break;
|
|
case Node::Kind::IMPROPER_LIST:
|
|
case Node::Kind::LIST:
|
|
if (node->break_list) {
|
|
str.push_back('(');
|
|
size_t node_idx = 0;
|
|
|
|
int listing_indent = next_indent_level + node->quoted + node->sub_elt_indent;
|
|
int extra_indent = 0;
|
|
int old_indent = listing_indent;
|
|
if (node->top_line_count) {
|
|
listing_indent -= node->sub_elt_indent;
|
|
listing_indent += node->child_nodes.front().kind == Node::Kind::LIST ? 1 : 2;
|
|
}
|
|
for (; node_idx < node->top_line_count; node_idx++) {
|
|
size_t s0 = str.length();
|
|
if (node->kind == Node::Kind::IMPROPER_LIST &&
|
|
&node->child_nodes.at(node_idx) == &node->child_nodes.back()) {
|
|
str.append(". ");
|
|
}
|
|
// so, if these need to break, they should have a bigger indent.
|
|
append_node_to_string(&node->child_nodes.at(node_idx), str, 0,
|
|
listing_indent + extra_indent);
|
|
extra_indent = compute_extra_offset(str, s0, extra_indent);
|
|
str.push_back(' ');
|
|
}
|
|
if (node->top_line_count) {
|
|
listing_indent = old_indent;
|
|
}
|
|
if (node->top_line_count > 0) {
|
|
str.pop_back();
|
|
}
|
|
str.push_back('\n');
|
|
bool after_key = false;
|
|
for (; node_idx < node->child_nodes.size(); node_idx++) {
|
|
if (node->kind == Node::Kind::IMPROPER_LIST &&
|
|
&node->child_nodes.at(node_idx) == &node->child_nodes.back()) {
|
|
for (int i = 0; i < listing_indent; i++) {
|
|
str.push_back(' ');
|
|
}
|
|
str.append(".\n");
|
|
}
|
|
append_node_to_string(&node->child_nodes.at(node_idx), str,
|
|
after_key ? 0 : listing_indent, listing_indent);
|
|
if (node->child_nodes.at(node_idx).kind == Node::Kind::ATOM &&
|
|
node->child_nodes.at(node_idx).atom_str.at(0) == ':' &&
|
|
node->child_nodes.at(node_idx).atom_str.find(' ') == std::string::npos) {
|
|
str.push_back(' ');
|
|
after_key = true;
|
|
} else {
|
|
str.push_back('\n');
|
|
after_key = false;
|
|
}
|
|
}
|
|
for (int i = 0; i < listing_indent; i++) {
|
|
str.push_back(' ');
|
|
}
|
|
str.push_back(')');
|
|
} else {
|
|
str.push_back('(');
|
|
ASSERT(!node->child_nodes.empty());
|
|
int listing_indent = next_indent_level + node->quoted;
|
|
int extra_indent = 1;
|
|
int c0 = 0;
|
|
for (auto& child : node->child_nodes) {
|
|
if (node->kind == Node::Kind::IMPROPER_LIST && &child == &node->child_nodes.back()) {
|
|
str.append(". ");
|
|
}
|
|
size_t s0 = str.length();
|
|
append_node_to_string(&child, str, 0, listing_indent + extra_indent);
|
|
str.push_back(' ');
|
|
extra_indent += (str.length() - s0);
|
|
if (&child == &node->child_nodes.at(0) && !child.break_list) {
|
|
//
|
|
if (child.kind == Node::Kind::LIST) {
|
|
c0 = 0;
|
|
} else {
|
|
c0 = str.length() - s0;
|
|
}
|
|
}
|
|
}
|
|
str.pop_back();
|
|
if (node->needs_end_paren_newline()) {
|
|
str.push_back('\n');
|
|
for (int i = 0; i < listing_indent + c0 + 1; i++) {
|
|
str.push_back(' ');
|
|
}
|
|
}
|
|
str.push_back(')');
|
|
}
|
|
break;
|
|
default:
|
|
ASSERT(false);
|
|
}
|
|
}
|
|
|
|
/*!
 * Print a node tree to a new string, starting with no indentation.
 */
std::string node_to_string(const Node* node) {
  std::string printed;
  append_node_to_string(node, printed, /*init_indent_level=*/0, /*next_indent_level=*/0);
  return printed;
}
|
|
|
|
} // namespace v2
|
|
|
|
/*!
 * Pretty print a goos object.
 * @param obj         the object to print.
 * @param line_length lists whose computed width exceeds this are split across lines.
 * @return the formatted text.
 * Exceptions from to_node (unsupported object kinds) propagate to the caller.
 */
std::string to_string(const goos::Object& obj, int line_length) {
  using namespace v2;

  // construct the tree
  Node root = to_node(obj);

  // create tree links and the node ordering used by the passes below.
  // the nodes point at each other, so root must not be copied or moved after this.
  std::vector<Node*> bfs_order;
  root.link(nullptr, &bfs_order, 0);

  // forms that are always split, no matter how short
  insert_required_breaks(bfs_order);

  // compute subtree lengths
  recompute_lengths(bfs_order);

  // keep breaking lists until a pass breaks nothing. Always runs at least one pass.
  while (run_algorithm(bfs_order, line_length) != 0) {
  }

  return node_to_string(&root);
}
|
|
} // namespace pretty_print
|