formatter: support formatting bindings, for example in a let (#2883)

This commit is contained in:
Tyler Wilding 2023-08-05 13:23:09 -06:00 committed by GitHub
parent bb8b39a5e8
commit a7cf66fda6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 427 additions and 134 deletions

View File

@ -32,10 +32,11 @@ add_library(common
dma/dma_copy.cpp
dma/dma.cpp
dma/gs.cpp
formatter/config/rule_config.cpp
formatter/formatter_tree.cpp
formatter/formatter.cpp
formatter/formatting_rules.cpp
formatter/formatter_tree.cpp
formatter/rules/formatting_rules.cpp
formatter/rules/rule_config.cpp
global_profiler/GlobalProfiler.cpp
goos/Interpreter.cpp
goos/Object.cpp

View File

@ -1,10 +0,0 @@
#include "rule_config.h"
namespace formatter_rules {
namespace config {
// TODO - populate these more
std::unordered_map<std::string, FormConfiguration> opengoal_form_config = {};
} // namespace config
} // namespace formatter_rules

View File

@ -1,19 +0,0 @@
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "common/formatter/formatting_rules.h"
namespace formatter_rules {
namespace config {
struct FormConfiguration {
bool force_hang = false;
int start_hang_at_index = 0;
};
extern std::unordered_map<std::string, FormConfiguration> opengoal_form_config;
} // namespace config
} // namespace formatter_rules

View File

@ -2,6 +2,7 @@
#include "formatter_tree.h"
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/string_util.h"
@ -15,9 +16,11 @@ extern "C" {
extern const TSLanguage* tree_sitter_opengoal();
}
std::string apply_formatting(const FormatterTreeNode& curr_node,
std::string output,
int tree_depth = 0) {
std::string apply_formatting(
const FormatterTreeNode& curr_node,
std::string output,
std::optional<formatter_rules::config::FormFormattingConfig> form_element_config) {
using namespace formatter_rules;
if (!curr_node.token && curr_node.refs.empty()) {
return output;
}
@ -34,33 +37,69 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
bool inline_form = false;
// Also check if the form should be constant-paired
const bool constant_pair_form =
formatter_rules::constant_pairs::form_should_be_constant_paired(curr_node);
const bool constant_pair_form = constant_pairs::form_should_be_constant_paired(curr_node);
if (!constant_pair_form) {
// Determine if the form should be inlined or hung/flowed
// TODO - this isn't entirely accurate, needs current cursor positioning (which is tricky
// because recursion!)
inline_form = formatter_rules::indent::form_can_be_inlined(curr_form, curr_node);
inline_form = indent::form_can_be_inlined(curr_form, curr_node);
}
const bool flowing = indent::should_form_flow(curr_node, inline_form);
std::optional<formatter_rules::config::FormFormattingConfig> form_config;
if (!curr_node.refs.empty() && curr_node.refs.at(0).token) {
const auto& form_head = curr_node.refs.at(0).token;
if (form_head && config::opengoal_form_config.find(form_head.value()) !=
config::opengoal_form_config.end()) {
form_config = config::opengoal_form_config.at(form_head.value());
}
}
// TODO - might want to make some kind of per-form config struct, simplify the passing around of
// info below
for (int i = 0; i < curr_node.refs.size(); i++) {
const auto& ref = curr_node.refs.at(i);
// Append a newline if needed
if (!inline_form) {
formatter_rules::indent::append_newline(curr_form, ref, curr_node, tree_depth, i,
constant_pair_form);
// Figure out if the element should be inlined or not
bool inline_element = inline_form;
if (indent::inline_form_element(curr_node, i)) {
inline_element = indent::inline_form_element(curr_node, i).value();
}
// Append a newline if needed
// TODO - cleanup / move
bool is_binding_list = false;
bool force_newline = false;
bool override_force_flow = false;
if (form_config) {
force_newline = std::find(form_config->force_newline_at_indices.begin(),
form_config->force_newline_at_indices.end(),
i) != form_config->force_newline_at_indices.end();
// Check if it's a small enough binding list, if so we don't force a newline if the element
// can be inlined
if (inline_element && i > 0 && form_config->bindings_at_index == i - 1 &&
curr_node.refs.at(i - 1).refs.size() < form_config->allow_inlining_if_size_less_than) {
force_newline = false;
override_force_flow = true;
}
is_binding_list = form_config->bindings_at_index == i;
}
if (!curr_node.metadata.is_top_level &&
(!inline_element || is_binding_list || force_newline ||
(form_element_config && form_element_config->force_flow))) {
indent::append_newline(curr_form, ref, curr_node, i, flowing, constant_pair_form,
(form_element_config && form_element_config->force_flow));
}
// TODO - indent the line (or don't)
// Either print the element's token, or recursively format it as well
if (ref.token) {
// TODO depth hard-coded to 1, i think this can be removed, since
// forms are always done bottom-top recursively, they always act
// independently as if it was the shallowest depth
if (!inline_form) {
formatter_rules::indent::flow_line(curr_form, ref, curr_node, 1, i);
if (!inline_element || force_newline) {
indent::indent_line(curr_form, ref, curr_node, 1, i, flowing);
}
if (ref.metadata.node_type == "comment" && ref.metadata.is_inline) {
curr_form += " " + ref.token.value();
} else if (ref.metadata.node_type == "block_comment") {
curr_form += formatter_rules::comments::format_block_comment(ref.token.value());
curr_form += comments::format_block_comment(ref.token.value());
} else {
curr_form += ref.token.value();
}
@ -68,9 +107,20 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
curr_form += " ";
}
} else {
auto formatted_form = apply_formatting(ref, "", tree_depth + 1);
if (!curr_node.metadata.is_top_level && !inline_form) {
formatter_rules::indent::hang_lines(formatted_form, ref, curr_node, constant_pair_form);
// See if the item at this position has specific formatting
std::optional<formatter_rules::config::FormFormattingConfig> config = {};
std::string formatted_form;
if (form_config && form_config->index_configs.find(i) != form_config->index_configs.end()) {
formatted_form = apply_formatting(ref, "", *form_config->index_configs.at(i));
} else {
formatted_form = apply_formatting(ref, "", {});
}
// TODO - align inner lines only
if (!curr_node.metadata.is_top_level) {
indent::align_lines(
formatted_form, ref, curr_node, constant_pair_form, flowing,
(!override_force_flow && form_config && i >= form_config->start_flow_at_index),
inline_element);
}
curr_form += formatted_form;
if (!curr_node.metadata.is_top_level) {
@ -78,7 +128,7 @@ std::string apply_formatting(const FormatterTreeNode& curr_node,
}
}
// Handle blank lines at the top level, skip if it's the final element
formatter_rules::blank_lines::separate_by_newline(curr_form, curr_node, ref, i);
blank_lines::separate_by_newline(curr_form, curr_node, ref, i);
}
if (!curr_node.metadata.is_top_level) {
curr_form = str_util::rtrim(curr_form) + ")";
@ -104,8 +154,13 @@ std::optional<std::string> formatter::format_code(const std::string& source) {
return std::nullopt;
}
const auto formatting_tree = FormatterTree(source, root_node);
std::string formatted_code = apply_formatting(formatting_tree.root, "");
try {
const auto formatting_tree = FormatterTree(source, root_node);
std::string formatted_code = apply_formatting(formatting_tree.root, "", {});
return formatted_code;
} catch (std::exception& e) {
lg::error("Unable to format code - {}", e.what());
}
return formatted_code;
return std::nullopt;
}

View File

@ -3,7 +3,8 @@
#include <optional>
#include <string>
#include "formatting_rules.h"
#include "common/formatter/rules/formatting_rules.h"
#include "common/formatter/rules/rule_config.h"
#include "tree_sitter/api.h"

View File

@ -2,8 +2,6 @@
#include "common/util/string_util.h"
#include "config/rule_config.h"
#include "third-party/fmt/core.h"
std::string get_source_code(const std::string& source, const TSNode& node) {

View File

@ -31,6 +31,7 @@ class FormatterTreeNode {
bool is_comment = false;
bool is_inline = false;
int num_blank_lines_following = 0;
bool is_binding_list = false; // TODO set this
};
std::vector<FormatterTreeNode> refs;
Metadata metadata;
@ -41,6 +42,8 @@ class FormatterTreeNode {
FormatterTreeNode() = default;
FormatterTreeNode(const std::string& source, const TSNode& node);
FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){};
// A node is a list (form) when it carries no token of its own; nodes that hold a token are
// leaves whose text is emitted directly by the formatter.
bool is_list() const { return !token.has_value(); }
};
// A FormatterTree has a very simple and crude tree structure where:

View File

@ -2,6 +2,8 @@
#include <set>
#include "rule_config.h"
#include "common/util/string_util.h"
#include "third-party/fmt/core.h"
@ -196,27 +198,89 @@ bool form_contains_comment(const FormatterTreeNode& node) {
return false;
}
bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node) {
// Two main checks:
// - first, is the form too long to fit on a line TODO - increase accuracy here
if (form_exceed_line_width(curr_text, node, 0)) {
bool form_can_be_inlined(const std::string& curr_text, const FormatterTreeNode& list_node) {
// is the form too long to fit on a line TODO - increase accuracy here
if (form_exceed_line_width(curr_text, list_node, 0)) {
return false;
}
// - second, are there any comments? (inlined or not, doesn't matter)
if (form_contains_comment(node)) {
// are there any comments? (inlined or not, doesn't matter)
if (form_contains_comment(list_node)) {
return false;
}
return true;
}
// Decides whether the elements of `list_node` should be laid out in "flow" style.
//
// Returns true when:
// - the form contains a comment (as reported by form_contains_comment)
// - the form is not being inlined and its first element is one of the constant node types
// - the first element is itself a list and the second element is a comment
// Otherwise returns false.
bool should_form_flow(const FormatterTreeNode& list_node, const bool inlining_form) {
  if (form_contains_comment(list_node)) {
    return true;
  }
  // Nothing below applies to a form without elements
  if (list_node.refs.empty()) {
    return false;
  }
  const auto& first_element = list_node.refs.at(0);
  // A non-inlined form headed by a constant (ie. a list of content elements) is flowed
  const bool headed_by_constant =
      constant_types.find(first_element.metadata.node_type) != constant_types.end();
  if (!inlining_form && headed_by_constant) {
    return true;
  }
  // TODO - make a function to make grabbing this metadata easier...
  // TODO - honestly should just have an is_list metadata
  // The remaining checks only run when the first element has no token
  if (first_element.token) {
    return false;
  }
  // A comment in the second position forces a flow
  if (list_node.refs.size() > 1 && list_node.refs.at(1).metadata.is_comment) {
    return true;
  }
  // See if we have any configuration for this form
  // NOTE(review): `form_head` is always empty at this point (we returned above when the first
  // element has a token), so this lookup never runs. The per-form `force_flow` setting is
  // therefore not consulted here - confirm whether that is intended before changing it.
  const auto& form_head = first_element.token;
  if (form_head) {
    const auto config_entry = config::opengoal_form_config.find(form_head.value());
    if (config_entry != config::opengoal_form_config.end()) {
      return config_entry->second.force_flow;
    }
  }
  // TODO - cleanup, the form might be nested inside another form (ie. a let) whose own
  // configuration forces a flow; that needs access to the containing form
  return false;
}
// Looks up whether the element at `index` of `list_node` has an explicit inlining override.
//
// Returns std::nullopt when the form is empty, its first element has no token, there is no
// configuration for the form's head, or the configuration leaves `inline_until_index` at -1.
// Otherwise returns true for indices below `inline_until_index` and false for the rest.
std::optional<bool> inline_form_element(const FormatterTreeNode& list_node, const int index) {
  // TODO - honestly should just have an is_list metadata
  if (list_node.refs.empty()) {
    return std::nullopt;
  }
  const auto& form_head = list_node.refs.at(0).token;
  if (!form_head) {
    return std::nullopt;
  }
  // See if we have any configuration for this form
  const auto config_entry = config::opengoal_form_config.find(form_head.value());
  if (config_entry == config::opengoal_form_config.end()) {
    return std::nullopt;
  }
  const int inline_limit = config_entry->second.inline_until_index;
  if (inline_limit == -1) {
    return std::nullopt;
  }
  return index < inline_limit;
}
void append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index,
const bool constant_pair_form) {
const bool flowing,
const bool constant_pair_form,
const bool force_newline) {
if (force_newline && index >= 1 || (node.metadata.is_comment && !node.metadata.is_inline)) {
curr_text = str_util::rtrim(curr_text) + "\n";
return;
}
if (index <= 0 || containing_node.metadata.is_top_level ||
(node.metadata.is_comment && node.metadata.is_inline)) {
(node.metadata.is_comment && node.metadata.is_inline) || (!flowing && index <= 1)) {
return;
}
// Check if it's a constant pair
@ -227,11 +291,12 @@ void append_newline(std::string& curr_text,
curr_text = str_util::rtrim(curr_text) + "\n";
}
void flow_line(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index) {
void indent_line(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index,
const bool flowing) {
if (node.metadata.is_top_level || (node.metadata.is_inline && node.metadata.is_comment)) {
return;
}
@ -240,42 +305,89 @@ void flow_line(std::string& curr_text,
if (constant_pairs::is_element_second_in_constant_pair(containing_node, node, index)) {
return;
}
if (index > 0) {
// If the first element in the list is a constant, we only indent with 1 space instead
if (constant_types.find(containing_node.refs.at(0).metadata.node_type) !=
constant_types.end()) {
curr_text += str_util::repeat(depth, " ");
} else {
curr_text += str_util::repeat(depth, " ");
}
// If the first element in the list is a constant, we only indent with 1 space instead
if (index > 0 &&
constant_types.find(containing_node.refs.at(0).metadata.node_type) != constant_types.end()) {
curr_text += str_util::repeat(depth, " ");
} else if (index > 0 && flowing) {
curr_text += str_util::repeat(depth, " ");
} else if (index > 1 && !flowing) {
curr_text += str_util::repeat(containing_node.refs.at(0).token.value().length() + 2, " ");
}
}
void hang_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const bool constant_pair_form) {
const auto lines = str_util::split(text);
// TODO - unsafe (breaks on a list of lists)
int alignment_width = 2;
if (constant_pair_form &&
constant_types.find(containing_node.refs.at(0).metadata.node_type) != constant_types.end()) {
alignment_width = 3;
// Recursively iterate through the node until we hit a token
int length_to_hang(const FormatterTreeNode& node, int length) {
if (node.token || node.refs.at(0).token) {
return length;
}
return length_to_hang(node.refs.at(0), length + 1);
}
void align_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const bool constant_pair_form,
const bool flowing,
const bool force_flow,
const bool inline_element) {
const auto lines = str_util::split(text);
int start_index = 0;
if (inline_element) {
start_index = 1;
}
int alignment_width = 2;
if (force_flow) {
start_index = 0;
} else if (constant_pair_form &&
constant_types.find(containing_node.refs.at(0).metadata.node_type) !=
constant_types.end()) {
start_index = 0;
alignment_width = 3;
} else if (!flowing) {
// If the form has a token (it's a normal list)
if (containing_node.refs.at(0).token) {
alignment_width = length_to_hang(containing_node.refs.at(1),
containing_node.refs.at(0).token.value().length()) +
1;
if (!node.token) {
alignment_width++;
}
} else {
// otherwise, it's a list of lists
alignment_width = 1;
}
} else if (!node.token) {
// If it's a list of lists
alignment_width = 1;
}
// TODO - implement hanging
// always hang unless flowing is "better" (this is the hard part)
/*else if (containing_node.metadata.multiple_elements_first_line) {
alignment_width = containing_node.refs.at(0).token.value().length() + 2;
}*/
std::string aligned_form = "";
for (int i = 0; i < lines.size(); i++) {
aligned_form += str_util::repeat(alignment_width, " ") + lines.at(i);
if (i >= start_index) {
aligned_form += str_util::repeat(alignment_width, " ");
}
aligned_form += lines.at(i);
if (i != lines.size() - 1) {
aligned_form += "\n";
}
}
text = aligned_form;
if (!aligned_form.empty()) {
text = aligned_form;
}
}
} // namespace indent
namespace let {
// Checks conditions specific to `let` style forms (ones with bindings).
// The form is only considered inlinable when its binding list holds at most one binding.
bool can_be_inlined(const FormatterTreeNode& form) {
  // TODO - assuming the binding list is the second element of the form
  const auto& binding_list = form.refs.at(1);
  return binding_list.refs.size() <= 1;
}
}  // namespace let
} // namespace formatter_rules

View File

@ -2,7 +2,7 @@
#include <string>
#include "formatter_tree.h"
#include "common/formatter/formatter_tree.h"
namespace formatter_rules {
// The formatter will try to collapse as much space as possible in the top-level, this means
@ -85,30 +85,61 @@ bool form_should_be_constant_paired(const FormatterTreeNode& node);
// Additionally, if the head of the form is a constant we `flow` with an indent of `1` instead of
// `2`
//
// TODO: - incorporate more heuristics here, explore both a hang and flow approach to see which is
// better
// By default, we always hang unless:
// - the head-form overrides the configuration
// - TODO it will use more lines than the flow approach
//
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/indent.md
namespace indent {
const static int line_width_target = 120;
bool form_can_be_inlined(std::string& curr_text, const FormatterTreeNode& node);
bool form_can_be_inlined(const std::string& curr_text, const FormatterTreeNode& curr_node);
// TODO - right now this is very primitive in that it only checks against our hard-coded config
// eventually make this explore both routes and determine which is best
// Also factor in distance from the gutter (there's some zprint rationale somewhere on this)
bool should_form_flow(const FormatterTreeNode& list_node, const bool inlining_form);
std::optional<bool> inline_form_element(const FormatterTreeNode& list_node, const int index);
void append_newline(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index,
const bool constant_pair_form);
void flow_line(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index);
void hang_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const bool constant_pair_form);
const bool flowing,
const bool constant_pair_form,
const bool force_newline);
void indent_line(std::string& curr_text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const int depth,
const int index,
const bool flowing);
void align_lines(std::string& text,
const FormatterTreeNode& node,
const FormatterTreeNode& containing_node,
const bool constant_pair_form,
const bool flowing,
const bool force_flow,
const bool inline_element);
} // namespace indent
// Let forms fall into two main categories
// - Ones that can be entirely inlined
// - Ones that cannot
//
// Single line let forms:
// - If there is a single binding, let it continue to be one line
// - otherwise, flow it just as a multi-line let would be done
//
// For multi-line let forms, the difficulty is how to format the binding:
// - align the bindings to within the binding list (single indent space)
// - always format the binding value on the same line as the binding, no new lines
// - forms inside the let binding are flowed
//
// Reference - https://github.com/kkinnear/zprint/blob/main/doc/options/let.md
namespace let {
// TODO - like above, factor in current cursor position
bool can_be_inlined(const FormatterTreeNode& form);
} // namespace let
} // namespace formatter_rules

View File

@ -0,0 +1,34 @@
#include "rule_config.h"
namespace formatter_rules {
namespace config {
// TODO - populate these more
// TODO - this could be greatly simplified with C++20's designated initialization
// Builds a configuration for a form that is forced to flow.
// `start_index` is used both as the index where flowing starts and as the index up to which
// (exclusive) the form's elements are inlined.
FormFormattingConfig new_flow_rule(int start_index) {
  FormFormattingConfig rule;
  rule.inline_until_index = start_index;
  rule.start_flow_at_index = start_index;
  rule.force_flow = true;
  return rule;
}
// Builds a configuration for a form whose element at index 1 is a binding list (ie. `let`).
// The element at index 2 gets a forced newline and flowing starts there, unless the binding
// list has fewer than 2 entries, in which case inlining is allowed.
FormFormattingConfig new_binding_rule() {
  // The binding list itself (index 1) is formatted with its own nested config
  auto binding_list_rule = std::make_shared<FormFormattingConfig>();
  binding_list_rule->force_flow = true;

  FormFormattingConfig rule;
  rule.force_flow = true;
  rule.bindings_at_index = 1;
  rule.start_flow_at_index = 2;
  rule.force_newline_at_indices = {2};
  rule.allow_inlining_if_size_less_than = 2;
  rule.index_configs.emplace(1, binding_list_rule);
  return rule;
}
// Per-form formatting configuration, keyed by the token at the head of the form.
// The formatting rules look a form's head up in this table to find any overrides.
const std::unordered_map<std::string, FormFormattingConfig> opengoal_form_config = {
// `defun` - forced flow, inlined up to (but excluding) index 3
{"defun", new_flow_rule(3)},
// `let` - binding list at index 1, forced newline at index 2
{"let", new_binding_rule()}};
} // namespace config
} // namespace formatter_rules

View File

@ -0,0 +1,28 @@
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "common/formatter/rules/formatting_rules.h"
namespace formatter_rules {
namespace config {
// Formatting overrides for a single kind of form, looked up by the form's head token in
// `opengoal_form_config`, or handed to a nested form through its parent's `index_configs`.
struct FormFormattingConfig {
// NOTE(review): not read by the formatter code visible alongside this struct - presumably
// reserved for forcing a hang; confirm before relying on it
bool force_hang = false;
// When this config is handed to a nested form by its parent, a newline is requested before
// each of that form's elements
bool force_flow = false;
// If set, the element directly after the binding list may stay inlined (skipping its forced
// newline) when the binding list has fewer than this many entries
std::optional<int> allow_inlining_if_size_less_than = {};
// NOTE(review): not read by the formatter code visible alongside this struct
int start_hang_at_index = 0;
// List elements at or beyond this index are aligned as force-flowed
int start_flow_at_index = 0;
// Elements with an index below this value are inlined; -1 means no override
int inline_until_index = -1;
// Index of the element that is treated as the form's binding list, if any
std::optional<int> bindings_at_index = {};
// NOTE(review): not read by the formatter code visible alongside this struct
std::optional<int> skip_newlines_until_index = {};
// Element indices that should be preceded by a newline
std::vector<int> force_newline_at_indices = {};
// NOTE(review): not read by the formatter code visible alongside this struct
bool bindings_force_newlines = false;
// Configs to pass down when recursively formatting the list element at a given index
std::unordered_map<int, std::shared_ptr<FormFormattingConfig>> index_configs = {};
};
extern const std::unordered_map<std::string, FormFormattingConfig> opengoal_form_config;
} // namespace config
} // namespace formatter_rules

View File

@ -46,8 +46,6 @@ std::string ltrim(const std::string& s) {
return (start == std::string::npos) ? "" : s.substr(start);
}
// TODO - used a lot in formatting, and its slow because i bet it iterates from the start and not
// the end
std::string rtrim(const std::string& s) {
size_t end = s.find_last_not_of(WHITESPACE);
return (end == std::string::npos) ? "" : s.substr(0, end + 1);

View File

@ -6,12 +6,10 @@ Basic Nested Form
---
(println
"hello"
(println
"world"
"world2"
"very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form"))
(println "hello"
(println "world"
"world2"
"very-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-formvery-long-form"))
===
Multiple Top Level Forms
@ -58,3 +56,14 @@ Single Item Form
(println)
===
defun - No Docstring
===
(defun test-function ((arg0 string))
(println arg0))
---
(defun test-function ((arg0 string))
(println arg0))

View File

@ -0,0 +1,24 @@
===
Single Binding - Inlinable
===
(let ((a 1))
(+ a b))
---
(let ((a 1)) (+ a b))
===
Multiple Bindings
===
(let ((a 1)
(b 2))
(+ a b))
---
(let ((a 1)
(b 2))
(+ a b))

View File

@ -0,0 +1,24 @@
===
Constant List
===
(1 2 3 4)
---
(1 2 3 4)
===
Constant List - Too Long to Inline
===
(1111111111111 2222222222222222 3333333333333333333333333 444444444444444444444444444 555555555555555555555555555555555 666666666666666666666666666666666666666)
---
(1111111111111
2222222222222222
3333333333333333333333333
444444444444444444444444444
555555555555555555555555555555555
666666666666666666666666666666666666666)

View File

@ -122,27 +122,31 @@ bool run_tests(const fs::path& file_path, const bool only_important_tests) {
}
bool find_and_run_tests() {
// Enumerate test files
const auto test_files = file_util::find_files_recursively(
file_util::get_file_path({"test/common/formatter/corpus"}), std::regex("^.*\.test.gc$"));
bool failed = false;
// First do a pass to see if any tests are meant to be prioritized for debugging
bool only_important_tests = false;
for (const auto& file : test_files) {
only_important_tests = has_important_tests(file);
if (only_important_tests) {
break;
try {
// Enumerate test files
const auto test_files = file_util::find_files_recursively(
file_util::get_file_path({"test/common/formatter/corpus"}), std::regex("^.*\.test.gc$"));
bool failed = false;
// First do a pass to see if any tests are meant to be prioritized for debugging
bool only_important_tests = false;
for (const auto& file : test_files) {
only_important_tests = has_important_tests(file);
if (only_important_tests) {
break;
}
}
}
for (const auto& file : test_files) {
// don't fail fast, but any failure means we return false
if (failed) {
run_tests(file, only_important_tests);
} else {
failed = run_tests(file, only_important_tests);
for (const auto& file : test_files) {
// don't fail fast, but any failure means we return false
if (failed) {
run_tests(file, only_important_tests);
} else {
failed = run_tests(file, only_important_tests);
}
}
return !failed;
} catch (std::exception& e) {
return false;
}
return !failed;
}
TEST(Formatter, FormatterTests) {