mirror of
https://github.com/xenia-project/elemental-forms.git
synced 2026-01-31 01:25:17 +01:00
463 lines
11 KiB
C++
463 lines
11 KiB
C++
/**
|
|
******************************************************************************
|
|
* Elemental Forms : a lightweight user interface framework *
|
|
******************************************************************************
|
|
* Copyright 2015 Ben Vanik. All rights reserved. Licensed as BSD 3-clause. *
|
|
* Portions ©2011-2015 Emil Segerås: https://github.com/fruxo/turbobadger *
|
|
******************************************************************************
|
|
*/
|
|
|
|
#include <cassert>
|
|
#include <cctype>
|
|
|
|
#include "el/parsing/text_parser.h"
|
|
#include "el/parsing/text_parser_stream.h"
|
|
#include "el/text/utf8.h"
|
|
#include "el/util/string.h"
|
|
|
|
namespace el {
|
|
namespace parsing {
|
|
|
|
// Returns true if |c| is an ASCII hexadecimal digit (0-9, a-f or A-F).
bool is_hex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
|
|
|
|
uint32_t parse_hex(char** inout_src, int max_count) {
|
|
auto src = *inout_src;
|
|
uint32_t hex = 0;
|
|
for (int i = 0; i < max_count; ++i) {
|
|
char c = *src;
|
|
if (!is_hex(c)) break;
|
|
hex <<= 4;
|
|
hex |= isdigit(c) ? c - '0' : tolower(c) - 'a' + 10;
|
|
src++;
|
|
}
|
|
*inout_src = src;
|
|
return hex;
|
|
}
|
|
|
|
// Unescapes backslash codes.
|
|
// This is done in place using the string both as source and destination.
|
|
void UnescapeString(char* str) {
|
|
// Fast forward to any escape sequence.
|
|
while (*str && *str != '\\') str++;
|
|
|
|
char *dst = str, *src = str;
|
|
while (*src) {
|
|
if (*src == '\\') {
|
|
bool code_found = true;
|
|
switch (src[1]) {
|
|
case 'a':
|
|
*dst = '\a';
|
|
break;
|
|
case 'b':
|
|
*dst = '\b';
|
|
break;
|
|
case 'f':
|
|
*dst = '\f';
|
|
break;
|
|
case 'n':
|
|
*dst = '\n';
|
|
break;
|
|
case 'r':
|
|
*dst = '\r';
|
|
break;
|
|
case 't':
|
|
*dst = '\t';
|
|
break;
|
|
case 'v':
|
|
*dst = '\v';
|
|
break;
|
|
case '0':
|
|
*dst = '\0';
|
|
break;
|
|
case '\"':
|
|
*dst = '\"';
|
|
break;
|
|
case '\'':
|
|
*dst = '\'';
|
|
break;
|
|
case '\\':
|
|
*dst = '\\';
|
|
break;
|
|
case 'x': // \xXX
|
|
case 'u': // \uXXXX
|
|
{
|
|
// This should be safe. A utf-8 character can be at most 4 bytes,
|
|
// and we have 4 bytes to use for \xXX and 6 for \uXXXX.
|
|
src += 2;
|
|
if (auto hex = parse_hex(&src, src[1] == 'x' ? 2 : 4)) {
|
|
dst += text::utf8::encode(hex, dst);
|
|
}
|
|
continue;
|
|
}
|
|
default:
|
|
code_found = false;
|
|
}
|
|
if (code_found) {
|
|
src += 2;
|
|
dst++;
|
|
continue;
|
|
}
|
|
}
|
|
*dst = *src;
|
|
dst++;
|
|
src++;
|
|
}
|
|
*dst = 0;
|
|
}
|
|
|
|
// Returns true if the first character of |str| is a space or a tab.
bool is_white_space(const char* str) {
  const char first = *str;
  return first == ' ' || first == '\t';
}
|
|
|
|
// Returns true if the given string starts with a color.
|
|
// Ex: #ffdd00, #fd0
|
|
bool is_start_of_color(const char* str) {
|
|
if (*str++ != '#') return false;
|
|
int digit_count = 0;
|
|
while (is_hex(*str)) {
|
|
str++;
|
|
digit_count++;
|
|
}
|
|
return digit_count == 8 || digit_count == 6 || digit_count == 4 ||
|
|
digit_count == 3;
|
|
}
|
|
|
|
// Returns true if the given string may be a node reference, such as language
// strings or ParseNodeTree references: it must begin with '@', and the token
// that follows (up to the first space or the end of the string) must not
// contain a colon.
bool is_start_of_reference(const char* str) {
  if (str[0] != '@') {
    return false;
  }
  for (const char* p = str + 1; *p != 0 && *p != ' '; ++p) {
    // A colon inside the token means it is a key, not a value.
    if (*p == ':') {
      return false;
    }
  }
  return true;
}
|
|
|
|
// Checks if the line is a comment or empty space. If it is, consume the leading
|
|
// whitespace from line.
|
|
bool is_space_or_comment(char** inout_line) {
|
|
char* tmp = *inout_line;
|
|
while (is_white_space(tmp)) {
|
|
tmp++;
|
|
}
|
|
if (*tmp == '#' || *tmp == 0) {
|
|
*inout_line = tmp;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool is_pending_multiline(const char* str) {
|
|
while (is_white_space(str)) {
|
|
str++;
|
|
}
|
|
return str[0] == '\\' && str[1] == 0;
|
|
}
|
|
|
|
// Checks if |buf| points at an unescaped closing quote of type |quote_type|.
// The run of backslashes directly before |buf| (never looking further back
// than |buf_start|) is inspected: an odd number of them means the quote itself
// is escaped and therefore is not the end quote.
bool IsEndQuote(const char* buf_start, const char* buf, const char quote_type) {
  if (*buf != quote_type) {
    return false;
  }
  // Toggle once per preceding backslash; ends up true for an odd count.
  bool escaped = false;
  for (const char* p = buf; p > buf_start && p[-1] == '\\'; --p) {
    escaped = !escaped;
  }
  return !escaped;
}
|
|
|
|
// Reads all data available from |stream|, splits it into lines and feeds each
// line to OnLine, which in turn reports to |target|.
//
// Parser state (indentation, line number, multiline tracking) is reset at the
// start of every call. Data is pulled in chunks via stream->GetMoreData until
// it returns 0. A line may span several chunks; the partial text is kept in a
// line buffer until its terminating '\n' arrives. A trailing '\r' directly
// before the '\n' is stripped. Any text left after the last '\n' is delivered
// as a final line. Always returns Status::kOk.
TextParser::Status TextParser::Read(TextParserStream* stream,
                                    TextParserTarget* target) {
  util::StringBuilder line(1024);
  util::StringBuilder work(1024);

  current_indent = 0;
  current_line_nr = 1;
  pending_multiline = false;
  multi_line_sub_level = 0;

  while (size_t read_len = stream->GetMoreData(work.data(), work.capacity())) {
    char* buf = work.data();

    // Skip BOM (BYTE ORDER MARK) character, often in the beginning of UTF-8
    // documents. The BOM is exactly three bytes (EF BB BF), so a chunk of
    // exactly three bytes must be accepted here too.
    if (current_line_nr == 1 && read_len >= 3 &&
        static_cast<uint8_t>(buf[0]) == 0xEF &&
        static_cast<uint8_t>(buf[1]) == 0xBB &&
        static_cast<uint8_t>(buf[2]) == 0xBF) {
      read_len -= 3;
      buf += 3;
    }

    size_t line_pos = 0;
    while (true) {
      // Find line end.
      size_t line_start = line_pos;
      while (line_pos < read_len && buf[line_pos] != '\n') {
        line_pos++;
      }

      if (line_pos < read_len) {
        // We have a complete line. Add this chunk's part of it to whatever
        // was carried over from previous chunks.
        size_t line_len = line_pos - line_start;
        line.Append(buf + line_start, line_len);

        // Strip away trailing '\r' if the line has it. This looks at the
        // accumulated line so that a "\r\n" split across two chunks is
        // handled as well.
        char* linebuf = line.data();
        size_t linebuf_len = line.GetAppendPos();
        if (linebuf_len > 0 && linebuf[linebuf_len - 1] == '\r') {
          linebuf[linebuf_len - 1] = 0;
        }

        // Terminate the line string.
        line.Append("", 1);

        // Handle line.
        OnLine(line.data(), target);
        current_line_nr++;

        line.ResetAppendPos();
        line_pos++;  // Skip this \n
        // Find next line.
        continue;
      }
      // No more lines here so push the rest and break for more data.
      line.Append(buf + line_start, read_len - line_start);
      break;
    }
  }
  // Deliver a final line that was not terminated by '\n'.
  if (line.GetAppendPos()) {
    line.Append("", 1);
    OnLine(line.data(), target);
    current_line_nr++;
  }
  return Status::kOk;
}
|
|
|
|
void TextParser::OnLine(char* line, TextParserTarget* target) {
|
|
if (is_space_or_comment(&line)) {
|
|
if (*line == '#') {
|
|
target->OnComment(current_line_nr, line + 1);
|
|
}
|
|
return;
|
|
}
|
|
if (pending_multiline) {
|
|
OnMultiline(line, target);
|
|
return;
|
|
}
|
|
|
|
// Check indent.
|
|
int indent = 0;
|
|
while (line[indent] == '\t' && line[indent] != 0) {
|
|
indent++;
|
|
}
|
|
line += indent;
|
|
|
|
if (indent - current_indent > 1) {
|
|
target->OnError(current_line_nr, "Indentation error. (Line skipped)");
|
|
return;
|
|
}
|
|
|
|
if (indent > current_indent) {
|
|
// FIX: Report indentation error if more than 1 higher!
|
|
assert(indent - current_indent == 1);
|
|
target->Enter();
|
|
current_indent++;
|
|
} else if (indent < current_indent) {
|
|
while (indent < current_indent) {
|
|
target->Leave();
|
|
current_indent--;
|
|
}
|
|
}
|
|
|
|
if (line[0] == 0) {
|
|
return;
|
|
} else {
|
|
char* token = line;
|
|
// Read line while consuming it and copy over to token buf.
|
|
while (!is_white_space(line) && line[0] != 0) {
|
|
line++;
|
|
}
|
|
size_t token_len = line - token;
|
|
// Consume any white space after the token.
|
|
while (is_white_space(line)) {
|
|
line++;
|
|
}
|
|
|
|
bool is_compact_line = token_len && token[token_len - 1] == ':';
|
|
|
|
Value value;
|
|
if (is_compact_line) {
|
|
token_len--;
|
|
token[token_len] = 0;
|
|
|
|
// Check if the first argument is not a child but the value for this
|
|
// token.
|
|
if (*line == '[' || *line == '\"' || *line == '\'' ||
|
|
util::is_start_of_number(line) || is_start_of_color(line) ||
|
|
is_start_of_reference(line)) {
|
|
ConsumeValue(&value, &line);
|
|
|
|
if (pending_multiline) {
|
|
// The value wrapped to the next line, so we should remember the token
|
|
// and continue.
|
|
multi_line_token = token;
|
|
return;
|
|
}
|
|
}
|
|
} else if (token[token_len]) {
|
|
token[token_len] = 0;
|
|
UnescapeString(line);
|
|
value.parse_string(line, Value::Set::kAsStatic);
|
|
}
|
|
target->OnToken(current_line_nr, token, &value);
|
|
|
|
if (is_compact_line) {
|
|
OnCompactLine(line, target);
|
|
}
|
|
}
|
|
}
|
|
|
|
void TextParser::OnCompactLine(char* line, TextParserTarget* target) {
|
|
target->Enter();
|
|
while (*line) {
|
|
// Consume any whitespace.
|
|
while (is_white_space(line)) {
|
|
line++;
|
|
}
|
|
|
|
// Find token.
|
|
char* token = line;
|
|
while (*line != ':' && *line != 0) {
|
|
line++;
|
|
}
|
|
if (!*line) {
|
|
// Syntax error, expected token.
|
|
break;
|
|
}
|
|
*line++ = 0;
|
|
|
|
// Consume any whitespace.
|
|
while (is_white_space(line)) {
|
|
line++;
|
|
}
|
|
|
|
Value v;
|
|
ConsumeValue(&v, &line);
|
|
|
|
if (pending_multiline) {
|
|
// The value wrapped to the next line, so we should remember the token and
|
|
// continue.
|
|
multi_line_token = token;
|
|
// Since we need to call target->Leave when the multiline is ready, set
|
|
// multi_line_sub_level.
|
|
multi_line_sub_level = 1;
|
|
return;
|
|
}
|
|
|
|
// Ready.
|
|
target->OnToken(current_line_nr, token, &v);
|
|
}
|
|
|
|
target->Leave();
|
|
}
|
|
|
|
void TextParser::OnMultiline(char* line, TextParserTarget* target) {
|
|
// Consume any whitespace.
|
|
while (is_white_space(line)) {
|
|
line++;
|
|
}
|
|
|
|
Value value;
|
|
ConsumeValue(&value, &line);
|
|
|
|
if (!pending_multiline) {
|
|
// Ready with all lines.
|
|
value.set_string(multi_line_value.data(), Value::Set::kAsStatic);
|
|
target->OnToken(current_line_nr, multi_line_token.c_str(), &value);
|
|
|
|
if (multi_line_sub_level) {
|
|
target->Leave();
|
|
}
|
|
|
|
// Reset.
|
|
multi_line_value.SetAppendPos(0);
|
|
multi_line_sub_level = 0;
|
|
}
|
|
}
|
|
|
|
// Consumes one value starting at |*inout_line| and stores it in |dst_value|.
// The line buffer is modified in place (terminators are written, escapes are
// resolved) and |*inout_line| is advanced past the consumed text.
//
// A value starting with a single or double quote runs to the matching
// unescaped quote and is stored with set_string; trailing whitespace and one
// comma after it are skipped. Any other value runs to the next comma or the
// end of the line and is stored with parse_string.
//
// Afterwards pending_multiline is updated from what remains on the line, and
// if a multiline value is in progress or just started, the value's string
// form is appended to multi_line_value.
void TextParser::ConsumeValue(Value* dst_value, char** inout_line) {
  char* cursor = *inout_line;
  const char first = *cursor;

  if (first == '\"' || first == '\'') {
    // Quoted string: the text begins right after the opening quote.
    char* const text = ++cursor;
    while (*cursor != 0 && !IsEndQuote(text, cursor, first)) {
      ++cursor;
    }
    // Replace the closing quote (if there is one) with a terminator.
    if (*cursor == first) {
      *cursor = 0;
      ++cursor;
    }

    // Skip whitespace and at most one separating comma.
    for (; is_white_space(cursor); ++cursor) {
    }
    if (*cursor == ',') {
      ++cursor;
    }

    UnescapeString(text);
    dst_value->set_string(text, Value::Set::kAsStatic);
  } else {
    // Unquoted: the text runs to the next comma or the end of the line.
    char* const text = cursor;
    for (; *cursor != 0 && *cursor != ','; ++cursor) {
    }
    // Replace the comma (if there is one) with a terminator.
    if (*cursor == ',') {
      *cursor = 0;
      ++cursor;
    }

    UnescapeString(text);
    dst_value->parse_string(text, Value::Set::kAsStatic);
  }

  // Determine whether the value continues on the following line.
  const bool was_pending = pending_multiline;
  pending_multiline = is_pending_multiline(cursor);

  // Collect every piece of a multiline value, including the last one.
  if (was_pending || pending_multiline) {
    multi_line_value.AppendString(dst_value->as_string());
  }

  *inout_line = cursor;
}
|
|
|
|
} // namespace parsing
|
|
} // namespace el
|