diff --git a/binding.gyp b/binding.gyp index efea429..493bc06 100644 --- a/binding.gyp +++ b/binding.gyp @@ -9,7 +9,7 @@ "sources": [ "bindings/node/binding.cc", "src/parser.c", - "src/scanner.c", + "src/scanner.cc", ], "cflags_c": [ "-std=c99", diff --git a/bindings/rust/build.rs b/bindings/rust/build.rs index f7aaf16..618e90a 100644 --- a/bindings/rust/build.rs +++ b/bindings/rust/build.rs @@ -13,9 +13,11 @@ fn main() { // If your language uses an external scanner written in C, // then include this block of code: + /* let scanner_path = src_dir.join("scanner.c"); c_config.file(&scanner_path); println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); + */ c_config.compile("parser"); println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); @@ -23,7 +25,6 @@ fn main() { // If your language uses an external scanner written in C++, // then include this block of code: - /* let mut cpp_config = cc::Build::new(); cpp_config.cpp(true); cpp_config.include(&src_dir); @@ -34,5 +35,4 @@ fn main() { cpp_config.file(&scanner_path); cpp_config.compile("scanner"); println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); - */ } diff --git a/grammar.js b/grammar.js index 8588958..ed4e121 100644 --- a/grammar.js +++ b/grammar.js @@ -15,7 +15,11 @@ const PREC = { module.exports = grammar({ name: "d2", - externals: ($) => [$._text_block_raw], + externals: ($) => [ + $._text_block_start, + $._text_block_end, + $._text_block_raw_text, + ], extras: ($) => [ /[ \f\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]/, @@ -129,16 +133,13 @@ module.exports = grammar({ text_block: ($) => choice( - seq("|", $._text_block_definition, "|"), - // References: https://github.com/terrastruct/d2-vim - seq("|`", $._text_block_definition, "`|") - ), - - _text_block_definition: ($) => - seq( - optional($.language), - /\s/, - optional(alias($._text_block_raw, $.raw_text)) + seq( + alias($._text_block_start, "|"), + optional($.language), + /\s/, + alias($._text_block_raw_text, $.raw_text), + alias($._text_block_end, "|") + ) ), language: ($) => /\w+/, diff --git a/queries/highlights.scm b/queries/highlights.scm index 504fa9c..cbc15a0 100644 --- a/queries/highlights.scm +++ b/queries/highlights.scm @@ -39,8 +39,6 @@ "{" "}" "|" - "|`" - "`|" ] @punctuation.bracket ; Special diff --git a/src/grammar.json b/src/grammar.json index cf506aa..babb67c 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -544,71 +544,47 @@ "type": "SEQ", "members": [ { - "type": "STRING", + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "_text_block_start" + }, + "named": false, "value": "|" }, { - "type": "SYMBOL", - "name": "_text_block_definition" + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "language" + }, + { + "type": "BLANK" + } + ] }, { - "type": "STRING", - "value": "|" - } - ] - }, - { - "type": "SEQ", - "members": [ - { - "type": "STRING", - "value": "|`" + "type": "PATTERN", + "value": "\\s" }, - { - "type": "SYMBOL", - "name": "_text_block_definition" - }, - { - "type": "STRING", - "value": "`|" - } - ] - } - ] - }, - "_text_block_definition": { - "type": "SEQ", - "members": [ - { - "type": "CHOICE", - "members": [ - { - "type": "SYMBOL", - "name": "language" - }, - { - "type": "BLANK" - } - ] - }, - { - "type": "PATTERN", - "value": "\\s" - }, - { - "type": "CHOICE", - "members": [ { "type": "ALIAS", "content": { "type": "SYMBOL", - "name": "_text_block_raw" + "name": "_text_block_raw_text" }, "named": true, "value": "raw_text" }, { - "type": "BLANK" + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "_text_block_end" + }, + "named": false, + "value": "|" } ] } @@ -1446,7 +1422,15 @@ "externals": [ { "type": "SYMBOL", - "name": "_text_block_raw" + "name": "_text_block_start" + }, + { + "type": "SYMBOL", + "name": "_text_block_end" + }, + { + "type": "SYMBOL", + "name": "_text_block_raw_text" } ], "inline": [], diff --git a/src/node-types.json b/src/node-types.json index cfd17d8..ee14e4c 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -311,7 +311,7 @@ "fields": {}, "children": { "multiple": true, - "required": false, + "required": true, "types": [ { "type": "language", @@ -352,10 +352,6 @@ "type": ";", "named": false }, - { - "type": "`|", - "named": false - }, { "type": "animated", "named": false @@ -488,10 +484,6 @@ "type": "|", "named": false }, - { - "type": "|`", - "named": false - }, { "type": "}", "named": false diff --git a/src/parser.c b/src/parser.c index 4115a50..5fa5ff9 100644 Binary files a/src/parser.c and b/src/parser.c differ diff --git a/src/scanner.c b/src/scanner.c deleted file mode 100644 index b3962b2..0000000 --- a/src/scanner.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include - -enum TokenType { - RAW_TEXT -}; - -void *tree_sitter_d2_external_scanner_create() { return NULL; } -void tree_sitter_d2_external_scanner_destroy(void *payload) { } -unsigned tree_sitter_d2_external_scanner_serialize(void *p, char *buffer) { return 0; } -void tree_sitter_d2_external_scanner_deserialize(void *p, const char *b, unsigned n) {} - -static void advance(TSLexer *lexer) { lexer->advance(lexer, false); } -static void skip(TSLexer *lexer) { lexer->advance(lexer, true); } - -static bool scan_raw_text(TSLexer *lexer) { - lexer->result_symbol = RAW_TEXT; - - if (lexer->lookahead == '`') { - advance(lexer); - } - - if (lexer->lookahead == '|') { - return false; - } - - for (bool has_content = false;; has_content = true) { - lexer->mark_end(lexer); - - if (lexer->lookahead == '\0') { - return has_content; - } - - while (iswspace(lexer->lookahead)) { - advance(lexer); - lexer->mark_end(lexer); - } - - if (lexer->lookahead == '`') { - advance(lexer); - } - - if (lexer->lookahead == '|') { - advance(lexer); - if (lexer->lookahead == '\n') { - return has_content; - } - } - - advance(lexer); - } - - return false; -} - -bool tree_sitter_d2_external_scanner_scan( - void *payload, - TSLexer *lexer, - const bool *valid_symbols -) { - if (valid_symbols[RAW_TEXT]) { - return scan_raw_text(lexer); - } - - return false; -} diff --git a/src/scanner.cc b/src/scanner.cc new file mode 100644 index 0000000..c9803ce --- /dev/null +++ b/src/scanner.cc @@ -0,0 +1,159 @@ +#include +#include +#include + +namespace { + using std::vector; + using std::iswpunct; + + enum TokenType { + TEXT_BLOCK_START, + TEXT_BLOCK_END, + TEXT_BLOCK_RAW_TEXT, + }; + + struct Scanner { + vector escape_char_stack; + + Scanner() { + deserialize(NULL, 0); + } + + unsigned serialize(char *buffer) { + size_t i = 0; + size_t escape_char_count = escape_char_stack.size(); + buffer[i++] = escape_char_count; + + vector::iterator + iter = escape_char_stack.begin(), + end = escape_char_stack.end(); + + for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) { + buffer[i++] = *iter; + } + + return i; + } + + void deserialize(const char *buffer, unsigned length) { + escape_char_stack.clear(); + if (length == 0) return; + size_t i = 0; + size_t escape_char_count = (uint8_t)buffer[i++]; + for (; i <= escape_char_count; i++) { + escape_char_stack.push_back(buffer[i]); + } + } + + void advance(TSLexer *lexer) { + lexer->advance(lexer, false); + } + + void skip(TSLexer *lexer) { + lexer->advance(lexer, true); + } + + bool is_text_block_end(TSLexer *lexer) { + vector::reverse_iterator + iter = escape_char_stack.rbegin(), + end = escape_char_stack.rend(); + + for (; iter != end; ++iter) { + if (lexer->lookahead != *iter) { + return false; + } + advance(lexer); + } + + return true; + } + + bool scan(TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[TEXT_BLOCK_START] && escape_char_stack.empty()) { + lexer->result_symbol = TEXT_BLOCK_START; + lexer->mark_end(lexer); + + while (lexer->lookahead != 0 && iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (lexer->lookahead != '|') { + return false; + } + + advance(lexer); + escape_char_stack.push_back('|'); + + if (!iswpunct(lexer->lookahead)) { + lexer->mark_end(lexer); + return true; + } + + int16_t escape_char = lexer->lookahead; + while (lexer->lookahead == escape_char) { + escape_char_stack.push_back(escape_char); + advance(lexer); + } + + lexer->mark_end(lexer); + + return true; + } else if (valid_symbols[TEXT_BLOCK_END] && !escape_char_stack.empty()) { + lexer->result_symbol = TEXT_BLOCK_END; + lexer->mark_end(lexer); + + while (lexer->lookahead != 0 && iswspace(lexer->lookahead)) { + skip(lexer); + } + + if (is_text_block_end(lexer)) { + lexer->mark_end(lexer); + escape_char_stack.clear(); + return true; + } + } else if (valid_symbols[TEXT_BLOCK_RAW_TEXT] && !escape_char_stack.empty()) { + lexer->result_symbol = TEXT_BLOCK_RAW_TEXT; + lexer->mark_end(lexer); + + while (lexer->lookahead != 0 && !is_text_block_end(lexer)) { + advance(lexer); + lexer->mark_end(lexer); + } + + return true; + } + + return false; + + } + }; + +} + +extern "C" { + + void *tree_sitter_d2_external_scanner_create() { + return new Scanner(); + } + + bool tree_sitter_d2_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + Scanner *scanner = static_cast(payload); + return scanner->scan(lexer, valid_symbols); + } + + unsigned tree_sitter_d2_external_scanner_serialize(void *payload, char *buffer) { + Scanner *scanner = static_cast(payload); + return scanner->serialize(buffer); + } + + void tree_sitter_d2_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { + Scanner *scanner = static_cast(payload); + scanner->deserialize(buffer, length); + } + + void tree_sitter_d2_external_scanner_destroy(void *payload) { + Scanner *scanner = static_cast(payload); + delete scanner; + } + +} diff --git a/test/corpus/shape.txt b/test/corpus/shape.txt index 29c2d87..d7f751e 100644 --- a/test/corpus/shape.txt +++ b/test/corpus/shape.txt @@ -184,3 +184,28 @@ foo: |`go ) ) ) + +================================================================================ +Online text block +================================================================================ +foo: | helo world | + +bar: |%%md ## hello world %%| + +-------------------------------------------------------------------------------- + +(source_file + (shape + (shape_key) + (text_block + (raw_text) + ) + ) + (shape + (shape_key) + (text_block + (language) + (raw_text) + ) + ) +) diff --git a/tree-sitter-d2.wasm b/tree-sitter-d2.wasm index 3160194..bca3b6a 100755 Binary files a/tree-sitter-d2.wasm and b/tree-sitter-d2.wasm differ