Add support for any escape character in text blocks

See more here https://github.com/terrastruct/d2/issues/420
This commit is contained in:
Dmitriy Pleshevskiy 2022-12-10 22:56:41 +03:00
parent 24cdec0cb3
commit 5eb0507baa
Signed by: pleshevskiy
GPG key ID: 1B59187B161C0215
11 changed files with 235 additions and 142 deletions

View file

@ -9,7 +9,7 @@
"sources": [ "sources": [
"bindings/node/binding.cc", "bindings/node/binding.cc",
"src/parser.c", "src/parser.c",
"src/scanner.c", "src/scanner.cc",
], ],
"cflags_c": [ "cflags_c": [
"-std=c99", "-std=c99",

View file

@ -13,9 +13,11 @@ fn main() {
// If your language uses an external scanner written in C, // If your language uses an external scanner written in C,
// then include this block of code: // then include this block of code:
/*
let scanner_path = src_dir.join("scanner.c"); let scanner_path = src_dir.join("scanner.c");
c_config.file(&scanner_path); c_config.file(&scanner_path);
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
*/
c_config.compile("parser"); c_config.compile("parser");
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap()); println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
@ -23,7 +25,6 @@ fn main() {
// If your language uses an external scanner written in C++, // If your language uses an external scanner written in C++,
// then include this block of code: // then include this block of code:
/*
let mut cpp_config = cc::Build::new(); let mut cpp_config = cc::Build::new();
cpp_config.cpp(true); cpp_config.cpp(true);
cpp_config.include(&src_dir); cpp_config.include(&src_dir);
@ -34,5 +35,4 @@ fn main() {
cpp_config.file(&scanner_path); cpp_config.file(&scanner_path);
cpp_config.compile("scanner"); cpp_config.compile("scanner");
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap()); println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
*/
} }

View file

@ -15,7 +15,11 @@ const PREC = {
module.exports = grammar({ module.exports = grammar({
name: "d2", name: "d2",
externals: ($) => [$._text_block_raw], externals: ($) => [
$._text_block_start,
$._text_block_end,
$._text_block_raw_text,
],
extras: ($) => [ extras: ($) => [
/[ \f\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]/, /[ \f\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]/,
@ -129,16 +133,13 @@ module.exports = grammar({
text_block: ($) => text_block: ($) =>
choice( choice(
seq("|", $._text_block_definition, "|"), seq(
// References: https://github.com/terrastruct/d2-vim alias($._text_block_start, "|"),
seq("|`", $._text_block_definition, "`|") optional($.language),
), /\s/,
alias($._text_block_raw_text, $.raw_text),
_text_block_definition: ($) => alias($._text_block_end, "|")
seq( )
optional($.language),
/\s/,
optional(alias($._text_block_raw, $.raw_text))
), ),
language: ($) => /\w+/, language: ($) => /\w+/,

View file

@ -39,8 +39,6 @@
"{" "{"
"}" "}"
"|" "|"
"|`"
"`|"
] @punctuation.bracket ] @punctuation.bracket
; Special ; Special

View file

@ -544,71 +544,47 @@
"type": "SEQ", "type": "SEQ",
"members": [ "members": [
{ {
"type": "STRING", "type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_text_block_start"
},
"named": false,
"value": "|" "value": "|"
}, },
{ {
"type": "SYMBOL", "type": "CHOICE",
"name": "_text_block_definition" "members": [
{
"type": "SYMBOL",
"name": "language"
},
{
"type": "BLANK"
}
]
}, },
{ {
"type": "STRING", "type": "PATTERN",
"value": "|" "value": "\\s"
}
]
},
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "|`"
}, },
{
"type": "SYMBOL",
"name": "_text_block_definition"
},
{
"type": "STRING",
"value": "`|"
}
]
}
]
},
"_text_block_definition": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "language"
},
{
"type": "BLANK"
}
]
},
{
"type": "PATTERN",
"value": "\\s"
},
{
"type": "CHOICE",
"members": [
{ {
"type": "ALIAS", "type": "ALIAS",
"content": { "content": {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_text_block_raw" "name": "_text_block_raw_text"
}, },
"named": true, "named": true,
"value": "raw_text" "value": "raw_text"
}, },
{ {
"type": "BLANK" "type": "ALIAS",
"content": {
"type": "SYMBOL",
"name": "_text_block_end"
},
"named": false,
"value": "|"
} }
] ]
} }
@ -1446,7 +1422,15 @@
"externals": [ "externals": [
{ {
"type": "SYMBOL", "type": "SYMBOL",
"name": "_text_block_raw" "name": "_text_block_start"
},
{
"type": "SYMBOL",
"name": "_text_block_end"
},
{
"type": "SYMBOL",
"name": "_text_block_raw_text"
} }
], ],
"inline": [], "inline": [],

View file

@ -311,7 +311,7 @@
"fields": {}, "fields": {},
"children": { "children": {
"multiple": true, "multiple": true,
"required": false, "required": true,
"types": [ "types": [
{ {
"type": "language", "type": "language",
@ -352,10 +352,6 @@
"type": ";", "type": ";",
"named": false "named": false
}, },
{
"type": "`|",
"named": false
},
{ {
"type": "animated", "type": "animated",
"named": false "named": false
@ -488,10 +484,6 @@
"type": "|", "type": "|",
"named": false "named": false
}, },
{
"type": "|`",
"named": false
},
{ {
"type": "}", "type": "}",
"named": false "named": false

Binary file not shown.

View file

@ -1,66 +0,0 @@
#include <tree_sitter/parser.h>
#include <wctype.h>
/* Kinds of external tokens this scanner can produce. RAW_TEXT is the only
 * one; it is the index checked in `valid_symbols` and the value stored in
 * `lexer->result_symbol` by the scanning code below. */
enum TokenType {
RAW_TEXT
};
/* Lifecycle hooks of the external scanner. This scanner keeps no state
 * between calls, so every hook is a stub. */

/* Returns NULL: there is no payload to allocate. */
void *tree_sitter_d2_external_scanner_create() { return NULL; }
/* Nothing to free, because create() never allocates. */
void tree_sitter_d2_external_scanner_destroy(void *payload) { }
/* Writes nothing into `buffer` and reports 0 bytes of serialized state. */
unsigned tree_sitter_d2_external_scanner_serialize(void *p, char *buffer) { return 0; }
/* Ignores the buffer: there is no state to restore. */
void tree_sitter_d2_external_scanner_deserialize(void *p, const char *b, unsigned n) {}
/* Moves the lexer forward one character, passing `false` as the skip flag
 * (per the tree-sitter lexer API the character then counts as part of the
 * token being scanned). */
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
/* Moves the lexer forward one character, passing `true` as the skip flag
 * (per the tree-sitter lexer API the character is treated as whitespace
 * outside the token). */
static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
/* Scans the raw body of a text block as a RAW_TEXT token.
 *
 * The token end is tracked with `mark_end`, so characters consumed after the
 * last mark (the closing delimiter) are looked at but not included in the
 * token. A '|' immediately followed by '\n' is treated as the closing
 * delimiter; an optional '`' directly before that '|' is likewise left out
 * of the token.
 *
 * Returns true when at least one loop iteration completed before the closing
 * delimiter or end of input was reached, false otherwise (including when the
 * body starts directly with '|', optionally preceded by '`'). */
static bool scan_raw_text(TSLexer *lexer) {
lexer->result_symbol = RAW_TEXT;
/* Step over a leading backtick so that a body starting with "`|" is
 * rejected by the check that follows. */
if (lexer->lookahead == '`') {
advance(lexer);
}
/* A body that begins with '|' produces no token. */
if (lexer->lookahead == '|') {
return false;
}
/* `has_content` is false only during the first iteration; the loop's
 * increment expression sets it to true for every later one. */
for (bool has_content = false;; has_content = true) {
/* Everything consumed so far belongs to the token. */
lexer->mark_end(lexer);
/* End of input: stop, reporting whether anything was scanned. */
if (lexer->lookahead == '\0') {
return has_content;
}
/* Whitespace is part of the token; the mark is moved after each
 * whitespace character. */
while (iswspace(lexer->lookahead)) {
advance(lexer);
lexer->mark_end(lexer);
}
/* Look past an optional backtick without moving the mark. */
if (lexer->lookahead == '`') {
advance(lexer);
}
if (lexer->lookahead == '|') {
advance(lexer);
/* "|\n" closes the block; the token ends at the last mark, i.e.
 * before the optional backtick and the '|'. */
if (lexer->lookahead == '\n') {
return has_content;
}
}
/* Not a closing delimiter: consume one more character; the next
 * iteration's mark_end folds everything consumed into the token. */
advance(lexer);
}
/* Not reachable: the loop above only exits through `return`. */
return false;
}
/* Entry point called by the parser. Tries to scan a RAW_TEXT token when the
 * parser accepts one at the current position; otherwise reports that no
 * external token was recognised. `payload` is unused because the scanner is
 * stateless. */
bool tree_sitter_d2_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  /* Short-circuit keeps scan_raw_text from running (and touching the lexer)
   * unless RAW_TEXT is valid here. */
  return valid_symbols[RAW_TEXT] && scan_raw_text(lexer);
}

159
src/scanner.cc Normal file
View file

@ -0,0 +1,159 @@
#include <tree_sitter/parser.h>
#include <vector>
#include <cwctype>
namespace {
using std::vector;
using std::iswpunct;
// Kinds of external tokens the scanner can emit. The values are used as
// indices into `valid_symbols` and as `lexer->result_symbol`, and they are
// listed in the same order as the grammar's `externals` array
// (_text_block_start, _text_block_end, _text_block_raw_text) - keep the two
// lists in sync.
enum TokenType {
TEXT_BLOCK_START,
TEXT_BLOCK_END,
TEXT_BLOCK_RAW_TEXT,
};
struct Scanner {
vector<int16_t> escape_char_stack;
Scanner() {
deserialize(NULL, 0);
}
unsigned serialize(char *buffer) {
size_t i = 0;
size_t escape_char_count = escape_char_stack.size();
buffer[i++] = escape_char_count;
vector<int16_t>::iterator
iter = escape_char_stack.begin(),
end = escape_char_stack.end();
for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
buffer[i++] = *iter;
}
return i;
}
void deserialize(const char *buffer, unsigned length) {
escape_char_stack.clear();
if (length == 0) return;
size_t i = 0;
size_t escape_char_count = (uint8_t)buffer[i++];
for (; i <= escape_char_count; i++) {
escape_char_stack.push_back(buffer[i]);
}
}
void advance(TSLexer *lexer) {
lexer->advance(lexer, false);
}
void skip(TSLexer *lexer) {
lexer->advance(lexer, true);
}
bool is_text_block_end(TSLexer *lexer) {
vector<int16_t>::reverse_iterator
iter = escape_char_stack.rbegin(),
end = escape_char_stack.rend();
for (; iter != end; ++iter) {
if (lexer->lookahead != *iter) {
return false;
}
advance(lexer);
}
return true;
}
bool scan(TSLexer *lexer, const bool *valid_symbols) {
if (valid_symbols[TEXT_BLOCK_START] && escape_char_stack.empty()) {
lexer->result_symbol = TEXT_BLOCK_START;
lexer->mark_end(lexer);
while (lexer->lookahead != 0 && iswspace(lexer->lookahead)) {
skip(lexer);
}
if (lexer->lookahead != '|') {
return false;
}
advance(lexer);
escape_char_stack.push_back('|');
if (!iswpunct(lexer->lookahead)) {
lexer->mark_end(lexer);
return true;
}
int16_t escape_char = lexer->lookahead;
while (lexer->lookahead == escape_char) {
escape_char_stack.push_back(escape_char);
advance(lexer);
}
lexer->mark_end(lexer);
return true;
} else if (valid_symbols[TEXT_BLOCK_END] && !escape_char_stack.empty()) {
lexer->result_symbol = TEXT_BLOCK_END;
lexer->mark_end(lexer);
while (lexer->lookahead != 0 && iswspace(lexer->lookahead)) {
skip(lexer);
}
if (is_text_block_end(lexer)) {
lexer->mark_end(lexer);
escape_char_stack.clear();
return true;
}
} else if (valid_symbols[TEXT_BLOCK_RAW_TEXT] && !escape_char_stack.empty()) {
lexer->result_symbol = TEXT_BLOCK_RAW_TEXT;
lexer->mark_end(lexer);
while (lexer->lookahead != 0 && !is_text_block_end(lexer)) {
advance(lexer);
lexer->mark_end(lexer);
}
return true;
}
return false;
}
};
}
extern "C" {
void *tree_sitter_d2_external_scanner_create() {
return new Scanner();
}
bool tree_sitter_d2_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->scan(lexer, valid_symbols);
}
unsigned tree_sitter_d2_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = static_cast<Scanner *>(payload);
return scanner->serialize(buffer);
}
void tree_sitter_d2_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = static_cast<Scanner *>(payload);
scanner->deserialize(buffer, length);
}
void tree_sitter_d2_external_scanner_destroy(void *payload) {
Scanner *scanner = static_cast<Scanner *>(payload);
delete scanner;
}
}

View file

@ -184,3 +184,28 @@ foo: |`go
) )
) )
) )
================================================================================
One-line text block
================================================================================
foo: | helo world |
bar: |%%md ## hello world %%|
--------------------------------------------------------------------------------
(source_file
(shape
(shape_key)
(text_block
(raw_text)
)
)
(shape
(shape_key)
(text_block
(language)
(raw_text)
)
)
)

Binary file not shown.