rewrite identifier as regular scanner

This commit is contained in:
Dmitriy Pleshevskiy 2022-12-04 18:35:37 +03:00
parent 8fcd012702
commit fc5a0fff47
Signed by: pleshevskiy
GPG key ID: 1B59187B161C0215
8 changed files with 2058 additions and 883 deletions

View file

@ -9,7 +9,6 @@
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
"src/scanner.c",
# If your language uses an external scanner, add it here.
],
"cflags_c": [

View file

@ -1,3 +1,3 @@
Foo bar
-Biz-baz-
Foo Bar -- Biz Baz
-Bar-Foo- <- -Baz-Biz-

View file

@ -1,8 +1,6 @@
module.exports = grammar({
name: "d2",
externals: ($) => [$._identifier],
rules: {
// TODO: add the actual grammar rules
source_file: ($) => repeat($._definition),
@ -35,12 +33,32 @@ module.exports = grammar({
identifier: ($) => $._identifier,
arrow: ($) =>
_identifier: ($) =>
prec.right(
seq(
repeat(" "),
optional($._dash),
choice(
seq("--", repeat("-")),
seq("<-", repeat("-")),
seq("<-", repeat("-"), ">"),
seq(repeat("-"), "->")
$._word,
repeat1(seq($._word, choice(repeat(" "), $._dash), $._word))
),
optional($._dash),
repeat(" ")
)
),
_dash: ($) => token.immediate("-"),
_word: ($) => /[\w\d]+/,
arrow: ($) =>
prec.left(
choice(
seq("--", repeat($._dash)),
seq("<-", repeat($._dash)),
seq("<-", repeat($._dash), ">"),
seq(repeat($._dash), "->")
)
),
_unquoted_string: ($) => /[^\n;{]+/,

View file

@ -170,7 +170,109 @@
"type": "SYMBOL",
"name": "_identifier"
},
"_identifier": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": " "
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_dash"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_word"
},
{
"type": "REPEAT1",
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_word"
},
{
"type": "CHOICE",
"members": [
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": " "
}
},
{
"type": "SYMBOL",
"name": "_dash"
}
]
},
{
"type": "SYMBOL",
"name": "_word"
}
]
}
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_dash"
},
{
"type": "BLANK"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": " "
}
}
]
}
},
"_dash": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "-"
}
},
"_word": {
"type": "PATTERN",
"value": "[\\w\\d]+"
},
"arrow": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{
@ -183,8 +285,8 @@
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": "-"
"type": "SYMBOL",
"name": "_dash"
}
}
]
@ -199,8 +301,8 @@
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": "-"
"type": "SYMBOL",
"name": "_dash"
}
}
]
@ -215,8 +317,8 @@
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": "-"
"type": "SYMBOL",
"name": "_dash"
}
},
{
@ -231,8 +333,8 @@
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": "-"
"type": "SYMBOL",
"name": "_dash"
}
},
{
@ -242,6 +344,7 @@
]
}
]
}
},
"_unquoted_string": {
"type": "PATTERN",
@ -330,12 +433,7 @@
],
"conflicts": [],
"precedences": [],
"externals": [
{
"type": "SYMBOL",
"name": "_identifier"
}
],
"externals": [],
"inline": [],
"supertypes": []
}

View file

@ -98,6 +98,10 @@
"type": "\n",
"named": false
},
{
"type": " ",
"named": false
},
{
"type": "\"",
"named": false
@ -106,10 +110,6 @@
"type": "'",
"named": false
},
{
"type": "-",
"named": false
},
{
"type": "--",
"named": false

File diff suppressed because it is too large Load diff

View file

@ -1,71 +0,0 @@
#include <tree_sitter/parser.h>
#include <wctype.h>
/*
 * Token kinds this external scanner can emit.
 *
 * The enumerator value is used both as the index into `valid_symbols` and as
 * the value stored in `lexer->result_symbol`.
 * NOTE(review): presumably the order must mirror the grammar's `externals`
 * list -- confirm against grammar.js.
 */
enum TokenType {
  IDENTIFIER = 0,
};
/*
 * Create the scanner payload.
 *
 * This scanner keeps no state between calls, so nothing is allocated and the
 * payload handed back is NULL.
 *
 * The parameter list is spelled `(void)` so the definition is a real
 * prototype rather than an old-style declarator with unspecified arguments.
 */
void *tree_sitter_d2_external_scanner_create(void) {
  return NULL;
}
/*
 * Release the scanner payload.
 *
 * Intentionally a no-op: the payload argument is never touched.
 */
void tree_sitter_d2_external_scanner_destroy(void *payload) {
  (void)payload;
}
/*
 * Serialize the scanner state into `buffer`.
 *
 * Nothing is written; the function always reports a length of 0.
 */
unsigned tree_sitter_d2_external_scanner_serialize(void *p, char *buffer) {
  (void)p;
  (void)buffer;
  return 0u;
}
/*
 * Restore the scanner state from the first `n` bytes of `b`.
 *
 * Intentionally a no-op: none of the arguments are read.
 */
void tree_sitter_d2_external_scanner_deserialize(void *p, const char *b, unsigned n) {
  (void)p;
  (void)b;
  (void)n;
}
/*
 * Move the lexer past the current lookahead character, passing `false` as
 * the lexer's second ("skip") argument.
 * NOTE(review): per the tree-sitter lexer API this should keep the character
 * inside the token being scanned -- confirm against tree_sitter/parser.h.
 */
static void advance(TSLexer *lexer) {
  const bool as_skip = false;
  lexer->advance(lexer, as_skip);
}
/*
 * Move the lexer past the current lookahead character, passing `true` as
 * the lexer's second ("skip") argument.
 * NOTE(review): per the tree-sitter lexer API this should leave the character
 * out of the token being scanned -- confirm against tree_sitter/parser.h.
 */
static void skip(TSLexer *lexer) {
  const bool as_skip = true;
  lexer->advance(lexer, as_skip);
}
/*
 * True when `ch` is one of the characters that stop identifier scanning:
 * '{', ':', ';', '<', '.', a newline, or NUL.
 */
static bool ends_identifier(int32_t ch) {
  return ch == '{' || ch == ':' || ch == ';' || ch == '<' || ch == '.' ||
         ch == '\n' || ch == '\0';
}

/*
 * Scan an IDENTIFIER token.
 *
 * Leading whitespace is skipped. Characters are then consumed one at a time
 * until a terminator character is seen, or until a '-' is immediately
 * followed by '-' or '>'. The token end is marked at the top of every round,
 * i.e. before any whitespace run and before the next character is consumed,
 * so whitespace (or a dash) consumed just before stopping lies past the last
 * marked end.
 *
 * Returns true when at least one round consumed a character before the scan
 * stopped, false otherwise.
 */
static bool scan_identifier(TSLexer *lexer) {
  bool found_content = false;

  lexer->result_symbol = IDENTIFIER;

  while (iswspace(lexer->lookahead)) {
    skip(lexer);
  }

  while (true) {
    /* Everything consumed so far belongs to the token. */
    lexer->mark_end(lexer);

    /* Consume interior whitespace; a newline is a terminator, so the run
     * never crosses a line. */
    while (!ends_identifier(lexer->lookahead) && iswspace(lexer->lookahead)) {
      advance(lexer);
    }
    if (ends_identifier(lexer->lookahead)) {
      return found_content;
    }

    /* Consume one character; "--" and "->" stop the scan after the first
     * dash has been consumed but before the end mark is moved again. */
    const bool was_dash = lexer->lookahead == '-';
    advance(lexer);
    if (was_dash && (lexer->lookahead == '-' || lexer->lookahead == '>')) {
      return found_content;
    }

    found_content = true;
  }
}
/*
 * External scanner entry point.
 *
 * Runs the identifier scan only when `valid_symbols[IDENTIFIER]` is set and
 * returns its result; otherwise returns false without touching the lexer.
 * `payload` is not used.
 */
bool tree_sitter_d2_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  (void)payload;

  if (!valid_symbols[IDENTIFIER]) {
    return false;
  }
  return scan_identifier(lexer);
}

View file

@ -32,6 +32,41 @@ biz<-baz
)
)
==================
Formatted connection
==================
foo -- bar
biz -> baz
biz <-> baz
biz <- baz
---
(source_file
(connection
(identifier)
(arrow)
(identifier)
)
(connection
(identifier)
(arrow)
(identifier)
)
(connection
(identifier)
(arrow)
(identifier)
)
(connection
(identifier)
(arrow)
(identifier)
)
)
=============================
Complex identifier connection
=============================