rewrite identifier as regular scanner

This commit is contained in:
Dmitriy Pleshevskiy 2022-12-04 18:35:37 +03:00
parent 8fcd012702
commit fc5a0fff47
Signed by: pleshevskiy
GPG key ID: 1B59187B161C0215
8 changed files with 2058 additions and 883 deletions

View file

@ -9,7 +9,6 @@
"sources": [ "sources": [
"bindings/node/binding.cc", "bindings/node/binding.cc",
"src/parser.c", "src/parser.c",
"src/scanner.c",
# If your language uses an external scanner, add it here. # If your language uses an external scanner, add it here.
], ],
"cflags_c": [ "cflags_c": [

View file

@ -1,3 +1,3 @@
Foo bar Foo Bar -- Biz Baz
-Biz-baz- -Bar-Foo- <- -Baz-Biz-

View file

@ -1,8 +1,6 @@
module.exports = grammar({ module.exports = grammar({
name: "d2", name: "d2",
externals: ($) => [$._identifier],
rules: { rules: {
// TODO: add the actual grammar rules // TODO: add the actual grammar rules
source_file: ($) => repeat($._definition), source_file: ($) => repeat($._definition),
@ -35,12 +33,32 @@ module.exports = grammar({
identifier: ($) => $._identifier, identifier: ($) => $._identifier,
_identifier: ($) =>
prec.right(
seq(
repeat(" "),
optional($._dash),
choice(
$._word,
repeat1(seq($._word, choice(repeat(" "), $._dash), $._word))
),
optional($._dash),
repeat(" ")
)
),
_dash: ($) => token.immediate("-"),
_word: ($) => /[\w\d]+/,
arrow: ($) => arrow: ($) =>
choice( prec.left(
seq("--", repeat("-")), choice(
seq("<-", repeat("-")), seq("--", repeat($._dash)),
seq("<-", repeat("-"), ">"), seq("<-", repeat($._dash)),
seq(repeat("-"), "->") seq("<-", repeat($._dash), ">"),
seq(repeat($._dash), "->")
)
), ),
_unquoted_string: ($) => /[^\n;{]+/, _unquoted_string: ($) => /[^\n;{]+/,

View file

@ -170,78 +170,181 @@
"type": "SYMBOL", "type": "SYMBOL",
"name": "_identifier" "name": "_identifier"
}, },
"_identifier": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": " "
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_dash"
},
{
"type": "BLANK"
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_word"
},
{
"type": "REPEAT1",
"content": {
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_word"
},
{
"type": "CHOICE",
"members": [
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": " "
}
},
{
"type": "SYMBOL",
"name": "_dash"
}
]
},
{
"type": "SYMBOL",
"name": "_word"
}
]
}
}
]
},
{
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_dash"
},
{
"type": "BLANK"
}
]
},
{
"type": "REPEAT",
"content": {
"type": "STRING",
"value": " "
}
}
]
}
},
"_dash": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "-"
}
},
"_word": {
"type": "PATTERN",
"value": "[\\w\\d]+"
},
"arrow": { "arrow": {
"type": "CHOICE", "type": "PREC_LEFT",
"members": [ "value": 0,
{ "content": {
"type": "SEQ", "type": "CHOICE",
"members": [ "members": [
{ {
"type": "STRING", "type": "SEQ",
"value": "--" "members": [
}, {
{
"type": "REPEAT",
"content": {
"type": "STRING", "type": "STRING",
"value": "-" "value": "--"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_dash"
}
} }
} ]
] },
}, {
{ "type": "SEQ",
"type": "SEQ", "members": [
"members": [ {
{
"type": "STRING",
"value": "<-"
},
{
"type": "REPEAT",
"content": {
"type": "STRING", "type": "STRING",
"value": "-" "value": "<-"
},
{
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_dash"
}
} }
} ]
] },
}, {
{ "type": "SEQ",
"type": "SEQ", "members": [
"members": [ {
{
"type": "STRING",
"value": "<-"
},
{
"type": "REPEAT",
"content": {
"type": "STRING", "type": "STRING",
"value": "-" "value": "<-"
} },
}, {
{ "type": "REPEAT",
"type": "STRING", "content": {
"value": ">" "type": "SYMBOL",
} "name": "_dash"
] }
}, },
{ {
"type": "SEQ",
"members": [
{
"type": "REPEAT",
"content": {
"type": "STRING", "type": "STRING",
"value": "-" "value": ">"
} }
}, ]
{ },
"type": "STRING", {
"value": "->" "type": "SEQ",
} "members": [
] {
} "type": "REPEAT",
] "content": {
"type": "SYMBOL",
"name": "_dash"
}
},
{
"type": "STRING",
"value": "->"
}
]
}
]
}
}, },
"_unquoted_string": { "_unquoted_string": {
"type": "PATTERN", "type": "PATTERN",
@ -330,12 +433,7 @@
], ],
"conflicts": [], "conflicts": [],
"precedences": [], "precedences": [],
"externals": [ "externals": [],
{
"type": "SYMBOL",
"name": "_identifier"
}
],
"inline": [], "inline": [],
"supertypes": [] "supertypes": []
} }

View file

@ -98,6 +98,10 @@
"type": "\n", "type": "\n",
"named": false "named": false
}, },
{
"type": " ",
"named": false
},
{ {
"type": "\"", "type": "\"",
"named": false "named": false
@ -106,10 +110,6 @@
"type": "'", "type": "'",
"named": false "named": false
}, },
{
"type": "-",
"named": false
},
{ {
"type": "--", "type": "--",
"named": false "named": false

File diff suppressed because it is too large Load diff

View file

@ -1,71 +0,0 @@
#include <tree_sitter/parser.h>
#include <wctype.h>
/* External token kinds produced by this scanner; the value indexes valid_symbols. */
enum TokenType { IDENTIFIER = 0 };
/*
 * Scanner constructor. This scanner keeps no state, so there is no payload
 * to allocate and NULL is returned.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype rather than an old-style unspecified-argument declarator.
 */
void *tree_sitter_d2_external_scanner_create(void) {
  return NULL;
}
/* Scanner destructor: intentionally a no-op, the payload is never touched. */
void tree_sitter_d2_external_scanner_destroy(void *payload) {
  (void)payload;
}
/*
 * Serializes scanner state into buffer. There is no state to save, so the
 * buffer is left untouched and the reported length is zero.
 */
unsigned tree_sitter_d2_external_scanner_serialize(void *p, char *buffer) {
  (void)p;
  (void)buffer;
  return 0u;
}
/* Restores scanner state from b (n bytes). No state exists, so this is a no-op. */
void tree_sitter_d2_external_scanner_deserialize(void *p, const char *b, unsigned n) {
  (void)p;
  (void)b;
  (void)n;
}
/* Moves the lexer one character forward, passing false for the advance callback's second (skip) argument. */
static void advance(TSLexer *lexer) {
  const bool as_skipped = false;
  lexer->advance(lexer, as_skipped);
}
/* Moves the lexer one character forward, passing true for the advance callback's second (skip) argument. */
static void skip(TSLexer *lexer) {
  const bool as_skipped = true;
  lexer->advance(lexer, as_skipped);
}
/*
 * Scans one IDENTIFIER token.
 *
 * Leading whitespace is skipped. After that, characters are consumed one at
 * a time, and mark_end is called before every step, so any run of whitespace
 * that turns out to be trailing is never marked as part of the token.
 *
 * Scanning stops at '{', ':', ';', '<', '.', '\n' or '\0', or at a '-' that
 * is immediately followed by '-' or '>'.
 *
 * Returns true when at least one character was consumed before the stop
 * condition was hit, false otherwise.
 */
static bool scan_identifier(TSLexer *lexer) {
  bool has_content = false;

  lexer->result_symbol = IDENTIFIER;

  /* Drop whitespace that precedes the identifier. */
  while (iswspace(lexer->lookahead)) {
    skip(lexer);
  }

  for (;;) {
    /* Everything consumed so far belongs to the token. */
    lexer->mark_end(lexer);

    /* Walk over interior whitespace; a terminator here ends the token. */
    for (;;) {
      if (lexer->lookahead == '{' || lexer->lookahead == ':' ||
          lexer->lookahead == ';' || lexer->lookahead == '<' ||
          lexer->lookahead == '.' || lexer->lookahead == '\n' ||
          lexer->lookahead == '\0') {
        return has_content;
      }
      if (!iswspace(lexer->lookahead)) {
        break;
      }
      advance(lexer);
    }

    /* Consume one character; a dash needs a look at what follows it. */
    bool was_dash = lexer->lookahead == '-';
    advance(lexer);
    if (was_dash && (lexer->lookahead == '-' || lexer->lookahead == '>')) {
      /* The dash was consumed after the last mark_end, so the token ends before it. */
      return has_content;
    }

    has_content = true;
  }
}
/*
 * Scanner entry point. Attempts an IDENTIFIER scan only when valid_symbols
 * marks that token as acceptable; otherwise reports that nothing matched.
 * The payload is unused.
 */
bool tree_sitter_d2_external_scanner_scan(
  void *payload,
  TSLexer *lexer,
  const bool *valid_symbols
) {
  (void)payload;
  return valid_symbols[IDENTIFIER] ? scan_identifier(lexer) : false;
}

View file

@ -32,6 +32,41 @@ biz<-baz
) )
) )
==================
Formatted connection
==================
foo -- bar
biz -> baz
biz <-> baz
biz <- baz
---
(source_file
(connection
(identifier)
(arrow)
(identifier)
)
(connection
(identifier)
(arrow)
(identifier)
)
(connection
(identifier)
(arrow)
(identifier)
)
(connection
(identifier)
(arrow)
(identifier)
)
)
============================= =============================
Complex identifier connection Complex identifier connection
============================= =============================