fix identifier scanner

This commit is contained in:
Dmitriy Pleshevskiy 2022-12-04 03:13:40 +03:00
parent a2a7925e3b
commit a5059fd3de
Signed by: pleshevskiy
GPG key ID: 1B59187B161C0215
8 changed files with 138 additions and 154 deletions

View file

@ -1,8 +1,2 @@
a <- b -- b a
b b
foo: Hello world {
shape: oval
}
hello world- -- foo

3
examples/test.txt Normal file
View file

@ -0,0 +1,3 @@
Foo bar
-Biz-baz-

View file

@ -19,16 +19,9 @@ module.exports = grammar({
$._end $._end
), ),
shape: ($) => shape: ($) => seq($.identifier, optional(seq(":", $.label)), $._end),
prec.left(
seq(
field("id", $.identifier),
optional(seq(":", field("label", $.label))),
$._end
)
),
label: ($) => choice($.string, /[^\n{]+/), label: ($) => choice($.string, /[^\n;{]+/),
identifier: ($) => $._identifier, identifier: ($) => $._identifier,

View file

@ -64,50 +64,38 @@
] ]
}, },
"shape": { "shape": {
"type": "PREC_LEFT", "type": "SEQ",
"value": 0, "members": [
"content": { {
"type": "SEQ", "type": "SYMBOL",
"members": [ "name": "identifier"
{ },
"type": "FIELD", {
"name": "id", "type": "CHOICE",
"content": { "members": [
"type": "SYMBOL", {
"name": "identifier" "type": "SEQ",
"members": [
{
"type": "STRING",
"value": ":"
},
{
"type": "SYMBOL",
"name": "label"
}
]
},
{
"type": "BLANK"
} }
}, ]
{ },
"type": "CHOICE", {
"members": [ "type": "SYMBOL",
{ "name": "_end"
"type": "SEQ", }
"members": [ ]
{
"type": "STRING",
"value": ":"
},
{
"type": "FIELD",
"name": "label",
"content": {
"type": "SYMBOL",
"name": "label"
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_end"
}
]
}
}, },
"label": { "label": {
"type": "CHOICE", "type": "CHOICE",
@ -118,7 +106,7 @@
}, },
{ {
"type": "PATTERN", "type": "PATTERN",
"value": "[^\\n{]+" "value": "[^\\n;{]+"
} }
] ]
}, },

View file

@ -63,27 +63,20 @@
{ {
"type": "shape", "type": "shape",
"named": true, "named": true,
"fields": { "fields": {},
"id": { "children": {
"multiple": false, "multiple": true,
"required": true, "required": true,
"types": [ "types": [
{ {
"type": "identifier", "type": "identifier",
"named": true "named": true
} },
] {
}, "type": "label",
"label": { "named": true
"multiple": false, }
"required": false, ]
"types": [
{
"type": "label",
"named": true
}
]
}
} }
}, },
{ {

View file

@ -12,9 +12,9 @@
#define ALIAS_COUNT 0 #define ALIAS_COUNT 0
#define TOKEN_COUNT 15 #define TOKEN_COUNT 15
#define EXTERNAL_TOKEN_COUNT 1 #define EXTERNAL_TOKEN_COUNT 1
#define FIELD_COUNT 4 #define FIELD_COUNT 2
#define MAX_ALIAS_SEQUENCE_LENGTH 4 #define MAX_ALIAS_SEQUENCE_LENGTH 4
#define PRODUCTION_ID_COUNT 4 #define PRODUCTION_ID_COUNT 2
enum { enum {
anon_sym_SEMI = 1, anon_sym_SEMI = 1,
@ -217,33 +217,22 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
}; };
enum { enum {
field_id = 1, field_left = 1,
field_label = 2, field_right = 2,
field_left = 3,
field_right = 4,
}; };
static const char * const ts_field_names[] = { static const char * const ts_field_names[] = {
[0] = NULL, [0] = NULL,
[field_id] = "id",
[field_label] = "label",
[field_left] = "left", [field_left] = "left",
[field_right] = "right", [field_right] = "right",
}; };
static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = { static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {
[1] = {.index = 0, .length = 1}, [1] = {.index = 0, .length = 2},
[2] = {.index = 1, .length = 2},
[3] = {.index = 3, .length = 2},
}; };
static const TSFieldMapEntry ts_field_map_entries[] = { static const TSFieldMapEntry ts_field_map_entries[] = {
[0] = [0] =
{field_id, 0},
[1] =
{field_id, 0},
{field_label, 2},
[3] =
{field_left, 0}, {field_left, 0},
{field_right, 2}, {field_right, 2},
}; };
@ -326,18 +315,19 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE(); END_STATE();
case 2: case 2:
if (lookahead == '\n') SKIP(2) if (lookahead == '\n') SKIP(2)
if (lookahead == '"') ADVANCE(30); if (lookahead == '"') ADVANCE(29);
if (lookahead == '\'') ADVANCE(23); if (lookahead == '\'') ADVANCE(23);
if (lookahead == '`') ADVANCE(33); if (lookahead == '`') ADVANCE(32);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') ADVANCE(13); lookahead == ' ') ADVANCE(13);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14); lookahead != '{') ADVANCE(14);
END_STATE(); END_STATE();
case 3: case 3:
if (lookahead == '\n') SKIP(3) if (lookahead == '\n') SKIP(3)
if (lookahead == '`') ADVANCE(32); if (lookahead == '`') ADVANCE(33);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') ADVANCE(25); lookahead == ' ') ADVANCE(25);
@ -346,7 +336,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE(); END_STATE();
case 4: case 4:
if (lookahead == '\n') SKIP(4) if (lookahead == '\n') SKIP(4)
if (lookahead == '"') ADVANCE(29); if (lookahead == '"') ADVANCE(30);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') ADVANCE(24); lookahead == ' ') ADVANCE(24);
@ -394,20 +384,22 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE(); END_STATE();
case 13: case 13:
ACCEPT_TOKEN(aux_sym_label_token1); ACCEPT_TOKEN(aux_sym_label_token1);
if (lookahead == '"') ADVANCE(30); if (lookahead == '"') ADVANCE(29);
if (lookahead == '\'') ADVANCE(23); if (lookahead == '\'') ADVANCE(23);
if (lookahead == '`') ADVANCE(33); if (lookahead == '`') ADVANCE(32);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') ADVANCE(13); lookahead == ' ') ADVANCE(13);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n' && lookahead != '\n' &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14); lookahead != '{') ADVANCE(14);
END_STATE(); END_STATE();
case 14: case 14:
ACCEPT_TOKEN(aux_sym_label_token1); ACCEPT_TOKEN(aux_sym_label_token1);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n' && lookahead != '\n' &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14); lookahead != '{') ADVANCE(14);
END_STATE(); END_STATE();
case 15: case 15:
@ -441,11 +433,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
ACCEPT_TOKEN(anon_sym_SQUOTE); ACCEPT_TOKEN(anon_sym_SQUOTE);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n' && lookahead != '\n' &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14); lookahead != '{') ADVANCE(14);
END_STATE(); END_STATE();
case 24: case 24:
ACCEPT_TOKEN(aux_sym_string_token1); ACCEPT_TOKEN(aux_sym_string_token1);
if (lookahead == '"') ADVANCE(29); if (lookahead == '"') ADVANCE(30);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') ADVANCE(24); lookahead == ' ') ADVANCE(24);
@ -455,7 +448,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE(); END_STATE();
case 25: case 25:
ACCEPT_TOKEN(aux_sym_string_token1); ACCEPT_TOKEN(aux_sym_string_token1);
if (lookahead == '`') ADVANCE(32); if (lookahead == '`') ADVANCE(33);
if (lookahead == '\t' || if (lookahead == '\t' ||
lookahead == '\r' || lookahead == '\r' ||
lookahead == ' ') ADVANCE(25); lookahead == ' ') ADVANCE(25);
@ -485,13 +478,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
ACCEPT_TOKEN(anon_sym_DQUOTE); ACCEPT_TOKEN(anon_sym_DQUOTE);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n' && lookahead != '\n' &&
lookahead != '\'') ADVANCE(27); lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE(); END_STATE();
case 30: case 30:
ACCEPT_TOKEN(anon_sym_DQUOTE); ACCEPT_TOKEN(anon_sym_DQUOTE);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n' && lookahead != '\n' &&
lookahead != '{') ADVANCE(14); lookahead != '\'') ADVANCE(27);
END_STATE(); END_STATE();
case 31: case 31:
ACCEPT_TOKEN(anon_sym_BQUOTE); ACCEPT_TOKEN(anon_sym_BQUOTE);
@ -500,13 +494,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
ACCEPT_TOKEN(anon_sym_BQUOTE); ACCEPT_TOKEN(anon_sym_BQUOTE);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n' && lookahead != '\n' &&
lookahead != '\'') ADVANCE(27); lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE(); END_STATE();
case 33: case 33:
ACCEPT_TOKEN(anon_sym_BQUOTE); ACCEPT_TOKEN(anon_sym_BQUOTE);
if (lookahead != 0 && if (lookahead != 0 &&
lookahead != '\n' && lookahead != '\n' &&
lookahead != '{') ADVANCE(14); lookahead != '\'') ADVANCE(27);
END_STATE(); END_STATE();
default: default:
return false; return false;
@ -926,9 +921,9 @@ static const TSParseActionEntry ts_parse_actions[] = {
[97] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21), [97] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21),
[99] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 2), [99] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 2),
[101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 2), [101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 2),
[103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2, .production_id = 1), [103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2),
[105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4, .production_id = 2), [105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4),
[107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 3), [107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 1),
[109] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 3), [109] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 3),
[111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 3), [111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 3),
[113] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_label, 1), [113] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_label, 1),

View file

@ -15,31 +15,43 @@ static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
static bool scan_identifier(TSLexer *lexer) { static bool scan_identifier(TSLexer *lexer) {
lexer->result_symbol = IDENTIFIER; lexer->result_symbol = IDENTIFIER;
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
for (bool has_content = false;; has_content = true) { for (bool has_content = false;; has_content = true) {
lexer->mark_end(lexer); lexer->mark_end(lexer);
while (iswspace(lexer->lookahead)) { for (;;) {
skip(lexer); switch (lexer->lookahead) {
} case '{':
case ':':
int32_t next = lexer->lookahead; case ';':
case '<':
if (next == '{' || next == ':' || next == ';' || next == '\n' || next == 0) { case '\n':
return has_content; case '\0':
} return has_content;
}
// arrows
if (next == '-') { if (iswspace(lexer->lookahead)) {
advance(lexer); advance(lexer);
int32_t next_2 = lexer->lookahead; } else {
if (next_2 == '-' || next_2 == '>') { break;
return has_content;
} }
} else if (next == '<') {
return has_content;
} }
advance(lexer); switch (lexer->lookahead) {
case '-':
advance(lexer);
switch (lexer->lookahead) {
case '-':
case '>':
return has_content;
}
break;
default:
advance(lexer);
}
} }
} }

View file

@ -1,12 +1,31 @@
================== ==================
Single shape Simple shape
================== ==================
foo foo
bar
--- ---
(source_file (shape (identifier))) (source_file
(shape (identifier))
(shape (identifier))
)
==================
Complex identifier
==================
Foo bar
-Biz-baz-
---
(source_file
(shape (identifier))
(shape (identifier))
)
================== ==================
Inline shapes Inline shapes
@ -21,36 +40,23 @@ a;b;c
(identifier)) (identifier))
(shape (shape
(identifier)) (identifier))
(shape
(identifier)))
==================
Number identifier
==================
1;1a;b2
---
(source_file
(shape (shape
(identifier)) (identifier))
(shape )
(identifier))
(shape
(identifier)))
================== ==================
Aliased shapes Aliased shapes
================== ==================
a: Hello World a: Foo Bar
a: Foo Bar; b: Biz Baz
--- ---
(source_file (source_file
(shape (shape (identifier) (label))
id: (identifier) (shape (identifier) (label))
label: (label))) (shape (identifier) (label))
)