fix identifier scanner

This commit is contained in:
Dmitriy Pleshevskiy 2022-12-04 03:13:40 +03:00
parent a2a7925e3b
commit a5059fd3de
Signed by: pleshevskiy
GPG key ID: 1B59187B161C0215
8 changed files with 138 additions and 154 deletions

View file

@ -1,8 +1,2 @@
a <- b -- b
a
b
foo: Hello world {
shape: oval
}
hello world- -- foo

3
examples/test.txt Normal file
View file

@ -0,0 +1,3 @@
Foo bar
-Biz-baz-

View file

@ -19,16 +19,9 @@ module.exports = grammar({
$._end
),
shape: ($) =>
prec.left(
seq(
field("id", $.identifier),
optional(seq(":", field("label", $.label))),
$._end
)
),
shape: ($) => seq($.identifier, optional(seq(":", $.label)), $._end),
label: ($) => choice($.string, /[^\n{]+/),
label: ($) => choice($.string, /[^\n;{]+/),
identifier: ($) => $._identifier,

View file

@ -64,50 +64,38 @@
]
},
"shape": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "id",
"content": {
"type": "SYMBOL",
"name": "identifier"
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ":"
},
{
"type": "SYMBOL",
"name": "label"
}
]
},
{
"type": "BLANK"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": ":"
},
{
"type": "FIELD",
"name": "label",
"content": {
"type": "SYMBOL",
"name": "label"
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "SYMBOL",
"name": "_end"
}
]
}
]
},
{
"type": "SYMBOL",
"name": "_end"
}
]
},
"label": {
"type": "CHOICE",
@ -118,7 +106,7 @@
},
{
"type": "PATTERN",
"value": "[^\\n{]+"
"value": "[^\\n;{]+"
}
]
},

View file

@ -63,27 +63,20 @@
{
"type": "shape",
"named": true,
"fields": {
"id": {
"multiple": false,
"required": true,
"types": [
{
"type": "identifier",
"named": true
}
]
},
"label": {
"multiple": false,
"required": false,
"types": [
{
"type": "label",
"named": true
}
]
}
"fields": {},
"children": {
"multiple": true,
"required": true,
"types": [
{
"type": "identifier",
"named": true
},
{
"type": "label",
"named": true
}
]
}
},
{

View file

@ -12,9 +12,9 @@
#define ALIAS_COUNT 0
#define TOKEN_COUNT 15
#define EXTERNAL_TOKEN_COUNT 1
#define FIELD_COUNT 4
#define FIELD_COUNT 2
#define MAX_ALIAS_SEQUENCE_LENGTH 4
#define PRODUCTION_ID_COUNT 4
#define PRODUCTION_ID_COUNT 2
enum {
anon_sym_SEMI = 1,
@ -217,33 +217,22 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
};
enum {
field_id = 1,
field_label = 2,
field_left = 3,
field_right = 4,
field_left = 1,
field_right = 2,
};
static const char * const ts_field_names[] = {
[0] = NULL,
[field_id] = "id",
[field_label] = "label",
[field_left] = "left",
[field_right] = "right",
};
static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {
[1] = {.index = 0, .length = 1},
[2] = {.index = 1, .length = 2},
[3] = {.index = 3, .length = 2},
[1] = {.index = 0, .length = 2},
};
static const TSFieldMapEntry ts_field_map_entries[] = {
[0] =
{field_id, 0},
[1] =
{field_id, 0},
{field_label, 2},
[3] =
{field_left, 0},
{field_right, 2},
};
@ -326,18 +315,19 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE();
case 2:
if (lookahead == '\n') SKIP(2)
if (lookahead == '"') ADVANCE(30);
if (lookahead == '"') ADVANCE(29);
if (lookahead == '\'') ADVANCE(23);
if (lookahead == '`') ADVANCE(33);
if (lookahead == '`') ADVANCE(32);
if (lookahead == '\t' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(13);
if (lookahead != 0 &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE();
case 3:
if (lookahead == '\n') SKIP(3)
if (lookahead == '`') ADVANCE(32);
if (lookahead == '`') ADVANCE(33);
if (lookahead == '\t' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(25);
@ -346,7 +336,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE();
case 4:
if (lookahead == '\n') SKIP(4)
if (lookahead == '"') ADVANCE(29);
if (lookahead == '"') ADVANCE(30);
if (lookahead == '\t' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(24);
@ -394,20 +384,22 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE();
case 13:
ACCEPT_TOKEN(aux_sym_label_token1);
if (lookahead == '"') ADVANCE(30);
if (lookahead == '"') ADVANCE(29);
if (lookahead == '\'') ADVANCE(23);
if (lookahead == '`') ADVANCE(33);
if (lookahead == '`') ADVANCE(32);
if (lookahead == '\t' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(13);
if (lookahead != 0 &&
lookahead != '\n' &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE();
case 14:
ACCEPT_TOKEN(aux_sym_label_token1);
if (lookahead != 0 &&
lookahead != '\n' &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE();
case 15:
@ -441,11 +433,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
ACCEPT_TOKEN(anon_sym_SQUOTE);
if (lookahead != 0 &&
lookahead != '\n' &&
lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE();
case 24:
ACCEPT_TOKEN(aux_sym_string_token1);
if (lookahead == '"') ADVANCE(29);
if (lookahead == '"') ADVANCE(30);
if (lookahead == '\t' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(24);
@ -455,7 +448,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
END_STATE();
case 25:
ACCEPT_TOKEN(aux_sym_string_token1);
if (lookahead == '`') ADVANCE(32);
if (lookahead == '`') ADVANCE(33);
if (lookahead == '\t' ||
lookahead == '\r' ||
lookahead == ' ') ADVANCE(25);
@ -485,13 +478,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
ACCEPT_TOKEN(anon_sym_DQUOTE);
if (lookahead != 0 &&
lookahead != '\n' &&
lookahead != '\'') ADVANCE(27);
lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE();
case 30:
ACCEPT_TOKEN(anon_sym_DQUOTE);
if (lookahead != 0 &&
lookahead != '\n' &&
lookahead != '{') ADVANCE(14);
lookahead != '\'') ADVANCE(27);
END_STATE();
case 31:
ACCEPT_TOKEN(anon_sym_BQUOTE);
@ -500,13 +494,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
ACCEPT_TOKEN(anon_sym_BQUOTE);
if (lookahead != 0 &&
lookahead != '\n' &&
lookahead != '\'') ADVANCE(27);
lookahead != ';' &&
lookahead != '{') ADVANCE(14);
END_STATE();
case 33:
ACCEPT_TOKEN(anon_sym_BQUOTE);
if (lookahead != 0 &&
lookahead != '\n' &&
lookahead != '{') ADVANCE(14);
lookahead != '\'') ADVANCE(27);
END_STATE();
default:
return false;
@ -926,9 +921,9 @@ static const TSParseActionEntry ts_parse_actions[] = {
[97] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21),
[99] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 2),
[101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 2),
[103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2, .production_id = 1),
[105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4, .production_id = 2),
[107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 3),
[103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2),
[105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4),
[107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 1),
[109] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 3),
[111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 3),
[113] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_label, 1),

View file

@ -15,31 +15,43 @@ static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
static bool scan_identifier(TSLexer *lexer) {
lexer->result_symbol = IDENTIFIER;
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
for (bool has_content = false;; has_content = true) {
lexer->mark_end(lexer);
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
int32_t next = lexer->lookahead;
if (next == '{' || next == ':' || next == ';' || next == '\n' || next == 0) {
return has_content;
}
// arrows
if (next == '-') {
advance(lexer);
int32_t next_2 = lexer->lookahead;
if (next_2 == '-' || next_2 == '>') {
return has_content;
for (;;) {
switch (lexer->lookahead) {
case '{':
case ':':
case ';':
case '<':
case '\n':
case '\0':
return has_content;
}
if (iswspace(lexer->lookahead)) {
advance(lexer);
} else {
break;
}
} else if (next == '<') {
return has_content;
}
advance(lexer);
switch (lexer->lookahead) {
case '-':
advance(lexer);
switch (lexer->lookahead) {
case '-':
case '>':
return has_content;
}
break;
default:
advance(lexer);
}
}
}

View file

@ -1,12 +1,31 @@
==================
Single shape
Simple shape
==================
foo
bar
---
(source_file (shape (identifier)))
(source_file
(shape (identifier))
(shape (identifier))
)
==================
Complex identifier
==================
Foo bar
-Biz-baz-
---
(source_file
(shape (identifier))
(shape (identifier))
)
==================
Inline shapes
@ -21,36 +40,23 @@ a;b;c
(identifier))
(shape
(identifier))
(shape
(identifier)))
==================
Number identifier
==================
1;1a;b2
---
(source_file
(shape
(identifier))
(shape
(identifier))
(shape
(identifier)))
)
==================
Aliased shapes
==================
a: Hello World
a: Foo Bar
a: Foo Bar; b: Biz Baz
---
(source_file
(shape
id: (identifier)
label: (label)))
(shape (identifier) (label))
(shape (identifier) (label))
(shape (identifier) (label))
)