fix identifier scanner
This commit is contained in:
parent
a2a7925e3b
commit
a5059fd3de
8 changed files with 138 additions and 154 deletions
|
@ -1,8 +1,2 @@
|
|||
a <- b -- b
|
||||
a
|
||||
b
|
||||
|
||||
foo: Hello world {
|
||||
shape: oval
|
||||
}
|
||||
|
||||
hello world- -- foo
|
||||
|
|
3
examples/test.txt
Normal file
3
examples/test.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
Foo bar
|
||||
-Biz-baz-
|
||||
|
11
grammar.js
11
grammar.js
|
@ -19,16 +19,9 @@ module.exports = grammar({
|
|||
$._end
|
||||
),
|
||||
|
||||
shape: ($) =>
|
||||
prec.left(
|
||||
seq(
|
||||
field("id", $.identifier),
|
||||
optional(seq(":", field("label", $.label))),
|
||||
$._end
|
||||
)
|
||||
),
|
||||
shape: ($) => seq($.identifier, optional(seq(":", $.label)), $._end),
|
||||
|
||||
label: ($) => choice($.string, /[^\n{]+/),
|
||||
label: ($) => choice($.string, /[^\n;{]+/),
|
||||
|
||||
identifier: ($) => $._identifier,
|
||||
|
||||
|
|
|
@ -64,50 +64,38 @@
|
|||
]
|
||||
},
|
||||
"shape": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "id",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ":"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "label"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": ":"
|
||||
},
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "label",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "label"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_end"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "_end"
|
||||
}
|
||||
]
|
||||
},
|
||||
"label": {
|
||||
"type": "CHOICE",
|
||||
|
@ -118,7 +106,7 @@
|
|||
},
|
||||
{
|
||||
"type": "PATTERN",
|
||||
"value": "[^\\n{]+"
|
||||
"value": "[^\\n;{]+"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
|
@ -63,27 +63,20 @@
|
|||
{
|
||||
"type": "shape",
|
||||
"named": true,
|
||||
"fields": {
|
||||
"id": {
|
||||
"multiple": false,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "identifier",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"label": {
|
||||
"multiple": false,
|
||||
"required": false,
|
||||
"types": [
|
||||
{
|
||||
"type": "label",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
"fields": {},
|
||||
"children": {
|
||||
"multiple": true,
|
||||
"required": true,
|
||||
"types": [
|
||||
{
|
||||
"type": "identifier",
|
||||
"named": true
|
||||
},
|
||||
{
|
||||
"type": "label",
|
||||
"named": true
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
|
|
57
src/parser.c
57
src/parser.c
|
@ -12,9 +12,9 @@
|
|||
#define ALIAS_COUNT 0
|
||||
#define TOKEN_COUNT 15
|
||||
#define EXTERNAL_TOKEN_COUNT 1
|
||||
#define FIELD_COUNT 4
|
||||
#define FIELD_COUNT 2
|
||||
#define MAX_ALIAS_SEQUENCE_LENGTH 4
|
||||
#define PRODUCTION_ID_COUNT 4
|
||||
#define PRODUCTION_ID_COUNT 2
|
||||
|
||||
enum {
|
||||
anon_sym_SEMI = 1,
|
||||
|
@ -217,33 +217,22 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
|
|||
};
|
||||
|
||||
enum {
|
||||
field_id = 1,
|
||||
field_label = 2,
|
||||
field_left = 3,
|
||||
field_right = 4,
|
||||
field_left = 1,
|
||||
field_right = 2,
|
||||
};
|
||||
|
||||
static const char * const ts_field_names[] = {
|
||||
[0] = NULL,
|
||||
[field_id] = "id",
|
||||
[field_label] = "label",
|
||||
[field_left] = "left",
|
||||
[field_right] = "right",
|
||||
};
|
||||
|
||||
static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {
|
||||
[1] = {.index = 0, .length = 1},
|
||||
[2] = {.index = 1, .length = 2},
|
||||
[3] = {.index = 3, .length = 2},
|
||||
[1] = {.index = 0, .length = 2},
|
||||
};
|
||||
|
||||
static const TSFieldMapEntry ts_field_map_entries[] = {
|
||||
[0] =
|
||||
{field_id, 0},
|
||||
[1] =
|
||||
{field_id, 0},
|
||||
{field_label, 2},
|
||||
[3] =
|
||||
{field_left, 0},
|
||||
{field_right, 2},
|
||||
};
|
||||
|
@ -326,18 +315,19 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
|||
END_STATE();
|
||||
case 2:
|
||||
if (lookahead == '\n') SKIP(2)
|
||||
if (lookahead == '"') ADVANCE(30);
|
||||
if (lookahead == '"') ADVANCE(29);
|
||||
if (lookahead == '\'') ADVANCE(23);
|
||||
if (lookahead == '`') ADVANCE(33);
|
||||
if (lookahead == '`') ADVANCE(32);
|
||||
if (lookahead == '\t' ||
|
||||
lookahead == '\r' ||
|
||||
lookahead == ' ') ADVANCE(13);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != ';' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
END_STATE();
|
||||
case 3:
|
||||
if (lookahead == '\n') SKIP(3)
|
||||
if (lookahead == '`') ADVANCE(32);
|
||||
if (lookahead == '`') ADVANCE(33);
|
||||
if (lookahead == '\t' ||
|
||||
lookahead == '\r' ||
|
||||
lookahead == ' ') ADVANCE(25);
|
||||
|
@ -346,7 +336,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
|||
END_STATE();
|
||||
case 4:
|
||||
if (lookahead == '\n') SKIP(4)
|
||||
if (lookahead == '"') ADVANCE(29);
|
||||
if (lookahead == '"') ADVANCE(30);
|
||||
if (lookahead == '\t' ||
|
||||
lookahead == '\r' ||
|
||||
lookahead == ' ') ADVANCE(24);
|
||||
|
@ -394,20 +384,22 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
|||
END_STATE();
|
||||
case 13:
|
||||
ACCEPT_TOKEN(aux_sym_label_token1);
|
||||
if (lookahead == '"') ADVANCE(30);
|
||||
if (lookahead == '"') ADVANCE(29);
|
||||
if (lookahead == '\'') ADVANCE(23);
|
||||
if (lookahead == '`') ADVANCE(33);
|
||||
if (lookahead == '`') ADVANCE(32);
|
||||
if (lookahead == '\t' ||
|
||||
lookahead == '\r' ||
|
||||
lookahead == ' ') ADVANCE(13);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != '\n' &&
|
||||
lookahead != ';' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
END_STATE();
|
||||
case 14:
|
||||
ACCEPT_TOKEN(aux_sym_label_token1);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != '\n' &&
|
||||
lookahead != ';' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
END_STATE();
|
||||
case 15:
|
||||
|
@ -441,11 +433,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
|||
ACCEPT_TOKEN(anon_sym_SQUOTE);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != '\n' &&
|
||||
lookahead != ';' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
END_STATE();
|
||||
case 24:
|
||||
ACCEPT_TOKEN(aux_sym_string_token1);
|
||||
if (lookahead == '"') ADVANCE(29);
|
||||
if (lookahead == '"') ADVANCE(30);
|
||||
if (lookahead == '\t' ||
|
||||
lookahead == '\r' ||
|
||||
lookahead == ' ') ADVANCE(24);
|
||||
|
@ -455,7 +448,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
|||
END_STATE();
|
||||
case 25:
|
||||
ACCEPT_TOKEN(aux_sym_string_token1);
|
||||
if (lookahead == '`') ADVANCE(32);
|
||||
if (lookahead == '`') ADVANCE(33);
|
||||
if (lookahead == '\t' ||
|
||||
lookahead == '\r' ||
|
||||
lookahead == ' ') ADVANCE(25);
|
||||
|
@ -485,13 +478,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
|||
ACCEPT_TOKEN(anon_sym_DQUOTE);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != '\n' &&
|
||||
lookahead != '\'') ADVANCE(27);
|
||||
lookahead != ';' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
END_STATE();
|
||||
case 30:
|
||||
ACCEPT_TOKEN(anon_sym_DQUOTE);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != '\n' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
lookahead != '\'') ADVANCE(27);
|
||||
END_STATE();
|
||||
case 31:
|
||||
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
||||
|
@ -500,13 +494,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
|||
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != '\n' &&
|
||||
lookahead != '\'') ADVANCE(27);
|
||||
lookahead != ';' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
END_STATE();
|
||||
case 33:
|
||||
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
||||
if (lookahead != 0 &&
|
||||
lookahead != '\n' &&
|
||||
lookahead != '{') ADVANCE(14);
|
||||
lookahead != '\'') ADVANCE(27);
|
||||
END_STATE();
|
||||
default:
|
||||
return false;
|
||||
|
@ -926,9 +921,9 @@ static const TSParseActionEntry ts_parse_actions[] = {
|
|||
[97] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21),
|
||||
[99] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 2),
|
||||
[101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 2),
|
||||
[103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2, .production_id = 1),
|
||||
[105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4, .production_id = 2),
|
||||
[107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 3),
|
||||
[103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2),
|
||||
[105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4),
|
||||
[107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 1),
|
||||
[109] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 3),
|
||||
[111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 3),
|
||||
[113] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_label, 1),
|
||||
|
|
|
@ -15,31 +15,43 @@ static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
|
|||
|
||||
static bool scan_identifier(TSLexer *lexer) {
|
||||
lexer->result_symbol = IDENTIFIER;
|
||||
while (iswspace(lexer->lookahead)) {
|
||||
skip(lexer);
|
||||
}
|
||||
|
||||
for (bool has_content = false;; has_content = true) {
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
while (iswspace(lexer->lookahead)) {
|
||||
skip(lexer);
|
||||
}
|
||||
|
||||
int32_t next = lexer->lookahead;
|
||||
|
||||
if (next == '{' || next == ':' || next == ';' || next == '\n' || next == 0) {
|
||||
return has_content;
|
||||
}
|
||||
|
||||
// arrows
|
||||
if (next == '-') {
|
||||
advance(lexer);
|
||||
int32_t next_2 = lexer->lookahead;
|
||||
if (next_2 == '-' || next_2 == '>') {
|
||||
return has_content;
|
||||
for (;;) {
|
||||
switch (lexer->lookahead) {
|
||||
case '{':
|
||||
case ':':
|
||||
case ';':
|
||||
case '<':
|
||||
case '\n':
|
||||
case '\0':
|
||||
return has_content;
|
||||
}
|
||||
|
||||
if (iswspace(lexer->lookahead)) {
|
||||
advance(lexer);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else if (next == '<') {
|
||||
return has_content;
|
||||
}
|
||||
|
||||
advance(lexer);
|
||||
switch (lexer->lookahead) {
|
||||
case '-':
|
||||
advance(lexer);
|
||||
switch (lexer->lookahead) {
|
||||
case '-':
|
||||
case '>':
|
||||
return has_content;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
advance(lexer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,12 +1,31 @@
|
|||
==================
|
||||
Single shape
|
||||
Simple shape
|
||||
==================
|
||||
|
||||
foo
|
||||
bar
|
||||
|
||||
---
|
||||
|
||||
(source_file (shape (identifier)))
|
||||
(source_file
|
||||
(shape (identifier))
|
||||
(shape (identifier))
|
||||
)
|
||||
|
||||
==================
|
||||
Complex identifier
|
||||
==================
|
||||
|
||||
Foo bar
|
||||
-Biz-baz-
|
||||
|
||||
---
|
||||
|
||||
(source_file
|
||||
(shape (identifier))
|
||||
(shape (identifier))
|
||||
)
|
||||
|
||||
|
||||
==================
|
||||
Inline shapes
|
||||
|
@ -21,36 +40,23 @@ a;b;c
|
|||
(identifier))
|
||||
(shape
|
||||
(identifier))
|
||||
(shape
|
||||
(identifier)))
|
||||
|
||||
==================
|
||||
Number identifier
|
||||
==================
|
||||
|
||||
1;1a;b2
|
||||
|
||||
---
|
||||
|
||||
(source_file
|
||||
(shape
|
||||
(identifier))
|
||||
(shape
|
||||
(identifier))
|
||||
(shape
|
||||
(identifier)))
|
||||
|
||||
)
|
||||
|
||||
==================
|
||||
Aliased shapes
|
||||
==================
|
||||
|
||||
a: Hello World
|
||||
a: Foo Bar
|
||||
|
||||
a: Foo Bar; b: Biz Baz
|
||||
|
||||
---
|
||||
|
||||
(source_file
|
||||
(shape
|
||||
id: (identifier)
|
||||
label: (label)))
|
||||
(shape (identifier) (label))
|
||||
(shape (identifier) (label))
|
||||
(shape (identifier) (label))
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in a new issue