fix identifier scanner
This commit is contained in:
parent
a2a7925e3b
commit
a5059fd3de
8 changed files with 138 additions and 154 deletions
|
@ -1,8 +1,2 @@
|
||||||
a <- b -- b
|
a
|
||||||
b
|
b
|
||||||
|
|
||||||
foo: Hello world {
|
|
||||||
shape: oval
|
|
||||||
}
|
|
||||||
|
|
||||||
hello world- -- foo
|
|
||||||
|
|
3
examples/test.txt
Normal file
3
examples/test.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
Foo bar
|
||||||
|
-Biz-baz-
|
||||||
|
|
11
grammar.js
11
grammar.js
|
@ -19,16 +19,9 @@ module.exports = grammar({
|
||||||
$._end
|
$._end
|
||||||
),
|
),
|
||||||
|
|
||||||
shape: ($) =>
|
shape: ($) => seq($.identifier, optional(seq(":", $.label)), $._end),
|
||||||
prec.left(
|
|
||||||
seq(
|
|
||||||
field("id", $.identifier),
|
|
||||||
optional(seq(":", field("label", $.label))),
|
|
||||||
$._end
|
|
||||||
)
|
|
||||||
),
|
|
||||||
|
|
||||||
label: ($) => choice($.string, /[^\n{]+/),
|
label: ($) => choice($.string, /[^\n;{]+/),
|
||||||
|
|
||||||
identifier: ($) => $._identifier,
|
identifier: ($) => $._identifier,
|
||||||
|
|
||||||
|
|
|
@ -64,18 +64,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"shape": {
|
"shape": {
|
||||||
"type": "PREC_LEFT",
|
|
||||||
"value": 0,
|
|
||||||
"content": {
|
|
||||||
"type": "SEQ",
|
"type": "SEQ",
|
||||||
"members": [
|
"members": [
|
||||||
{
|
{
|
||||||
"type": "FIELD",
|
|
||||||
"name": "id",
|
|
||||||
"content": {
|
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "identifier"
|
"name": "identifier"
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
|
@ -88,13 +81,9 @@
|
||||||
"value": ":"
|
"value": ":"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "FIELD",
|
|
||||||
"name": "label",
|
|
||||||
"content": {
|
|
||||||
"type": "SYMBOL",
|
"type": "SYMBOL",
|
||||||
"name": "label"
|
"name": "label"
|
||||||
}
|
}
|
||||||
}
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -107,7 +96,6 @@
|
||||||
"name": "_end"
|
"name": "_end"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"label": {
|
"label": {
|
||||||
"type": "CHOICE",
|
"type": "CHOICE",
|
||||||
|
@ -118,7 +106,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "PATTERN",
|
"type": "PATTERN",
|
||||||
"value": "[^\\n{]+"
|
"value": "[^\\n;{]+"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -63,28 +63,21 @@
|
||||||
{
|
{
|
||||||
"type": "shape",
|
"type": "shape",
|
||||||
"named": true,
|
"named": true,
|
||||||
"fields": {
|
"fields": {},
|
||||||
"id": {
|
"children": {
|
||||||
"multiple": false,
|
"multiple": true,
|
||||||
"required": true,
|
"required": true,
|
||||||
"types": [
|
"types": [
|
||||||
{
|
{
|
||||||
"type": "identifier",
|
"type": "identifier",
|
||||||
"named": true
|
"named": true
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"label": {
|
|
||||||
"multiple": false,
|
|
||||||
"required": false,
|
|
||||||
"types": [
|
|
||||||
{
|
{
|
||||||
"type": "label",
|
"type": "label",
|
||||||
"named": true
|
"named": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "source_file",
|
"type": "source_file",
|
||||||
|
|
57
src/parser.c
57
src/parser.c
|
@ -12,9 +12,9 @@
|
||||||
#define ALIAS_COUNT 0
|
#define ALIAS_COUNT 0
|
||||||
#define TOKEN_COUNT 15
|
#define TOKEN_COUNT 15
|
||||||
#define EXTERNAL_TOKEN_COUNT 1
|
#define EXTERNAL_TOKEN_COUNT 1
|
||||||
#define FIELD_COUNT 4
|
#define FIELD_COUNT 2
|
||||||
#define MAX_ALIAS_SEQUENCE_LENGTH 4
|
#define MAX_ALIAS_SEQUENCE_LENGTH 4
|
||||||
#define PRODUCTION_ID_COUNT 4
|
#define PRODUCTION_ID_COUNT 2
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
anon_sym_SEMI = 1,
|
anon_sym_SEMI = 1,
|
||||||
|
@ -217,33 +217,22 @@ static const TSSymbolMetadata ts_symbol_metadata[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
field_id = 1,
|
field_left = 1,
|
||||||
field_label = 2,
|
field_right = 2,
|
||||||
field_left = 3,
|
|
||||||
field_right = 4,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * const ts_field_names[] = {
|
static const char * const ts_field_names[] = {
|
||||||
[0] = NULL,
|
[0] = NULL,
|
||||||
[field_id] = "id",
|
|
||||||
[field_label] = "label",
|
|
||||||
[field_left] = "left",
|
[field_left] = "left",
|
||||||
[field_right] = "right",
|
[field_right] = "right",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {
|
static const TSFieldMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {
|
||||||
[1] = {.index = 0, .length = 1},
|
[1] = {.index = 0, .length = 2},
|
||||||
[2] = {.index = 1, .length = 2},
|
|
||||||
[3] = {.index = 3, .length = 2},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static const TSFieldMapEntry ts_field_map_entries[] = {
|
static const TSFieldMapEntry ts_field_map_entries[] = {
|
||||||
[0] =
|
[0] =
|
||||||
{field_id, 0},
|
|
||||||
[1] =
|
|
||||||
{field_id, 0},
|
|
||||||
{field_label, 2},
|
|
||||||
[3] =
|
|
||||||
{field_left, 0},
|
{field_left, 0},
|
||||||
{field_right, 2},
|
{field_right, 2},
|
||||||
};
|
};
|
||||||
|
@ -326,18 +315,19 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 2:
|
case 2:
|
||||||
if (lookahead == '\n') SKIP(2)
|
if (lookahead == '\n') SKIP(2)
|
||||||
if (lookahead == '"') ADVANCE(30);
|
if (lookahead == '"') ADVANCE(29);
|
||||||
if (lookahead == '\'') ADVANCE(23);
|
if (lookahead == '\'') ADVANCE(23);
|
||||||
if (lookahead == '`') ADVANCE(33);
|
if (lookahead == '`') ADVANCE(32);
|
||||||
if (lookahead == '\t' ||
|
if (lookahead == '\t' ||
|
||||||
lookahead == '\r' ||
|
lookahead == '\r' ||
|
||||||
lookahead == ' ') ADVANCE(13);
|
lookahead == ' ') ADVANCE(13);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
|
lookahead != ';' &&
|
||||||
lookahead != '{') ADVANCE(14);
|
lookahead != '{') ADVANCE(14);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 3:
|
case 3:
|
||||||
if (lookahead == '\n') SKIP(3)
|
if (lookahead == '\n') SKIP(3)
|
||||||
if (lookahead == '`') ADVANCE(32);
|
if (lookahead == '`') ADVANCE(33);
|
||||||
if (lookahead == '\t' ||
|
if (lookahead == '\t' ||
|
||||||
lookahead == '\r' ||
|
lookahead == '\r' ||
|
||||||
lookahead == ' ') ADVANCE(25);
|
lookahead == ' ') ADVANCE(25);
|
||||||
|
@ -346,7 +336,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 4:
|
case 4:
|
||||||
if (lookahead == '\n') SKIP(4)
|
if (lookahead == '\n') SKIP(4)
|
||||||
if (lookahead == '"') ADVANCE(29);
|
if (lookahead == '"') ADVANCE(30);
|
||||||
if (lookahead == '\t' ||
|
if (lookahead == '\t' ||
|
||||||
lookahead == '\r' ||
|
lookahead == '\r' ||
|
||||||
lookahead == ' ') ADVANCE(24);
|
lookahead == ' ') ADVANCE(24);
|
||||||
|
@ -394,20 +384,22 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 13:
|
case 13:
|
||||||
ACCEPT_TOKEN(aux_sym_label_token1);
|
ACCEPT_TOKEN(aux_sym_label_token1);
|
||||||
if (lookahead == '"') ADVANCE(30);
|
if (lookahead == '"') ADVANCE(29);
|
||||||
if (lookahead == '\'') ADVANCE(23);
|
if (lookahead == '\'') ADVANCE(23);
|
||||||
if (lookahead == '`') ADVANCE(33);
|
if (lookahead == '`') ADVANCE(32);
|
||||||
if (lookahead == '\t' ||
|
if (lookahead == '\t' ||
|
||||||
lookahead == '\r' ||
|
lookahead == '\r' ||
|
||||||
lookahead == ' ') ADVANCE(13);
|
lookahead == ' ') ADVANCE(13);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
lookahead != '\n' &&
|
lookahead != '\n' &&
|
||||||
|
lookahead != ';' &&
|
||||||
lookahead != '{') ADVANCE(14);
|
lookahead != '{') ADVANCE(14);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 14:
|
case 14:
|
||||||
ACCEPT_TOKEN(aux_sym_label_token1);
|
ACCEPT_TOKEN(aux_sym_label_token1);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
lookahead != '\n' &&
|
lookahead != '\n' &&
|
||||||
|
lookahead != ';' &&
|
||||||
lookahead != '{') ADVANCE(14);
|
lookahead != '{') ADVANCE(14);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 15:
|
case 15:
|
||||||
|
@ -441,11 +433,12 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
||||||
ACCEPT_TOKEN(anon_sym_SQUOTE);
|
ACCEPT_TOKEN(anon_sym_SQUOTE);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
lookahead != '\n' &&
|
lookahead != '\n' &&
|
||||||
|
lookahead != ';' &&
|
||||||
lookahead != '{') ADVANCE(14);
|
lookahead != '{') ADVANCE(14);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 24:
|
case 24:
|
||||||
ACCEPT_TOKEN(aux_sym_string_token1);
|
ACCEPT_TOKEN(aux_sym_string_token1);
|
||||||
if (lookahead == '"') ADVANCE(29);
|
if (lookahead == '"') ADVANCE(30);
|
||||||
if (lookahead == '\t' ||
|
if (lookahead == '\t' ||
|
||||||
lookahead == '\r' ||
|
lookahead == '\r' ||
|
||||||
lookahead == ' ') ADVANCE(24);
|
lookahead == ' ') ADVANCE(24);
|
||||||
|
@ -455,7 +448,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 25:
|
case 25:
|
||||||
ACCEPT_TOKEN(aux_sym_string_token1);
|
ACCEPT_TOKEN(aux_sym_string_token1);
|
||||||
if (lookahead == '`') ADVANCE(32);
|
if (lookahead == '`') ADVANCE(33);
|
||||||
if (lookahead == '\t' ||
|
if (lookahead == '\t' ||
|
||||||
lookahead == '\r' ||
|
lookahead == '\r' ||
|
||||||
lookahead == ' ') ADVANCE(25);
|
lookahead == ' ') ADVANCE(25);
|
||||||
|
@ -485,13 +478,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
||||||
ACCEPT_TOKEN(anon_sym_DQUOTE);
|
ACCEPT_TOKEN(anon_sym_DQUOTE);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
lookahead != '\n' &&
|
lookahead != '\n' &&
|
||||||
lookahead != '\'') ADVANCE(27);
|
lookahead != ';' &&
|
||||||
|
lookahead != '{') ADVANCE(14);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 30:
|
case 30:
|
||||||
ACCEPT_TOKEN(anon_sym_DQUOTE);
|
ACCEPT_TOKEN(anon_sym_DQUOTE);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
lookahead != '\n' &&
|
lookahead != '\n' &&
|
||||||
lookahead != '{') ADVANCE(14);
|
lookahead != '\'') ADVANCE(27);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 31:
|
case 31:
|
||||||
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
||||||
|
@ -500,13 +494,14 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
|
||||||
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
lookahead != '\n' &&
|
lookahead != '\n' &&
|
||||||
lookahead != '\'') ADVANCE(27);
|
lookahead != ';' &&
|
||||||
|
lookahead != '{') ADVANCE(14);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
case 33:
|
case 33:
|
||||||
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
ACCEPT_TOKEN(anon_sym_BQUOTE);
|
||||||
if (lookahead != 0 &&
|
if (lookahead != 0 &&
|
||||||
lookahead != '\n' &&
|
lookahead != '\n' &&
|
||||||
lookahead != '{') ADVANCE(14);
|
lookahead != '\'') ADVANCE(27);
|
||||||
END_STATE();
|
END_STATE();
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
@ -926,9 +921,9 @@ static const TSParseActionEntry ts_parse_actions[] = {
|
||||||
[97] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21),
|
[97] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21),
|
||||||
[99] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 2),
|
[99] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 2),
|
||||||
[101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 2),
|
[101] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 2),
|
||||||
[103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2, .production_id = 1),
|
[103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 2),
|
||||||
[105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4, .production_id = 2),
|
[105] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_shape, 4),
|
||||||
[107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 3),
|
[107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_connection, 4, .production_id = 1),
|
||||||
[109] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 3),
|
[109] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_string, 3),
|
||||||
[111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 3),
|
[111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_string, 3),
|
||||||
[113] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_label, 1),
|
[113] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_label, 1),
|
||||||
|
|
|
@ -15,31 +15,43 @@ static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
|
||||||
|
|
||||||
static bool scan_identifier(TSLexer *lexer) {
|
static bool scan_identifier(TSLexer *lexer) {
|
||||||
lexer->result_symbol = IDENTIFIER;
|
lexer->result_symbol = IDENTIFIER;
|
||||||
for (bool has_content = false;; has_content = true) {
|
|
||||||
lexer->mark_end(lexer);
|
|
||||||
|
|
||||||
while (iswspace(lexer->lookahead)) {
|
while (iswspace(lexer->lookahead)) {
|
||||||
skip(lexer);
|
skip(lexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t next = lexer->lookahead;
|
for (bool has_content = false;; has_content = true) {
|
||||||
|
lexer->mark_end(lexer);
|
||||||
|
|
||||||
if (next == '{' || next == ':' || next == ';' || next == '\n' || next == 0) {
|
for (;;) {
|
||||||
|
switch (lexer->lookahead) {
|
||||||
|
case '{':
|
||||||
|
case ':':
|
||||||
|
case ';':
|
||||||
|
case '<':
|
||||||
|
case '\n':
|
||||||
|
case '\0':
|
||||||
return has_content;
|
return has_content;
|
||||||
}
|
}
|
||||||
|
|
||||||
// arrows
|
if (iswspace(lexer->lookahead)) {
|
||||||
if (next == '-') {
|
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
int32_t next_2 = lexer->lookahead;
|
} else {
|
||||||
if (next_2 == '-' || next_2 == '>') {
|
break;
|
||||||
return has_content;
|
|
||||||
}
|
}
|
||||||
} else if (next == '<') {
|
|
||||||
return has_content;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch (lexer->lookahead) {
|
||||||
|
case '-':
|
||||||
advance(lexer);
|
advance(lexer);
|
||||||
|
switch (lexer->lookahead) {
|
||||||
|
case '-':
|
||||||
|
case '>':
|
||||||
|
return has_content;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
advance(lexer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,31 @@
|
||||||
==================
|
==================
|
||||||
Single shape
|
Simple shape
|
||||||
==================
|
==================
|
||||||
|
|
||||||
foo
|
foo
|
||||||
|
bar
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
(source_file (shape (identifier)))
|
(source_file
|
||||||
|
(shape (identifier))
|
||||||
|
(shape (identifier))
|
||||||
|
)
|
||||||
|
|
||||||
|
==================
|
||||||
|
Complex identifier
|
||||||
|
==================
|
||||||
|
|
||||||
|
Foo bar
|
||||||
|
-Biz-baz-
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
(source_file
|
||||||
|
(shape (identifier))
|
||||||
|
(shape (identifier))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
==================
|
==================
|
||||||
Inline shapes
|
Inline shapes
|
||||||
|
@ -21,36 +40,23 @@ a;b;c
|
||||||
(identifier))
|
(identifier))
|
||||||
(shape
|
(shape
|
||||||
(identifier))
|
(identifier))
|
||||||
(shape
|
|
||||||
(identifier)))
|
|
||||||
|
|
||||||
==================
|
|
||||||
Number identifier
|
|
||||||
==================
|
|
||||||
|
|
||||||
1;1a;b2
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
(source_file
|
|
||||||
(shape
|
(shape
|
||||||
(identifier))
|
(identifier))
|
||||||
(shape
|
)
|
||||||
(identifier))
|
|
||||||
(shape
|
|
||||||
(identifier)))
|
|
||||||
|
|
||||||
|
|
||||||
==================
|
==================
|
||||||
Aliased shapes
|
Aliased shapes
|
||||||
==================
|
==================
|
||||||
|
|
||||||
a: Hello World
|
a: Foo Bar
|
||||||
|
|
||||||
|
a: Foo Bar; b: Biz Baz
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
(source_file
|
(source_file
|
||||||
(shape
|
(shape (identifier) (label))
|
||||||
id: (identifier)
|
(shape (identifier) (label))
|
||||||
label: (label)))
|
(shape (identifier) (label))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue