#include #include #include namespace { using std::vector; using std::iswpunct; enum TokenType { TEXT_BLOCK_START, TEXT_BLOCK_END, TEXT_BLOCK_RAW_TEXT, BLOCK_COMMENT, }; struct Scanner { vector escape_char_stack; Scanner() { deserialize(NULL, 0); } unsigned serialize(char *buffer) { size_t i = 0; size_t escape_char_count = escape_char_stack.size(); buffer[i++] = escape_char_count; vector::iterator iter = escape_char_stack.begin(), end = escape_char_stack.end(); for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) { buffer[i++] = *iter; } return i; } void deserialize(const char *buffer, unsigned length) { escape_char_stack.clear(); if (length == 0) return; size_t i = 0; size_t escape_char_count = (uint8_t)buffer[i++]; for (; i <= escape_char_count; i++) { escape_char_stack.push_back(buffer[i]); } } void advance(TSLexer *lexer) { lexer->advance(lexer, false); } void skip(TSLexer *lexer) { lexer->advance(lexer, true); } void skip_whitespaces(TSLexer *lexer) { while (lexer->lookahead != 0 && iswspace(lexer->lookahead)) { skip(lexer); } } bool is_text_block_end(TSLexer *lexer) { vector::reverse_iterator iter = escape_char_stack.rbegin(), end = escape_char_stack.rend(); for (; iter != end; ++iter) { if (lexer->lookahead != *iter) { return false; } advance(lexer); } return true; } bool is_triple_double_quote(TSLexer *lexer) { for (int i = 0; i < 3; ++i) { if (lexer->lookahead != '"') { return false; } advance(lexer); } return true; } bool scan(TSLexer *lexer, const bool *valid_symbols) { if (valid_symbols[TEXT_BLOCK_START] && escape_char_stack.empty()) { lexer->result_symbol = TEXT_BLOCK_START; lexer->mark_end(lexer); skip_whitespaces(lexer); if (lexer->lookahead != '|') { return false; } advance(lexer); escape_char_stack.push_back('|'); if (!iswpunct(lexer->lookahead)) { lexer->mark_end(lexer); return true; } int16_t escape_char = lexer->lookahead; while (lexer->lookahead == escape_char) { escape_char_stack.push_back(escape_char); advance(lexer); } lexer->mark_end(lexer); return true; } else if (valid_symbols[TEXT_BLOCK_END] && !escape_char_stack.empty()) { lexer->result_symbol = TEXT_BLOCK_END; lexer->mark_end(lexer); skip_whitespaces(lexer); if (is_text_block_end(lexer)) { lexer->mark_end(lexer); escape_char_stack.clear(); return true; } } else if (valid_symbols[TEXT_BLOCK_RAW_TEXT] && !escape_char_stack.empty()) { lexer->result_symbol = TEXT_BLOCK_RAW_TEXT; lexer->mark_end(lexer); while (lexer->lookahead != 0 && !is_text_block_end(lexer)) { advance(lexer); lexer->mark_end(lexer); } return true; } else if (valid_symbols[BLOCK_COMMENT]) { lexer->result_symbol = BLOCK_COMMENT; lexer->mark_end(lexer); skip_whitespaces(lexer); // Check start of block comment if (!is_triple_double_quote(lexer)) { return false; } // Search end of block comment while (!is_triple_double_quote(lexer)) { // d2 expects closed tag for block comment if (lexer->lookahead == 0) return false; advance(lexer); } lexer->mark_end(lexer); return true; } return false; } }; } extern "C" { void *tree_sitter_d2_external_scanner_create() { return new Scanner(); } bool tree_sitter_d2_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { Scanner *scanner = static_cast(payload); return scanner->scan(lexer, valid_symbols); } unsigned tree_sitter_d2_external_scanner_serialize(void *payload, char *buffer) { Scanner *scanner = static_cast(payload); return scanner->serialize(buffer); } void tree_sitter_d2_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { Scanner *scanner = static_cast(payload); scanner->deserialize(buffer, length); } void tree_sitter_d2_external_scanner_destroy(void *payload) { Scanner *scanner = static_cast(payload); delete scanner; } }