#include "tree_sitter/array.h"
#include "tree_sitter/parser.h"
/**
 * Creates a delimiter with every member zero-initialised, i.e. with no
 * flag bits set.
 *
 * The parameter list is spelled `(void)` so the definition also serves as a
 * prototype; an empty `()` list leaves the arguments unchecked before C23.
 */
static inline Delimiter new_delimiter(void) {
    return (Delimiter){0};
}
/** Tests whether the Format bit is set in the delimiter's flags. */
static inline bool is_format(Delimiter *delimiter) {
    return (delimiter->flags & Format) != 0;
}
/** Tests whether the Raw bit is set in the delimiter's flags. */
static inline bool is_raw(Delimiter *delimiter) {
    return (delimiter->flags & Raw) != 0;
}
/** Tests whether the Triple bit is set in the delimiter's flags. */
static inline bool is_triple(Delimiter *delimiter) {
    return (delimiter->flags & Triple) != 0;
}
/** Tests whether the Bytes bit is set in the delimiter's flags. */
static inline bool is_bytes(Delimiter *delimiter) {
    return (delimiter->flags & Bytes) != 0;
}
// Looks up the closing-quote value for a delimiter from its flag bits.
// The SingleQuote, DoubleQuote and BackQuote bits are tested in that order.
// Callers compare the result against lexer->lookahead and also use a
// non-zero result as "a quote has been recorded".
// NOTE(review): presumably each branch returns the matching quote character
// and the fall-through result is 0 — confirm.
static inline int32_t end_character(Delimiter *delimiter) {
if (delimiter->flags & SingleQuote) {
if (delimiter->flags & DoubleQuote) {
if (delimiter->flags & BackQuote) {
/** Turns on the Format bit in the delimiter's flags; other bits are kept. */
static inline void set_format(Delimiter *delimiter) {
    delimiter->flags |= Format;
}
/** Turns on the Raw bit in the delimiter's flags; other bits are kept. */
static inline void set_raw(Delimiter *delimiter) {
    delimiter->flags |= Raw;
}
/** Turns on the Triple bit in the delimiter's flags; other bits are kept. */
static inline void set_triple(Delimiter *delimiter) {
    delimiter->flags |= Triple;
}
/** Turns on the Bytes bit in the delimiter's flags; other bits are kept. */
static inline void set_bytes(Delimiter *delimiter) {
    delimiter->flags |= Bytes;
}
// Records which quote terminates the string by setting one of the
// SingleQuote / DoubleQuote / BackQuote bits in the delimiter's flags.
// The scanner calls this with '\'', '"' and '`'.
// NOTE(review): presumably those characters select the three bits in that
// order — confirm.
static inline void set_end_character(Delimiter *delimiter, int32_t character) {
delimiter->flags |= SingleQuote;
delimiter->flags |= DoubleQuote;
delimiter->flags |= BackQuote;
// Stack of delimiters for the strings currently open: the scanner pushes on
// STRING_START, pops on STRING_END and inspects the innermost entry with
// array_back().
Array(Delimiter) delimiters;
/** Steps the lexer forward by calling its advance callback with skip = false. */
static inline void advance(TSLexer *lexer) {
    (*lexer->advance)(lexer, false);
}
/** Steps the lexer forward by calling its advance callback with skip = true. */
static inline void skip(TSLexer *lexer) {
    (*lexer->advance)(lexer, true);
}
// External scanner entry point.
//
// Consults `valid_symbols` to decide which external tokens to attempt and
// reports the chosen token through lexer->result_symbol. Tokens assigned in
// this function: ESCAPE_INTERPOLATION, STRING_CONTENT, STRING_END, INDENT,
// DEDENT, NEWLINE and STRING_START.
//
// payload:       the Scanner; its `delimiters`, `indents` and
//                `inside_f_string` members are read and updated here.
// lexer:         source of the lookahead character and sink for the result.
// valid_symbols: indexed by external token id.
bool tree_sitter_plum_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
// STRING_CONTENT and INDENT being valid together is treated as error
// recovery; ESCAPE_INTERPOLATION, STRING_CONTENT and NEWLINE are not
// produced in that mode.
bool error_recovery_mode = valid_symbols[STRING_CONTENT] && valid_symbols[INDENT];
bool within_brackets = valid_symbols[CLOSE_BRACE] || valid_symbols[CLOSE_PAREN] || valid_symbols[CLOSE_BRACKET];
bool advanced_once = false;
// Brace handling: only considered while a string is open and the lookahead
// is '{' or '}', and only for format strings.
if (valid_symbols[ESCAPE_INTERPOLATION] && scanner->delimiters.size > 0 &&
(lexer->lookahead == '{' || lexer->lookahead == '}') && !error_recovery_mode) {
Delimiter *delimiter = array_back(&scanner->delimiters);
if (is_format(delimiter)) {
bool is_left_brace = lexer->lookahead == '{';
if ((lexer->lookahead == '{' && is_left_brace) || (lexer->lookahead == '}' && !is_left_brace)) {
lexer->result_symbol = ESCAPE_INTERPOLATION;
// String body: scan the content of the innermost open string.
if (valid_symbols[STRING_CONTENT] && scanner->delimiters.size > 0 && !error_recovery_mode) {
Delimiter *delimiter = array_back(&scanner->delimiters);
int32_t end_char = end_character(delimiter);
bool has_content = advanced_once;
while (lexer->lookahead) {
if ((advanced_once || lexer->lookahead == '{' || lexer->lookahead == '}') && is_format(delimiter)) {
lexer->result_symbol = STRING_CONTENT;
if (lexer->lookahead == '\\') {
// Step over the backslash.
// Step over any escaped quotes.
if (lexer->lookahead == end_character(delimiter) || lexer->lookahead == '\\') {
if (lexer->lookahead == '\r') {
if (lexer->lookahead == '\n') {
} else if (lexer->lookahead == '\n') {
if (is_bytes(delimiter)) {
if (lexer->lookahead == 'N' || lexer->lookahead == 'u' || lexer->lookahead == 'U') {
// In bytes string, \N{...}, \uXXXX and \UXXXXXXXX are
// (presumably) not escape sequences — TODO confirm against:
// https://docs.plum.org/3/reference/lexical_analysis.html#string-and-bytes-literals
lexer->result_symbol = STRING_CONTENT;
lexer->result_symbol = STRING_CONTENT;
// Lookahead matches this string's closing quote character.
} else if (lexer->lookahead == end_char) {
// Triple-quoted strings test for the closing character repeatedly.
if (is_triple(delimiter)) {
if (lexer->lookahead == end_char) {
if (lexer->lookahead == end_char) {
lexer->result_symbol = STRING_CONTENT;
// String closed: drop its delimiter and clear the f-string marker.
array_pop(&scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
lexer->result_symbol = STRING_CONTENT;
lexer->result_symbol = STRING_CONTENT;
lexer->result_symbol = STRING_CONTENT;
array_pop(&scanner->delimiters);
lexer->result_symbol = STRING_END;
scanner->inside_f_string = false;
// A newline after some content inside a string that is not triple-quoted.
} else if (lexer->lookahead == '\n' && has_content && !is_triple(delimiter)) {
// State for the whitespace/comment scan that measures indentation.
// first_comment_indent_length stays -1 until a comment is recorded.
bool found_end_of_line = false;
uint32_t indent_length = 0;
int32_t first_comment_indent_length = -1;
if (lexer->lookahead == '\n') {
found_end_of_line = true;
} else if (lexer->lookahead == ' ') {
} else if (lexer->lookahead == '\r' || lexer->lookahead == '\f') {
} else if (lexer->lookahead == '\t') {
} else if (lexer->lookahead == '#' && (valid_symbols[INDENT] || valid_symbols[DEDENT] ||
valid_symbols[NEWLINE] || valid_symbols[EXCEPT])) {
// If we haven't found an EOL yet,
// then this is a comment after an expression:
// Just return, since we don't want to generate an indent/dedent
if (!found_end_of_line) {
// Remember the indentation of the first comment only.
if (first_comment_indent_length == -1) {
first_comment_indent_length = (int32_t)indent_length;
while (lexer->lookahead && lexer->lookahead != '\n') {
} else if (lexer->lookahead == '\\') {
if (lexer->lookahead == '\r') {
if (lexer->lookahead == '\n' || lexer->eof(lexer)) {
} else if (lexer->eof(lexer)) {
found_end_of_line = true;
// Compare the measured indentation with the innermost recorded level.
if (scanner->indents.size > 0) {
uint16_t current_indent_length = *array_back(&scanner->indents);
if (valid_symbols[INDENT] && indent_length > current_indent_length) {
array_push(&scanner->indents, indent_length);
lexer->result_symbol = INDENT;
// A quote or backtick in the lookahead means a string may start next.
bool next_tok_is_string_start =
lexer->lookahead == '\"' || lexer->lookahead == '\'' || lexer->lookahead == '`';
if ((valid_symbols[DEDENT] ||
(!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) &&
indent_length < current_indent_length && !scanner->inside_f_string &&
// Wait to create a dedent token until we've consumed any comments
// whose indentation matches the current block.
first_comment_indent_length < (int32_t)current_indent_length) {
array_pop(&scanner->indents);
lexer->result_symbol = DEDENT;
if (valid_symbols[NEWLINE] && !error_recovery_mode) {
lexer->result_symbol = NEWLINE;
// Try to open a string only when no comment was recorded above.
if (first_comment_indent_length == -1 && valid_symbols[STRING_START]) {
Delimiter delimiter = new_delimiter();
// while (lexer->lookahead) {
// if (lexer->lookahead == 'f' || lexer->lookahead == 'F') {
// set_format(&delimiter);
// } else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
// } else if (lexer->lookahead == 'b' || lexer->lookahead == 'B') {
// set_bytes(&delimiter);
// } else if (lexer->lookahead != 'u' && lexer->lookahead != 'U') {
if (lexer->lookahead == '`') {
set_end_character(&delimiter, '`');
} else if (lexer->lookahead == '\'') {
set_end_character(&delimiter, '\'');
if (lexer->lookahead == '\'') {
if (lexer->lookahead == '\'') {
} else if (lexer->lookahead == '"') {
set_end_character(&delimiter, '"');
if (lexer->lookahead == '"') {
if (lexer->lookahead == '"') {
// A non-zero end character means a quote was recognised: remember the
// delimiter and record whether it is a format string.
if (end_character(&delimiter)) {
array_push(&scanner->delimiters, delimiter);
lexer->result_symbol = STRING_START;
scanner->inside_f_string = is_format(&delimiter);
// Writes the scanner state into `buffer`.
//
// Items written, in source order: the inside_f_string flag (one byte), the
// delimiter count (one byte, capped at UINT8_MAX), the raw delimiter bytes,
// then one byte per entry of `indents` for as long as the write position
// stays below TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
unsigned tree_sitter_plum_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = (Scanner *)payload;
buffer[size++] = (char)scanner->inside_f_string;
size_t delimiter_count = scanner->delimiters.size;
// The count has to fit in the single byte written below.
if (delimiter_count > UINT8_MAX) {
delimiter_count = UINT8_MAX;
buffer[size++] = (char)delimiter_count;
if (delimiter_count > 0) {
// Copies delimiter_count bytes, i.e. one byte per delimiter.
memcpy(&buffer[size], scanner->delimiters.contents, delimiter_count);
for (; iter < scanner->indents.size && size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
// Each indent level is narrowed to a single char when stored.
buffer[size++] = (char)*array_get(&scanner->indents, iter);
// Rebuilds the scanner state from `buffer`, which holds `length` bytes.
//
// Both arrays are cleared and a base indent of 0 is pushed first. The data is
// then read back as: inside_f_string flag, one-byte delimiter count, the raw
// delimiter bytes, and one indent level for every remaining byte.
// NOTE(review): the create function calls this with (NULL, 0); confirm the
// buffer reads below are guarded by a length check.
void tree_sitter_plum_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload;
array_delete(&scanner->delimiters);
array_delete(&scanner->indents);
array_push(&scanner->indents, 0);
scanner->inside_f_string = (bool)buffer[size++];
// Read through uint8_t so the count is taken as an unsigned byte.
size_t delimiter_count = (uint8_t)buffer[size++];
if (delimiter_count > 0) {
// Make room, set the element count, then copy one byte per delimiter.
array_reserve(&scanner->delimiters, delimiter_count);
scanner->delimiters.size = delimiter_count;
memcpy(scanner->delimiters.contents, &buffer[size], delimiter_count);
for (; size < length; size++) {
// Indent levels are stored as single unsigned bytes.
array_push(&scanner->indents, (unsigned char)buffer[size]);
// Allocates a zero-initialised Scanner, initialises its two arrays and resets
// it by deserializing an empty buffer.
// NOTE(review): presumably returns the new scanner as the payload pointer —
// confirm.
void *tree_sitter_plum_external_scanner_create() {
// Delimiter must be exactly one byte: serialization copies delimiters with
// memcpy using the element count as the byte count.
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
_Static_assert(sizeof(Delimiter) == sizeof(char), "");
assert(sizeof(Delimiter) == sizeof(char));
Scanner *scanner = calloc(1, sizeof(Scanner));
array_init(&scanner->indents);
array_init(&scanner->delimiters);
tree_sitter_plum_external_scanner_deserialize(scanner, NULL, 0);
// Tears down a scanner: deletes the `indents` and `delimiters` arrays held
// by the Scanner passed as `payload`.
void tree_sitter_plum_external_scanner_destroy(void *payload) {
Scanner *scanner = (Scanner *)payload;
array_delete(&scanner->indents);
array_delete(&scanner->delimiters);