#include "./get_changed_ranges.h" #include "./subtree.h" #include "./language.h" #include "./error_costs.h" #include "./tree_cursor.h" #include // #define DEBUG_GET_CHANGED_RANGES static void ts_range_array_add(TSRangeArray *self, Length start, Length end) { if (self->size > 0) { TSRange *last_range = array_back(self); if (start.bytes <= last_range->end_byte) { last_range->end_byte = end.bytes; last_range->end_point = end.extent; return; } } if (start.bytes < end.bytes) { TSRange range = { start.extent, end.extent, start.bytes, end.bytes }; array_push(self, range); } } bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index, uint32_t start_byte, uint32_t end_byte) { for (unsigned i = start_index; i < self->size; i++) { TSRange *range = &self->contents[i]; if (range->end_byte > start_byte) { if (range->start_byte >= end_byte) break; return true; } } return false; } void ts_range_array_get_changed_ranges( const TSRange *old_ranges, unsigned old_range_count, const TSRange *new_ranges, unsigned new_range_count, TSRangeArray *differences ) { unsigned new_index = 0; unsigned old_index = 0; Length current_position = length_zero(); bool in_old_range = false; bool in_new_range = false; while (old_index < old_range_count || new_index < new_range_count) { const TSRange *old_range = &old_ranges[old_index]; const TSRange *new_range = &new_ranges[new_index]; Length next_old_position; if (in_old_range) { next_old_position = (Length) {old_range->end_byte, old_range->end_point}; } else if (old_index < old_range_count) { next_old_position = (Length) {old_range->start_byte, old_range->start_point}; } else { next_old_position = LENGTH_MAX; } Length next_new_position; if (in_new_range) { next_new_position = (Length) {new_range->end_byte, new_range->end_point}; } else if (new_index < new_range_count) { next_new_position = (Length) {new_range->start_byte, new_range->start_point}; } else { next_new_position = LENGTH_MAX; } if (next_old_position.bytes < next_new_position.bytes) { if (in_old_range != in_new_range) { ts_range_array_add(differences, current_position, next_old_position); } if (in_old_range) old_index++; current_position = next_old_position; in_old_range = !in_old_range; } else if (next_new_position.bytes < next_old_position.bytes) { if (in_old_range != in_new_range) { ts_range_array_add(differences, current_position, next_new_position); } if (in_new_range) new_index++; current_position = next_new_position; in_new_range = !in_new_range; } else { if (in_old_range != in_new_range) { ts_range_array_add(differences, current_position, next_new_position); } if (in_old_range) old_index++; if (in_new_range) new_index++; in_old_range = !in_old_range; in_new_range = !in_new_range; current_position = next_new_position; } } } typedef struct { TreeCursor cursor; const TSLanguage *language; unsigned visible_depth; bool in_padding; } Iterator; static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language) { array_clear(&cursor->stack); array_push(&cursor->stack, ((TreeCursorEntry){ .subtree = tree, .position = length_zero(), .child_index = 0, .structural_child_index = 0, })); return (Iterator) { .cursor = *cursor, .language = language, .visible_depth = 1, .in_padding = false, }; } static bool iterator_done(Iterator *self) { return self->cursor.stack.size == 0; } Length iterator_start_position(Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); if (self->in_padding) { return entry.position; } else { return length_add(entry.position, ts_subtree_padding(*entry.subtree)); } } Length iterator_end_position(Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); if (self->in_padding) { return result; } else { return length_add(result, ts_subtree_size(*entry.subtree)); } } static bool iterator_tree_is_visible(const Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); if (ts_subtree_visible(*entry.subtree)) return true; if (self->cursor.stack.size > 1) { Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, parent.ptr->production_id ); return alias_sequence && alias_sequence[entry.structural_child_index] != 0; } return false; } static void iterator_get_visible_state(const Iterator *self, Subtree *tree, TSSymbol *alias_symbol, uint32_t *start_byte) { uint32_t i = self->cursor.stack.size - 1; if (self->in_padding) { if (i == 0) return; i--; } for (; i + 1 > 0; i--) { TreeCursorEntry entry = self->cursor.stack.contents[i]; if (i > 0) { const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, parent->ptr->production_id ); if (alias_sequence) { *alias_symbol = alias_sequence[entry.structural_child_index]; } } if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { *tree = *entry.subtree; *start_byte = entry.position.bytes; break; } } } static void iterator_ascend(Iterator *self) { if (iterator_done(self)) return; if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; self->cursor.stack.size--; } static bool iterator_descend(Iterator *self, uint32_t goal_position) { if (self->in_padding) return false; bool did_descend; do { did_descend = false; TreeCursorEntry entry = *array_back(&self->cursor.stack); Length position = entry.position; uint32_t structural_child_index = 0; for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { const Subtree *child = &entry.subtree->ptr->children[i]; Length child_left = length_add(position, ts_subtree_padding(*child)); Length child_right = length_add(child_left, ts_subtree_size(*child)); if (child_right.bytes > goal_position) { array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = child, .position = position, .child_index = i, .structural_child_index = structural_child_index, })); if (iterator_tree_is_visible(self)) { if (child_left.bytes > goal_position) { self->in_padding = true; } else { self->visible_depth++; } return true; } did_descend = true; break; } position = child_right; if (!ts_subtree_extra(*child)) structural_child_index++; } } while (did_descend); return false; } static void iterator_advance(Iterator *self) { if (self->in_padding) { self->in_padding = false; if (iterator_tree_is_visible(self)) { self->visible_depth++; } else { iterator_descend(self, 0); } return; } for (;;) { if (iterator_tree_is_visible(self)) self->visible_depth--; TreeCursorEntry entry = array_pop(&self->cursor.stack); if (iterator_done(self)) return; const Subtree *parent = array_back(&self->cursor.stack)->subtree; uint32_t child_index = entry.child_index + 1; if (ts_subtree_child_count(*parent) > child_index) { Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); uint32_t structural_child_index = entry.structural_child_index; if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; const Subtree *next_child = &parent->ptr->children[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = next_child, .position = position, .child_index = child_index, .structural_child_index = structural_child_index, })); if (iterator_tree_is_visible(self)) { if (ts_subtree_padding(*next_child).bytes > 0) { self->in_padding = true; } else { self->visible_depth++; } } else { iterator_descend(self, 0); } break; } } } typedef enum { IteratorDiffers, IteratorMayDiffer, IteratorMatches, } IteratorComparison; IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) { Subtree old_tree = NULL_SUBTREE; Subtree new_tree = NULL_SUBTREE; uint32_t old_start = 0; uint32_t new_start = 0; TSSymbol old_alias_symbol = 0; TSSymbol new_alias_symbol = 0; iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; if ( old_alias_symbol == new_alias_symbol && ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree) ) { if (old_start == new_start && !ts_subtree_has_changes(old_tree) && ts_subtree_symbol(old_tree) != ts_builtin_sym_error && ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && (ts_subtree_parse_state(old_tree) == ERROR_STATE) == (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { return IteratorMatches; } else { return IteratorMayDiffer; } } return IteratorDiffers; } #ifdef DEBUG_GET_CHANGED_RANGES static inline void iterator_print_state(Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); TSPoint start = iterator_start_position(self).extent; TSPoint end = iterator_end_position(self).extent; const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); printf( "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", name, self->in_padding ? "(p)" : " ", self->visible_depth, start.row + 1, start.column, end.row + 1, end.column ); } #endif unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree, TreeCursor *cursor1, TreeCursor *cursor2, const TSLanguage *language, const TSRangeArray *included_range_differences, TSRange **ranges) { TSRangeArray results = array_new(); Iterator old_iter = iterator_new(cursor1, old_tree, language); Iterator new_iter = iterator_new(cursor2, new_tree, language); unsigned included_range_difference_index = 0; Length position = iterator_start_position(&old_iter); Length next_position = iterator_start_position(&new_iter); if (position.bytes < next_position.bytes) { ts_range_array_add(&results, position, next_position); position = next_position; } else if (position.bytes > next_position.bytes) { ts_range_array_add(&results, next_position, position); next_position = position; } do { #ifdef DEBUG_GET_CHANGED_RANGES printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column); iterator_print_state(&old_iter); printf("\tvs\t"); iterator_print_state(&new_iter); puts(""); #endif // Compare the old and new subtrees. IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); // Even if the two subtrees appear to be identical, they could differ // internally if they contain a range of text that was previously // excluded from the parse, and is now included, or vice-versa. if (comparison == IteratorMatches && ts_range_array_intersects( included_range_differences, included_range_difference_index, position.bytes, iterator_end_position(&old_iter).bytes )) { comparison = IteratorMayDiffer; } bool is_changed = false; switch (comparison) { // If the subtrees are definitely identical, move to the end // of both subtrees. case IteratorMatches: next_position = iterator_end_position(&old_iter); break; // If the subtrees might differ internally, descend into both // subtrees, finding the first child that spans the current position. case IteratorMayDiffer: if (iterator_descend(&old_iter, position.bytes)) { if (!iterator_descend(&new_iter, position.bytes)) { is_changed = true; next_position = iterator_end_position(&old_iter); } } else if (iterator_descend(&new_iter, position.bytes)) { is_changed = true; next_position = iterator_end_position(&new_iter); } else { next_position = length_min( iterator_end_position(&old_iter), iterator_end_position(&new_iter) ); } break; // If the subtrees are different, record a change and then move // to the end of both subtrees. case IteratorDiffers: is_changed = true; next_position = length_min( iterator_end_position(&old_iter), iterator_end_position(&new_iter) ); break; } // Ensure that both iterators are caught up to the current position. while ( !iterator_done(&old_iter) && iterator_end_position(&old_iter).bytes <= next_position.bytes ) iterator_advance(&old_iter); while ( !iterator_done(&new_iter) && iterator_end_position(&new_iter).bytes <= next_position.bytes ) iterator_advance(&new_iter); // Ensure that both iterators are at the same depth in the tree. while (old_iter.visible_depth > new_iter.visible_depth) { iterator_ascend(&old_iter); } while (new_iter.visible_depth > old_iter.visible_depth) { iterator_ascend(&new_iter); } if (is_changed) { #ifdef DEBUG_GET_CHANGED_RANGES printf( " change: [[%u, %u] - [%u, %u]]\n", position.extent.row + 1, position.extent.column, next_position.extent.row + 1, next_position.extent.column ); #endif ts_range_array_add(&results, position, next_position); } position = next_position; // Keep track of the current position in the included range differences // array in order to avoid scanning the entire array on each iteration. while (included_range_difference_index < included_range_differences->size) { const TSRange *range = &included_range_differences->contents[ included_range_difference_index ]; if (range->end_byte <= position.bytes) { included_range_difference_index++; } else { break; } } } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); Length old_size = ts_subtree_total_size(*old_tree); Length new_size = ts_subtree_total_size(*new_tree); if (old_size.bytes < new_size.bytes) { ts_range_array_add(&results, old_size, new_size); } else if (new_size.bytes < old_size.bytes) { ts_range_array_add(&results, new_size, old_size); } *cursor1 = old_iter.cursor; *cursor2 = new_iter.cursor; *ranges = results.contents; return results.size; }