// Copyright 2013 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Author: dsites@google.com (Dick Sites) // #ifndef UTIL_UTF8_OFFSETMAP_H_ #define UTIL_UTF8_OFFSETMAP_H_ #include // for string #include "integral_types.h" // for uint32 // ***************************** OffsetMap ************************** // // An OffsetMap object is a container for a mapping from offsets in one text // buffer A' to offsets in another text buffer A. It is most useful when A' is // built from A via substitutions that occasionally do not preserve byte length. // // A series of operators are used to build the correspondence map, then // calls can be made to map an offset in A' to an offset in A, or vice versa. // The map starts with offset 0 in A corresponding to offset 0 in A'. // The mapping is then built sequentially, adding on byte ranges that are // identical in A and A', byte ranges that are inserted in A', and byte ranges // that are deleted from A. All bytes beyond those specified when building the // map are assumed to correspond, i.e. a Copy(infinity) is assumed at the // end of the map. // // The internal data structure records positions at which bytes are added or // deleted. Using the map is O(1) when increasing the A' or A offset // monotonically, and O(n) when accessing random offsets, where n is the // number of differences. // namespace CLD2 { class OffsetMap { public: // Constructor, destructor OffsetMap(); ~OffsetMap(); // Clear the map void Clear(); // Add to mapping from A to A', specifying how many next bytes correspond // in A and A' void Copy(int bytes); // Add to mapping from A to A', specifying how many next bytes are // inserted in A' while not advancing in A at all void Insert(int bytes); // Add to mapping from A to A', specifying how many next bytes are // deleted from A while not advancing in A' at all void Delete(int bytes); // Print map to file, for debugging void Printmap(const char* filename); // [Finish building map,] Re-position to offset 0 // This call is optional; MapForward and MapBack finish building the map // if necessary void Reset(); // Map an offset in A' to the corresponding offset in A int MapBack(int aprimeoffset); // Map an offset in A to the corresponding offset in A' int MapForward(int aoffset); // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is // from A' to A'' and h is from A to A''. // // Note that g->MoveForward(f->MoveForward(aoffset)) always equals // to h->MoveForward(aoffset), while // f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals // to h->MoveBack(aprimeprimeoffset). This happens when deletion in // f and insertion in g are at the same place. For example, // // A 1 2 3 4 // ^ | ^ ^ // | | / | f // v vv v // A' 1' 2' 3' // ^ ^^ ^ // | | \ | g // v | v v // A'' 1'' 2'' 3'' 4'' // // results in: // // A 1 2 3 4 // ^ ^\ ^ ^ // | | \ | | h // v | vv v // A'' 1'' 2'' 3'' 4'' // // 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in // the latter figure. static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h); // For debugging only; writes to stderr void DumpWindow(); // For testing only -- force a mapping void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset); private: enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP}; void Flush(); void FlushAll(); void MaybeFlushAll(); void Emit(MapOp op, int len); void SetLeft(); void SetRight(); // Back up over previous range, 1..5 bytes // Return subscript at the beginning of that. Pins at 0 int Backup(int sub); // Parse next range, 1..5 bytes // Return subscript just off the end of that int ParseNext(int sub, MapOp* op, int* length); // Parse previous range, 1..5 bytes // Return current subscript int ParsePrevious(int sub, MapOp* op, int* length); void PrintPosition(const char* str); bool MoveRight(); // Returns true if OK bool MoveLeft(); // Returns true if OK void DumpString(); // Copies insert operations from source to dest. Returns true if no // other operations are found. static bool CopyInserts(OffsetMap* source, OffsetMap* dest); // Copies delete operations from source to dest. Returns true if no other // operations are found. static bool CopyDeletes(OffsetMap* source, OffsetMap* dest); std::string diffs_; MapOp pending_op_; uint32 pending_length_; // Offsets in the ranges below correspond to each other, with A' = A + diff int next_diff_sub_; int current_lo_aoffset_; int current_hi_aoffset_; int current_lo_aprimeoffset_; int current_hi_aprimeoffset_; int current_diff_; int max_aoffset_; int max_aprimeoffset_; }; } // namespace CLD2 #endif // UTIL_UTF8_OFFSETMAP_H_