// Copyright 2013 Facebook /** * RedisListIterator: * An abstraction over the "list" concept (e.g.: for redis lists). * Provides functionality to read, traverse, edit, and write these lists. * * Upon construction, the RedisListIterator is given a block of list data. * Internally, it stores a pointer to the data and a pointer to current item. * It also stores a "result" list that will be mutated over time. * * Traversal and mutation are done by "forward iteration". * The Push() and Skip() methods will advance the iterator to the next item. * However, Push() will also "write the current item to the result". * Skip() will simply move to next item, causing current item to be dropped. * * Upon completion, the result (accessible by WriteResult()) will be saved. * All "skipped" items will be gone; all "pushed" items will remain. * * @throws Any of the operations may throw a RedisListException if an invalid * operation is performed or if the data is found to be corrupt. * * @notes By default, if WriteResult() is called part-way through iteration, * it will automatically advance the iterator to the end, and Keep() * all items that haven't been traversed yet. This may be subject * to review. * * @notes Can access the "current" item via GetCurrent(), and other * list-specific information such as Length(). * * @notes The internal representation is due to change at any time. Presently, * the list is represented as follows: * - 32-bit integer header: the number of items in the list * - For each item: * - 32-bit int (n): the number of bytes representing this item * - n bytes of data: the actual data. * * @author Deon Nicholas (dnicholas@fb.com) */ #pragma once #ifndef ROCKSDB_LITE #include #include "redis_list_exception.h" #include "rocksdb/slice.h" #include "util/coding.h" namespace rocksdb { /// An abstraction over the "list" concept. /// All operations may throw a RedisListException class RedisListIterator { public: /// Construct a redis-list-iterator based on data. /// If the data is non-empty, it must formatted according to @notes above. /// /// If the data is valid, we can assume the following invariant(s): /// a) length_, num_bytes_ are set correctly. /// b) cur_byte_ always refers to the start of the current element, /// just before the bytes that specify element length. /// c) cur_elem_ is always the index of the current element. /// d) cur_elem_length_ is always the number of bytes in current element, /// excluding the 4-byte header itself. /// e) result_ will always contain data_[0..cur_byte_) and a header /// f) Whenever corrupt data is encountered or an invalid operation is /// attempted, a RedisListException will immediately be thrown. explicit RedisListIterator(const std::string& list_data) : data_(list_data.data()), num_bytes_(static_cast(list_data.size())), cur_byte_(0), cur_elem_(0), cur_elem_length_(0), length_(0), result_() { // Initialize the result_ (reserve enough space for header) InitializeResult(); // Parse the data only if it is not empty. if (num_bytes_ == 0) { return; } // If non-empty, but less than 4 bytes, data must be corrupt if (num_bytes_ < sizeof(length_)) { ThrowError("Corrupt header."); // Will break control flow } // Good. The first bytes specify the number of elements length_ = DecodeFixed32(data_); cur_byte_ = sizeof(length_); // If we have at least one element, point to that element. // Also, read the first integer of the element (specifying the size), // if possible. if (length_ > 0) { if (cur_byte_ + sizeof(cur_elem_length_) <= num_bytes_) { cur_elem_length_ = DecodeFixed32(data_+cur_byte_); } else { ThrowError("Corrupt data for first element."); } } // At this point, we are fully set-up. // The invariants described in the header should now be true. } /// Reserve some space for the result_. /// Equivalent to result_.reserve(bytes). void Reserve(int bytes) { result_.reserve(bytes); } /// Go to next element in data file. /// Also writes the current element to result_. RedisListIterator& Push() { WriteCurrentElement(); MoveNext(); return *this; } /// Go to next element in data file. /// Drops/skips the current element. It will not be written to result_. RedisListIterator& Skip() { MoveNext(); --length_; // One less item --cur_elem_; // We moved one forward, but index did not change return *this; } /// Insert elem into the result_ (just BEFORE the current element / byte) /// Note: if Done() (i.e.: iterator points to end), this will append elem. void InsertElement(const Slice& elem) { // Ensure we are in a valid state CheckErrors(); const int kOrigSize = static_cast(result_.size()); result_.resize(kOrigSize + SizeOf(elem)); EncodeFixed32(result_.data() + kOrigSize, static_cast(elem.size())); memcpy(result_.data() + kOrigSize + sizeof(uint32_t), elem.data(), elem.size()); ++length_; ++cur_elem_; } /// Access the current element, and save the result into *curElem void GetCurrent(Slice* curElem) { // Ensure we are in a valid state CheckErrors(); // Ensure that we are not past the last element. if (Done()) { ThrowError("Invalid dereferencing."); } // Dereference the element *curElem = Slice(data_+cur_byte_+sizeof(cur_elem_length_), cur_elem_length_); } // Number of elements int Length() const { return length_; } // Number of bytes in the final representation (i.e: WriteResult().size()) int Size() const { // result_ holds the currently written data // data_[cur_byte..num_bytes-1] is the remainder of the data return static_cast(result_.size() + (num_bytes_ - cur_byte_)); } // Reached the end? bool Done() const { return cur_byte_ >= num_bytes_ || cur_elem_ >= length_; } /// Returns a string representing the final, edited, data. /// Assumes that all bytes of data_ in the range [0,cur_byte_) have been read /// and that result_ contains this data. /// The rest of the data must still be written. /// So, this method ADVANCES THE ITERATOR TO THE END before writing. Slice WriteResult() { CheckErrors(); // The header should currently be filled with dummy data (0's) // Correctly update the header. // Note, this is safe since result_ is a vector (guaranteed contiguous) EncodeFixed32(&result_[0],length_); // Append the remainder of the data to the result. result_.insert(result_.end(),data_+cur_byte_, data_ +num_bytes_); // Seek to end of file cur_byte_ = num_bytes_; cur_elem_ = length_; cur_elem_length_ = 0; // Return the result return Slice(result_.data(),result_.size()); } public: // Static public functions /// An upper-bound on the amount of bytes needed to store this element. /// This is used to hide representation information from the client. /// E.G. This can be used to compute the bytes we want to Reserve(). static uint32_t SizeOf(const Slice& elem) { // [Integer Length . Data] return static_cast(sizeof(uint32_t) + elem.size()); } private: // Private functions /// Initializes the result_ string. /// It will fill the first few bytes with 0's so that there is /// enough space for header information when we need to write later. /// Currently, "header information" means: the length (number of elements) /// Assumes that result_ is empty to begin with void InitializeResult() { assert(result_.empty()); // Should always be true. result_.resize(sizeof(uint32_t),0); // Put a block of 0's as the header } /// Go to the next element (used in Push() and Skip()) void MoveNext() { CheckErrors(); // Check to make sure we are not already in a finished state if (Done()) { ThrowError("Attempting to iterate past end of list."); } // Move forward one element. cur_byte_ += sizeof(cur_elem_length_) + cur_elem_length_; ++cur_elem_; // If we are at the end, finish if (Done()) { cur_elem_length_ = 0; return; } // Otherwise, we should be able to read the new element's length if (cur_byte_ + sizeof(cur_elem_length_) > num_bytes_) { ThrowError("Corrupt element data."); } // Set the new element's length cur_elem_length_ = DecodeFixed32(data_+cur_byte_); return; } /// Append the current element (pointed to by cur_byte_) to result_ /// Assumes result_ has already been reserved appropriately. void WriteCurrentElement() { // First verify that the iterator is still valid. CheckErrors(); if (Done()) { ThrowError("Attempting to write invalid element."); } // Append the cur element. result_.insert(result_.end(), data_+cur_byte_, data_+cur_byte_+ sizeof(uint32_t) + cur_elem_length_); } /// Will ThrowError() if necessary. /// Checks for common/ubiquitous errors that can arise after most operations. /// This method should be called before any reading operation. /// If this function succeeds, then we are guaranteed to be in a valid state. /// Other member functions should check for errors and ThrowError() also /// if an error occurs that is specific to it even while in a valid state. void CheckErrors() { // Check if any crazy thing has happened recently if ((cur_elem_ > length_) || // Bad index (cur_byte_ > num_bytes_) || // No more bytes (cur_byte_ + cur_elem_length_ > num_bytes_) || // Item too large (cur_byte_ == num_bytes_ && cur_elem_ != length_) || // Too many items (cur_elem_ == length_ && cur_byte_ != num_bytes_)) { // Too many bytes ThrowError("Corrupt data."); } } /// Will throw an exception based on the passed-in message. /// This function is guaranteed to STOP THE CONTROL-FLOW. /// (i.e.: you do not have to call "return" after calling ThrowError) void ThrowError(const char* const msg = NULL) { // TODO: For now we ignore the msg parameter. This can be expanded later. throw RedisListException(); } private: const char* const data_; // A pointer to the data (the first byte) const uint32_t num_bytes_; // The number of bytes in this list uint32_t cur_byte_; // The current byte being read uint32_t cur_elem_; // The current element being read uint32_t cur_elem_length_; // The number of bytes in current element uint32_t length_; // The number of elements in this list std::vector result_; // The output data }; } // namespace rocksdb #endif // ROCKSDB_LITE