/****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite ** version 3.8.11.1. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements ** of 5% or more are commonly seen when SQLite is compiled as a single ** translation unit. ** ** This file is all you need to compile SQLite. To use SQLite in other ** programs, you need this file and the "sqliteBtree.h" header file that defines ** the programming interface to the SQLite library. (If you do not have ** the "sqliteBtree.h" header file at hand, you will find a copy embedded within ** the text of this file. Search for "Begin file sqliteBtree.h" to find the start ** of the embedded sqliteBtree.h header file.) */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 #ifndef SQLITE_PRIVATE # define SQLITE_PRIVATE static #endif /************** Begin file sqliteInt.h ***************************************/ /* ** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** Internal interface definitions for SQLite. ** */ #ifndef _SQLITEINT_H_ #define _SQLITEINT_H_ /* ** Include the header file used to customize the compiler options for MSVC. ** This should be done first so that it can successfully prevent spurious ** compiler warnings due to subsequent content in this file and other files ** that are included by this file. */ /************** Include msvc.h in the middle of sqliteInt.h ******************/ /************** Begin file msvc.h ********************************************/ /* ** 2015 January 12 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains code that is specific to MSVC. */ #ifndef _MSVC_H_ #define _MSVC_H_ #if defined(_MSC_VER) #pragma warning(disable : 4054) #pragma warning(disable : 4055) #pragma warning(disable : 4100) #pragma warning(disable : 4127) #pragma warning(disable : 4130) #pragma warning(disable : 4152) #pragma warning(disable : 4189) #pragma warning(disable : 4206) #pragma warning(disable : 4210) #pragma warning(disable : 4232) #pragma warning(disable : 4244) #pragma warning(disable : 4305) #pragma warning(disable : 4306) #pragma warning(disable : 4702) #pragma warning(disable : 4706) #endif /* defined(_MSC_VER) */ #endif /* _MSVC_H_ */ /************** End of msvc.h ************************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /* ** Special setup for VxWorks */ /************** Include vxworks.h in the middle of sqliteInt.h ***************/ /************** Begin file vxworks.h *****************************************/ /* ** 2015-03-02 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains code that is specific to Wind River's VxWorks */ #if defined(__RTP__) || defined(_WRS_KERNEL) /* This is VxWorks. Set up things specially for that OS */ #include #include /* amalgamator: dontcache */ #define OS_VXWORKS 1 #define SQLITE_OS_OTHER 0 #define SQLITE_HOMEGROWN_RECURSIVE_MUTEX 1 #define SQLITE_ENABLE_LOCKING_STYLE 0 #define HAVE_UTIME 1 #else /* This is not VxWorks. */ #define OS_VXWORKS 0 #endif /* defined(_WRS_KERNEL) */ /************** End of vxworks.h *********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /* ** These #defines should enable >2GB file support on POSIX if the ** underlying operating system supports it. If the OS lacks ** large file support, or if the OS is windows, these should be no-ops. ** ** Ticket #2739: The _LARGEFILE_SOURCE macro must appear before any ** system #includes. Hence, this block of code must be the very first ** code in all source files. ** ** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch ** on the compiler command line. This is necessary if you are compiling ** on a recent machine (ex: Red Hat 7.2) but you want your code to work ** on an older machine (ex: Red Hat 6.0). If you compile on Red Hat 7.2 ** without this option, LFS is enable. But LFS does not exist in the kernel ** in Red Hat 6.0, so the code won't work. Hence, for maximum binary ** portability you should omit LFS. ** ** The previous paragraph was written in 2005. (This paragraph is written ** on 2008-11-28.) These days, all Linux kernels support large files, so ** you should probably leave LFS enabled. But some embedded platforms might ** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful. ** ** Similar is true for Mac OS X. LFS is only supported on Mac OS X 9 and later. */ #ifndef SQLITE_DISABLE_LFS # define _LARGE_FILE 1 # ifndef _FILE_OFFSET_BITS # define _FILE_OFFSET_BITS 64 # endif # define _LARGEFILE_SOURCE 1 #endif /* What version of GCC is being used. 0 means GCC is not being used */ #ifdef __GNUC__ # define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) #else # define GCC_VERSION 0 #endif /* Needed for various definitions... */ #if defined(__GNUC__) && !defined(_GNU_SOURCE) # define _GNU_SOURCE #endif #if defined(__OpenBSD__) && !defined(_BSD_SOURCE) # define _BSD_SOURCE #endif /* ** For MinGW, check to see if we can include the header file containing its ** version information, among other things. Normally, this internal MinGW ** header file would [only] be included automatically by other MinGW header ** files; however, the contained version information is now required by this ** header file to work around binary compatibility issues (see below) and ** this is the only known way to reliably obtain it. This entire #if block ** would be completely unnecessary if there was any other way of detecting ** MinGW via their preprocessor (e.g. if they customized their GCC to define ** some MinGW-specific macros). When compiling for MinGW, either the ** _HAVE_MINGW_H or _HAVE__MINGW_H (note the extra underscore) macro must be ** defined; otherwise, detection of conditions specific to MinGW will be ** disabled. */ #if defined(_HAVE_MINGW_H) # include "mingw.h" #elif defined(_HAVE__MINGW_H) # include "_mingw.h" #endif /* ** For MinGW version 4.x (and higher), check to see if the _USE_32BIT_TIME_T ** define is required to maintain binary compatibility with the MSVC runtime ** library in use (e.g. for Windows XP). */ #if !defined(_USE_32BIT_TIME_T) && !defined(_USE_64BIT_TIME_T) && \ defined(_WIN32) && !defined(_WIN64) && \ defined(__MINGW_MAJOR_VERSION) && __MINGW_MAJOR_VERSION >= 4 && \ defined(__MSVCRT__) # define _USE_32BIT_TIME_T #endif #include "sqlite3Btree.h" #include #include /* Needed for the definition of va_list */ /* ** Provide the ability to override linkage features of the interface. */ #ifndef SQLITE_EXTERN # define SQLITE_EXTERN extern #endif #ifndef SQLITE_API # define SQLITE_API #endif #ifndef SQLITE_CDECL # define SQLITE_CDECL #endif #ifndef SQLITE_STDCALL # define SQLITE_STDCALL #endif /* ** CAPI3REF: Compile-Time Library Version Numbers ** ** ^(The [SQLITE_VERSION] C preprocessor macro in the sqlite3.h header ** evaluates to a string literal that is the SQLite version in the ** format "X.Y.Z" where X is the major version number (always 3 for ** SQLite3) and Y is the minor version number and Z is the release number.)^ ** ^(The [SQLITE_VERSION_NUMBER] C preprocessor macro resolves to an integer ** with the value (X*1000000 + Y*1000 + Z) where X, Y, and Z are the same ** numbers used in [SQLITE_VERSION].)^ ** The SQLITE_VERSION_NUMBER for any given release of SQLite will also ** be larger than the release from which it is derived. Either Y will ** be held constant and Z will be incremented or else Y will be incremented ** and Z will be reset to zero. ** ** Since version 3.6.18, SQLite source code has been stored in the ** Fossil configuration management ** system. ^The SQLITE_SOURCE_ID macro evaluates to ** a string which identifies a particular check-in of SQLite ** within its configuration management system. ^The SQLITE_SOURCE_ID ** string contains the date and time of the check-in (UTC) and an SHA1 ** hash of the entire source tree. ** ** See also: [sqlite3_libversion()], ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ #define SQLITE_VERSION "3.8.11.1" #define SQLITE_VERSION_NUMBER 3008011 #define SQLITE_SOURCE_ID "2015-07-29 20:00:57 cf538e2783e468bbc25e7cb2a9ee64d3e0e80b2f" /* ** CAPI3REF: Device Characteristics ** ** The xDeviceCharacteristics method of the [sqlite3_io_methods] ** object returns an integer which is a vector of these ** bit values expressing I/O characteristics of the mass storage ** device that holds the file that the [sqlite3_io_methods] ** refers to. ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of ** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values ** mean that writes of blocks that are nnn bytes in size and ** are aligned to an address which is an integer multiple of ** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means ** that when data is appended to a file, the data is appended ** first then the size of the file is extended, never the other ** way around. The SQLITE_IOCAP_SEQUENTIAL property means that ** information is written to disk in the same order as calls ** to xWrite(). The SQLITE_IOCAP_POWERSAFE_OVERWRITE property means that ** after reboot following a crash or power loss, the only bytes in a ** file that were written at the application level might have changed ** and that adjacent bytes, even bytes within the same sector are ** guaranteed to be unchanged. The SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN ** flag indicate that a file cannot be deleted when open. The ** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on ** read-only media and cannot be changed even by processes with ** elevated privileges. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 #define SQLITE_IOCAP_ATOMIC1K 0x00000004 #define SQLITE_IOCAP_ATOMIC2K 0x00000008 #define SQLITE_IOCAP_ATOMIC4K 0x00000010 #define SQLITE_IOCAP_ATOMIC8K 0x00000020 #define SQLITE_IOCAP_ATOMIC16K 0x00000040 #define SQLITE_IOCAP_ATOMIC32K 0x00000080 #define SQLITE_IOCAP_ATOMIC64K 0x00000100 #define SQLITE_IOCAP_SAFE_APPEND 0x00000200 #define SQLITE_IOCAP_SEQUENTIAL 0x00000400 #define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 /* ** CAPI3REF: File Locking Levels ** ** SQLite uses one of these integer values as the second ** argument to calls it makes to the xLock() and xUnlock() methods ** of an [sqlite3_io_methods] object. */ #define SQLITE_LOCK_NONE 0 #define SQLITE_LOCK_SHARED 1 #define SQLITE_LOCK_RESERVED 2 #define SQLITE_LOCK_PENDING 3 #define SQLITE_LOCK_EXCLUSIVE 4 /* ** CAPI3REF: Synchronization Type Flags ** ** When SQLite invokes the xSync() method of an ** [sqlite3_io_methods] object it uses a combination of ** these integer values as the second argument. ** ** When the SQLITE_SYNC_DATAONLY flag is used, it means that the ** sync operation only needs to flush data to mass storage. Inode ** information need not be flushed. If the lower four bits of the flag ** equal SQLITE_SYNC_NORMAL, that means to use normal fsync() semantics. ** If the lower four bits equal SQLITE_SYNC_FULL, that means ** to use Mac OS X style fullsync instead of fsync(). ** ** Do not confuse the SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags ** with the [PRAGMA synchronous]=NORMAL and [PRAGMA synchronous]=FULL ** settings. The [synchronous pragma] determines when calls to the ** xSync VFS method occur and applies uniformly across all platforms. ** The SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL flags determine how ** energetic or rigorous or forceful the sync operations are and ** only make a difference on Mac OSX for the default SQLite code. ** (Third-party VFS implementations might also make the distinction ** between SQLITE_SYNC_NORMAL and SQLITE_SYNC_FULL, but among the ** operating systems natively supported by SQLite, only Mac OSX ** cares about the difference.) */ #define SQLITE_SYNC_NORMAL 0x00002 #define SQLITE_SYNC_FULL 0x00003 #define SQLITE_SYNC_DATAONLY 0x00010 /* ** CAPI3REF: OS Interface Open File Handle ** ** An [sqlite3_file] object represents an open file in the ** [sqlite3_vfs | OS interface layer]. Individual OS interface ** implementations will ** want to subclass this object by appending additional fields ** for their own use. The pMethods entry is a pointer to an ** [sqlite3_io_methods] object that defines methods for performing ** I/O operations on the open file. */ typedef struct sqlite3_file sqlite3_file; struct sqlite3_file { const struct sqlite3_io_methods *pMethods; /* Methods for an open file */ }; /* ** CAPI3REF: OS Interface File Virtual Methods Object ** ** Every file opened by the [sqlite3_vfs.xOpen] method populates an ** [sqlite3_file] object (or, more commonly, a subclass of the ** [sqlite3_file] object) with a pointer to an instance of this object. ** This object defines the methods used to perform various operations ** against the open file represented by the [sqlite3_file] object. ** ** If the [sqlite3_vfs.xOpen] method sets the sqlite3_file.pMethods element ** to a non-NULL pointer, then the sqlite3_io_methods.xClose method ** may be invoked even if the [sqlite3_vfs.xOpen] reported that it failed. The ** only way to prevent a call to xClose following a failed [sqlite3_vfs.xOpen] ** is for the [sqlite3_vfs.xOpen] to set the sqlite3_file.pMethods element ** to NULL. ** ** The flags argument to xSync may be one of [SQLITE_SYNC_NORMAL] or ** [SQLITE_SYNC_FULL]. The first choice is the normal fsync(). ** The second choice is a Mac OS X style fullsync. The [SQLITE_SYNC_DATAONLY] ** flag may be ORed in to indicate that only the data of the file ** and not its inode needs to be synced. ** ** The integer values to xLock() and xUnlock() are one of **
    **
  • [SQLITE_LOCK_NONE], **
  • [SQLITE_LOCK_SHARED], **
  • [SQLITE_LOCK_RESERVED], **
  • [SQLITE_LOCK_PENDING], or **
  • [SQLITE_LOCK_EXCLUSIVE]. **
** xLock() increases the lock. xUnlock() decreases the lock. ** The xCheckReservedLock() method checks whether any database connection, ** either in this process or in some other process, is holding a RESERVED, ** PENDING, or EXCLUSIVE lock on the file. It returns true ** if such a lock exists and false otherwise. ** ** The xFileControl() method is a generic interface that allows custom ** VFS implementations to directly control an open file using the ** [sqlite3_file_control()] interface. The second "op" argument is an ** integer opcode. The third argument is a generic pointer intended to ** point to a structure that may contain arguments or space in which to ** write return values. Potential uses for xFileControl() might be ** functions to enable blocking locks with timeouts, to change the ** locking strategy (for example to use dot-file locks), to inquire ** about the status of a lock, or to break stale locks. The SQLite ** core reserves all opcodes less than 100 for its own use. ** A [file control opcodes | list of opcodes] less than 100 is available. ** Applications that define a custom xFileControl method should use opcodes ** greater than 100 to avoid conflicts. VFS implementations should ** return [SQLITE_NOTFOUND] for file control opcodes that they do not ** recognize. ** ** The xSectorSize() method returns the sector size of the ** device that underlies the file. The sector size is the ** minimum write that can be performed without disturbing ** other bytes in the file. The xDeviceCharacteristics() ** method returns a bit vector describing behaviors of the ** underlying device: ** **
    **
  • [SQLITE_IOCAP_ATOMIC] **
  • [SQLITE_IOCAP_ATOMIC512] **
  • [SQLITE_IOCAP_ATOMIC1K] **
  • [SQLITE_IOCAP_ATOMIC2K] **
  • [SQLITE_IOCAP_ATOMIC4K] **
  • [SQLITE_IOCAP_ATOMIC8K] **
  • [SQLITE_IOCAP_ATOMIC16K] **
  • [SQLITE_IOCAP_ATOMIC32K] **
  • [SQLITE_IOCAP_ATOMIC64K] **
  • [SQLITE_IOCAP_SAFE_APPEND] **
  • [SQLITE_IOCAP_SEQUENTIAL] **
** ** The SQLITE_IOCAP_ATOMIC property means that all writes of ** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values ** mean that writes of blocks that are nnn bytes in size and ** are aligned to an address which is an integer multiple of ** nnn are atomic. The SQLITE_IOCAP_SAFE_APPEND value means ** that when data is appended to a file, the data is appended ** first then the size of the file is extended, never the other ** way around. The SQLITE_IOCAP_SEQUENTIAL property means that ** information is written to disk in the same order as calls ** to xWrite(). ** ** If xRead() returns SQLITE_IOERR_SHORT_READ it must also fill ** in the unread portions of the buffer with zeros. A VFS that ** fails to zero-fill short reads might seem to work. However, ** failure to zero-fill short reads will eventually lead to ** database corruption. */ typedef struct sqlite3_io_methods sqlite3_io_methods; struct sqlite3_io_methods { int iVersion; int (*xClose)(sqlite3_file*); int (*xRead)(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); int (*xWrite)(sqlite3_file*, const void*, int iAmt, sqlite3_int64 iOfst); int (*xTruncate)(sqlite3_file*, sqlite3_int64 size); int (*xSync)(sqlite3_file*, int flags); int (*xFileSize)(sqlite3_file*, sqlite3_int64 *pSize); int (*xLock)(sqlite3_file*, int); int (*xUnlock)(sqlite3_file*, int); int (*xCheckReservedLock)(sqlite3_file*, int *pResOut); int (*xFileControl)(sqlite3_file*, int op, void *pArg); int (*xSectorSize)(sqlite3_file*); int (*xDeviceCharacteristics)(sqlite3_file*); /* Methods above are valid for version 1 */ int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**); int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmUnmap)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); /* Methods above are valid for version 3 */ /* Additional methods may be added in future releases */ }; /* ** CAPI3REF: Standard File Control Opcodes ** KEYWORDS: {file control opcodes} {file control opcode} ** ** These integer constants are opcodes for the xFileControl method ** of the [sqlite3_io_methods] object and for the [sqlite3_file_control()] ** interface. ** **
    **
  • [[SQLITE_FCNTL_LOCKSTATE]] ** The [SQLITE_FCNTL_LOCKSTATE] opcode is used for debugging. This ** opcode causes the xFileControl method to write the current state of ** the lock (one of [SQLITE_LOCK_NONE], [SQLITE_LOCK_SHARED], ** [SQLITE_LOCK_RESERVED], [SQLITE_LOCK_PENDING], or [SQLITE_LOCK_EXCLUSIVE]) ** into an integer that the pArg argument points to. This capability ** is used during testing and is only available when the SQLITE_TEST ** compile-time option is used. ** **
  • [[SQLITE_FCNTL_SIZE_HINT]] ** The [SQLITE_FCNTL_SIZE_HINT] opcode is used by SQLite to give the VFS ** layer a hint of how large the database file will grow to be during the ** current transaction. This hint is not guaranteed to be accurate but it ** is often close. The underlying VFS might choose to preallocate database ** file space based on this hint in order to help writes to the database ** file run faster. ** **
  • [[SQLITE_FCNTL_CHUNK_SIZE]] ** The [SQLITE_FCNTL_CHUNK_SIZE] opcode is used to request that the VFS ** extends and truncates the database file in chunks of a size specified ** by the user. The fourth argument to [sqlite3_file_control()] should ** point to an integer (type int) containing the new chunk-size to use ** for the nominated database. Allocating database file space in large ** chunks (say 1MB at a time), may reduce file-system fragmentation and ** improve performance on some systems. ** **
  • [[SQLITE_FCNTL_FILE_POINTER]] ** The [SQLITE_FCNTL_FILE_POINTER] opcode is used to obtain a pointer ** to the [sqlite3_file] object associated with a particular database ** connection. See the [sqlite3_file_control()] documentation for ** additional information. ** **
  • [[SQLITE_FCNTL_SYNC_OMITTED]] ** No longer in use. ** **
  • [[SQLITE_FCNTL_SYNC]] ** The [SQLITE_FCNTL_SYNC] opcode is generated internally by SQLite and ** sent to the VFS immediately before the xSync method is invoked on a ** database file descriptor. Or, if the xSync method is not invoked ** because the user has configured SQLite with ** [PRAGMA synchronous | PRAGMA synchronous=OFF] it is invoked in place ** of the xSync method. In most cases, the pointer argument passed with ** this file-control is NULL. However, if the database file is being synced ** as part of a multi-database commit, the argument points to a nul-terminated ** string containing the transactions master-journal file name. VFSes that ** do not need this signal should silently ignore this opcode. Applications ** should not call [sqlite3_file_control()] with this opcode as doing so may ** disrupt the operation of the specialized VFSes that do require it. ** **
  • [[SQLITE_FCNTL_COMMIT_PHASETWO]] ** The [SQLITE_FCNTL_COMMIT_PHASETWO] opcode is generated internally by SQLite ** and sent to the VFS after a transaction has been committed immediately ** but before the database is unlocked. VFSes that do not need this signal ** should silently ignore this opcode. Applications should not call ** [sqlite3_file_control()] with this opcode as doing so may disrupt the ** operation of the specialized VFSes that do require it. ** **
  • [[SQLITE_FCNTL_WIN32_AV_RETRY]] ** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic ** retry counts and intervals for certain disk I/O operations for the ** windows [VFS] in order to provide robustness in the presence of ** anti-virus programs. By default, the windows VFS will retry file read, ** file write, and file delete operations up to 10 times, with a delay ** of 25 milliseconds before the first retry and with the delay increasing ** by an additional 25 milliseconds with each subsequent retry. This ** opcode allows these two values (10 retries and 25 milliseconds of delay) ** to be adjusted. The values are changed for all database connections ** within the same process. The argument is a pointer to an array of two ** integers where the first integer i the new retry count and the second ** integer is the delay. If either integer is negative, then the setting ** is not changed but instead the prior value of that setting is written ** into the array entry, allowing the current retry settings to be ** interrogated. The zDbName parameter is ignored. ** **
  • [[SQLITE_FCNTL_PERSIST_WAL]] ** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the ** persistent [WAL | Write Ahead Log] setting. By default, the auxiliary ** write ahead log and shared memory files used for transaction control ** are automatically deleted when the latest connection to the database ** closes. Setting persistent WAL mode causes those files to persist after ** close. Persisting the files is useful when other processes that do not ** have write permission on the directory containing the database file want ** to read the database file, as the WAL and shared memory files must exist ** in order for the database to be readable. The fourth parameter to ** [sqlite3_file_control()] for this opcode should be a pointer to an integer. ** That integer is 0 to disable persistent WAL mode or 1 to enable persistent ** WAL mode. If the integer is -1, then it is overwritten with the current ** WAL persistence setting. ** **
  • [[SQLITE_FCNTL_POWERSAFE_OVERWRITE]] ** ^The [SQLITE_FCNTL_POWERSAFE_OVERWRITE] opcode is used to set or query the ** persistent "powersafe-overwrite" or "PSOW" setting. The PSOW setting ** determines the [SQLITE_IOCAP_POWERSAFE_OVERWRITE] bit of the ** xDeviceCharacteristics methods. The fourth parameter to ** [sqlite3_file_control()] for this opcode should be a pointer to an integer. ** That integer is 0 to disable zero-damage mode or 1 to enable zero-damage ** mode. If the integer is -1, then it is overwritten with the current ** zero-damage mode setting. ** **
  • [[SQLITE_FCNTL_OVERWRITE]] ** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening ** a write transaction to indicate that, unless it is rolled back for some ** reason, the entire database file will be overwritten by the current ** transaction. This is used by VACUUM operations. ** **
  • [[SQLITE_FCNTL_VFSNAME]] ** ^The [SQLITE_FCNTL_VFSNAME] opcode can be used to obtain the names of ** all [VFSes] in the VFS stack. The names are of all VFS shims and the ** final bottom-level VFS are written into memory obtained from ** [sqlite3_malloc()] and the result is stored in the char* variable ** that the fourth parameter of [sqlite3_file_control()] points to. ** The caller is responsible for freeing the memory when done. As with ** all file-control actions, there is no guarantee that this will actually ** do anything. Callers should initialize the char* variable to a NULL ** pointer in case this file-control is not implemented. This file-control ** is intended for diagnostic use only. ** **
  • [[SQLITE_FCNTL_PRAGMA]] ** ^Whenever a [PRAGMA] statement is parsed, an [SQLITE_FCNTL_PRAGMA] ** file control is sent to the open [sqlite3_file] object corresponding ** to the database file to which the pragma statement refers. ^The argument ** to the [SQLITE_FCNTL_PRAGMA] file control is an array of ** pointers to strings (char**) in which the second element of the array ** is the name of the pragma and the third element is the argument to the ** pragma or NULL if the pragma has no argument. ^The handler for an ** [SQLITE_FCNTL_PRAGMA] file control can optionally make the first element ** of the char** argument point to a string obtained from [sqlite3_mprintf()] ** or the equivalent and that string will become the result of the pragma or ** the error message if the pragma fails. ^If the ** [SQLITE_FCNTL_PRAGMA] file control returns [SQLITE_NOTFOUND], then normal ** [PRAGMA] processing continues. ^If the [SQLITE_FCNTL_PRAGMA] ** file control returns [SQLITE_OK], then the parser assumes that the ** VFS has handled the PRAGMA itself and the parser generates a no-op ** prepared statement if result string is NULL, or that returns a copy ** of the result string if the string is non-NULL. ** ^If the [SQLITE_FCNTL_PRAGMA] file control returns ** any result code other than [SQLITE_OK] or [SQLITE_NOTFOUND], that means ** that the VFS encountered an error while handling the [PRAGMA] and the ** compilation of the PRAGMA fails with an error. ^The [SQLITE_FCNTL_PRAGMA] ** file control occurs at the beginning of pragma statement analysis and so ** it is able to override built-in [PRAGMA] statements. ** **
  • [[SQLITE_FCNTL_BUSYHANDLER]] ** ^The [SQLITE_FCNTL_BUSYHANDLER] ** file-control may be invoked by SQLite on the database file handle ** shortly after it is opened in order to provide a custom VFS with access ** to the connections busy-handler callback. The argument is of type (void **) ** - an array of two (void *) values. The first (void *) actually points ** to a function of type (int (*)(void *)). In order to invoke the connections ** busy-handler, this function should be invoked with the second (void *) in ** the array as the only argument. If it returns non-zero, then the operation ** should be retried. If it returns zero, the custom VFS should abandon the ** current operation. ** **
  • [[SQLITE_FCNTL_TEMPFILENAME]] ** ^Application can invoke the [SQLITE_FCNTL_TEMPFILENAME] file-control ** to have SQLite generate a ** temporary filename using the same algorithm that is followed to generate ** temporary filenames for TEMP tables and other internal uses. The ** argument should be a char** which will be filled with the filename ** written into memory obtained from [sqlite3_malloc()]. The caller should ** invoke [sqlite3_free()] on the result to avoid a memory leak. ** **
  • [[SQLITE_FCNTL_MMAP_SIZE]] ** The [SQLITE_FCNTL_MMAP_SIZE] file control is used to query or set the ** maximum number of bytes that will be used for memory-mapped I/O. ** The argument is a pointer to a value of type sqlite3_int64 that ** is an advisory maximum number of bytes in the file to memory map. The ** pointer is overwritten with the old value. The limit is not changed if ** the value originally pointed to is negative, and so the current limit ** can be queried by passing in a pointer to a negative number. This ** file-control is used internally to implement [PRAGMA mmap_size]. ** **
  • [[SQLITE_FCNTL_TRACE]] ** The [SQLITE_FCNTL_TRACE] file control provides advisory information ** to the VFS about what the higher layers of the SQLite stack are doing. ** This file control is used by some VFS activity tracing [shims]. ** The argument is a zero-terminated string. Higher layers in the ** SQLite stack may generate instances of this file control if ** the [SQLITE_USE_FCNTL_TRACE] compile-time option is enabled. ** **
  • [[SQLITE_FCNTL_HAS_MOVED]] ** The [SQLITE_FCNTL_HAS_MOVED] file control interprets its argument as a ** pointer to an integer and it writes a boolean into that integer depending ** on whether or not the file has been renamed, moved, or deleted since it ** was first opened. ** **
  • [[SQLITE_FCNTL_WIN32_SET_HANDLE]] ** The [SQLITE_FCNTL_WIN32_SET_HANDLE] opcode is used for debugging. This ** opcode causes the xFileControl method to swap the file handle with the one ** pointed to by the pArg argument. This capability is used during testing ** and only needs to be supported when SQLITE_TEST is defined. ** **
  • [[SQLITE_FCNTL_WAL_BLOCK]] ** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might ** be advantageous to block on the next WAL lock if the lock is not immediately ** available. The WAL subsystem issues this signal during rare ** circumstances in order to fix a problem with priority inversion. ** Applications should not use this file-control. ** **
  • [[SQLITE_FCNTL_ZIPVFS]] ** The [SQLITE_FCNTL_ZIPVFS] opcode is implemented by zipvfs only. All other ** VFS should return SQLITE_NOTFOUND for this opcode. ** **
  • [[SQLITE_FCNTL_RBU]] ** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by ** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for ** this opcode. **
*/ #define SQLITE_FCNTL_LOCKSTATE 1 #define SQLITE_FCNTL_GET_LOCKPROXYFILE 2 #define SQLITE_FCNTL_SET_LOCKPROXYFILE 3 #define SQLITE_FCNTL_LAST_ERRNO 4 #define SQLITE_FCNTL_SIZE_HINT 5 #define SQLITE_FCNTL_CHUNK_SIZE 6 #define SQLITE_FCNTL_FILE_POINTER 7 #define SQLITE_FCNTL_SYNC_OMITTED 8 #define SQLITE_FCNTL_WIN32_AV_RETRY 9 #define SQLITE_FCNTL_PERSIST_WAL 10 #define SQLITE_FCNTL_OVERWRITE 11 #define SQLITE_FCNTL_VFSNAME 12 #define SQLITE_FCNTL_POWERSAFE_OVERWRITE 13 #define SQLITE_FCNTL_PRAGMA 14 #define SQLITE_FCNTL_BUSYHANDLER 15 #define SQLITE_FCNTL_TEMPFILENAME 16 #define SQLITE_FCNTL_MMAP_SIZE 18 #define SQLITE_FCNTL_TRACE 19 #define SQLITE_FCNTL_HAS_MOVED 20 #define SQLITE_FCNTL_SYNC 21 #define SQLITE_FCNTL_COMMIT_PHASETWO 22 #define SQLITE_FCNTL_WIN32_SET_HANDLE 23 #define SQLITE_FCNTL_WAL_BLOCK 24 #define SQLITE_FCNTL_ZIPVFS 25 #define SQLITE_FCNTL_RBU 26 /* ** CAPI3REF: Mutex Handle ** ** The mutex module within SQLite defines [sqlite3_mutex] to be an ** abstract type for a mutex object. The SQLite core never looks ** at the internal representation of an [sqlite3_mutex]. It only ** deals with pointers to the [sqlite3_mutex] object. ** ** Mutexes are created using [sqlite3_mutex_alloc()]. */ typedef struct sqlite3_mutex sqlite3_mutex; /* ** CAPI3REF: OS Interface Object ** ** An instance of the sqlite3_vfs object defines the interface between ** the SQLite core and the underlying operating system. The "vfs" ** in the name of the object stands for "virtual file system". See ** the [VFS | VFS documentation] for further information. ** ** The value of the iVersion field is initially 1 but may be larger in ** future versions of SQLite. Additional fields may be appended to this ** object when the iVersion value is increased. Note that the structure ** of the sqlite3_vfs object changes in the transaction between ** SQLite version 3.5.9 and 3.6.0 and yet the iVersion field was not ** modified. ** ** The szOsFile field is the size of the subclassed [sqlite3_file] ** structure used by this VFS. mxPathname is the maximum length of ** a pathname in this VFS. ** ** Registered sqlite3_vfs objects are kept on a linked list formed by ** the pNext pointer. The [sqlite3_vfs_register()] interface manages ** this list in a thread-safe way. The [sqlite3_vfs_find()] interface ** searches the list. Neither the application code nor the VFS ** implementation should use the pNext pointer. ** ** The pNext field is the only field in the sqlite3_vfs ** structure that SQLite will ever modify. SQLite will only access ** or modify this field while holding a particular static mutex. ** The application should never modify anything within the sqlite3_vfs ** object once the object has been registered. ** ** The zName field holds the name of the VFS module. The name must ** be unique across all VFS modules. ** ** [[sqlite3_vfs.xOpen]] ** ^SQLite guarantees that the zFilename parameter to xOpen ** is either a NULL pointer or string obtained ** from xFullPathname() with an optional suffix added. ** ^If a suffix is added to the zFilename parameter, it will ** consist of a single "-" character followed by no more than ** 11 alphanumeric and/or "-" characters. ** ^SQLite further guarantees that ** the string will be valid and unchanged until xClose() is ** called. Because of the previous sentence, ** the [sqlite3_file] can safely store a pointer to the ** filename if it needs to remember the filename for some reason. ** If the zFilename parameter to xOpen is a NULL pointer then xOpen ** must invent its own temporary name for the file. ^Whenever the ** xFilename parameter is NULL it will also be the case that the ** flags parameter will include [SQLITE_OPEN_DELETEONCLOSE]. ** ** The flags argument to xOpen() includes all bits set in ** the flags argument to [sqlite3_open_v2()]. Or if [sqlite3_open()] ** or [sqlite3_open16()] is used, then flags includes at least ** [SQLITE_OPEN_READWRITE] | [SQLITE_OPEN_CREATE]. ** If xOpen() opens a file read-only then it sets *pOutFlags to ** include [SQLITE_OPEN_READONLY]. Other bits in *pOutFlags may be set. ** ** ^(SQLite will also add one of the following flags to the xOpen() ** call, depending on the object being opened: ** **
    **
  • [SQLITE_OPEN_MAIN_DB] **
  • [SQLITE_OPEN_MAIN_JOURNAL] **
  • [SQLITE_OPEN_TEMP_DB] **
  • [SQLITE_OPEN_TEMP_JOURNAL] **
  • [SQLITE_OPEN_TRANSIENT_DB] **
  • [SQLITE_OPEN_SUBJOURNAL] **
  • [SQLITE_OPEN_MASTER_JOURNAL] **
  • [SQLITE_OPEN_WAL] **
)^ ** ** The file I/O implementation can use the object type flags to ** change the way it deals with files. For example, an application ** that does not care about crash recovery or rollback might make ** the open of a journal file a no-op. Writes to this journal would ** also be no-ops, and any attempt to read the journal would return ** SQLITE_IOERR. Or the implementation might recognize that a database ** file will be doing page-aligned sector reads and writes in a random ** order and set up its I/O subsystem accordingly. ** ** SQLite might also add one of the following flags to the xOpen method: ** **
    **
  • [SQLITE_OPEN_DELETEONCLOSE] **
  • [SQLITE_OPEN_EXCLUSIVE] **
** ** The [SQLITE_OPEN_DELETEONCLOSE] flag means the file should be ** deleted when it is closed. ^The [SQLITE_OPEN_DELETEONCLOSE] ** will be set for TEMP databases and their journals, transient ** databases, and subjournals. ** ** ^The [SQLITE_OPEN_EXCLUSIVE] flag is always used in conjunction ** with the [SQLITE_OPEN_CREATE] flag, which are both directly ** analogous to the O_EXCL and O_CREAT flags of the POSIX open() ** API. The SQLITE_OPEN_EXCLUSIVE flag, when paired with the ** SQLITE_OPEN_CREATE, is used to indicate that file should always ** be created, and that it is an error if it already exists. ** It is not used to indicate the file should be opened ** for exclusive access. ** ** ^At least szOsFile bytes of memory are allocated by SQLite ** to hold the [sqlite3_file] structure passed as the third ** argument to xOpen. The xOpen method does not have to ** allocate the structure; it should just fill it in. Note that ** the xOpen method must set the sqlite3_file.pMethods to either ** a valid [sqlite3_io_methods] object or to NULL. xOpen must do ** this even if the open fails. SQLite expects that the sqlite3_file.pMethods ** element will be valid after xOpen returns regardless of the success ** or failure of the xOpen call. ** ** [[sqlite3_vfs.xAccess]] ** ^The flags argument to xAccess() may be [SQLITE_ACCESS_EXISTS] ** to test for the existence of a file, or [SQLITE_ACCESS_READWRITE] to ** test whether a file is readable and writable, or [SQLITE_ACCESS_READ] ** to test whether a file is at least readable. The file can be a ** directory. ** ** ^SQLite will always allocate at least mxPathname+1 bytes for the ** output buffer xFullPathname. The exact size of the output buffer ** is also passed as a parameter to both methods. If the output buffer ** is not large enough, [SQLITE_CANTOPEN] should be returned. Since this is ** handled as a fatal error by SQLite, vfs implementations should endeavor ** to prevent this by setting mxPathname to a sufficiently large value. ** ** The xRandomness(), xSleep(), xCurrentTime(), and xCurrentTimeInt64() ** interfaces are not strictly a part of the filesystem, but they are ** included in the VFS structure for completeness. ** The xRandomness() function attempts to return nBytes bytes ** of good-quality randomness into zOut. The return value is ** the actual number of bytes of randomness obtained. ** The xSleep() method causes the calling thread to sleep for at ** least the number of microseconds given. ^The xCurrentTime() ** method returns a Julian Day Number for the current date and time as ** a floating point value. ** ^The xCurrentTimeInt64() method returns, as an integer, the Julian ** Day Number multiplied by 86400000 (the number of milliseconds in ** a 24-hour day). ** ^SQLite will use the xCurrentTimeInt64() method to get the current ** date and time if that method is available (if iVersion is 2 or ** greater and the function pointer is not NULL) and will fall back ** to xCurrentTime() if xCurrentTimeInt64() is unavailable. ** ** ^The xSetSystemCall(), xGetSystemCall(), and xNestSystemCall() interfaces ** are not used by the SQLite core. These optional interfaces are provided ** by some VFSes to facilitate testing of the VFS code. By overriding ** system calls with functions under its control, a test program can ** simulate faults and error conditions that would otherwise be difficult ** or impossible to induce. The set of system calls that can be overridden ** varies from one VFS to another, and from one version of the same VFS to the ** next. Applications that use these interfaces must be prepared for any ** or all of these interfaces to be NULL or for their behavior to change ** from one release to the next. Applications must not attempt to access ** any of these methods if the iVersion of the VFS is less than 3. */ typedef struct sqlite3_vfs sqlite3_vfs; typedef void (*sqlite3_syscall_ptr)(void); struct sqlite3_vfs { int iVersion; /* Structure version number (currently 3) */ int szOsFile; /* Size of subclassed sqlite3_file */ int mxPathname; /* Maximum file pathname length */ sqlite3_vfs *pNext; /* Next registered VFS */ const char *zName; /* Name of this virtual file system */ void *pAppData; /* Pointer to application-specific data */ int (*xOpen)(sqlite3_vfs*, const char *zName, sqlite3_file*, int flags, int *pOutFlags); int (*xDelete)(sqlite3_vfs*, const char *zName, int syncDir); int (*xAccess)(sqlite3_vfs*, const char *zName, int flags, int *pResOut); int (*xFullPathname)(sqlite3_vfs*, const char *zName, int nOut, char *zOut); void *(*xDlOpen)(sqlite3_vfs*, const char *zFilename); void (*xDlError)(sqlite3_vfs*, int nByte, char *zErrMsg); void (*(*xDlSym)(sqlite3_vfs*,void*, const char *zSymbol))(void); void (*xDlClose)(sqlite3_vfs*, void*); int (*xRandomness)(sqlite3_vfs*, int nByte, char *zOut); int (*xSleep)(sqlite3_vfs*, int microseconds); int (*xCurrentTime)(sqlite3_vfs*, double*); int (*xGetLastError)(sqlite3_vfs*, int, char *); /* ** The methods above are in version 1 of the sqlite_vfs object ** definition. Those that follow are added in version 2 or later */ int (*xCurrentTimeInt64)(sqlite3_vfs*, sqlite3_int64*); /* ** The methods above are in versions 1 and 2 of the sqlite_vfs object. ** Those below are for version 3 and greater. */ int (*xSetSystemCall)(sqlite3_vfs*, const char *zName, sqlite3_syscall_ptr); sqlite3_syscall_ptr (*xGetSystemCall)(sqlite3_vfs*, const char *zName); const char *(*xNextSystemCall)(sqlite3_vfs*, const char *zName); /* ** The methods above are in versions 1 through 3 of the sqlite_vfs object. ** New fields may be appended in figure versions. The iVersion ** value will increment whenever this happens. */ }; /* ** CAPI3REF: Flags for the xAccess VFS method ** ** These integer constants can be used as the third parameter to ** the xAccess method of an [sqlite3_vfs] object. They determine ** what kind of permissions the xAccess method is looking for. ** With SQLITE_ACCESS_EXISTS, the xAccess method ** simply checks whether the file exists. ** With SQLITE_ACCESS_READWRITE, the xAccess method ** checks whether the named directory is both readable and writable ** (in other words, if files can be added, removed, and renamed within ** the directory). ** The SQLITE_ACCESS_READWRITE constant is currently used only by the ** [temp_store_directory pragma], though this could change in a future ** release of SQLite. ** With SQLITE_ACCESS_READ, the xAccess method ** checks whether the file is readable. The SQLITE_ACCESS_READ constant is ** currently unused, though it might be used in a future release of ** SQLite. */ #define SQLITE_ACCESS_EXISTS 0 #define SQLITE_ACCESS_READWRITE 1 /* Used by PRAGMA temp_store_directory */ #define SQLITE_ACCESS_READ 2 /* Unused */ /* ** CAPI3REF: Flags for the xShmLock VFS method ** ** These integer constants define the various locking operations ** allowed by the xShmLock method of [sqlite3_io_methods]. The ** following are the only legal combinations of flags to the ** xShmLock method: ** **
    **
  • SQLITE_SHM_LOCK | SQLITE_SHM_SHARED **
  • SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE **
  • SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED **
  • SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE **
** ** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as ** was given on the corresponding lock. ** ** The xShmLock method can transition between unlocked and SHARED or ** between unlocked and EXCLUSIVE. It cannot transition between SHARED ** and EXCLUSIVE. */ #define SQLITE_SHM_UNLOCK 1 #define SQLITE_SHM_LOCK 2 #define SQLITE_SHM_SHARED 4 #define SQLITE_SHM_EXCLUSIVE 8 /* ** CAPI3REF: Maximum xShmLock index ** ** The xShmLock method on [sqlite3_io_methods] may use values ** between 0 and this upper bound as its "offset" argument. ** The SQLite core will never attempt to acquire or release a ** lock outside of this range */ #define SQLITE_SHM_NLOCK 8 SQLITE_PRIVATE int sqlite3_os_init(void); SQLITE_PRIVATE int sqlite3_os_end(void); /* ** CAPI3REF: Configuring The SQLite Library ** ** The sqlite3_config() interface is used to make global configuration ** changes to SQLite in order to tune SQLite to the specific needs of ** the application. The default configuration is recommended for most ** applications and so this routine is usually not necessary. It is ** provided to support rare applications with unusual needs. ** ** The sqlite3_config() interface is not threadsafe. The application ** must ensure that no other SQLite interfaces are invoked by other ** threads while sqlite3_config() is running. ** ** The sqlite3_config() interface ** may only be invoked prior to library initialization using ** [sqlite3BtreeInitialize()] or after shutdown by [sqlite3BtreeShutdown()]. ** ^If sqlite3_config() is called after [sqlite3BtreeInitialize()] and before ** [sqlite3BtreeShutdown()] then it will return SQLITE_MISUSE. ** Note, however, that ^sqlite3_config() can be called as part of the ** implementation of an application-defined [sqlite3_os_init()]. ** ** The first argument to sqlite3_config() is an integer ** [configuration option] that determines ** what property of SQLite is to be configured. Subsequent arguments ** vary depending on the [configuration option] ** in the first argument. ** ** ^When a configuration option is set, sqlite3_config() returns [SQLITE_OK]. ** ^If the option is unknown or SQLite is unable to set the option ** then this routine returns a non-zero [error code]. */ SQLITE_PRIVATE int sqlite3_config(int, ...); /* ** CAPI3REF: Memory Allocation Routines ** ** An instance of this object defines the interface between SQLite ** and low-level memory allocation routines. ** ** This object is used in only one place in the SQLite interface. ** A pointer to an instance of this object is the argument to ** [sqlite3_config()] when the configuration option is ** [SQLITE_CONFIG_MALLOC] or [SQLITE_CONFIG_GETMALLOC]. ** By creating an instance of this object ** and passing it to [sqlite3_config]([SQLITE_CONFIG_MALLOC]) ** during configuration, an application can specify an alternative ** memory allocation subsystem for SQLite to use for all of its ** dynamic memory needs. ** ** Note that SQLite comes with several [built-in memory allocators] ** that are perfectly adequate for the overwhelming majority of applications ** and that this object is only useful to a tiny minority of applications ** with specialized memory allocation requirements. This object is ** also used during testing of SQLite in order to specify an alternative ** memory allocator that simulates memory out-of-memory conditions in ** order to verify that SQLite recovers gracefully from such ** conditions. ** ** The xMalloc, xRealloc, and xFree methods must work like the ** malloc(), realloc() and free() functions from the standard C library. ** ^SQLite guarantees that the second argument to ** xRealloc is always a value returned by a prior call to xRoundup. ** ** xSize should return the allocated size of a memory allocation ** previously obtained from xMalloc or xRealloc. The allocated size ** is always at least as big as the requested size but may be larger. ** ** The xRoundup method returns what would be the allocated size of ** a memory allocation given a particular requested size. Most memory ** allocators round up memory allocations at least to the next multiple ** of 8. Some allocators round up to a larger multiple or to a power of 2. ** Every memory allocation request coming in through [sqlite3_malloc()] ** or [sqlite3_realloc()] first calls xRoundup. If xRoundup returns 0, ** that causes the corresponding memory allocation to fail. ** ** The xInit method initializes the memory allocator. For example, ** it might allocate any require mutexes or initialize internal data ** structures. The xShutdown method is invoked (indirectly) by ** [sqlite3BtreeShutdown()] and should deallocate any resources acquired ** by xInit. The pAppData pointer is used as the only parameter to ** xInit and xShutdown. ** ** SQLite holds the [SQLITE_MUTEX_STATIC_MASTER] mutex when it invokes ** the xInit method, so the xInit method need not be threadsafe. The ** xShutdown method is only called from [sqlite3BtreeShutdown()] so it does ** not need to be threadsafe either. For all other methods, SQLite ** holds the [SQLITE_MUTEX_STATIC_MEM] mutex as long as the ** [SQLITE_CONFIG_MEMSTATUS] configuration option is turned on (which ** it is by default) and so the methods are automatically serialized. ** However, if [SQLITE_CONFIG_MEMSTATUS] is disabled, then the other ** methods must be threadsafe or else make their own arrangements for ** serialization. ** ** SQLite will never invoke xInit() more than once without an intervening ** call to xShutdown(). */ typedef struct sqlite3_mem_methods sqlite3_mem_methods; struct sqlite3_mem_methods { void *(*xMalloc)(int); /* Memory allocation function */ void (*xFree)(void*); /* Free a prior allocation */ void *(*xRealloc)(void*,int); /* Resize an allocation */ int (*xSize)(void*); /* Return the size of an allocation */ int (*xRoundup)(int); /* Round up request size to allocation size */ int (*xInit)(void*); /* Initialize the memory allocator */ void (*xShutdown)(void*); /* Deinitialize the memory allocator */ void *pAppData; /* Argument to xInit() and xShutdown() */ }; /* ** CAPI3REF: Configuration Options ** KEYWORDS: {configuration option} ** ** These constants are the available integer configuration options that ** can be passed as the first argument to the [sqlite3_config()] interface. ** ** New configuration options may be added in future releases of SQLite. ** Existing configuration options might be discontinued. Applications ** should check the return code from [sqlite3_config()] to make sure that ** the call worked. The [sqlite3_config()] interface will return a ** non-zero [error code] if a discontinued or unsupported configuration option ** is invoked. ** **
** [[SQLITE_CONFIG_SINGLETHREAD]]
SQLITE_CONFIG_SINGLETHREAD
**
There are no arguments to this option. ^This option sets the ** [threading mode] to Single-thread. In other words, it disables ** all mutexing and puts SQLite into a mode where it can only be used ** by a single thread. ^If SQLite is compiled with ** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then ** it is not possible to change the [threading mode] from its default ** value of Single-thread and so [sqlite3_config()] will return ** [SQLITE_ERROR] if called with the SQLITE_CONFIG_SINGLETHREAD ** configuration option.
** ** [[SQLITE_CONFIG_MULTITHREAD]]
SQLITE_CONFIG_MULTITHREAD
**
There are no arguments to this option. ^This option sets the ** [threading mode] to Multi-thread. In other words, it disables ** mutexing on [database connection] and [prepared statement] objects. ** The application is responsible for serializing access to ** [database connections] and [prepared statements]. But other mutexes ** are enabled so that SQLite will be safe to use in a multi-threaded ** environment as long as no two threads attempt to use the same ** [database connection] at the same time. ^If SQLite is compiled with ** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then ** it is not possible to set the Multi-thread [threading mode] and ** [sqlite3_config()] will return [SQLITE_ERROR] if called with the ** SQLITE_CONFIG_MULTITHREAD configuration option.
** ** [[SQLITE_CONFIG_SERIALIZED]]
SQLITE_CONFIG_SERIALIZED
**
There are no arguments to this option. ^This option sets the ** [threading mode] to Serialized. In other words, this option enables ** all mutexes including the recursive ** mutexes on [database connection] and [prepared statement] objects. ** In this mode (which is the default when SQLite is compiled with ** [SQLITE_THREADSAFE=1]) the SQLite library will itself serialize access ** to [database connections] and [prepared statements] so that the ** application is free to use the same [database connection] or the ** same [prepared statement] in different threads at the same time. ** ^If SQLite is compiled with ** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then ** it is not possible to set the Serialized [threading mode] and ** [sqlite3_config()] will return [SQLITE_ERROR] if called with the ** SQLITE_CONFIG_SERIALIZED configuration option.
** ** [[SQLITE_CONFIG_MALLOC]]
SQLITE_CONFIG_MALLOC
**
^(The SQLITE_CONFIG_MALLOC option takes a single argument which is ** a pointer to an instance of the [sqlite3_mem_methods] structure. ** The argument specifies ** alternative low-level memory allocation routines to be used in place of ** the memory allocation routines built into SQLite.)^ ^SQLite makes ** its own private copy of the content of the [sqlite3_mem_methods] structure ** before the [sqlite3_config()] call returns.
** ** [[SQLITE_CONFIG_GETMALLOC]]
SQLITE_CONFIG_GETMALLOC
**
^(The SQLITE_CONFIG_GETMALLOC option takes a single argument which ** is a pointer to an instance of the [sqlite3_mem_methods] structure. ** The [sqlite3_mem_methods] ** structure is filled with the currently defined memory allocation routines.)^ ** This option can be used to overload the default memory allocation ** routines with a wrapper that simulations memory allocation failure or ** tracks memory usage, for example.
** ** [[SQLITE_CONFIG_MEMSTATUS]]
SQLITE_CONFIG_MEMSTATUS
**
^The SQLITE_CONFIG_MEMSTATUS option takes single argument of type int, ** interpreted as a boolean, which enables or disables the collection of ** memory allocation statistics. ^(When memory allocation statistics are ** disabled, the following SQLite interfaces become non-operational: **
    **
  • [sqlite3_memory_used()] **
  • [sqlite3_memory_highwater()] **
  • [sqlite3_soft_heap_limit64()] **
  • [sqlite3_status64()] **
)^ ** ^Memory allocation statistics are enabled by default unless SQLite is ** compiled with [SQLITE_DEFAULT_MEMSTATUS]=0 in which case memory ** allocation statistics are disabled by default. **
** ** [[SQLITE_CONFIG_SCRATCH]]
SQLITE_CONFIG_SCRATCH
**
^The SQLITE_CONFIG_SCRATCH option specifies a static memory buffer ** that SQLite can use for scratch memory. ^(There are three arguments ** to SQLITE_CONFIG_SCRATCH: A pointer an 8-byte ** aligned memory buffer from which the scratch allocations will be ** drawn, the size of each scratch allocation (sz), ** and the maximum number of scratch allocations (N).)^ ** The first argument must be a pointer to an 8-byte aligned buffer ** of at least sz*N bytes of memory. ** ^SQLite will not use more than one scratch buffers per thread. ** ^SQLite will never request a scratch buffer that is more than 6 ** times the database page size. ** ^If SQLite needs needs additional ** scratch memory beyond what is provided by this configuration option, then ** [sqlite3_malloc()] will be used to obtain the memory needed.

** ^When the application provides any amount of scratch memory using ** SQLITE_CONFIG_SCRATCH, SQLite avoids unnecessary large ** [sqlite3_malloc|heap allocations]. ** This can help [Robson proof|prevent memory allocation failures] due to heap ** fragmentation in low-memory embedded systems. **

** ** [[SQLITE_CONFIG_PAGECACHE]]
SQLITE_CONFIG_PAGECACHE
**
^The SQLITE_CONFIG_PAGECACHE option specifies a static memory buffer ** that SQLite can use for the database page cache with the default page ** cache implementation. ** This configuration should not be used if an application-define page ** cache implementation is loaded using the [SQLITE_CONFIG_PCACHE2] ** configuration option. ** ^There are three arguments to SQLITE_CONFIG_PAGECACHE: A pointer to ** 8-byte aligned ** memory, the size of each page buffer (sz), and the number of pages (N). ** The sz argument should be the size of the largest database page ** (a power of two between 512 and 65536) plus some extra bytes for each ** page header. ^The number of extra bytes needed by the page header ** can be determined using the [SQLITE_CONFIG_PCACHE_HDRSZ] option ** to [sqlite3_config()]. ** ^It is harmless, apart from the wasted memory, ** for the sz parameter to be larger than necessary. The first ** argument should pointer to an 8-byte aligned block of memory that ** is at least sz*N bytes of memory, otherwise subsequent behavior is ** undefined. ** ^SQLite will use the memory provided by the first argument to satisfy its ** memory needs for the first N pages that it adds to cache. ^If additional ** page cache memory is needed beyond what is provided by this option, then ** SQLite goes to [sqlite3_malloc()] for the additional storage space.
** ** [[SQLITE_CONFIG_HEAP]]
SQLITE_CONFIG_HEAP
**
^The SQLITE_CONFIG_HEAP option specifies a static memory buffer ** that SQLite will use for all of its dynamic memory allocation needs ** beyond those provided for by [SQLITE_CONFIG_SCRATCH] and ** [SQLITE_CONFIG_PAGECACHE]. ** ^The SQLITE_CONFIG_HEAP option is only available if SQLite is compiled ** with either [SQLITE_ENABLE_MEMSYS3] or [SQLITE_ENABLE_MEMSYS5] and returns ** [SQLITE_ERROR] if invoked otherwise. ** ^There are three arguments to SQLITE_CONFIG_HEAP: ** An 8-byte aligned pointer to the memory, ** the number of bytes in the memory buffer, and the minimum allocation size. ** ^If the first pointer (the memory pointer) is NULL, then SQLite reverts ** to using its default memory allocator (the system malloc() implementation), ** undoing any prior invocation of [SQLITE_CONFIG_MALLOC]. ^If the ** memory pointer is not NULL then the alternative memory ** allocator is engaged to handle all of SQLites memory allocation needs. ** The first pointer (the memory pointer) must be aligned to an 8-byte ** boundary or subsequent behavior of SQLite will be undefined. ** The minimum allocation size is capped at 2**12. Reasonable values ** for the minimum allocation size are 2**5 through 2**8.
** ** [[SQLITE_CONFIG_MUTEX]]
SQLITE_CONFIG_MUTEX
**
^(The SQLITE_CONFIG_MUTEX option takes a single argument which is a ** pointer to an instance of the [sqlite3_mutex_methods] structure. ** The argument specifies alternative low-level mutex routines to be used ** in place the mutex routines built into SQLite.)^ ^SQLite makes a copy of ** the content of the [sqlite3_mutex_methods] structure before the call to ** [sqlite3_config()] returns. ^If SQLite is compiled with ** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then ** the entire mutexing subsystem is omitted from the build and hence calls to ** [sqlite3_config()] with the SQLITE_CONFIG_MUTEX configuration option will ** return [SQLITE_ERROR].
** ** [[SQLITE_CONFIG_GETMUTEX]]
SQLITE_CONFIG_GETMUTEX
**
^(The SQLITE_CONFIG_GETMUTEX option takes a single argument which ** is a pointer to an instance of the [sqlite3_mutex_methods] structure. The ** [sqlite3_mutex_methods] ** structure is filled with the currently defined mutex routines.)^ ** This option can be used to overload the default mutex allocation ** routines with a wrapper used to track mutex usage for performance ** profiling or testing, for example. ^If SQLite is compiled with ** the [SQLITE_THREADSAFE | SQLITE_THREADSAFE=0] compile-time option then ** the entire mutexing subsystem is omitted from the build and hence calls to ** [sqlite3_config()] with the SQLITE_CONFIG_GETMUTEX configuration option will ** return [SQLITE_ERROR].
** ** [[SQLITE_CONFIG_LOOKASIDE]]
SQLITE_CONFIG_LOOKASIDE
**
^(The SQLITE_CONFIG_LOOKASIDE option takes two arguments that determine ** the default size of lookaside memory on each [database connection]. ** The first argument is the ** size of each lookaside buffer slot and the second is the number of ** slots allocated to each database connection.)^ ^(SQLITE_CONFIG_LOOKASIDE ** sets the default lookaside size. The [SQLITE_DBCONFIG_LOOKASIDE] ** option to [sqlite3_db_config()] can be used to change the lookaside ** configuration on individual connections.)^
** ** [[SQLITE_CONFIG_PCACHE2]]
SQLITE_CONFIG_PCACHE2
**
^(The SQLITE_CONFIG_PCACHE2 option takes a single argument which is ** a pointer to an [sqlite3_pcache_methods2] object. This object specifies ** the interface to a custom page cache implementation.)^ ** ^SQLite makes a copy of the [sqlite3_pcache_methods2] object.
** ** [[SQLITE_CONFIG_GETPCACHE2]]
SQLITE_CONFIG_GETPCACHE2
**
^(The SQLITE_CONFIG_GETPCACHE2 option takes a single argument which ** is a pointer to an [sqlite3_pcache_methods2] object. SQLite copies of ** the current page cache implementation into that object.)^
** ** [[SQLITE_CONFIG_LOG]]
SQLITE_CONFIG_LOG
**
The SQLITE_CONFIG_LOG option is used to configure the SQLite ** global [error log]. ** (^The SQLITE_CONFIG_LOG option takes two arguments: a pointer to a ** function with a call signature of void(*)(void*,int,const char*), ** and a pointer to void. ^If the function pointer is not NULL, it is ** invoked by [sqlite3_log()] to process each logging event. ^If the ** function pointer is NULL, the [sqlite3_log()] interface becomes a no-op. ** ^The void pointer that is the second argument to SQLITE_CONFIG_LOG is ** passed through as the first parameter to the application-defined logger ** function whenever that function is invoked. ^The second parameter to ** the logger function is a copy of the first parameter to the corresponding ** [sqlite3_log()] call and is intended to be a [result code] or an ** [extended result code]. ^The third parameter passed to the logger is ** log message after formatting via [sqlite3_snprintf()]. ** The SQLite logging interface is not reentrant; the logger function ** supplied by the application must not invoke any SQLite interface. ** In a multi-threaded application, the application-defined logger ** function must be threadsafe.
** ** [[SQLITE_CONFIG_URI]]
SQLITE_CONFIG_URI **
^(The SQLITE_CONFIG_URI option takes a single argument of type int. ** If non-zero, then URI handling is globally enabled. If the parameter is zero, ** then URI handling is globally disabled.)^ ^If URI handling is globally ** enabled, all filenames passed to [sqlite3_open()], [sqlite3_open_v2()], ** [sqlite3_open16()] or ** specified as part of [ATTACH] commands are interpreted as URIs, regardless ** of whether or not the [SQLITE_OPEN_URI] flag is set when the database ** connection is opened. ^If it is globally disabled, filenames are ** only interpreted as URIs if the SQLITE_OPEN_URI flag is set when the ** database connection is opened. ^(By default, URI handling is globally ** disabled. The default value may be changed by compiling with the ** [SQLITE_USE_URI] symbol defined.)^ ** ** [[SQLITE_CONFIG_COVERING_INDEX_SCAN]]
SQLITE_CONFIG_COVERING_INDEX_SCAN **
^The SQLITE_CONFIG_COVERING_INDEX_SCAN option takes a single integer ** argument which is interpreted as a boolean in order to enable or disable ** the use of covering indices for full table scans in the query optimizer. ** ^The default setting is determined ** by the [SQLITE_ALLOW_COVERING_INDEX_SCAN] compile-time option, or is "on" ** if that compile-time option is omitted. ** The ability to disable the use of covering indices for full table scans ** is because some incorrectly coded legacy applications might malfunction ** when the optimization is enabled. Providing the ability to ** disable the optimization allows the older, buggy application code to work ** without change even with newer versions of SQLite. ** ** [[SQLITE_CONFIG_PCACHE]] [[SQLITE_CONFIG_GETPCACHE]] **
SQLITE_CONFIG_PCACHE and SQLITE_CONFIG_GETPCACHE **
These options are obsolete and should not be used by new code. ** They are retained for backwards compatibility but are now no-ops. **
** ** [[SQLITE_CONFIG_MMAP_SIZE]] **
SQLITE_CONFIG_MMAP_SIZE **
^SQLITE_CONFIG_MMAP_SIZE takes two 64-bit integer (sqlite3_int64) values ** that are the default mmap size limit (the default setting for ** [PRAGMA mmap_size]) and the maximum allowed mmap size limit. ** ^The default setting can be overridden by each database connection using ** either the [PRAGMA mmap_size] command, or by using the ** [SQLITE_FCNTL_MMAP_SIZE] file control. ^(The maximum allowed mmap size ** will be silently truncated if necessary so that it does not exceed the ** compile-time maximum mmap size set by the ** [SQLITE_MAX_MMAP_SIZE] compile-time option.)^ ** ^If either argument to this option is negative, then that argument is ** changed to its compile-time default. ** ** [[SQLITE_CONFIG_WIN32_HEAPSIZE]] **
SQLITE_CONFIG_WIN32_HEAPSIZE **
^The SQLITE_CONFIG_WIN32_HEAPSIZE option is only available if SQLite is ** compiled for Windows with the [SQLITE_WIN32_MALLOC] pre-processor macro ** defined. ^SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit unsigned integer value ** that specifies the maximum size of the created heap. ** ** [[SQLITE_CONFIG_PCACHE_HDRSZ]] **
SQLITE_CONFIG_PCACHE_HDRSZ **
^The SQLITE_CONFIG_PCACHE_HDRSZ option takes a single parameter which ** is a pointer to an integer and writes into that integer the number of extra ** bytes per page required for each page in [SQLITE_CONFIG_PAGECACHE]. ** The amount of extra space required can change depending on the compiler, ** target platform, and SQLite version. ** ** [[SQLITE_CONFIG_PMASZ]] **
SQLITE_CONFIG_PMASZ **
^The SQLITE_CONFIG_PMASZ option takes a single parameter which ** is an unsigned integer and sets the "Minimum PMA Size" for the multithreaded ** sorter to that integer. The default minimum PMA Size is set by the ** [SQLITE_SORTER_PMASZ] compile-time option. New threads are launched ** to help with sort operations when multithreaded sorting ** is enabled (using the [PRAGMA threads] command) and the amount of content ** to be sorted exceeds the page size times the minimum of the ** [PRAGMA cache_size] setting and this value. **
*/ #define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ #define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ #define SQLITE_CONFIG_SERIALIZED 3 /* nil */ #define SQLITE_CONFIG_MALLOC 4 /* sqlite3_mem_methods* */ #define SQLITE_CONFIG_GETMALLOC 5 /* sqlite3_mem_methods* */ #define SQLITE_CONFIG_SCRATCH 6 /* void*, int sz, int N */ #define SQLITE_CONFIG_PAGECACHE 7 /* void*, int sz, int N */ #define SQLITE_CONFIG_HEAP 8 /* void*, int nByte, int min */ #define SQLITE_CONFIG_MEMSTATUS 9 /* boolean */ #define SQLITE_CONFIG_MUTEX 10 /* sqlite3_mutex_methods* */ #define SQLITE_CONFIG_GETMUTEX 11 /* sqlite3_mutex_methods* */ /* previously SQLITE_CONFIG_CHUNKALLOC 12 which is now unused. */ #define SQLITE_CONFIG_LOOKASIDE 13 /* int int */ #define SQLITE_CONFIG_PCACHE 14 /* no-op */ #define SQLITE_CONFIG_GETPCACHE 15 /* no-op */ #define SQLITE_CONFIG_LOG 16 /* xFunc, void* */ #define SQLITE_CONFIG_URI 17 /* int */ #define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ #define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ #define SQLITE_CONFIG_MMAP_SIZE 22 /* sqlite3_int64, sqlite3_int64 */ #define SQLITE_CONFIG_WIN32_HEAPSIZE 23 /* int nByte */ #define SQLITE_CONFIG_PCACHE_HDRSZ 24 /* int *psz */ #define SQLITE_CONFIG_PMASZ 25 /* unsigned int szPma */ /* ** CAPI3REF: Formatted String Printing Functions ** ** These routines are work-alikes of the "printf()" family of functions ** from the standard C library. ** These routines understand most of the common K&R formatting options, ** plus some additional non-standard formats, detailed below. ** Note that some of the more obscure formatting options from recent ** C-library standards are omitted from this implementation. ** ** ^The sqlite3_mprintf() and sqlite3_vmprintf() routines write their ** results into memory obtained from [sqlite3_malloc()]. ** The strings returned by these two routines should be ** released by [sqlite3_free()]. ^Both routines return a ** NULL pointer if [sqlite3_malloc()] is unable to allocate enough ** memory to hold the resulting string. ** ** ^(The sqlite3_snprintf() routine is similar to "snprintf()" from ** the standard C library. The result is written into the ** buffer supplied as the second parameter whose size is given by ** the first parameter. Note that the order of the ** first two parameters is reversed from snprintf().)^ This is an ** historical accident that cannot be fixed without breaking ** backwards compatibility. ^(Note also that sqlite3_snprintf() ** returns a pointer to its buffer instead of the number of ** characters actually written into the buffer.)^ We admit that ** the number of characters written would be a more useful return ** value but we cannot change the implementation of sqlite3_snprintf() ** now without breaking compatibility. ** ** ^As long as the buffer size is greater than zero, sqlite3_snprintf() ** guarantees that the buffer is always zero-terminated. ^The first ** parameter "n" is the total size of the buffer, including space for ** the zero terminator. So the longest string that can be completely ** written will be n-1 characters. ** ** ^The sqlite3_vsnprintf() routine is a varargs version of sqlite3_snprintf(). ** ** These routines all implement some additional formatting ** options that are useful for constructing SQL statements. ** All of the usual printf() formatting options apply. In addition, there ** is are "%q", "%Q", "%w" and "%z" options. ** ** ^(The %q option works like %s in that it substitutes a nul-terminated ** string from the argument list. But %q also doubles every '\'' character. ** %q is designed for use inside a string literal.)^ By doubling each '\'' ** character it escapes that character and allows it to be inserted into ** the string. ** ** For example, assume the string variable zText contains text as follows: ** **
**  char *zText = "It's a happy day!";
** 
** ** One can use this text in an SQL statement as follows: ** **
**  char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES('%q')", zText);
**  sqlite3_exec(db, zSQL, 0, 0, 0);
**  sqlite3_free(zSQL);
** 
** ** Because the %q format string is used, the '\'' character in zText ** is escaped and the SQL generated is as follows: ** **
**  INSERT INTO table1 VALUES('It''s a happy day!')
** 
** ** This is correct. Had we used %s instead of %q, the generated SQL ** would have looked like this: ** **
**  INSERT INTO table1 VALUES('It's a happy day!');
** 
** ** This second example is an SQL syntax error. As a general rule you should ** always use %q instead of %s when inserting text into a string literal. ** ** ^(The %Q option works like %q except it also adds single quotes around ** the outside of the total string. Additionally, if the parameter in the ** argument list is a NULL pointer, %Q substitutes the text "NULL" (without ** single quotes).)^ So, for example, one could say: ** **
**  char *zSQL = sqlite3_mprintf("INSERT INTO table VALUES(%Q)", zText);
**  sqlite3_exec(db, zSQL, 0, 0, 0);
**  sqlite3_free(zSQL);
** 
** ** The code above will render a correct SQL statement in the zSQL ** variable even if the zText variable is a NULL pointer. ** ** ^(The "%w" formatting option is like "%q" except that it expects to ** be contained within double-quotes instead of single quotes, and it ** escapes the double-quote character instead of the single-quote ** character.)^ The "%w" formatting option is intended for safely inserting ** table and column names into a constructed SQL statement. ** ** ^(The "%z" formatting option works like "%s" but with the ** addition that after the string has been read and copied into ** the result, [sqlite3_free()] is called on the input string.)^ */ SQLITE_PRIVATE char *sqlite3_mprintf(const char*,...); SQLITE_PRIVATE char *sqlite3_vmprintf(const char*, va_list); SQLITE_PRIVATE char *sqlite3_snprintf(int,char*,const char*, ...); SQLITE_PRIVATE char *sqlite3_vsnprintf(int,char*,const char*, va_list); /* ** CAPI3REF: Memory Allocation Subsystem ** ** The SQLite core uses these three routines for all of its own ** internal memory allocation needs. "Core" in the previous sentence ** does not include operating-system specific VFS implementation. The ** Windows VFS uses native malloc() and free() for some operations. ** ** ^The sqlite3_malloc() routine returns a pointer to a block ** of memory at least N bytes in length, where N is the parameter. ** ^If sqlite3_malloc() is unable to obtain sufficient free ** memory, it returns a NULL pointer. ^If the parameter N to ** sqlite3_malloc() is zero or negative then sqlite3_malloc() returns ** a NULL pointer. ** ** ^The sqlite3_malloc64(N) routine works just like ** sqlite3_malloc(N) except that N is an unsigned 64-bit integer instead ** of a signed 32-bit integer. ** ** ^Calling sqlite3_free() with a pointer previously returned ** by sqlite3_malloc() or sqlite3_realloc() releases that memory so ** that it might be reused. ^The sqlite3_free() routine is ** a no-op if is called with a NULL pointer. Passing a NULL pointer ** to sqlite3_free() is harmless. After being freed, memory ** should neither be read nor written. Even reading previously freed ** memory might result in a segmentation fault or other severe error. ** Memory corruption, a segmentation fault, or other severe error ** might result if sqlite3_free() is called with a non-NULL pointer that ** was not obtained from sqlite3_malloc() or sqlite3_realloc(). ** ** ^The sqlite3_realloc(X,N) interface attempts to resize a ** prior memory allocation X to be at least N bytes. ** ^If the X parameter to sqlite3_realloc(X,N) ** is a NULL pointer then its behavior is identical to calling ** sqlite3_malloc(N). ** ^If the N parameter to sqlite3_realloc(X,N) is zero or ** negative then the behavior is exactly the same as calling ** sqlite3_free(X). ** ^sqlite3_realloc(X,N) returns a pointer to a memory allocation ** of at least N bytes in size or NULL if insufficient memory is available. ** ^If M is the size of the prior allocation, then min(N,M) bytes ** of the prior allocation are copied into the beginning of buffer returned ** by sqlite3_realloc(X,N) and the prior allocation is freed. ** ^If sqlite3_realloc(X,N) returns NULL and N is positive, then the ** prior allocation is not freed. ** ** ^The sqlite3_realloc64(X,N) interfaces works the same as ** sqlite3_realloc(X,N) except that N is a 64-bit unsigned integer instead ** of a 32-bit signed integer. ** ** ^The memory returned by sqlite3_malloc(), sqlite3_realloc(), ** sqlite3_malloc64(), and sqlite3_realloc64() ** is always aligned to at least an 8 byte boundary, or to a ** 4 byte boundary if the [SQLITE_4_BYTE_ALIGNED_MALLOC] compile-time ** option is used. ** ** In SQLite version 3.5.0 and 3.5.1, it was possible to define ** the SQLITE_OMIT_MEMORY_ALLOCATION which would cause the built-in ** implementation of these routines to be omitted. That capability ** is no longer provided. Only built-in memory allocators can be used. ** ** Prior to SQLite version 3.7.10, the Windows OS interface layer called ** the system malloc() and free() directly when converting ** filenames between the UTF-8 encoding used by SQLite ** and whatever filename encoding is used by the particular Windows ** installation. Memory allocation errors were detected, but ** they were reported back as [SQLITE_CANTOPEN] or ** [SQLITE_IOERR] rather than [SQLITE_NOMEM]. ** ** The pointer arguments to [sqlite3_free()] and [sqlite3_realloc()] ** must be either NULL or else pointers obtained from a prior ** invocation of [sqlite3_malloc()] or [sqlite3_realloc()] that have ** not yet been released. ** ** The application must not read or write any part of ** a block of memory after it has been released using ** [sqlite3_free()] or [sqlite3_realloc()]. */ SQLITE_PRIVATE void *sqlite3_malloc(int); SQLITE_PRIVATE void *sqlite3_malloc64(sqlite3_uint64); SQLITE_PRIVATE void *sqlite3_realloc(void*, int); SQLITE_PRIVATE void *sqlite3_realloc64(void*, sqlite3_uint64); SQLITE_PRIVATE void sqlite3_free(void*); /* ** CAPI3REF: Run-Time Limit Categories ** KEYWORDS: {limit category} {*limit categories} ** ** These constants define various performance limits ** that can be lowered at run-time using [sqlite3_limit()]. ** The synopsis of the meanings of the various limits is shown below. ** Additional information is available at [limits | Limits in SQLite]. ** **
** [[SQLITE_LIMIT_LENGTH]] ^(
SQLITE_LIMIT_LENGTH
**
The maximum size of any string or BLOB or table row, in bytes.
)^ ** ** [[SQLITE_LIMIT_SQL_LENGTH]] ^(
SQLITE_LIMIT_SQL_LENGTH
**
The maximum length of an SQL statement, in bytes.
)^ ** ** [[SQLITE_LIMIT_COLUMN]] ^(
SQLITE_LIMIT_COLUMN
**
The maximum number of columns in a table definition or in the ** result set of a [SELECT] or the maximum number of columns in an index ** or in an ORDER BY or GROUP BY clause.
)^ ** ** [[SQLITE_LIMIT_EXPR_DEPTH]] ^(
SQLITE_LIMIT_EXPR_DEPTH
**
The maximum depth of the parse tree on any expression.
)^ ** ** [[SQLITE_LIMIT_COMPOUND_SELECT]] ^(
SQLITE_LIMIT_COMPOUND_SELECT
**
The maximum number of terms in a compound SELECT statement.
)^ ** ** [[SQLITE_LIMIT_VDBE_OP]] ^(
SQLITE_LIMIT_VDBE_OP
**
The maximum number of instructions in a virtual machine program ** used to implement an SQL statement. This limit is not currently ** enforced, though that might be added in some future release of ** SQLite.
)^ ** ** [[SQLITE_LIMIT_FUNCTION_ARG]] ^(
SQLITE_LIMIT_FUNCTION_ARG
**
The maximum number of arguments on a function.
)^ ** ** [[SQLITE_LIMIT_ATTACHED]] ^(
SQLITE_LIMIT_ATTACHED
**
The maximum number of [ATTACH | attached databases].)^
** ** [[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]] ** ^(
SQLITE_LIMIT_LIKE_PATTERN_LENGTH
**
The maximum length of the pattern argument to the [LIKE] or ** [GLOB] operators.
)^ ** ** [[SQLITE_LIMIT_VARIABLE_NUMBER]] ** ^(
SQLITE_LIMIT_VARIABLE_NUMBER
**
The maximum index number of any [parameter] in an SQL statement.)^ ** ** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(
SQLITE_LIMIT_TRIGGER_DEPTH
**
The maximum depth of recursion for triggers.
)^ ** ** [[SQLITE_LIMIT_WORKER_THREADS]] ^(
SQLITE_LIMIT_WORKER_THREADS
**
The maximum number of auxiliary worker threads that a single ** [prepared statement] may start.
)^ **
*/ #define SQLITE_LIMIT_LENGTH 0 #define SQLITE_LIMIT_SQL_LENGTH 1 #define SQLITE_LIMIT_COLUMN 2 #define SQLITE_LIMIT_EXPR_DEPTH 3 #define SQLITE_LIMIT_COMPOUND_SELECT 4 #define SQLITE_LIMIT_VDBE_OP 5 #define SQLITE_LIMIT_FUNCTION_ARG 6 #define SQLITE_LIMIT_ATTACHED 7 #define SQLITE_LIMIT_LIKE_PATTERN_LENGTH 8 #define SQLITE_LIMIT_VARIABLE_NUMBER 9 #define SQLITE_LIMIT_TRIGGER_DEPTH 10 #define SQLITE_LIMIT_WORKER_THREADS 11 /* ** CAPI3REF: Name Of The Folder Holding Temporary Files ** ** ^(If this global variable is made to point to a string which is ** the name of a folder (a.k.a. directory), then all temporary files ** created by SQLite when using a built-in [sqlite3_vfs | VFS] ** will be placed in that directory.)^ ^If this variable ** is a NULL pointer, then SQLite performs a search for an appropriate ** temporary file directory. ** ** Applications are strongly discouraged from using this global variable. ** It is required to set a temporary folder on Windows Runtime (WinRT). ** But for all other platforms, it is highly recommended that applications ** neither read nor write this variable. This global variable is a relic ** that exists for backwards compatibility of legacy applications and should ** be avoided in new projects. ** ** It is not safe to read or modify this variable in more than one ** thread at a time. It is not safe to read or modify this variable ** if a [database connection] is being used at the same time in a separate ** thread. ** It is intended that this variable be set once ** as part of process initialization and before any SQLite interface ** routines have been called and that this variable remain unchanged ** thereafter. ** ** ^The [temp_store_directory pragma] may modify this variable and cause ** it to point to memory obtained from [sqlite3_malloc]. ^Furthermore, ** the [temp_store_directory pragma] always assumes that any string ** that this variable points to is held in memory obtained from ** [sqlite3_malloc] and the pragma may attempt to free that memory ** using [sqlite3_free]. ** Hence, if this variable is modified directly, either it should be ** made NULL or made to point to memory obtained from [sqlite3_malloc] ** or else the use of the [temp_store_directory pragma] should be avoided. ** Except when requested by the [temp_store_directory pragma], SQLite ** does not free the memory that sqlite3_temp_directory points to. If ** the application wants that memory to be freed, it must do ** so itself, taking care to only do so after all [database connection] ** objects have been destroyed. ** ** Note to Windows Runtime users: The temporary directory must be set ** prior to calling [sqlite3_open] or [sqlite3_open_v2]. Otherwise, various ** features that require the use of temporary files may fail. Here is an ** example of how to do this using C++ with the Windows Runtime: ** **
** LPCWSTR zPath = Windows::Storage::ApplicationData::Current->
**       TemporaryFolder->Path->Data();
** char zPathBuf[MAX_PATH + 1];
** memset(zPathBuf, 0, sizeof(zPathBuf));
** WideCharToMultiByte(CP_UTF8, 0, zPath, -1, zPathBuf, sizeof(zPathBuf),
**       NULL, NULL);
** sqlite3_temp_directory = sqlite3_mprintf("%s", zPathBuf);
** 
*/ SQLITE_PRIVATE char *sqlite3_temp_directory; /* ** CAPI3REF: Virtual File System Objects ** ** A virtual filesystem (VFS) is an [sqlite3_vfs] object ** that SQLite uses to interact ** with the underlying operating system. Most SQLite builds come with a ** single default VFS that is appropriate for the host computer. ** New VFSes can be registered and existing VFSes can be unregistered. ** The following interfaces are provided. ** ** ^The sqlite3_vfs_find() interface returns a pointer to a VFS given its name. ** ^Names are case sensitive. ** ^Names are zero-terminated UTF-8 strings. ** ^If there is no match, a NULL pointer is returned. ** ^If zVfsName is NULL then the default VFS is returned. ** ** ^New VFSes are registered with sqlite3_vfs_register(). ** ^Each new VFS becomes the default VFS if the makeDflt flag is set. ** ^The same VFS can be registered multiple times without injury. ** ^To make an existing VFS into the default VFS, register it again ** with the makeDflt flag set. If two different VFSes with the ** same name are registered, the behavior is undefined. If a ** VFS is registered with a name that is NULL or an empty string, ** then the behavior is undefined. ** */ SQLITE_PRIVATE sqlite3_vfs *sqlite3_vfs_find(const char *zVfsName); SQLITE_PRIVATE int sqlite3_vfs_register(sqlite3_vfs*, int makeDflt); /* ** CAPI3REF: Mutexes ** ** The SQLite core uses these routines for thread ** synchronization. Though they are intended for internal ** use by SQLite, code that links against SQLite is ** permitted to use any of these routines. ** ** The SQLite source code contains multiple implementations ** of these mutex routines. An appropriate implementation ** is selected automatically at compile-time. The following ** implementations are available in the SQLite core: ** **
    **
  • SQLITE_MUTEX_PTHREADS **
  • SQLITE_MUTEX_W32 **
  • SQLITE_MUTEX_NOOP **
** ** The SQLITE_MUTEX_NOOP implementation is a set of routines ** that does no real locking and is appropriate for use in ** a single-threaded application. The SQLITE_MUTEX_PTHREADS and ** SQLITE_MUTEX_W32 implementations are appropriate for use on Unix ** and Windows. ** ** If SQLite is compiled with the SQLITE_MUTEX_APPDEF preprocessor ** macro defined (with "-DSQLITE_MUTEX_APPDEF=1"), then no mutex ** implementation is included with the library. In this case the ** application must supply a custom mutex implementation using the ** [SQLITE_CONFIG_MUTEX] option of the sqlite3_config() function ** before calling sqlite3BtreeInitialize() or any other public sqlite3_ ** function that calls sqlite3BtreeInitialize(). ** ** ^The sqlite3_mutex_alloc() routine allocates a new ** mutex and returns a pointer to it. ^The sqlite3_mutex_alloc() ** routine returns NULL if it is unable to allocate the requested ** mutex. The argument to sqlite3_mutex_alloc() must one of these ** integer constants: ** **
    **
  • SQLITE_MUTEX_FAST **
  • SQLITE_MUTEX_RECURSIVE **
  • SQLITE_MUTEX_STATIC_MASTER **
  • SQLITE_MUTEX_STATIC_MEM **
  • SQLITE_MUTEX_STATIC_OPEN **
  • SQLITE_MUTEX_STATIC_PRNG **
  • SQLITE_MUTEX_STATIC_LRU **
  • SQLITE_MUTEX_STATIC_PMEM **
  • SQLITE_MUTEX_STATIC_APP1 **
  • SQLITE_MUTEX_STATIC_APP2 **
  • SQLITE_MUTEX_STATIC_APP3 **
  • SQLITE_MUTEX_STATIC_VFS1 **
  • SQLITE_MUTEX_STATIC_VFS2 **
  • SQLITE_MUTEX_STATIC_VFS3 **
** ** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) ** cause sqlite3_mutex_alloc() to create ** a new mutex. ^The new mutex is recursive when SQLITE_MUTEX_RECURSIVE ** is used but not necessarily so when SQLITE_MUTEX_FAST is used. ** The mutex implementation does not need to make a distinction ** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does ** not want to. SQLite will only request a recursive mutex in ** cases where it really needs one. If a faster non-recursive mutex ** implementation is available on the host platform, the mutex subsystem ** might return such a mutex in response to SQLITE_MUTEX_FAST. ** ** ^The other allowed parameters to sqlite3_mutex_alloc() (anything other ** than SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE) each return ** a pointer to a static preexisting mutex. ^Nine static mutexes are ** used by the current version of SQLite. Future versions of SQLite ** may add additional static mutexes. Static mutexes are for internal ** use by SQLite only. Applications that use SQLite mutexes should ** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or ** SQLITE_MUTEX_RECURSIVE. ** ** ^Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST ** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() ** returns a different mutex on every call. ^For the static ** mutex types, the same mutex is returned on every call that has ** the same type number. ** ** ^The sqlite3_mutex_free() routine deallocates a previously ** allocated dynamic mutex. Attempting to deallocate a static ** mutex results in undefined behavior. ** ** ^The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt ** to enter a mutex. ^If another thread is already within the mutex, ** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return ** SQLITE_BUSY. ^The sqlite3_mutex_try() interface returns [SQLITE_OK] ** upon successful entry. ^(Mutexes created using ** SQLITE_MUTEX_RECURSIVE can be entered multiple times by the same thread. ** In such cases, the ** mutex must be exited an equal number of times before another thread ** can enter.)^ If the same thread tries to enter any mutex other ** than an SQLITE_MUTEX_RECURSIVE more than once, the behavior is undefined. ** ** ^(Some systems (for example, Windows 95) do not support the operation ** implemented by sqlite3_mutex_try(). On those systems, sqlite3_mutex_try() ** will always return SQLITE_BUSY. The SQLite core only ever uses ** sqlite3_mutex_try() as an optimization so this is acceptable ** behavior.)^ ** ** ^The sqlite3_mutex_leave() routine exits a mutex that was ** previously entered by the same thread. The behavior ** is undefined if the mutex is not currently entered by the ** calling thread or is not currently allocated. ** ** ^If the argument to sqlite3_mutex_enter(), sqlite3_mutex_try(), or ** sqlite3_mutex_leave() is a NULL pointer, then all three routines ** behave as no-ops. ** ** See also: [sqlite3_mutex_held()] and [sqlite3_mutex_notheld()]. */ SQLITE_PRIVATE sqlite3_mutex *sqlite3_mutex_alloc(int); SQLITE_PRIVATE void sqlite3_mutex_free(sqlite3_mutex*); SQLITE_PRIVATE void sqlite3_mutex_enter(sqlite3_mutex*); SQLITE_PRIVATE int sqlite3_mutex_try(sqlite3_mutex*); SQLITE_PRIVATE void sqlite3_mutex_leave(sqlite3_mutex*); /* ** CAPI3REF: Mutex Methods Object ** ** An instance of this structure defines the low-level routines ** used to allocate and use mutexes. ** ** Usually, the default mutex implementations provided by SQLite are ** sufficient, however the application has the option of substituting a custom ** implementation for specialized deployments or systems for which SQLite ** does not provide a suitable implementation. In this case, the application ** creates and populates an instance of this structure to pass ** to sqlite3_config() along with the [SQLITE_CONFIG_MUTEX] option. ** Additionally, an instance of this structure can be used as an ** output variable when querying the system for the current mutex ** implementation, using the [SQLITE_CONFIG_GETMUTEX] option. ** ** ^The xMutexInit method defined by this structure is invoked as ** part of system initialization by the sqlite3BtreeInitialize() function. ** ^The xMutexInit routine is called by SQLite exactly once for each ** effective call to [sqlite3BtreeInitialize()]. ** ** ^The xMutexEnd method defined by this structure is invoked as ** part of system shutdown by the sqlite3BtreeShutdown() function. The ** implementation of this method is expected to release all outstanding ** resources obtained by the mutex methods implementation, especially ** those obtained by the xMutexInit method. ^The xMutexEnd() ** interface is invoked exactly once for each call to [sqlite3BtreeShutdown()]. ** ** ^(The remaining seven methods defined by this structure (xMutexAlloc, ** xMutexFree, xMutexEnter, xMutexTry, xMutexLeave, xMutexHeld and ** xMutexNotheld) implement the following interfaces (respectively): ** **
    **
  • [sqlite3_mutex_alloc()]
  • **
  • [sqlite3_mutex_free()]
  • **
  • [sqlite3_mutex_enter()]
  • **
  • [sqlite3_mutex_try()]
  • **
  • [sqlite3_mutex_leave()]
  • **
  • [sqlite3_mutex_held()]
  • **
  • [sqlite3_mutex_notheld()]
  • **
)^ ** ** The only difference is that the public sqlite3_XXX functions enumerated ** above silently ignore any invocations that pass a NULL pointer instead ** of a valid mutex handle. The implementations of the methods defined ** by this structure are not required to handle this case, the results ** of passing a NULL pointer instead of a valid mutex handle are undefined ** (i.e. it is acceptable to provide an implementation that segfaults if ** it is passed a NULL pointer). ** ** The xMutexInit() method must be threadsafe. It must be harmless to ** invoke xMutexInit() multiple times within the same process and without ** intervening calls to xMutexEnd(). Second and subsequent calls to ** xMutexInit() must be no-ops. ** ** xMutexInit() must not use SQLite memory allocation ([sqlite3_malloc()] ** and its associates). Similarly, xMutexAlloc() must not use SQLite memory ** allocation for a static mutex. ^However xMutexAlloc() may use SQLite ** memory allocation for a fast or recursive mutex. ** ** ^SQLite will invoke the xMutexEnd() method when [sqlite3BtreeShutdown()] is ** called, but only if the prior call to xMutexInit returned SQLITE_OK. ** If xMutexInit fails in any way, it is expected to clean up after itself ** prior to returning. */ typedef struct sqlite3_mutex_methods sqlite3_mutex_methods; struct sqlite3_mutex_methods { int (*xMutexInit)(void); int (*xMutexEnd)(void); sqlite3_mutex *(*xMutexAlloc)(int); void (*xMutexFree)(sqlite3_mutex *); void (*xMutexEnter)(sqlite3_mutex *); int (*xMutexTry)(sqlite3_mutex *); void (*xMutexLeave)(sqlite3_mutex *); int (*xMutexHeld)(sqlite3_mutex *); int (*xMutexNotheld)(sqlite3_mutex *); }; /* ** CAPI3REF: Mutex Types ** ** The [sqlite3_mutex_alloc()] interface takes a single argument ** which is one of these integer constants. ** ** The set of static mutexes may change from one SQLite release to the ** next. Applications that override the built-in mutex logic must be ** prepared to accommodate additional static mutexes. */ #define SQLITE_MUTEX_FAST 0 #define SQLITE_MUTEX_RECURSIVE 1 #define SQLITE_MUTEX_STATIC_MASTER 2 #define SQLITE_MUTEX_STATIC_MEM 3 /* sqlite3_malloc() */ #define SQLITE_MUTEX_STATIC_MEM2 4 /* NOT USED */ #define SQLITE_MUTEX_STATIC_OPEN 4 /* sqlite3BtreeOpen() */ #define SQLITE_MUTEX_STATIC_PRNG 5 /* sqlite3_random() */ #define SQLITE_MUTEX_STATIC_LRU 6 /* lru page list */ #define SQLITE_MUTEX_STATIC_LRU2 7 /* NOT USED */ #define SQLITE_MUTEX_STATIC_PMEM 7 /* sqlite3PageMalloc() */ #define SQLITE_MUTEX_STATIC_APP1 8 /* For use by application */ #define SQLITE_MUTEX_STATIC_APP2 9 /* For use by application */ #define SQLITE_MUTEX_STATIC_APP3 10 /* For use by application */ #define SQLITE_MUTEX_STATIC_VFS1 11 /* For use by built-in VFS */ #define SQLITE_MUTEX_STATIC_VFS2 12 /* For use by extension VFS */ #define SQLITE_MUTEX_STATIC_VFS3 13 /* For use by application VFS */ /* ** CAPI3REF: Status Parameters ** KEYWORDS: {status parameters} ** ** These integer constants designate various run-time status parameters ** that can be returned by [sqlite3_status()]. ** **
** [[SQLITE_STATUS_MEMORY_USED]] ^(
SQLITE_STATUS_MEMORY_USED
**
This parameter is the current amount of memory checked out ** using [sqlite3_malloc()], either directly or indirectly. The ** figure includes calls made to [sqlite3_malloc()] by the application ** and internal memory usage by the SQLite library. Scratch memory ** controlled by [SQLITE_CONFIG_SCRATCH] and auxiliary page-cache ** memory controlled by [SQLITE_CONFIG_PAGECACHE] is not included in ** this parameter. The amount returned is the sum of the allocation ** sizes as reported by the xSize method in [sqlite3_mem_methods].
)^ ** ** [[SQLITE_STATUS_MALLOC_SIZE]] ^(
SQLITE_STATUS_MALLOC_SIZE
**
This parameter records the largest memory allocation request ** handed to [sqlite3_malloc()] or [sqlite3_realloc()] (or their ** internal equivalents). Only the value returned in the ** *pHighwater parameter to [sqlite3_status()] is of interest. ** The value written into the *pCurrent parameter is undefined.
)^ ** ** [[SQLITE_STATUS_MALLOC_COUNT]] ^(
SQLITE_STATUS_MALLOC_COUNT
**
This parameter records the number of separate memory allocations ** currently checked out.
)^ ** ** [[SQLITE_STATUS_PAGECACHE_USED]] ^(
SQLITE_STATUS_PAGECACHE_USED
**
This parameter returns the number of pages used out of the ** [pagecache memory allocator] that was configured using ** [SQLITE_CONFIG_PAGECACHE]. The ** value returned is in pages, not in bytes.
)^ ** ** [[SQLITE_STATUS_PAGECACHE_OVERFLOW]] ** ^(
SQLITE_STATUS_PAGECACHE_OVERFLOW
**
This parameter returns the number of bytes of page cache ** allocation which could not be satisfied by the [SQLITE_CONFIG_PAGECACHE] ** buffer and where forced to overflow to [sqlite3_malloc()]. The ** returned value includes allocations that overflowed because they ** where too large (they were larger than the "sz" parameter to ** [SQLITE_CONFIG_PAGECACHE]) and allocations that overflowed because ** no space was left in the page cache.
)^ ** ** [[SQLITE_STATUS_PAGECACHE_SIZE]] ^(
SQLITE_STATUS_PAGECACHE_SIZE
**
This parameter records the largest memory allocation request ** handed to [pagecache memory allocator]. Only the value returned in the ** *pHighwater parameter to [sqlite3_status()] is of interest. ** The value written into the *pCurrent parameter is undefined.
)^ ** ** [[SQLITE_STATUS_SCRATCH_USED]] ^(
SQLITE_STATUS_SCRATCH_USED
**
This parameter returns the number of allocations used out of the ** [scratch memory allocator] configured using ** [SQLITE_CONFIG_SCRATCH]. The value returned is in allocations, not ** in bytes. Since a single thread may only have one scratch allocation ** outstanding at time, this parameter also reports the number of threads ** using scratch memory at the same time.
)^ ** ** [[SQLITE_STATUS_SCRATCH_OVERFLOW]] ^(
SQLITE_STATUS_SCRATCH_OVERFLOW
**
This parameter returns the number of bytes of scratch memory ** allocation which could not be satisfied by the [SQLITE_CONFIG_SCRATCH] ** buffer and where forced to overflow to [sqlite3_malloc()]. The values ** returned include overflows because the requested allocation was too ** larger (that is, because the requested allocation was larger than the ** "sz" parameter to [SQLITE_CONFIG_SCRATCH]) and because no scratch buffer ** slots were available. **
)^ ** ** [[SQLITE_STATUS_SCRATCH_SIZE]] ^(
SQLITE_STATUS_SCRATCH_SIZE
**
This parameter records the largest memory allocation request ** handed to [scratch memory allocator]. Only the value returned in the ** *pHighwater parameter to [sqlite3_status()] is of interest. ** The value written into the *pCurrent parameter is undefined.
)^ ** ** [[SQLITE_STATUS_PARSER_STACK]] ^(
SQLITE_STATUS_PARSER_STACK
**
This parameter records the deepest parser stack. It is only ** meaningful if SQLite is compiled with [YYTRACKMAXSTACKDEPTH].
)^ **
** ** New status parameters may be added from time to time. */ #define SQLITE_STATUS_MEMORY_USED 0 #define SQLITE_STATUS_PAGECACHE_USED 1 #define SQLITE_STATUS_PAGECACHE_OVERFLOW 2 #define SQLITE_STATUS_SCRATCH_USED 3 #define SQLITE_STATUS_SCRATCH_OVERFLOW 4 #define SQLITE_STATUS_MALLOC_SIZE 5 #define SQLITE_STATUS_PARSER_STACK 6 #define SQLITE_STATUS_PAGECACHE_SIZE 7 #define SQLITE_STATUS_SCRATCH_SIZE 8 #define SQLITE_STATUS_MALLOC_COUNT 9 /* ** CAPI3REF: Status Parameters for database connections ** KEYWORDS: {SQLITE_DBSTATUS options} ** ** These constants are the available integer "verbs" that can be passed as ** the second argument to the [sqlite3_db_status()] interface. ** ** New verbs may be added in future releases of SQLite. Existing verbs ** might be discontinued. Applications should check the return code from ** [sqlite3_db_status()] to make sure that the call worked. ** The [sqlite3_db_status()] interface will return a non-zero error code ** if a discontinued or unsupported verb is invoked. ** **
** [[SQLITE_DBSTATUS_LOOKASIDE_USED]] ^(
SQLITE_DBSTATUS_LOOKASIDE_USED
**
This parameter returns the number of lookaside memory slots currently ** checked out.
)^ ** ** [[SQLITE_DBSTATUS_LOOKASIDE_HIT]] ^(
SQLITE_DBSTATUS_LOOKASIDE_HIT
**
This parameter returns the number malloc attempts that were ** satisfied using lookaside memory. Only the high-water value is meaningful; ** the current value is always zero.)^ ** ** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE]] ** ^(
SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE
**
This parameter returns the number malloc attempts that might have ** been satisfied using lookaside memory but failed due to the amount of ** memory requested being larger than the lookaside slot size. ** Only the high-water value is meaningful; ** the current value is always zero.)^ ** ** [[SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL]] ** ^(
SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL
**
This parameter returns the number malloc attempts that might have ** been satisfied using lookaside memory but failed due to all lookaside ** memory already being in use. ** Only the high-water value is meaningful; ** the current value is always zero.)^ ** ** [[SQLITE_DBSTATUS_CACHE_USED]] ^(
SQLITE_DBSTATUS_CACHE_USED
**
This parameter returns the approximate number of bytes of heap ** memory used by all pager caches associated with the database connection.)^ ** ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_USED is always 0. ** ** [[SQLITE_DBSTATUS_SCHEMA_USED]] ^(
SQLITE_DBSTATUS_SCHEMA_USED
**
This parameter returns the approximate number of bytes of heap ** memory used to store the schema for all databases associated ** with the connection - main, temp, and any [ATTACH]-ed databases.)^ ** ^The full amount of memory used by the schemas is reported, even if the ** schema memory is shared with other database connections due to ** [shared cache mode] being enabled. ** ^The highwater mark associated with SQLITE_DBSTATUS_SCHEMA_USED is always 0. ** ** [[SQLITE_DBSTATUS_STMT_USED]] ^(
SQLITE_DBSTATUS_STMT_USED
**
This parameter returns the approximate number of bytes of heap ** and lookaside memory used by all prepared statements associated with ** the database connection.)^ ** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0. **
** ** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(
SQLITE_DBSTATUS_CACHE_HIT
**
This parameter returns the number of pager cache hits that have ** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT ** is always 0. **
** ** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(
SQLITE_DBSTATUS_CACHE_MISS
**
This parameter returns the number of pager cache misses that have ** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS ** is always 0. **
** ** [[SQLITE_DBSTATUS_CACHE_WRITE]] ^(
SQLITE_DBSTATUS_CACHE_WRITE
**
This parameter returns the number of dirty cache entries that have ** been written to disk. Specifically, the number of pages written to the ** wal file in wal mode databases, or the number of pages written to the ** database file in rollback mode databases. Any pages written as part of ** transaction rollback or database recovery operations are not included. ** If an IO or other error occurs while writing a page to disk, the effect ** on subsequent SQLITE_DBSTATUS_CACHE_WRITE requests is undefined.)^ ^The ** highwater mark associated with SQLITE_DBSTATUS_CACHE_WRITE is always 0. **
** ** [[SQLITE_DBSTATUS_DEFERRED_FKS]] ^(
SQLITE_DBSTATUS_DEFERRED_FKS
**
This parameter returns zero for the current value if and only if ** all foreign key constraints (deferred or immediate) have been ** resolved.)^ ^The highwater mark is always 0. **
**
*/ #define SQLITE_DBSTATUS_LOOKASIDE_USED 0 #define SQLITE_DBSTATUS_CACHE_USED 1 #define SQLITE_DBSTATUS_SCHEMA_USED 2 #define SQLITE_DBSTATUS_STMT_USED 3 #define SQLITE_DBSTATUS_LOOKASIDE_HIT 4 #define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE 5 #define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL 6 #define SQLITE_DBSTATUS_CACHE_HIT 7 #define SQLITE_DBSTATUS_CACHE_MISS 8 #define SQLITE_DBSTATUS_CACHE_WRITE 9 #define SQLITE_DBSTATUS_DEFERRED_FKS 10 #define SQLITE_DBSTATUS_MAX 10 /* Largest defined DBSTATUS */ /* ** CAPI3REF: Checkpoint Mode Values ** KEYWORDS: {checkpoint mode} ** ** These constants define all valid values for the "checkpoint mode" passed ** as the third parameter to the [sqlite3_wal_checkpoint_v2()] interface. ** See the [sqlite3_wal_checkpoint_v2()] documentation for details on the ** meaning of each of these checkpoint modes. */ #define SQLITE_CHECKPOINT_PASSIVE 0 /* Do as much as possible w/o blocking */ #define SQLITE_CHECKPOINT_FULL 1 /* Wait for writers, then checkpoint */ #define SQLITE_CHECKPOINT_RESTART 2 /* Like FULL but wait for for readers */ #define SQLITE_CHECKPOINT_TRUNCATE 3 /* Like RESTART but also truncate WAL */ /* ** CAPI3REF: Custom Page Cache Object ** ** The sqlite3_pcache type is opaque. It is implemented by ** the pluggable module. The SQLite core has no knowledge of ** its size or internal structure and never deals with the ** sqlite3_pcache object except by holding and passing pointers ** to the object. ** ** See [sqlite3_pcache_methods2] for additional information. */ typedef struct sqlite3_pcache sqlite3_pcache; /* ** CAPI3REF: Custom Page Cache Object ** ** The sqlite3_pcache_page object represents a single page in the ** page cache. The page cache will allocate instances of this ** object. Various methods of the page cache use pointers to instances ** of this object as parameters or as their return value. ** ** See [sqlite3_pcache_methods2] for additional information. */ typedef struct sqlite3_pcache_page sqlite3_pcache_page; struct sqlite3_pcache_page { void *pBuf; /* The content of the page */ void *pExtra; /* Extra information associated with the page */ }; /* ** CAPI3REF: Application Defined Page Cache. ** KEYWORDS: {page cache} ** ** ^(The [sqlite3_config]([SQLITE_CONFIG_PCACHE2], ...) interface can ** register an alternative page cache implementation by passing in an ** instance of the sqlite3_pcache_methods2 structure.)^ ** In many applications, most of the heap memory allocated by ** SQLite is used for the page cache. ** By implementing a ** custom page cache using this API, an application can better control ** the amount of memory consumed by SQLite, the way in which ** that memory is allocated and released, and the policies used to ** determine exactly which parts of a database file are cached and for ** how long. ** ** The alternative page cache mechanism is an ** extreme measure that is only needed by the most demanding applications. ** The built-in page cache is recommended for most uses. ** ** ^(The contents of the sqlite3_pcache_methods2 structure are copied to an ** internal buffer by SQLite within the call to [sqlite3_config]. Hence ** the application may discard the parameter after the call to ** [sqlite3_config()] returns.)^ ** ** [[the xInit() page cache method]] ** ^(The xInit() method is called once for each effective ** call to [sqlite3BtreeInitialize()])^ ** (usually only once during the lifetime of the process). ^(The xInit() ** method is passed a copy of the sqlite3_pcache_methods2.pArg value.)^ ** The intent of the xInit() method is to set up global data structures ** required by the custom page cache implementation. ** ^(If the xInit() method is NULL, then the ** built-in default page cache is used instead of the application defined ** page cache.)^ ** ** [[the xShutdown() page cache method]] ** ^The xShutdown() method is called by [sqlite3BtreeShutdown()]. ** It can be used to clean up ** any outstanding resources before process shutdown, if required. ** ^The xShutdown() method may be NULL. ** ** ^SQLite automatically serializes calls to the xInit method, ** so the xInit method need not be threadsafe. ^The ** xShutdown method is only called from [sqlite3BtreeShutdown()] so it does ** not need to be threadsafe either. All other methods must be threadsafe ** in multithreaded applications. ** ** ^SQLite will never invoke xInit() more than once without an intervening ** call to xShutdown(). ** ** [[the xCreate() page cache methods]] ** ^SQLite invokes the xCreate() method to construct a new cache instance. ** SQLite will typically create one cache instance for each open database file, ** though this is not guaranteed. ^The ** first parameter, szPage, is the size in bytes of the pages that must ** be allocated by the cache. ^szPage will always a power of two. ^The ** second parameter szExtra is a number of bytes of extra storage ** associated with each page cache entry. ^The szExtra parameter will ** a number less than 250. SQLite will use the ** extra szExtra bytes on each page to store metadata about the underlying ** database page on disk. The value passed into szExtra depends ** on the SQLite version, the target platform, and how SQLite was compiled. ** ^The third argument to xCreate(), bPurgeable, is true if the cache being ** created will be used to cache database pages of a file stored on disk, or ** false if it is used for an in-memory database. The cache implementation ** does not have to do anything special based with the value of bPurgeable; ** it is purely advisory. ^On a cache where bPurgeable is false, SQLite will ** never invoke xUnpin() except to deliberately delete a page. ** ^In other words, calls to xUnpin() on a cache with bPurgeable set to ** false will always have the "discard" flag set to true. ** ^Hence, a cache created with bPurgeable false will ** never contain any unpinned pages. ** ** [[the xCachesize() page cache method]] ** ^(The xCachesize() method may be called at any time by SQLite to set the ** suggested maximum cache-size (number of pages stored by) the cache ** instance passed as the first argument. This is the value configured using ** the SQLite "[PRAGMA cache_size]" command.)^ As with the bPurgeable ** parameter, the implementation is not required to do anything with this ** value; it is advisory only. ** ** [[the xPagecount() page cache methods]] ** The xPagecount() method must return the number of pages currently ** stored in the cache, both pinned and unpinned. ** ** [[the xFetch() page cache methods]] ** The xFetch() method locates a page in the cache and returns a pointer to ** an sqlite3_pcache_page object associated with that page, or a NULL pointer. ** The pBuf element of the returned sqlite3_pcache_page object will be a ** pointer to a buffer of szPage bytes used to store the content of a ** single database page. The pExtra element of sqlite3_pcache_page will be ** a pointer to the szExtra bytes of extra storage that SQLite has requested ** for each entry in the page cache. ** ** The page to be fetched is determined by the key. ^The minimum key value ** is 1. After it has been retrieved using xFetch, the page is considered ** to be "pinned". ** ** If the requested page is already in the page cache, then the page cache ** implementation must return a pointer to the page buffer with its content ** intact. If the requested page is not already in the cache, then the ** cache implementation should use the value of the createFlag ** parameter to help it determined what action to take: ** ** **
createFlag Behavior when page is not already in cache **
0 Do not allocate a new page. Return NULL. **
1 Allocate a new page if it easy and convenient to do so. ** Otherwise return NULL. **
2 Make every effort to allocate a new page. Only return ** NULL if allocating a new page is effectively impossible. **
** ** ^(SQLite will normally invoke xFetch() with a createFlag of 0 or 1. SQLite ** will only use a createFlag of 2 after a prior call with a createFlag of 1 ** failed.)^ In between the to xFetch() calls, SQLite may ** attempt to unpin one or more cache pages by spilling the content of ** pinned pages to disk and synching the operating system disk cache. ** ** [[the xUnpin() page cache method]] ** ^xUnpin() is called by SQLite with a pointer to a currently pinned page ** as its second argument. If the third parameter, discard, is non-zero, ** then the page must be evicted from the cache. ** ^If the discard parameter is ** zero, then the page may be discarded or retained at the discretion of ** page cache implementation. ^The page cache implementation ** may choose to evict unpinned pages at any time. ** ** The cache must not perform any reference counting. A single ** call to xUnpin() unpins the page regardless of the number of prior calls ** to xFetch(). ** ** [[the xRekey() page cache methods]] ** The xRekey() method is used to change the key value associated with the ** page passed as the second argument. If the cache ** previously contains an entry associated with newKey, it must be ** discarded. ^Any prior cache entry associated with newKey is guaranteed not ** to be pinned. ** ** When SQLite calls the xTruncate() method, the cache must discard all ** existing cache entries with page numbers (keys) greater than or equal ** to the value of the iLimit parameter passed to xTruncate(). If any ** of these pages are pinned, they are implicitly unpinned, meaning that ** they can be safely discarded. ** ** [[the xDestroy() page cache method]] ** ^The xDestroy() method is used to delete a cache allocated by xCreate(). ** All resources associated with the specified cache should be freed. ^After ** calling the xDestroy() method, SQLite considers the [sqlite3_pcache*] ** handle invalid, and will not use it with any other sqlite3_pcache_methods2 ** functions. ** ** [[the xShrink() page cache method]] ** ^SQLite invokes the xShrink() method when it wants the page cache to ** free up as much of heap memory as possible. The page cache implementation ** is not obligated to free any memory, but well-behaved implementations should ** do their best. */ typedef struct sqlite3_pcache_methods2 sqlite3_pcache_methods2; struct sqlite3_pcache_methods2 { int iVersion; void *pArg; int (*xInit)(void*); void (*xShutdown)(void*); sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable); void (*xCachesize)(sqlite3_pcache*, int nCachesize); int (*xPagecount)(sqlite3_pcache*); sqlite3_pcache_page *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag); void (*xUnpin)(sqlite3_pcache*, sqlite3_pcache_page*, int discard); void (*xRekey)(sqlite3_pcache*, sqlite3_pcache_page*, unsigned oldKey, unsigned newKey); void (*xTruncate)(sqlite3_pcache*, unsigned iLimit); void (*xDestroy)(sqlite3_pcache*); void (*xShrink)(sqlite3_pcache*); }; /* ** CAPI3REF: Online Backup Object ** ** The sqlite3_backup object records state information about an ongoing ** online backup operation. ^The sqlite3_backup object is created by ** a call to [sqlite3_backup_init()] and is destroyed by a call to ** [sqlite3_backup_finish()]. ** ** See Also: [Using the SQLite Online Backup API] */ typedef struct sqlite3_backup sqlite3_backup; /* ** CAPI3REF: Error Logging Interface ** ** ^The [sqlite3_log()] interface writes a message into the [error log] ** established by the [SQLITE_CONFIG_LOG] option to [sqlite3_config()]. ** ^If logging is enabled, the zFormat string and subsequent arguments are ** used with [sqlite3_snprintf()] to generate the final output string. ** ** The sqlite3_log() interface is intended for use by extensions such as ** virtual tables, collating functions, and SQL functions. While there is ** nothing to prevent an application from calling sqlite3_log(), doing so ** is considered bad form. ** ** The zFormat string must not be NULL. ** ** To avoid deadlocks and other threading problems, the sqlite3_log() routine ** will not use dynamically allocated memory. The log message is stored in ** a fixed-length buffer on the stack. If the log message is longer than ** a few hundred characters, it will be truncated to the length of the ** buffer. */ SQLITE_PRIVATE void sqlite3_log(int iErrCode, const char *zFormat, ...); /* ** CAPI3REF: Text Encodings ** ** These constant define integer codes that represent the various ** text encodings supported by SQLite. */ #define SQLITE_UTF8 1 /* IMP: R-37514-35566 */ #define SQLITE_UTF16LE 2 /* IMP: R-03371-37637 */ #define SQLITE_UTF16BE 3 /* IMP: R-51971-34154 */ #define SQLITE_UTF16 4 /* Use native byte order */ #define SQLITE_ANY 5 /* Deprecated */ #define SQLITE_UTF16_ALIGNED 8 /* sqlite3_create_collation only */ /* ** Include the configuration header output by 'configure' if we're using the ** autoconf-based build */ #ifdef _HAVE_SQLITE_CONFIG_H #include "config.h" #endif /************** Include sqliteLimit.h in the middle of sqliteInt.h ***********/ /************** Begin file sqliteLimit.h *************************************/ /* ** 2007 May 7 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file defines various limits of what SQLite can process. */ /* ** The maximum length of a TEXT or BLOB in bytes. This also ** limits the size of a row in a table or index. ** ** The hard limit is the ability of a 32-bit signed integer ** to count the size: 2^31-1 or 2147483647. */ #ifndef SQLITE_MAX_LENGTH # define SQLITE_MAX_LENGTH 1000000000 #endif /* ** This is the maximum number of ** ** * Columns in a table ** * Columns in an index ** * Columns in a view ** * Terms in the SET clause of an UPDATE statement ** * Terms in the result set of a SELECT statement ** * Terms in the GROUP BY or ORDER BY clauses of a SELECT statement. ** * Terms in the VALUES clause of an INSERT statement ** ** The hard upper limit here is 32676. Most database people will ** tell you that in a well-normalized database, you usually should ** not have more than a dozen or so columns in any table. And if ** that is the case, there is no point in having more than a few ** dozen values in any of the other situations described above. */ #ifndef SQLITE_MAX_COLUMN # define SQLITE_MAX_COLUMN 2000 #endif /* ** The maximum length of a single SQL statement in bytes. ** ** It used to be the case that setting this value to zero would ** turn the limit off. That is no longer true. It is not possible ** to turn this limit off. */ #ifndef SQLITE_MAX_SQL_LENGTH # define SQLITE_MAX_SQL_LENGTH 1000000000 #endif /* ** The maximum depth of an expression tree. This is limited to ** some extent by SQLITE_MAX_SQL_LENGTH. But sometime you might ** want to place more severe limits on the complexity of an ** expression. ** ** A value of 0 used to mean that the limit was not enforced. ** But that is no longer true. The limit is now strictly enforced ** at all times. */ #ifndef SQLITE_MAX_EXPR_DEPTH # define SQLITE_MAX_EXPR_DEPTH 1000 #endif /* ** The maximum number of terms in a compound SELECT statement. ** The code generator for compound SELECT statements does one ** level of recursion for each term. A stack overflow can result ** if the number of terms is too large. In practice, most SQL ** never has more than 3 or 4 terms. Use a value of 0 to disable ** any limit on the number of terms in a compount SELECT. */ #ifndef SQLITE_MAX_COMPOUND_SELECT # define SQLITE_MAX_COMPOUND_SELECT 500 #endif /* ** The maximum number of opcodes in a VDBE program. ** Not currently enforced. */ #ifndef SQLITE_MAX_VDBE_OP # define SQLITE_MAX_VDBE_OP 25000 #endif /* ** The maximum number of arguments to an SQL function. */ #ifndef SQLITE_MAX_FUNCTION_ARG # define SQLITE_MAX_FUNCTION_ARG 127 #endif /* ** The suggested maximum number of in-memory pages to use for ** the main database table and for temporary tables. ** ** IMPLEMENTATION-OF: R-31093-59126 The default suggested cache size ** is 2000 pages. ** IMPLEMENTATION-OF: R-48205-43578 The default suggested cache size can be ** altered using the SQLITE_DEFAULT_CACHE_SIZE compile-time options. */ #ifndef SQLITE_DEFAULT_CACHE_SIZE # define SQLITE_DEFAULT_CACHE_SIZE 2000 #endif /* ** The default number of frames to accumulate in the log file before ** checkpointing the database in WAL mode. */ #ifndef SQLITE_DEFAULT_WAL_AUTOCHECKPOINT # define SQLITE_DEFAULT_WAL_AUTOCHECKPOINT 1000 #endif /* ** The maximum number of attached databases. This must be between 0 ** and 62. The upper bound on 62 is because a 64-bit integer bitmap ** is used internally to track attached databases. */ #ifndef SQLITE_MAX_ATTACHED # define SQLITE_MAX_ATTACHED 10 #endif /* ** The maximum value of a ?nnn wildcard that the parser will accept. */ #ifndef SQLITE_MAX_VARIABLE_NUMBER # define SQLITE_MAX_VARIABLE_NUMBER 999 #endif /* Maximum page size. The upper bound on this value is 65536. This a limit ** imposed by the use of 16-bit offsets within each page. ** ** Earlier versions of SQLite allowed the user to change this value at ** compile time. This is no longer permitted, on the grounds that it creates ** a library that is technically incompatible with an SQLite library ** compiled with a different limit. If a process operating on a database ** with a page-size of 65536 bytes crashes, then an instance of SQLite ** compiled with the default page-size limit will not be able to rollback ** the aborted transaction. This could lead to database corruption. */ #ifdef SQLITE_MAX_PAGE_SIZE # undef SQLITE_MAX_PAGE_SIZE #endif #define SQLITE_MAX_PAGE_SIZE 65536 /* ** The default size of a database page. */ #ifndef SQLITE_DEFAULT_PAGE_SIZE # define SQLITE_DEFAULT_PAGE_SIZE 1024 #endif #if SQLITE_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE # undef SQLITE_DEFAULT_PAGE_SIZE # define SQLITE_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE #endif /* ** Ordinarily, if no value is explicitly provided, SQLite creates databases ** with page size SQLITE_DEFAULT_PAGE_SIZE. However, based on certain ** device characteristics (sector-size and atomic write() support), ** SQLite may choose a larger value. This constant is the maximum value ** SQLite will choose on its own. */ #ifndef SQLITE_MAX_DEFAULT_PAGE_SIZE # define SQLITE_MAX_DEFAULT_PAGE_SIZE 8192 #endif #if SQLITE_MAX_DEFAULT_PAGE_SIZE>SQLITE_MAX_PAGE_SIZE # undef SQLITE_MAX_DEFAULT_PAGE_SIZE # define SQLITE_MAX_DEFAULT_PAGE_SIZE SQLITE_MAX_PAGE_SIZE #endif /* ** Maximum number of pages in one database file. ** ** This is really just the default value for the max_page_count pragma. ** This value can be lowered (or raised) at run-time using that the ** max_page_count macro. */ #ifndef SQLITE_MAX_PAGE_COUNT # define SQLITE_MAX_PAGE_COUNT 1073741823 #endif /* ** Maximum length (in bytes) of the pattern in a LIKE or GLOB ** operator. */ #ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH # define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 #endif /* ** Maximum depth of recursion for triggers. ** ** A value of 1 means that a trigger program will not be able to itself ** fire any triggers. A value of 0 means that no trigger programs at all ** may be executed. */ #ifndef SQLITE_MAX_TRIGGER_DEPTH # define SQLITE_MAX_TRIGGER_DEPTH 1000 #endif /************** End of sqliteLimit.h *****************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /* Disable nuisance warnings on Borland compilers */ #if defined(__BORLANDC__) #pragma warn -rch /* unreachable code */ #pragma warn -ccc /* Condition is always true or false */ #pragma warn -aus /* Assigned value is never used */ #pragma warn -csu /* Comparing signed and unsigned */ #pragma warn -spa /* Suspicious pointer arithmetic */ #endif /* ** Include standard header files as necessary */ #ifdef HAVE_STDINT_H #include #endif #ifdef HAVE_INTTYPES_H #include #endif /* ** The following macros are used to cast pointers to integers and ** integers to pointers. The way you do this varies from one compiler ** to the next, so we have developed the following set of #if statements ** to generate appropriate macros for a wide range of compilers. ** ** The correct "ANSI" way to do this is to use the intptr_t type. ** Unfortunately, that typedef is not available on all compilers, or ** if it is available, it requires an #include of specific headers ** that vary from one machine to the next. ** ** Ticket #3860: The llvm-gcc-4.2 compiler from Apple chokes on ** the ((void*)&((char*)0)[X]) construct. But MSVC chokes on ((void*)(X)). ** So we have to define the macros in different ways depending on the ** compiler. */ #if defined(__PTRDIFF_TYPE__) /* This case should work for GCC */ # define SQLITE_INT_TO_PTR(X) ((void*)(__PTRDIFF_TYPE__)(X)) # define SQLITE_PTR_TO_INT(X) ((int)(__PTRDIFF_TYPE__)(X)) #elif !defined(__GNUC__) /* Works for compilers other than LLVM */ # define SQLITE_INT_TO_PTR(X) ((void*)&((char*)0)[X]) # define SQLITE_PTR_TO_INT(X) ((int)(((char*)X)-(char*)0)) #elif defined(HAVE_STDINT_H) /* Use this case if we have ANSI headers */ # define SQLITE_INT_TO_PTR(X) ((void*)(intptr_t)(X)) # define SQLITE_PTR_TO_INT(X) ((int)(intptr_t)(X)) #else /* Generates a warning - but it always works */ # define SQLITE_INT_TO_PTR(X) ((void*)(X)) # define SQLITE_PTR_TO_INT(X) ((int)(X)) #endif /* ** A macro to hint to the compiler that a function should not be ** inlined. */ #if defined(__GNUC__) # define SQLITE_NOINLINE __attribute__((noinline)) #elif defined(_MSC_VER) && _MSC_VER>=1310 # define SQLITE_NOINLINE __declspec(noinline) #else # define SQLITE_NOINLINE #endif /* ** Make sure that the compiler intrinsics we desire are enabled when ** compiling with an appropriate version of MSVC unless prevented by ** the SQLITE_DISABLE_INTRINSIC define. */ #if !defined(SQLITE_DISABLE_INTRINSIC) # if defined(_MSC_VER) && _MSC_VER>=1300 # if !defined(_WIN32_WCE) # include # pragma intrinsic(_byteswap_ushort) # pragma intrinsic(_byteswap_ulong) # pragma intrinsic(_ReadWriteBarrier) # else # include # endif # endif #endif /* ** Powersafe overwrite is on by default. But can be turned off using ** the -DSQLITE_POWERSAFE_OVERWRITE=0 command-line option. */ #ifndef SQLITE_POWERSAFE_OVERWRITE # define SQLITE_POWERSAFE_OVERWRITE 1 #endif /* ** EVIDENCE-OF: R-25715-37072 Memory allocation statistics are enabled by ** default unless SQLite is compiled with SQLITE_DEFAULT_MEMSTATUS=0 in ** which case memory allocation statistics are disabled by default. */ #if !defined(SQLITE_DEFAULT_MEMSTATUS) # define SQLITE_DEFAULT_MEMSTATUS 1 #endif /* ** Exactly one of the following macros must be defined in order to ** specify which memory allocation subsystem to use. ** ** SQLITE_SYSTEM_MALLOC // Use normal system malloc() ** SQLITE_WIN32_MALLOC // Use Win32 native heap API ** SQLITE_ZERO_MALLOC // Use a stub allocator that always fails ** SQLITE_MEMDEBUG // Debugging version of system malloc() ** ** On Windows, if the SQLITE_WIN32_MALLOC_VALIDATE macro is defined and the ** assert() macro is enabled, each call into the Win32 native heap subsystem ** will cause HeapValidate to be called. If heap validation should fail, an ** assertion will be triggered. ** ** If none of the above are defined, then set SQLITE_SYSTEM_MALLOC as ** the default. */ #if defined(SQLITE_SYSTEM_MALLOC) \ + defined(SQLITE_WIN32_MALLOC) \ + defined(SQLITE_ZERO_MALLOC) \ + defined(SQLITE_MEMDEBUG)>1 # error "Two or more of the following compile-time configuration options\ are defined but at most one is allowed:\ SQLITE_SYSTEM_MALLOC, SQLITE_WIN32_MALLOC, SQLITE_MEMDEBUG,\ SQLITE_ZERO_MALLOC" #endif #if defined(SQLITE_SYSTEM_MALLOC) \ + defined(SQLITE_WIN32_MALLOC) \ + defined(SQLITE_ZERO_MALLOC) \ + defined(SQLITE_MEMDEBUG)==0 # define SQLITE_SYSTEM_MALLOC 1 #endif /* ** We need to define _XOPEN_SOURCE as follows in order to enable ** recursive mutexes on most Unix systems and fchmod() on OpenBSD. ** But _XOPEN_SOURCE define causes problems for Mac OS X, so omit ** it. */ #if !defined(_XOPEN_SOURCE) && !defined(__DARWIN__) && !defined(__APPLE__) # define _XOPEN_SOURCE 600 #endif /* ** NDEBUG and SQLITE_DEBUG are opposites. It should always be true that ** defined(NDEBUG)==!defined(SQLITE_DEBUG). If this is not currently true, ** make it true by defining or undefining NDEBUG. ** ** Setting NDEBUG makes the code smaller and faster by disabling the ** assert() statements in the code. So we want the default action ** to be for NDEBUG to be set and NDEBUG to be undefined only if SQLITE_DEBUG ** is set. Thus NDEBUG becomes an opt-in rather than an opt-out ** feature. */ #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) # define NDEBUG 1 #endif #if defined(NDEBUG) && defined(SQLITE_DEBUG) # undef NDEBUG #endif /* ** The testcase() macro is used to aid in coverage testing. When ** doing coverage testing, the condition inside the argument to ** testcase() must be evaluated both true and false in order to ** get full branch coverage. The testcase() macro is inserted ** to help ensure adequate test coverage in places where simple ** condition/decision coverage is inadequate. For example, testcase() ** can be used to make sure boundary values are tested. For ** bitmask tests, testcase() can be used to make sure each bit ** is significant and used at least once. On switch statements ** where multiple cases go to the same block of code, testcase() ** can insure that all cases are evaluated. ** */ #ifdef SQLITE_COVERAGE_TEST SQLITE_PRIVATE void sqlite3Coverage(int); # define testcase(X) if( X ){ sqlite3Coverage(__LINE__); } #else # define testcase(X) #endif /* ** The TESTONLY macro is used to enclose variable declarations or ** other bits of code that are needed to support the arguments ** within testcase() and assert() macros. */ #if !defined(NDEBUG) || defined(SQLITE_COVERAGE_TEST) # define TESTONLY(X) X #else # define TESTONLY(X) #endif /* ** Sometimes we need a small amount of code such as a variable initialization ** to setup for a later assert() statement. We do not want this code to ** appear when assert() is disabled. The following macro is therefore ** used to contain that setup code. The "VVA" acronym stands for ** "Verification, Validation, and Accreditation". In other words, the ** code within VVA_ONLY() will only run during verification processes. */ #ifndef NDEBUG # define VVA_ONLY(X) X #else # define VVA_ONLY(X) #endif /* ** The ALWAYS and NEVER macros surround boolean expressions which ** are intended to always be true or false, respectively. Such ** expressions could be omitted from the code completely. But they ** are included in a few cases in order to enhance the resilience ** of SQLite to unexpected behavior - to make the code "self-healing" ** or "ductile" rather than being "brittle" and crashing at the first ** hint of unplanned behavior. ** ** In other words, ALWAYS and NEVER are added for defensive code. ** ** When doing coverage testing ALWAYS and NEVER are hard-coded to ** be true and false so that the unreachable code they specify will ** not be counted as untested code. */ #if defined(SQLITE_COVERAGE_TEST) # define ALWAYS(X) (1) # define NEVER(X) (0) #elif !defined(NDEBUG) # define ALWAYS(X) ((X)?1:(assert(0),0)) # define NEVER(X) ((X)?(assert(0),1):0) #else # define ALWAYS(X) (X) # define NEVER(X) (X) #endif /* ** Declarations used for tracing the operating system interfaces. */ #if defined(SQLITE_FORCE_OS_TRACE) || defined(SQLITE_TEST) || \ (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) extern int sqlite3OSTrace; # define OSTRACE(X) if( sqlite3OSTrace ) sqlite3DebugPrintf X # define SQLITE_HAVE_OS_TRACE #else # define OSTRACE(X) # undef SQLITE_HAVE_OS_TRACE #endif /* ** Is the sqlite3ErrName() function needed in the build? Currently, ** it is needed by "mutex_w32.c" (when debugging), "os_win.c" (when ** OSTRACE is enabled), and by several "test*.c" files (which are ** compiled using SQLITE_TEST). */ #if defined(SQLITE_HAVE_OS_TRACE) || defined(SQLITE_TEST) || \ (defined(SQLITE_DEBUG) && SQLITE_OS_WIN) # define SQLITE_NEED_ERR_NAME #else # undef SQLITE_NEED_ERR_NAME #endif /* ** The macro unlikely() is a hint that surrounds a boolean ** expression that is usually false. Macro likely() surrounds ** a boolean expression that is usually true. These hints could, ** in theory, be used by the compiler to generate better code, but ** currently they are just comments for human readers. */ #define likely(X) (X) #define unlikely(X) (X) #include #include #include #include /* ** The "file format" number is an integer that is incremented whenever ** the VDBE-level file format changes. The following macros define the ** the default file format for new databases and the maximum file format ** that the library can read. */ #define SQLITE_MAX_FILE_FORMAT 4 #ifndef SQLITE_DEFAULT_FILE_FORMAT # define SQLITE_DEFAULT_FILE_FORMAT 4 #endif /* ** The default initial allocation for the pagecache when using separate ** pagecaches for each database connection. A positive number is the ** number of pages. A negative number N translations means that a buffer ** of -1024*N bytes is allocated and used for as many pages as it will hold. */ #ifndef SQLITE_DEFAULT_PCACHE_INITSZ # define SQLITE_DEFAULT_PCACHE_INITSZ 100 #endif /* ** GCC does not define the offsetof() macro so we'll have to do it ** ourselves. */ #ifndef offsetof #define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD)) #endif /* ** Macros to compute minimum and maximum of two numbers. */ #define MIN(A,B) ((A)<(B)?(A):(B)) #define MAX(A,B) ((A)>(B)?(A):(B)) /* ** Swap two objects of type TYPE. */ #define SWAP(TYPE,A,B) {TYPE t=A; A=B; B=t;} /* ** SQLITE_MAX_U32 is a u64 constant that is the maximum u64 value ** that can be stored in a u32 without loss of data. The value ** is 0x00000000ffffffff. But because of quirks of some compilers, we ** have to specify the value in the less intuitive manner shown: */ #define SQLITE_MAX_U32 ((((u64)1)<<32)-1) /* ** Set the SQLITE_PTRSIZE macro to the number of bytes in a pointer */ #ifndef SQLITE_PTRSIZE # if defined(__SIZEOF_POINTER__) # define SQLITE_PTRSIZE __SIZEOF_POINTER__ # elif defined(i386) || defined(__i386__) || defined(_M_IX86) || \ defined(_M_ARM) || defined(__arm__) || defined(__x86) # define SQLITE_PTRSIZE 4 # else # define SQLITE_PTRSIZE 8 # endif #endif /* ** Constants for the largest and smallest possible 64-bit signed integers. ** These macros are designed to work correctly on both 32-bit and 64-bit ** compilers. */ #define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) #define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) /* ** Round up a number to the next larger multiple of 8. This is used ** to force 8-byte alignment on 64-bit architectures. */ #define ROUND8(x) (((x)+7)&~7) /* ** Round down to the nearest multiple of 8 */ #define ROUNDDOWN8(x) ((x)&~7) /* ** Disable MMAP on platforms where it is known to not work */ #if defined(__OpenBSD__) || defined(__QNXNTO__) # undef SQLITE_MAX_MMAP_SIZE # define SQLITE_MAX_MMAP_SIZE 0 #endif /* ** Default maximum size of memory used by memory-mapped I/O in the VFS */ #ifdef __APPLE__ # include # if TARGET_OS_IPHONE # undef SQLITE_MAX_MMAP_SIZE # define SQLITE_MAX_MMAP_SIZE 0 # endif #endif #ifndef SQLITE_MAX_MMAP_SIZE # if defined(__linux__) \ || defined(_WIN32) \ || (defined(__APPLE__) && defined(__MACH__)) \ || defined(__sun) \ || defined(__FreeBSD__) \ || defined(__DragonFly__) # define SQLITE_MAX_MMAP_SIZE 0x7fff0000 /* 2147418112 */ # else # define SQLITE_MAX_MMAP_SIZE 0 # endif # define SQLITE_MAX_MMAP_SIZE_xc 1 /* exclude from ctime.c */ #endif /* ** The default MMAP_SIZE is zero on all platforms. Or, even if a larger ** default MMAP_SIZE is specified at compile-time, make sure that it does ** not exceed the maximum mmap size. */ #ifndef SQLITE_DEFAULT_MMAP_SIZE # define SQLITE_DEFAULT_MMAP_SIZE 0 # define SQLITE_DEFAULT_MMAP_SIZE_xc 1 /* Exclude from ctime.c */ #endif #if SQLITE_DEFAULT_MMAP_SIZE>SQLITE_MAX_MMAP_SIZE # undef SQLITE_DEFAULT_MMAP_SIZE # define SQLITE_DEFAULT_MMAP_SIZE SQLITE_MAX_MMAP_SIZE #endif /* ** An instance of the following structure is used to store the busy-handler ** callback for a given sqlite handle. ** ** The sqlite.busyHandler member of the sqlite struct contains the busy ** callback for the database handle. Each pager opened via the sqlite ** handle is passed a pointer to sqlite.busyHandler. The busy-handler ** callback is currently invoked only from within pager.c. */ typedef struct BusyHandler BusyHandler; struct BusyHandler { int (*xFunc)(void *,int); /* The busy callback */ void *pArg; /* First arg to busy callback */ int nBusy; /* Incremented with each busy call */ }; /* ** The root-page of the master database table. */ #define MASTER_ROOT 1 /* ** A convenience macro that returns the number of elements in ** an array. */ #define ArraySize(X) ((int)(sizeof(X)/sizeof(X[0]))) /* ** When SQLITE_OMIT_WSD is defined, it means that the target platform does ** not support Writable Static Data (WSD) such as global and static variables. ** All variables must either be on the stack or dynamically allocated from ** the heap. When WSD is unsupported, the variable declarations scattered ** throughout the SQLite code must become constants instead. The SQLITE_WSD ** macro is used for this purpose. And instead of referencing the variable ** directly, we use its constant as a key to lookup the run-time allocated ** buffer that holds real variable. The constant is also the initializer ** for the run-time allocated buffer. ** ** In the usual case where WSD is supported, the SQLITE_WSD and GLOBAL ** macros become no-ops and have zero performance impact. */ #ifdef SQLITE_OMIT_WSD #define SQLITE_WSD const #define GLOBAL(t,v) (*(t*)sqlite3_wsd_find((void*)&(v), sizeof(v))) #define sqlite3GlobalConfig GLOBAL(struct Sqlite3Config, sqlite3Config) SQLITE_PRIVATE int sqlite3_wsd_init(int N, int J); SQLITE_PRIVATE void *sqlite3_wsd_find(void *K, int L); #else #define SQLITE_WSD #define GLOBAL(t,v) v #define sqlite3GlobalConfig sqlite3Config #endif /* ** The following macros are used to suppress compiler warnings and to ** make it clear to human readers when a function parameter is deliberately ** left unused within the body of a function. This usually happens when ** a function is called via a function pointer. For example the ** implementation of an SQL aggregate step callback may not use the ** parameter indicating the number of arguments passed to the aggregate, ** if it knows that this is enforced elsewhere. ** ** When a function parameter is not used at all within the body of a function, ** it is generally named "NotUsed" or "NotUsed2" to make things even clearer. ** However, these macros may also be used to suppress warnings related to ** parameters that may or may not be used depending on compilation options. ** For example those parameters only used in assert() statements. In these ** cases the parameters are named as per the usual conventions. */ #define UNUSED_PARAMETER(x) (void)(x) #define UNUSED_PARAMETER2(x,y) UNUSED_PARAMETER(x),UNUSED_PARAMETER(y) typedef struct Bitvec Bitvec; typedef struct CollSeq CollSeq; typedef struct PrintfArguments PrintfArguments; typedef struct Savepoint Savepoint; typedef struct StrAccum StrAccum; /************** Include pager.h in the middle of sqliteInt.h *****************/ /************** Begin file pager.h *******************************************/ /* ** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This header file defines the interface that the sqlite page cache ** subsystem. The page cache subsystem reads and writes a file a page ** at a time and provides a journal for rollback. */ #ifndef _PAGER_H_ #define _PAGER_H_ /* ** Default maximum size for persistent journal files. A negative ** value means no limit. This value may be overridden using the ** sqlite3PagerJournalSizeLimit() API. See also "PRAGMA journal_size_limit". */ #ifndef SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT #define SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT -1 #endif /* ** The type used to represent a page number. The first page in a file ** is called page 1. 0 is used to represent "not a page". */ typedef u32 Pgno; /* ** Each open file is managed by a separate instance of the "Pager" structure. */ typedef struct Pager Pager; /* ** Handle type for pages. */ typedef struct PgHdr DbPage; /* ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is ** reserved for working around a windows/posix incompatibility). It is ** used in the journal to signify that the remainder of the journal file ** is devoted to storing a master journal name - there are no more pages to ** roll back. See comments for function writeMasterJournal() in pager.c ** for details. */ #define PAGER_MJ_PGNO(x) ((Pgno)((PENDING_BYTE/((x)->pageSize))+1)) /* ** Allowed values for the flags parameter to sqlite3PagerOpen(). ** ** NOTE: These values must match the corresponding BTREE_ values in btree.h. */ #define PAGER_OMIT_JOURNAL 0x0001 /* Do not use a rollback journal */ #define PAGER_MEMORY 0x0002 /* In-memory database */ /* ** Valid values for the second argument to sqlite3PagerLockingMode(). */ #define PAGER_LOCKINGMODE_QUERY -1 #define PAGER_LOCKINGMODE_NORMAL 0 #define PAGER_LOCKINGMODE_EXCLUSIVE 1 /* ** Numeric constants that encode the journalmode. */ #define PAGER_JOURNALMODE_QUERY (-1) /* Query the value of journalmode */ #define PAGER_JOURNALMODE_DELETE 0 /* Commit by deleting journal file */ #define PAGER_JOURNALMODE_PERSIST 1 /* Commit by zeroing journal header */ #define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. */ #define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ #define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ /* ** Flags that make up the mask passed to sqlite3PagerAcquire(). */ #define PAGER_GET_NOCONTENT 0x01 /* Do not load data from disk */ #define PAGER_GET_READONLY 0x02 /* Read-only page is acceptable */ /* ** Flags for sqlite3PagerSetFlags() */ #define PAGER_SYNCHRONOUS_OFF 0x01 /* PRAGMA synchronous=OFF */ #define PAGER_SYNCHRONOUS_NORMAL 0x02 /* PRAGMA synchronous=NORMAL */ #define PAGER_SYNCHRONOUS_FULL 0x03 /* PRAGMA synchronous=FULL */ #define PAGER_SYNCHRONOUS_MASK 0x03 /* Mask for three values above */ #define PAGER_FULLFSYNC 0x04 /* PRAGMA fullfsync=ON */ #define PAGER_CKPT_FULLFSYNC 0x08 /* PRAGMA checkpoint_fullfsync=ON */ #define PAGER_CACHESPILL 0x10 /* PRAGMA cache_spill=ON */ #define PAGER_FLAGS_MASK 0x1c /* All above except SYNCHRONOUS */ /* ** The remainder of this file contains the declarations of the functions ** that make up the Pager sub-system API. See source code comments for ** a detailed description of each routine. */ /* Open and close a Pager connection. */ SQLITE_PRIVATE int sqlite3PagerOpen( sqlite3_vfs*, Pager **ppPager, const char*, int, int, int, void(*)(DbPage*) ); SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager); SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager*, int, unsigned char*); /* Functions used to configure a Pager object. */ SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(Pager*, int(*)(void *), void *); SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager*, u32*, int); #ifdef SQLITE_HAS_CODEC SQLITE_PRIVATE void sqlite3PagerAlignReserve(Pager*,Pager*); #endif SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager*, int); SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager*, int); SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64); SQLITE_PRIVATE void sqlite3PagerSetFlags(Pager*,unsigned); #if 0 SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *, int); SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *, int); SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager*); SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager*); SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *, i64); #endif /* Functions used to obtain and release page references. */ SQLITE_PRIVATE int sqlite3PagerAcquire(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag); #define sqlite3PagerGet(A,B,C) sqlite3PagerAcquire(A,B,C,0) SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno); SQLITE_PRIVATE void sqlite3PagerRef(DbPage*); SQLITE_PRIVATE void sqlite3PagerUnref(DbPage*); SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage*); /* Operations on page references. */ SQLITE_PRIVATE int sqlite3PagerWrite(DbPage*); SQLITE_PRIVATE void sqlite3PagerDontWrite(DbPage*); SQLITE_PRIVATE int sqlite3PagerMovepage(Pager*,DbPage*,Pgno,int); SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage*); SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *); SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *); /* Functions used to manage pager transactions and savepoints. */ SQLITE_PRIVATE void sqlite3PagerPagecount(Pager*, int*); SQLITE_PRIVATE int sqlite3PagerBegin(Pager*, int exFlag, int); SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne(Pager*,const char *zMaster, int); SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager*); SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zMaster); SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager*); SQLITE_PRIVATE int sqlite3PagerRollback(Pager*); SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int n); SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager); #ifndef SQLITE_OMIT_WAL SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, int, int*, int*); SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager); #if 0 SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager); #endif SQLITE_PRIVATE int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); #if 0 SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager); #endif #endif #ifdef SQLITE_ENABLE_ZIPVFS SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager); #endif /* Functions used to query pager state and configuration. */ SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager*); SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager*); #ifdef SQLITE_DEBUG SQLITE_PRIVATE int sqlite3PagerRefcount(Pager*); #endif #if 0 SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager*); #endif SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager*, int); SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager*); #ifdef SQLITE_DIRECT_OVERFLOW_READ SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager*); #endif SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager*); SQLITE_PRIVATE int sqlite3PagerNosync(Pager*); SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager*); #if 0 SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *, int, int, int *); SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *); #endif SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *); /* Functions used to truncate the database file. */ SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager*,Pgno); SQLITE_PRIVATE void sqlite3PagerRekey(DbPage*, Pgno, u16); #if defined(SQLITE_HAS_CODEC) && !defined(SQLITE_OMIT_WAL) SQLITE_PRIVATE void *sqlite3PagerCodec(DbPage *); #endif /* Functions to support testing and debugging. */ #if !defined(NDEBUG) || defined(SQLITE_TEST) SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage*); SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage*); #endif #ifdef SQLITE_TEST SQLITE_PRIVATE int *sqlite3PagerStats(Pager*); SQLITE_PRIVATE void sqlite3PagerRefdump(Pager*); void disable_simulated_io_errors(void); void enable_simulated_io_errors(void); #else # define disable_simulated_io_errors() # define enable_simulated_io_errors() #endif #endif /* _PAGER_H_ */ /************** End of pager.h ***********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /************** Include pcache.h in the middle of sqliteInt.h ****************/ /************** Begin file pcache.h ******************************************/ /* ** 2008 August 05 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This header file defines the interface that the sqlite page cache ** subsystem. */ #ifndef _PCACHE_H_ typedef struct PgHdr PgHdr; typedef struct PCache PCache; /* ** Every page in the cache is controlled by an instance of the following ** structure. */ struct PgHdr { sqlite3_pcache_page *pPage; /* Pcache object page handle */ void *pData; /* Page data */ void *pExtra; /* Extra content */ PgHdr *pDirty; /* Transient list of dirty pages */ Pager *pPager; /* The pager this page is part of */ Pgno pgno; /* Page number for this page */ #ifdef SQLITE_CHECK_PAGES u32 pageHash; /* Hash of page content */ #endif u16 flags; /* PGHDR flags defined below */ /********************************************************************** ** Elements above are public. All that follows is private to pcache.c ** and should not be accessed by other modules. */ i16 nRef; /* Number of users of this page */ PCache *pCache; /* Cache that owns this page */ PgHdr *pDirtyNext; /* Next element in list of dirty pages */ PgHdr *pDirtyPrev; /* Previous element in list of dirty pages */ }; /* Bit values for PgHdr.flags */ #define PGHDR_CLEAN 0x001 /* Page not on the PCache.pDirty list */ #define PGHDR_DIRTY 0x002 /* Page is on the PCache.pDirty list */ #define PGHDR_WRITEABLE 0x004 /* Journaled and ready to modify */ #define PGHDR_NEED_SYNC 0x008 /* Fsync the rollback journal before ** writing this page to the database */ #define PGHDR_NEED_READ 0x010 /* Content is unread */ #define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ #define PGHDR_MMAP 0x040 /* This is an mmap page object */ /* Initialize and shutdown the page cache subsystem */ SQLITE_PRIVATE int sqlite3PcacheInitialize(void); SQLITE_PRIVATE void sqlite3PcacheShutdown(void); /* Page cache buffer management: ** These routines implement SQLITE_CONFIG_PAGECACHE. */ SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *, int sz, int n); /* Create a new pager cache. ** Under memory stress, invoke xStress to try to make pages clean. ** Only clean and unpinned pages can be reclaimed. */ SQLITE_PRIVATE int sqlite3PcacheOpen( int szPage, /* Size of every page */ int szExtra, /* Extra space associated with each page */ int bPurgeable, /* True if pages are on backing store */ int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */ void *pStress, /* Argument to xStress */ PCache *pToInit /* Preallocated space for the PCache */ ); /* Modify the page-size after the cache has been created. */ SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *, int); /* Return the size in bytes of a PCache object. Used to preallocate ** storage space. */ SQLITE_PRIVATE int sqlite3PcacheSize(void); /* One release per successful fetch. Page is pinned until released. ** Reference counted. */ SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch(PCache*, Pgno, int createFlag); SQLITE_PRIVATE int sqlite3PcacheFetchStress(PCache*, Pgno, sqlite3_pcache_page**); SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish(PCache*, Pgno, sqlite3_pcache_page *pPage); SQLITE_PRIVATE void sqlite3PcacheRelease(PgHdr*); SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr*); /* Remove page from cache */ SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr*); /* Make sure page is marked dirty */ SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr*); /* Mark a single page as clean */ SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache*); /* Mark all dirty list pages as clean */ /* Change a page number. Used by incr-vacuum. */ SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr*, Pgno); /* Remove all pages with pgno>x. Reset the cache if x==0 */ SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache*, Pgno x); /* Get a list of all dirty pages in the cache, sorted by page number */ SQLITE_PRIVATE PgHdr *sqlite3PcacheDirtyList(PCache*); /* Reset and close the cache object */ SQLITE_PRIVATE void sqlite3PcacheClose(PCache*); /* Clear flags from pages of the page cache */ SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *); /* Discard the contents of the cache */ SQLITE_PRIVATE void sqlite3PcacheClear(PCache*); /* Return the total number of outstanding page references */ SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache*); /* Increment the reference count of an existing page */ SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr*); SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr*); #ifdef SQLITE_TEST /* Return the total number of pages stored in the cache */ SQLITE_PRIVATE int sqlite3PcachePagecount(PCache*); #endif #if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) /* Iterate through all dirty pages currently stored in the cache. This ** interface is only available if SQLITE_CHECK_PAGES is defined when the ** library is built. */ SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)); #endif /* Set and get the suggested cache-size for the specified pager-cache. ** ** If no global maximum is configured, then the system attempts to limit ** the total number of pages cached by purgeable pager-caches to the sum ** of the suggested cache-sizes. */ SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *, int); #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *); #endif #if 0 /* Free up as much memory as possible from the page cache */ SQLITE_PRIVATE void sqlite3PcacheShrink(PCache*); #endif #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT /* Try to return memory used by the pcache module to the main memory heap */ SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int); #endif #ifdef SQLITE_TEST SQLITE_PRIVATE void sqlite3PcacheStats(int*,int*,int*,int*); #endif SQLITE_PRIVATE void sqlite3PCacheSetDefault(void); /* Return the header size */ SQLITE_PRIVATE int sqlite3HeaderSizePcache(void); SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void); #endif /* _PCACHE_H_ */ /************** End of pcache.h **********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /************** Include os.h in the middle of sqliteInt.h ********************/ /************** Begin file os.h **********************************************/ /* ** 2001 September 16 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This header file (together with is companion C source-code file ** "os.c") attempt to abstract the underlying operating system so that ** the SQLite library will work on both POSIX and windows systems. ** ** This header file is #include-ed by sqliteInt.h and thus ends up ** being included by every source file. */ #ifndef _SQLITE_OS_H_ #define _SQLITE_OS_H_ /* ** Attempt to automatically detect the operating system and setup the ** necessary pre-processor macros for it. */ /************** Include os_setup.h in the middle of os.h *********************/ /************** Begin file os_setup.h ****************************************/ /* ** 2013 November 25 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains pre-processor directives related to operating system ** detection and/or setup. */ #ifndef _OS_SETUP_H_ #define _OS_SETUP_H_ /* ** Figure out if we are dealing with Unix, Windows, or some other operating ** system. ** ** After the following block of preprocess macros, all of SQLITE_OS_UNIX, ** SQLITE_OS_WIN, and SQLITE_OS_OTHER will defined to either 1 or 0. One of ** the three will be 1. The other two will be 0. */ #if defined(SQLITE_OS_OTHER) # if SQLITE_OS_OTHER==1 # undef SQLITE_OS_UNIX # define SQLITE_OS_UNIX 0 # undef SQLITE_OS_WIN # define SQLITE_OS_WIN 0 # else # undef SQLITE_OS_OTHER # endif #endif #if !defined(SQLITE_OS_UNIX) && !defined(SQLITE_OS_OTHER) # define SQLITE_OS_OTHER 0 # ifndef SQLITE_OS_WIN # if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || \ defined(__MINGW32__) || defined(__BORLANDC__) # define SQLITE_OS_WIN 1 # define SQLITE_OS_UNIX 0 # else # define SQLITE_OS_WIN 0 # define SQLITE_OS_UNIX 1 # endif # else # define SQLITE_OS_UNIX 0 # endif #else # ifndef SQLITE_OS_WIN # define SQLITE_OS_WIN 0 # endif #endif #endif /* _OS_SETUP_H_ */ /************** End of os_setup.h ********************************************/ /************** Continuing where we left off in os.h *************************/ /* If the SET_FULLSYNC macro is not defined above, then make it ** a no-op */ #ifndef SET_FULLSYNC # define SET_FULLSYNC(x,y) #endif /* ** The default size of a disk sector */ #ifndef SQLITE_DEFAULT_SECTOR_SIZE # define SQLITE_DEFAULT_SECTOR_SIZE 4096 #endif /* ** Temporary files are named starting with this prefix followed by 16 random ** alphanumeric characters, and no file extension. They are stored in the ** OS's standard temporary file directory, and are deleted prior to exit. ** If sqlite is being embedded in another program, you may wish to change the ** prefix to reflect your program's name, so that if your program exits ** prematurely, old temporary files can be easily identified. This can be done ** using -DSQLITE_TEMP_FILE_PREFIX=myprefix_ on the compiler command line. ** ** 2006-10-31: The default prefix used to be "sqlite_". But then ** Mcafee started using SQLite in their anti-virus product and it ** started putting files with the "sqlite" name in the c:/temp folder. ** This annoyed many windows users. Those users would then do a ** Google search for "sqlite", find the telephone numbers of the ** developers and call to wake them up at night and complain. ** For this reason, the default name prefix is changed to be "sqlite" ** spelled backwards. So the temp files are still identified, but ** anybody smart enough to figure out the code is also likely smart ** enough to know that calling the developer will not help get rid ** of the file. */ #ifndef SQLITE_TEMP_FILE_PREFIX # define SQLITE_TEMP_FILE_PREFIX "etilqs_" #endif /* ** The following values may be passed as the second argument to ** sqlite3OsLock(). The various locks exhibit the following semantics: ** ** SHARED: Any number of processes may hold a SHARED lock simultaneously. ** RESERVED: A single process may hold a RESERVED lock on a file at ** any time. Other processes may hold and obtain new SHARED locks. ** PENDING: A single process may hold a PENDING lock on a file at ** any one time. Existing SHARED locks may persist, but no new ** SHARED locks may be obtained by other processes. ** EXCLUSIVE: An EXCLUSIVE lock precludes all other locks. ** ** PENDING_LOCK may not be passed directly to sqlite3OsLock(). Instead, a ** process that requests an EXCLUSIVE lock may actually obtain a PENDING ** lock. This can be upgraded to an EXCLUSIVE lock by a subsequent call to ** sqlite3OsLock(). */ #define NO_LOCK 0 #define SHARED_LOCK 1 #define RESERVED_LOCK 2 #define PENDING_LOCK 3 #define EXCLUSIVE_LOCK 4 /* ** File Locking Notes: (Mostly about windows but also some info for Unix) ** ** We cannot use LockFileEx() or UnlockFileEx() on Win95/98/ME because ** those functions are not available. So we use only LockFile() and ** UnlockFile(). ** ** LockFile() prevents not just writing but also reading by other processes. ** A SHARED_LOCK is obtained by locking a single randomly-chosen ** byte out of a specific range of bytes. The lock byte is obtained at ** random so two separate readers can probably access the file at the ** same time, unless they are unlucky and choose the same lock byte. ** An EXCLUSIVE_LOCK is obtained by locking all bytes in the range. ** There can only be one writer. A RESERVED_LOCK is obtained by locking ** a single byte of the file that is designated as the reserved lock byte. ** A PENDING_LOCK is obtained by locking a designated byte different from ** the RESERVED_LOCK byte. ** ** On WinNT/2K/XP systems, LockFileEx() and UnlockFileEx() are available, ** which means we can use reader/writer locks. When reader/writer locks ** are used, the lock is placed on the same range of bytes that is used ** for probabilistic locking in Win95/98/ME. Hence, the locking scheme ** will support two or more Win95 readers or two or more WinNT readers. ** But a single Win95 reader will lock out all WinNT readers and a single ** WinNT reader will lock out all other Win95 readers. ** ** The following #defines specify the range of bytes used for locking. ** SHARED_SIZE is the number of bytes available in the pool from which ** a random byte is selected for a shared lock. The pool of bytes for ** shared locks begins at SHARED_FIRST. ** ** The same locking strategy and ** byte ranges are used for Unix. This leaves open the possibility of having ** clients on win95, winNT, and unix all talking to the same shared file ** and all locking correctly. To do so would require that samba (or whatever ** tool is being used for file sharing) implements locks correctly between ** windows and unix. I'm guessing that isn't likely to happen, but by ** using the same locking range we are at least open to the possibility. ** ** Locking in windows is manditory. For this reason, we cannot store ** actual data in the bytes used for locking. The pager never allocates ** the pages involved in locking therefore. SHARED_SIZE is selected so ** that all locks will fit on a single page even at the minimum page size. ** PENDING_BYTE defines the beginning of the locks. By default PENDING_BYTE ** is set high so that we don't have to allocate an unused page except ** for very large databases. But one should test the page skipping logic ** by setting PENDING_BYTE low and running the entire regression suite. ** ** Changing the value of PENDING_BYTE results in a subtly incompatible ** file format. Depending on how it is changed, you might not notice ** the incompatibility right away, even running a full regression test. ** The default location of PENDING_BYTE is the first byte past the ** 1GB boundary. ** */ #ifdef SQLITE_OMIT_WSD # define PENDING_BYTE (0x40000000) #else # define PENDING_BYTE sqlite3PendingByte #endif #define RESERVED_BYTE (PENDING_BYTE+1) #define SHARED_FIRST (PENDING_BYTE+2) #define SHARED_SIZE 510 /* ** Wrapper around OS specific sqlite3_os_init() function. */ SQLITE_PRIVATE int sqlite3OsInit(void); /* ** Functions for accessing sqlite3_file methods */ SQLITE_PRIVATE int sqlite3OsClose(sqlite3_file*); SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file*, void*, int amt, i64 offset); SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file*, const void*, int amt, i64 offset); SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file*, i64 size); SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file*, int); SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file*, i64 *pSize); SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file*, int); SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file*, int); SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut); SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file*,int,void*); SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file*,int,void*); #define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0 SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id); SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id); SQLITE_PRIVATE int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int, int, int); SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id); SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int); SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *, i64, void *); /* ** Functions for accessing sqlite3_vfs methods */ SQLITE_PRIVATE int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *); SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *, const char *, int); SQLITE_PRIVATE int sqlite3OsAccess(sqlite3_vfs *, const char *, int, int *pResOut); SQLITE_PRIVATE int sqlite3OsFullPathname(sqlite3_vfs *, const char *, int, char *); SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *, int, char *); SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *, int); #endif /* _SQLITE_OS_H_ */ /************** End of os.h **************************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /************** Include mutex.h in the middle of sqliteInt.h *****************/ /************** Begin file mutex.h *******************************************/ /* ** 2007 August 28 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file contains the common header for all mutex implementations. ** The sqliteInt.h header #includes this file so that it is available ** to all source files. We break it out in an effort to keep the code ** better organized. ** ** NOTE: source files should *not* #include this header file directly. ** Source files should #include the sqliteInt.h file and let that file ** include this one indirectly. */ /* ** Figure out what version of the code to use. The choices are ** ** SQLITE_MUTEX_OMIT No mutex logic. Not even stubs. The ** mutexes implementation cannot be overridden ** at start-time. ** ** SQLITE_MUTEX_NOOP For single-threaded applications. No ** mutual exclusion is provided. But this ** implementation can be overridden at ** start-time. ** ** SQLITE_MUTEX_PTHREADS For multi-threaded applications on Unix. ** ** SQLITE_MUTEX_W32 For multi-threaded applications on Win32. */ #if !SQLITE_THREADSAFE # define SQLITE_MUTEX_OMIT #endif #if SQLITE_THREADSAFE && !defined(SQLITE_MUTEX_NOOP) # if SQLITE_OS_UNIX # define SQLITE_MUTEX_PTHREADS # elif SQLITE_OS_WIN # define SQLITE_MUTEX_W32 # else # define SQLITE_MUTEX_NOOP # endif #endif #ifdef SQLITE_MUTEX_OMIT /* ** If this is a no-op implementation, implement everything as macros. */ #define sqlite3_mutex_alloc(X) ((sqlite3_mutex*)8) #define sqlite3_mutex_free(X) #define sqlite3_mutex_enter(X) #define sqlite3_mutex_try(X) SQLITE_OK #define sqlite3_mutex_leave(X) #define sqlite3_mutex_held(X) ((void)(X),1) #define sqlite3_mutex_notheld(X) ((void)(X),1) #define sqlite3MutexAlloc(X) ((sqlite3_mutex*)8) #define sqlite3MutexInit() SQLITE_OK #define sqlite3MutexEnd() #define MUTEX_LOGIC(X) #else #define MUTEX_LOGIC(X) X #endif /* defined(SQLITE_MUTEX_OMIT) */ /************** End of mutex.h ***********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /* ** The number of different kinds of things that can be limited ** using the sqlite3_limit() interface. */ #define SQLITE_N_LIMIT (SQLITE_LIMIT_WORKER_THREADS+1) /* ** Possible values for the sqlite.magic field. ** The numbers are obtained at random and have no special meaning, other ** than being distinct from one another. */ #define SQLITE_MAGIC_OPEN 0xa029a697 /* Database is open */ #define SQLITE_MAGIC_CLOSED 0x9f3c2d33 /* Database is closed */ #define SQLITE_MAGIC_SICK 0x4b771290 /* Error and awaiting close */ #define SQLITE_MAGIC_BUSY 0xf03b7906 /* Database currently in use */ #define SQLITE_MAGIC_ERROR 0xb5357930 /* An SQLITE_MISUSE error occurred */ #define SQLITE_MAGIC_ZOMBIE 0x64cffc7f /* Close with last statement close */ /* ** The SQLITE_*_BKPT macros are substitutes for the error codes with ** the same name but without the _BKPT suffix. These macros invoke ** routines that report the line-number on which the error originated ** using sqlite3_log(). The routines also provide a convenient place ** to set a debugger breakpoint. */ SQLITE_PRIVATE int sqlite3CorruptError(int); SQLITE_PRIVATE int sqlite3MisuseError(int); SQLITE_PRIVATE int sqlite3CantopenError(int); #define SQLITE_CORRUPT_BKPT sqlite3CorruptError(__LINE__) #define SQLITE_MISUSE_BKPT sqlite3MisuseError(__LINE__) #define SQLITE_CANTOPEN_BKPT sqlite3CantopenError(__LINE__) #include # define sqlite3Toupper(x) toupper((unsigned char)(x)) # define sqlite3Isspace(x) isspace((unsigned char)(x)) # define sqlite3Isalnum(x) isalnum((unsigned char)(x)) # define sqlite3Isalpha(x) isalpha((unsigned char)(x)) # define sqlite3Isdigit(x) isdigit((unsigned char)(x)) # define sqlite3Isxdigit(x) isxdigit((unsigned char)(x)) # define sqlite3Tolower(x) tolower((unsigned char)(x)) /* ** Internal function prototypes */ SQLITE_PRIVATE int sqlite3Strlen30(const char*); SQLITE_PRIVATE int sqlite3MallocInit(void); SQLITE_PRIVATE void sqlite3MallocEnd(void); SQLITE_PRIVATE void *sqlite3Malloc(u64); SQLITE_PRIVATE void *sqlite3MallocZero(u64); SQLITE_PRIVATE void *sqlite3DbMallocZero(Btree*, u64); SQLITE_PRIVATE void *sqlite3DbMallocRaw(Btree*, u64); SQLITE_PRIVATE void *sqlite3Realloc(void*, u64); SQLITE_PRIVATE void *sqlite3DbReallocOrFree(Btree*, void *, u64); SQLITE_PRIVATE void *sqlite3DbRealloc(Btree*, void *, u64); SQLITE_PRIVATE void sqlite3DbFree(Btree*, void*); SQLITE_PRIVATE int sqlite3MallocSize(void*); SQLITE_PRIVATE int sqlite3DbMallocSize(Btree*, void*); SQLITE_PRIVATE void *sqlite3ScratchMalloc(int); SQLITE_PRIVATE void sqlite3ScratchFree(void*); SQLITE_PRIVATE void *sqlite3PageMalloc(int); SQLITE_PRIVATE void sqlite3PageFree(void*); SQLITE_PRIVATE void sqlite3MemSetDefault(void); SQLITE_PRIVATE int sqlite3HeapNearlyFull(void); /* ** On systems with ample stack space and that support alloca(), make ** use of alloca() to obtain space for large automatic objects. By default, ** obtain space from malloc(). ** ** The alloca() routine never returns NULL. This will cause code paths ** that deal with sqlite3StackAlloc() failures to be unreachable. */ #ifdef SQLITE_USE_ALLOCA # define sqlite3StackAllocRaw(D,N) alloca(N) # define sqlite3StackAllocZero(D,N) memset(alloca(N), 0, N) # define sqlite3StackFree(D,P) #else # define sqlite3StackAllocRaw(D,N) sqlite3DbMallocRaw(D,N) # define sqlite3StackAllocZero(D,N) sqlite3DbMallocZero(D,N) # define sqlite3StackFree(D,P) sqlite3DbFree(D,P) #endif #ifndef SQLITE_MUTEX_OMIT SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void); SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void); SQLITE_PRIVATE sqlite3_mutex *sqlite3MutexAlloc(int); SQLITE_PRIVATE int sqlite3MutexInit(void); SQLITE_PRIVATE int sqlite3MutexEnd(void); #endif #if !defined(SQLITE_MUTEX_OMIT) && !defined(SQLITE_MUTEX_NOOP) SQLITE_PRIVATE void sqlite3MemoryBarrier(void); #else # define sqlite3MemoryBarrier() #endif #ifndef SQLITE_OMIT_FLOATING_POINT SQLITE_PRIVATE int sqlite3IsNaN(double); #else # define sqlite3IsNaN(X) 0 #endif /* ** An instance of the following structure holds information about SQL ** functions arguments that are the parameters to the printf() function. */ struct PrintfArguments { int nArg; /* Total number of arguments */ int nUsed; /* Number of arguments used so far */ Mem **apArg; /* The argument values */ }; #define SQLITE_PRINTF_INTERNAL 0x01 #define SQLITE_PRINTF_SQLFUNC 0x02 SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum*, Btree* pBtree, char*, int, int); SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum*,const char*,int); SQLITE_PRIVATE void sqlite3AppendChar(StrAccum*,int,char); SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum*); SQLITE_PRIVATE void sqlite3StrAccumReset(StrAccum*); SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(Btree*); SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler*); SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *); SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *, Pgno, const u8 *); typedef int (*RecordCompare)(int,const void*,UnpackedRecord*); #define SQLITE_RecoveryMode 0x00010000 /* Ignore schema errors */ #define SQLITE_CellSizeCk 0x10000000 /* Check btree cell sizes on load */ /* ** The following are used as the second parameter to sqlite3Savepoint(), ** and as the P1 argument to the OP_Savepoint instruction. */ #define SAVEPOINT_BEGIN 0 #define SAVEPOINT_RELEASE 1 #define SAVEPOINT_ROLLBACK 2 /* ** A "Collating Sequence" is defined by an instance of the following ** structure. Conceptually, a collating sequence consists of a name and ** a comparison routine that defines the order of that sequence. ** ** If CollSeq.xCmp is NULL, it means that the ** collating sequence is undefined. Indices built on an undefined ** collating sequence may not be read or written. */ struct CollSeq { char *zName; /* Name of the collating sequence, UTF-8 encoded */ u8 enc; /* Text encoding handled by xCmp() */ void *pUser; /* First argument to xCmp() */ int (*xCmp)(void*,int, const void*, int, const void*); void (*xDel)(void*); /* Destructor for pUser */ }; /* ** An instance of the following structure is passed as the first ** argument to sqlite3VdbeKeyCompare and is used to control the ** comparison of the two index keys. ** ** Note that aSortOrder[] and aColl[] have nField+1 slots. There ** are nField slots for the columns of an index then one extra slot ** for the rowid at the end. */ struct KeyInfo { u32 nRef; /* Number of references to this KeyInfo object */ u8 enc; /* Text encoding - one of the SQLITE_UTF* values */ u16 nField; /* Number of key columns in the index */ u16 nXField; /* Number of columns beyond the key columns */ Btree *pBtree; /* The database connection */ u8 *aSortOrder; /* Sort order for each column. */ CollSeq *aColl[1]; /* Collating sequence for each term of the key */ }; /* ** An objected used to accumulate the text of a string where we ** do not necessarily know how big the string will be in the end. */ struct StrAccum { Btree *pBtree; /* Optional database for lookaside. Can be NULL */ char *zBase; /* A base allocation. Not from malloc. */ char *zText; /* The string collected so far */ int nChar; /* Length of the string so far */ int nAlloc; /* Amount of space allocated in zText */ int mxAlloc; /* Maximum allowed allocation. 0 for no malloc usage */ u8 accError; /* STRACCUM_NOMEM or STRACCUM_TOOBIG */ }; #define STRACCUM_NOMEM 1 #define STRACCUM_TOOBIG 2 /************** Include btreeInt.h in the middle of sqliteInt.h **************/ /************** Begin file btreeInt.h ****************************************/ /* ** 2004 April 6 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file implements an external (disk-based) database using BTrees. ** For a detailed discussion of BTrees, refer to ** ** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3: ** "Sorting And Searching", pages 473-480. Addison-Wesley ** Publishing Company, Reading, Massachusetts. ** ** The basic idea is that each page of the file contains N database ** entries and N+1 pointers to subpages. ** ** ---------------------------------------------------------------- ** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N-1) | Ptr(N) | ** ---------------------------------------------------------------- ** ** All of the keys on the page that Ptr(0) points to have values less ** than Key(0). All of the keys on page Ptr(1) and its subpages have ** values greater than Key(0) and less than Key(1). All of the keys ** on Ptr(N) and its subpages have values greater than Key(N-1). And ** so forth. ** ** Finding a particular key requires reading O(log(M)) pages from the ** disk where M is the number of entries in the tree. ** ** In this implementation, a single file can hold one or more separate ** BTrees. Each BTree is identified by the index of its root page. The ** key and data for any entry are combined to form the "payload". A ** fixed amount of payload can be carried directly on the database ** page. If the payload is larger than the preset amount then surplus ** bytes are stored on overflow pages. The payload for an entry ** and the preceding pointer are combined to form a "Cell". Each ** page has a small header which contains the Ptr(N) pointer and other ** information such as the size of key and data. ** ** FORMAT DETAILS ** ** The file is divided into pages. The first page is called page 1, ** the second is page 2, and so forth. A page number of zero indicates ** "no such page". The page size can be any power of 2 between 512 and 65536. ** Each page can be either a btree page, a freelist page, an overflow ** page, or a pointer-map page. ** ** The first page is always a btree page. The first 100 bytes of the first ** page contain a special header (the "file header") that describes the file. ** The format of the file header is as follows: ** ** OFFSET SIZE DESCRIPTION ** 0 16 Header string: "SQLite format 3\000" ** 16 2 Page size in bytes. (1 means 65536) ** 18 1 File format write version ** 19 1 File format read version ** 20 1 Bytes of unused space at the end of each page ** 21 1 Max embedded payload fraction (must be 64) ** 22 1 Min embedded payload fraction (must be 32) ** 23 1 Min leaf payload fraction (must be 32) ** 24 4 File change counter ** 28 4 Reserved for future use ** 32 4 First freelist page ** 36 4 Number of freelist pages in the file ** 40 60 15 4-byte meta values passed to higher layers ** ** 40 4 Schema cookie ** 44 4 File format of schema layer ** 48 4 Size of page cache ** 52 4 Largest root-page (auto/incr_vacuum) ** 56 4 1=UTF-8 2=UTF16le 3=UTF16be ** 60 4 User version ** 64 4 Incremental vacuum mode ** 68 4 Application-ID ** 72 20 unused ** 92 4 The version-valid-for number ** 96 4 SQLITE_VERSION_NUMBER ** ** All of the integer values are big-endian (most significant byte first). ** ** The file change counter is incremented when the database is changed ** This counter allows other processes to know when the file has changed ** and thus when they need to flush their cache. ** ** The max embedded payload fraction is the amount of the total usable ** space in a page that can be consumed by a single cell for standard ** B-tree (non-LEAFDATA) tables. A value of 255 means 100%. The default ** is to limit the maximum cell size so that at least 4 cells will fit ** on one page. Thus the default max embedded payload fraction is 64. ** ** If the payload for a cell is larger than the max payload, then extra ** payload is spilled to overflow pages. Once an overflow page is allocated, ** as many bytes as possible are moved into the overflow pages without letting ** the cell size drop below the min embedded payload fraction. ** ** The min leaf payload fraction is like the min embedded payload fraction ** except that it applies to leaf nodes in a LEAFDATA tree. The maximum ** payload fraction for a LEAFDATA tree is always 100% (or 255) and it ** not specified in the header. ** ** Each btree pages is divided into three sections: The header, the ** cell pointer array, and the cell content area. Page 1 also has a 100-byte ** file header that occurs before the page header. ** ** |----------------| ** | file header | 100 bytes. Page 1 only. ** |----------------| ** | page header | 8 bytes for leaves. 12 bytes for interior nodes ** |----------------| ** | cell pointer | | 2 bytes per cell. Sorted order. ** | array | | Grows downward ** | | v ** |----------------| ** | unallocated | ** | space | ** |----------------| ^ Grows upwards ** | cell content | | Arbitrary order interspersed with freeblocks. ** | area | | and free space fragments. ** |----------------| ** ** The page headers looks like this: ** ** OFFSET SIZE DESCRIPTION ** 0 1 Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf ** 1 2 byte offset to the first freeblock ** 3 2 number of cells on this page ** 5 2 first byte of the cell content area ** 7 1 number of fragmented free bytes ** 8 4 Right child (the Ptr(N) value). Omitted on leaves. ** ** The flags define the format of this btree page. The leaf flag means that ** this page has no children. The zerodata flag means that this page carries ** only keys and no data. The intkey flag means that the key is an integer ** which is stored in the key size entry of the cell header rather than in ** the payload area. ** ** The cell pointer array begins on the first byte after the page header. ** The cell pointer array contains zero or more 2-byte numbers which are ** offsets from the beginning of the page to the cell content in the cell ** content area. The cell pointers occur in sorted order. The system strives ** to keep free space after the last cell pointer so that new cells can ** be easily added without having to defragment the page. ** ** Cell content is stored at the very end of the page and grows toward the ** beginning of the page. ** ** Unused space within the cell content area is collected into a linked list of ** freeblocks. Each freeblock is at least 4 bytes in size. The byte offset ** to the first freeblock is given in the header. Freeblocks occur in ** increasing order. Because a freeblock must be at least 4 bytes in size, ** any group of 3 or fewer unused bytes in the cell content area cannot ** exist on the freeblock chain. A group of 3 or fewer free bytes is called ** a fragment. The total number of bytes in all fragments is recorded. ** in the page header at offset 7. ** ** SIZE DESCRIPTION ** 2 Byte offset of the next freeblock ** 2 Bytes in this freeblock ** ** Cells are of variable length. Cells are stored in the cell content area at ** the end of the page. Pointers to the cells are in the cell pointer array ** that immediately follows the page header. Cells is not necessarily ** contiguous or in order, but cell pointers are contiguous and in order. ** ** Cell content makes use of variable length integers. A variable ** length integer is 1 to 9 bytes where the lower 7 bits of each ** byte are used. The integer consists of all bytes that have bit 8 set and ** the first byte with bit 8 clear. The most significant byte of the integer ** appears first. A variable-length integer may not be more than 9 bytes long. ** As a special case, all 8 bytes of the 9th byte are used as data. This ** allows a 64-bit integer to be encoded in 9 bytes. ** ** 0x00 becomes 0x00000000 ** 0x7f becomes 0x0000007f ** 0x81 0x00 becomes 0x00000080 ** 0x82 0x00 becomes 0x00000100 ** 0x80 0x7f becomes 0x0000007f ** 0x8a 0x91 0xd1 0xac 0x78 becomes 0x12345678 ** 0x81 0x81 0x81 0x81 0x01 becomes 0x10204081 ** ** Variable length integers are used for rowids and to hold the number of ** bytes of key and data in a btree cell. ** ** The content of a cell looks like this: ** ** SIZE DESCRIPTION ** 4 Page number of the left child. Omitted if leaf flag is set. ** var Number of bytes of data. Omitted if the zerodata flag is set. ** var Number of bytes of key. Or the key itself if intkey flag is set. ** * Payload ** 4 First page of the overflow chain. Omitted if no overflow ** ** Overflow pages form a linked list. Each page except the last is completely ** filled with data (pagesize - 4 bytes). The last page can have as little ** as 1 byte of data. ** ** SIZE DESCRIPTION ** 4 Page number of next overflow page ** * Data ** ** Freelist pages come in two subtypes: trunk pages and leaf pages. The ** file header points to the first in a linked list of trunk page. Each trunk ** page points to multiple leaf pages. The content of a leaf page is ** unspecified. A trunk page looks like this: ** ** SIZE DESCRIPTION ** 4 Page number of next trunk page ** 4 Number of leaf pointers on this page ** * zero or more pages numbers of leaves */ /* #include "sqliteInt.h" */ /* The following value is the maximum cell size assuming a maximum page ** size give above. */ #define MX_CELL_SIZE(pBt) ((int)(pBt->pageSize-8)) /* The maximum number of cells on a single page of the database. This ** assumes a minimum cell size of 6 bytes (4 bytes for the cell itself ** plus 2 bytes for the index to the cell in the page header). Such ** small cells will be rare, but they are possible. */ #define MX_CELL(pBt) ((pBt->pageSize-8)/6) /* Forward declarations */ typedef struct MemPage MemPage; typedef struct BtLock BtLock; typedef struct CellInfo CellInfo; /* ** This is a magic string that appears at the beginning of every ** SQLite database in order to identify the file as a real database. ** ** You can change this value at compile-time by specifying a ** -DSQLITE_FILE_HEADER="..." on the compiler command-line. The ** header must be exactly 16 bytes including the zero-terminator so ** the string itself should be 15 characters long. If you change ** the header, then your custom library will not be able to read ** databases generated by the standard tools and the standard tools ** will not be able to read databases created by your custom library. */ #ifndef SQLITE_FILE_HEADER /* 123456789 123456 */ # define SQLITE_FILE_HEADER "SQLite Btree v3" #endif /* ** Page type flags. An ORed combination of these flags appear as the ** first byte of on-disk image of every BTree page. */ #define PTF_INTKEY 0x01 #define PTF_ZERODATA 0x02 #define PTF_LEAFDATA 0x04 #define PTF_LEAF 0x08 /* ** As each page of the file is loaded into memory, an instance of the following ** structure is appended and initialized to zero. This structure stores ** information about the page that is decoded from the raw file page. ** ** The pParent field points back to the parent page. This allows us to ** walk up the BTree from any leaf to the root. Care must be taken to ** unref() the parent page pointer when this page is no longer referenced. ** The pageDestructor() routine handles that chore. ** ** Access to all fields of this structure is controlled by the mutex ** stored in MemPage.pBt->mutex. */ struct MemPage { u8 isInit; /* True if previously initialized. MUST BE FIRST! */ u8 nOverflow; /* Number of overflow cell bodies in aCell[] */ u8 intKey; /* True if table b-trees. False for index b-trees */ u8 intKeyLeaf; /* True if the leaf of an intKey table */ u8 noPayload; /* True if internal intKey page (thus w/o data) */ u8 leaf; /* True if a leaf page */ u8 hdrOffset; /* 100 for page 1. 0 otherwise */ u8 childPtrSize; /* 0 if leaf==1. 4 if leaf==0 */ u8 max1bytePayload; /* min(maxLocal,127) */ u8 bBusy; /* Prevent endless loops on corrupt database files */ u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */ u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */ u16 cellOffset; /* Index in aData of first cell pointer */ u16 nFree; /* Number of free bytes on the page */ u16 nCell; /* Number of cells on this page, local and ovfl */ u16 maskPage; /* Mask for page offset */ u16 aiOvfl[5]; /* Insert the i-th overflow cell before the aiOvfl-th ** non-overflow cell */ u8 *apOvfl[5]; /* Pointers to the body of overflow cells */ BtShared *pBt; /* Pointer to BtShared that this page is part of */ u8 *aData; /* Pointer to disk image of the page data */ u8 *aDataEnd; /* One byte past the end of usable data */ u8 *aCellIdx; /* The cell index area */ u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */ DbPage *pDbPage; /* Pager page handle */ u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */ void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */ Pgno pgno; /* Page number for this page */ }; /* ** The in-memory image of a disk page has the auxiliary information appended ** to the end. EXTRA_SIZE is the number of bytes of space needed to hold ** that extra information. */ #define EXTRA_SIZE sizeof(MemPage) /* ** A linked list of the following structures is stored at BtShared.pLock. ** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor ** is opened on the table with root page BtShared.iTable. Locks are removed ** from this list when a transaction is committed or rolled back, or when ** a btree handle is closed. */ struct BtLock { Btree *pBtree; /* Btree handle holding this lock */ Pgno iTable; /* Root page of table */ u8 eLock; /* READ_LOCK or WRITE_LOCK */ BtLock *pNext; /* Next in BtShared.pLock list */ }; /* Candidate values for BtLock.eLock */ #define READ_LOCK 1 #define WRITE_LOCK 2 /* A Btree handle ** ** A database connection contains a pointer to an instance of ** this object for every database file that it has open. This structure ** is opaque to the database connection. The database connection cannot ** see the internals of this structure and only deals with pointers to ** this structure. ** ** For some database files, the same underlying database cache might be ** shared between multiple connections. In that case, each connection ** has it own instance of this object. But each instance of this object ** points to the same BtShared object. The database cache and the ** schema associated with the database file are all contained within ** the BtShared object. ** ** All fields in this structure are accessed under sqlite3.mutex. ** The pBt pointer itself may not be changed while there exists cursors ** in the referenced BtShared that point back to this Btree since those ** cursors have to go through this Btree to find their BtShared and ** they often do so without holding sqlite3.mutex. */ struct Btree { sqlite3_mutex *mutex; /* Connection mutex */ int flags; /* Miscellaneous flags */ i64 szMmap; /* Default mmap_size setting */ u8 enc; /* Text encoding */ u8 mallocFailed; /* True if we have seen a malloc failure */ u32 magic; /* Magic number for detect library misuse */ int nVdbeRead; /* Number of active VDBEs that read or write */ BusyHandler busyHandler; /* Busy callback */ int nSavepoint; /* Number of non-transaction savepoints */ int *pnBytesFreed; /* If not NULL, increment this in DbFree() */ BtShared *pBt; /* Sharable content of this btree */ u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ u8 sharable; /* True if we can share pBt with another db */ u8 locked; /* True if db currently has pBt locked */ u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */ int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ int nBackup; /* Number of backup operations reading this btree */ u32 iDataVersion; /* Combines with pBt->pPager->iDataVersion */ Btree *pNext; /* List of other sharable Btrees from the same db */ Btree *pPrev; /* Back pointer of the same list */ #ifndef SQLITE_OMIT_SHARED_CACHE BtLock lock; /* Object used to lock page 1 */ #endif }; /* ** Btree.inTrans may take one of the following values. ** ** If the shared-data extension is enabled, there may be multiple users ** of the Btree structure. At most one of these may open a write transaction, ** but any number may have active read transactions. */ #define TRANS_NONE 0 #define TRANS_READ 1 #define TRANS_WRITE 2 /* ** An instance of this object represents a single database file. ** ** A single database file can be in use at the same time by two ** or more database connections. When two or more connections are ** sharing the same database file, each connection has it own ** private Btree object for the file and each of those Btrees points ** to this one BtShared object. BtShared.nRef is the number of ** connections currently sharing this database file. ** ** Fields in this structure are accessed under the BtShared.mutex ** mutex, except for nRef and pNext which are accessed under the ** global SQLITE_MUTEX_STATIC_MASTER mutex. The pPager field ** may not be modified once it is initially set as long as nRef>0. ** The pSchema field may be set once under BtShared.mutex and ** thereafter is unchanged as long as nRef>0. ** ** isPending: ** ** If a BtShared client fails to obtain a write-lock on a database ** table (because there exists one or more read-locks on the table), ** the shared-cache enters 'pending-lock' state and isPending is ** set to true. ** ** The shared-cache leaves the 'pending lock' state when either of ** the following occur: ** ** 1) The current writer (BtShared.pWriter) concludes its transaction, OR ** 2) The number of locks held by other connections drops to zero. ** ** while in the 'pending-lock' state, no connection may start a new ** transaction. ** ** This feature is included to help prevent writer-starvation. */ struct BtShared { Pager *pPager; /* The page cache */ Btree *pBt; /* Database connection currently using this Btree */ BtCursor *pCursor; /* A list of all open cursors */ MemPage *pPage1; /* First page of the database */ u8 openFlags; /* Flags to sqlite3BtreeOpen() */ #ifndef SQLITE_OMIT_AUTOVACUUM u8 autoVacuum; /* True if auto-vacuum is enabled */ u8 incrVacuum; /* True if incr-vacuum is enabled */ u8 bDoTruncate; /* True to truncate db on commit */ #endif u8 inTransaction; /* Transaction state */ u8 max1bytePayload; /* Maximum first byte of cell for a 1-byte payload */ #ifdef SQLITE_HAS_CODEC u8 optimalReserve; /* Desired amount of reserved space per page */ #endif u16 btsFlags; /* Boolean parameters. See BTS_* macros below */ u16 maxLocal; /* Maximum local payload in non-LEAFDATA tables */ u16 minLocal; /* Minimum local payload in non-LEAFDATA tables */ u16 maxLeaf; /* Maximum local payload in a LEAFDATA table */ u16 minLeaf; /* Minimum local payload in a LEAFDATA table */ u32 pageSize; /* Total number of bytes on a page */ u32 file_format; /* File Format */ u32 usableSize; /* Number of usable bytes on each page */ int nTransaction; /* Number of open transactions (read + write) */ u32 nPage; /* Number of pages in the database */ void *pSchema; /* Pointer to space allocated by sqlite3BtreeSchema() */ void (*xFreeSchema)(void*); /* Destructor for BtShared.pSchema */ sqlite3_mutex *mutex; /* Non-recursive mutex required to access this object */ Bitvec *pHasContent; /* Set of pages moved to free-list this transaction */ #ifndef SQLITE_OMIT_SHARED_CACHE int nRef; /* Number of references to this structure */ BtShared *pNext; /* Next on a list of sharable BtShared structs */ BtLock *pLock; /* List of locks held on this shared-btree struct */ Btree *pWriter; /* Btree with currently open write transaction */ #endif u8 *pTmpSpace; /* Temp space sufficient to hold a single cell */ }; /* ** Allowed values for BtShared.btsFlags */ #define BTS_READ_ONLY 0x0001 /* Underlying file is readonly */ #define BTS_PAGESIZE_FIXED 0x0002 /* Page size can no longer be changed */ #define BTS_SECURE_DELETE 0x0004 /* PRAGMA secure_delete is enabled */ #define BTS_INITIALLY_EMPTY 0x0008 /* Database was empty at trans start */ #define BTS_NO_WAL 0x0010 /* Do not open write-ahead-log files */ #define BTS_EXCLUSIVE 0x0020 /* pWriter has an exclusive lock */ #define BTS_PENDING 0x0040 /* Waiting for read-locks to clear */ /* ** An instance of the following structure is used to hold information ** about a cell. The parseCellPtr() function fills in this structure ** based on information extract from the raw disk page. */ struct CellInfo { i64 nKey; /* The key for INTKEY tables, or nPayload otherwise */ u8 *pPayload; /* Pointer to the start of payload */ u32 nPayload; /* Bytes of payload */ u16 nLocal; /* Amount of payload held locally, not on overflow */ u16 iOverflow; /* Offset to overflow page number. Zero if no overflow */ u16 nSize; /* Size of the cell content on the main b-tree page */ }; /* ** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than ** this will be declared corrupt. This value is calculated based on a ** maximum database size of 2^31 pages a minimum fanout of 2 for a ** root-node and 3 for all other internal nodes. ** ** If a tree that appears to be taller than this is encountered, it is ** assumed that the database is corrupt. */ #define BTCURSOR_MAX_DEPTH 20 /* ** A cursor is a pointer to a particular entry within a particular ** b-tree within a database file. ** ** The entry is identified by its MemPage and the index in ** MemPage.aCell[] of the entry. ** ** A single database file can be shared by two more database connections, ** but cursors cannot be shared. Each cursor is associated with a ** particular database connection identified BtCursor.pBtree.db. ** ** Fields in this structure are accessed under the BtShared.mutex ** found at self->pBt->mutex. ** ** skipNext meaning: ** eState==SKIPNEXT && skipNext>0: Next sqlite3BtreeNext() is no-op. ** eState==SKIPNEXT && skipNext<0: Next sqlite3BtreePrevious() is no-op. ** eState==FAULT: Cursor fault with skipNext as error code. */ struct BtCursor { Btree *pBtree; /* The Btree to which this cursor belongs */ BtShared *pBt; /* The BtShared this cursor points to */ BtCursor *pNext; /* Forms a linked list of all cursors */ Pgno *aOverflow; /* Cache of overflow page locations */ CellInfo info; /* A parse of the cell we are pointing at */ i64 nKey; /* Size of pKey, or last integer key */ void *pKey; /* Saved key that was cursor last known position */ Pgno pgnoRoot; /* The root page of this tree */ int nOvflAlloc; /* Allocated size of aOverflow[] array */ int skipNext; /* Prev() is noop if negative. Next() is noop if positive. ** Error code if eState==CURSOR_FAULT */ u8 curFlags; /* zero or more BTCF_* flags defined below */ u8 curPagerFlags; /* Flags to send to sqlite3PagerAcquire() */ u8 eState; /* One of the CURSOR_XXX constants (see below) */ u8 hints; /* As configured by CursorSetHints() */ i8 iPage; /* Index of current page in apPage */ u8 curIntKey; /* Value of apPage[0]->intKey */ struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ void *padding1; /* Make object size a multiple of 16 */ u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */ MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */ }; /* ** Legal values for BtCursor.curFlags */ #define BTCF_WriteFlag 0x01 /* True if a write cursor */ #define BTCF_ValidNKey 0x02 /* True if info.nKey is valid */ #define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ #define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ #define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ #define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ /* ** Potential values for BtCursor.eState. ** ** CURSOR_INVALID: ** Cursor does not point to a valid entry. This can happen (for example) ** because the table is empty or because BtreeCursorFirst() has not been ** called. ** ** CURSOR_VALID: ** Cursor points to a valid entry. getPayload() etc. may be called. ** ** CURSOR_SKIPNEXT: ** Cursor is valid except that the Cursor.skipNext field is non-zero ** indicating that the next sqlite3BtreeNext() or sqlite3BtreePrevious() ** operation should be a no-op. ** ** CURSOR_REQUIRESEEK: ** The table that this cursor was opened on still exists, but has been ** modified since the cursor was last used. The cursor position is saved ** in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in ** this state, restoreCursorPosition() can be called to attempt to ** seek the cursor to the saved position. ** ** CURSOR_FAULT: ** An unrecoverable error (an I/O error or a malloc failure) has occurred ** on a different connection that shares the BtShared cache with this ** cursor. The error has left the cache in an inconsistent state. ** Do nothing else with this cursor. Any attempt to use the cursor ** should return the error code stored in BtCursor.skipNext */ #define CURSOR_INVALID 0 #define CURSOR_VALID 1 #define CURSOR_SKIPNEXT 2 #define CURSOR_REQUIRESEEK 3 #define CURSOR_FAULT 4 /* ** The database page the PENDING_BYTE occupies. This page is never used. */ # define PENDING_BYTE_PAGE(pBt) PAGER_MJ_PGNO(pBt) /* ** These macros define the location of the pointer-map entry for a ** database page. The first argument to each is the number of usable ** bytes on each page of the database (often 1024). The second is the ** page number to look up in the pointer map. ** ** PTRMAP_PAGENO returns the database page number of the pointer-map ** page that stores the required pointer. PTRMAP_PTROFFSET returns ** the offset of the requested map entry. ** ** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page, ** then pgno is returned. So (pgno==PTRMAP_PAGENO(pgsz, pgno)) can be ** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements ** this test. */ #define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno(pBt, pgno) #define PTRMAP_PTROFFSET(pgptrmap, pgno) (5*(pgno-pgptrmap-1)) #define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno)) /* ** The pointer map is a lookup table that identifies the parent page for ** each child page in the database file. The parent page is the page that ** contains a pointer to the child. Every page in the database contains ** 0 or 1 parent pages. (In this context 'database page' refers ** to any page that is not part of the pointer map itself.) Each pointer map ** entry consists of a single byte 'type' and a 4 byte parent page number. ** The PTRMAP_XXX identifiers below are the valid types. ** ** The purpose of the pointer map is to facility moving pages from one ** position in the file to another as part of autovacuum. When a page ** is moved, the pointer in its parent must be updated to point to the ** new location. The pointer map is used to locate the parent page quickly. ** ** PTRMAP_ROOTPAGE: The database page is a root-page. The page-number is not ** used in this case. ** ** PTRMAP_FREEPAGE: The database page is an unused (free) page. The page-number ** is not used in this case. ** ** PTRMAP_OVERFLOW1: The database page is the first page in a list of ** overflow pages. The page number identifies the page that ** contains the cell with a pointer to this overflow page. ** ** PTRMAP_OVERFLOW2: The database page is the second or later page in a list of ** overflow pages. The page-number identifies the previous ** page in the overflow page list. ** ** PTRMAP_BTREE: The database page is a non-root btree page. The page number ** identifies the parent page in the btree. */ #define PTRMAP_ROOTPAGE 1 #define PTRMAP_FREEPAGE 2 #define PTRMAP_OVERFLOW1 3 #define PTRMAP_OVERFLOW2 4 #define PTRMAP_BTREE 5 /* A bunch of assert() statements to check the transaction state variables ** of handle p (type Btree*) are internally consistent. */ #define btreeIntegrity(p) \ assert( p->pBt->inTransaction!=TRANS_NONE || p->pBt->nTransaction==0 ); \ assert( p->pBt->inTransaction>=p->inTrans ); /* ** The ISAUTOVACUUM macro is used within balance_nonroot() to determine ** if the database supports auto-vacuum or not. Because it is used ** within an expression that is an argument to another macro ** (sqliteMallocRaw), it is not possible to use conditional compilation. ** So, this macro is defined instead. */ #ifndef SQLITE_OMIT_AUTOVACUUM #define ISAUTOVACUUM (pBt->autoVacuum) #else #define ISAUTOVACUUM 0 #endif /* ** This structure is passed around through all the sanity checking routines ** in order to keep track of some global state information. ** ** The aRef[] array is allocated so that there is 1 bit for each page in ** the database. As the integrity-check proceeds, for each page used in ** the database the corresponding bit is set. This allows integrity-check to ** detect pages that are used twice and orphaned pages (both of which ** indicate corruption). */ typedef struct IntegrityCk IntegrityCk; struct IntegrityCk { BtShared *pBt; /* The tree being checked out */ Pager *pPager; /* The associated pager. Also accessible by pBt->pPager */ u8 *aPgRef; /* 1 bit per page in the db (see above) */ Pgno nPage; /* Number of pages in the database */ int mxErr; /* Stop accumulating errors when this reaches zero */ int nErr; /* Number of messages written to zErrMsg so far */ int mallocFailed; /* A memory allocation error has occurred */ const char *zPfx; /* Error message prefix */ int v1, v2; /* Values for up to two %d fields in zPfx */ StrAccum errMsg; /* Accumulate the error message text here */ u32 *heap; /* Min-heap used for analyzing cell coverage */ }; /* ** Routines to read or write a two- and four-byte big-endian integer values. */ #define get2byte(x) ((x)[0]<<8 | (x)[1]) #define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v)) #define get4byte sqlite3Get4byte #define put4byte sqlite3Put4byte /* ** get2byteAligned(), unlike get2byte(), requires that its argument point to a ** two-byte aligned address. get2bytea() is only used for accessing the ** cell addresses in a btree header. */ #if SQLITE_BYTEORDER==4321 # define get2byteAligned(x) (*(u16*)(x)) #elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ && GCC_VERSION>=4008000 # define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) #elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ && defined(_MSC_VER) && _MSC_VER>=1300 # define get2byteAligned(x) _byteswap_ushort(*(u16*)(x)) #else # define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) #endif #ifndef SQLITE_OMIT_INCRBLOB SQLITE_PRIVATE int sqlite3VdbeMemExpandBlob(Mem *); #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0) #else #define sqlite3VdbeMemExpandBlob(x) SQLITE_OK #define ExpandBlob(P) SQLITE_OK #endif SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void); SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*); SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **); typedef int (*RecordCompare)(int,const void*,UnpackedRecord*); SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord*); /************** End of btreeInt.h ********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ /* ** Structure containing global configuration data for the SQLite library. ** ** This structure also contains some state information. */ struct Sqlite3Config { int bMemstat; /* True to enable memory status */ int bCoreMutex; /* True to enable core mutexing */ int bFullMutex; /* True to enable full mutexing */ int bOpenUri; /* True to interpret filenames as URIs */ int bUseCis; /* Use covering indices for full-scans */ int mxStrlen; /* Maximum string length */ int neverCorrupt; /* Database is always well-formed */ int szLookaside; /* Default lookaside buffer size */ int nLookaside; /* Default lookaside buffer count */ sqlite3_mem_methods m; /* Low-level memory allocation interface */ sqlite3_mutex_methods mutex; /* Low-level mutex interface */ sqlite3_pcache_methods2 pcache2; /* Low-level page-cache interface */ void *pHeap; /* Heap storage space */ int nHeap; /* Size of pHeap[] */ int mnReq, mxReq; /* Min and max heap requests sizes */ sqlite3_int64 szMmap; /* mmap() space per open file */ sqlite3_int64 mxMmap; /* Maximum value for szMmap */ void *pScratch; /* Scratch memory */ int szScratch; /* Size of each scratch buffer */ int nScratch; /* Number of scratch buffers */ void *pPage; /* Page cache memory */ int szPage; /* Size of each page in pPage[] */ int nPage; /* Number of pages in pPage[] */ int mxParserStack; /* maximum depth of the parser stack */ int sharedCacheEnabled; /* true if shared-cache mode enabled */ u32 szPma; /* Maximum Sorter PMA size */ /* The above might be initialized to non-zero. The following need to always ** initially be zero, however. */ int isInit; /* True after initialization has finished */ int inProgress; /* True while initialization in progress */ int isMutexInit; /* True after mutexes are initialized */ int isMallocInit; /* True after malloc is initialized */ int isPCacheInit; /* True after malloc is initialized */ int nRefInitMutex; /* Number of users of pInitMutex */ sqlite3_mutex *pInitMutex; /* Mutex used by sqlite3BtreeInitialize() */ void (*xLog)(void*,int,const char*); /* Function for logging */ void *pLogArg; /* First argument to xLog() */ #ifdef SQLITE_VDBE_COVERAGE /* The following callback (if not NULL) is invoked on every VDBE branch ** operation. Set the callback using SQLITE_TESTCTRL_VDBE_COVERAGE. */ void (*xVdbeBranch)(void*,int iSrcLine,u8 eThis,u8 eMx); /* Callback */ void *pVdbeBranchArg; /* 1st argument */ #endif int bLocaltimeFault; /* True to fail localtime() calls */ }; #ifndef SQLITE_AMALGAMATION SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config; #ifndef SQLITE_OMIT_WSD SQLITE_PRIVATE int sqlite3PendingByte; #endif #endif #if (defined(i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ defined(_M_AMD64) || defined(_M_ARM) || defined(__x86) || \ defined(__arm__)) && !defined(SQLITE_RUNTIME_BYTEORDER) # define SQLITE_BYTEORDER 1234 # define SQLITE_BIGENDIAN 0 # define SQLITE_LITTLEENDIAN 1 # define SQLITE_UTF16NATIVE SQLITE_UTF16LE #endif #if (defined(sparc) || defined(__ppc__)) \ && !defined(SQLITE_RUNTIME_BYTEORDER) # define SQLITE_BYTEORDER 4321 # define SQLITE_BIGENDIAN 1 # define SQLITE_LITTLEENDIAN 0 # define SQLITE_UTF16NATIVE SQLITE_UTF16BE #endif #if !defined(SQLITE_BYTEORDER) const int sqlite3one = 1; # define SQLITE_BYTEORDER 0 /* 0 means "unknown at compile-time" */ # define SQLITE_BIGENDIAN (*(char *)(&sqlite3one)==0) # define SQLITE_LITTLEENDIAN (*(char *)(&sqlite3one)==1) # define SQLITE_UTF16NATIVE (SQLITE_BIGENDIAN?SQLITE_UTF16BE:SQLITE_UTF16LE) #endif #ifdef SQLITE_ENABLE_ATOMIC_WRITE SQLITE_PRIVATE int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int); SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *); SQLITE_PRIVATE int sqlite3JournalCreate(sqlite3_file *); SQLITE_PRIVATE int sqlite3JournalExists(sqlite3_file *p); #else #define sqlite3JournalSize(pVfs) ((pVfs)->szOsFile) #define sqlite3JournalExists(p) 1 #endif SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *); SQLITE_PRIVATE int sqlite3MemJournalSize(void); SQLITE_PRIVATE int sqlite3IsMemJournal(sqlite3_file *); #ifdef SQLITE_ENABLE_UNLOCK_NOTIFY SQLITE_PRIVATE void sqlite3ConnectionBlocked(Btree*, Btree*); SQLITE_PRIVATE void sqlite3ConnectionUnlocked(Btree *db); SQLITE_PRIVATE void sqlite3ConnectionClosed(Btree *db); #else #define sqlite3ConnectionBlocked(x,y) #define sqlite3ConnectionUnlocked(x) #define sqlite3ConnectionClosed(x) #endif /* ** If the SQLITE_ENABLE IOTRACE exists then the global variable ** sqlite3IoTrace is a pointer to a printf-like routine used to ** print I/O tracing messages. */ #ifdef SQLITE_ENABLE_IOTRACE # define IOTRACE(A) if( sqlite3IoTrace ){ sqlite3IoTrace A; } SQLITE_PRIVATE void sqlite3VdbeIOTraceSql(Vdbe*); SQLITE_API SQLITE_EXTERN void (SQLITE_CDECL *sqlite3IoTrace)(const char*,...); #else # define IOTRACE(A) # define sqlite3VdbeIOTraceSql(X) #endif SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32); SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec*, u32); SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec*, u32); SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec*, u32); SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec*, u32, void*); SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec*); SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec*); SQLITE_PRIVATE const void *sqlite3ValueText(Mem*, u8); /* ** These routines are available for the mem2.c debugging memory allocator ** only. They are used to verify that different "types" of memory ** allocations are properly tracked by the system. ** ** sqlite3MemdebugSetType() sets the "type" of an allocation to one of ** the MEMTYPE_* macros defined below. The type must be a bitmask with ** a single bit set. ** ** sqlite3MemdebugHasType() returns true if any of the bits in its second ** argument match the type set by the previous sqlite3MemdebugSetType(). ** sqlite3MemdebugHasType() is intended for use inside assert() statements. ** ** sqlite3MemdebugNoType() returns true if none of the bits in its second ** argument match the type set by the previous sqlite3MemdebugSetType(). ** ** Perhaps the most important point is the difference between MEMTYPE_HEAP ** and MEMTYPE_LOOKASIDE. If an allocation is MEMTYPE_LOOKASIDE, that means ** it might have been allocated by lookaside, except the allocation was ** too large or lookaside was already full. It is important to verify ** that allocations that might have been satisfied by lookaside are not ** passed back to non-lookaside free() routines. Asserts such as the ** example above are placed on the non-lookaside free() routines to verify ** this constraint. ** ** All of this is no-op for a production build. It only comes into ** play when the SQLITE_MEMDEBUG compile-time option is used. */ #ifdef SQLITE_MEMDEBUG SQLITE_PRIVATE void sqlite3MemdebugSetType(void*,u8); SQLITE_PRIVATE int sqlite3MemdebugHasType(void*,u8); SQLITE_PRIVATE int sqlite3MemdebugNoType(void*,u8); #else # define sqlite3MemdebugSetType(X,Y) /* no-op */ # define sqlite3MemdebugHasType(X,Y) 1 # define sqlite3MemdebugNoType(X,Y) 1 #endif #define MEMTYPE_HEAP 0x01 /* General heap allocations */ #define MEMTYPE_LOOKASIDE 0x02 /* Heap that might have been lookaside */ #define MEMTYPE_SCRATCH 0x04 /* Scratch allocations */ #define MEMTYPE_PCACHE 0x08 /* Page cache allocations */ SQLITE_PRIVATE i64 sqlite3VdbeIntValue(Mem*); SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem*); SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip(int, const void *, UnpackedRecord *, int); #endif /* _SQLITEINT_H_ */ /************** End of sqliteInt.h *******************************************/ /************** Begin file global.c ******************************************/ /* ** 2008 June 13 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file contains definitions of global variables and constants. */ /* #include "sqliteInt.h" */ /* An array to map all upper-case characters into their corresponding ** lower-case character. ** ** SQLite only considers US-ASCII (or EBCDIC) characters. We do not ** handle case conversions for the UTF character set since the tables ** involved are nearly as big or bigger than SQLite itself. */ const unsigned char sqlite3UpperToLower[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107, 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161, 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179, 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197, 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215, 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, 252,253,254,255 }; /* EVIDENCE-OF: R-02982-34736 In order to maintain full backwards ** compatibility for legacy applications, the URI filename capability is ** disabled by default. ** ** EVIDENCE-OF: R-38799-08373 URI filenames can be enabled or disabled ** using the SQLITE_USE_URI=1 or SQLITE_USE_URI=0 compile-time options. ** ** EVIDENCE-OF: R-43642-56306 By default, URI handling is globally ** disabled. The default value may be changed by compiling with the ** SQLITE_USE_URI symbol defined. */ #ifndef SQLITE_USE_URI # define SQLITE_USE_URI 0 #endif /* EVIDENCE-OF: R-38720-18127 The default setting is determined by the ** SQLITE_ALLOW_COVERING_INDEX_SCAN compile-time option, or is "on" if ** that compile-time option is omitted. */ #ifndef SQLITE_ALLOW_COVERING_INDEX_SCAN # define SQLITE_ALLOW_COVERING_INDEX_SCAN 1 #endif /* The minimum PMA size is set to this value multiplied by the database ** page size in bytes. */ #ifndef SQLITE_SORTER_PMASZ # define SQLITE_SORTER_PMASZ 250 #endif /* ** The following singleton contains the global configuration for ** the SQLite library. */ SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { SQLITE_DEFAULT_MEMSTATUS, /* bMemstat */ 1, /* bCoreMutex */ SQLITE_THREADSAFE==1, /* bFullMutex */ SQLITE_USE_URI, /* bOpenUri */ SQLITE_ALLOW_COVERING_INDEX_SCAN, /* bUseCis */ 0x7ffffffe, /* mxStrlen */ 0, /* neverCorrupt */ 128, /* szLookaside */ 500, /* nLookaside */ {0,0,0,0,0,0,0,0}, /* m */ {0,0,0,0,0,0,0,0,0}, /* mutex */ {0,0,0,0,0,0,0,0,0,0,0,0,0},/* pcache2 */ (void*)0, /* pHeap */ 0, /* nHeap */ 0, 0, /* mnHeap, mxHeap */ SQLITE_DEFAULT_MMAP_SIZE, /* szMmap */ SQLITE_MAX_MMAP_SIZE, /* mxMmap */ (void*)0, /* pScratch */ 0, /* szScratch */ 0, /* nScratch */ (void*)0, /* pPage */ 0, /* szPage */ SQLITE_DEFAULT_PCACHE_INITSZ, /* nPage */ 0, /* mxParserStack */ 0, /* sharedCacheEnabled */ SQLITE_SORTER_PMASZ, /* szPma */ /* All the rest should always be initialized to zero */ 0, /* isInit */ 0, /* inProgress */ 0, /* isMutexInit */ 0, /* isMallocInit */ 0, /* isPCacheInit */ 0, /* nRefInitMutex */ 0, /* pInitMutex */ 0, /* xLog */ 0, /* pLogArg */ #ifdef SQLITE_VDBE_COVERAGE 0, /* xVdbeBranch */ 0, /* pVbeBranchArg */ #endif 0 /* bLocaltimeFault */ }; /* ** The value of the "pending" byte must be 0x40000000 (1 byte past the ** 1-gibabyte boundary) in a compatible database. SQLite never uses ** the database page that contains the pending byte. It never attempts ** to read or write that page. The pending byte page is set assign ** for use by the VFS layers as space for managing file locks. ** ** During testing, it is often desirable to move the pending byte to ** a different position in the file. This allows code that has to ** deal with the pending byte to run on files that are much smaller ** than 1 GiB. The sqlite3_test_control() interface can be used to ** move the pending byte. ** ** IMPORTANT: Changing the pending byte to any value other than ** 0x40000000 results in an incompatible database file format! ** Changing the pending byte during operation will result in undefined ** and incorrect behavior. */ #ifndef SQLITE_OMIT_WSD SQLITE_PRIVATE int sqlite3PendingByte = 0x40000000; #endif /************** End of global.c **********************************************/ /************** Begin file status.c ******************************************/ /* ** 2008 June 18 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This module implements the sqlite3_status() interface and related ** functionality. */ /* #include "sqliteInt.h" */ /* ** Variables in which to record status information. */ typedef struct sqlite3StatType sqlite3StatType; static SQLITE_WSD struct sqlite3StatType { #if SQLITE_PTRSIZE>4 sqlite3_int64 nowValue[10]; /* Current value */ sqlite3_int64 mxValue[10]; /* Maximum value */ #else u32 nowValue[10]; /* Current value */ u32 mxValue[10]; /* Maximum value */ #endif } sqlite3Stat = { {0,}, {0,} }; #ifndef NDEBUG /* ** Elements of sqlite3Stat[] are protected by either the memory allocator ** mutex, or by the pcache1 mutex. The following array determines which. */ static const char statMutex[] = { 0, /* SQLITE_STATUS_MEMORY_USED */ 1, /* SQLITE_STATUS_PAGECACHE_USED */ 1, /* SQLITE_STATUS_PAGECACHE_OVERFLOW */ 0, /* SQLITE_STATUS_SCRATCH_USED */ 0, /* SQLITE_STATUS_SCRATCH_OVERFLOW */ 0, /* SQLITE_STATUS_MALLOC_SIZE */ 0, /* SQLITE_STATUS_PARSER_STACK */ 1, /* SQLITE_STATUS_PAGECACHE_SIZE */ 0, /* SQLITE_STATUS_SCRATCH_SIZE */ 0, /* SQLITE_STATUS_MALLOC_COUNT */ }; #endif /* The "wsdStat" macro will resolve to the status information ** state vector. If writable static data is unsupported on the target, ** we have to locate the state vector at run-time. In the more common ** case where writable static data is supported, wsdStat can refer directly ** to the "sqlite3Stat" state vector declared above. */ #ifdef SQLITE_OMIT_WSD # define wsdStatInit sqlite3StatType *x = &GLOBAL(sqlite3StatType,sqlite3Stat) # define wsdStat x[0] #else # define wsdStatInit # define wsdStat sqlite3Stat #endif /* ** Return the current value of a status parameter. The caller must ** be holding the appropriate mutex. */ SQLITE_PRIVATE sqlite3_int64 sqlite3StatusValue(int op){ wsdStatInit; assert( op>=0 && op=0 && op=0 && op=0 && opwsdStat.mxValue[op] ){ wsdStat.mxValue[op] = wsdStat.nowValue[op]; } } SQLITE_PRIVATE void sqlite3StatusDown(int op, int N){ wsdStatInit; assert( N>=0 ); assert( op>=0 && op=0 && op=0 && op=0 && opwsdStat.mxValue[op] ){ wsdStat.mxValue[op] = wsdStat.nowValue[op]; } } /************** End of status.c **********************************************/ /************** Begin file os.c **********************************************/ /* ** 2005 November 29 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains OS interface code that is common to all ** architectures. */ #define _SQLITE_OS_C_ 1 /* #include "sqliteInt.h" */ #undef _SQLITE_OS_C_ /* ** The default SQLite sqlite3_vfs implementations do not allocate ** memory (actually, os_unix.c allocates a small amount of memory ** from within OsOpen()), but some third-party implementations may. ** So we test the effects of a malloc() failing and the sqlite3OsXXX() ** function returning SQLITE_IOERR_NOMEM using the DO_OS_MALLOC_TEST macro. ** ** The following functions are instrumented for malloc() failure ** testing: ** ** sqlite3OsRead() ** sqlite3OsWrite() ** sqlite3OsSync() ** sqlite3OsFileSize() ** sqlite3OsLock() ** sqlite3OsCheckReservedLock() ** sqlite3OsFileControl() ** sqlite3OsShmMap() ** sqlite3OsOpen() ** sqlite3OsDelete() ** sqlite3OsAccess() ** sqlite3OsFullPathname() ** */ #if defined(SQLITE_TEST) SQLITE_PRIVATE int sqlite3_memdebug_vfs_oom_test = 1; #define DO_OS_MALLOC_TEST(x) \ if (sqlite3_memdebug_vfs_oom_test && (!x || !sqlite3IsMemJournal(x))) { \ void *pTstAlloc = sqlite3Malloc(10); \ if (!pTstAlloc) return SQLITE_IOERR_NOMEM; \ sqlite3_free(pTstAlloc); \ } #else #define DO_OS_MALLOC_TEST(x) #endif /* ** The following routines are convenience wrappers around methods ** of the sqlite3_file object. This is mostly just syntactic sugar. All ** of this would be completely automatic if SQLite were coded using ** C++ instead of plain old C. */ SQLITE_PRIVATE int sqlite3OsClose(sqlite3_file *pId){ int rc = SQLITE_OK; if( pId->pMethods ){ rc = pId->pMethods->xClose(pId); pId->pMethods = 0; } return rc; } SQLITE_PRIVATE int sqlite3OsRead(sqlite3_file *id, void *pBuf, int amt, i64 offset){ DO_OS_MALLOC_TEST(id); return id->pMethods->xRead(id, pBuf, amt, offset); } SQLITE_PRIVATE int sqlite3OsWrite(sqlite3_file *id, const void *pBuf, int amt, i64 offset){ DO_OS_MALLOC_TEST(id); return id->pMethods->xWrite(id, pBuf, amt, offset); } SQLITE_PRIVATE int sqlite3OsTruncate(sqlite3_file *id, i64 size){ return id->pMethods->xTruncate(id, size); } SQLITE_PRIVATE int sqlite3OsSync(sqlite3_file *id, int flags){ DO_OS_MALLOC_TEST(id); return id->pMethods->xSync(id, flags); } SQLITE_PRIVATE int sqlite3OsFileSize(sqlite3_file *id, i64 *pSize){ DO_OS_MALLOC_TEST(id); return id->pMethods->xFileSize(id, pSize); } SQLITE_PRIVATE int sqlite3OsLock(sqlite3_file *id, int lockType){ DO_OS_MALLOC_TEST(id); return id->pMethods->xLock(id, lockType); } SQLITE_PRIVATE int sqlite3OsUnlock(sqlite3_file *id, int lockType){ return id->pMethods->xUnlock(id, lockType); } SQLITE_PRIVATE int sqlite3OsCheckReservedLock(sqlite3_file *id, int *pResOut){ DO_OS_MALLOC_TEST(id); return id->pMethods->xCheckReservedLock(id, pResOut); } /* ** Use sqlite3OsFileControl() when we are doing something that might fail ** and we need to know about the failures. Use sqlite3OsFileControlHint() ** when simply tossing information over the wall to the VFS and we do not ** really care if the VFS receives and understands the information since it ** is only a hint and can be safely ignored. The sqlite3OsFileControlHint() ** routine has no return value since the return value would be meaningless. */ SQLITE_PRIVATE int sqlite3OsFileControl(sqlite3_file *id, int op, void *pArg){ #ifdef SQLITE_TEST if( op!=SQLITE_FCNTL_COMMIT_PHASETWO ){ /* Faults are not injected into COMMIT_PHASETWO because, assuming SQLite ** is using a regular VFS, it is called after the corresponding ** transaction has been committed. Injecting a fault at this point ** confuses the test scripts - the COMMIT comand returns SQLITE_NOMEM ** but the transaction is committed anyway. ** ** The core must call OsFileControl() though, not OsFileControlHint(), ** as if a custom VFS (e.g. zipvfs) returns an error here, it probably ** means the commit really has failed and an error should be returned ** to the user. */ DO_OS_MALLOC_TEST(id); } #endif return id->pMethods->xFileControl(id, op, pArg); } SQLITE_PRIVATE void sqlite3OsFileControlHint(sqlite3_file *id, int op, void *pArg){ (void)id->pMethods->xFileControl(id, op, pArg); } SQLITE_PRIVATE int sqlite3OsSectorSize(sqlite3_file *id){ int (*xSectorSize)(sqlite3_file*) = id->pMethods->xSectorSize; return (xSectorSize ? xSectorSize(id) : SQLITE_DEFAULT_SECTOR_SIZE); } SQLITE_PRIVATE int sqlite3OsDeviceCharacteristics(sqlite3_file *id){ return id->pMethods->xDeviceCharacteristics(id); } SQLITE_PRIVATE int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){ return id->pMethods->xShmLock(id, offset, n, flags); } SQLITE_PRIVATE void sqlite3OsShmBarrier(sqlite3_file *id){ id->pMethods->xShmBarrier(id); } SQLITE_PRIVATE int sqlite3OsShmUnmap(sqlite3_file *id, int deleteFlag){ return id->pMethods->xShmUnmap(id, deleteFlag); } SQLITE_PRIVATE int sqlite3OsShmMap( sqlite3_file *id, /* Database file handle */ int iPage, int pgsz, int bExtend, /* True to extend file if necessary */ void volatile **pp /* OUT: Pointer to mapping */ ){ DO_OS_MALLOC_TEST(id); return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp); } #if SQLITE_MAX_MMAP_SIZE>0 /* The real implementation of xFetch and xUnfetch */ SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ DO_OS_MALLOC_TEST(id); return id->pMethods->xFetch(id, iOff, iAmt, pp); } SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ return id->pMethods->xUnfetch(id, iOff, p); } #else /* No-op stubs to use when memory-mapped I/O is disabled */ SQLITE_PRIVATE int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ *pp = 0; return SQLITE_OK; } SQLITE_PRIVATE int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ return SQLITE_OK; } #endif /* ** The next group of routines are convenience wrappers around the ** VFS methods. */ SQLITE_PRIVATE int sqlite3OsOpen( sqlite3_vfs *pVfs, const char *zPath, sqlite3_file *pFile, int flags, int *pFlagsOut ){ int rc; DO_OS_MALLOC_TEST(0); /* 0x87f7f is a mask of SQLITE_OPEN_ flags that are valid to be passed ** down into the VFS layer. Some SQLITE_OPEN_ flags (for example, ** SQLITE_OPEN_FULLMUTEX or SQLITE_OPEN_SHAREDCACHE) are blocked before ** reaching the VFS. */ rc = pVfs->xOpen(pVfs, zPath, pFile, flags & 0x87f7f, pFlagsOut); assert( rc==SQLITE_OK || pFile->pMethods==0 ); return rc; } SQLITE_PRIVATE int sqlite3OsDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ DO_OS_MALLOC_TEST(0); assert( dirSync==0 || dirSync==1 ); return pVfs->xDelete(pVfs, zPath, dirSync); } SQLITE_PRIVATE int sqlite3OsAccess( sqlite3_vfs *pVfs, const char *zPath, int flags, int *pResOut ){ DO_OS_MALLOC_TEST(0); return pVfs->xAccess(pVfs, zPath, flags, pResOut); } SQLITE_PRIVATE int sqlite3OsFullPathname( sqlite3_vfs *pVfs, const char *zPath, int nPathOut, char *zPathOut ){ DO_OS_MALLOC_TEST(0); zPathOut[0] = 0; return pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); } SQLITE_PRIVATE int sqlite3OsRandomness(sqlite3_vfs *pVfs, int nByte, char *zBufOut){ return pVfs->xRandomness(pVfs, nByte, zBufOut); } SQLITE_PRIVATE int sqlite3OsSleep(sqlite3_vfs *pVfs, int nMicro){ return pVfs->xSleep(pVfs, nMicro); } /* ** This function is a wrapper around the OS specific implementation of ** sqlite3_os_init(). The purpose of the wrapper is to provide the ** ability to simulate a malloc failure, so that the handling of an ** error in sqlite3_os_init() by the upper layers can be tested. */ SQLITE_PRIVATE int sqlite3OsInit(void){ void *p = sqlite3_malloc(10); if( p==0 ) return SQLITE_NOMEM; sqlite3_free(p); return sqlite3_os_init(); } /* ** The list of all registered VFS implementations. */ static sqlite3_vfs * SQLITE_WSD vfsList = 0; #define vfsList GLOBAL(sqlite3_vfs *, vfsList) /* ** Locate a VFS by name. If no name is given, simply return the ** first VFS on the list. */ SQLITE_PRIVATE sqlite3_vfs *sqlite3_vfs_find(const char *zVfs){ sqlite3_vfs *pVfs = 0; #if SQLITE_THREADSAFE sqlite3_mutex *mutex; #endif #ifndef SQLITE_OMIT_AUTOINIT int rc = sqlite3BtreeInitialize(); if( rc ) return 0; #endif #if SQLITE_THREADSAFE mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); #endif sqlite3_mutex_enter(mutex); for(pVfs = vfsList; pVfs; pVfs=pVfs->pNext){ if( zVfs==0 ) break; if( strcmp(zVfs, pVfs->zName)==0 ) break; } sqlite3_mutex_leave(mutex); return pVfs; } /* ** Unlink a VFS from the linked list */ static void vfsUnlink(sqlite3_vfs *pVfs){ assert( sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)) ); if( pVfs==0 ){ /* No-op */ }else if( vfsList==pVfs ){ vfsList = pVfs->pNext; }else if( vfsList ){ sqlite3_vfs *p = vfsList; while( p->pNext && p->pNext!=pVfs ){ p = p->pNext; } if( p->pNext==pVfs ){ p->pNext = pVfs->pNext; } } } /* ** Register a VFS with the system. It is harmless to register the same ** VFS multiple times. The new VFS becomes the default if makeDflt is ** true. */ SQLITE_PRIVATE int sqlite3_vfs_register(sqlite3_vfs *pVfs, int makeDflt){ MUTEX_LOGIC(sqlite3_mutex *mutex;) #ifndef SQLITE_OMIT_AUTOINIT int rc = sqlite3BtreeInitialize(); if( rc ) return rc; #endif #ifdef SQLITE_ENABLE_API_ARMOR if( pVfs==0 ) return SQLITE_MISUSE_BKPT; #endif MUTEX_LOGIC( mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) sqlite3_mutex_enter(mutex); vfsUnlink(pVfs); if( makeDflt || vfsList==0 ){ pVfs->pNext = vfsList; vfsList = pVfs; }else{ pVfs->pNext = vfsList->pNext; vfsList->pNext = pVfs; } assert(vfsList); sqlite3_mutex_leave(mutex); return SQLITE_OK; } /************** End of os.c **************************************************/ /************** Begin file mem1.c ********************************************/ /* ** 2007 August 14 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file contains low-level memory allocation drivers for when ** SQLite will use the standard C-library malloc/realloc/free interface ** to obtain the memory it needs. ** ** This file contains implementations of the low-level memory allocation ** routines specified in the sqlite3_mem_methods object. The content of ** this file is only used if SQLITE_SYSTEM_MALLOC is defined. The ** SQLITE_SYSTEM_MALLOC macro is defined automatically if neither the ** SQLITE_MEMDEBUG nor the SQLITE_WIN32_MALLOC macros are defined. The ** default configuration is to use memory allocation routines in this ** file. ** ** C-preprocessor macro summary: ** ** HAVE_MALLOC_USABLE_SIZE The configure script sets this symbol if ** the malloc_usable_size() interface exists ** on the target platform. Or, this symbol ** can be set manually, if desired. ** If an equivalent interface exists by ** a different name, using a separate -D ** option to rename it. ** ** SQLITE_WITHOUT_ZONEMALLOC Some older macs lack support for the zone ** memory allocator. Set this symbol to enable ** building on older macs. ** ** SQLITE_WITHOUT_MSIZE Set this symbol to disable the use of ** _msize() on windows systems. This might ** be necessary when compiling for Delphi, ** for example. */ /* #include "sqliteInt.h" */ /* ** This version of the memory allocator is the default. It is ** used when no other memory allocator is specified using compile-time ** macros. */ #ifdef SQLITE_SYSTEM_MALLOC #if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC) /* ** Use the zone allocator available on apple products unless the ** SQLITE_WITHOUT_ZONEMALLOC symbol is defined. */ #include #include #include static malloc_zone_t* _sqliteZone_; #define SQLITE_MALLOC(x) malloc_zone_malloc(_sqliteZone_, (x)) #define SQLITE_FREE(x) malloc_zone_free(_sqliteZone_, (x)); #define SQLITE_REALLOC(x,y) malloc_zone_realloc(_sqliteZone_, (x), (y)) #define SQLITE_MALLOCSIZE(x) \ (_sqliteZone_ ? _sqliteZone_->size(_sqliteZone_,x) : malloc_size(x)) #else /* if not __APPLE__ */ /* ** Use standard C library malloc and free on non-Apple systems. ** Also used by Apple systems if SQLITE_WITHOUT_ZONEMALLOC is defined. */ #define SQLITE_MALLOC(x) malloc(x) #define SQLITE_FREE(x) free(x) #define SQLITE_REALLOC(x,y) realloc((x),(y)) /* ** The malloc.h header file is needed for malloc_usable_size() function ** on some systems (e.g. Linux). */ #if HAVE_MALLOC_H && HAVE_MALLOC_USABLE_SIZE # define SQLITE_USE_MALLOC_H 1 # define SQLITE_USE_MALLOC_USABLE_SIZE 1 /* ** The MSVCRT has malloc_usable_size(), but it is called _msize(). The ** use of _msize() is automatic, but can be disabled by compiling with ** -DSQLITE_WITHOUT_MSIZE. Using the _msize() function also requires ** the malloc.h header file. */ #elif defined(_MSC_VER) && !defined(SQLITE_WITHOUT_MSIZE) # define SQLITE_USE_MALLOC_H # define SQLITE_USE_MSIZE #endif /* ** Include the malloc.h header file, if necessary. Also set define macro ** SQLITE_MALLOCSIZE to the appropriate function name, which is _msize() ** for MSVC and malloc_usable_size() for most other systems (e.g. Linux). ** The memory size function can always be overridden manually by defining ** the macro SQLITE_MALLOCSIZE to the desired function name. */ #if defined(SQLITE_USE_MALLOC_H) # include # if defined(SQLITE_USE_MALLOC_USABLE_SIZE) # if !defined(SQLITE_MALLOCSIZE) # define SQLITE_MALLOCSIZE(x) malloc_usable_size(x) # endif # elif defined(SQLITE_USE_MSIZE) # if !defined(SQLITE_MALLOCSIZE) # define SQLITE_MALLOCSIZE _msize # endif # endif #endif /* defined(SQLITE_USE_MALLOC_H) */ #endif /* __APPLE__ or not __APPLE__ */ /* ** Like malloc(), but remember the size of the allocation ** so that we can find it later using sqlite3MemSize(). ** ** For this low-level routine, we are guaranteed that nByte>0 because ** cases of nByte<=0 will be intercepted and dealt with by higher level ** routines. */ static void *sqlite3MemMalloc(int nByte){ #ifdef SQLITE_MALLOCSIZE void *p = SQLITE_MALLOC( nByte ); if( p==0 ){ testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte); } return p; #else sqlite3_int64 *p; assert( nByte>0 ); nByte = ROUND8(nByte); p = SQLITE_MALLOC( nByte+8 ); if( p ){ p[0] = nByte; p++; }else{ testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(SQLITE_NOMEM, "failed to allocate %u bytes of memory", nByte); } return (void *)p; #endif } /* ** Like free() but works for allocations obtained from sqlite3MemMalloc() ** or sqlite3MemRealloc(). ** ** For this low-level routine, we already know that pPrior!=0 since ** cases where pPrior==0 will have been intecepted and dealt with ** by higher-level routines. */ static void sqlite3MemFree(void *pPrior){ #ifdef SQLITE_MALLOCSIZE SQLITE_FREE(pPrior); #else sqlite3_int64 *p = (sqlite3_int64*)pPrior; assert( pPrior!=0 ); p--; SQLITE_FREE(p); #endif } /* ** Report the allocated size of a prior return from xMalloc() ** or xRealloc(). */ static int sqlite3MemSize(void *pPrior){ #ifdef SQLITE_MALLOCSIZE return pPrior ? (int)SQLITE_MALLOCSIZE(pPrior) : 0; #else sqlite3_int64 *p; if( pPrior==0 ) return 0; p = (sqlite3_int64*)pPrior; p--; return (int)p[0]; #endif } /* ** Like realloc(). Resize an allocation previously obtained from ** sqlite3MemMalloc(). ** ** For this low-level interface, we know that pPrior!=0. Cases where ** pPrior==0 while have been intercepted by higher-level routine and ** redirected to xMalloc. Similarly, we know that nByte>0 because ** cases where nByte<=0 will have been intercepted by higher-level ** routines and redirected to xFree. */ static void *sqlite3MemRealloc(void *pPrior, int nByte){ #ifdef SQLITE_MALLOCSIZE void *p = SQLITE_REALLOC(pPrior, nByte); if( p==0 ){ testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(SQLITE_NOMEM, "failed memory resize %u to %u bytes", SQLITE_MALLOCSIZE(pPrior), nByte); } return p; #else sqlite3_int64 *p = (sqlite3_int64*)pPrior; assert( pPrior!=0 && nByte>0 ); assert( nByte==ROUND8(nByte) ); /* EV: R-46199-30249 */ p--; p = SQLITE_REALLOC(p, nByte+8 ); if( p ){ p[0] = nByte; p++; }else{ testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(SQLITE_NOMEM, "failed memory resize %u to %u bytes", sqlite3MemSize(pPrior), nByte); } return (void*)p; #endif } /* ** Round up a request size to the next valid allocation size. */ static int sqlite3MemRoundup(int n){ return ROUND8(n); } /* ** Initialize this module. */ static int sqlite3MemInit(void *NotUsed){ #if defined(__APPLE__) && !defined(SQLITE_WITHOUT_ZONEMALLOC) int cpuCount; size_t len; if( _sqliteZone_ ){ return SQLITE_OK; } len = sizeof(cpuCount); /* One usually wants to use hw.acctivecpu for MT decisions, but not here */ sysctlbyname("hw.ncpu", &cpuCount, &len, NULL, 0); if( cpuCount>1 ){ /* defer MT decisions to system malloc */ _sqliteZone_ = malloc_default_zone(); }else{ /* only 1 core, use our own zone to contention over global locks, ** e.g. we have our own dedicated locks */ bool success; malloc_zone_t* newzone = malloc_create_zone(4096, 0); malloc_set_zone_name(newzone, "Sqlite_Heap"); do{ success = OSAtomicCompareAndSwapPtrBarrier(NULL, newzone, (void * volatile *)&_sqliteZone_); }while(!_sqliteZone_); if( !success ){ /* somebody registered a zone first */ malloc_destroy_zone(newzone); } } #endif UNUSED_PARAMETER(NotUsed); return SQLITE_OK; } /* ** Deinitialize this module. */ static void sqlite3MemShutdown(void *NotUsed){ UNUSED_PARAMETER(NotUsed); return; } /* ** This routine is the only routine in this file with external linkage. ** ** Populate the low-level memory allocation function pointers in ** sqlite3GlobalConfig.m with pointers to the routines in this file. */ SQLITE_PRIVATE void sqlite3MemSetDefault(void){ static const sqlite3_mem_methods defaultMethods = { sqlite3MemMalloc, sqlite3MemFree, sqlite3MemRealloc, sqlite3MemSize, sqlite3MemRoundup, sqlite3MemInit, sqlite3MemShutdown, 0 }; sqlite3_config(SQLITE_CONFIG_MALLOC, &defaultMethods); } #endif /* SQLITE_SYSTEM_MALLOC */ /************** End of mem1.c ************************************************/ /************** Begin file mutex.c *******************************************/ /* ** 2007 August 14 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains the C functions that implement mutexes. ** ** This file contains code that is common across all mutex implementations. */ /* #include "sqliteInt.h" */ #if defined(SQLITE_DEBUG) && !defined(SQLITE_MUTEX_OMIT) /* ** For debugging purposes, record when the mutex subsystem is initialized ** and uninitialized so that we can assert() if there is an attempt to ** allocate a mutex while the system is uninitialized. */ static SQLITE_WSD int mutexIsInit = 0; #endif /* SQLITE_DEBUG && !defined(SQLITE_MUTEX_OMIT) */ #ifndef SQLITE_MUTEX_OMIT /* ** Initialize the mutex system. */ SQLITE_PRIVATE int sqlite3MutexInit(void){ int rc = SQLITE_OK; if( !sqlite3GlobalConfig.mutex.xMutexAlloc ){ /* If the xMutexAlloc method has not been set, then the user did not ** install a mutex implementation via sqlite3_config() prior to ** sqlite3BtreeInitialize() being called. This block copies pointers to ** the default implementation into the sqlite3GlobalConfig structure. */ sqlite3_mutex_methods const *pFrom; sqlite3_mutex_methods *pTo = &sqlite3GlobalConfig.mutex; if( sqlite3GlobalConfig.bCoreMutex ){ pFrom = sqlite3DefaultMutex(); }else{ pFrom = sqlite3NoopMutex(); } pTo->xMutexInit = pFrom->xMutexInit; pTo->xMutexEnd = pFrom->xMutexEnd; pTo->xMutexFree = pFrom->xMutexFree; pTo->xMutexEnter = pFrom->xMutexEnter; pTo->xMutexTry = pFrom->xMutexTry; pTo->xMutexLeave = pFrom->xMutexLeave; pTo->xMutexHeld = pFrom->xMutexHeld; pTo->xMutexNotheld = pFrom->xMutexNotheld; sqlite3MemoryBarrier(); pTo->xMutexAlloc = pFrom->xMutexAlloc; } assert( sqlite3GlobalConfig.mutex.xMutexInit ); rc = sqlite3GlobalConfig.mutex.xMutexInit(); #ifdef SQLITE_DEBUG GLOBAL(int, mutexIsInit) = 1; #endif return rc; } /* ** Shutdown the mutex system. This call frees resources allocated by ** sqlite3MutexInit(). */ SQLITE_PRIVATE int sqlite3MutexEnd(void){ int rc = SQLITE_OK; if( sqlite3GlobalConfig.mutex.xMutexEnd ){ rc = sqlite3GlobalConfig.mutex.xMutexEnd(); } #ifdef SQLITE_DEBUG GLOBAL(int, mutexIsInit) = 0; #endif return rc; } /* ** Retrieve a pointer to a static mutex or allocate a new dynamic one. */ SQLITE_PRIVATE sqlite3_mutex *sqlite3_mutex_alloc(int id){ #ifndef SQLITE_OMIT_AUTOINIT if( id<=SQLITE_MUTEX_RECURSIVE && sqlite3BtreeInitialize() ) return 0; if( id>SQLITE_MUTEX_RECURSIVE && sqlite3MutexInit() ) return 0; #endif assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); return sqlite3GlobalConfig.mutex.xMutexAlloc(id); } SQLITE_PRIVATE sqlite3_mutex *sqlite3MutexAlloc(int id){ if( !sqlite3GlobalConfig.bCoreMutex ){ return 0; } assert( GLOBAL(int, mutexIsInit) ); assert( sqlite3GlobalConfig.mutex.xMutexAlloc ); return sqlite3GlobalConfig.mutex.xMutexAlloc(id); } /* ** Free a dynamic mutex. */ SQLITE_PRIVATE void sqlite3_mutex_free(sqlite3_mutex *p){ if( p ){ assert( sqlite3GlobalConfig.mutex.xMutexFree ); sqlite3GlobalConfig.mutex.xMutexFree(p); } } /* ** Obtain the mutex p. If some other thread already has the mutex, block ** until it can be obtained. */ SQLITE_PRIVATE void sqlite3_mutex_enter(sqlite3_mutex *p){ if( p ){ assert( sqlite3GlobalConfig.mutex.xMutexEnter ); sqlite3GlobalConfig.mutex.xMutexEnter(p); } } /* ** Obtain the mutex p. If successful, return SQLITE_OK. Otherwise, if another ** thread holds the mutex and it cannot be obtained, return SQLITE_BUSY. */ SQLITE_PRIVATE int sqlite3_mutex_try(sqlite3_mutex *p){ int rc = SQLITE_OK; if( p ){ assert( sqlite3GlobalConfig.mutex.xMutexTry ); return sqlite3GlobalConfig.mutex.xMutexTry(p); } return rc; } /* ** The sqlite3_mutex_leave() routine exits a mutex that was previously ** entered by the same thread. The behavior is undefined if the mutex ** is not currently entered. If a NULL pointer is passed as an argument ** this function is a no-op. */ SQLITE_PRIVATE void sqlite3_mutex_leave(sqlite3_mutex *p){ if( p ){ assert( sqlite3GlobalConfig.mutex.xMutexLeave ); sqlite3GlobalConfig.mutex.xMutexLeave(p); } } #ifndef NDEBUG /* ** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are ** intended for use inside assert() statements. */ SQLITE_PRIVATE int sqlite3_mutex_held(sqlite3_mutex *p){ assert( p==0 || sqlite3GlobalConfig.mutex.xMutexHeld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexHeld(p); } SQLITE_PRIVATE int sqlite3_mutex_notheld(sqlite3_mutex *p){ assert( p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld(p); } #endif #endif /* !defined(SQLITE_MUTEX_OMIT) */ /************** End of mutex.c ***********************************************/ /************** Begin file mutex_noop.c **************************************/ /* ** 2008 October 07 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains the C functions that implement mutexes. ** ** This implementation in this file does not provide any mutual ** exclusion and is thus suitable for use only in applications ** that use SQLite in a single thread. The routines defined ** here are place-holders. Applications can substitute working ** mutex routines at start-time using the ** ** sqlite3_config(SQLITE_CONFIG_MUTEX,...) ** ** interface. ** ** If compiled with SQLITE_DEBUG, then additional logic is inserted ** that does error checking on mutexes to make sure they are being ** called correctly. */ /* #include "sqliteInt.h" */ #ifndef SQLITE_MUTEX_OMIT #ifndef SQLITE_DEBUG /* ** Stub routines for all mutex methods. ** ** This routines provide no mutual exclusion or error checking. */ static int noopMutexInit(void){ return SQLITE_OK; } static int noopMutexEnd(void){ return SQLITE_OK; } static sqlite3_mutex *noopMutexAlloc(int id){ UNUSED_PARAMETER(id); return (sqlite3_mutex*)8; } static void noopMutexFree(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } static void noopMutexEnter(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } static int noopMutexTry(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return SQLITE_OK; } static void noopMutexLeave(sqlite3_mutex *p){ UNUSED_PARAMETER(p); return; } SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){ static const sqlite3_mutex_methods sMutex = { noopMutexInit, noopMutexEnd, noopMutexAlloc, noopMutexFree, noopMutexEnter, noopMutexTry, noopMutexLeave, 0, 0, }; return &sMutex; } #endif /* !SQLITE_DEBUG */ #ifdef SQLITE_DEBUG /* ** In this implementation, error checking is provided for testing ** and debugging purposes. The mutexes still do not provide any ** mutual exclusion. */ /* ** The mutex object */ typedef struct sqlite3_debug_mutex { int id; /* The mutex type */ int cnt; /* Number of entries without a matching leave */ } sqlite3_debug_mutex; /* ** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are ** intended for use inside assert() statements. */ static int debugMutexHeld(sqlite3_mutex *pX){ sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; return p==0 || p->cnt>0; } static int debugMutexNotheld(sqlite3_mutex *pX){ sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; return p==0 || p->cnt==0; } /* ** Initialize and deinitialize the mutex subsystem. */ static int debugMutexInit(void){ return SQLITE_OK; } static int debugMutexEnd(void){ return SQLITE_OK; } /* ** The sqlite3_mutex_alloc() routine allocates a new ** mutex and returns a pointer to it. If it returns NULL ** that means that a mutex could not be allocated. */ static sqlite3_mutex *debugMutexAlloc(int id){ static sqlite3_debug_mutex aStatic[SQLITE_MUTEX_STATIC_VFS3 - 1]; sqlite3_debug_mutex *pNew = 0; switch( id ){ case SQLITE_MUTEX_FAST: case SQLITE_MUTEX_RECURSIVE: { pNew = sqlite3Malloc(sizeof(*pNew)); if( pNew ){ pNew->id = id; pNew->cnt = 0; } break; } default: { #ifdef SQLITE_ENABLE_API_ARMOR if( id-2<0 || id-2>=ArraySize(aStatic) ){ (void)SQLITE_MISUSE_BKPT; return 0; } #endif pNew = &aStatic[id-2]; pNew->id = id; break; } } return (sqlite3_mutex*)pNew; } /* ** This routine deallocates a previously allocated mutex. */ static void debugMutexFree(sqlite3_mutex *pX){ sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; assert( p->cnt==0 ); if( p->id==SQLITE_MUTEX_RECURSIVE || p->id==SQLITE_MUTEX_FAST ){ sqlite3_free(p); }else{ #ifdef SQLITE_ENABLE_API_ARMOR (void)SQLITE_MISUSE_BKPT; #endif } } /* ** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt ** to enter a mutex. If another thread is already within the mutex, ** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return ** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK ** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can ** be entered multiple times by the same thread. In such cases the, ** mutex must be exited an equal number of times before another thread ** can enter. If the same thread tries to enter any other kind of mutex ** more than once, the behavior is undefined. */ static void debugMutexEnter(sqlite3_mutex *pX){ sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); p->cnt++; } static int debugMutexTry(sqlite3_mutex *pX){ sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); p->cnt++; return SQLITE_OK; } /* ** The sqlite3_mutex_leave() routine exits a mutex that was ** previously entered by the same thread. The behavior ** is undefined if the mutex is not currently entered or ** is not currently allocated. SQLite will never do either. */ static void debugMutexLeave(sqlite3_mutex *pX){ sqlite3_debug_mutex *p = (sqlite3_debug_mutex*)pX; assert( debugMutexHeld(pX) ); p->cnt--; assert( p->id==SQLITE_MUTEX_RECURSIVE || debugMutexNotheld(pX) ); } SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3NoopMutex(void){ static const sqlite3_mutex_methods sMutex = { debugMutexInit, debugMutexEnd, debugMutexAlloc, debugMutexFree, debugMutexEnter, debugMutexTry, debugMutexLeave, debugMutexHeld, debugMutexNotheld }; return &sMutex; } #endif /* SQLITE_DEBUG */ /* ** If compiled with SQLITE_MUTEX_NOOP, then the no-op mutex implementation ** is used regardless of the run-time threadsafety setting. */ #ifdef SQLITE_MUTEX_NOOP SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ return sqlite3NoopMutex(); } #endif /* defined(SQLITE_MUTEX_NOOP) */ #endif /* !defined(SQLITE_MUTEX_OMIT) */ /************** End of mutex_noop.c ******************************************/ /************** Begin file mutex_unix.c **************************************/ /* ** 2007 August 28 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains the C functions that implement mutexes for pthreads */ /* #include "sqliteInt.h" */ /* ** The code in this file is only used if we are compiling threadsafe ** under unix with pthreads. ** ** Note that this implementation requires a version of pthreads that ** supports recursive mutexes. */ #ifdef SQLITE_MUTEX_PTHREADS #include /* ** The sqlite3_mutex.id, sqlite3_mutex.nRef, and sqlite3_mutex.owner fields ** are necessary under two condidtions: (1) Debug builds and (2) using ** home-grown mutexes. Encapsulate these conditions into a single #define. */ #if defined(SQLITE_DEBUG) || defined(SQLITE_HOMEGROWN_RECURSIVE_MUTEX) # define SQLITE_MUTEX_NREF 1 #else # define SQLITE_MUTEX_NREF 0 #endif /* ** Each recursive mutex is an instance of the following structure. */ struct sqlite3_mutex { pthread_mutex_t mutex; /* Mutex controlling the lock */ #if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) int id; /* Mutex type */ #endif #if SQLITE_MUTEX_NREF volatile int nRef; /* Number of entrances */ volatile pthread_t owner; /* Thread that is within this mutex */ int trace; /* True to trace changes */ #endif }; #if SQLITE_MUTEX_NREF #define SQLITE3_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER, 0, 0, (pthread_t)0, 0 } #else #define SQLITE3_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER } #endif /* ** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are ** intended for use only inside assert() statements. On some platforms, ** there might be race conditions that can cause these routines to ** deliver incorrect results. In particular, if pthread_equal() is ** not an atomic operation, then these routines might delivery ** incorrect results. On most platforms, pthread_equal() is a ** comparison of two integers and is therefore atomic. But we are ** told that HPUX is not such a platform. If so, then these routines ** will not always work correctly on HPUX. ** ** On those platforms where pthread_equal() is not atomic, SQLite ** should be compiled without -DSQLITE_DEBUG and with -DNDEBUG to ** make sure no assert() statements are evaluated and hence these ** routines are never called. */ #if !defined(NDEBUG) || defined(SQLITE_DEBUG) static int pthreadMutexHeld(sqlite3_mutex *p){ return (p->nRef!=0 && pthread_equal(p->owner, pthread_self())); } static int pthreadMutexNotheld(sqlite3_mutex *p){ return p->nRef==0 || pthread_equal(p->owner, pthread_self())==0; } #endif /* ** Try to provide a memory barrier operation, needed for initialization ** and also for the implementation of xShmBarrier in the VFS in cases ** where SQLite is compiled without mutexes. */ SQLITE_PRIVATE void sqlite3MemoryBarrier(void){ #if defined(SQLITE_MEMORY_BARRIER) SQLITE_MEMORY_BARRIER; #elif defined(__GNUC__) && GCC_VERSION>=4001000 __sync_synchronize(); #endif } /* ** Initialize and deinitialize the mutex subsystem. */ static int pthreadMutexInit(void){ return SQLITE_OK; } static int pthreadMutexEnd(void){ return SQLITE_OK; } /* ** The sqlite3_mutex_alloc() routine allocates a new ** mutex and returns a pointer to it. If it returns NULL ** that means that a mutex could not be allocated. SQLite ** will unwind its stack and return an error. The argument ** to sqlite3_mutex_alloc() is one of these integer constants: ** **
    **
  • SQLITE_MUTEX_FAST **
  • SQLITE_MUTEX_RECURSIVE **
  • SQLITE_MUTEX_STATIC_MASTER **
  • SQLITE_MUTEX_STATIC_MEM **
  • SQLITE_MUTEX_STATIC_OPEN **
  • SQLITE_MUTEX_STATIC_PRNG **
  • SQLITE_MUTEX_STATIC_LRU **
  • SQLITE_MUTEX_STATIC_PMEM **
  • SQLITE_MUTEX_STATIC_APP1 **
  • SQLITE_MUTEX_STATIC_APP2 **
  • SQLITE_MUTEX_STATIC_APP3 **
  • SQLITE_MUTEX_STATIC_VFS1 **
  • SQLITE_MUTEX_STATIC_VFS2 **
  • SQLITE_MUTEX_STATIC_VFS3 **
** ** The first two constants cause sqlite3_mutex_alloc() to create ** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE ** is used but not necessarily so when SQLITE_MUTEX_FAST is used. ** The mutex implementation does not need to make a distinction ** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does ** not want to. But SQLite will only request a recursive mutex in ** cases where it really needs one. If a faster non-recursive mutex ** implementation is available on the host platform, the mutex subsystem ** might return such a mutex in response to SQLITE_MUTEX_FAST. ** ** The other allowed parameters to sqlite3_mutex_alloc() each return ** a pointer to a static preexisting mutex. Six static mutexes are ** used by the current version of SQLite. Future versions of SQLite ** may add additional static mutexes. Static mutexes are for internal ** use by SQLite only. Applications that use SQLite mutexes should ** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or ** SQLITE_MUTEX_RECURSIVE. ** ** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST ** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() ** returns a different mutex on every call. But for the static ** mutex types, the same mutex is returned on every call that has ** the same type number. */ static sqlite3_mutex *pthreadMutexAlloc(int iType){ static sqlite3_mutex staticMutexes[] = { SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER }; sqlite3_mutex *p; switch( iType ){ case SQLITE_MUTEX_RECURSIVE: { p = sqlite3MallocZero( sizeof(*p) ); if( p ){ #ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX /* If recursive mutexes are not available, we will have to ** build our own. See below. */ pthread_mutex_init(&p->mutex, 0); #else /* Use a recursive mutex if it is available */ pthread_mutexattr_t recursiveAttr; pthread_mutexattr_init(&recursiveAttr); pthread_mutexattr_settype(&recursiveAttr, PTHREAD_MUTEX_RECURSIVE); pthread_mutex_init(&p->mutex, &recursiveAttr); pthread_mutexattr_destroy(&recursiveAttr); #endif } break; } case SQLITE_MUTEX_FAST: { p = sqlite3MallocZero( sizeof(*p) ); if( p ){ pthread_mutex_init(&p->mutex, 0); } break; } default: { #ifdef SQLITE_ENABLE_API_ARMOR if( iType-2<0 || iType-2>=ArraySize(staticMutexes) ){ (void)SQLITE_MISUSE_BKPT; return 0; } #endif p = &staticMutexes[iType-2]; break; } } #if SQLITE_MUTEX_NREF || defined(SQLITE_ENABLE_API_ARMOR) if( p ) p->id = iType; #endif return p; } /* ** This routine deallocates a previously ** allocated mutex. SQLite is careful to deallocate every ** mutex that it allocates. */ static void pthreadMutexFree(sqlite3_mutex *p){ assert( p->nRef==0 ); #if SQLITE_ENABLE_API_ARMOR if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ) #endif { pthread_mutex_destroy(&p->mutex); sqlite3_free(p); } #ifdef SQLITE_ENABLE_API_ARMOR else{ (void)SQLITE_MISUSE_BKPT; } #endif } /* ** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt ** to enter a mutex. If another thread is already within the mutex, ** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return ** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK ** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can ** be entered multiple times by the same thread. In such cases the, ** mutex must be exited an equal number of times before another thread ** can enter. If the same thread tries to enter any other kind of mutex ** more than once, the behavior is undefined. */ static void pthreadMutexEnter(sqlite3_mutex *p){ assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) ); #ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX /* If recursive mutexes are not available, then we have to grow ** our own. This implementation assumes that pthread_equal() ** is atomic - that it cannot be deceived into thinking self ** and p->owner are equal if p->owner changes between two values ** that are not equal to self while the comparison is taking place. ** This implementation also assumes a coherent cache - that ** separate processes cannot read different values from the same ** address at the same time. If either of these two conditions ** are not met, then the mutexes will fail and problems will result. */ { pthread_t self = pthread_self(); if( p->nRef>0 && pthread_equal(p->owner, self) ){ p->nRef++; }else{ pthread_mutex_lock(&p->mutex); assert( p->nRef==0 ); p->owner = self; p->nRef = 1; } } #else /* Use the built-in recursive mutexes if they are available. */ pthread_mutex_lock(&p->mutex); #if SQLITE_MUTEX_NREF assert( p->nRef>0 || p->owner==0 ); p->owner = pthread_self(); p->nRef++; #endif #endif #ifdef SQLITE_DEBUG if( p->trace ){ printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); } #endif } static int pthreadMutexTry(sqlite3_mutex *p){ int rc; assert( p->id==SQLITE_MUTEX_RECURSIVE || pthreadMutexNotheld(p) ); #ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX /* If recursive mutexes are not available, then we have to grow ** our own. This implementation assumes that pthread_equal() ** is atomic - that it cannot be deceived into thinking self ** and p->owner are equal if p->owner changes between two values ** that are not equal to self while the comparison is taking place. ** This implementation also assumes a coherent cache - that ** separate processes cannot read different values from the same ** address at the same time. If either of these two conditions ** are not met, then the mutexes will fail and problems will result. */ { pthread_t self = pthread_self(); if( p->nRef>0 && pthread_equal(p->owner, self) ){ p->nRef++; rc = SQLITE_OK; }else if( pthread_mutex_trylock(&p->mutex)==0 ){ assert( p->nRef==0 ); p->owner = self; p->nRef = 1; rc = SQLITE_OK; }else{ rc = SQLITE_BUSY; } } #else /* Use the built-in recursive mutexes if they are available. */ if( pthread_mutex_trylock(&p->mutex)==0 ){ #if SQLITE_MUTEX_NREF p->owner = pthread_self(); p->nRef++; #endif rc = SQLITE_OK; }else{ rc = SQLITE_BUSY; } #endif #ifdef SQLITE_DEBUG if( rc==SQLITE_OK && p->trace ){ printf("enter mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); } #endif return rc; } /* ** The sqlite3_mutex_leave() routine exits a mutex that was ** previously entered by the same thread. The behavior ** is undefined if the mutex is not currently entered or ** is not currently allocated. SQLite will never do either. */ static void pthreadMutexLeave(sqlite3_mutex *p){ assert( pthreadMutexHeld(p) ); #if SQLITE_MUTEX_NREF p->nRef--; if( p->nRef==0 ) p->owner = 0; #endif assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE ); #ifdef SQLITE_HOMEGROWN_RECURSIVE_MUTEX if( p->nRef==0 ){ pthread_mutex_unlock(&p->mutex); } #else pthread_mutex_unlock(&p->mutex); #endif #ifdef SQLITE_DEBUG if( p->trace ){ printf("leave mutex %p (%d) with nRef=%d\n", p, p->trace, p->nRef); } #endif } SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ static const sqlite3_mutex_methods sMutex = { pthreadMutexInit, pthreadMutexEnd, pthreadMutexAlloc, pthreadMutexFree, pthreadMutexEnter, pthreadMutexTry, pthreadMutexLeave, #ifdef SQLITE_DEBUG pthreadMutexHeld, pthreadMutexNotheld #else 0, 0 #endif }; return &sMutex; } #endif /* SQLITE_MUTEX_PTHREADS */ /************** End of mutex_unix.c ******************************************/ /************** Begin file mutex_w32.c ***************************************/ /* ** 2007 August 14 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains the C functions that implement mutexes for Win32. */ /* #include "sqliteInt.h" */ #if SQLITE_OS_WIN /* ** Include code that is common to all os_*.c files */ /************** Include os_common.h in the middle of mutex_w32.c *************/ /************** Begin file os_common.h ***************************************/ /* ** 2004 May 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains macros and a little bit of code that is common to ** all of the platform-specific files (os_*.c) and is #included into those ** files. ** ** This file should be #included by the os_*.c files only. It is not a ** general purpose header file. */ #ifndef _OS_COMMON_H_ #define _OS_COMMON_H_ /* ** At least two bugs have slipped in because we changed the MEMORY_DEBUG ** macro to SQLITE_DEBUG and some older makefiles have not yet made the ** switch. The following code should catch this problem at compile-time. */ #ifdef MEMORY_DEBUG # error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." #endif /* ** Macros for performance tracing. Normally turned off. Only works ** on i486 hardware. */ #ifdef SQLITE_PERFORMANCE_TRACE /* ** hwtime.h contains inline assembler code for implementing ** high-performance timing routines. */ #include "hwtime.h" static sqlite_uint64 g_start; static sqlite_uint64 g_elapsed; #define TIMER_START g_start=sqlite3Hwtime() #define TIMER_END g_elapsed=sqlite3Hwtime()-g_start #define TIMER_ELAPSED g_elapsed #else #define TIMER_START #define TIMER_END #define TIMER_ELAPSED ((sqlite_uint64)0) #endif /* ** If we compile with the SQLITE_TEST macro set, then the following block ** of code will give us the ability to simulate a disk I/O error. This ** is used for testing the I/O recovery logic. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_io_error_hit = 0; /* Total number of I/O Errors */ SQLITE_PRIVATE int sqlite3_io_error_hardhit = 0; /* Number of non-benign errors */ SQLITE_PRIVATE int sqlite3_io_error_pending = 0; /* Count down to first I/O error */ SQLITE_PRIVATE int sqlite3_io_error_persist = 0; /* True if I/O errors persist */ SQLITE_PRIVATE int sqlite3_io_error_benign = 0; /* True if errors are benign */ SQLITE_PRIVATE int sqlite3_diskfull_pending = 0; SQLITE_PRIVATE int sqlite3_diskfull = 0; #define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X) #define SimulateIOError(CODE) \ if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \ || sqlite3_io_error_pending-- == 1 ) \ { local_ioerr(); CODE; } static void local_ioerr(){ IOTRACE(("IOERR\n")); sqlite3_io_error_hit++; if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++; } #define SimulateDiskfullError(CODE) \ if( sqlite3_diskfull_pending ){ \ if( sqlite3_diskfull_pending == 1 ){ \ local_ioerr(); \ sqlite3_diskfull = 1; \ sqlite3_io_error_hit = 1; \ CODE; \ }else{ \ sqlite3_diskfull_pending--; \ } \ } #else #define SimulateIOErrorBenign(X) #define SimulateIOError(A) #define SimulateDiskfullError(A) #endif /* ** When testing, keep a count of the number of open files. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_open_file_count = 0; #define OpenCounter(X) sqlite3_open_file_count+=(X) #else #define OpenCounter(X) #endif #endif /* !defined(_OS_COMMON_H_) */ /************** End of os_common.h *******************************************/ /************** Continuing where we left off in mutex_w32.c ******************/ /* ** Include the header file for the Windows VFS. */ /************** Include os_win.h in the middle of mutex_w32.c ****************/ /************** Begin file os_win.h ******************************************/ /* ** 2013 November 25 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains code that is specific to Windows. */ #ifndef _OS_WIN_H_ #define _OS_WIN_H_ /* ** Include the primary Windows SDK header file. */ #include "windows.h" #ifdef __CYGWIN__ # include # include /* amalgamator: dontcache */ #endif /* ** Determine if we are dealing with Windows NT. ** ** We ought to be able to determine if we are compiling for Windows 9x or ** Windows NT using the _WIN32_WINNT macro as follows: ** ** #if defined(_WIN32_WINNT) ** # define SQLITE_OS_WINNT 1 ** #else ** # define SQLITE_OS_WINNT 0 ** #endif ** ** However, Visual Studio 2005 does not set _WIN32_WINNT by default, as ** it ought to, so the above test does not work. We'll just assume that ** everything is Windows NT unless the programmer explicitly says otherwise ** by setting SQLITE_OS_WINNT to 0. */ #if SQLITE_OS_WIN && !defined(SQLITE_OS_WINNT) # define SQLITE_OS_WINNT 1 #endif /* ** Determine if we are dealing with Windows CE - which has a much reduced ** API. */ #if defined(_WIN32_WCE) # define SQLITE_OS_WINCE 1 #else # define SQLITE_OS_WINCE 0 #endif /* ** Determine if we are dealing with WinRT, which provides only a subset of ** the full Win32 API. */ #if !defined(SQLITE_OS_WINRT) # define SQLITE_OS_WINRT 0 #endif /* ** For WinCE, some API function parameters do not appear to be declared as ** volatile. */ #if SQLITE_OS_WINCE # define SQLITE_WIN32_VOLATILE #else # define SQLITE_WIN32_VOLATILE volatile #endif /* ** For some Windows sub-platforms, the _beginthreadex() / _endthreadex() ** functions are not available (e.g. those not using MSVC, Cygwin, etc). */ #if SQLITE_OS_WIN && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \ SQLITE_THREADSAFE>0 && !defined(__CYGWIN__) # define SQLITE_OS_WIN_THREADS 1 #else # define SQLITE_OS_WIN_THREADS 0 #endif #endif /* _OS_WIN_H_ */ /************** End of os_win.h **********************************************/ /************** Continuing where we left off in mutex_w32.c ******************/ #endif /* ** The code in this file is only used if we are compiling multithreaded ** on a Win32 system. */ #ifdef SQLITE_MUTEX_W32 /* ** Each recursive mutex is an instance of the following structure. */ struct sqlite3_mutex { CRITICAL_SECTION mutex; /* Mutex controlling the lock */ int id; /* Mutex type */ #ifdef SQLITE_DEBUG volatile int nRef; /* Number of enterances */ volatile DWORD owner; /* Thread holding this mutex */ volatile int trace; /* True to trace changes */ #endif }; /* ** These are the initializer values used when declaring a "static" mutex ** on Win32. It should be noted that all mutexes require initialization ** on the Win32 platform. */ #define SQLITE_W32_MUTEX_INITIALIZER { 0 } #ifdef SQLITE_DEBUG #define SQLITE3_MUTEX_INITIALIZER { SQLITE_W32_MUTEX_INITIALIZER, 0, \ 0L, (DWORD)0, 0 } #else #define SQLITE3_MUTEX_INITIALIZER { SQLITE_W32_MUTEX_INITIALIZER, 0 } #endif #ifdef SQLITE_DEBUG /* ** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are ** intended for use only inside assert() statements. */ static int winMutexHeld(sqlite3_mutex *p){ return p->nRef!=0 && p->owner==GetCurrentThreadId(); } static int winMutexNotheld2(sqlite3_mutex *p, DWORD tid){ return p->nRef==0 || p->owner!=tid; } static int winMutexNotheld(sqlite3_mutex *p){ DWORD tid = GetCurrentThreadId(); return winMutexNotheld2(p, tid); } #endif /* ** Try to provide a memory barrier operation, needed for initialization ** and also for the xShmBarrier method of the VFS in cases when SQLite is ** compiled without mutexes (SQLITE_THREADSAFE=0). */ SQLITE_PRIVATE void sqlite3MemoryBarrier(void){ #if defined(SQLITE_MEMORY_BARRIER) SQLITE_MEMORY_BARRIER; #elif defined(__GNUC__) __sync_synchronize(); #elif !defined(SQLITE_DISABLE_INTRINSIC) && \ defined(_MSC_VER) && _MSC_VER>=1300 _ReadWriteBarrier(); #elif defined(MemoryBarrier) MemoryBarrier(); #endif } /* ** Initialize and deinitialize the mutex subsystem. */ static sqlite3_mutex winMutex_staticMutexes[] = { SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER }; static int winMutex_isInit = 0; static int winMutex_isNt = -1; /* <0 means "need to query" */ /* As the winMutexInit() and winMutexEnd() functions are called as part ** of the sqlite3BtreeInitialize() and sqlite3BtreeShutdown() processing, the ** "interlocked" magic used here is probably not strictly necessary. */ static LONG SQLITE_WIN32_VOLATILE winMutex_lock = 0; SQLITE_PRIVATE int sqlite3_win32_is_nt(void); /* os_win.c */ SQLITE_PRIVATE void sqlite3_win32_sleep(DWORD milliseconds); /* os_win.c */ static int winMutexInit(void){ /* The first to increment to 1 does actual initialization */ if( InterlockedCompareExchange(&winMutex_lock, 1, 0)==0 ){ int i; for(i=0; i **
  • SQLITE_MUTEX_FAST **
  • SQLITE_MUTEX_RECURSIVE **
  • SQLITE_MUTEX_STATIC_MASTER **
  • SQLITE_MUTEX_STATIC_MEM **
  • SQLITE_MUTEX_STATIC_OPEN **
  • SQLITE_MUTEX_STATIC_PRNG **
  • SQLITE_MUTEX_STATIC_LRU **
  • SQLITE_MUTEX_STATIC_PMEM **
  • SQLITE_MUTEX_STATIC_APP1 **
  • SQLITE_MUTEX_STATIC_APP2 **
  • SQLITE_MUTEX_STATIC_APP3 **
  • SQLITE_MUTEX_STATIC_VFS1 **
  • SQLITE_MUTEX_STATIC_VFS2 **
  • SQLITE_MUTEX_STATIC_VFS3 ** ** ** The first two constants cause sqlite3_mutex_alloc() to create ** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE ** is used but not necessarily so when SQLITE_MUTEX_FAST is used. ** The mutex implementation does not need to make a distinction ** between SQLITE_MUTEX_RECURSIVE and SQLITE_MUTEX_FAST if it does ** not want to. But SQLite will only request a recursive mutex in ** cases where it really needs one. If a faster non-recursive mutex ** implementation is available on the host platform, the mutex subsystem ** might return such a mutex in response to SQLITE_MUTEX_FAST. ** ** The other allowed parameters to sqlite3_mutex_alloc() each return ** a pointer to a static preexisting mutex. Six static mutexes are ** used by the current version of SQLite. Future versions of SQLite ** may add additional static mutexes. Static mutexes are for internal ** use by SQLite only. Applications that use SQLite mutexes should ** use only the dynamic mutexes returned by SQLITE_MUTEX_FAST or ** SQLITE_MUTEX_RECURSIVE. ** ** Note that if one of the dynamic mutex parameters (SQLITE_MUTEX_FAST ** or SQLITE_MUTEX_RECURSIVE) is used then sqlite3_mutex_alloc() ** returns a different mutex on every call. But for the static ** mutex types, the same mutex is returned on every call that has ** the same type number. */ static sqlite3_mutex *winMutexAlloc(int iType){ sqlite3_mutex *p; switch( iType ){ case SQLITE_MUTEX_FAST: case SQLITE_MUTEX_RECURSIVE: { p = sqlite3MallocZero( sizeof(*p) ); if( p ){ p->id = iType; #ifdef SQLITE_DEBUG #ifdef SQLITE_WIN32_MUTEX_TRACE_DYNAMIC p->trace = 1; #endif #endif #if SQLITE_OS_WINRT InitializeCriticalSectionEx(&p->mutex, 0, 0); #else InitializeCriticalSection(&p->mutex); #endif } break; } default: { #ifdef SQLITE_ENABLE_API_ARMOR if( iType-2<0 || iType-2>=ArraySize(winMutex_staticMutexes) ){ (void)SQLITE_MISUSE_BKPT; return 0; } #endif p = &winMutex_staticMutexes[iType-2]; p->id = iType; #ifdef SQLITE_DEBUG #ifdef SQLITE_WIN32_MUTEX_TRACE_STATIC p->trace = 1; #endif #endif break; } } return p; } /* ** This routine deallocates a previously ** allocated mutex. SQLite is careful to deallocate every ** mutex that it allocates. */ static void winMutexFree(sqlite3_mutex *p){ assert( p ); assert( p->nRef==0 && p->owner==0 ); if( p->id==SQLITE_MUTEX_FAST || p->id==SQLITE_MUTEX_RECURSIVE ){ DeleteCriticalSection(&p->mutex); sqlite3_free(p); }else{ #ifdef SQLITE_ENABLE_API_ARMOR (void)SQLITE_MISUSE_BKPT; #endif } } /* ** The sqlite3_mutex_enter() and sqlite3_mutex_try() routines attempt ** to enter a mutex. If another thread is already within the mutex, ** sqlite3_mutex_enter() will block and sqlite3_mutex_try() will return ** SQLITE_BUSY. The sqlite3_mutex_try() interface returns SQLITE_OK ** upon successful entry. Mutexes created using SQLITE_MUTEX_RECURSIVE can ** be entered multiple times by the same thread. In such cases the, ** mutex must be exited an equal number of times before another thread ** can enter. If the same thread tries to enter any other kind of mutex ** more than once, the behavior is undefined. */ static void winMutexEnter(sqlite3_mutex *p){ #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) DWORD tid = GetCurrentThreadId(); #endif #ifdef SQLITE_DEBUG assert( p ); assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); #else assert( p ); #endif assert( winMutex_isInit==1 ); EnterCriticalSection(&p->mutex); #ifdef SQLITE_DEBUG assert( p->nRef>0 || p->owner==0 ); p->owner = tid; p->nRef++; if( p->trace ){ OSTRACE(("ENTER-MUTEX tid=%lu, mutex=%p (%d), nRef=%d\n", tid, p, p->trace, p->nRef)); } #endif } static int winMutexTry(sqlite3_mutex *p){ #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) DWORD tid = GetCurrentThreadId(); #endif int rc = SQLITE_BUSY; assert( p ); assert( p->id==SQLITE_MUTEX_RECURSIVE || winMutexNotheld2(p, tid) ); /* ** The sqlite3_mutex_try() routine is very rarely used, and when it ** is used it is merely an optimization. So it is OK for it to always ** fail. ** ** The TryEnterCriticalSection() interface is only available on WinNT. ** And some windows compilers complain if you try to use it without ** first doing some #defines that prevent SQLite from building on Win98. ** For that reason, we will omit this optimization for now. See ** ticket #2685. */ #if defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0400 assert( winMutex_isInit==1 ); assert( winMutex_isNt>=-1 && winMutex_isNt<=1 ); if( winMutex_isNt<0 ){ winMutex_isNt = sqlite3_win32_is_nt(); } assert( winMutex_isNt==0 || winMutex_isNt==1 ); if( winMutex_isNt && TryEnterCriticalSection(&p->mutex) ){ #ifdef SQLITE_DEBUG p->owner = tid; p->nRef++; #endif rc = SQLITE_OK; } #else UNUSED_PARAMETER(p); #endif #ifdef SQLITE_DEBUG if( p->trace ){ OSTRACE(("TRY-MUTEX tid=%lu, mutex=%p (%d), owner=%lu, nRef=%d, rc=%s\n", tid, p, p->trace, p->owner, p->nRef, sqlite3ErrName(rc))); } #endif return rc; } /* ** The sqlite3_mutex_leave() routine exits a mutex that was ** previously entered by the same thread. The behavior ** is undefined if the mutex is not currently entered or ** is not currently allocated. SQLite will never do either. */ static void winMutexLeave(sqlite3_mutex *p){ #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) DWORD tid = GetCurrentThreadId(); #endif assert( p ); #ifdef SQLITE_DEBUG assert( p->nRef>0 ); assert( p->owner==tid ); p->nRef--; if( p->nRef==0 ) p->owner = 0; assert( p->nRef==0 || p->id==SQLITE_MUTEX_RECURSIVE ); #endif assert( winMutex_isInit==1 ); LeaveCriticalSection(&p->mutex); #ifdef SQLITE_DEBUG if( p->trace ){ OSTRACE(("LEAVE-MUTEX tid=%lu, mutex=%p (%d), nRef=%d\n", tid, p, p->trace, p->nRef)); } #endif } SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ static const sqlite3_mutex_methods sMutex = { winMutexInit, winMutexEnd, winMutexAlloc, winMutexFree, winMutexEnter, winMutexTry, winMutexLeave, #ifdef SQLITE_DEBUG winMutexHeld, winMutexNotheld #else 0, 0 #endif }; return &sMutex; } #endif /* SQLITE_MUTEX_W32 */ /************** End of mutex_w32.c *******************************************/ /************** Begin file malloc.c ******************************************/ /* ** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** Memory allocation functions used throughout sqlite. */ /* #include "sqliteInt.h" */ /* #include */ /* ** Attempt to release up to n bytes of non-essential memory currently ** held by SQLite. An example of non-essential memory is memory used to ** cache database pages that are not currently in use. */ SQLITE_PRIVATE int sqlite3_release_memory(int n){ #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT return sqlite3PcacheReleaseMemory(n); #else /* IMPLEMENTATION-OF: R-34391-24921 The sqlite3_release_memory() routine ** is a no-op returning zero if SQLite is not compiled with ** SQLITE_ENABLE_MEMORY_MANAGEMENT. */ UNUSED_PARAMETER(n); return 0; #endif } /* ** An instance of the following object records the location of ** each unused scratch buffer. */ typedef struct ScratchFreeslot { struct ScratchFreeslot *pNext; /* Next unused scratch buffer */ } ScratchFreeslot; /* ** State information local to the memory allocation subsystem. */ static SQLITE_WSD struct Mem0Global { sqlite3_mutex *mutex; /* Mutex to serialize access */ sqlite3_int64 alarmThreshold; /* The soft heap limit */ /* ** Pointers to the end of sqlite3GlobalConfig.pScratch memory ** (so that a range test can be used to determine if an allocation ** being freed came from pScratch) and a pointer to the list of ** unused scratch allocations. */ void *pScratchEnd; ScratchFreeslot *pScratchFree; u32 nScratchFree; /* ** True if heap is nearly "full" where "full" is defined by the ** sqlite3_soft_heap_limit() setting. */ int nearlyFull; } mem0 = { 0, 0, 0, 0, 0, 0 }; #define mem0 GLOBAL(struct Mem0Global, mem0) #ifndef NDEBUG /* ** Return the memory allocator mutex. sqlite3_status() needs it. */ SQLITE_PRIVATE sqlite3_mutex *sqlite3MallocMutex(void){ return mem0.mutex; } #endif /* ** Initialize the memory allocation subsystem. */ SQLITE_PRIVATE int sqlite3MallocInit(void){ int rc; if( sqlite3GlobalConfig.m.xMalloc==0 ){ sqlite3MemSetDefault(); } memset(&mem0, 0, sizeof(mem0)); if( sqlite3GlobalConfig.bCoreMutex ){ mem0.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MEM); } if( sqlite3GlobalConfig.pScratch && sqlite3GlobalConfig.szScratch>=100 && sqlite3GlobalConfig.nScratch>0 ){ int i, n, sz; ScratchFreeslot *pSlot; sz = ROUNDDOWN8(sqlite3GlobalConfig.szScratch); sqlite3GlobalConfig.szScratch = sz; pSlot = (ScratchFreeslot*)sqlite3GlobalConfig.pScratch; n = sqlite3GlobalConfig.nScratch; mem0.pScratchFree = pSlot; mem0.nScratchFree = n; for(i=0; ipNext = (ScratchFreeslot*)(sz+(char*)pSlot); pSlot = pSlot->pNext; } pSlot->pNext = 0; mem0.pScratchEnd = (void*)&pSlot[1]; }else{ mem0.pScratchEnd = 0; sqlite3GlobalConfig.pScratch = 0; sqlite3GlobalConfig.szScratch = 0; sqlite3GlobalConfig.nScratch = 0; } if( sqlite3GlobalConfig.pPage==0 || sqlite3GlobalConfig.szPage<512 || sqlite3GlobalConfig.nPage<=0 ){ sqlite3GlobalConfig.pPage = 0; sqlite3GlobalConfig.szPage = 0; } rc = sqlite3GlobalConfig.m.xInit(sqlite3GlobalConfig.m.pAppData); if( rc!=SQLITE_OK ) memset(&mem0, 0, sizeof(mem0)); return rc; } /* ** Return true if the heap is currently under memory pressure - in other ** words if the amount of heap used is close to the limit set by ** sqlite3_soft_heap_limit(). */ SQLITE_PRIVATE int sqlite3HeapNearlyFull(void){ return mem0.nearlyFull; } /* ** Deinitialize the memory allocation subsystem. */ SQLITE_PRIVATE void sqlite3MallocEnd(void){ if( sqlite3GlobalConfig.m.xShutdown ){ sqlite3GlobalConfig.m.xShutdown(sqlite3GlobalConfig.m.pAppData); } memset(&mem0, 0, sizeof(mem0)); } /* ** Trigger the alarm */ static void sqlite3MallocAlarm(int nByte){ if( mem0.alarmThreshold<=0 ) return; sqlite3_mutex_leave(mem0.mutex); sqlite3_release_memory(nByte); sqlite3_mutex_enter(mem0.mutex); } /* ** Do a memory allocation with statistics and alarms. Assume the ** lock is already held. */ static int mallocWithAlarm(int n, void **pp){ int nFull; void *p; assert( sqlite3_mutex_held(mem0.mutex) ); nFull = sqlite3GlobalConfig.m.xRoundup(n); sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, n); if( mem0.alarmThreshold>0 ){ sqlite3_int64 nUsed = sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED); if( nUsed >= mem0.alarmThreshold - nFull ){ mem0.nearlyFull = 1; sqlite3MallocAlarm(nFull); }else{ mem0.nearlyFull = 0; } } p = sqlite3GlobalConfig.m.xMalloc(nFull); #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT if( p==0 && mem0.alarmThreshold>0 ){ sqlite3MallocAlarm(nFull); p = sqlite3GlobalConfig.m.xMalloc(nFull); } #endif if( p ){ nFull = sqlite3MallocSize(p); sqlite3StatusUp(SQLITE_STATUS_MEMORY_USED, nFull); sqlite3StatusUp(SQLITE_STATUS_MALLOC_COUNT, 1); } *pp = p; return nFull; } /* ** Allocate memory. This routine is like sqlite3_malloc() except that it ** assumes the memory subsystem has already been initialized. */ SQLITE_PRIVATE void *sqlite3Malloc(u64 n){ void *p; if( n==0 || n>=0x7fffff00 ){ /* A memory allocation of a number of bytes which is near the maximum ** signed integer value might cause an integer overflow inside of the ** xMalloc(). Hence we limit the maximum size to 0x7fffff00, giving ** 255 bytes of overhead. SQLite itself will never use anything near ** this amount. The only way to reach the limit is with sqlite3_malloc() */ p = 0; }else if( sqlite3GlobalConfig.bMemstat ){ sqlite3_mutex_enter(mem0.mutex); mallocWithAlarm((int)n, &p); sqlite3_mutex_leave(mem0.mutex); }else{ p = sqlite3GlobalConfig.m.xMalloc((int)n); } assert( EIGHT_BYTE_ALIGNMENT(p) ); /* IMP: R-11148-40995 */ return p; } /* ** This version of the memory allocation is for use by the application. ** First make sure the memory subsystem is initialized, then do the ** allocation. */ SQLITE_PRIVATE void *sqlite3_malloc(int n){ #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3BtreeInitialize() ) return 0; #endif return n<=0 ? 0 : sqlite3Malloc(n); } SQLITE_PRIVATE void *sqlite3_malloc64(sqlite3_uint64 n){ #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3BtreeInitialize() ) return 0; #endif return sqlite3Malloc(n); } /* ** Each thread may only have a single outstanding allocation from ** xScratchMalloc(). We verify this constraint in the single-threaded ** case by setting scratchAllocOut to 1 when an allocation ** is outstanding clearing it when the allocation is freed. */ #if SQLITE_THREADSAFE==0 && !defined(NDEBUG) static int scratchAllocOut = 0; #endif /* ** Allocate memory that is to be used and released right away. ** This routine is similar to alloca() in that it is not intended ** for situations where the memory might be held long-term. This ** routine is intended to get memory to old large transient data ** structures that would not normally fit on the stack of an ** embedded processor. */ SQLITE_PRIVATE void *sqlite3ScratchMalloc(int n){ void *p; assert( n>0 ); sqlite3_mutex_enter(mem0.mutex); sqlite3StatusSet(SQLITE_STATUS_SCRATCH_SIZE, n); if( mem0.nScratchFree && sqlite3GlobalConfig.szScratch>=n ){ p = mem0.pScratchFree; mem0.pScratchFree = mem0.pScratchFree->pNext; mem0.nScratchFree--; sqlite3StatusUp(SQLITE_STATUS_SCRATCH_USED, 1); sqlite3_mutex_leave(mem0.mutex); }else{ sqlite3_mutex_leave(mem0.mutex); p = sqlite3Malloc(n); if( sqlite3GlobalConfig.bMemstat && p ){ sqlite3_mutex_enter(mem0.mutex); sqlite3StatusUp(SQLITE_STATUS_SCRATCH_OVERFLOW, sqlite3MallocSize(p)); sqlite3_mutex_leave(mem0.mutex); } sqlite3MemdebugSetType(p, MEMTYPE_SCRATCH); } assert( sqlite3_mutex_notheld(mem0.mutex) ); #if SQLITE_THREADSAFE==0 && !defined(NDEBUG) /* EVIDENCE-OF: R-12970-05880 SQLite will not use more than one scratch ** buffers per thread. ** ** This can only be checked in single-threaded mode. */ assert( scratchAllocOut==0 ); if( p ) scratchAllocOut++; #endif return p; } SQLITE_PRIVATE void sqlite3ScratchFree(void *p){ if( p ){ #if SQLITE_THREADSAFE==0 && !defined(NDEBUG) /* Verify that no more than two scratch allocation per thread ** is outstanding at one time. (This is only checked in the ** single-threaded case since checking in the multi-threaded case ** would be much more complicated.) */ assert( scratchAllocOut>=1 && scratchAllocOut<=2 ); scratchAllocOut--; #endif if( p>=sqlite3GlobalConfig.pScratch && ppNext = mem0.pScratchFree; mem0.pScratchFree = pSlot; mem0.nScratchFree++; assert( mem0.nScratchFree <= (u32)sqlite3GlobalConfig.nScratch ); sqlite3StatusDown(SQLITE_STATUS_SCRATCH_USED, 1); sqlite3_mutex_leave(mem0.mutex); }else{ /* Release memory back to the heap */ assert( sqlite3MemdebugHasType(p, MEMTYPE_SCRATCH) ); assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_SCRATCH) ); sqlite3MemdebugSetType(p, MEMTYPE_HEAP); if( sqlite3GlobalConfig.bMemstat ){ int iSize = sqlite3MallocSize(p); sqlite3_mutex_enter(mem0.mutex); sqlite3StatusDown(SQLITE_STATUS_SCRATCH_OVERFLOW, iSize); sqlite3StatusDown(SQLITE_STATUS_MEMORY_USED, iSize); sqlite3StatusDown(SQLITE_STATUS_MALLOC_COUNT, 1); sqlite3GlobalConfig.m.xFree(p); sqlite3_mutex_leave(mem0.mutex); }else{ sqlite3GlobalConfig.m.xFree(p); } } } } /* ** Return the size of a memory allocation previously obtained from ** sqlite3Malloc() or sqlite3_malloc(). */ SQLITE_PRIVATE int sqlite3MallocSize(void *p){ assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); return sqlite3GlobalConfig.m.xSize(p); } SQLITE_PRIVATE int sqlite3DbMallocSize(Btree *pBtree, void *p){ if( pBtree==0 ){ #if SQLITE_DEBUG assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); #endif return sqlite3GlobalConfig.m.xSize(p); }else{ assert( sqlite3_mutex_held(pBtree->mutex) ); return 0; } } /* ** Free memory previously obtained from sqlite3Malloc(). */ SQLITE_PRIVATE void sqlite3_free(void *p){ if( p==0 ) return; /* IMP: R-49053-54554 */ assert( sqlite3MemdebugHasType(p, MEMTYPE_HEAP) ); assert( sqlite3MemdebugNoType(p, (u8)~MEMTYPE_HEAP) ); if( sqlite3GlobalConfig.bMemstat ){ sqlite3_mutex_enter(mem0.mutex); sqlite3StatusDown(SQLITE_STATUS_MEMORY_USED, sqlite3MallocSize(p)); sqlite3StatusDown(SQLITE_STATUS_MALLOC_COUNT, 1); sqlite3GlobalConfig.m.xFree(p); sqlite3_mutex_leave(mem0.mutex); }else{ sqlite3GlobalConfig.m.xFree(p); } } /* ** Add the size of memory allocation "p" to the count in ** *db->pnBytesFreed. */ static SQLITE_NOINLINE void measureAllocationSize(Btree *pBtree, void *p){ *pBtree->pnBytesFreed += sqlite3DbMallocSize(pBtree,p); } /* ** Free memory that might be associated with a particular database ** connection. */ SQLITE_PRIVATE void sqlite3DbFree(Btree *pBtree, void *p){ assert( pBTree==0 || sqlite3_mutex_held(pTree->mutex) ); if( p==0 ) return; if( pBtree ){ if( pBtree->pnBytesFreed ){ measureAllocationSize(pBtree, p); return; } } assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); assert( pBtree!=0 || sqlite3MemdebugNoType(p, MEMTYPE_LOOKASIDE) ); sqlite3MemdebugSetType(p, MEMTYPE_HEAP); sqlite3_free(p); } /* ** Change the size of an existing memory allocation */ SQLITE_PRIVATE void *sqlite3Realloc(void *pOld, u64 nBytes){ int nOld, nNew, nDiff; void *pNew; assert( sqlite3MemdebugHasType(pOld, MEMTYPE_HEAP) ); assert( sqlite3MemdebugNoType(pOld, (u8)~MEMTYPE_HEAP) ); if( pOld==0 ){ return sqlite3Malloc(nBytes); /* IMP: R-04300-56712 */ } if( nBytes==0 ){ sqlite3_free(pOld); /* IMP: R-26507-47431 */ return 0; } if( nBytes>=0x7fffff00 ){ /* The 0x7ffff00 limit term is explained in comments on sqlite3Malloc() */ return 0; } nOld = sqlite3MallocSize(pOld); /* IMPLEMENTATION-OF: R-46199-30249 SQLite guarantees that the second ** argument to xRealloc is always a value returned by a prior call to ** xRoundup. */ nNew = sqlite3GlobalConfig.m.xRoundup((int)nBytes); if( nOld==nNew ){ pNew = pOld; }else if( sqlite3GlobalConfig.bMemstat ){ sqlite3_mutex_enter(mem0.mutex); sqlite3StatusSet(SQLITE_STATUS_MALLOC_SIZE, (int)nBytes); nDiff = nNew - nOld; if( sqlite3StatusValue(SQLITE_STATUS_MEMORY_USED) >= mem0.alarmThreshold-nDiff ){ sqlite3MallocAlarm(nDiff); } pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); if( pNew==0 && mem0.alarmThreshold>0 ){ sqlite3MallocAlarm((int)nBytes); pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); } if( pNew ){ nNew = sqlite3MallocSize(pNew); sqlite3StatusUp(SQLITE_STATUS_MEMORY_USED, nNew-nOld); } sqlite3_mutex_leave(mem0.mutex); }else{ pNew = sqlite3GlobalConfig.m.xRealloc(pOld, nNew); } assert( EIGHT_BYTE_ALIGNMENT(pNew) ); /* IMP: R-11148-40995 */ return pNew; } /* ** The public interface to sqlite3Realloc. Make sure that the memory ** subsystem is initialized prior to invoking sqliteRealloc. */ SQLITE_PRIVATE void *sqlite3_realloc(void *pOld, int n){ #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3BtreeInitialize() ) return 0; #endif if( n<0 ) n = 0; /* IMP: R-26507-47431 */ return sqlite3Realloc(pOld, n); } SQLITE_PRIVATE void *sqlite3_realloc64(void *pOld, sqlite3_uint64 n){ #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3BtreeInitialize() ) return 0; #endif return sqlite3Realloc(pOld, n); } /* ** Allocate and zero memory. */ SQLITE_PRIVATE void *sqlite3MallocZero(u64 n){ void *p = sqlite3Malloc(n); if( p ){ memset(p, 0, (size_t)n); } return p; } /* ** Allocate and zero memory. If the allocation fails, make ** the mallocFailed flag in the connection pointer. */ SQLITE_PRIVATE void *sqlite3DbMallocZero(Btree *pBtree, u64 n){ void *p = sqlite3DbMallocRaw(pBtree, n); if( p ){ memset(p, 0, (size_t)n); } return p; } /* ** Allocate and zero memory. If the allocation fails, make ** the mallocFailed flag in the connection pointer. ** ** If pBtree!=0 and pBtree->mallocFailed is true (indicating a prior malloc ** failure on the same database connection) then always return 0. ** Hence for a particular database connection, once malloc starts ** failing, it fails consistently until mallocFailed is reset. ** This is an important assumption. There are many places in the ** code that do things like this: ** ** int *a = (int*)sqlite3DbMallocRaw(pBtree, 100); ** int *b = (int*)sqlite3DbMallocRaw(pBtree, 200); ** if( b ) a[10] = 9; ** ** In other words, if a subsequent malloc (ex: "b") worked, it is assumed ** that all prior mallocs (ex: "a") worked too. */ SQLITE_PRIVATE void *sqlite3DbMallocRaw(Btree *pBtree, u64 n){ void *p; assert( pBtree==0 || sqlite3_mutex_held(pBtree->mutex) ); assert( pBtree==0 || pBtree->pnBytesFreed==0 ); if( pBtree && pBtree->mallocFailed ){ return 0; } p = sqlite3Malloc(n); if( !p && pBtree ){ pBtree->mallocFailed = 1; } sqlite3MemdebugSetType(p, MEMTYPE_HEAP); return p; } /* ** Resize the block of memory pointed to by p to n bytes. If the ** resize fails, set the mallocFailed flag in the connection object. */ SQLITE_PRIVATE void *sqlite3DbRealloc(Btree *pBtree, void *p, u64 n){ void *pNew = 0; assert( pBtree!=0 ); assert( sqlite3_mutex_held(pBtree->mutex) ); if( pBtree->mallocFailed==0 ){ if( p==0 ){ return sqlite3DbMallocRaw(pBtree, n); } assert( sqlite3MemdebugHasType(p, (MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); assert( sqlite3MemdebugNoType(p, (u8)~(MEMTYPE_LOOKASIDE|MEMTYPE_HEAP)) ); sqlite3MemdebugSetType(p, MEMTYPE_HEAP); pNew = sqlite3_realloc64(p, n); if( !pNew ){ pBtree->mallocFailed = 1; } sqlite3MemdebugSetType(pNew, MEMTYPE_HEAP); } return pNew; } /* ** Attempt to reallocate p. If the reallocation fails, then free p ** and set the mallocFailed flag in the database connection. */ SQLITE_PRIVATE void *sqlite3DbReallocOrFree(Btree *pBtree, void *p, u64 n){ void *pNew; pNew = sqlite3DbRealloc(pBtree, p, n); if( !pNew ){ sqlite3DbFree(pBtree, p); } return pNew; } /* ** Make a copy of a string in memory obtained from sqliteMalloc(). These ** functions call sqlite3MallocRaw() directly instead of sqliteMalloc(). This ** is because when memory debugging is turned on, these two functions are ** called via macros that record the current file and line number in the ** ThreadData structure. */ char *sqlite3DbStrDup(Btree *pBtree, const char *z){ char *zNew; size_t n; if( z==0 ){ return 0; } n = sqlite3Strlen30(z) + 1; assert( (n&0x7fffffff)==n ); zNew = sqlite3DbMallocRaw(pBtree, (int)n); if( zNew ){ memcpy(zNew, z, n); } return zNew; } /************** End of malloc.c **********************************************/ /************** Begin file printf.c ******************************************/ /* ** The "printf" code that follows dates from the 1980's. It is in ** the public domain. ** ************************************************************************** ** ** This file contains code for a set of "printf"-like routines. These ** routines format strings much like the printf() from the standard C ** library, though the implementation here has enhancements to support ** SQLite. */ /* #include "sqliteInt.h" */ /* ** Conversion types fall into various categories as defined by the ** following enumeration. */ #define etRADIX 1 /* Integer types. %d, %x, %o, and so forth */ #define etFLOAT 2 /* Floating point. %f */ #define etEXP 3 /* Exponentional notation. %e and %E */ #define etGENERIC 4 /* Floating or exponential, depending on exponent. %g */ #define etSIZE 5 /* Return number of characters processed so far. %n */ #define etSTRING 6 /* Strings. %s */ #define etDYNSTRING 7 /* Dynamically allocated strings. %z */ #define etPERCENT 8 /* Percent symbol. %% */ #define etCHARX 9 /* Characters. %c */ /* The rest are extensions, not normally found in printf() */ #define etSQLESCAPE 10 /* Strings with '\'' doubled. %q */ #define etSQLESCAPE2 11 /* Strings with '\'' doubled and enclosed in '', NULL pointers replaced by SQL NULL. %Q */ #define etPOINTER 14 /* The %p conversion */ #define etSQLESCAPE3 15 /* %w -> Strings with '\"' doubled */ #define etORDINAL 16 /* %r -> 1st, 2nd, 3rd, 4th, etc. English only */ #define etINVALID 0 /* Any unrecognized conversion type */ /* ** An "etByte" is an 8-bit unsigned value. */ typedef unsigned char etByte; /* ** Each builtin conversion character (ex: the 'd' in "%d") is described ** by an instance of the following structure */ typedef struct et_info { /* Information about each format field */ char fmttype; /* The format field code letter */ etByte base; /* The base for radix conversion */ etByte flags; /* One or more of FLAG_ constants below */ etByte type; /* Conversion paradigm */ etByte charset; /* Offset into aDigits[] of the digits string */ etByte prefix; /* Offset into aPrefix[] of the prefix string */ } et_info; /* ** Allowed values for et_info.flags */ #define FLAG_SIGNED 1 /* True if the value to convert is signed */ #define FLAG_INTERN 2 /* True if for internal use only */ #define FLAG_STRING 4 /* Allow infinity precision */ /* ** The following table is searched linearly, so it is good to put the ** most frequently used conversion types first. */ static const char aDigits[] = "0123456789ABCDEF0123456789abcdef"; static const char aPrefix[] = "-x0\000X0"; static const et_info fmtinfo[] = { { 'd', 10, 1, etRADIX, 0, 0 }, { 's', 0, 4, etSTRING, 0, 0 }, { 'g', 0, 1, etGENERIC, 30, 0 }, { 'z', 0, 4, etDYNSTRING, 0, 0 }, { 'q', 0, 4, etSQLESCAPE, 0, 0 }, { 'Q', 0, 4, etSQLESCAPE2, 0, 0 }, { 'w', 0, 4, etSQLESCAPE3, 0, 0 }, { 'c', 0, 0, etCHARX, 0, 0 }, { 'o', 8, 0, etRADIX, 0, 2 }, { 'u', 10, 0, etRADIX, 0, 0 }, { 'x', 16, 0, etRADIX, 16, 1 }, { 'X', 16, 0, etRADIX, 0, 4 }, #ifndef SQLITE_OMIT_FLOATING_POINT { 'f', 0, 1, etFLOAT, 0, 0 }, { 'e', 0, 1, etEXP, 30, 0 }, { 'E', 0, 1, etEXP, 14, 0 }, { 'G', 0, 1, etGENERIC, 14, 0 }, #endif { 'i', 10, 1, etRADIX, 0, 0 }, { 'n', 0, 0, etSIZE, 0, 0 }, { '%', 0, 0, etPERCENT, 0, 0 }, { 'p', 16, 0, etPOINTER, 0, 1 }, /* All the rest have the FLAG_INTERN bit set and are thus for internal ** use only */ { 'r', 10, 3, etORDINAL, 0, 0 }, }; /* ** If SQLITE_OMIT_FLOATING_POINT is defined, then none of the floating point ** conversions will work. */ #ifndef SQLITE_OMIT_FLOATING_POINT /* ** "*val" is a double such that 0.1 <= *val < 10.0 ** Return the ascii code for the leading digit of *val, then ** multiply "*val" by 10.0 to renormalize. ** ** Example: ** input: *val = 3.14159 ** output: *val = 1.4159 function return = '3' ** ** The counter *cnt is incremented each time. After counter exceeds ** 16 (the number of significant digits in a 64-bit float) '0' is ** always returned. */ static char et_getdigit(LONGDOUBLE_TYPE *val, int *cnt){ int digit; LONGDOUBLE_TYPE d; if( (*cnt)<=0 ) return '0'; (*cnt)--; digit = (int)*val; d = digit; digit += '0'; *val = (*val - d)*10.0; return (char)digit; } #endif /* SQLITE_OMIT_FLOATING_POINT */ /* ** Set the StrAccum object to an error mode. */ static void setStrAccumError(StrAccum *p, u8 eError){ assert( eError==STRACCUM_NOMEM || eError==STRACCUM_TOOBIG ); p->accError = eError; p->nAlloc = 0; } /* ** Extra argument values from a PrintfArguments object */ static sqlite3_int64 getIntArg(PrintfArguments *p){ if( p->nArg<=p->nUsed ) return 0; return sqlite3VdbeIntValue((Mem*)p->apArg[p->nUsed++]); } static double getDoubleArg(PrintfArguments *p){ if( p->nArg<=p->nUsed ) return 0.0; return sqlite3VdbeRealValue((Mem*)p->apArg[p->nUsed++]); } static char *getTextArg(PrintfArguments *p){ if( p->nArg<=p->nUsed ) return 0; return (char*)sqlite3ValueText(p->apArg[p->nUsed++], SQLITE_UTF8); } /* ** On machines with a small stack size, you can redefine the ** SQLITE_PRINT_BUF_SIZE to be something smaller, if desired. */ #ifndef SQLITE_PRINT_BUF_SIZE # define SQLITE_PRINT_BUF_SIZE 70 #endif #define etBUFSIZE SQLITE_PRINT_BUF_SIZE /* Size of the output buffer */ /* ** Render a string given by "fmt" into the StrAccum object. */ SQLITE_PRIVATE void sqlite3VXPrintf( StrAccum *pAccum, /* Accumulate results here */ u32 bFlags, /* SQLITE_PRINTF_* flags */ const char *fmt, /* Format string */ va_list ap /* arguments */ ){ int c; /* Next character in the format string */ char *bufpt; /* Pointer to the conversion buffer */ int precision; /* Precision of the current field */ int length; /* Length of the field */ int idx; /* A general purpose loop counter */ int width; /* Width of the current field */ etByte flag_leftjustify; /* True if "-" flag is present */ etByte flag_plussign; /* True if "+" flag is present */ etByte flag_blanksign; /* True if " " flag is present */ etByte flag_alternateform; /* True if "#" flag is present */ etByte flag_altform2; /* True if "!" flag is present */ etByte flag_zeropad; /* True if field width constant starts with zero */ etByte flag_long; /* True if "l" flag is present */ etByte flag_longlong; /* True if the "ll" flag is present */ etByte done; /* Loop termination flag */ etByte xtype = 0; /* Conversion paradigm */ u8 bArgList; /* True for SQLITE_PRINTF_SQLFUNC */ u8 useIntern; /* Ok to use internal conversions (ex: %T) */ char prefix; /* Prefix character. "+" or "-" or " " or '\0'. */ sqlite_uint64 longvalue; /* Value for integer types */ LONGDOUBLE_TYPE realvalue; /* Value for real types */ const et_info *infop; /* Pointer to the appropriate info structure */ char *zOut; /* Rendering buffer */ int nOut; /* Size of the rendering buffer */ char *zExtra = 0; /* Malloced memory used by some conversion */ #ifndef SQLITE_OMIT_FLOATING_POINT int exp, e2; /* exponent of real numbers */ int nsd; /* Number of significant digits returned */ double rounder; /* Used for rounding floating point values */ etByte flag_dp; /* True if decimal point should be shown */ etByte flag_rtz; /* True if trailing zeros should be removed */ #endif PrintfArguments *pArgList = 0; /* Arguments for SQLITE_PRINTF_SQLFUNC */ char buf[etBUFSIZE]; /* Conversion buffer */ bufpt = 0; if( bFlags ){ if( (bArgList = (bFlags & SQLITE_PRINTF_SQLFUNC))!=0 ){ pArgList = va_arg(ap, PrintfArguments*); } useIntern = bFlags & SQLITE_PRINTF_INTERNAL; }else{ bArgList = useIntern = 0; } for(; (c=(*fmt))!=0; ++fmt){ if( c!='%' ){ bufpt = (char *)fmt; #if HAVE_STRCHRNUL fmt = strchrnul(fmt, '%'); #else do{ fmt++; }while( *fmt && *fmt != '%' ); #endif sqlite3StrAccumAppend(pAccum, bufpt, (int)(fmt - bufpt)); if( *fmt==0 ) break; } if( (c=(*++fmt))==0 ){ sqlite3StrAccumAppend(pAccum, "%", 1); break; } /* Find out what flags are present */ flag_leftjustify = flag_plussign = flag_blanksign = flag_alternateform = flag_altform2 = flag_zeropad = 0; done = 0; do{ switch( c ){ case '-': flag_leftjustify = 1; break; case '+': flag_plussign = 1; break; case ' ': flag_blanksign = 1; break; case '#': flag_alternateform = 1; break; case '!': flag_altform2 = 1; break; case '0': flag_zeropad = 1; break; default: done = 1; break; } }while( !done && (c=(*++fmt))!=0 ); /* Get the field width */ if( c=='*' ){ if( bArgList ){ width = (int)getIntArg(pArgList); }else{ width = va_arg(ap,int); } if( width<0 ){ flag_leftjustify = 1; width = width >= -2147483647 ? -width : 0; } c = *++fmt; }else{ unsigned wx = 0; while( c>='0' && c<='9' ){ wx = wx*10 + c - '0'; c = *++fmt; } testcase( wx>0x7fffffff ); width = wx & 0x7fffffff; } /* Get the precision */ if( c=='.' ){ c = *++fmt; if( c=='*' ){ if( bArgList ){ precision = (int)getIntArg(pArgList); }else{ precision = va_arg(ap,int); } c = *++fmt; if( precision<0 ){ precision = precision >= -2147483647 ? -precision : -1; } }else{ unsigned px = 0; while( c>='0' && c<='9' ){ px = px*10 + c - '0'; c = *++fmt; } testcase( px>0x7fffffff ); precision = px & 0x7fffffff; } }else{ precision = -1; } /* Get the conversion type modifier */ if( c=='l' ){ flag_long = 1; c = *++fmt; if( c=='l' ){ flag_longlong = 1; c = *++fmt; }else{ flag_longlong = 0; } }else{ flag_long = flag_longlong = 0; } /* Fetch the info entry for the field */ infop = &fmtinfo[0]; xtype = etINVALID; for(idx=0; idxflags & FLAG_INTERN)==0 ){ xtype = infop->type; }else{ return; } break; } } /* ** At this point, variables are initialized as follows: ** ** flag_alternateform TRUE if a '#' is present. ** flag_altform2 TRUE if a '!' is present. ** flag_plussign TRUE if a '+' is present. ** flag_leftjustify TRUE if a '-' is present or if the ** field width was negative. ** flag_zeropad TRUE if the width began with 0. ** flag_long TRUE if the letter 'l' (ell) prefixed ** the conversion character. ** flag_longlong TRUE if the letter 'll' (ell ell) prefixed ** the conversion character. ** flag_blanksign TRUE if a ' ' is present. ** width The specified field width. This is ** always non-negative. Zero is the default. ** precision The specified precision. The default ** is -1. ** xtype The class of the conversion. ** infop Pointer to the appropriate info struct. */ switch( xtype ){ case etPOINTER: flag_longlong = sizeof(char*)==sizeof(i64); flag_long = sizeof(char*)==sizeof(long int); /* Fall through into the next case */ case etORDINAL: case etRADIX: if( infop->flags & FLAG_SIGNED ){ i64 v; if( bArgList ){ v = getIntArg(pArgList); }else if( flag_longlong ){ v = va_arg(ap,i64); }else if( flag_long ){ v = va_arg(ap,long int); }else{ v = va_arg(ap,int); } if( v<0 ){ if( v==SMALLEST_INT64 ){ longvalue = ((u64)1)<<63; }else{ longvalue = -v; } prefix = '-'; }else{ longvalue = v; if( flag_plussign ) prefix = '+'; else if( flag_blanksign ) prefix = ' '; else prefix = 0; } }else{ if( bArgList ){ longvalue = (u64)getIntArg(pArgList); }else if( flag_longlong ){ longvalue = va_arg(ap,u64); }else if( flag_long ){ longvalue = va_arg(ap,unsigned long int); }else{ longvalue = va_arg(ap,unsigned int); } prefix = 0; } if( longvalue==0 ) flag_alternateform = 0; if( flag_zeropad && precision=4 || (longvalue/10)%10==1 ){ x = 0; } *(--bufpt) = zOrd[x*2+1]; *(--bufpt) = zOrd[x*2]; } { const char *cset = &aDigits[infop->charset]; u8 base = infop->base; do{ /* Convert to ascii */ *(--bufpt) = cset[longvalue%base]; longvalue = longvalue/base; }while( longvalue>0 ); } length = (int)(&zOut[nOut-1]-bufpt); for(idx=precision-length; idx>0; idx--){ *(--bufpt) = '0'; /* Zero pad */ } if( prefix ) *(--bufpt) = prefix; /* Add sign */ if( flag_alternateform && infop->prefix ){ /* Add "0" or "0x" */ const char *pre; char x; pre = &aPrefix[infop->prefix]; for(; (x=(*pre))!=0; pre++) *(--bufpt) = x; } length = (int)(&zOut[nOut-1]-bufpt); break; case etFLOAT: case etEXP: case etGENERIC: if( bArgList ){ realvalue = getDoubleArg(pArgList); }else{ realvalue = va_arg(ap,double); } #ifdef SQLITE_OMIT_FLOATING_POINT length = 0; #else if( precision<0 ) precision = 6; /* Set default precision */ if( realvalue<0.0 ){ realvalue = -realvalue; prefix = '-'; }else{ if( flag_plussign ) prefix = '+'; else if( flag_blanksign ) prefix = ' '; else prefix = 0; } if( xtype==etGENERIC && precision>0 ) precision--; testcase( precision>0xfff ); for(idx=precision&0xfff, rounder=0.5; idx>0; idx--, rounder*=0.1){} if( xtype==etFLOAT ) realvalue += rounder; /* Normalize realvalue to within 10.0 > realvalue >= 1.0 */ exp = 0; if( sqlite3IsNaN((double)realvalue) ){ bufpt = "NaN"; length = 3; break; } if( realvalue>0.0 ){ LONGDOUBLE_TYPE scale = 1.0; while( realvalue>=1e100*scale && exp<=350 ){ scale *= 1e100;exp+=100;} while( realvalue>=1e10*scale && exp<=350 ){ scale *= 1e10; exp+=10; } while( realvalue>=10.0*scale && exp<=350 ){ scale *= 10.0; exp++; } realvalue /= scale; while( realvalue<1e-8 ){ realvalue *= 1e8; exp-=8; } while( realvalue<1.0 ){ realvalue *= 10.0; exp--; } if( exp>350 ){ bufpt = buf; buf[0] = prefix; memcpy(buf+(prefix!=0),"Inf",4); length = 3+(prefix!=0); break; } } bufpt = buf; /* ** If the field type is etGENERIC, then convert to either etEXP ** or etFLOAT, as appropriate. */ if( xtype!=etFLOAT ){ realvalue += rounder; if( realvalue>=10.0 ){ realvalue *= 0.1; exp++; } } if( xtype==etGENERIC ){ flag_rtz = !flag_alternateform; if( exp<-4 || exp>precision ){ xtype = etEXP; }else{ precision = precision - exp; xtype = etFLOAT; } }else{ flag_rtz = flag_altform2; } if( xtype==etEXP ){ e2 = 0; }else{ e2 = exp; } if( MAX(e2,0)+(i64)precision+(i64)width > etBUFSIZE - 15 ){ bufpt = zExtra = sqlite3Malloc( MAX(e2,0)+(i64)precision+(i64)width+15 ); if( bufpt==0 ){ setStrAccumError(pAccum, STRACCUM_NOMEM); return; } } zOut = bufpt; nsd = 16 + flag_altform2*10; flag_dp = (precision>0 ?1:0) | flag_alternateform | flag_altform2; /* The sign in front of the number */ if( prefix ){ *(bufpt++) = prefix; } /* Digits prior to the decimal point */ if( e2<0 ){ *(bufpt++) = '0'; }else{ for(; e2>=0; e2--){ *(bufpt++) = et_getdigit(&realvalue,&nsd); } } /* The decimal point */ if( flag_dp ){ *(bufpt++) = '.'; } /* "0" digits after the decimal point but before the first ** significant digit of the number */ for(e2++; e2<0; precision--, e2++){ assert( precision>0 ); *(bufpt++) = '0'; } /* Significant digits after the decimal point */ while( (precision--)>0 ){ *(bufpt++) = et_getdigit(&realvalue,&nsd); } /* Remove trailing zeros and the "." if no digits follow the "." */ if( flag_rtz && flag_dp ){ while( bufpt[-1]=='0' ) *(--bufpt) = 0; assert( bufpt>zOut ); if( bufpt[-1]=='.' ){ if( flag_altform2 ){ *(bufpt++) = '0'; }else{ *(--bufpt) = 0; } } } /* Add the "eNNN" suffix */ if( xtype==etEXP ){ *(bufpt++) = aDigits[infop->charset]; if( exp<0 ){ *(bufpt++) = '-'; exp = -exp; }else{ *(bufpt++) = '+'; } if( exp>=100 ){ *(bufpt++) = (char)((exp/100)+'0'); /* 100's digit */ exp %= 100; } *(bufpt++) = (char)(exp/10+'0'); /* 10's digit */ *(bufpt++) = (char)(exp%10+'0'); /* 1's digit */ } *bufpt = 0; /* The converted number is in buf[] and zero terminated. Output it. ** Note that the number is in the usual order, not reversed as with ** integer conversions. */ length = (int)(bufpt-zOut); bufpt = zOut; /* Special case: Add leading zeros if the flag_zeropad flag is ** set and we are not left justified */ if( flag_zeropad && !flag_leftjustify && length < width){ int i; int nPad = width - length; for(i=width; i>=nPad; i--){ bufpt[i] = bufpt[i-nPad]; } i = prefix!=0; while( nPad-- ) bufpt[i++] = '0'; length = width; } #endif /* !defined(SQLITE_OMIT_FLOATING_POINT) */ break; case etSIZE: if( !bArgList ){ *(va_arg(ap,int*)) = pAccum->nChar; } length = width = 0; break; case etPERCENT: buf[0] = '%'; bufpt = buf; length = 1; break; case etCHARX: if( bArgList ){ bufpt = getTextArg(pArgList); c = bufpt ? bufpt[0] : 0; }else{ c = va_arg(ap,int); } if( precision>1 ){ width -= precision-1; if( width>1 && !flag_leftjustify ){ sqlite3AppendChar(pAccum, width-1, ' '); width = 0; } sqlite3AppendChar(pAccum, precision-1, c); } length = 1; buf[0] = c; bufpt = buf; break; case etSTRING: case etDYNSTRING: if( bArgList ){ bufpt = getTextArg(pArgList); xtype = etSTRING; }else{ bufpt = va_arg(ap,char*); } if( bufpt==0 ){ bufpt = ""; }else if( xtype==etDYNSTRING ){ zExtra = bufpt; } if( precision>=0 ){ for(length=0; lengthetBUFSIZE ){ bufpt = zExtra = sqlite3Malloc( n ); if( bufpt==0 ){ setStrAccumError(pAccum, STRACCUM_NOMEM); return; } }else{ bufpt = buf; } j = 0; if( needQuote ) bufpt[j++] = q; k = i; for(i=0; i=0 && precision0 && !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' '); sqlite3StrAccumAppend(pAccum, bufpt, length); if( width>0 && flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' '); if( zExtra ){ sqlite3_free(zExtra); zExtra = 0; } }/* End for loop over the format string */ } /* End of function */ /* ** Enlarge the memory allocation on a StrAccum object so that it is ** able to accept at least N more bytes of text. ** ** Return the number of bytes of text that StrAccum is able to accept ** after the attempted enlargement. The value returned might be zero. */ static int sqlite3StrAccumEnlarge(StrAccum *p, int N){ char *zNew; assert( p->nChar+(i64)N >= p->nAlloc ); /* Only called if really needed */ if( p->accError ){ testcase(p->accError==STRACCUM_TOOBIG); testcase(p->accError==STRACCUM_NOMEM); return 0; } if( p->mxAlloc==0 ){ N = p->nAlloc - p->nChar - 1; setStrAccumError(p, STRACCUM_TOOBIG); return N; }else{ char *zOld = (p->zText==p->zBase ? 0 : p->zText); i64 szNew = p->nChar; szNew += N + 1; if( szNew+p->nChar<=p->mxAlloc ){ /* Force exponential buffer size growth as long as it does not overflow, ** to avoid having to call this routine too often */ szNew += p->nChar; } if( szNew > p->mxAlloc ){ sqlite3StrAccumReset(p); setStrAccumError(p, STRACCUM_TOOBIG); return 0; }else{ p->nAlloc = (int)szNew; } if( p->pBtree ){ zNew = sqlite3DbRealloc(p->pBtree, zOld, p->nAlloc); }else{ zNew = sqlite3_realloc64(zOld, p->nAlloc); } if( zNew ){ assert( p->zText!=0 || p->nChar==0 ); if( zOld==0 && p->nChar>0 ) memcpy(zNew, p->zText, p->nChar); p->zText = zNew; p->nAlloc = sqlite3DbMallocSize(p->pBtree, zNew); }else{ sqlite3StrAccumReset(p); setStrAccumError(p, STRACCUM_NOMEM); return 0; } } return N; } /* ** Append N copies of character c to the given string buffer. */ SQLITE_PRIVATE void sqlite3AppendChar(StrAccum *p, int N, char c){ testcase( p->nChar + (i64)N > 0x7fffffff ); if( p->nChar+(i64)N >= p->nAlloc && (N = sqlite3StrAccumEnlarge(p, N))<=0 ){ return; } while( (N--)>0 ) p->zText[p->nChar++] = c; } /* ** The StrAccum "p" is not large enough to accept N new bytes of z[]. ** So enlarge if first, then do the append. ** ** This is a helper routine to sqlite3StrAccumAppend() that does special-case ** work (enlarging the buffer) using tail recursion, so that the ** sqlite3StrAccumAppend() routine can use fast calling semantics. */ static void SQLITE_NOINLINE enlargeAndAppend(StrAccum *p, const char *z, int N){ N = sqlite3StrAccumEnlarge(p, N); if( N>0 ){ memcpy(&p->zText[p->nChar], z, N); p->nChar += N; } } /* ** Append N bytes of text from z to the StrAccum object. Increase the ** size of the memory allocation for StrAccum if necessary. */ SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum *p, const char *z, int N){ assert( z!=0 || N==0 ); assert( p->zText!=0 || p->nChar==0 || p->accError ); assert( N>=0 ); assert( p->accError==0 || p->nAlloc==0 ); if( p->nChar+N >= p->nAlloc ){ enlargeAndAppend(p,z,N); }else{ assert( p->zText ); p->nChar += N; memcpy(&p->zText[p->nChar-N], z, N); } } /* ** Finish off a string by making sure it is zero-terminated. ** Return a pointer to the resulting string. Return a NULL ** pointer if any kind of error was encountered. */ SQLITE_PRIVATE char *sqlite3StrAccumFinish(StrAccum *p){ if( p->zText ){ p->zText[p->nChar] = 0; if( p->mxAlloc>0 && p->zText==p->zBase ){ p->zText = sqlite3DbMallocRaw(p->pBtree, p->nChar+1 ); if( p->zText ){ memcpy(p->zText, p->zBase, p->nChar+1); }else{ setStrAccumError(p, STRACCUM_NOMEM); } } } return p->zText; } /* ** Reset an StrAccum string. Reclaim all malloced memory. */ SQLITE_PRIVATE void sqlite3StrAccumReset(StrAccum *p){ if( p->zText!=p->zBase ){ sqlite3DbFree(p->pBtree, p->zText); } p->zText = 0; } /* ** Initialize a string accumulator. ** ** p: The accumulator to be initialized. ** db: Pointer to a database connection. May be NULL. Lookaside ** memory is used if not NULL. db->mallocFailed is set appropriately ** when not NULL. ** zBase: An initial buffer. May be NULL in which case the initial buffer ** is malloced. ** n: Size of zBase in bytes. If total space requirements never exceed ** n then no memory allocations ever occur. ** mx: Maximum number of bytes to accumulate. If mx==0 then no memory ** allocations will ever occur. */ SQLITE_PRIVATE void sqlite3StrAccumInit(StrAccum *p, Btree* pBtree, char *zBase, int n, int mx){ p->zText = p->zBase = zBase; p->pBtree = pBtree; p->nChar = 0; p->nAlloc = n; p->mxAlloc = mx; p->accError = 0; } /* ** Print into memory obtained from sqlite3_malloc(). Omit the internal ** %-conversion extensions. */ SQLITE_PRIVATE char *sqlite3_vmprintf(const char *zFormat, va_list ap){ char *z; char zBase[SQLITE_PRINT_BUF_SIZE]; StrAccum acc; #ifdef SQLITE_ENABLE_API_ARMOR if( zFormat==0 ){ (void)SQLITE_MISUSE_BKPT; return 0; } #endif #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3BtreeInitialize() ) return 0; #endif sqlite3StrAccumInit(&acc, 0, zBase, sizeof(zBase), SQLITE_MAX_LENGTH); sqlite3VXPrintf(&acc, 0, zFormat, ap); z = sqlite3StrAccumFinish(&acc); return z; } /* ** Print into memory obtained from sqlite3_malloc()(). Omit the internal ** %-conversion extensions. */ SQLITE_PRIVATE char *sqlite3_mprintf(const char *zFormat, ...){ va_list ap; char *z; #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3BtreeInitialize() ) return 0; #endif va_start(ap, zFormat); z = sqlite3_vmprintf(zFormat, ap); va_end(ap); return z; } /* ** sqlite3_snprintf() works like snprintf() except that it ignores the ** current locale settings. This is important for SQLite because we ** are not able to use a "," as the decimal point in place of "." as ** specified by some locales. ** ** Oops: The first two arguments of sqlite3_snprintf() are backwards ** from the snprintf() standard. Unfortunately, it is too late to change ** this without breaking compatibility, so we just have to live with the ** mistake. ** ** sqlite3_vsnprintf() is the varargs version. */ SQLITE_PRIVATE char *sqlite3_vsnprintf(int n, char *zBuf, const char *zFormat, va_list ap){ StrAccum acc; if( n<=0 ) return zBuf; #ifdef SQLITE_ENABLE_API_ARMOR if( zBuf==0 || zFormat==0 ) { (void)SQLITE_MISUSE_BKPT; if( zBuf ) zBuf[0] = 0; return zBuf; } #endif sqlite3StrAccumInit(&acc, 0, zBuf, n, 0); sqlite3VXPrintf(&acc, 0, zFormat, ap); return sqlite3StrAccumFinish(&acc); } SQLITE_PRIVATE char *sqlite3_snprintf(int n, char *zBuf, const char *zFormat, ...){ char *z; va_list ap; va_start(ap,zFormat); z = sqlite3_vsnprintf(n, zBuf, zFormat, ap); va_end(ap); return z; } /* ** This is the routine that actually formats the sqlite3_log() message. ** We house it in a separate routine from sqlite3_log() to avoid using ** stack space on small-stack systems when logging is disabled. ** ** sqlite3_log() must render into a static buffer. It cannot dynamically ** allocate memory because it might be called while the memory allocator ** mutex is held. ** ** sqlite3VXPrintf() might ask for *temporary* memory allocations for ** certain format characters (%q) or for very large precisions or widths. ** Care must be taken that any sqlite3_log() calls that occur while the ** memory mutex is held do not use these mechanisms. */ static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){ StrAccum acc; /* String accumulator */ char zMsg[SQLITE_PRINT_BUF_SIZE*3]; /* Complete log message */ sqlite3StrAccumInit(&acc, 0, zMsg, sizeof(zMsg), 0); sqlite3VXPrintf(&acc, 0, zFormat, ap); sqlite3GlobalConfig.xLog(sqlite3GlobalConfig.pLogArg, iErrCode, sqlite3StrAccumFinish(&acc)); } /* ** Format and write a message to the log if logging is enabled. */ SQLITE_PRIVATE void sqlite3_log(int iErrCode, const char *zFormat, ...){ va_list ap; /* Vararg list */ if( sqlite3GlobalConfig.xLog ){ va_start(ap, zFormat); renderLogMsg(iErrCode, zFormat, ap); va_end(ap); } } #if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) /* ** A version of printf() that understands %lld. Used for debugging. ** The printf() built into some versions of windows does not understand %lld ** and segfaults if you give it a long long int. */ SQLITE_PRIVATE void sqlite3DebugPrintf(const char *zFormat, ...){ va_list ap; StrAccum acc; char zBuf[500]; sqlite3StrAccumInit(&acc, 0, zBuf, sizeof(zBuf), 0); va_start(ap,zFormat); sqlite3VXPrintf(&acc, 0, zFormat, ap); va_end(ap); sqlite3StrAccumFinish(&acc); fprintf(stdout,"%s", zBuf); fflush(stdout); } #endif /* ** variable-argument wrapper around sqlite3VXPrintf(). The bFlags argument ** can contain the bit SQLITE_PRINTF_INTERNAL enable internal formats. */ SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ...){ va_list ap; va_start(ap,zFormat); sqlite3VXPrintf(p, bFlags, zFormat, ap); va_end(ap); } /************** End of printf.c **********************************************/ /************** Begin file random.c ******************************************/ /* ** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file contains code to implement a pseudo-random number ** generator (PRNG) for SQLite. ** ** Random numbers are used by some of the database backends in order ** to generate random integer keys for tables or random filenames. */ /* #include "sqliteInt.h" */ /* All threads share a single random number generator. ** This structure is the current state of the generator. */ static SQLITE_WSD struct sqlite3PrngType { unsigned char isInit; /* True if initialized */ unsigned char i, j; /* State variables */ unsigned char s[256]; /* State variables */ } sqlite3Prng; /* ** Return N random bytes. */ SQLITE_PRIVATE void sqlite3_randomness(int N, void *pBuf){ unsigned char t; unsigned char *zBuf = pBuf; /* The "wsdPrng" macro will resolve to the pseudo-random number generator ** state vector. If writable static data is unsupported on the target, ** we have to locate the state vector at run-time. In the more common ** case where writable static data is supported, wsdPrng can refer directly ** to the "sqlite3Prng" state vector declared above. */ #ifdef SQLITE_OMIT_WSD struct sqlite3PrngType *p = &GLOBAL(struct sqlite3PrngType, sqlite3Prng); # define wsdPrng p[0] #else # define wsdPrng sqlite3Prng #endif #if SQLITE_THREADSAFE sqlite3_mutex *mutex; #endif #ifndef SQLITE_OMIT_AUTOINIT if( sqlite3BtreeInitialize() ) return; #endif #if SQLITE_THREADSAFE mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_PRNG); #endif sqlite3_mutex_enter(mutex); if( N<=0 || pBuf==0 ){ wsdPrng.isInit = 0; sqlite3_mutex_leave(mutex); return; } /* Initialize the state of the random number generator once, ** the first time this routine is called. The seed value does ** not need to contain a lot of randomness since we are not ** trying to do secure encryption or anything like that... ** ** Nothing in this file or anywhere else in SQLite does any kind of ** encryption. The RC4 algorithm is being used as a PRNG (pseudo-random ** number generator) not as an encryption device. */ if( !wsdPrng.isInit ){ int i; char k[256]; wsdPrng.j = 0; wsdPrng.i = 0; sqlite3OsRandomness(sqlite3_vfs_find(0), 256, k); for(i=0; i<256; i++){ wsdPrng.s[i] = (u8)i; } for(i=0; i<256; i++){ wsdPrng.j += wsdPrng.s[i] + k[i]; t = wsdPrng.s[wsdPrng.j]; wsdPrng.s[wsdPrng.j] = wsdPrng.s[i]; wsdPrng.s[i] = t; } wsdPrng.isInit = 1; } assert( N>0 ); do{ wsdPrng.i++; t = wsdPrng.s[wsdPrng.i]; wsdPrng.j += t; wsdPrng.s[wsdPrng.i] = wsdPrng.s[wsdPrng.j]; wsdPrng.s[wsdPrng.j] = t; t += wsdPrng.s[wsdPrng.i]; *(zBuf++) = wsdPrng.s[t]; }while( --N ); sqlite3_mutex_leave(mutex); } /************** End of random.c **********************************************/ /************** Begin file util.c ********************************************/ /* ** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** Utility functions used throughout sqlite. ** ** This file contains functions for allocating memory, comparing ** strings, and stuff like that. ** */ /* #include "sqliteInt.h" */ /* #include */ #if HAVE_ISNAN || SQLITE_HAVE_ISNAN # include #endif /* ** Routine needed to support the testcase() macro. */ #ifdef SQLITE_COVERAGE_TEST SQLITE_PRIVATE void sqlite3Coverage(int x){ static unsigned dummy = 0; dummy += (unsigned)x; } #endif #ifndef SQLITE_OMIT_FLOATING_POINT /* ** Return true if the floating point value is Not a Number (NaN). ** ** Use the math library isnan() function if compiled with SQLITE_HAVE_ISNAN. ** Otherwise, we have our own implementation that works on most systems. */ SQLITE_PRIVATE int sqlite3IsNaN(double x){ int rc; /* The value return */ #if !SQLITE_HAVE_ISNAN && !HAVE_ISNAN /* ** Systems that support the isnan() library function should probably ** make use of it by compiling with -DSQLITE_HAVE_ISNAN. But we have ** found that many systems do not have a working isnan() function so ** this implementation is provided as an alternative. ** ** This NaN test sometimes fails if compiled on GCC with -ffast-math. ** On the other hand, the use of -ffast-math comes with the following ** warning: ** ** This option [-ffast-math] should never be turned on by any ** -O option since it can result in incorrect output for programs ** which depend on an exact implementation of IEEE or ISO ** rules/specifications for math functions. ** ** Under MSVC, this NaN test may fail if compiled with a floating- ** point precision mode other than /fp:precise. From the MSDN ** documentation: ** ** The compiler [with /fp:precise] will properly handle comparisons ** involving NaN. For example, x != x evaluates to true if x is NaN ** ... */ #ifdef __FAST_MATH__ # error SQLite will not work correctly with the -ffast-math option of GCC. #endif volatile double y = x; volatile double z = y; rc = (y!=z); #else /* if HAVE_ISNAN */ rc = isnan(x); #endif /* HAVE_ISNAN */ testcase( rc ); return rc; } #endif /* SQLITE_OMIT_FLOATING_POINT */ /* ** Compute a string length that is limited to what can be stored in ** lower 30 bits of a 32-bit signed integer. ** ** The value returned will never be negative. Nor will it ever be greater ** than the actual length of the string. For very long strings (greater ** than 1GiB) the value returned might be less than the true string length. */ SQLITE_PRIVATE int sqlite3Strlen30(const char *z){ if( z==0 ) return 0; return 0x3fffffff & (int)strlen(z); } /* Convenient short-hand */ #define UpperToLower sqlite3UpperToLower int sqlite3StrICmp(const char *zLeft, const char *zRight){ unsigned char *a, *b; int c; a = (unsigned char *)zLeft; b = (unsigned char *)zRight; for(;;){ c = (int)UpperToLower[*a] - (int)UpperToLower[*b]; if( c || *a==0 ) break; a++; b++; } return c; } /* ** The string z[] is an text representation of a real number. ** Convert this string to a double and write it into *pResult. ** ** The string z[] is length bytes in length (bytes, not characters) and ** uses the encoding enc. The string is not necessarily zero-terminated. ** ** Return TRUE if the result is a valid real number (or integer) and FALSE ** if the string is empty or contains extraneous text. Valid numbers ** are in one of these formats: ** ** [+-]digits[E[+-]digits] ** [+-]digits.[digits][E[+-]digits] ** [+-].digits[E[+-]digits] ** ** Leading and trailing whitespace is ignored for the purpose of determining ** validity. ** ** If some prefix of the input string is a valid number, this routine ** returns FALSE but it still converts the prefix and writes the result ** into *pResult. */ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 enc){ #ifndef SQLITE_OMIT_FLOATING_POINT int incr; const char *zEnd = z + length; /* sign * significand * (10 ^ (esign * exponent)) */ int sign = 1; /* sign of significand */ i64 s = 0; /* significand */ int d = 0; /* adjust exponent for shifting decimal point */ int esign = 1; /* sign of exponent */ int e = 0; /* exponent */ int eValid = 1; /* True exponent is either not used or is well-formed */ double result; int nDigits = 0; int nonNum = 0; assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); *pResult = 0.0; /* Default return value, in case of an error */ if( enc==SQLITE_UTF8 ){ incr = 1; }else{ int i; incr = 2; assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 ); for(i=3-enc; i=zEnd ) return 0; /* get sign of significand */ if( *z=='-' ){ sign = -1; z+=incr; }else if( *z=='+' ){ z+=incr; } /* skip leading zeroes */ while( z=zEnd ) goto do_atof_calc; /* if decimal point is present */ if( *z=='.' ){ z+=incr; /* copy digits from after decimal to significand ** (decrease exponent by d to shift decimal right) */ while( z=zEnd ) goto do_atof_calc; /* if exponent is present */ if( *z=='e' || *z=='E' ){ z+=incr; eValid = 0; if( z>=zEnd ) goto do_atof_calc; /* get sign of exponent */ if( *z=='-' ){ esign = -1; z+=incr; }else if( *z=='+' ){ z+=incr; } /* copy digits to exponent */ while( z0 ){ while( s<(LARGEST_INT64/10) && e>0 ) e--,s*=10; }else{ while( !(s%10) && e>0 ) e--,s/=10; } /* adjust the sign of significand */ s = sign<0 ? -s : s; /* if exponent, scale significand as appropriate ** and store in result. */ if( e ){ LONGDOUBLE_TYPE scale = 1.0; /* attempt to handle extremely small/large numbers better */ if( e>307 && e<342 ){ while( e%308 ) { scale *= 1.0e+1; e -= 1; } if( esign<0 ){ result = s / scale; result /= 1.0e+308; }else{ result = s * scale; result *= 1.0e+308; } }else if( e>=342 ){ if( esign<0 ){ result = 0.0*s; }else{ result = 1e308*1e308*s; /* Infinity */ } }else{ /* 1.0e+22 is the largest power of 10 than can be ** represented exactly. */ while( e%22 ) { scale *= 1.0e+1; e -= 1; } while( e>0 ) { scale *= 1.0e+22; e -= 22; } if( esign<0 ){ result = s / scale; }else{ result = s * scale; } } } else { result = (double)s; } } /* store the result */ *pResult = result; /* return true if number and no extra non-whitespace chracters after */ return z>=zEnd && nDigits>0 && eValid && nonNum==0; #else return !sqlite3Atoi64(z, pResult, length, enc); #endif /* SQLITE_OMIT_FLOATING_POINT */ } /* ** Compare the 19-character string zNum against the text representation ** value 2^63: 9223372036854775808. Return negative, zero, or positive ** if zNum is less than, equal to, or greater than the string. ** Note that zNum must contain exactly 19 characters. ** ** Unlike memcmp() this routine is guaranteed to return the difference ** in the values of the last digit if the only difference is in the ** last digit. So, for example, ** ** compare2pow63("9223372036854775800", 1) ** ** will return -8. */ static int compare2pow63(const char *zNum, int incr){ int c = 0; int i; /* 012345678901234567 */ const char *pow63 = "922337203685477580"; for(i=0; c==0 && i<18; i++){ c = (zNum[i*incr]-pow63[i])*10; } if( c==0 ){ c = zNum[18*incr] - '8'; testcase( c==(-1) ); testcase( c==0 ); testcase( c==(+1) ); } return c; } /* ** Convert zNum to a 64-bit signed integer. zNum must be decimal. This ** routine does *not* accept hexadecimal notation. ** ** If the zNum value is representable as a 64-bit twos-complement ** integer, then write that value into *pNum and return 0. ** ** If zNum is exactly 9223372036854775808, return 2. This special ** case is broken out because while 9223372036854775808 cannot be a ** signed 64-bit integer, its negative -9223372036854775808 can be. ** ** If zNum is too big for a 64-bit integer and is not ** 9223372036854775808 or if zNum contains any non-numeric text, ** then return 1. ** ** length is the number of bytes in the string (bytes, not characters). ** The string is not necessarily zero-terminated. The encoding is ** given by enc. */ SQLITE_PRIVATE int sqlite3Atoi64(const char *zNum, i64 *pNum, int length, u8 enc){ int incr; u64 u = 0; int neg = 0; /* assume positive */ int i; int c = 0; int nonNum = 0; const char *zStart; const char *zEnd = zNum + length; assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); if( enc==SQLITE_UTF8 ){ incr = 1; }else{ incr = 2; assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 ); for(i=3-enc; i='0' && c<='9'; i+=incr){ u = u*10 + c - '0'; } if( u>LARGEST_INT64 ){ *pNum = neg ? SMALLEST_INT64 : LARGEST_INT64; }else if( neg ){ *pNum = -(i64)u; }else{ *pNum = (i64)u; } testcase( i==18 ); testcase( i==19 ); testcase( i==20 ); if( (c!=0 && &zNum[i]19*incr || nonNum ){ /* zNum is empty or contains non-numeric text or is longer ** than 19 digits (thus guaranteeing that it is too large) */ return 1; }else if( i<19*incr ){ /* Less than 19 digits, so we know that it fits in 64 bits */ assert( u<=LARGEST_INT64 ); return 0; }else{ /* zNum is a 19-digit numbers. Compare it against 9223372036854775808. */ c = compare2pow63(zNum, incr); if( c<0 ){ /* zNum is less than 9223372036854775808 so it fits */ assert( u<=LARGEST_INT64 ); return 0; }else if( c>0 ){ /* zNum is greater than 9223372036854775808 so it overflows */ return 1; }else{ /* zNum is exactly 9223372036854775808. Fits if negative. The ** special case 2 overflow if positive */ assert( u-1==LARGEST_INT64 ); return neg ? 0 : 2; } } } /* ** The variable-length integer encoding is as follows: ** ** KEY: ** A = 0xxxxxxx 7 bits of data and one flag bit ** B = 1xxxxxxx 7 bits of data and one flag bit ** C = xxxxxxxx 8 bits of data ** ** 7 bits - A ** 14 bits - BA ** 21 bits - BBA ** 28 bits - BBBA ** 35 bits - BBBBA ** 42 bits - BBBBBA ** 49 bits - BBBBBBA ** 56 bits - BBBBBBBA ** 64 bits - BBBBBBBBC */ /* ** Write a 64-bit variable-length integer to memory starting at p[0]. ** The length of data write will be between 1 and 9 bytes. The number ** of bytes written is returned. ** ** A variable-length integer consists of the lower 7 bits of each byte ** for all bytes that have the 8th bit set and one byte with the 8th ** bit clear. Except, if we get to the 9th byte, it stores the full ** 8 bits and is the last byte. */ static int SQLITE_NOINLINE putVarint64(unsigned char *p, u64 v){ int i, j, n; u8 buf[10]; if( v & (((u64)0xff000000)<<32) ){ p[8] = (u8)v; v >>= 8; for(i=7; i>=0; i--){ p[i] = (u8)((v & 0x7f) | 0x80); v >>= 7; } return 9; } n = 0; do{ buf[n++] = (u8)((v & 0x7f) | 0x80); v >>= 7; }while( v!=0 ); buf[0] &= 0x7f; assert( n<=9 ); for(i=0, j=n-1; j>=0; j--, i++){ p[i] = buf[j]; } return n; } SQLITE_API int SQLITE_STDCALL sqlite3BtreePutVarint(unsigned char *p, u64 v){ if( v<=0x7f ){ p[0] = v&0x7f; return 1; } if( v<=0x3fff ){ p[0] = ((v>>7)&0x7f)|0x80; p[1] = v&0x7f; return 2; } return putVarint64(p,v); } /* ** Bitmasks used by sqlite3GetVarint(). These precomputed constants ** are defined here rather than simply putting the constant expressions ** inline in order to work around bugs in the RVT compiler. ** ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f ** ** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0 */ #define SLOT_2_0 0x001fc07f #define SLOT_4_2_0 0xf01fc07f /* ** Read a 64-bit variable-length integer from memory starting at p[0]. ** Return the number of bytes read. The value is stored in *v. */ SQLITE_API u8 SQLITE_STDCALL sqlite3BtreeGetVarint(const unsigned char *p, u64 *v){ u32 a,b,s; a = *p; /* a: p0 (unmasked) */ if (!(a&0x80)) { *v = a; return 1; } p++; b = *p; /* b: p1 (unmasked) */ if (!(b&0x80)) { a &= 0x7f; a = a<<7; a |= b; *v = a; return 2; } /* Verify that constants are precomputed correctly */ assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) ); assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) ); p++; a = a<<14; a |= *p; /* a: p0<<14 | p2 (unmasked) */ if (!(a&0x80)) { a &= SLOT_2_0; b &= 0x7f; b = b<<7; a |= b; *v = a; return 3; } /* CSE1 from below */ a &= SLOT_2_0; p++; b = b<<14; b |= *p; /* b: p1<<14 | p3 (unmasked) */ if (!(b&0x80)) { b &= SLOT_2_0; /* moved CSE1 up */ /* a &= (0x7f<<14)|(0x7f); */ a = a<<7; a |= b; *v = a; return 4; } /* a: p0<<14 | p2 (masked) */ /* b: p1<<14 | p3 (unmasked) */ /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ /* moved CSE1 up */ /* a &= (0x7f<<14)|(0x7f); */ b &= SLOT_2_0; s = a; /* s: p0<<14 | p2 (masked) */ p++; a = a<<14; a |= *p; /* a: p0<<28 | p2<<14 | p4 (unmasked) */ if (!(a&0x80)) { /* we can skip these cause they were (effectively) done above in calc'ing s */ /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ /* b &= (0x7f<<14)|(0x7f); */ b = b<<7; a |= b; s = s>>18; *v = ((u64)s)<<32 | a; return 5; } /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ s = s<<7; s |= b; /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */ p++; b = b<<14; b |= *p; /* b: p1<<28 | p3<<14 | p5 (unmasked) */ if (!(b&0x80)) { /* we can skip this cause it was (effectively) done above in calc'ing s */ /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */ a &= SLOT_2_0; a = a<<7; a |= b; s = s>>18; *v = ((u64)s)<<32 | a; return 6; } p++; a = a<<14; a |= *p; /* a: p2<<28 | p4<<14 | p6 (unmasked) */ if (!(a&0x80)) { a &= SLOT_4_2_0; b &= SLOT_2_0; b = b<<7; a |= b; s = s>>11; *v = ((u64)s)<<32 | a; return 7; } /* CSE2 from below */ a &= SLOT_2_0; p++; b = b<<14; b |= *p; /* b: p3<<28 | p5<<14 | p7 (unmasked) */ if (!(b&0x80)) { b &= SLOT_4_2_0; /* moved CSE2 up */ /* a &= (0x7f<<14)|(0x7f); */ a = a<<7; a |= b; s = s>>4; *v = ((u64)s)<<32 | a; return 8; } p++; a = a<<15; a |= *p; /* a: p4<<29 | p6<<15 | p8 (unmasked) */ /* moved CSE2 up */ /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */ b &= SLOT_2_0; b = b<<8; a |= b; s = s<<4; b = p[-4]; b &= 0x7f; b = b>>3; s |= b; *v = ((u64)s)<<32 | a; return 9; } /* ** Read a 32-bit variable-length integer from memory starting at p[0]. ** Return the number of bytes read. The value is stored in *v. ** ** If the varint stored in p[0] is larger than can fit in a 32-bit unsigned ** integer, then set *v to 0xffffffff. ** ** A MACRO version, getVarint32, is provided which inlines the ** single-byte case. All code should use the MACRO version as ** this function assumes the single-byte case has already been handled. */ SQLITE_API u8 SQLITE_STDCALL sqlite3BtreeGetVarint32(const unsigned char *p, u32 *v){ u32 a,b; /* The 1-byte case. Overwhelmingly the most common. Handled inline ** by the getVarin32() macro */ a = *p; /* a: p0 (unmasked) */ #ifndef getVarint32 if (!(a&0x80)) { /* Values between 0 and 127 */ *v = a; return 1; } #endif /* The 2-byte case */ p++; b = *p; /* b: p1 (unmasked) */ if (!(b&0x80)) { /* Values between 128 and 16383 */ a &= 0x7f; a = a<<7; *v = a | b; return 2; } /* The 3-byte case */ p++; a = a<<14; a |= *p; /* a: p0<<14 | p2 (unmasked) */ if (!(a&0x80)) { /* Values between 16384 and 2097151 */ a &= (0x7f<<14)|(0x7f); b &= 0x7f; b = b<<7; *v = a | b; return 3; } /* A 32-bit varint is used to store size information in btrees. ** Objects are rarely larger than 2MiB limit of a 3-byte varint. ** A 3-byte varint is sufficient, for example, to record the size ** of a 1048569-byte BLOB or string. ** ** We only unroll the first 1-, 2-, and 3- byte cases. The very ** rare larger cases can be handled by the slower 64-bit varint ** routine. */ #if 1 { u64 v64; u8 n; p -= 2; n = sqlite3BtreeGetVarint(p, &v64); assert( n>3 && n<=9 ); if( (v64 & SQLITE_MAX_U32)!=v64 ){ *v = 0xffffffff; }else{ *v = (u32)v64; } return n; } #else /* For following code (kept for historical record only) shows an ** unrolling for the 3- and 4-byte varint cases. This code is ** slightly faster, but it is also larger and much harder to test. */ p++; b = b<<14; b |= *p; /* b: p1<<14 | p3 (unmasked) */ if (!(b&0x80)) { /* Values between 2097152 and 268435455 */ b &= (0x7f<<14)|(0x7f); a &= (0x7f<<14)|(0x7f); a = a<<7; *v = a | b; return 4; } p++; a = a<<14; a |= *p; /* a: p0<<28 | p2<<14 | p4 (unmasked) */ if (!(a&0x80)) { /* Values between 268435456 and 34359738367 */ a &= SLOT_4_2_0; b &= SLOT_4_2_0; b = b<<7; *v = a | b; return 5; } /* We can only reach this point when reading a corrupt database ** file. In that case we are not in any hurry. Use the (relatively ** slow) general-purpose sqlite3BtreeGetVarint() routine to extract the ** value. */ { u64 v64; u8 n; p -= 4; n = sqlite3BtreeGetVarint(p, &v64); assert( n>5 && n<=9 ); *v = (u32)v64; return n; } #endif } /* ** Return the number of bytes that will be needed to store the given ** 64-bit integer. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeVarintLen(u64 v){ int i; for(i=1; (v >>= 7)!=0; i++){ assert( i<9 ); } return i; } /* ** Read or write a four-byte big-endian integer value. */ SQLITE_PRIVATE u32 sqlite3Get4byte(const u8 *p){ #if SQLITE_BYTEORDER==4321 u32 x; memcpy(&x,p,4); return x; #elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ && defined(__GNUC__) && GCC_VERSION>=4003000 u32 x; memcpy(&x,p,4); return __builtin_bswap32(x); #elif SQLITE_BYTEORDER==1234 && !defined(SQLITE_DISABLE_INTRINSIC) \ && defined(_MSC_VER) && _MSC_VER>=1300 u32 x; memcpy(&x,p,4); return _byteswap_ulong(x); #else testcase( p[0]&0x80 ); return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; #endif } SQLITE_PRIVATE void sqlite3Put4byte(unsigned char *p, u32 v){ #if SQLITE_BYTEORDER==4321 memcpy(p,&v,4); #elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) && GCC_VERSION>=4003000 u32 x = __builtin_bswap32(v); memcpy(p,&x,4); #elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 u32 x = _byteswap_ulong(v); memcpy(p,&x,4); #else p[0] = (u8)(v>>24); p[1] = (u8)(v>>16); p[2] = (u8)(v>>8); p[3] = (u8)v; #endif } /* ** Log an error that is an API call on a connection pointer that should ** not have been used. The "type" of connection pointer is given as the ** argument. The zType is a word like "NULL" or "closed" or "invalid". */ static void logBadConnection(const char *zType){ sqlite3_log(SQLITE_MISUSE, "API call with %s database connection pointer", zType ); } SQLITE_PRIVATE int sqlite3SafetyCheckSickOrOk(Btree *pBtree){ u32 magic; magic = pBtree->magic; if( magic!=SQLITE_MAGIC_SICK && magic!=SQLITE_MAGIC_OPEN && magic!=SQLITE_MAGIC_BUSY ){ testcase( sqlite3GlobalConfig.xLog!=0 ); logBadConnection("invalid"); return 0; }else{ return 1; } } /* ** Compute the absolute value of a 32-bit signed integer, of possible. Or ** if the integer has a value of -2147483648, return +2147483647 */ SQLITE_PRIVATE int sqlite3AbsInt32(int x){ if( x>=0 ) return x; if( x==(int)0x80000000 ) return 0x7fffffff; return -x; } /************** End of util.c ************************************************/ /************** Begin file os_unix.c *****************************************/ /* ** 2004 May 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains the VFS implementation for unix-like operating systems ** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. ** ** There are actually several different VFS implementations in this file. ** The differences are in the way that file locking is done. The default ** implementation uses Posix Advisory Locks. Alternative implementations ** use flock(), dot-files, various proprietary locking schemas, or simply ** skip locking all together. ** ** This source file is organized into divisions where the logic for various ** subfunctions is contained within the appropriate division. PLEASE ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed ** in the correct division and should be clearly labeled. ** ** The layout of divisions is as follows: ** ** * General-purpose declarations and utility functions. ** * Unique file ID logic used by VxWorks. ** * Various locking primitive implementations (all except proxy locking): ** + for Posix Advisory Locks ** + for no-op locks ** + for dot-file locks ** + for flock() locking ** + for named semaphore locks (VxWorks only) ** + for AFP filesystem locks (MacOSX only) ** * sqlite3_file methods not associated with locking. ** * Definitions of sqlite3_io_methods objects for all locking ** methods plus "finder" functions for each locking method. ** * sqlite3_vfs method implementations. ** * Locking primitives for the proxy uber-locking-method. (MacOSX only) ** * Definitions of sqlite3_vfs objects for all locking methods ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). */ /* #include "sqliteInt.h" */ #if SQLITE_OS_UNIX /* This file is used on unix only */ /* ** There are various methods for file locking used for concurrency ** control: ** ** 1. POSIX locking (the default), ** 2. No locking, ** 3. Dot-file locking, ** 4. flock() locking, ** 5. AFP locking (OSX only), ** 6. Named POSIX semaphores (VXWorks only), ** 7. proxy locking. (OSX only) ** ** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE ** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic ** selection of the appropriate locking style based on the filesystem ** where the database is located. */ #if !defined(SQLITE_ENABLE_LOCKING_STYLE) # if defined(__APPLE__) # define SQLITE_ENABLE_LOCKING_STYLE 1 # else # define SQLITE_ENABLE_LOCKING_STYLE 0 # endif #endif /* ** standard include files. */ #include #include #include #include #include #include #include #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 # include #endif #if SQLITE_ENABLE_LOCKING_STYLE # include # include # include #endif /* SQLITE_ENABLE_LOCKING_STYLE */ #if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \ (__IPHONE_OS_VERSION_MIN_REQUIRED > 2000)) # if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \ && (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0)) # define HAVE_GETHOSTUUID 1 # else # warning "gethostuuid() is disabled." # endif #endif #if OS_VXWORKS /* # include */ # include # include #endif /* OS_VXWORKS */ #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE # include #endif #ifdef HAVE_UTIME # include #endif /* ** Allowed values of unixFile.fsFlags */ #define SQLITE_FSFLAGS_IS_MSDOS 0x1 /* ** If we are to be thread-safe, include the pthreads header and define ** the SQLITE_UNIX_THREADS macro. */ #if SQLITE_THREADSAFE /* # include */ # define SQLITE_UNIX_THREADS 1 #endif /* ** Default permissions when creating a new file */ #ifndef SQLITE_DEFAULT_FILE_PERMISSIONS # define SQLITE_DEFAULT_FILE_PERMISSIONS 0644 #endif /* ** Default permissions when creating auto proxy dir */ #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 #endif /* ** Maximum supported path-length. */ #define MAX_PATHNAME 512 /* Always cast the getpid() return type for compatibility with ** kernel modules in VxWorks. */ #define osGetpid(X) (pid_t)getpid() /* ** Only set the lastErrno if the error code is a real error and not ** a normal expected return code of SQLITE_BUSY or SQLITE_OK */ #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) /* Forward references */ typedef struct unixShm unixShm; /* Connection shared memory */ typedef struct unixShmNode unixShmNode; /* Shared memory instance */ typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ /* ** Sometimes, after a file handle is closed by SQLite, the file descriptor ** cannot be closed immediately. In these cases, instances of the following ** structure are used to store the file descriptor while waiting for an ** opportunity to either close or reuse it. */ struct UnixUnusedFd { int fd; /* File descriptor to close */ int flags; /* Flags this file descriptor was opened with */ UnixUnusedFd *pNext; /* Next unused file descriptor on same file */ }; /* ** The unixFile structure is subclass of sqlite3_file specific to the unix ** VFS implementations. */ typedef struct unixFile unixFile; struct unixFile { sqlite3_io_methods const *pMethod; /* Always the first entry */ sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ unixInodeInfo *pInode; /* Info about locks on this inode */ int h; /* The file descriptor */ unsigned char eFileLock; /* The type of lock held on this fd */ unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ int lastErrno; /* The unix errno from last I/O error */ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ #if SQLITE_MAX_MMAP_SIZE>0 int nFetchOut; /* Number of outstanding xFetch refs */ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ void *pMapRegion; /* Memory mapped region */ #endif #ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ #endif #if SQLITE_ENABLE_LOCKING_STYLE int openFlags; /* The flags specified at open() */ #endif #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) unsigned fsFlags; /* cached details from statfs() */ #endif #if OS_VXWORKS struct vxworksFileId *pId; /* Unique file ID */ #endif #ifdef SQLITE_DEBUG /* The next group of variables are used to track whether or not the ** transaction counter in bytes 24-27 of database files are updated ** whenever any part of the database changes. An assertion fault will ** occur if a file is updated without also updating the transaction ** counter. This test is made to avoid new problems similar to the ** one described by ticket #3584. */ unsigned char transCntrChng; /* True if the transaction counter changed */ unsigned char dbUpdate; /* True if any part of database file changed */ unsigned char inNormalWrite; /* True if in a normal write operation */ #endif #ifdef SQLITE_TEST /* In test mode, increase the size of this structure a bit so that ** it is larger than the struct CrashFile defined in test6.c. */ char aPadding[32]; #endif }; /* This variable holds the process id (pid) from when the xRandomness() ** method was called. If xOpen() is called from a different process id, ** indicating that a fork() has occurred, the PRNG will be reset. */ static pid_t randomnessPid = 0; /* ** Allowed values for the unixFile.ctrlFlags bitmask: */ #define UNIXFILE_EXCL 0x01 /* Connections from one process only */ #define UNIXFILE_RDONLY 0x02 /* Connection is read only */ #define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ #ifndef SQLITE_DISABLE_DIRSYNC # define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */ #else # define UNIXFILE_DIRSYNC 0x00 #endif #define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ #define UNIXFILE_DELETE 0x20 /* Delete on close */ #define UNIXFILE_URI 0x40 /* Filename might have query parameters */ #define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ #define UNIXFILE_WARNED 0x0100 /* verifyDbFile() warnings issued */ #define UNIXFILE_BLOCK 0x0200 /* Next SHM lock might block */ /* ** Include code that is common to all os_*.c files */ /************** Include os_common.h in the middle of os_unix.c ***************/ /************** Begin file os_common.h ***************************************/ /* ** 2004 May 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains macros and a little bit of code that is common to ** all of the platform-specific files (os_*.c) and is #included into those ** files. ** ** This file should be #included by the os_*.c files only. It is not a ** general purpose header file. */ #ifndef _OS_COMMON_H_ #define _OS_COMMON_H_ /* ** At least two bugs have slipped in because we changed the MEMORY_DEBUG ** macro to SQLITE_DEBUG and some older makefiles have not yet made the ** switch. The following code should catch this problem at compile-time. */ #ifdef MEMORY_DEBUG # error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." #endif /* ** Macros for performance tracing. Normally turned off. Only works ** on i486 hardware. */ #ifdef SQLITE_PERFORMANCE_TRACE /* ** hwtime.h contains inline assembler code for implementing ** high-performance timing routines. */ /* #include "hwtime.h" */ static sqlite_uint64 g_start; static sqlite_uint64 g_elapsed; #define TIMER_START g_start=sqlite3Hwtime() #define TIMER_END g_elapsed=sqlite3Hwtime()-g_start #define TIMER_ELAPSED g_elapsed #else #define TIMER_START #define TIMER_END #define TIMER_ELAPSED ((sqlite_uint64)0) #endif /* ** If we compile with the SQLITE_TEST macro set, then the following block ** of code will give us the ability to simulate a disk I/O error. This ** is used for testing the I/O recovery logic. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_io_error_hit = 0; /* Total number of I/O Errors */ SQLITE_PRIVATE int sqlite3_io_error_hardhit = 0; /* Number of non-benign errors */ SQLITE_PRIVATE int sqlite3_io_error_pending = 0; /* Count down to first I/O error */ SQLITE_PRIVATE int sqlite3_io_error_persist = 0; /* True if I/O errors persist */ SQLITE_PRIVATE int sqlite3_io_error_benign = 0; /* True if errors are benign */ SQLITE_PRIVATE int sqlite3_diskfull_pending = 0; SQLITE_PRIVATE int sqlite3_diskfull = 0; #define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X) #define SimulateIOError(CODE) \ if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \ || sqlite3_io_error_pending-- == 1 ) \ { local_ioerr(); CODE; } static void local_ioerr(){ IOTRACE(("IOERR\n")); sqlite3_io_error_hit++; if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++; } #define SimulateDiskfullError(CODE) \ if( sqlite3_diskfull_pending ){ \ if( sqlite3_diskfull_pending == 1 ){ \ local_ioerr(); \ sqlite3_diskfull = 1; \ sqlite3_io_error_hit = 1; \ CODE; \ }else{ \ sqlite3_diskfull_pending--; \ } \ } #else #define SimulateIOErrorBenign(X) #define SimulateIOError(A) #define SimulateDiskfullError(A) #endif /* ** When testing, keep a count of the number of open files. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_open_file_count = 0; #define OpenCounter(X) sqlite3_open_file_count+=(X) #else #define OpenCounter(X) #endif #endif /* !defined(_OS_COMMON_H_) */ /************** End of os_common.h *******************************************/ /************** Continuing where we left off in os_unix.c ********************/ /* ** Define various macros that are missing from some systems. */ #ifndef O_LARGEFILE # define O_LARGEFILE 0 #endif #ifdef SQLITE_DISABLE_LFS # undef O_LARGEFILE # define O_LARGEFILE 0 #endif #ifndef O_NOFOLLOW # define O_NOFOLLOW 0 #endif #ifndef O_BINARY # define O_BINARY 0 #endif /* ** The threadid macro resolves to the thread-id or to 0. Used for ** testing and debugging only. */ #if SQLITE_THREADSAFE #define threadid pthread_self() #else #define threadid 0 #endif /* ** HAVE_MREMAP defaults to true on Linux and false everywhere else. */ #if !defined(HAVE_MREMAP) # if defined(__linux__) && defined(_GNU_SOURCE) # define HAVE_MREMAP 1 # else # define HAVE_MREMAP 0 # endif #endif /* ** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek() ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. */ #ifdef __ANDROID__ # define lseek lseek64 #endif /* ** Different Unix systems declare open() in different ways. Same use ** open(const char*,int,mode_t). Others use open(const char*,int,...). ** The difference is important when using a pointer to the function. ** ** The safest way to deal with the problem is to always use this wrapper ** which always has the same well-defined interface. */ static int posixOpen(const char *zFile, int flags, int mode){ return open(zFile, flags, mode); } /* ** On some systems, calls to fchown() will trigger a message in a security ** log if they come from non-root processes. So avoid calling fchown() if ** we are not running as root. */ static int posixFchown(int fd, uid_t uid, gid_t gid){ #if OS_VXWORKS return 0; #else return geteuid() ? 0 : fchown(fd,uid,gid); #endif } /* Forward reference */ static int openDirectory(const char*, int*); static int unixGetpagesize(void); /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during ** testing and sandboxing. The following array holds the names and pointers ** to all overrideable system calls. */ static struct unix_syscall { const char *zName; /* Name of the system call */ sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ sqlite3_syscall_ptr pDefault; /* Default value */ } aSyscall[] = { { "open", (sqlite3_syscall_ptr)posixOpen, 0 }, #define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent) { "close", (sqlite3_syscall_ptr)close, 0 }, #define osClose ((int(*)(int))aSyscall[1].pCurrent) { "access", (sqlite3_syscall_ptr)access, 0 }, #define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent) { "getcwd", (sqlite3_syscall_ptr)getcwd, 0 }, #define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent) { "stat", (sqlite3_syscall_ptr)stat, 0 }, #define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent) /* ** The DJGPP compiler environment looks mostly like Unix, but it ** lacks the fcntl() system call. So redefine fcntl() to be something ** that always succeeds. This means that locking does not occur under ** DJGPP. But it is DOS - what did you expect? */ #ifdef __DJGPP__ { "fstat", 0, 0 }, #define osFstat(a,b,c) 0 #else { "fstat", (sqlite3_syscall_ptr)fstat, 0 }, #define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent) #endif { "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 }, #define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent) { "fcntl", (sqlite3_syscall_ptr)fcntl, 0 }, #define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent) { "read", (sqlite3_syscall_ptr)read, 0 }, #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE { "pread", (sqlite3_syscall_ptr)pread, 0 }, #else { "pread", (sqlite3_syscall_ptr)0, 0 }, #endif #define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent) #if defined(USE_PREAD64) { "pread64", (sqlite3_syscall_ptr)pread64, 0 }, #else { "pread64", (sqlite3_syscall_ptr)0, 0 }, #endif #define osPread64 ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[10].pCurrent) { "write", (sqlite3_syscall_ptr)write, 0 }, #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) #if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, #else { "pwrite", (sqlite3_syscall_ptr)0, 0 }, #endif #define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\ aSyscall[12].pCurrent) #if defined(USE_PREAD64) { "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 }, #else { "pwrite64", (sqlite3_syscall_ptr)0, 0 }, #endif #define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\ aSyscall[13].pCurrent) { "fchmod", (sqlite3_syscall_ptr)fchmod, 0 }, #define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent) #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE { "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 }, #else { "fallocate", (sqlite3_syscall_ptr)0, 0 }, #endif #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent) { "unlink", (sqlite3_syscall_ptr)unlink, 0 }, #define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent) { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, #define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 }, #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent) { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, #define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent) #if HAVE_MREMAP { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, #else { "mremap", (sqlite3_syscall_ptr)0, 0 }, #endif #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, #define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent) #endif }; /* End of the overrideable system calls */ /* ** This is the xSetSystemCall() method of sqlite3_vfs for all of the ** "unix" VFSes. Return SQLITE_OK opon successfully updating the ** system call pointer, or SQLITE_NOTFOUND if there is no configurable ** system call named zName. */ static int unixSetSystemCall( sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ const char *zName, /* Name of system call to override */ sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ ){ unsigned int i; int rc = SQLITE_NOTFOUND; UNUSED_PARAMETER(pNotUsed); if( zName==0 ){ /* If no zName is given, restore all system calls to their default ** settings and return NULL */ rc = SQLITE_OK; for(i=0; i=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; osClose(fd); // sqlite3_log(SQLITE_WARNING, // "attempt to open \"%s\" as file descriptor %d", z, fd); fd = -1; if( osOpen("/dev/null", f, m)<0 ) break; } if( fd>=0 ){ if( m!=0 ){ struct stat statbuf; if( osFstat(fd, &statbuf)==0 && statbuf.st_size==0 && (statbuf.st_mode&0777)!=m ){ osFchmod(fd, m); } } #if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); #endif } return fd; } /* ** Helper functions to obtain and relinquish the global mutex. The ** global mutex is used to protect the unixInodeInfo and ** vxworksFileId objects used by this file, all of which may be ** shared by multiple threads. ** ** Function unixMutexHeld() is used to assert() that the global mutex ** is held when required. This function is only used as part of assert() ** statements. e.g. ** ** unixEnterMutex() ** assert( unixMutexHeld() ); ** unixEnterLeave() */ static void unixEnterMutex(void){ sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } static void unixLeaveMutex(void){ sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #ifndef NDEBUG static int unixMutexHeld(void) { return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #endif #ifdef SQLITE_HAVE_OS_TRACE /* ** Helper function for printing out trace information from debugging ** binaries. This returns the string representation of the supplied ** integer lock-type. */ static const char *azFileLock(int eFileLock){ switch( eFileLock ){ case NO_LOCK: return "NONE"; case SHARED_LOCK: return "SHARED"; case RESERVED_LOCK: return "RESERVED"; case PENDING_LOCK: return "PENDING"; case EXCLUSIVE_LOCK: return "EXCLUSIVE"; } return "ERROR"; } #endif #ifdef SQLITE_LOCK_TRACE /* ** Print out information about all locking operations. ** ** This routine is used for troubleshooting locks on multithreaded ** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE ** command-line option on the compiler. This code is normally ** turned off. */ static int lockTrace(int fd, int op, struct flock *p){ char *zOpName, *zType; int s; int savedErrno; if( op==F_GETLK ){ zOpName = "GETLK"; }else if( op==F_SETLK ){ zOpName = "SETLK"; }else{ s = osFcntl(fd, op, p); sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s); return s; } if( p->l_type==F_RDLCK ){ zType = "RDLCK"; }else if( p->l_type==F_WRLCK ){ zType = "WRLCK"; }else if( p->l_type==F_UNLCK ){ zType = "UNLCK"; }else{ assert( 0 ); } assert( p->l_whence==SEEK_SET ); s = osFcntl(fd, op, p); savedErrno = errno; sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, (int)p->l_pid, s); if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ struct flock l2; l2 = *p; osFcntl(fd, F_GETLK, &l2); if( l2.l_type==F_RDLCK ){ zType = "RDLCK"; }else if( l2.l_type==F_WRLCK ){ zType = "WRLCK"; }else if( l2.l_type==F_UNLCK ){ zType = "UNLCK"; }else{ assert( 0 ); } sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n", zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid); } errno = savedErrno; return s; } #undef osFcntl #define osFcntl lockTrace #endif /* SQLITE_LOCK_TRACE */ /* ** Retry ftruncate() calls that fail due to EINTR ** ** All calls to ftruncate() within this file should be made through ** this wrapper. On the Android platform, bypassing the logic below ** could lead to a corrupt database. */ static int robust_ftruncate(int h, sqlite3_int64 sz){ int rc; #ifdef __ANDROID__ /* On Android, ftruncate() always uses 32-bit offsets, even if ** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to ** truncate a file to any size larger than 2GiB. Silently ignore any ** such attempts. */ if( sz>(sqlite3_int64)0x7FFFFFFF ){ rc = SQLITE_OK; }else #endif do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR ); return rc; } /* ** This routine translates a standard POSIX errno code into something ** useful to the clients of the sqlite3 functions. Specifically, it is ** intended to translate a variety of "try again" errors into SQLITE_BUSY ** and a variety of "please close the file descriptor NOW" errors into ** SQLITE_IOERR ** ** Errors during initialization of locks, or file system support for locks, ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. */ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { switch (posixError) { #if 0 /* At one point this code was not commented out. In theory, this branch ** should never be hit, as this function should only be called after ** a locking-related function (i.e. fcntl()) has returned non-zero with ** the value of errno as the first argument. Since a system call has failed, ** errno should be non-zero. ** ** Despite this, if errno really is zero, we still don't want to return ** SQLITE_OK. The system call failed, and *some* SQLite error should be ** propagated back to the caller. Commenting this branch out means errno==0 ** will be handled by the "default:" case below. */ case 0: return SQLITE_OK; #endif case EAGAIN: case ETIMEDOUT: case EBUSY: case EINTR: case ENOLCK: /* random NFS retry error, unless during file system support * introspection, in which it actually means what it says */ return SQLITE_BUSY; case EACCES: /* EACCES is like EAGAIN during locking operations, but not any other time*/ if( (sqliteIOErr == SQLITE_IOERR_LOCK) || (sqliteIOErr == SQLITE_IOERR_UNLOCK) || (sqliteIOErr == SQLITE_IOERR_RDLOCK) || (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){ return SQLITE_BUSY; } /* else fall through */ case EPERM: return SQLITE_PERM; #if EOPNOTSUPP!=ENOTSUP case EOPNOTSUPP: /* something went terribly awry, unless during file system support * introspection, in which it actually means what it says */ #endif #ifdef ENOTSUP case ENOTSUP: /* invalid fd, unless during file system support introspection, in which * it actually means what it says */ #endif case EIO: case EBADF: case EINVAL: case ENOTCONN: case ENODEV: case ENXIO: case ENOENT: #ifdef ESTALE /* ESTALE is not defined on Interix systems */ case ESTALE: #endif case ENOSYS: /* these should force the client to close the file and reconnect */ default: return sqliteIOErr; } } /****************************************************************************** ****************** Begin Unique File ID Utility Used By VxWorks *************** ** ** On most versions of unix, we can get a unique ID for a file by concatenating ** the device number and the inode number. But this does not work on VxWorks. ** On VxWorks, a unique file id must be based on the canonical filename. ** ** A pointer to an instance of the following structure can be used as a ** unique file ID in VxWorks. Each instance of this structure contains ** a copy of the canonical filename. There is also a reference count. ** The structure is reclaimed when the number of pointers to it drops to ** zero. ** ** There are never very many files open at one time and lookups are not ** a performance-critical path, so it is sufficient to put these ** structures on a linked list. */ struct vxworksFileId { struct vxworksFileId *pNext; /* Next in a list of them all */ int nRef; /* Number of references to this one */ int nName; /* Length of the zCanonicalName[] string */ char *zCanonicalName; /* Canonical filename */ }; #if OS_VXWORKS /* ** All unique filenames are held on a linked list headed by this ** variable: */ static struct vxworksFileId *vxworksFileList = 0; /* ** Simplify a filename into its canonical form ** by making the following changes: ** ** * removing any trailing and duplicate / ** * convert /./ into just / ** * convert /A/../ where A is any simple name into just / ** ** Changes are made in-place. Return the new name length. ** ** The original filename is in z[0..n-1]. Return the number of ** characters in the simplified name. */ static int vxworksSimplifyName(char *z, int n){ int i, j; while( n>1 && z[n-1]=='/' ){ n--; } for(i=j=0; i0 && z[j-1]!='/' ){ j--; } if( j>0 ){ j--; } i += 2; continue; } } z[j++] = z[i]; } z[j] = 0; return j; } /* ** Find a unique file ID for the given absolute pathname. Return ** a pointer to the vxworksFileId object. This pointer is the unique ** file ID. ** ** The nRef field of the vxworksFileId object is incremented before ** the object is returned. A new vxworksFileId object is created ** and added to the global list if necessary. ** ** If a memory allocation error occurs, return NULL. */ static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){ struct vxworksFileId *pNew; /* search key and new file ID */ struct vxworksFileId *pCandidate; /* For looping over existing file IDs */ int n; /* Length of zAbsoluteName string */ assert( zAbsoluteName[0]=='/' ); n = (int)strlen(zAbsoluteName); pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) ); if( pNew==0 ) return 0; pNew->zCanonicalName = (char*)&pNew[1]; memcpy(pNew->zCanonicalName, zAbsoluteName, n+1); n = vxworksSimplifyName(pNew->zCanonicalName, n); /* Search for an existing entry that matching the canonical name. ** If found, increment the reference count and return a pointer to ** the existing file ID. */ unixEnterMutex(); for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){ if( pCandidate->nName==n && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0 ){ sqlite3_free(pNew); pCandidate->nRef++; unixLeaveMutex(); return pCandidate; } } /* No match was found. We will make a new file ID */ pNew->nRef = 1; pNew->nName = n; pNew->pNext = vxworksFileList; vxworksFileList = pNew; unixLeaveMutex(); return pNew; } /* ** Decrement the reference count on a vxworksFileId object. Free ** the object when the reference count reaches zero. */ static void vxworksReleaseFileId(struct vxworksFileId *pId){ unixEnterMutex(); assert( pId->nRef>0 ); pId->nRef--; if( pId->nRef==0 ){ struct vxworksFileId **pp; for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){} assert( *pp==pId ); *pp = pId->pNext; sqlite3_free(pId); } unixLeaveMutex(); } #endif /* OS_VXWORKS */ /*************** End of Unique File ID Utility Used By VxWorks **************** ******************************************************************************/ /****************************************************************************** *************************** Posix Advisory Locking **************************** ** ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) ** section 6.5.2.2 lines 483 through 490 specify that when a process ** sets or clears a lock, that operation overrides any prior locks set ** by the same process. It does not explicitly say so, but this implies ** that it overrides locks set by the same process using a different ** file descriptor. Consider this test case: ** ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); ** ** Suppose ./file1 and ./file2 are really the same file (because ** one is a hard or symbolic link to the other) then if you set ** an exclusive lock on fd1, then try to get an exclusive lock ** on fd2, it works. I would have expected the second lock to ** fail since there was already a lock on the file due to fd1. ** But not so. Since both locks came from the same process, the ** second overrides the first, even though they were on different ** file descriptors opened on different file names. ** ** This means that we cannot use POSIX locks to synchronize file access ** among competing threads of the same process. POSIX locks will work fine ** to synchronize access for threads in separate processes, but not ** threads within the same process. ** ** To work around the problem, SQLite has to manage file locks internally ** on its own. Whenever a new database is opened, we have to find the ** specific inode of the database file (the inode is determined by the ** st_dev and st_ino fields of the stat structure that fstat() fills in) ** and check for locks already existing on that inode. When locks are ** created or removed, we have to look at our own internal record of the ** locks to see if another thread has previously set a lock on that same ** inode. ** ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. ** For VxWorks, we have to use the alternative unique ID system based on ** canonical filename and implemented in the previous division.) ** ** The sqlite3_file structure for POSIX is no longer just an integer file ** descriptor. It is now a structure that holds the integer file ** descriptor and a pointer to a structure that describes the internal ** locks on the corresponding inode. There is one locking structure ** per inode, so if the same inode is opened twice, both unixFile structures ** point to the same locking structure. The locking structure keeps ** a reference count (so we will know when to delete it) and a "cnt" ** field that tells us its internal lock status. cnt==0 means the ** file is unlocked. cnt==-1 means the file has an exclusive lock. ** cnt>0 means there are cnt shared locks on the file. ** ** Any attempt to lock or unlock a file first checks the locking ** structure. The fcntl() system call is only invoked to set a ** POSIX lock if the internal lock structure transitions between ** a locked and an unlocked state. ** ** But wait: there are yet more problems with POSIX advisory locks. ** ** If you close a file descriptor that points to a file that has locks, ** all locks on that file that are owned by the current process are ** released. To work around this problem, each unixInodeInfo object ** maintains a count of the number of pending locks on tha inode. ** When an attempt is made to close an unixFile, if there are ** other unixFile open on the same inode that are holding locks, the call ** to close() the file descriptor is deferred until all of the locks clear. ** The unixInodeInfo structure keeps a list of file descriptors that need to ** be closed and that list is walked (and cleared) when the last lock ** clears. ** ** Yet another problem: LinuxThreads do not play well with posix locks. ** ** Many older versions of linux use the LinuxThreads library which is ** not posix compliant. Under LinuxThreads, a lock created by thread ** A cannot be modified or overridden by a different thread B. ** Only thread A can modify the lock. Locking behavior is correct ** if the appliation uses the newer Native Posix Thread Library (NPTL) ** on linux - with NPTL a lock created by thread A can override locks ** in thread B. But there is no way to know at compile-time which ** threading library is being used. So there is no way to know at ** compile-time whether or not thread A can override locks on thread B. ** One has to do a run-time check to discover the behavior of the ** current process. ** ** SQLite used to support LinuxThreads. But support for LinuxThreads ** was dropped beginning with version 3.7.0. SQLite will still work with ** LinuxThreads provided that (1) there is no more than one connection ** per database file in the same process and (2) database connections ** do not move across threads. */ /* ** An instance of the following structure serves as the key used ** to locate a particular unixInodeInfo object. */ struct unixFileId { dev_t dev; /* Device number */ #if OS_VXWORKS struct vxworksFileId *pId; /* Unique file ID for vxworks. */ #else ino_t ino; /* Inode number */ #endif }; /* ** An instance of the following structure is allocated for each open ** inode. Or, on LinuxThreads, there is one of these structures for ** each inode opened by each thread. ** ** A single inode can have multiple file descriptors, so each unixFile ** structure contains a pointer to an instance of this object and this ** object keeps a count of the number of unixFile pointing to it. */ struct unixInodeInfo { struct unixFileId fileId; /* The lookup key */ int nShared; /* Number of SHARED locks held */ unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ unsigned char bProcessLock; /* An exclusive process lock is held */ int nRef; /* Number of pointers to this structure */ unixShmNode *pShmNode; /* Shared memory associated with this inode */ int nLock; /* Number of outstanding file locks */ UnixUnusedFd *pUnused; /* Unused file descriptors to close */ unixInodeInfo *pNext; /* List of all unixInodeInfo objects */ unixInodeInfo *pPrev; /* .... doubly linked */ #if SQLITE_ENABLE_LOCKING_STYLE unsigned long long sharedByte; /* for AFP simulated shared lock */ #endif #if OS_VXWORKS sem_t *pSem; /* Named POSIX semaphore */ char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ #endif }; /* ** A lists of all unixInodeInfo objects. */ static unixInodeInfo *inodeList = 0; /* ** ** This function - unixLogError_x(), is only ever called via the macro ** unixLogError(). ** ** It is invoked after an error occurs in an OS function and errno has been ** set. It logs a message using sqlite3_log() containing the current value of ** errno and, if possible, the human-readable equivalent from strerror() or ** strerror_r(). ** ** The first argument passed to the macro should be the error code that ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). ** The two subsequent arguments should be the name of the OS function that ** failed (e.g. "unlink", "open") and the associated file-system path, ** if any. */ #define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__) static int unixLogErrorAtLine( int errcode, /* SQLite error code */ const char *zFunc, /* Name of OS function that failed */ const char *zPath, /* File path associated with error */ int iLine /* Source line number where error occurred */ ){ char *zErr; /* Message from strerror() or equivalent */ int iErrno = errno; /* Saved syscall error number */ /* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use ** the strerror() function to obtain the human-readable error message ** equivalent to errno. Otherwise, use strerror_r(). */ #if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R) char aErr[80]; memset(aErr, 0, sizeof(aErr)); zErr = aErr; /* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined, ** assume that the system provides the GNU version of strerror_r() that ** returns a pointer to a buffer containing the error message. That pointer ** may point to aErr[], or it may point to some static storage somewhere. ** Otherwise, assume that the system provides the POSIX version of ** strerror_r(), which always writes an error message into aErr[]. ** ** If the code incorrectly assumes that it is the POSIX version that is ** available, the error message will often be an empty string. Not a ** huge problem. Incorrectly concluding that the GNU version is available ** could lead to a segfault though. */ #if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU) zErr = # endif strerror_r(iErrno, aErr, sizeof(aErr)-1); #elif SQLITE_THREADSAFE /* This is a threadsafe build, but strerror_r() is not available. */ zErr = ""; #else /* Non-threadsafe build, use strerror(). */ zErr = strerror(iErrno); #endif if( zPath==0 ) zPath = ""; sqlite3_log(errcode, "os_unix.c:%d: (%d) %s(%s) - %s", iLine, iErrno, zFunc, zPath, zErr ); return errcode; } /* ** Close a file descriptor. ** ** We assume that close() almost always works, since it is only in a ** very sick application or on a very sick platform that it might fail. ** If it does fail, simply leak the file descriptor, but do log the ** error. ** ** Note that it is not safe to retry close() after EINTR since the ** file descriptor might have already been reused by another thread. ** So we don't even try to recover from an EINTR. Just log the error ** and move on. */ static void robust_close(unixFile *pFile, int h, int lineno){ if( osClose(h) ){ unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", pFile ? pFile->zPath : 0, lineno); } } /* ** Set the pFile->lastErrno. Do this in a subroutine as that provides ** a convenient place to set a breakpoint. */ static void storeLastErrno(unixFile *pFile, int error){ pFile->lastErrno = error; } /* ** Close all file descriptors accumuated in the unixInodeInfo->pUnused list. */ static void closePendingFds(unixFile *pFile){ unixInodeInfo *pInode = pFile->pInode; UnixUnusedFd *p; UnixUnusedFd *pNext; for(p=pInode->pUnused; p; p=pNext){ pNext = p->pNext; robust_close(pFile, p->fd, __LINE__); sqlite3_free(p); } pInode->pUnused = 0; } /* ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). ** ** The mutex entered using the unixEnterMutex() function must be held ** when this function is called. */ static void releaseInodeInfo(unixFile *pFile){ unixInodeInfo *pInode = pFile->pInode; assert( unixMutexHeld() ); if( ALWAYS(pInode) ){ pInode->nRef--; if( pInode->nRef==0 ){ assert( pInode->pShmNode==0 ); closePendingFds(pFile); if( pInode->pPrev ){ assert( pInode->pPrev->pNext==pInode ); pInode->pPrev->pNext = pInode->pNext; }else{ assert( inodeList==pInode ); inodeList = pInode->pNext; } if( pInode->pNext ){ assert( pInode->pNext->pPrev==pInode ); pInode->pNext->pPrev = pInode->pPrev; } sqlite3_free(pInode); } } } /* ** Given a file descriptor, locate the unixInodeInfo object that ** describes that file descriptor. Create a new one if necessary. The ** return value might be uninitialized if an error occurs. ** ** The mutex entered using the unixEnterMutex() function must be held ** when this function is called. ** ** Return an appropriate error code. */ static int findInodeInfo( unixFile *pFile, /* Unix file with file desc used in the key */ unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ ){ int rc; /* System call return code */ int fd; /* The file descriptor for pFile */ struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ struct stat statbuf; /* Low-level file information */ unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ assert( unixMutexHeld() ); /* Get low-level information about the file that we can used to ** create a unique name for the file. */ fd = pFile->h; rc = osFstat(fd, &statbuf); if( rc!=0 ){ storeLastErrno(pFile, errno); #ifdef EOVERFLOW if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS; #endif return SQLITE_IOERR; } #ifdef __APPLE__ /* On OS X on an msdos filesystem, the inode number is reported ** incorrectly for zero-size files. See ticket #3260. To work ** around this problem (we consider it a bug in OS X, not SQLite) ** we always increase the file size to 1 by writing a single byte ** prior to accessing the inode number. The one byte written is ** an ASCII 'S' character which also happens to be the first byte ** in the header of every SQLite database. In this way, if there ** is a race condition such that another thread has already populated ** the first page of the database, no damage is done. */ if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){ do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR ); if( rc!=1 ){ storeLastErrno(pFile, errno); return SQLITE_IOERR; } rc = osFstat(fd, &statbuf); if( rc!=0 ){ storeLastErrno(pFile, errno); return SQLITE_IOERR; } } #endif memset(&fileId, 0, sizeof(fileId)); fileId.dev = statbuf.st_dev; #if OS_VXWORKS fileId.pId = pFile->pId; #else fileId.ino = statbuf.st_ino; #endif pInode = inodeList; while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){ pInode = pInode->pNext; } if( pInode==0 ){ pInode = sqlite3_malloc64( sizeof(*pInode) ); if( pInode==0 ){ return SQLITE_NOMEM; } memset(pInode, 0, sizeof(*pInode)); memcpy(&pInode->fileId, &fileId, sizeof(fileId)); pInode->nRef = 1; pInode->pNext = inodeList; pInode->pPrev = 0; if( inodeList ) inodeList->pPrev = pInode; inodeList = pInode; }else{ pInode->nRef++; } *ppInode = pInode; return SQLITE_OK; } /* ** Return TRUE if pFile has been renamed or unlinked since it was first opened. */ static int fileHasMoved(unixFile *pFile){ #if OS_VXWORKS return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; #else struct stat buf; return pFile->pInode!=0 && (osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino); #endif } /* ** Check a unixFile that is a database. Verify the following: ** ** (1) There is exactly one hard link on the file ** (2) The file is not a symbolic link ** (3) The file has not been renamed or unlinked ** ** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right. */ static void verifyDbFile(unixFile *pFile){ struct stat buf; int rc; if( pFile->ctrlFlags & UNIXFILE_WARNED ){ /* One or more of the following warnings have already been issued. Do not ** repeat them so as not to clutter the error log */ return; } rc = osFstat(pFile->h, &buf); if( rc!=0 ){ // sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath); pFile->ctrlFlags |= UNIXFILE_WARNED; return; } if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){ // sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath); pFile->ctrlFlags |= UNIXFILE_WARNED; return; } if( buf.st_nlink>1 ){ // sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath); pFile->ctrlFlags |= UNIXFILE_WARNED; return; } if( fileHasMoved(pFile) ){ // sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); pFile->ctrlFlags |= UNIXFILE_WARNED; return; } } /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut ** to a non-zero value otherwise *pResOut is set to zero. The return value ** is set to SQLITE_OK unless an I/O error occurs during lock checking. */ static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ int rc = SQLITE_OK; int reserved = 0; unixFile *pFile = (unixFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); unixEnterMutex(); /* Because pFile->pInode is shared across threads */ /* Check if a thread in this process holds such a lock */ if( pFile->pInode->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. */ #ifndef __DJGPP__ if( !reserved && !pFile->pInode->bProcessLock ){ struct flock lock; lock.l_whence = SEEK_SET; lock.l_start = RESERVED_BYTE; lock.l_len = 1; lock.l_type = F_WRLCK; if( osFcntl(pFile->h, F_GETLK, &lock) ){ rc = SQLITE_IOERR_CHECKRESERVEDLOCK; storeLastErrno(pFile, errno); } else if( lock.l_type!=F_UNLCK ){ reserved = 1; } } #endif unixLeaveMutex(); OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ** Attempt to set a system-lock on the file pFile. The lock is ** described by pLock. ** ** If the pFile was opened read/write from unix-excl, then the only lock ** ever obtained is an exclusive lock, and it is obtained exactly once ** the first time any lock is attempted. All subsequent system locking ** operations become no-ops. Locking operations still happen internally, ** in order to coordinate access between separate database connections ** within this process, but all of that is handled in memory and the ** operating system does not participate. ** ** This function is a pass-through to fcntl(F_SETLK) if pFile is using ** any VFS other than "unix-excl" or if pFile is opened on "unix-excl" ** and is read-only. ** ** Zero is returned if the call completes successfully, or -1 if a call ** to fcntl() fails. In this case, errno is set appropriately (by fcntl()). */ static int unixFileLock(unixFile *pFile, struct flock *pLock){ int rc; unixInodeInfo *pInode = pFile->pInode; assert( unixMutexHeld() ); assert( pInode!=0 ); if( ((pFile->ctrlFlags & UNIXFILE_EXCL)!=0 || pInode->bProcessLock) && ((pFile->ctrlFlags & UNIXFILE_RDONLY)==0) ){ if( pInode->bProcessLock==0 ){ struct flock lock; assert( pInode->nLock==0 ); lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; lock.l_type = F_WRLCK; rc = osFcntl(pFile->h, F_SETLK, &lock); if( rc<0 ) return rc; pInode->bProcessLock = 1; pInode->nLock++; }else{ rc = 0; } }else{ rc = osFcntl(pFile->h, F_SETLK, pLock); } return rc; } /* ** Lock the file with the lock specified by parameter eFileLock - one ** of the following: ** ** (1) SHARED_LOCK ** (2) RESERVED_LOCK ** (3) PENDING_LOCK ** (4) EXCLUSIVE_LOCK ** ** Sometimes when requesting one lock state, additional lock states ** are inserted in between. The locking might fail on one of the later ** transitions leaving the lock state different from what it started but ** still short of its goal. The following chart shows the allowed ** transitions and the inserted intermediate states: ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** ** This routine will only increase a lock. Use the sqlite3OsUnlock() ** routine to lower a locking level. */ static int unixLock(sqlite3_file *id, int eFileLock){ /* The following describes the implementation of the various locks and ** lock transitions in terms of the POSIX advisory shared and exclusive ** lock primitives (called read-locks and write-locks below, to avoid ** confusion with SQLite lock names). The algorithms are complicated ** slightly in order to be compatible with windows systems simultaneously ** accessing the same database file, in case that is ever required. ** ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved ** byte', each single bytes at well known offsets, and the 'shared byte ** range', a range of 510 bytes at a well known offset. ** ** To obtain a SHARED lock, a read-lock is obtained on the 'pending ** byte'. If this is successful, a random byte from the 'shared byte ** range' is read-locked and the lock on the 'pending byte' released. ** ** A process may only obtain a RESERVED lock after it has a SHARED lock. ** A RESERVED lock is implemented by grabbing a write-lock on the ** 'reserved byte'. ** ** A process may only obtain a PENDING lock after it has obtained a ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock ** on the 'pending byte'. This ensures that no new SHARED locks can be ** obtained, but existing SHARED locks are allowed to persist. A process ** does not have to obtain a RESERVED lock on the way to a PENDING lock. ** This property is used by the algorithm for rolling back a journal file ** after a crash. ** ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is ** implemented by obtaining a write-lock on the entire 'shared byte ** range'. Since all other locks require a read-lock on one of the bytes ** within this range, this ensures that no other locks are held on the ** database. ** ** The reason a single byte cannot be used instead of the 'shared byte ** range' is that some versions of windows do not support read-locks. By ** locking a random byte from a range, concurrent SHARED locks may exist ** even if the locking primitive used is always a write-lock. */ int rc = SQLITE_OK; unixFile *pFile = (unixFile*)id; unixInodeInfo *pInode; struct flock lock; int tErrno = 0; assert( pFile ); OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h, azFileLock(eFileLock), azFileLock(pFile->eFileLock), azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared, osGetpid(0))); /* If there is already a lock of this type or more restrictive on the ** unixFile, do nothing. Don't use the end_lock: exit path, as ** unixEnterMutex() hasn't been called yet. */ if( pFile->eFileLock>=eFileLock ){ OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h, azFileLock(eFileLock))); return SQLITE_OK; } /* Make sure the locking sequence is correct. ** (1) We never move from unlocked to anything higher than shared lock. ** (2) SQLite never explicitly requests a pendig lock. ** (3) A shared lock is always held when a reserve lock is requested. */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); assert( eFileLock!=PENDING_LOCK ); assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); /* This mutex is needed because pFile->pInode is shared across threads */ unixEnterMutex(); pInode = pFile->pInode; /* If some thread using this PID has a lock via a different unixFile* ** handle that precludes the requested lock, return BUSY. */ if( (pFile->eFileLock!=pInode->eFileLock && (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) ){ rc = SQLITE_BUSY; goto end_lock; } /* If a SHARED lock is requested, and some thread using this PID already ** has a SHARED or RESERVED lock, then increment reference counts and ** return SQLITE_OK. */ if( eFileLock==SHARED_LOCK && (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ assert( eFileLock==SHARED_LOCK ); assert( pFile->eFileLock==0 ); assert( pInode->nShared>0 ); pFile->eFileLock = SHARED_LOCK; pInode->nShared++; pInode->nLock++; goto end_lock; } /* A PENDING lock is needed before acquiring a SHARED lock and before ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will ** be released. */ lock.l_len = 1L; lock.l_whence = SEEK_SET; if( eFileLock==SHARED_LOCK || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLocknShared==0 ); assert( pInode->eFileLock==0 ); assert( rc==SQLITE_OK ); /* Now get the read-lock */ lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; if( unixFileLock(pFile, &lock) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); } /* Drop the temporary PENDING lock */ lock.l_start = PENDING_BYTE; lock.l_len = 1L; lock.l_type = F_UNLCK; if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){ /* This could happen with a network mount */ tErrno = errno; rc = SQLITE_IOERR_UNLOCK; } if( rc ){ if( rc!=SQLITE_BUSY ){ storeLastErrno(pFile, tErrno); } goto end_lock; }else{ pFile->eFileLock = SHARED_LOCK; pInode->nLock++; pInode->nShared = 1; } }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ /* We are trying for an exclusive lock but another thread in this ** same process is still holding a shared lock. */ rc = SQLITE_BUSY; }else{ /* The request was for a RESERVED or EXCLUSIVE lock. It is ** assumed that there is a SHARED or greater lock on the file ** already. */ assert( 0!=pFile->eFileLock ); lock.l_type = F_WRLCK; assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK ); if( eFileLock==RESERVED_LOCK ){ lock.l_start = RESERVED_BYTE; lock.l_len = 1L; }else{ lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; } if( unixFileLock(pFile, &lock) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( rc!=SQLITE_BUSY ){ storeLastErrno(pFile, tErrno); } } } #ifdef SQLITE_DEBUG /* Set up the transaction-counter change checking flags when ** transitioning from a SHARED to a RESERVED lock. The change ** from SHARED to RESERVED marks the beginning of a normal ** write operation (not a hot journal rollback). */ if( rc==SQLITE_OK && pFile->eFileLock<=SHARED_LOCK && eFileLock==RESERVED_LOCK ){ pFile->transCntrChng = 0; pFile->dbUpdate = 0; pFile->inNormalWrite = 1; } #endif if( rc==SQLITE_OK ){ pFile->eFileLock = eFileLock; pInode->eFileLock = eFileLock; }else if( eFileLock==EXCLUSIVE_LOCK ){ pFile->eFileLock = PENDING_LOCK; pInode->eFileLock = PENDING_LOCK; } end_lock: unixLeaveMutex(); OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock), rc==SQLITE_OK ? "ok" : "failed")); return rc; } /* ** Add the file descriptor used by file handle pFile to the corresponding ** pUnused list. */ static void setPendingFd(unixFile *pFile){ unixInodeInfo *pInode = pFile->pInode; UnixUnusedFd *p = pFile->pUnused; p->pNext = pInode->pUnused; pInode->pUnused = p; pFile->h = -1; pFile->pUnused = 0; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. ** ** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED ** the byte range is divided into 2 parts and the first part is unlocked then ** set to a read lock, then the other part is simply unlocked. This works ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to ** remove the write lock on a region when a read lock is set. */ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){ unixFile *pFile = (unixFile*)id; unixInodeInfo *pInode; struct flock lock; int rc = SQLITE_OK; assert( pFile ); OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock, pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); if( pFile->eFileLock<=eFileLock ){ return SQLITE_OK; } unixEnterMutex(); pInode = pFile->pInode; assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); #ifdef SQLITE_DEBUG /* When reducing a lock such that other processes can start ** reading the database file again, make sure that the ** transaction counter was updated if any part of the database ** file changed. If the transaction counter is not updated, ** other connections to the same file might not realize that ** the file has changed and hence might not know to flush their ** cache. The use of a stale cache can lead to database corruption. */ pFile->inNormalWrite = 0; #endif /* downgrading to a shared lock on NFS involves clearing the write lock ** before establishing the readlock - to avoid a race condition we downgrade ** the lock in 2 blocks, so that part of the range will be covered by a ** write lock until the rest is covered by a read lock: ** 1: [WWWWW] ** 2: [....W] ** 3: [RRRRW] ** 4: [RRRR.] */ if( eFileLock==SHARED_LOCK ){ #if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE (void)handleNFSUnlock; assert( handleNFSUnlock==0 ); #endif #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE if( handleNFSUnlock ){ int tErrno; /* Error code from system call errors */ off_t divSize = SHARED_SIZE - 1; lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = divSize; if( unixFileLock(pFile, &lock)==(-1) ){ tErrno = errno; rc = SQLITE_IOERR_UNLOCK; if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } goto end_unlock; } lock.l_type = F_RDLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = divSize; if( unixFileLock(pFile, &lock)==(-1) ){ tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK); if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } goto end_unlock; } lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST+divSize; lock.l_len = SHARED_SIZE-divSize; if( unixFileLock(pFile, &lock)==(-1) ){ tErrno = errno; rc = SQLITE_IOERR_UNLOCK; if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } goto end_unlock; } }else #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ { lock.l_type = F_RDLCK; lock.l_whence = SEEK_SET; lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; if( unixFileLock(pFile, &lock) ){ /* In theory, the call to unixFileLock() cannot fail because another ** process is holding an incompatible lock. If it does, this ** indicates that the other process is not following the locking ** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning ** SQLITE_BUSY would confuse the upper layer (in practice it causes ** an assert to fail). */ rc = SQLITE_IOERR_RDLOCK; storeLastErrno(pFile, errno); goto end_unlock; } } } lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = PENDING_BYTE; lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE ); if( unixFileLock(pFile, &lock)==0 ){ pInode->eFileLock = SHARED_LOCK; }else{ rc = SQLITE_IOERR_UNLOCK; storeLastErrno(pFile, errno); goto end_unlock; } } if( eFileLock==NO_LOCK ){ /* Decrement the shared lock counter. Release the lock using an ** OS call only when all threads in this same process have released ** the lock. */ pInode->nShared--; if( pInode->nShared==0 ){ lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = lock.l_len = 0L; if( unixFileLock(pFile, &lock)==0 ){ pInode->eFileLock = NO_LOCK; }else{ rc = SQLITE_IOERR_UNLOCK; storeLastErrno(pFile, errno); pInode->eFileLock = NO_LOCK; pFile->eFileLock = NO_LOCK; } } /* Decrement the count of locks against this same file. When the ** count reaches zero, close any other file descriptors whose close ** was deferred because of outstanding locks. */ pInode->nLock--; assert( pInode->nLock>=0 ); if( pInode->nLock==0 ){ closePendingFds(pFile); } } end_unlock: unixLeaveMutex(); if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int eFileLock){ #if SQLITE_MAX_MMAP_SIZE>0 assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); #endif return posixUnlock(id, eFileLock, 0); } #if SQLITE_MAX_MMAP_SIZE>0 static int unixMapfile(unixFile *pFd, i64 nByte); static void unixUnmapfile(unixFile *pFd); #endif /* ** This function performs the parts of the "close file" operation ** common to all locking schemes. It closes the directory and file ** handles, if they are valid, and sets all fields of the unixFile ** structure to 0. ** ** It is *not* necessary to hold the mutex when this routine is called, ** even on VxWorks. A mutex will be acquired on VxWorks by the ** vxworksReleaseFileId() routine. */ static int closeUnixFile(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; #if SQLITE_MAX_MMAP_SIZE>0 unixUnmapfile(pFile); #endif if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; } #if OS_VXWORKS if( pFile->pId ){ if( pFile->ctrlFlags & UNIXFILE_DELETE ){ osUnlink(pFile->pId->zCanonicalName); } vxworksReleaseFileId(pFile->pId); pFile->pId = 0; } #endif #ifdef SQLITE_UNLINK_AFTER_CLOSE if( pFile->ctrlFlags & UNIXFILE_DELETE ){ osUnlink(pFile->zPath); sqlite3_free(*(char**)&pFile->zPath); pFile->zPath = 0; } #endif OSTRACE(("CLOSE %-3d\n", pFile->h)); OpenCounter(-1); sqlite3_free(pFile->pUnused); memset(pFile, 0, sizeof(unixFile)); return SQLITE_OK; } /* ** Close a file. */ static int unixClose(sqlite3_file *id){ int rc = SQLITE_OK; unixFile *pFile = (unixFile *)id; verifyDbFile(pFile); unixUnlock(id, NO_LOCK); unixEnterMutex(); /* unixFile.pInode is always valid here. Otherwise, a different close ** routine (e.g. nolockClose()) would be called instead. */ assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 ); if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){ /* If there are outstanding locks, do not actually close the file just ** yet because that would clear those locks. Instead, add the file ** descriptor to pInode->pUnused list. It will be automatically closed ** when the last lock is cleared. */ setPendingFd(pFile); } releaseInodeInfo(pFile); rc = closeUnixFile(id); unixLeaveMutex(); return rc; } /************** End of the posix advisory lock implementation ***************** ******************************************************************************/ /****************************************************************************** ****************************** No-op Locking ********************************** ** ** Of the various locking implementations available, this is by far the ** simplest: locking is ignored. No attempt is made to lock the database ** file for reading or writing. ** ** This locking mode is appropriate for use on read-only databases ** (ex: databases that are burned into CD-ROM, for example.) It can ** also be used if the application employs some external mechanism to ** prevent simultaneous access of the same database by two or more ** database connections. But there is a serious risk of database ** corruption if this locking mode is used in situations where multiple ** database connections are accessing the same database file at the same ** time and one or more of those connections are writing. */ static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){ UNUSED_PARAMETER(NotUsed); *pResOut = 0; return SQLITE_OK; } static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){ UNUSED_PARAMETER2(NotUsed, NotUsed2); return SQLITE_OK; } static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ UNUSED_PARAMETER2(NotUsed, NotUsed2); return SQLITE_OK; } /* ** Close the file. */ static int nolockClose(sqlite3_file *id) { return closeUnixFile(id); } /******************* End of the no-op lock implementation ********************* ******************************************************************************/ /****************************************************************************** ************************* Begin dot-file Locking ****************************** ** ** The dotfile locking implementation uses the existence of separate lock ** files (really a directory) to control access to the database. This works ** on just about every filesystem imaginable. But there are serious downsides: ** ** (1) There is zero concurrency. A single reader blocks all other ** connections from reading or writing the database. ** ** (2) An application crash or power loss can leave stale lock files ** sitting around that need to be cleared manually. ** ** Nevertheless, a dotlock is an appropriate locking mode for use if no ** other locking strategy is available. ** ** Dotfile locking works by creating a subdirectory in the same directory as ** the database and with the same name but with a ".lock" extension added. ** The existence of a lock directory implies an EXCLUSIVE lock. All other ** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. */ /* ** The file suffix added to the data base filename in order to create the ** lock directory. */ #define DOTLOCK_SUFFIX ".lock" /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut ** to a non-zero value otherwise *pResOut is set to zero. The return value ** is set to SQLITE_OK unless an I/O error occurs during lock checking. ** ** In dotfile locking, either a lock exists or it does not. So in this ** variation of CheckReservedLock(), *pResOut is set to true if any lock ** is held on the file and false if the file is unlocked. */ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { int rc = SQLITE_OK; int reserved = 0; unixFile *pFile = (unixFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); /* Check if a thread in this process holds such a lock */ if( pFile->eFileLock>SHARED_LOCK ){ /* Either this connection or some other connection in the same process ** holds a lock on the file. No need to check further. */ reserved = 1; }else{ /* The lock is held if and only if the lockfile exists */ const char *zLockFile = (const char*)pFile->lockingContext; reserved = osAccess(zLockFile, 0)==0; } OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ** Lock the file with the lock specified by parameter eFileLock - one ** of the following: ** ** (1) SHARED_LOCK ** (2) RESERVED_LOCK ** (3) PENDING_LOCK ** (4) EXCLUSIVE_LOCK ** ** Sometimes when requesting one lock state, additional lock states ** are inserted in between. The locking might fail on one of the later ** transitions leaving the lock state different from what it started but ** still short of its goal. The following chart shows the allowed ** transitions and the inserted intermediate states: ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** ** This routine will only increase a lock. Use the sqlite3OsUnlock() ** routine to lower a locking level. ** ** With dotfile locking, we really only support state (4): EXCLUSIVE. ** But we track the other locking levels internally. */ static int dotlockLock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; char *zLockFile = (char *)pFile->lockingContext; int rc = SQLITE_OK; /* If we have any lock, then the lock file already exists. All we have ** to do is adjust our internal record of the lock level. */ if( pFile->eFileLock > NO_LOCK ){ pFile->eFileLock = eFileLock; /* Always update the timestamp on the old file */ #ifdef HAVE_UTIME utime(zLockFile, NULL); #else utimes(zLockFile, NULL); #endif return SQLITE_OK; } /* grab an exclusive lock */ rc = osMkdir(zLockFile, 0777); if( rc<0 ){ /* failed to open/create the lock directory */ int tErrno = errno; if( EEXIST == tErrno ){ rc = SQLITE_BUSY; } else { rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } } return rc; } /* got it, set the type and return ok */ pFile->eFileLock = eFileLock; return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. ** ** When the locking level reaches NO_LOCK, delete the lock file. */ static int dotlockUnlock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; char *zLockFile = (char *)pFile->lockingContext; int rc; assert( pFile ); OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, pFile->eFileLock, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); /* no-op if possible */ if( pFile->eFileLock==eFileLock ){ return SQLITE_OK; } /* To downgrade to shared, simply update our internal notion of the ** lock state. No need to mess with the file on disk. */ if( eFileLock==SHARED_LOCK ){ pFile->eFileLock = SHARED_LOCK; return SQLITE_OK; } /* To fully unlock the database, delete the lock file */ assert( eFileLock==NO_LOCK ); rc = osRmdir(zLockFile); if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile); if( rc<0 ){ int tErrno = errno; rc = 0; if( ENOENT != tErrno ){ rc = SQLITE_IOERR_UNLOCK; } if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } return rc; } pFile->eFileLock = NO_LOCK; return SQLITE_OK; } /* ** Close a file. Make sure the lock has been released before closing. */ static int dotlockClose(sqlite3_file *id) { int rc = SQLITE_OK; if( id ){ unixFile *pFile = (unixFile*)id; dotlockUnlock(id, NO_LOCK); sqlite3_free(pFile->lockingContext); rc = closeUnixFile(id); } return rc; } /****************** End of the dot-file lock implementation ******************* ******************************************************************************/ /****************************************************************************** ************************** Begin flock Locking ******************************** ** ** Use the flock() system call to do file locking. ** ** flock() locking is like dot-file locking in that the various ** fine-grain locking levels supported by SQLite are collapsed into ** a single exclusive lock. In other words, SHARED, RESERVED, and ** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite ** still works when you do this, but concurrency is reduced since ** only a single process can be reading the database at a time. ** ** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off */ #if SQLITE_ENABLE_LOCKING_STYLE /* ** Retry flock() calls that fail with EINTR */ #ifdef EINTR static int robust_flock(int fd, int op){ int rc; do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR ); return rc; } #else # define robust_flock(a,b) flock(a,b) #endif /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut ** to a non-zero value otherwise *pResOut is set to zero. The return value ** is set to SQLITE_OK unless an I/O error occurs during lock checking. */ static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ int rc = SQLITE_OK; int reserved = 0; unixFile *pFile = (unixFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); /* Check if a thread in this process holds such a lock */ if( pFile->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. */ if( !reserved ){ /* attempt to get the lock */ int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); if( !lrc ){ /* got the lock, unlock it */ lrc = robust_flock(pFile->h, LOCK_UN); if ( lrc ) { int tErrno = errno; /* unlock failed with an error */ lrc = SQLITE_IOERR_UNLOCK; if( IS_LOCK_ERROR(lrc) ){ storeLastErrno(pFile, tErrno); rc = lrc; } } } else { int tErrno = errno; reserved = 1; /* someone else might have it reserved */ lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(lrc) ){ storeLastErrno(pFile, tErrno); rc = lrc; } } } OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ rc = SQLITE_OK; reserved=1; } #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ *pResOut = reserved; return rc; } /* ** Lock the file with the lock specified by parameter eFileLock - one ** of the following: ** ** (1) SHARED_LOCK ** (2) RESERVED_LOCK ** (3) PENDING_LOCK ** (4) EXCLUSIVE_LOCK ** ** Sometimes when requesting one lock state, additional lock states ** are inserted in between. The locking might fail on one of the later ** transitions leaving the lock state different from what it started but ** still short of its goal. The following chart shows the allowed ** transitions and the inserted intermediate states: ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** ** flock() only really support EXCLUSIVE locks. We track intermediate ** lock states in the sqlite3_file structure, but all locks SHARED or ** above are really EXCLUSIVE locks and exclude all other processes from ** access the file. ** ** This routine will only increase a lock. Use the sqlite3OsUnlock() ** routine to lower a locking level. */ static int flockLock(sqlite3_file *id, int eFileLock) { int rc = SQLITE_OK; unixFile *pFile = (unixFile*)id; assert( pFile ); /* if we already have a lock, it is exclusive. ** Just adjust level and punt on outta here. */ if (pFile->eFileLock > NO_LOCK) { pFile->eFileLock = eFileLock; return SQLITE_OK; } /* grab an exclusive lock */ if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) { int tErrno = errno; /* didn't get, must be busy */ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } } else { /* got it, set the type and return ok */ pFile->eFileLock = eFileLock; } OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock), rc==SQLITE_OK ? "ok" : "failed")); #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){ rc = SQLITE_BUSY; } #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int flockUnlock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; assert( pFile ); OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock, pFile->eFileLock, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); /* no-op if possible */ if( pFile->eFileLock==eFileLock ){ return SQLITE_OK; } /* shared can just be set because we always have an exclusive */ if (eFileLock==SHARED_LOCK) { pFile->eFileLock = eFileLock; return SQLITE_OK; } /* no, really, unlock. */ if( robust_flock(pFile->h, LOCK_UN) ){ #ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS return SQLITE_OK; #endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ return SQLITE_IOERR_UNLOCK; }else{ pFile->eFileLock = NO_LOCK; return SQLITE_OK; } } /* ** Close a file. */ static int flockClose(sqlite3_file *id) { int rc = SQLITE_OK; if( id ){ flockUnlock(id, NO_LOCK); rc = closeUnixFile(id); } return rc; } #endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */ /******************* End of the flock lock implementation ********************* ******************************************************************************/ /****************************************************************************** ************************ Begin Named Semaphore Locking ************************ ** ** Named semaphore locking is only supported on VxWorks. ** ** Semaphore locking is like dot-lock and flock in that it really only ** supports EXCLUSIVE locking. Only a single process can read or write ** the database file at a time. This reduces potential concurrency, but ** makes the lock implementation much easier. */ #if OS_VXWORKS /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut ** to a non-zero value otherwise *pResOut is set to zero. The return value ** is set to SQLITE_OK unless an I/O error occurs during lock checking. */ static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) { int rc = SQLITE_OK; int reserved = 0; unixFile *pFile = (unixFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); /* Check if a thread in this process holds such a lock */ if( pFile->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. */ if( !reserved ){ sem_t *pSem = pFile->pInode->pSem; if( sem_trywait(pSem)==-1 ){ int tErrno = errno; if( EAGAIN != tErrno ){ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK); storeLastErrno(pFile, tErrno); } else { /* someone else has the lock when we are in NO_LOCK */ reserved = (pFile->eFileLock < SHARED_LOCK); } }else{ /* we could have it if we want it */ sem_post(pSem); } } OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ** Lock the file with the lock specified by parameter eFileLock - one ** of the following: ** ** (1) SHARED_LOCK ** (2) RESERVED_LOCK ** (3) PENDING_LOCK ** (4) EXCLUSIVE_LOCK ** ** Sometimes when requesting one lock state, additional lock states ** are inserted in between. The locking might fail on one of the later ** transitions leaving the lock state different from what it started but ** still short of its goal. The following chart shows the allowed ** transitions and the inserted intermediate states: ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** ** Semaphore locks only really support EXCLUSIVE locks. We track intermediate ** lock states in the sqlite3_file structure, but all locks SHARED or ** above are really EXCLUSIVE locks and exclude all other processes from ** access the file. ** ** This routine will only increase a lock. Use the sqlite3OsUnlock() ** routine to lower a locking level. */ static int semXLock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; sem_t *pSem = pFile->pInode->pSem; int rc = SQLITE_OK; /* if we already have a lock, it is exclusive. ** Just adjust level and punt on outta here. */ if (pFile->eFileLock > NO_LOCK) { pFile->eFileLock = eFileLock; rc = SQLITE_OK; goto sem_end_lock; } /* lock semaphore now but bail out when already locked. */ if( sem_trywait(pSem)==-1 ){ rc = SQLITE_BUSY; goto sem_end_lock; } /* got it, set the type and return ok */ pFile->eFileLock = eFileLock; sem_end_lock: return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int semXUnlock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; sem_t *pSem = pFile->pInode->pSem; assert( pFile ); assert( pSem ); OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock, pFile->eFileLock, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); /* no-op if possible */ if( pFile->eFileLock==eFileLock ){ return SQLITE_OK; } /* shared can just be set because we always have an exclusive */ if (eFileLock==SHARED_LOCK) { pFile->eFileLock = eFileLock; return SQLITE_OK; } /* no, really unlock. */ if ( sem_post(pSem)==-1 ) { int rc, tErrno = errno; rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } return rc; } pFile->eFileLock = NO_LOCK; return SQLITE_OK; } /* ** Close a file. */ static int semXClose(sqlite3_file *id) { if( id ){ unixFile *pFile = (unixFile*)id; semXUnlock(id, NO_LOCK); assert( pFile ); unixEnterMutex(); releaseInodeInfo(pFile); unixLeaveMutex(); closeUnixFile(id); } return SQLITE_OK; } #endif /* OS_VXWORKS */ /* ** Named semaphore locking is only available on VxWorks. ** *************** End of the named semaphore lock implementation **************** ******************************************************************************/ /****************************************************************************** *************************** Begin AFP Locking ********************************* ** ** AFP is the Apple Filing Protocol. AFP is a network filesystem found ** on Apple Macintosh computers - both OS9 and OSX. ** ** Third-party implementations of AFP are available. But this code here ** only works on OSX. */ #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE /* ** The afpLockingContext structure contains all afp lock specific state */ typedef struct afpLockingContext afpLockingContext; struct afpLockingContext { int reserved; const char *dbPath; /* Name of the open file */ }; struct ByteRangeLockPB2 { unsigned long long offset; /* offset to first byte to lock */ unsigned long long length; /* nbr of bytes to lock */ unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */ unsigned char unLockFlag; /* 1 = unlock, 0 = lock */ unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */ int fd; /* file desc to assoc this lock with */ }; #define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2) /* ** This is a utility for setting or clearing a bit-range lock on an ** AFP filesystem. ** ** Return SQLITE_OK on success, SQLITE_BUSY on failure. */ static int afpSetLock( const char *path, /* Name of the file to be locked or unlocked */ unixFile *pFile, /* Open file descriptor on path */ unsigned long long offset, /* First byte to be locked */ unsigned long long length, /* Number of bytes to lock */ int setLockFlag /* True to set lock. False to clear lock */ ){ struct ByteRangeLockPB2 pb; int err; pb.unLockFlag = setLockFlag ? 0 : 1; pb.startEndFlag = 0; pb.offset = offset; pb.length = length; pb.fd = pFile->h; OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), offset, length)); err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0); if ( err==-1 ) { int rc; int tErrno = errno; OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n", path, tErrno, strerror(tErrno))); #ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS rc = SQLITE_BUSY; #else rc = sqliteErrorFromPosixError(tErrno, setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); #endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */ if( IS_LOCK_ERROR(rc) ){ storeLastErrno(pFile, tErrno); } return rc; } else { return SQLITE_OK; } } /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut ** to a non-zero value otherwise *pResOut is set to zero. The return value ** is set to SQLITE_OK unless an I/O error occurs during lock checking. */ static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){ int rc = SQLITE_OK; int reserved = 0; unixFile *pFile = (unixFile*)id; afpLockingContext *context; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); context = (afpLockingContext *) pFile->lockingContext; if( context->reserved ){ *pResOut = 1; return SQLITE_OK; } unixEnterMutex(); /* Because pFile->pInode is shared across threads */ /* Check if a thread in this process holds such a lock */ if( pFile->pInode->eFileLock>SHARED_LOCK ){ reserved = 1; } /* Otherwise see if some other process holds it. */ if( !reserved ){ /* lock the RESERVED byte */ int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); if( SQLITE_OK==lrc ){ /* if we succeeded in taking the reserved lock, unlock it to restore ** the original state */ lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); } else { /* if we failed to get the lock then someone else must have it */ reserved = 1; } if( IS_LOCK_ERROR(lrc) ){ rc=lrc; } } unixLeaveMutex(); OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved)); *pResOut = reserved; return rc; } /* ** Lock the file with the lock specified by parameter eFileLock - one ** of the following: ** ** (1) SHARED_LOCK ** (2) RESERVED_LOCK ** (3) PENDING_LOCK ** (4) EXCLUSIVE_LOCK ** ** Sometimes when requesting one lock state, additional lock states ** are inserted in between. The locking might fail on one of the later ** transitions leaving the lock state different from what it started but ** still short of its goal. The following chart shows the allowed ** transitions and the inserted intermediate states: ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** ** This routine will only increase a lock. Use the sqlite3OsUnlock() ** routine to lower a locking level. */ static int afpLock(sqlite3_file *id, int eFileLock){ int rc = SQLITE_OK; unixFile *pFile = (unixFile*)id; unixInodeInfo *pInode = pFile->pInode; afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; assert( pFile ); OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h, azFileLock(eFileLock), azFileLock(pFile->eFileLock), azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0))); /* If there is already a lock of this type or more restrictive on the ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as ** unixEnterMutex() hasn't been called yet. */ if( pFile->eFileLock>=eFileLock ){ OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h, azFileLock(eFileLock))); return SQLITE_OK; } /* Make sure the locking sequence is correct ** (1) We never move from unlocked to anything higher than shared lock. ** (2) SQLite never explicitly requests a pendig lock. ** (3) A shared lock is always held when a reserve lock is requested. */ assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK ); assert( eFileLock!=PENDING_LOCK ); assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK ); /* This mutex is needed because pFile->pInode is shared across threads */ unixEnterMutex(); pInode = pFile->pInode; /* If some thread using this PID has a lock via a different unixFile* ** handle that precludes the requested lock, return BUSY. */ if( (pFile->eFileLock!=pInode->eFileLock && (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) ){ rc = SQLITE_BUSY; goto afp_end_lock; } /* If a SHARED lock is requested, and some thread using this PID already ** has a SHARED or RESERVED lock, then increment reference counts and ** return SQLITE_OK. */ if( eFileLock==SHARED_LOCK && (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ assert( eFileLock==SHARED_LOCK ); assert( pFile->eFileLock==0 ); assert( pInode->nShared>0 ); pFile->eFileLock = SHARED_LOCK; pInode->nShared++; pInode->nLock++; goto afp_end_lock; } /* A PENDING lock is needed before acquiring a SHARED lock and before ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will ** be released. */ if( eFileLock==SHARED_LOCK || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLockdbPath, pFile, PENDING_BYTE, 1, 1); if (failed) { rc = failed; goto afp_end_lock; } } /* If control gets to this point, then actually go ahead and make ** operating system calls for the specified lock. */ if( eFileLock==SHARED_LOCK ){ int lrc1, lrc2, lrc1Errno = 0; long lk, mask; assert( pInode->nShared==0 ); assert( pInode->eFileLock==0 ); mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff; /* Now get the read-lock SHARED_LOCK */ /* note that the quality of the randomness doesn't matter that much */ lk = random(); pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1); lrc1 = afpSetLock(context->dbPath, pFile, SHARED_FIRST+pInode->sharedByte, 1, 1); if( IS_LOCK_ERROR(lrc1) ){ lrc1Errno = pFile->lastErrno; } /* Drop the temporary PENDING lock */ lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); if( IS_LOCK_ERROR(lrc1) ) { storeLastErrno(pFile, lrc1Errno); rc = lrc1; goto afp_end_lock; } else if( IS_LOCK_ERROR(lrc2) ){ rc = lrc2; goto afp_end_lock; } else if( lrc1 != SQLITE_OK ) { rc = lrc1; } else { pFile->eFileLock = SHARED_LOCK; pInode->nLock++; pInode->nShared = 1; } }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ /* We are trying for an exclusive lock but another thread in this ** same process is still holding a shared lock. */ rc = SQLITE_BUSY; }else{ /* The request was for a RESERVED or EXCLUSIVE lock. It is ** assumed that there is a SHARED or greater lock on the file ** already. */ int failed = 0; assert( 0!=pFile->eFileLock ); if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) { /* Acquire a RESERVED lock */ failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1); if( !failed ){ context->reserved = 1; } } if (!failed && eFileLock == EXCLUSIVE_LOCK) { /* Acquire an EXCLUSIVE lock */ /* Remove the shared lock before trying the range. we'll need to ** reestablish the shared lock if we can't get the afpUnlock */ if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST + pInode->sharedByte, 1, 0)) ){ int failed2 = SQLITE_OK; /* now attemmpt to get the exclusive lock range */ failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 1); if( failed && (failed2 = afpSetLock(context->dbPath, pFile, SHARED_FIRST + pInode->sharedByte, 1, 1)) ){ /* Can't reestablish the shared lock. Sqlite can't deal, this is ** a critical I/O error */ rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 : SQLITE_IOERR_LOCK; goto afp_end_lock; } }else{ rc = failed; } } if( failed ){ rc = failed; } } if( rc==SQLITE_OK ){ pFile->eFileLock = eFileLock; pInode->eFileLock = eFileLock; }else if( eFileLock==EXCLUSIVE_LOCK ){ pFile->eFileLock = PENDING_LOCK; pInode->eFileLock = PENDING_LOCK; } afp_end_lock: unixLeaveMutex(); OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock), rc==SQLITE_OK ? "ok" : "failed")); return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int afpUnlock(sqlite3_file *id, int eFileLock) { int rc = SQLITE_OK; unixFile *pFile = (unixFile*)id; unixInodeInfo *pInode; afpLockingContext *context = (afpLockingContext *) pFile->lockingContext; int skipShared = 0; #ifdef SQLITE_TEST int h = pFile->h; #endif assert( pFile ); OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock, pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared, osGetpid(0))); assert( eFileLock<=SHARED_LOCK ); if( pFile->eFileLock<=eFileLock ){ return SQLITE_OK; } unixEnterMutex(); pInode = pFile->pInode; assert( pInode->nShared!=0 ); if( pFile->eFileLock>SHARED_LOCK ){ assert( pInode->eFileLock==pFile->eFileLock ); SimulateIOErrorBenign(1); SimulateIOError( h=(-1) ) SimulateIOErrorBenign(0); #ifdef SQLITE_DEBUG /* When reducing a lock such that other processes can start ** reading the database file again, make sure that the ** transaction counter was updated if any part of the database ** file changed. If the transaction counter is not updated, ** other connections to the same file might not realize that ** the file has changed and hence might not know to flush their ** cache. The use of a stale cache can lead to database corruption. */ assert( pFile->inNormalWrite==0 || pFile->dbUpdate==0 || pFile->transCntrChng==1 ); pFile->inNormalWrite = 0; #endif if( pFile->eFileLock==EXCLUSIVE_LOCK ){ rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0); if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){ /* only re-establish the shared lock if necessary */ int sharedLockByte = SHARED_FIRST+pInode->sharedByte; rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1); } else { skipShared = 1; } } if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){ rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0); } if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){ rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0); if( !rc ){ context->reserved = 0; } } if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){ pInode->eFileLock = SHARED_LOCK; } } if( rc==SQLITE_OK && eFileLock==NO_LOCK ){ /* Decrement the shared lock counter. Release the lock using an ** OS call only when all threads in this same process have released ** the lock. */ unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte; pInode->nShared--; if( pInode->nShared==0 ){ SimulateIOErrorBenign(1); SimulateIOError( h=(-1) ) SimulateIOErrorBenign(0); if( !skipShared ){ rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0); } if( !rc ){ pInode->eFileLock = NO_LOCK; pFile->eFileLock = NO_LOCK; } } if( rc==SQLITE_OK ){ pInode->nLock--; assert( pInode->nLock>=0 ); if( pInode->nLock==0 ){ closePendingFds(pFile); } } } unixLeaveMutex(); if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock; return rc; } /* ** Close a file & cleanup AFP specific locking context */ static int afpClose(sqlite3_file *id) { int rc = SQLITE_OK; if( id ){ unixFile *pFile = (unixFile*)id; afpUnlock(id, NO_LOCK); unixEnterMutex(); if( pFile->pInode && pFile->pInode->nLock ){ /* If there are outstanding locks, do not actually close the file just ** yet because that would clear those locks. Instead, add the file ** descriptor to pInode->aPending. It will be automatically closed when ** the last lock is cleared. */ setPendingFd(pFile); } releaseInodeInfo(pFile); sqlite3_free(pFile->lockingContext); rc = closeUnixFile(id); unixLeaveMutex(); } return rc; } #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ /* ** The code above is the AFP lock implementation. The code is specific ** to MacOSX and does not work on other unix platforms. No alternative ** is available. If you don't compile for a mac, then the "unix-afp" ** VFS is not available. ** ********************* End of the AFP lock implementation ********************** ******************************************************************************/ /****************************************************************************** *************************** Begin NFS Locking ********************************/ #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int nfsUnlock(sqlite3_file *id, int eFileLock){ return posixUnlock(id, eFileLock, 1); } #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ /* ** The code above is the NFS lock implementation. The code is specific ** to MacOSX and does not work on other unix platforms. No alternative ** is available. ** ********************* End of the NFS lock implementation ********************** ******************************************************************************/ /****************************************************************************** **************** Non-locking sqlite3_file methods ***************************** ** ** The next division contains implementations for all methods of the ** sqlite3_file object other than the locking methods. The locking ** methods were defined in divisions above (one locking method per ** division). Those methods that are common to all locking modes ** are gather together into this division. */ /* ** Seek to the offset passed as the second argument, then read cnt ** bytes into pBuf. Return the number of bytes actually read. ** ** NB: If you define USE_PREAD or USE_PREAD64, then it might also ** be necessary to define _XOPEN_SOURCE to be 500. This varies from ** one system to another. Since SQLite does not define USE_PREAD ** in any form by default, we will not attempt to define _XOPEN_SOURCE. ** See tickets #2741 and #2681. ** ** To avoid stomping the errno value on a failed read the lastErrno value ** is set before returning. */ static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ int got; int prior = 0; #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) i64 newOffset; #endif TIMER_START; assert( cnt==(cnt&0x1ffff) ); assert( id->h>2 ); do{ #if defined(USE_PREAD) got = osPread(id->h, pBuf, cnt, offset); SimulateIOError( got = -1 ); #elif defined(USE_PREAD64) got = osPread64(id->h, pBuf, cnt, offset); SimulateIOError( got = -1 ); #else newOffset = lseek(id->h, offset, SEEK_SET); SimulateIOError( newOffset-- ); if( newOffset!=offset ){ if( newOffset == -1 ){ storeLastErrno((unixFile*)id, errno); }else{ storeLastErrno((unixFile*)id, 0); } return -1; } got = osRead(id->h, pBuf, cnt); #endif if( got==cnt ) break; if( got<0 ){ if( errno==EINTR ){ got = 1; continue; } prior = 0; storeLastErrno((unixFile*)id, errno); break; }else if( got>0 ){ cnt -= got; offset += got; prior += got; pBuf = (void*)(got + (char*)pBuf); } }while( got>0 ); TIMER_END; OSTRACE(("READ %-3d %5d %7lld %llu\n", id->h, got+prior, offset-prior, TIMER_ELAPSED)); return got+prior; } /* ** Read data from a file into a buffer. Return SQLITE_OK if all ** bytes were read successfully and SQLITE_IOERR if anything goes ** wrong. */ static int unixRead( sqlite3_file *id, void *pBuf, int amt, sqlite3_int64 offset ){ unixFile *pFile = (unixFile *)id; int got; assert( id ); assert( offset>=0 ); assert( amt>0 ); /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ #if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); #endif #if SQLITE_MAX_MMAP_SIZE>0 /* Deal with as much of this read request as possible by transfering ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); return SQLITE_OK; }else{ int nCopy = pFile->mmapSize - offset; memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); pBuf = &((u8 *)pBuf)[nCopy]; amt -= nCopy; offset += nCopy; } } #endif got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ return SQLITE_OK; }else if( got<0 ){ /* lastErrno set by seekAndRead */ return SQLITE_IOERR_READ; }else{ storeLastErrno(pFile, 0); /* not a system error */ /* Unread parts of the buffer must be zero-filled */ memset(&((char*)pBuf)[got], 0, amt-got); return SQLITE_IOERR_SHORT_READ; } } /* ** Attempt to seek the file-descriptor passed as the first argument to ** absolute offset iOff, then attempt to write nBuf bytes of data from ** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise, ** return the actual number of bytes written (which may be less than ** nBuf). */ static int seekAndWriteFd( int fd, /* File descriptor to write to */ i64 iOff, /* File offset to begin writing at */ const void *pBuf, /* Copy data from this buffer to the file */ int nBuf, /* Size of buffer pBuf in bytes */ int *piErrno /* OUT: Error number if error occurs */ ){ int rc = 0; /* Value returned by system call */ assert( nBuf==(nBuf&0x1ffff) ); assert( fd>2 ); nBuf &= 0x1ffff; TIMER_START; #if defined(USE_PREAD) do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR ); #elif defined(USE_PREAD64) do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR); #else do{ i64 iSeek = lseek(fd, iOff, SEEK_SET); SimulateIOError( iSeek-- ); if( iSeek!=iOff ){ if( piErrno ) *piErrno = (iSeek==-1 ? errno : 0); return -1; } rc = osWrite(fd, pBuf, nBuf); }while( rc<0 && errno==EINTR ); #endif TIMER_END; OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED)); if( rc<0 && piErrno ) *piErrno = errno; return rc; } /* ** Seek to the offset in id->offset then read cnt bytes into pBuf. ** Return the number of bytes actually read. Update the offset. ** ** To avoid stomping the errno value on a failed write the lastErrno value ** is set before returning. */ static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){ return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno); } /* ** Write data from a buffer into a file. Return SQLITE_OK on success ** or some other error code on failure. */ static int unixWrite( sqlite3_file *id, const void *pBuf, int amt, sqlite3_int64 offset ){ unixFile *pFile = (unixFile*)id; int wrote = 0; assert( id ); assert( amt>0 ); /* If this is a database file (not a journal, master-journal or temp ** file), the bytes in the locking range should never be read or written. */ #if 0 assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); #endif #ifdef SQLITE_DEBUG /* If we are doing a normal write to a database file (as opposed to ** doing a hot-journal rollback or a write to some file other than a ** normal database file) then record the fact that the database ** has changed. If the transaction counter is modified, record that ** fact too. */ if( pFile->inNormalWrite ){ pFile->dbUpdate = 1; /* The database has been modified */ if( offset<=24 && offset+amt>=27 ){ int rc; char oldCntr[4]; SimulateIOErrorBenign(1); rc = seekAndRead(pFile, 24, oldCntr, 4); SimulateIOErrorBenign(0); if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){ pFile->transCntrChng = 1; /* The transaction counter has changed */ } } } #endif #if SQLITE_MAX_MMAP_SIZE>0 /* Deal with as much of this write request as possible by transfering ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); return SQLITE_OK; }else{ int nCopy = pFile->mmapSize - offset; memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); pBuf = &((u8 *)pBuf)[nCopy]; amt -= nCopy; offset += nCopy; } } #endif while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))0 ){ amt -= wrote; offset += wrote; pBuf = &((char*)pBuf)[wrote]; } SimulateIOError(( wrote=(-1), amt=1 )); SimulateDiskfullError(( wrote=0, amt=1 )); if( amt>wrote ){ if( wrote<0 && pFile->lastErrno!=ENOSPC ){ /* lastErrno set by seekAndWrite */ return SQLITE_IOERR_WRITE; }else{ storeLastErrno(pFile, 0); /* not a system error */ return SQLITE_FULL; } } return SQLITE_OK; } #ifdef SQLITE_TEST /* ** Count the number of fullsyncs and normal syncs. This is used to test ** that syncs and fullsyncs are occurring at the right times. */ SQLITE_PRIVATE int sqlite3_sync_count = 0; SQLITE_PRIVATE int sqlite3_fullsync_count = 0; #endif /* ** We do not trust systems to provide a working fdatasync(). Some do. ** Others do no. To be safe, we will stick with the (slightly slower) ** fsync(). If you know that your system does support fdatasync() correctly, ** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC */ #if !defined(fdatasync) && !HAVE_FDATASYNC # define fdatasync fsync #endif /* ** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not ** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently ** only available on Mac OS X. But that could change. */ #ifdef F_FULLFSYNC # define HAVE_FULLFSYNC 1 #else # define HAVE_FULLFSYNC 0 #endif /* ** The fsync() system call does not work as advertised on many ** unix systems. The following procedure is an attempt to make ** it work better. ** ** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful ** for testing when we want to run through the test suite quickly. ** You are strongly advised *not* to deploy with SQLITE_NO_SYNC ** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash ** or power failure will likely corrupt the database file. ** ** SQLite sets the dataOnly flag if the size of the file is unchanged. ** The idea behind dataOnly is that it should only write the file content ** to disk, not the inode. We only set dataOnly if the file size is ** unchanged since the file size is part of the inode. However, ** Ted Ts'o tells us that fdatasync() will also write the inode if the ** file size has changed. The only real difference between fdatasync() ** and fsync(), Ted tells us, is that fdatasync() will not flush the ** inode if the mtime or owner or other inode attributes have changed. ** We only care about the file size, not the other file attributes, so ** as far as SQLite is concerned, an fdatasync() is always adequate. ** So, we always use fdatasync() if it is available, regardless of ** the value of the dataOnly flag. */ static int full_fsync(int fd, int fullSync, int dataOnly){ int rc; /* The following "ifdef/elif/else/" block has the same structure as ** the one below. It is replicated here solely to avoid cluttering ** up the real code with the UNUSED_PARAMETER() macros. */ #ifdef SQLITE_NO_SYNC UNUSED_PARAMETER(fd); UNUSED_PARAMETER(fullSync); UNUSED_PARAMETER(dataOnly); #elif HAVE_FULLFSYNC UNUSED_PARAMETER(dataOnly); #else UNUSED_PARAMETER(fullSync); UNUSED_PARAMETER(dataOnly); #endif /* Record the number of times that we do a normal fsync() and ** FULLSYNC. This is used during testing to verify that this procedure ** gets called with the correct arguments. */ #ifdef SQLITE_TEST if( fullSync ) sqlite3_fullsync_count++; sqlite3_sync_count++; #endif /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a ** no-op */ #ifdef SQLITE_NO_SYNC rc = SQLITE_OK; #elif HAVE_FULLFSYNC if( fullSync ){ rc = osFcntl(fd, F_FULLFSYNC, 0); }else{ rc = 1; } /* If the FULLFSYNC failed, fall back to attempting an fsync(). ** It shouldn't be possible for fullfsync to fail on the local ** file system (on OSX), so failure indicates that FULLFSYNC ** isn't supported for this file system. So, attempt an fsync ** and (for now) ignore the overhead of a superfluous fcntl call. ** It'd be better to detect fullfsync support once and avoid ** the fcntl call every time sync is called. */ if( rc ) rc = fsync(fd); #elif defined(__APPLE__) /* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly ** so currently we default to the macro that redefines fdatasync to fsync */ rc = fsync(fd); #else rc = fdatasync(fd); #if OS_VXWORKS if( rc==-1 && errno==ENOTSUP ){ rc = fsync(fd); } #endif /* OS_VXWORKS */ #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */ if( OS_VXWORKS && rc!= -1 ){ rc = 0; } return rc; } /* ** Open a file descriptor to the directory containing file zFilename. ** If successful, *pFd is set to the opened file descriptor and ** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM ** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined ** value. ** ** The directory file descriptor is used for only one thing - to ** fsync() a directory to make sure file creation and deletion events ** are flushed to disk. Such fsyncs are not needed on newer ** journaling filesystems, but are required on older filesystems. ** ** This routine can be overridden using the xSetSysCall interface. ** The ability to override this routine was added in support of the ** chromium sandbox. Opening a directory is a security risk (we are ** told) so making it overrideable allows the chromium sandbox to ** replace this routine with a harmless no-op. To make this routine ** a no-op, replace it with a stub that returns SQLITE_OK but leaves ** *pFd set to a negative number. ** ** If SQLITE_OK is returned, the caller is responsible for closing ** the file descriptor *pFd using close(). */ static int openDirectory(const char *zFilename, int *pFd){ int ii; int fd = -1; char zDirname[MAX_PATHNAME+1]; sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename); for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--); if( ii>0 ){ zDirname[ii] = '\0'; fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); if( fd>=0 ){ OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); } } *pFd = fd; return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname)); } /* ** Make sure all writes to a particular file are committed to disk. ** ** If dataOnly==0 then both the file itself and its metadata (file ** size, access time, etc) are synced. If dataOnly!=0 then only the ** file data is synced. ** ** Under Unix, also make sure that the directory entry for the file ** has been created by fsync-ing the directory that contains the file. ** If we do not do this and we encounter a power failure, the directory ** entry for the journal might not exist after we reboot. The next ** SQLite to access the file will not know that the journal exists (because ** the directory entry for the journal was never created) and the transaction ** will not roll back - possibly leading to database corruption. */ static int unixSync(sqlite3_file *id, int flags){ int rc; unixFile *pFile = (unixFile*)id; int isDataOnly = (flags&SQLITE_SYNC_DATAONLY); int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL; /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ assert((flags&0x0F)==SQLITE_SYNC_NORMAL || (flags&0x0F)==SQLITE_SYNC_FULL ); /* Unix cannot, but some systems may return SQLITE_FULL from here. This ** line is to test that doing so does not cause any problems. */ SimulateDiskfullError( return SQLITE_FULL ); assert( pFile ); OSTRACE(("SYNC %-3d\n", pFile->h)); rc = full_fsync(pFile->h, isFullsync, isDataOnly); SimulateIOError( rc=1 ); if( rc ){ storeLastErrno(pFile, errno); return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath); } /* Also fsync the directory containing the file if the DIRSYNC flag ** is set. This is a one-time occurrence. Many systems (examples: AIX) ** are unable to fsync a directory, so ignore errors on the fsync. */ if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){ int dirfd; OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath, HAVE_FULLFSYNC, isFullsync)); rc = osOpenDirectory(pFile->zPath, &dirfd); if( rc==SQLITE_OK && dirfd>=0 ){ full_fsync(dirfd, 0, 0); robust_close(pFile, dirfd, __LINE__); }else if( rc==SQLITE_CANTOPEN ){ rc = SQLITE_OK; } pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC; } return rc; } /* ** Truncate an open file to a specified size */ static int unixTruncate(sqlite3_file *id, i64 nByte){ unixFile *pFile = (unixFile *)id; int rc; assert( pFile ); SimulateIOError( return SQLITE_IOERR_TRUNCATE ); /* If the user has configured a chunk-size for this file, truncate the ** file so that it consists of an integer number of chunks (i.e. the ** actual file size after the operation may be larger than the requested ** size). */ if( pFile->szChunk>0 ){ nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; } rc = robust_ftruncate(pFile->h, nByte); if( rc ){ storeLastErrno(pFile, errno); return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); }else{ #ifdef SQLITE_DEBUG /* If we are doing a normal write to a database file (as opposed to ** doing a hot-journal rollback or a write to some file other than a ** normal database file) and we truncate the file to zero length, ** that effectively updates the change counter. This might happen ** when restoring a database using the backup API from a zero-length ** source. */ if( pFile->inNormalWrite && nByte==0 ){ pFile->transCntrChng = 1; } #endif #if SQLITE_MAX_MMAP_SIZE>0 /* If the file was just truncated to a size smaller than the currently ** mapped region, reduce the effective mapping size as well. SQLite will ** use read() and write() to access data beyond this point from now on. */ if( nBytemmapSize ){ pFile->mmapSize = nByte; } #endif return SQLITE_OK; } } /* ** Determine the current size of a file in bytes */ static int unixFileSize(sqlite3_file *id, i64 *pSize){ int rc; struct stat buf; assert( id ); rc = osFstat(((unixFile*)id)->h, &buf); SimulateIOError( rc=1 ); if( rc!=0 ){ storeLastErrno((unixFile*)id, errno); return SQLITE_IOERR_FSTAT; } *pSize = buf.st_size; /* When opening a zero-size database, the findInodeInfo() procedure ** writes a single byte into that file in order to work around a bug ** in the OS-X msdos filesystem. In order to avoid problems with upper ** layers, we need to report this file size as zero even though it is ** really 1. Ticket #3260. */ if( *pSize==1 ) *pSize = 0; return SQLITE_OK; } #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) /* ** Handler for proxy-locking file-control verbs. Defined below in the ** proxying locking division. */ static int proxyFileControl(sqlite3_file*,int,void*); #endif /* ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT ** file-control operation. Enlarge the database to nBytes in size ** (rounded up to the next chunk-size). If the database is already ** nBytes or larger, this routine is a no-op. */ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ if( pFile->szChunk>0 ){ i64 nSize; /* Required file size */ struct stat buf; /* Used to hold return values of fstat() */ if( osFstat(pFile->h, &buf) ){ return SQLITE_IOERR_FSTAT; } nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk; if( nSize>(i64)buf.st_size ){ #if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE /* The code below is handling the return value of osFallocate() ** correctly. posix_fallocate() is defined to "returns zero on success, ** or an error number on failure". See the manpage for details. */ int err; do{ err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size); }while( err==EINTR ); if( err ) return SQLITE_IOERR_WRITE; #else /* If the OS does not have posix_fallocate(), fake it. Write a ** single byte to the last byte in each block that falls entirely ** within the extended region. Then, if required, a single byte ** at offset (nSize-1), to set the size of the file correctly. ** This is a similar technique to that used by glibc on systems ** that do not have a real fallocate() call. */ int nBlk = buf.st_blksize; /* File-system block size */ int nWrite = 0; /* Number of bytes written by seekAndWrite */ i64 iWrite; /* Next offset to write to */ iWrite = ((buf.st_size + 2*nBlk - 1)/nBlk)*nBlk-1; assert( iWrite>=buf.st_size ); assert( (iWrite/nBlk)==((buf.st_size+nBlk-1)/nBlk) ); assert( ((iWrite+1)%nBlk)==0 ); for(/*no-op*/; iWrite0 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ int rc; if( pFile->szChunk<=0 ){ if( robust_ftruncate(pFile->h, nByte) ){ storeLastErrno(pFile, errno); return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); } } rc = unixMapfile(pFile, nByte); return rc; } #endif return SQLITE_OK; } /* ** If *pArg is initially negative then this is a query. Set *pArg to ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. ** ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. */ static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ if( *pArg<0 ){ *pArg = (pFile->ctrlFlags & mask)!=0; }else if( (*pArg)==0 ){ pFile->ctrlFlags &= ~mask; }else{ pFile->ctrlFlags |= mask; } } /* Forward declaration */ static int unixGetTempname(int nBuf, char *zBuf); /* ** Information and control of an open file handle. */ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ unixFile *pFile = (unixFile*)id; switch( op ){ case SQLITE_FCNTL_WAL_BLOCK: { /* pFile->ctrlFlags |= UNIXFILE_BLOCK; // Deferred feature */ return SQLITE_OK; } case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->eFileLock; return SQLITE_OK; } case SQLITE_FCNTL_LAST_ERRNO: { *(int*)pArg = pFile->lastErrno; return SQLITE_OK; } case SQLITE_FCNTL_CHUNK_SIZE: { pFile->szChunk = *(int *)pArg; return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { int rc; SimulateIOErrorBenign(1); rc = fcntlSizeHint(pFile, *(i64 *)pArg); SimulateIOErrorBenign(0); return rc; } case SQLITE_FCNTL_PERSIST_WAL: { unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); return SQLITE_OK; } case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); return SQLITE_OK; } case SQLITE_FCNTL_TEMPFILENAME: { char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname ); if( zTFile ){ unixGetTempname(pFile->pVfs->mxPathname, zTFile); *(char**)pArg = zTFile; } return SQLITE_OK; } case SQLITE_FCNTL_HAS_MOVED: { *(int*)pArg = fileHasMoved(pFile); return SQLITE_OK; } #if SQLITE_MAX_MMAP_SIZE>0 case SQLITE_FCNTL_MMAP_SIZE: { i64 newLimit = *(i64*)pArg; int rc = SQLITE_OK; if( newLimit>sqlite3GlobalConfig.mxMmap ){ newLimit = sqlite3GlobalConfig.mxMmap; } *(i64*)pArg = pFile->mmapSizeMax; if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ pFile->mmapSizeMax = newLimit; if( pFile->mmapSize>0 ){ unixUnmapfile(pFile); rc = unixMapfile(pFile, -1); } } return rc; } #endif #ifdef SQLITE_DEBUG /* The pager calls this method to signal that it has done ** a rollback and that the database is therefore unchanged and ** it hence it is OK for the transaction change counter to be ** unchanged. */ case SQLITE_FCNTL_DB_UNCHANGED: { ((unixFile*)id)->dbUpdate = 0; return SQLITE_OK; } #endif #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) case SQLITE_FCNTL_SET_LOCKPROXYFILE: case SQLITE_FCNTL_GET_LOCKPROXYFILE: { return proxyFileControl(id,op,pArg); } #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ } return SQLITE_NOTFOUND; } /* ** Return the sector size in bytes of the underlying block device for ** the specified file. This is almost always 512 bytes, but may be ** larger for some devices. ** ** SQLite code assumes this function cannot fail. It also assumes that ** if two files are created in the same file-system directory (i.e. ** a database and its journal file) that the sector size will be the ** same for both. */ #ifndef __QNXNTO__ static int unixSectorSize(sqlite3_file *NotUsed){ UNUSED_PARAMETER(NotUsed); return SQLITE_DEFAULT_SECTOR_SIZE; } #endif /* ** The following version of unixSectorSize() is optimized for QNX. */ #ifdef __QNXNTO__ #include #include static int unixSectorSize(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; if( pFile->sectorSize == 0 ){ struct statvfs fsInfo; /* Set defaults for non-supported filesystems */ pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; pFile->deviceCharacteristics = 0; if( fstatvfs(pFile->h, &fsInfo) == -1 ) { return pFile->sectorSize; } if( !strcmp(fsInfo.f_basetype, "tmp") ) { pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */ SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until ** the write succeeds */ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; }else if( strstr(fsInfo.f_basetype, "etfs") ){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = /* etfs cluster size writes are atomic */ (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) | SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until ** the write succeeds */ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; }else if( !strcmp(fsInfo.f_basetype, "qnx6") ){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */ SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until ** the write succeeds */ SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; }else if( !strcmp(fsInfo.f_basetype, "qnx4") ){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = /* full bitset of atomics from max sector size and smaller */ ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; }else if( strstr(fsInfo.f_basetype, "dos") ){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = /* full bitset of atomics from max sector size and smaller */ ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; }else{ pFile->deviceCharacteristics = SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */ SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until ** the write succeeds */ 0; } } /* Last chance verification. If the sector size isn't a multiple of 512 ** then it isn't valid.*/ if( pFile->sectorSize % 512 != 0 ){ pFile->deviceCharacteristics = 0; pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; } return pFile->sectorSize; } #endif /* __QNXNTO__ */ /* ** Return the device characteristics for the file. ** ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. ** However, that choice is controversial since technically the underlying ** file system does not always provide powersafe overwrites. (In other ** words, after a power-loss event, parts of the file that were never ** written might end up being altered.) However, non-PSOW behavior is very, ** very rare. And asserting PSOW makes a large reduction in the amount ** of required I/O for journaling, since a lot of padding is eliminated. ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control ** available to turn it off and URI query parameter available to turn it off. */ static int unixDeviceCharacteristics(sqlite3_file *id){ unixFile *p = (unixFile*)id; int rc = 0; #ifdef __QNXNTO__ if( p->sectorSize==0 ) unixSectorSize(id); rc = p->deviceCharacteristics; #endif if( p->ctrlFlags & UNIXFILE_PSOW ){ rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; } return rc; } #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 /* ** Return the system page size. ** ** This function should not be called directly by other code in this file. ** Instead, it should be called via macro osGetpagesize(). */ static int unixGetpagesize(void){ #if OS_VXWORKS return 1024; #elif defined(_BSD_SOURCE) return getpagesize(); #else return (int)sysconf(_SC_PAGESIZE); #endif } #endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ #ifndef SQLITE_OMIT_WAL /* ** Object used to represent an shared memory buffer. ** ** When multiple threads all reference the same wal-index, each thread ** has its own unixShm object, but they all point to a single instance ** of this unixShmNode object. In other words, each wal-index is opened ** only once per process. ** ** Each unixShmNode object is connected to a single unixInodeInfo object. ** We could coalesce this object into unixInodeInfo, but that would mean ** every open file that does not use shared memory (in other words, most ** open files) would have to carry around this extra information. So ** the unixInodeInfo object contains a pointer to this unixShmNode object ** and the unixShmNode object is created only when needed. ** ** unixMutexHeld() must be true when creating or destroying ** this object or while reading or writing the following fields: ** ** nRef ** ** The following fields are read-only after the object is created: ** ** fid ** zFilename ** ** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and ** unixMutexHeld() is true when reading or writing any other field ** in this structure. */ struct unixShmNode { unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */ sqlite3_mutex *mutex; /* Mutex to access this object */ char *zFilename; /* Name of the mmapped file */ int h; /* Open file descriptor */ int szRegion; /* Size of shared-memory regions */ u16 nRegion; /* Size of array apRegion */ u8 isReadonly; /* True if read-only */ char **apRegion; /* Array of mapped shared-memory regions */ int nRef; /* Number of unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */ #ifdef SQLITE_DEBUG u8 exclMask; /* Mask of exclusive locks held */ u8 sharedMask; /* Mask of shared locks held */ u8 nextShmId; /* Next available unixShm.id value */ #endif }; /* ** Structure used internally by this VFS to record the state of an ** open shared memory connection. ** ** The following fields are initialized when this object is created and ** are read-only thereafter: ** ** unixShm.pFile ** unixShm.id ** ** All other fields are read/write. The unixShm.pFile->mutex must be held ** while accessing any read/write fields. */ struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next unixShm with the same unixShmNode */ u8 hasMutex; /* True if holding the unixShmNode mutex */ u8 id; /* Id of this connection within its unixShmNode */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ }; /* ** Constants used for locking */ #define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ #define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ /* ** Apply posix advisory locks for all bytes from ofst through ofst+n-1. ** ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking ** otherwise. */ static int unixShmSystemLock( unixFile *pFile, /* Open connection to the WAL file */ int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ int ofst, /* First byte of the locking range */ int n /* Number of bytes to lock */ ){ unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */ struct flock f; /* The posix advisory locking structure */ int rc = SQLITE_OK; /* Result code form fcntl() */ /* Access to the unixShmNode object is serialized by the caller */ pShmNode = pFile->pInode->pShmNode; assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); /* Shared locks never span more than one byte */ assert( n==1 || lockType!=F_RDLCK ); /* Locks are within range */ assert( n>=1 && nh>=0 ){ int lkType; /* Initialize the locking parameters */ memset(&f, 0, sizeof(f)); f.l_type = lockType; f.l_whence = SEEK_SET; f.l_start = ofst; f.l_len = n; lkType = (pFile->ctrlFlags & UNIXFILE_BLOCK)!=0 ? F_SETLKW : F_SETLK; rc = osFcntl(pShmNode->h, lkType, &f); rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; pFile->ctrlFlags &= ~UNIXFILE_BLOCK; } /* Update the global lock state and do debug tracing */ #ifdef SQLITE_DEBUG { u16 mask; OSTRACE(("SHM-LOCK ")); mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<exclMask &= ~mask; pShmNode->sharedMask &= ~mask; }else if( lockType==F_RDLCK ){ OSTRACE(("read-lock %d ok", ofst)); pShmNode->exclMask &= ~mask; pShmNode->sharedMask |= mask; }else{ assert( lockType==F_WRLCK ); OSTRACE(("write-lock %d ok", ofst)); pShmNode->exclMask |= mask; pShmNode->sharedMask &= ~mask; } }else{ if( lockType==F_UNLCK ){ OSTRACE(("unlock %d failed", ofst)); }else if( lockType==F_RDLCK ){ OSTRACE(("read-lock failed")); }else{ assert( lockType==F_WRLCK ); OSTRACE(("write-lock %d failed", ofst)); } } OSTRACE((" - afterwards %03x,%03x\n", pShmNode->sharedMask, pShmNode->exclMask)); } #endif return rc; } /* ** Return the minimum number of 32KB shm regions that should be mapped at ** a time, assuming that each mapping must be an integer multiple of the ** current system page-size. ** ** Usually, this is 1. The exception seems to be systems that are configured ** to use 64KB pages - in this case each mapping must cover at least two ** shm regions. */ static int unixShmRegionPerMap(void){ int shmsz = 32*1024; /* SHM region size */ int pgsz = osGetpagesize(); /* System page size */ assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ if( pgszpInode->pShmNode; assert( unixMutexHeld() ); if( p && p->nRef==0 ){ int nShmPerMap = unixShmRegionPerMap(); int i; assert( p->pInode==pFd->pInode ); sqlite3_mutex_free(p->mutex); for(i=0; inRegion; i+=nShmPerMap){ if( p->h>=0 ){ osMunmap(p->apRegion[i], p->szRegion); }else{ sqlite3_free(p->apRegion[i]); } } sqlite3_free(p->apRegion); if( p->h>=0 ){ robust_close(pFd, p->h, __LINE__); p->h = -1; } p->pInode->pShmNode = 0; sqlite3_free(p); } } /* ** Open a shared-memory area associated with open database file pDbFd. ** This particular implementation uses mmapped files. ** ** The file used to implement shared-memory is in the same directory ** as the open database file and has the same name as the open database ** file with the "-shm" suffix added. For example, if the database file ** is "/home/user1/config.db" then the file that is created and mmapped ** for shared memory will be called "/home/user1/config.db-shm". ** ** Another approach to is to use files in /dev/shm or /dev/tmp or an ** some other tmpfs mount. But if a file in a different directory ** from the database file is used, then differing access permissions ** or a chroot() might cause two different processes on the same ** database to end up using different files for shared memory - ** meaning that their memory would not really be shared - resulting ** in database corruption. Nevertheless, this tmpfs file usage ** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm" ** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time ** option results in an incompatible build of SQLite; builds of SQLite ** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the ** same database file at the same time, database corruption will likely ** result. The SQLITE_SHM_DIRECTORY compile-time option is considered ** "unsupported" and may go away in a future SQLite release. ** ** When opening a new shared-memory file, if no other instances of that ** file are currently open, in this process or in other processes, then ** the file must be truncated to zero length or have its header cleared. ** ** If the original database file (pDbFd) is using the "unix-excl" VFS ** that means that an exclusive lock is held on the database file and ** that no other processes are able to read or write the database. In ** that case, we do not really need shared memory. No shared memory ** file is created. The shared memory will be simulated with heap memory. */ static int unixOpenSharedMemory(unixFile *pDbFd){ struct unixShm *p = 0; /* The connection to be opened */ struct unixShmNode *pShmNode; /* The underlying mmapped file */ int rc; /* Result code */ unixInodeInfo *pInode; /* The inode of fd */ char *zShmFilename; /* Name of the file used for SHM */ int nShmFilename; /* Size of the SHM filename in bytes */ /* Allocate space for the new unixShm object. */ p = sqlite3_malloc64( sizeof(*p) ); if( p==0 ) return SQLITE_NOMEM; memset(p, 0, sizeof(*p)); assert( pDbFd->pShm==0 ); /* Check to see if a unixShmNode object already exists. Reuse an existing ** one if present. Create a new one if necessary. */ unixEnterMutex(); pInode = pDbFd->pInode; pShmNode = pInode->pShmNode; if( pShmNode==0 ){ struct stat sStat; /* fstat() info for database file */ #ifndef SQLITE_SHM_DIRECTORY const char *zBasePath = pDbFd->zPath; #endif /* Call fstat() to figure out the permissions on the database file. If ** a new *-shm file is created, an attempt will be made to create it ** with the same permissions. */ if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){ rc = SQLITE_IOERR_FSTAT; goto shm_open_err; } #ifdef SQLITE_SHM_DIRECTORY nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; #else nShmFilename = 6 + (int)strlen(zBasePath); #endif pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename ); if( pShmNode==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1]; #ifdef SQLITE_SHM_DIRECTORY sqlite3_snprintf(nShmFilename, zShmFilename, SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x", (u32)sStat.st_ino, (u32)sStat.st_dev); #else sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", zBasePath); #endif pShmNode->h = -1; pDbFd->pInode->pShmNode = pShmNode; pShmNode->pInode = pDbFd->pInode; pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); if( pShmNode->mutex==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } if( pInode->bProcessLock==0 ){ int openFlags = O_RDWR | O_CREAT; pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777)); if( pShmNode->h<0 ){ rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); goto shm_open_err; } /* If this process is running as root, make sure that the SHM file ** is owned by the same user that owns the original database. Otherwise, ** the original owner will not be able to connect. */ osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid); /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ rc = SQLITE_OK; if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ if( robust_ftruncate(pShmNode->h, 0) ){ rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename); } } if( rc==SQLITE_OK ){ rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1); } if( rc ) goto shm_open_err; } } /* Make the new connection a child of the unixShmNode */ p->pShmNode = pShmNode; #ifdef SQLITE_DEBUG p->id = pShmNode->nextShmId++; #endif pShmNode->nRef++; pDbFd->pShm = p; unixLeaveMutex(); /* The reference count on pShmNode has already been incremented under ** the cover of the unixEnterMutex() mutex and the pointer from the ** new (struct unixShm) object to the pShmNode has been set. All that is ** left to do is to link the new object into the linked list starting ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex ** mutex. */ sqlite3_mutex_enter(pShmNode->mutex); p->pNext = pShmNode->pFirst; pShmNode->pFirst = p; sqlite3_mutex_leave(pShmNode->mutex); return SQLITE_OK; /* Jump here on any error */ shm_open_err: unixShmPurge(pDbFd); /* This call frees pShmNode if required */ sqlite3_free(p); unixLeaveMutex(); return rc; } /* ** This function is called to obtain a pointer to region iRegion of the ** shared-memory associated with the database file fd. Shared-memory regions ** are numbered starting from zero. Each shared-memory region is szRegion ** bytes in size. ** ** If an error occurs, an error code is returned and *pp is set to NULL. ** ** Otherwise, if the bExtend parameter is 0 and the requested shared-memory ** region has not been allocated (by any client, including one running in a ** separate process), then *pp is set to NULL and SQLITE_OK returned. If ** bExtend is non-zero and the requested shared-memory region has not yet ** been allocated, it is allocated by this function. ** ** If the shared-memory region has already been allocated or is allocated by ** this call as described above, then it is mapped into this processes ** address space (if it is not already), *pp is set to point to the mapped ** memory and SQLITE_OK returned. */ static int unixShmMap( sqlite3_file *fd, /* Handle open on database file */ int iRegion, /* Region to retrieve */ int szRegion, /* Size of regions */ int bExtend, /* True to extend file if necessary */ void volatile **pp /* OUT: Mapped memory */ ){ unixFile *pDbFd = (unixFile*)fd; unixShm *p; unixShmNode *pShmNode; int rc = SQLITE_OK; int nShmPerMap = unixShmRegionPerMap(); int nReqRegion; /* If the shared-memory file has not yet been opened, open it now. */ if( pDbFd->pShm==0 ){ rc = unixOpenSharedMemory(pDbFd); if( rc!=SQLITE_OK ) return rc; } p = pDbFd->pShm; pShmNode = p->pShmNode; sqlite3_mutex_enter(pShmNode->mutex); assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); assert( pShmNode->pInode==pDbFd->pInode ); assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); /* Minimum number of regions required to be mapped. */ nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; if( pShmNode->nRegionszRegion = szRegion; if( pShmNode->h>=0 ){ /* The requested region is not mapped into this processes address space. ** Check to see if it has been allocated (i.e. if the wal-index file is ** large enough to contain the requested region). */ if( osFstat(pShmNode->h, &sStat) ){ rc = SQLITE_IOERR_SHMSIZE; goto shmpage_out; } if( sStat.st_sizeh, iPg*pgsz + pgsz-1, "", 1, 0)!=1 ){ const char *zFile = pShmNode->zFilename; rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile); goto shmpage_out; } } } } } /* Map the requested memory region into this processes address space. */ apNew = (char **)sqlite3_realloc( pShmNode->apRegion, nReqRegion*sizeof(char *) ); if( !apNew ){ rc = SQLITE_IOERR_NOMEM; goto shmpage_out; } pShmNode->apRegion = apNew; while( pShmNode->nRegionh>=0 ){ pMem = osMmap(0, nMap, pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion ); if( pMem==MAP_FAILED ){ rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); goto shmpage_out; } }else{ pMem = sqlite3_malloc64(szRegion); if( pMem==0 ){ rc = SQLITE_NOMEM; goto shmpage_out; } memset(pMem, 0, szRegion); } for(i=0; iapRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; } pShmNode->nRegion += nShmPerMap; } } shmpage_out: if( pShmNode->nRegion>iRegion ){ *pp = pShmNode->apRegion[iRegion]; }else{ *pp = 0; } if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY; sqlite3_mutex_leave(pShmNode->mutex); return rc; } /* ** Change the lock state for a shared-memory segment. ** ** Note that the relationship between SHAREd and EXCLUSIVE locks is a little ** different here than in posix. In xShmLock(), one can go from unlocked ** to shared and back or from unlocked to exclusive and back. But one may ** not go from shared to exclusive or from exclusive to shared. */ static int unixShmLock( sqlite3_file *fd, /* Database file holding the shared memory */ int ofst, /* First lock to acquire or release */ int n, /* Number of locks to acquire or release */ int flags /* What to do with the lock */ ){ unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ unixShm *p = pDbFd->pShm; /* The shared memory being locked */ unixShm *pX; /* For looping over all siblings */ unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ int rc = SQLITE_OK; /* Result code */ u16 mask; /* Mask of locks to take or release */ assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); assert( n>=1 ); assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); mask = (1<<(ofst+n)) - (1<1 || mask==(1<mutex); if( flags & SQLITE_SHM_UNLOCK ){ u16 allMask = 0; /* Mask of locks held by siblings */ /* See if any siblings hold this same lock */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( pX==p ) continue; assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); allMask |= pX->sharedMask; } /* Unlock the system-level locks */ if( (mask & allMask)==0 ){ rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n); }else{ rc = SQLITE_OK; } /* Undo the local locks */ if( rc==SQLITE_OK ){ p->exclMask &= ~mask; p->sharedMask &= ~mask; } }else if( flags & SQLITE_SHM_SHARED ){ u16 allShared = 0; /* Union of locks held by connections other than "p" */ /* Find out which shared locks are already held by sibling connections. ** If any sibling already holds an exclusive lock, go ahead and return ** SQLITE_BUSY. */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( (pX->exclMask & mask)!=0 ){ rc = SQLITE_BUSY; break; } allShared |= pX->sharedMask; } /* Get shared locks at the system level, if necessary */ if( rc==SQLITE_OK ){ if( (allShared & mask)==0 ){ rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n); }else{ rc = SQLITE_OK; } } /* Get the local shared locks */ if( rc==SQLITE_OK ){ p->sharedMask |= mask; } }else{ /* Make sure no sibling connections hold locks that will block this ** lock. If any do, return SQLITE_BUSY right away. */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ rc = SQLITE_BUSY; break; } } /* Get the exclusive locks at the system level. Then if successful ** also mark the local connection as being locked. */ if( rc==SQLITE_OK ){ rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n); if( rc==SQLITE_OK ){ assert( (p->sharedMask & mask)==0 ); p->exclMask |= mask; } } } sqlite3_mutex_leave(pShmNode->mutex); OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", p->id, osGetpid(0), p->sharedMask, p->exclMask)); return rc; } /* ** Implement a memory barrier or memory fence on shared memory. ** ** All loads and stores begun before the barrier must complete before ** any load or store begun after the barrier. */ static void unixShmBarrier( sqlite3_file *fd /* Database file holding the shared memory */ ){ UNUSED_PARAMETER(fd); sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ unixEnterMutex(); /* Also mutex, for redundancy */ unixLeaveMutex(); } /* ** Close a connection to shared-memory. Delete the underlying ** storage if deleteFlag is true. ** ** If there is no shared memory associated with the connection then this ** routine is a harmless no-op. */ static int unixShmUnmap( sqlite3_file *fd, /* The underlying database file */ int deleteFlag /* Delete shared-memory if true */ ){ unixShm *p; /* The connection to be closed */ unixShmNode *pShmNode; /* The underlying shared-memory file */ unixShm **pp; /* For looping over sibling connections */ unixFile *pDbFd; /* The underlying database file */ pDbFd = (unixFile*)fd; p = pDbFd->pShm; if( p==0 ) return SQLITE_OK; pShmNode = p->pShmNode; assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); /* Remove connection p from the set of connections associated ** with pShmNode */ sqlite3_mutex_enter(pShmNode->mutex); for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} *pp = p->pNext; /* Free the connection p */ sqlite3_free(p); pDbFd->pShm = 0; sqlite3_mutex_leave(pShmNode->mutex); /* If pShmNode->nRef has reached 0, then close the underlying ** shared-memory file, too */ unixEnterMutex(); assert( pShmNode->nRef>0 ); pShmNode->nRef--; if( pShmNode->nRef==0 ){ if( deleteFlag && pShmNode->h>=0 ){ osUnlink(pShmNode->zFilename); } unixShmPurge(pDbFd); } unixLeaveMutex(); return SQLITE_OK; } #else # define unixShmMap 0 # define unixShmLock 0 # define unixShmBarrier 0 # define unixShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ #if SQLITE_MAX_MMAP_SIZE>0 /* ** If it is currently memory mapped, unmap file pFd. */ static void unixUnmapfile(unixFile *pFd){ assert( pFd->nFetchOut==0 ); if( pFd->pMapRegion ){ osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); pFd->pMapRegion = 0; pFd->mmapSize = 0; pFd->mmapSizeActual = 0; } } /* ** Attempt to set the size of the memory mapping maintained by file ** descriptor pFd to nNew bytes. Any existing mapping is discarded. ** ** If successful, this function sets the following variables: ** ** unixFile.pMapRegion ** unixFile.mmapSize ** unixFile.mmapSizeActual ** ** If unsuccessful, an error message is logged via sqlite3_log() and ** the three variables above are zeroed. In this case SQLite should ** continue accessing the database using the xRead() and xWrite() ** methods. */ static void unixRemapfile( unixFile *pFd, /* File descriptor object */ i64 nNew /* Required mapping size */ ){ const char *zErr = "mmap"; int h = pFd->h; /* File descriptor open on db file */ u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */ u8 *pNew = 0; /* Location of new mapping */ int flags = PROT_READ; /* Flags to pass to mmap() */ assert( pFd->nFetchOut==0 ); assert( nNew>pFd->mmapSize ); assert( nNew<=pFd->mmapSizeMax ); assert( nNew>0 ); assert( pFd->mmapSizeActual>=pFd->mmapSize ); assert( MAP_FAILED!=0 ); if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; if( pOrig ){ #if HAVE_MREMAP i64 nReuse = pFd->mmapSize; #else const int szSyspage = osGetpagesize(); i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); #endif u8 *pReq = &pOrig[nReuse]; /* Unmap any pages of the existing mapping that cannot be reused. */ if( nReuse!=nOrig ){ osMunmap(pReq, nOrig-nReuse); } #if HAVE_MREMAP pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); zErr = "mremap"; #else pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); if( pNew!=MAP_FAILED ){ if( pNew!=pReq ){ osMunmap(pNew, nNew - nReuse); pNew = 0; }else{ pNew = pOrig; } } #endif /* The attempt to extend the existing mapping failed. Free it. */ if( pNew==MAP_FAILED || pNew==0 ){ osMunmap(pOrig, nReuse); } } /* If pNew is still NULL, try to create an entirely new mapping. */ if( pNew==0 ){ pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); } if( pNew==MAP_FAILED ){ pNew = 0; nNew = 0; unixLogError(SQLITE_OK, zErr, pFd->zPath); /* If the mmap() above failed, assume that all subsequent mmap() calls ** will probably fail too. Fall back to using xRead/xWrite exclusively ** in this case. */ pFd->mmapSizeMax = 0; } pFd->pMapRegion = (void *)pNew; pFd->mmapSize = pFd->mmapSizeActual = nNew; } /* ** Memory map or remap the file opened by file-descriptor pFd (if the file ** is already mapped, the existing mapping is replaced by the new). Or, if ** there already exists a mapping for this file, and there are still ** outstanding xFetch() references to it, this function is a no-op. ** ** If parameter nByte is non-negative, then it is the requested size of ** the mapping to create. Otherwise, if nByte is less than zero, then the ** requested size is the size of the file on disk. The actual size of the ** created mapping is either the requested size or the value configured ** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. ** ** SQLITE_OK is returned if no error occurs (even if the mapping is not ** recreated as a result of outstanding references) or an SQLite error ** code otherwise. */ static int unixMapfile(unixFile *pFd, i64 nByte){ i64 nMap = nByte; int rc; assert( nMap>=0 || pFd->nFetchOut==0 ); if( pFd->nFetchOut>0 ) return SQLITE_OK; if( nMap<0 ){ struct stat statbuf; /* Low-level file information */ rc = osFstat(pFd->h, &statbuf); if( rc!=SQLITE_OK ){ return SQLITE_IOERR_FSTAT; } nMap = statbuf.st_size; } if( nMap>pFd->mmapSizeMax ){ nMap = pFd->mmapSizeMax; } if( nMap!=pFd->mmapSize ){ if( nMap>0 ){ unixRemapfile(pFd, nMap); }else{ unixUnmapfile(pFd); } } return SQLITE_OK; } #endif /* SQLITE_MAX_MMAP_SIZE>0 */ /* ** If possible, return a pointer to a mapping of file fd starting at offset ** iOff. The mapping must be valid for at least nAmt bytes. ** ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. ** Finally, if an error does occur, return an SQLite error code. The final ** value of *pp is undefined in this case. ** ** If this function does return a pointer, the caller must eventually ** release the reference by calling unixUnfetch(). */ static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ #if SQLITE_MAX_MMAP_SIZE>0 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ #endif *pp = 0; #if SQLITE_MAX_MMAP_SIZE>0 if( pFd->mmapSizeMax>0 ){ if( pFd->pMapRegion==0 ){ int rc = unixMapfile(pFd, -1); if( rc!=SQLITE_OK ) return rc; } if( pFd->mmapSize >= iOff+nAmt ){ *pp = &((u8 *)pFd->pMapRegion)[iOff]; pFd->nFetchOut++; } } #endif return SQLITE_OK; } /* ** If the third argument is non-NULL, then this function releases a ** reference obtained by an earlier call to unixFetch(). The second ** argument passed to this function must be the same as the corresponding ** argument that was passed to the unixFetch() invocation. ** ** Or, if the third argument is NULL, then this function is being called ** to inform the VFS layer that, according to POSIX, any existing mapping ** may now be invalid and should be unmapped. */ static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ #if SQLITE_MAX_MMAP_SIZE>0 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ UNUSED_PARAMETER(iOff); /* If p==0 (unmap the entire file) then there must be no outstanding ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), ** then there must be at least one outstanding. */ assert( (p==0)==(pFd->nFetchOut==0) ); /* If p!=0, it must match the iOff value. */ assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); if( p ){ pFd->nFetchOut--; }else{ unixUnmapfile(pFd); } assert( pFd->nFetchOut>=0 ); #else UNUSED_PARAMETER(fd); UNUSED_PARAMETER(p); UNUSED_PARAMETER(iOff); #endif return SQLITE_OK; } /* ** Here ends the implementation of all sqlite3_file methods. ** ********************** End sqlite3_file Methods ******************************* ******************************************************************************/ /* ** This division contains definitions of sqlite3_io_methods objects that ** implement various file locking strategies. It also contains definitions ** of "finder" functions. A finder-function is used to locate the appropriate ** sqlite3_io_methods object for a particular database file. The pAppData ** field of the sqlite3_vfs VFS objects are initialized to be pointers to ** the correct finder-function for that VFS. ** ** Most finder functions return a pointer to a fixed sqlite3_io_methods ** object. The only interesting finder-function is autolockIoFinder, which ** looks at the filesystem type and tries to guess the best locking ** strategy from that. ** ** For finder-function F, two objects are created: ** ** (1) The real finder-function named "FImpt()". ** ** (2) A constant pointer to this function named just "F". ** ** ** A pointer to the F pointer is used as the pAppData value for VFS ** objects. We have to do this instead of letting pAppData point ** directly at the finder-function since C90 rules prevent a void* ** from be cast into a function pointer. ** ** ** Each instance of this macro generates two objects: ** ** * A constant sqlite3_io_methods object call METHOD that has locking ** methods CLOSE, LOCK, UNLOCK, CKRESLOCK. ** ** * An I/O method finder function called FINDER that returns a pointer ** to the METHOD object in the previous bullet. */ #define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \ static const sqlite3_io_methods METHOD = { \ VERSION, /* iVersion */ \ CLOSE, /* xClose */ \ unixRead, /* xRead */ \ unixWrite, /* xWrite */ \ unixTruncate, /* xTruncate */ \ unixSync, /* xSync */ \ unixFileSize, /* xFileSize */ \ LOCK, /* xLock */ \ UNLOCK, /* xUnlock */ \ CKLOCK, /* xCheckReservedLock */ \ unixFileControl, /* xFileControl */ \ unixSectorSize, /* xSectorSize */ \ unixDeviceCharacteristics, /* xDeviceCapabilities */ \ SHMMAP, /* xShmMap */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ unixShmUnmap, /* xShmUnmap */ \ unixFetch, /* xFetch */ \ unixUnfetch, /* xUnfetch */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ return &METHOD; \ } \ static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \ = FINDER##Impl; /* ** Here are all of the sqlite3_io_methods objects for each of the ** locking strategies. Functions that return pointers to these methods ** are also created. */ IOMETHODS( posixIoFinder, /* Finder function name */ posixIoMethods, /* sqlite3_io_methods object name */ 3, /* shared memory and mmap are enabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ unixUnlock, /* xUnlock method */ unixCheckReservedLock, /* xCheckReservedLock method */ unixShmMap /* xShmMap method */ ) IOMETHODS( nolockIoFinder, /* Finder function name */ nolockIoMethods, /* sqlite3_io_methods object name */ 3, /* shared memory is disabled */ nolockClose, /* xClose method */ nolockLock, /* xLock method */ nolockUnlock, /* xUnlock method */ nolockCheckReservedLock, /* xCheckReservedLock method */ 0 /* xShmMap method */ ) IOMETHODS( dotlockIoFinder, /* Finder function name */ dotlockIoMethods, /* sqlite3_io_methods object name */ 1, /* shared memory is disabled */ dotlockClose, /* xClose method */ dotlockLock, /* xLock method */ dotlockUnlock, /* xUnlock method */ dotlockCheckReservedLock, /* xCheckReservedLock method */ 0 /* xShmMap method */ ) #if SQLITE_ENABLE_LOCKING_STYLE IOMETHODS( flockIoFinder, /* Finder function name */ flockIoMethods, /* sqlite3_io_methods object name */ 1, /* shared memory is disabled */ flockClose, /* xClose method */ flockLock, /* xLock method */ flockUnlock, /* xUnlock method */ flockCheckReservedLock, /* xCheckReservedLock method */ 0 /* xShmMap method */ ) #endif #if OS_VXWORKS IOMETHODS( semIoFinder, /* Finder function name */ semIoMethods, /* sqlite3_io_methods object name */ 1, /* shared memory is disabled */ semXClose, /* xClose method */ semXLock, /* xLock method */ semXUnlock, /* xUnlock method */ semXCheckReservedLock, /* xCheckReservedLock method */ 0 /* xShmMap method */ ) #endif #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE IOMETHODS( afpIoFinder, /* Finder function name */ afpIoMethods, /* sqlite3_io_methods object name */ 1, /* shared memory is disabled */ afpClose, /* xClose method */ afpLock, /* xLock method */ afpUnlock, /* xUnlock method */ afpCheckReservedLock, /* xCheckReservedLock method */ 0 /* xShmMap method */ ) #endif /* ** The proxy locking method is a "super-method" in the sense that it ** opens secondary file descriptors for the conch and lock files and ** it uses proxy, dot-file, AFP, and flock() locking methods on those ** secondary files. For this reason, the division that implements ** proxy locking is located much further down in the file. But we need ** to go ahead and define the sqlite3_io_methods and finder function ** for proxy locking here. So we forward declare the I/O methods. */ #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE static int proxyClose(sqlite3_file*); static int proxyLock(sqlite3_file*, int); static int proxyUnlock(sqlite3_file*, int); static int proxyCheckReservedLock(sqlite3_file*, int*); IOMETHODS( proxyIoFinder, /* Finder function name */ proxyIoMethods, /* sqlite3_io_methods object name */ 1, /* shared memory is disabled */ proxyClose, /* xClose method */ proxyLock, /* xLock method */ proxyUnlock, /* xUnlock method */ proxyCheckReservedLock, /* xCheckReservedLock method */ 0 /* xShmMap method */ ) #endif /* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */ #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE IOMETHODS( nfsIoFinder, /* Finder function name */ nfsIoMethods, /* sqlite3_io_methods object name */ 1, /* shared memory is disabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ nfsUnlock, /* xUnlock method */ unixCheckReservedLock, /* xCheckReservedLock method */ 0 /* xShmMap method */ ) #endif #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE /* ** This "finder" function attempts to determine the best locking strategy ** for the database file "filePath". It then returns the sqlite3_io_methods ** object that implements that strategy. ** ** This is for MacOSX only. */ static const sqlite3_io_methods *autolockIoFinderImpl( const char *filePath, /* name of the database file */ unixFile *pNew /* open file object for the database file */ ){ static const struct Mapping { const char *zFilesystem; /* Filesystem type name */ const sqlite3_io_methods *pMethods; /* Appropriate locking method */ } aMap[] = { { "hfs", &posixIoMethods }, { "ufs", &posixIoMethods }, { "afpfs", &afpIoMethods }, { "smbfs", &afpIoMethods }, { "webdav", &nolockIoMethods }, { 0, 0 } }; int i; struct statfs fsInfo; struct flock lockInfo; if( !filePath ){ /* If filePath==NULL that means we are dealing with a transient file ** that does not need to be locked. */ return &nolockIoMethods; } if( statfs(filePath, &fsInfo) != -1 ){ if( fsInfo.f_flags & MNT_RDONLY ){ return &nolockIoMethods; } for(i=0; aMap[i].zFilesystem; i++){ if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){ return aMap[i].pMethods; } } } /* Default case. Handles, amongst others, "nfs". ** Test byte-range lock using fcntl(). If the call succeeds, ** assume that the file-system supports POSIX style locks. */ lockInfo.l_len = 1; lockInfo.l_start = 0; lockInfo.l_whence = SEEK_SET; lockInfo.l_type = F_RDLCK; if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){ return &nfsIoMethods; } else { return &posixIoMethods; } }else{ return &dotlockIoMethods; } } static const sqlite3_io_methods *(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl; #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ #if OS_VXWORKS /* ** This "finder" function for VxWorks checks to see if posix advisory ** locking works. If it does, then that is what is used. If it does not ** work, then fallback to named semaphore locking. */ static const sqlite3_io_methods *vxworksIoFinderImpl( const char *filePath, /* name of the database file */ unixFile *pNew /* the open file object */ ){ struct flock lockInfo; if( !filePath ){ /* If filePath==NULL that means we are dealing with a transient file ** that does not need to be locked. */ return &nolockIoMethods; } /* Test if fcntl() is supported and use POSIX style locks. ** Otherwise fall back to the named semaphore method. */ lockInfo.l_len = 1; lockInfo.l_start = 0; lockInfo.l_whence = SEEK_SET; lockInfo.l_type = F_RDLCK; if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) { return &posixIoMethods; }else{ return &semIoMethods; } } static const sqlite3_io_methods *(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl; #endif /* OS_VXWORKS */ /* ** An abstract type for a pointer to an IO method finder function: */ typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); /**************************************************************************** **************************** sqlite3_vfs methods **************************** ** ** This division contains the implementation of methods on the ** sqlite3_vfs object. */ /* ** Initialize the contents of the unixFile structure pointed to by pId. */ static int fillInUnixFile( sqlite3_vfs *pVfs, /* Pointer to vfs object */ int h, /* Open file descriptor of file being opened */ sqlite3_file *pId, /* Write to the unixFile structure here */ const char *zFilename, /* Name of the file being opened */ int ctrlFlags /* Zero or more UNIXFILE_* values */ ){ const sqlite3_io_methods *pLockingStyle; unixFile *pNew = (unixFile *)pId; int rc = SQLITE_OK; assert( pNew->pInode==NULL ); /* Usually the path zFilename should not be a relative pathname. The ** exception is when opening the proxy "conch" file in builds that ** include the special Apple locking styles. */ #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE assert( zFilename==0 || zFilename[0]=='/' || pVfs->pAppData==(void*)&autolockIoFinder ); #else assert( zFilename==0 || zFilename[0]=='/' ); #endif /* No locking occurs in temporary files */ assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; pNew->pVfs = pVfs; pNew->zPath = zFilename; pNew->ctrlFlags = (u8)ctrlFlags; #if SQLITE_MAX_MMAP_SIZE>0 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; #endif if( SQLITE_POWERSAFE_OVERWRITE ){ pNew->ctrlFlags |= UNIXFILE_PSOW; } if( strcmp(pVfs->zName,"unix-excl")==0 ){ pNew->ctrlFlags |= UNIXFILE_EXCL; } #if OS_VXWORKS pNew->pId = vxworksFindFileId(zFilename); if( pNew->pId==0 ){ ctrlFlags |= UNIXFILE_NOLOCK; rc = SQLITE_NOMEM; } #endif if( ctrlFlags & UNIXFILE_NOLOCK ){ pLockingStyle = &nolockIoMethods; }else{ pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); #if SQLITE_ENABLE_LOCKING_STYLE /* Cache zFilename in the locking context (AFP and dotlock override) for ** proxyLock activation is possible (remote proxy is based on db name) ** zFilename remains valid until file is closed, to support */ pNew->lockingContext = (void*)zFilename; #endif } if( pLockingStyle == &posixIoMethods #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE || pLockingStyle == &nfsIoMethods #endif ){ unixEnterMutex(); rc = findInodeInfo(pNew, &pNew->pInode); if( rc!=SQLITE_OK ){ /* If an error occurred in findInodeInfo(), close the file descriptor ** immediately, before releasing the mutex. findInodeInfo() may fail ** in two scenarios: ** ** (a) A call to fstat() failed. ** (b) A malloc failed. ** ** Scenario (b) may only occur if the process is holding no other ** file descriptors open on the same file. If there were other file ** descriptors on this file, then no malloc would be required by ** findInodeInfo(). If this is the case, it is quite safe to close ** handle h - as it is guaranteed that no posix locks will be released ** by doing so. ** ** If scenario (a) caused the error then things are not so safe. The ** implicit assumption here is that if fstat() fails, things are in ** such bad shape that dropping a lock or two doesn't matter much. */ robust_close(pNew, h, __LINE__); h = -1; } unixLeaveMutex(); } #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) else if( pLockingStyle == &afpIoMethods ){ /* AFP locking uses the file path so it needs to be included in ** the afpLockingContext. */ afpLockingContext *pCtx; pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) ); if( pCtx==0 ){ rc = SQLITE_NOMEM; }else{ /* NB: zFilename exists and remains valid until the file is closed ** according to requirement F11141. So we do not need to make a ** copy of the filename. */ pCtx->dbPath = zFilename; pCtx->reserved = 0; srandomdev(); unixEnterMutex(); rc = findInodeInfo(pNew, &pNew->pInode); if( rc!=SQLITE_OK ){ sqlite3_free(pNew->lockingContext); robust_close(pNew, h, __LINE__); h = -1; } unixLeaveMutex(); } } #endif else if( pLockingStyle == &dotlockIoMethods ){ /* Dotfile locking uses the file path so it needs to be included in ** the dotlockLockingContext */ char *zLockFile; int nFilename; assert( zFilename!=0 ); nFilename = (int)strlen(zFilename) + 6; zLockFile = (char *)sqlite3_malloc64(nFilename); if( zLockFile==0 ){ rc = SQLITE_NOMEM; }else{ sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename); } pNew->lockingContext = zLockFile; } #if OS_VXWORKS else if( pLockingStyle == &semIoMethods ){ /* Named semaphore locking uses the file path so it needs to be ** included in the semLockingContext */ unixEnterMutex(); rc = findInodeInfo(pNew, &pNew->pInode); if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){ char *zSemName = pNew->pInode->aSemName; int n; sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem", pNew->pId->zCanonicalName); for( n=1; zSemName[n]; n++ ) if( zSemName[n]=='/' ) zSemName[n] = '_'; pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1); if( pNew->pInode->pSem == SEM_FAILED ){ rc = SQLITE_NOMEM; pNew->pInode->aSemName[0] = '\0'; } } unixLeaveMutex(); } #endif storeLastErrno(pNew, 0); #if OS_VXWORKS if( rc!=SQLITE_OK ){ if( h>=0 ) robust_close(pNew, h, __LINE__); h = -1; osUnlink(zFilename); pNew->ctrlFlags |= UNIXFILE_DELETE; } #endif if( rc!=SQLITE_OK ){ if( h>=0 ) robust_close(pNew, h, __LINE__); }else{ pNew->pMethod = pLockingStyle; OpenCounter(+1); verifyDbFile(pNew); } return rc; } /* ** Return the name of a directory in which to put temporary files. ** If no suitable temporary file directory can be found, return NULL. */ static const char *unixTempFileDir(void){ static const char *azDirs[] = { 0, 0, 0, "/var/tmp", "/usr/tmp", "/tmp", 0 /* List terminator */ }; unsigned int i; struct stat buf; const char *zDir = 0; azDirs[0] = sqlite3_temp_directory; if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR"); if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR"); for(i=0; imxPathname bytes. */ static int unixGetTempname(int nBuf, char *zBuf){ static const unsigned char zChars[] = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789"; unsigned int i, j; const char *zDir; /* It's odd to simulate an io-error here, but really this is just ** using the io-error infrastructure to test that SQLite handles this ** function failing. */ SimulateIOError( return SQLITE_IOERR ); zDir = unixTempFileDir(); if( zDir==0 ) zDir = "."; /* Check that the output buffer is large enough for the temporary file ** name. If it is not, return SQLITE_ERROR. */ if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){ return SQLITE_ERROR; } do{ sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); j = (int)strlen(zBuf); sqlite3_randomness(15, &zBuf[j]); for(i=0; i<15; i++, j++){ zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; } zBuf[j] = 0; zBuf[j+1] = 0; }while( osAccess(zBuf,0)==0 ); return SQLITE_OK; } #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) /* ** Routine to transform a unixFile into a proxy-locking unixFile. ** Implementation in the proxy-lock division, but used by unixOpen() ** if SQLITE_PREFER_PROXY_LOCKING is defined. */ static int proxyTransformUnixFile(unixFile*, const char*); #endif /* ** Search for an unused file descriptor that was opened on the database ** file (not a journal or master-journal file) identified by pathname ** zPath with SQLITE_OPEN_XXX flags matching those passed as the second ** argument to this function. ** ** Such a file descriptor may exist if a database connection was closed ** but the associated file descriptor could not be closed because some ** other file descriptor open on the same file is holding a file-lock. ** Refer to comments in the unixClose() function and the lengthy comment ** describing "Posix Advisory Locking" at the start of this file for ** further details. Also, ticket #4018. ** ** If a suitable file descriptor is found, then it is returned. If no ** such file descriptor is located, -1 is returned. */ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ UnixUnusedFd *pUnused = 0; /* Do not search for an unused file descriptor on vxworks. Not because ** vxworks would not benefit from the change (it might, we're not sure), ** but because no way to test it is currently available. It is better ** not to risk breaking vxworks support for the sake of such an obscure ** feature. */ #if !OS_VXWORKS struct stat sStat; /* Results of stat() call */ /* A stat() call may fail for various reasons. If this happens, it is ** almost certain that an open() call on the same path will also fail. ** For this reason, if an error occurs in the stat() call here, it is ** ignored and -1 is returned. The caller will try to open a new file ** descriptor on the same path, fail, and return an error to SQLite. ** ** Even if a subsequent open() call does succeed, the consequences of ** not searching for a reusable file descriptor are not dire. */ if( 0==osStat(zPath, &sStat) ){ unixInodeInfo *pInode; unixEnterMutex(); pInode = inodeList; while( pInode && (pInode->fileId.dev!=sStat.st_dev || pInode->fileId.ino!=sStat.st_ino) ){ pInode = pInode->pNext; } if( pInode ){ UnixUnusedFd **pp; for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); pUnused = *pp; if( pUnused ){ *pp = pUnused->pNext; } } unixLeaveMutex(); } #endif /* if !OS_VXWORKS */ return pUnused; } /* ** This function is called by unixOpen() to determine the unix permissions ** to create new files with. If no error occurs, then SQLITE_OK is returned ** and a value suitable for passing as the third argument to open(2) is ** written to *pMode. If an IO error occurs, an SQLite error code is ** returned and the value of *pMode is not modified. ** ** In most cases, this routine sets *pMode to 0, which will become ** an indication to robust_open() to create the file using ** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. ** But if the file being opened is a WAL or regular journal file, then ** this function queries the file-system for the permissions on the ** corresponding database file and sets *pMode to this value. Whenever ** possible, WAL and journal files are created using the same permissions ** as the associated database file. */ static int findCreateFileMode( const char *zPath, /* Path of file (possibly) being created */ int flags, /* Flags passed as 4th argument to xOpen() */ mode_t *pMode, /* OUT: Permissions to open file with */ uid_t *pUid, /* OUT: uid to set on the file */ gid_t *pGid /* OUT: gid to set on the file */ ){ int rc = SQLITE_OK; /* Return Code */ *pMode = 0; *pUid = 0; *pGid = 0; if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ char zDb[MAX_PATHNAME+1]; /* Database file path */ int nDb; /* Number of valid bytes in zDb */ struct stat sStat; /* Output of stat() on database file */ /* zPath is a path to a WAL or journal file. The following block derives ** the path to the associated database file from zPath. This block handles ** the following naming conventions: ** ** "-journal" ** "-wal" ** "-journalNN" ** "-walNN" ** ** where NN is a decimal number. The NN naming schemes are ** used by the test_multiplex.c module. */ nDb = sqlite3Strlen30(zPath) - 1; while( zPath[nDb]!='-' ){ assert( nDb>0 ); assert( zPath[nDb]!='\n' ); nDb--; } memcpy(zDb, zPath, nDb); zDb[nDb] = '\0'; if( 0==osStat(zDb, &sStat) ){ *pMode = sStat.st_mode & 0777; *pUid = sStat.st_uid; *pGid = sStat.st_gid; }else{ rc = SQLITE_IOERR_FSTAT; } }else if( flags & SQLITE_OPEN_DELETEONCLOSE ){ *pMode = 0600; } return rc; } /* ** Open the file zPath. ** ** Previously, the SQLite OS layer used three functions in place of this ** one: ** ** sqlite3OsOpenReadWrite(); ** sqlite3OsOpenReadOnly(); ** sqlite3OsOpenExclusive(); ** ** These calls correspond to the following combinations of flags: ** ** ReadWrite() -> (READWRITE | CREATE) ** ReadOnly() -> (READONLY) ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) ** ** The old OpenExclusive() accepted a boolean argument - "delFlag". If ** true, the file was configured to be automatically deleted when the ** file handle closed. To achieve the same effect using this new ** interface, add the DELETEONCLOSE flag to those specified above for ** OpenExclusive(). */ static int unixOpen( sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */ const char *zPath, /* Pathname of file to be opened */ sqlite3_file *pFile, /* The file descriptor to be filled in */ int flags, /* Input flags to control the opening */ int *pOutFlags /* Output flags returned to SQLite core */ ){ unixFile *p = (unixFile *)pFile; int fd = -1; /* File descriptor returned by open() */ int openFlags = 0; /* Flags to pass to open() */ int eType = flags&0xFFFFFF00; /* Type of file to open */ int noLock; /* True to omit locking primitives */ int rc = SQLITE_OK; /* Function Return Code */ int ctrlFlags = 0; /* UNIXFILE_* flags */ int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); int isCreate = (flags & SQLITE_OPEN_CREATE); int isReadonly = (flags & SQLITE_OPEN_READONLY); int isReadWrite = (flags & SQLITE_OPEN_READWRITE); #if SQLITE_ENABLE_LOCKING_STYLE int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY); #endif #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE struct statfs fsInfo; #endif /* If creating a master or main-file journal, this function will open ** a file-descriptor on the directory too. The first time unixSync() ** is called the directory file descriptor will be fsync()ed and close()d. */ int syncDir = (isCreate && ( eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_WAL )); /* If argument zPath is a NULL pointer, this function is required to open ** a temporary file. Use this buffer to store the file name in. */ char zTmpname[MAX_PATHNAME+2]; const char *zName = zPath; /* Check the following statements are true: ** ** (a) Exactly one of the READWRITE and READONLY flags must be set, and ** (b) if CREATE is set, then READWRITE must also be set, and ** (c) if EXCLUSIVE is set, then CREATE must also be set. ** (d) if DELETEONCLOSE is set, then CREATE must also be set. */ assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); assert(isCreate==0 || isReadWrite); assert(isExclusive==0 || isCreate); assert(isDelete==0 || isCreate); /* The main DB, main journal, WAL file and master journal are never ** automatically deleted. Nor are they ever temporary files. */ assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL ); assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); /* Assert that the upper layer has set one of the "file-type" flags. */ assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL ); /* Detect a pid change and reset the PRNG. There is a race condition ** here such that two or more threads all trying to open databases at ** the same instant might all reset the PRNG. But multiple resets ** are harmless. */ if( randomnessPid!=osGetpid(0) ){ randomnessPid = osGetpid(0); sqlite3_randomness(0,0); } memset(p, 0, sizeof(unixFile)); if( eType==SQLITE_OPEN_MAIN_DB ){ UnixUnusedFd *pUnused; pUnused = findReusableFd(zName, flags); if( pUnused ){ fd = pUnused->fd; }else{ pUnused = sqlite3_malloc64(sizeof(*pUnused)); if( !pUnused ){ return SQLITE_NOMEM; } } p->pUnused = pUnused; /* Database filenames are double-zero terminated if they are not ** URIs with parameters. Hence, they can always be passed into ** sqlite3_uri_parameter(). */ assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); }else if( !zName ){ /* If zName is NULL, the upper layer is requesting a temp file. */ assert(isDelete && !syncDir); rc = unixGetTempname(MAX_PATHNAME+2, zTmpname); if( rc!=SQLITE_OK ){ return rc; } zName = zTmpname; /* Generated temporary filenames are always double-zero terminated ** for use by sqlite3_uri_parameter(). */ assert( zName[strlen(zName)+1]==0 ); } /* Determine the value of the flags parameter passed to POSIX function ** open(). These must be calculated even if open() is not called, as ** they may be stored as part of the file handle and used by the ** 'conch file' locking functions later on. */ if( isReadonly ) openFlags |= O_RDONLY; if( isReadWrite ) openFlags |= O_RDWR; if( isCreate ) openFlags |= O_CREAT; if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); openFlags |= (O_LARGEFILE|O_BINARY); if( fd<0 ){ mode_t openMode; /* Permissions to create file with */ uid_t uid; /* Userid for the file */ gid_t gid; /* Groupid for the file */ rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); if( rc!=SQLITE_OK ){ assert( !p->pUnused ); assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); return rc; } fd = robust_open(zName, openFlags, openMode); OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags)); if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){ /* Failed to open the file for read/write access. Try read-only. */ flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE); openFlags &= ~(O_RDWR|O_CREAT); flags |= SQLITE_OPEN_READONLY; openFlags |= O_RDONLY; isReadonly = 1; fd = robust_open(zName, openFlags, openMode); } if( fd<0 ){ rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); goto open_finished; } /* If this process is running as root and if creating a new rollback ** journal or WAL file, set the ownership of the journal or WAL to be ** the same as the original database. */ if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ osFchown(fd, uid, gid); } } assert( fd>=0 ); if( pOutFlags ){ *pOutFlags = flags; } if( p->pUnused ){ p->pUnused->fd = fd; p->pUnused->flags = flags; } if( isDelete ){ #if OS_VXWORKS zPath = zName; #elif defined(SQLITE_UNLINK_AFTER_CLOSE) zPath = sqlite3_mprintf("%s", zName); if( zPath==0 ){ robust_close(p, fd, __LINE__); return SQLITE_NOMEM; } #else osUnlink(zName); #endif } #if SQLITE_ENABLE_LOCKING_STYLE else{ p->openFlags = openFlags; } #endif noLock = eType!=SQLITE_OPEN_MAIN_DB; #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE if( fstatfs(fd, &fsInfo) == -1 ){ storeLastErrno(p, errno); robust_close(p, fd, __LINE__); return SQLITE_IOERR_ACCESS; } if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) { ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; } if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) { ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; } #endif /* Set up appropriate ctrlFlags */ if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC; if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; #if SQLITE_ENABLE_LOCKING_STYLE #if SQLITE_PREFER_PROXY_LOCKING isAutoProxy = 1; #endif if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){ char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING"); int useProxy = 0; /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means ** never use proxy, NULL means use proxy for non-local files only. */ if( envforce!=NULL ){ useProxy = atoi(envforce)>0; }else{ useProxy = !(fsInfo.f_flags&MNT_LOCAL); } if( useProxy ){ rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); if( rc==SQLITE_OK ){ rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); if( rc!=SQLITE_OK ){ /* Use unixClose to clean up the resources added in fillInUnixFile ** and clear all the structure's references. Specifically, ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op */ unixClose(pFile); return rc; } } goto open_finished; } } #endif rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); open_finished: if( rc!=SQLITE_OK ){ sqlite3_free(p->pUnused); } return rc; } /* ** Delete the file at zPath. If the dirSync argument is true, fsync() ** the directory after deleting the file. */ static int unixDelete( sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */ const char *zPath, /* Name of file to be deleted */ int dirSync /* If true, fsync() directory after deleting file */ ){ int rc = SQLITE_OK; UNUSED_PARAMETER(NotUsed); SimulateIOError(return SQLITE_IOERR_DELETE); if( osUnlink(zPath)==(-1) ){ if( errno==ENOENT #if OS_VXWORKS || osAccess(zPath,0)!=0 #endif ){ rc = SQLITE_IOERR_DELETE_NOENT; }else{ rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); } return rc; } #ifndef SQLITE_DISABLE_DIRSYNC if( (dirSync & 1)!=0 ){ int fd; rc = osOpenDirectory(zPath, &fd); if( rc==SQLITE_OK ){ #if OS_VXWORKS if( fsync(fd)==-1 ) #else if( fsync(fd) ) #endif { rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath); } robust_close(0, fd, __LINE__); }else if( rc==SQLITE_CANTOPEN ){ rc = SQLITE_OK; } } #endif return rc; } /* ** Test the existence of or access permissions of file zPath. The ** test performed depends on the value of flags: ** ** SQLITE_ACCESS_EXISTS: Return 1 if the file exists ** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. ** SQLITE_ACCESS_READONLY: Return 1 if the file is readable. ** ** Otherwise return 0. */ static int unixAccess( sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */ const char *zPath, /* Path of the file to examine */ int flags, /* What do we want to learn about the zPath file? */ int *pResOut /* Write result boolean here */ ){ int amode = 0; UNUSED_PARAMETER(NotUsed); SimulateIOError( return SQLITE_IOERR_ACCESS; ); switch( flags ){ case SQLITE_ACCESS_EXISTS: amode = F_OK; break; case SQLITE_ACCESS_READWRITE: amode = W_OK|R_OK; break; case SQLITE_ACCESS_READ: amode = R_OK; break; default: assert(!"Invalid flags argument"); } *pResOut = (osAccess(zPath, amode)==0); if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){ struct stat buf; if( 0==osStat(zPath, &buf) && buf.st_size==0 ){ *pResOut = 0; } } return SQLITE_OK; } /* ** Turn a relative pathname into a full pathname. The relative path ** is stored as a nul-terminated string in the buffer pointed to by ** zPath. ** ** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes ** (in this case, MAX_PATHNAME bytes). The full-path is written to ** this buffer before returning. */ static int unixFullPathname( sqlite3_vfs *pVfs, /* Pointer to vfs object */ const char *zPath, /* Possibly relative input path */ int nOut, /* Size of output buffer in bytes */ char *zOut /* Output buffer */ ){ /* It's odd to simulate an io-error here, but really this is just ** using the io-error infrastructure to test that SQLite handles this ** function failing. This function could fail if, for example, the ** current working directory has been unlinked. */ SimulateIOError( return SQLITE_ERROR ); assert( pVfs->mxPathname==MAX_PATHNAME ); UNUSED_PARAMETER(pVfs); zOut[nOut-1] = '\0'; if( zPath[0]=='/' ){ sqlite3_snprintf(nOut, zOut, "%s", zPath); }else{ int nCwd; if( osGetcwd(zOut, nOut-1)==0 ){ return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath); } nCwd = (int)strlen(zOut); sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath); } return SQLITE_OK; } #define unixDlOpen 0 #define unixDlError 0 #define unixDlSym 0 #define unixDlClose 0 /* ** Write nBuf bytes of random data to the supplied buffer zBuf. */ static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ UNUSED_PARAMETER(NotUsed); assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int))); /* We have to initialize zBuf to prevent valgrind from reporting ** errors. The reports issued by valgrind are incorrect - we would ** prefer that the randomness be increased by making use of the ** uninitialized space in zBuf - but valgrind errors tend to worry ** some users. Rather than argue, it seems easier just to initialize ** the whole array and silence valgrind, even if that means less randomness ** in the random seed. ** ** When testing, initializing zBuf[] to zero is all we do. That means ** that we always use the same random number sequence. This makes the ** tests repeatable. */ memset(zBuf, 0, nBuf); randomnessPid = osGetpid(0); #if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS) { int fd, got; fd = robust_open("/dev/urandom", O_RDONLY, 0); if( fd<0 ){ time_t t; time(&t); memcpy(zBuf, &t, sizeof(t)); memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid)); assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf ); nBuf = sizeof(t) + sizeof(randomnessPid); }else{ do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); robust_close(0, fd, __LINE__); } } #endif return nBuf; } /* ** Sleep for a little while. Return the amount of time slept. ** The argument is the number of microseconds we want to sleep. ** The return value is the number of microseconds of sleep actually ** requested from the underlying operating system, a number which ** might be greater than or equal to the argument, but not less ** than the argument. */ static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){ #if OS_VXWORKS struct timespec sp; sp.tv_sec = microseconds / 1000000; sp.tv_nsec = (microseconds % 1000000) * 1000; nanosleep(&sp, NULL); UNUSED_PARAMETER(NotUsed); return microseconds; #elif defined(HAVE_USLEEP) && HAVE_USLEEP usleep(microseconds); UNUSED_PARAMETER(NotUsed); return microseconds; #else int seconds = (microseconds+999999)/1000000; sleep(seconds); UNUSED_PARAMETER(NotUsed); return seconds*1000000; #endif } /* ** The following variable, if set to a non-zero value, is interpreted as ** the number of seconds since 1970 and is used to set the result of ** sqlite3OsCurrentTime() during testing. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ #endif /* ** Find the current time (in Universal Coordinated Time). Write into *piNow ** the current time and date as a Julian Day number times 86_400_000. In ** other words, write into *piNow the number of milliseconds since the Julian ** epoch of noon in Greenwich on November 24, 4714 B.C according to the ** proleptic Gregorian calendar. ** ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date ** cannot be found. */ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){ static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; int rc = SQLITE_OK; #if defined(NO_GETTOD) time_t t; time(&t); *piNow = ((sqlite3_int64)t)*1000 + unixEpoch; #elif OS_VXWORKS struct timespec sNow; clock_gettime(CLOCK_REALTIME, &sNow); *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000; #else struct timeval sNow; if( gettimeofday(&sNow, 0)==0 ){ *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000; }else{ rc = SQLITE_ERROR; } #endif #ifdef SQLITE_TEST if( sqlite3_current_time ){ *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; } #endif UNUSED_PARAMETER(NotUsed); return rc; } /* ** Find the current time (in Universal Coordinated Time). Write the ** current time and date as a Julian Day number into *prNow and ** return 0. Return 1 if the time and date cannot be found. */ static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){ sqlite3_int64 i = 0; int rc; UNUSED_PARAMETER(NotUsed); rc = unixCurrentTimeInt64(0, &i); *prNow = i/86400000.0; return rc; } /* ** We added the xGetLastError() method with the intention of providing ** better low-level error messages when operating-system problems come up ** during SQLite operation. But so far, none of that has been implemented ** in the core. So this routine is never called. For now, it is merely ** a place-holder. */ static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){ UNUSED_PARAMETER(NotUsed); UNUSED_PARAMETER(NotUsed2); UNUSED_PARAMETER(NotUsed3); return 0; } /* ************************ End of sqlite3_vfs methods *************************** ******************************************************************************/ /****************************************************************************** ************************** Begin Proxy Locking ******************************** ** ** Proxy locking is a "uber-locking-method" in this sense: It uses the ** other locking methods on secondary lock files. Proxy locking is a ** meta-layer over top of the primitive locking implemented above. For ** this reason, the division that implements of proxy locking is deferred ** until late in the file (here) after all of the other I/O methods have ** been defined - so that the primitive locking methods are available ** as services to help with the implementation of proxy locking. ** **** ** ** The default locking schemes in SQLite use byte-range locks on the ** database file to coordinate safe, concurrent access by multiple readers ** and writers [http://sqlite.org/lockingv3.html]. The five file locking ** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented ** as POSIX read & write locks over fixed set of locations (via fsctl), ** on AFP and SMB only exclusive byte-range locks are available via fsctl ** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states. ** To simulate a F_RDLCK on the shared range, on AFP a randomly selected ** address in the shared range is taken for a SHARED lock, the entire ** shared range is taken for an EXCLUSIVE lock): ** ** PENDING_BYTE 0x40000000 ** RESERVED_BYTE 0x40000001 ** SHARED_RANGE 0x40000002 -> 0x40000200 ** ** This works well on the local file system, but shows a nearly 100x ** slowdown in read performance on AFP because the AFP client disables ** the read cache when byte-range locks are present. Enabling the read ** cache exposes a cache coherency problem that is present on all OS X ** supported network file systems. NFS and AFP both observe the ** close-to-open semantics for ensuring cache coherency ** [http://nfs.sourceforge.net/#faq_a8], which does not effectively ** address the requirements for concurrent database access by multiple ** readers and writers ** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html]. ** ** To address the performance and cache coherency issues, proxy file locking ** changes the way database access is controlled by limiting access to a ** single host at a time and moving file locks off of the database file ** and onto a proxy file on the local file system. ** ** ** Using proxy locks ** ----------------- ** ** C APIs ** ** sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE, ** | ":auto:"); ** sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE, ** &); ** ** ** SQL pragmas ** ** PRAGMA [database.]lock_proxy_file= | :auto: ** PRAGMA [database.]lock_proxy_file ** ** Specifying ":auto:" means that if there is a conch file with a matching ** host ID in it, the proxy path in the conch file will be used, otherwise ** a proxy path based on the user's temp dir ** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the ** actual proxy file name is generated from the name and path of the ** database file. For example: ** ** For database path "/Users/me/foo.db" ** The lock path will be "/sqliteplocks/_Users_me_foo.db:auto:") ** ** Once a lock proxy is configured for a database connection, it can not ** be removed, however it may be switched to a different proxy path via ** the above APIs (assuming the conch file is not being held by another ** connection or process). ** ** ** How proxy locking works ** ----------------------- ** ** Proxy file locking relies primarily on two new supporting files: ** ** * conch file to limit access to the database file to a single host ** at a time ** ** * proxy file to act as a proxy for the advisory locks normally ** taken on the database ** ** The conch file - to use a proxy file, sqlite must first "hold the conch" ** by taking an sqlite-style shared lock on the conch file, reading the ** contents and comparing the host's unique host ID (see below) and lock ** proxy path against the values stored in the conch. The conch file is ** stored in the same directory as the database file and the file name ** is patterned after the database file name as ".-conch". ** If the conch file does not exist, or its contents do not match the ** host ID and/or proxy path, then the lock is escalated to an exclusive ** lock and the conch file contents is updated with the host ID and proxy ** path and the lock is downgraded to a shared lock again. If the conch ** is held by another process (with a shared lock), the exclusive lock ** will fail and SQLITE_BUSY is returned. ** ** The proxy file - a single-byte file used for all advisory file locks ** normally taken on the database file. This allows for safe sharing ** of the database file for multiple readers and writers on the same ** host (the conch ensures that they all use the same local lock file). ** ** Requesting the lock proxy does not immediately take the conch, it is ** only taken when the first request to lock database file is made. ** This matches the semantics of the traditional locking behavior, where ** opening a connection to a database file does not take a lock on it. ** The shared lock and an open file descriptor are maintained until ** the connection to the database is closed. ** ** The proxy file and the lock file are never deleted so they only need ** to be created the first time they are used. ** ** Configuration options ** --------------------- ** ** SQLITE_PREFER_PROXY_LOCKING ** ** Database files accessed on non-local file systems are ** automatically configured for proxy locking, lock files are ** named automatically using the same logic as ** PRAGMA lock_proxy_file=":auto:" ** ** SQLITE_PROXY_DEBUG ** ** Enables the logging of error messages during host id file ** retrieval and creation ** ** LOCKPROXYDIR ** ** Overrides the default directory used for lock proxy files that ** are named automatically via the ":auto:" setting ** ** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS ** ** Permissions to use when creating a directory for storing the ** lock proxy files, only used when LOCKPROXYDIR is not set. ** ** ** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING, ** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will ** force proxy locking to be used for every database file opened, and 0 ** will force automatic proxy locking to be disabled for all database ** files (explicitly calling the SQLITE_FCNTL_SET_LOCKPROXYFILE pragma or ** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING). */ /* ** Proxy locking is only available on MacOSX */ #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE /* ** The proxyLockingContext has the path and file structures for the remote ** and local proxy files in it */ typedef struct proxyLockingContext proxyLockingContext; struct proxyLockingContext { unixFile *conchFile; /* Open conch file */ char *conchFilePath; /* Name of the conch file */ unixFile *lockProxy; /* Open proxy lock file */ char *lockProxyPath; /* Name of the proxy lock file */ char *dbPath; /* Name of the open file */ int conchHeld; /* 1 if the conch is held, -1 if lockless */ int nFails; /* Number of conch taking failures */ void *oldLockingContext; /* Original lockingcontext to restore on close */ sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */ }; /* ** The proxy lock file path for the database at dbPath is written into lPath, ** which must point to valid, writable memory large enough for a maxLen length ** file path. */ static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){ int len; int dbLen; int i; #ifdef LOCKPROXYDIR len = strlcpy(lPath, LOCKPROXYDIR, maxLen); #else # ifdef _CS_DARWIN_USER_TEMP_DIR { if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){ OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n", lPath, errno, osGetpid(0))); return SQLITE_IOERR_LOCK; } len = strlcat(lPath, "sqliteplocks", maxLen); } # else len = strlcpy(lPath, "/tmp/", maxLen); # endif #endif if( lPath[len-1]!='/' ){ len = strlcat(lPath, "/", maxLen); } /* transform the db path to a unique cache name */ dbLen = (int)strlen(dbPath); for( i=0; i 0) ){ /* only mkdir if leaf dir != "." or "/" or ".." */ if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ buf[i]='\0'; if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ int err=errno; if( err!=EEXIST ) { OSTRACE(("CREATELOCKPATH FAILED creating %s, " "'%s' proxy lock path=%s pid=%d\n", buf, strerror(err), lockPath, osGetpid(0))); return err; } } } start=i+1; } buf[i] = lockPath[i]; } OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n", lockPath, osGetpid(0))); return 0; } /* ** Create a new VFS file descriptor (stored in memory obtained from ** sqlite3_malloc) and open the file named "path" in the file descriptor. ** ** The caller is responsible not only for closing the file descriptor ** but also for freeing the memory associated with the file descriptor. */ static int proxyCreateUnixFile( const char *path, /* path for the new unixFile */ unixFile **ppFile, /* unixFile created and returned by ref */ int islockfile /* if non zero missing dirs will be created */ ) { int fd = -1; unixFile *pNew; int rc = SQLITE_OK; int openFlags = O_RDWR | O_CREAT; sqlite3_vfs dummyVfs; int terrno = 0; UnixUnusedFd *pUnused = NULL; /* 1. first try to open/create the file ** 2. if that fails, and this is a lock file (not-conch), try creating ** the parent directories and then try again. ** 3. if that fails, try to open the file read-only ** otherwise return BUSY (if lock file) or CANTOPEN for the conch file */ pUnused = findReusableFd(path, openFlags); if( pUnused ){ fd = pUnused->fd; }else{ pUnused = sqlite3_malloc64(sizeof(*pUnused)); if( !pUnused ){ return SQLITE_NOMEM; } } if( fd<0 ){ fd = robust_open(path, openFlags, 0); terrno = errno; if( fd<0 && errno==ENOENT && islockfile ){ if( proxyCreateLockPath(path) == SQLITE_OK ){ fd = robust_open(path, openFlags, 0); } } } if( fd<0 ){ openFlags = O_RDONLY; fd = robust_open(path, openFlags, 0); terrno = errno; } if( fd<0 ){ if( islockfile ){ return SQLITE_BUSY; } switch (terrno) { case EACCES: return SQLITE_PERM; case EIO: return SQLITE_IOERR_LOCK; /* even though it is the conch */ default: return SQLITE_CANTOPEN_BKPT; } } pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew)); if( pNew==NULL ){ rc = SQLITE_NOMEM; goto end_create_proxy; } memset(pNew, 0, sizeof(unixFile)); pNew->openFlags = openFlags; memset(&dummyVfs, 0, sizeof(dummyVfs)); dummyVfs.pAppData = (void*)&autolockIoFinder; dummyVfs.zName = "dummy"; pUnused->fd = fd; pUnused->flags = openFlags; pNew->pUnused = pUnused; rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); if( rc==SQLITE_OK ){ *ppFile = pNew; return SQLITE_OK; } end_create_proxy: robust_close(pNew, fd, __LINE__); sqlite3_free(pNew); sqlite3_free(pUnused); return rc; } #ifdef SQLITE_TEST /* simulate multiple hosts by creating unique hostid file paths */ SQLITE_PRIVATE int sqlite3_hostid_num = 0; #endif #define PROXY_HOSTIDLEN 16 /* conch file host id length */ #ifdef HAVE_GETHOSTUUID /* Not always defined in the headers as it ought to be */ extern int gethostuuid(uuid_t id, const struct timespec *wait); #endif /* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN ** bytes of writable memory. */ static int proxyGetHostID(unsigned char *pHostID, int *pError){ assert(PROXY_HOSTIDLEN == sizeof(uuid_t)); memset(pHostID, 0, PROXY_HOSTIDLEN); #ifdef HAVE_GETHOSTUUID { struct timespec timeout = {1, 0}; /* 1 sec timeout */ if( gethostuuid(pHostID, &timeout) ){ int err = errno; if( pError ){ *pError = err; } return SQLITE_IOERR; } } #else UNUSED_PARAMETER(pError); #endif #ifdef SQLITE_TEST /* simulate multiple hosts by creating unique hostid file paths */ if( sqlite3_hostid_num != 0){ pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF)); } #endif return SQLITE_OK; } /* The conch file contains the header, host id and lock file path */ #define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */ #define PROXY_HEADERLEN 1 /* conch file header length */ #define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN) #define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN) /* ** Takes an open conch file, copies the contents to a new path and then moves ** it back. The newly created file's file descriptor is assigned to the ** conch file structure and finally the original conch file descriptor is ** closed. Returns zero if successful. */ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; unixFile *conchFile = pCtx->conchFile; char tPath[MAXPATHLEN]; char buf[PROXY_MAXCONCHLEN]; char *cPath = pCtx->conchFilePath; size_t readLen = 0; size_t pathLen = 0; char errmsg[64] = ""; int fd = -1; int rc = -1; UNUSED_PARAMETER(myHostID); /* create a new path by replace the trailing '-conch' with '-break' */ pathLen = strlcpy(tPath, cPath, MAXPATHLEN); if( pathLen>MAXPATHLEN || pathLen<6 || (strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){ sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen); goto end_breaklock; } /* read the conch content */ readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0); if( readLenh, __LINE__); conchFile->h = fd; conchFile->openFlags = O_RDWR | O_CREAT; end_breaklock: if( rc ){ if( fd>=0 ){ osUnlink(tPath); robust_close(pFile, fd, __LINE__); } fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg); } return rc; } /* Take the requested lock on the conch file and break a stale lock if the ** host id matches. */ static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){ proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; unixFile *conchFile = pCtx->conchFile; int rc = SQLITE_OK; int nTries = 0; struct timespec conchModTime; memset(&conchModTime, 0, sizeof(conchModTime)); do { rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); nTries ++; if( rc==SQLITE_BUSY ){ /* If the lock failed (busy): * 1st try: get the mod time of the conch, wait 0.5s and try again. * 2nd try: fail if the mod time changed or host id is different, wait * 10 sec and try again * 3rd try: break the lock unless the mod time has changed. */ struct stat buf; if( osFstat(conchFile->h, &buf) ){ storeLastErrno(pFile, errno); return SQLITE_IOERR_LOCK; } if( nTries==1 ){ conchModTime = buf.st_mtimespec; usleep(500000); /* wait 0.5 sec and try the lock again*/ continue; } assert( nTries>1 ); if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec || conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){ return SQLITE_BUSY; } if( nTries==2 ){ char tBuf[PROXY_MAXCONCHLEN]; int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0); if( len<0 ){ storeLastErrno(pFile, errno); return SQLITE_IOERR_LOCK; } if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){ /* don't break the lock if the host id doesn't match */ if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){ return SQLITE_BUSY; } }else{ /* don't break the lock on short read or a version mismatch */ return SQLITE_BUSY; } usleep(10000000); /* wait 10 sec and try the lock again */ continue; } assert( nTries==3 ); if( 0==proxyBreakConchLock(pFile, myHostID) ){ rc = SQLITE_OK; if( lockType==EXCLUSIVE_LOCK ){ rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK); } if( !rc ){ rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType); } } } } while( rc==SQLITE_BUSY && nTries<3 ); return rc; } /* Takes the conch by taking a shared lock and read the contents conch, if ** lockPath is non-NULL, the host ID and lock file path must match. A NULL ** lockPath means that the lockPath in the conch file will be used if the ** host IDs match, or a new lock path will be generated automatically ** and written to the conch file. */ static int proxyTakeConch(unixFile *pFile){ proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; if( pCtx->conchHeld!=0 ){ return SQLITE_OK; }else{ unixFile *conchFile = pCtx->conchFile; uuid_t myHostID; int pError = 0; char readBuf[PROXY_MAXCONCHLEN]; char lockPath[MAXPATHLEN]; char *tempLockPath = NULL; int rc = SQLITE_OK; int createConch = 0; int hostIdMatch = 0; int readLen = 0; int tryOldLockPath = 0; int forceNewLockPath = 0; OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h, (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), osGetpid(0))); rc = proxyGetHostID(myHostID, &pError); if( (rc&0xff)==SQLITE_IOERR ){ storeLastErrno(pFile, pError); goto end_takeconch; } rc = proxyConchLock(pFile, myHostID, SHARED_LOCK); if( rc!=SQLITE_OK ){ goto end_takeconch; } /* read the existing conch file */ readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN); if( readLen<0 ){ /* I/O error: lastErrno set by seekAndRead */ storeLastErrno(pFile, conchFile->lastErrno); rc = SQLITE_IOERR_READ; goto end_takeconch; }else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) || readBuf[0]!=(char)PROXY_CONCHVERSION ){ /* a short read or version format mismatch means we need to create a new ** conch file. */ createConch = 1; } /* if the host id matches and the lock path already exists in the conch ** we'll try to use the path there, if we can't open that path, we'll ** retry with a new auto-generated path */ do { /* in case we need to try again for an :auto: named lock file */ if( !createConch && !forceNewLockPath ){ hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); /* if the conch has data compare the contents */ if( !pCtx->lockProxyPath ){ /* for auto-named local lock file, just check the host ID and we'll ** use the local lock file path that's already in there */ if( hostIdMatch ){ size_t pathLen = (readLen - PROXY_PATHINDEX); if( pathLen>=MAXPATHLEN ){ pathLen=MAXPATHLEN-1; } memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen); lockPath[pathLen] = 0; tempLockPath = lockPath; tryOldLockPath = 1; /* create a copy of the lock path if the conch is taken */ goto end_takeconch; } }else if( hostIdMatch && !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX], readLen-PROXY_PATHINDEX) ){ /* conch host and lock path match */ goto end_takeconch; } } /* if the conch isn't writable and doesn't match, we can't take it */ if( (conchFile->openFlags&O_RDWR) == 0 ){ rc = SQLITE_BUSY; goto end_takeconch; } /* either the conch didn't match or we need to create a new one */ if( !pCtx->lockProxyPath ){ proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN); tempLockPath = lockPath; /* create a copy of the lock path _only_ if the conch is taken */ } /* update conch with host and path (this will fail if other process ** has a shared lock already), if the host id matches, use the big ** stick. */ futimes(conchFile->h, NULL); if( hostIdMatch && !createConch ){ if( conchFile->pInode && conchFile->pInode->nShared>1 ){ /* We are trying for an exclusive lock but another thread in this ** same process is still holding a shared lock. */ rc = SQLITE_BUSY; } else { rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); } }else{ rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK); } if( rc==SQLITE_OK ){ char writeBuffer[PROXY_MAXCONCHLEN]; int writeSize = 0; writeBuffer[0] = (char)PROXY_CONCHVERSION; memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN); if( pCtx->lockProxyPath!=NULL ){ strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, MAXPATHLEN); }else{ strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN); } writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]); robust_ftruncate(conchFile->h, writeSize); rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0); fsync(conchFile->h); /* If we created a new conch file (not just updated the contents of a ** valid conch file), try to match the permissions of the database */ if( rc==SQLITE_OK && createConch ){ struct stat buf; int err = osFstat(pFile->h, &buf); if( err==0 ){ mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP | S_IROTH|S_IWOTH); /* try to match the database file R/W permissions, ignore failure */ #ifndef SQLITE_PROXY_DEBUG osFchmod(conchFile->h, cmode); #else do{ rc = osFchmod(conchFile->h, cmode); }while( rc==(-1) && errno==EINTR ); if( rc!=0 ){ int code = errno; fprintf(stderr, "fchmod %o FAILED with %d %s\n", cmode, code, strerror(code)); } else { fprintf(stderr, "fchmod %o SUCCEDED\n",cmode); } }else{ int code = errno; fprintf(stderr, "STAT FAILED[%d] with %d %s\n", err, code, strerror(code)); #endif } } } conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK); end_takeconch: OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h)); if( rc==SQLITE_OK && pFile->openFlags ){ int fd; if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); } pFile->h = -1; fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); if( fd>=0 ){ pFile->h = fd; }else{ rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called during locking */ } } if( rc==SQLITE_OK && !pCtx->lockProxy ){ char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath; rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1); if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){ /* we couldn't create the proxy lock file with the old lock file path ** so try again via auto-naming */ forceNewLockPath = 1; tryOldLockPath = 0; continue; /* go back to the do {} while start point, try again */ } } if( rc==SQLITE_OK ){ /* Need to make a copy of path if we extracted the value ** from the conch file or the path was allocated on the stack */ if( tempLockPath ){ pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath); if( !pCtx->lockProxyPath ){ rc = SQLITE_NOMEM; } } } if( rc==SQLITE_OK ){ pCtx->conchHeld = 1; if( pCtx->lockProxy->pMethod == &afpIoMethods ){ afpLockingContext *afpCtx; afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext; afpCtx->dbPath = pCtx->lockProxyPath; } } else { conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); } OSTRACE(("TAKECONCH %d %s\n", conchFile->h, rc==SQLITE_OK?"ok":"failed")); return rc; } while (1); /* in case we need to retry the :auto: lock file - ** we should never get here except via the 'continue' call. */ } } /* ** If pFile holds a lock on a conch file, then release that lock. */ static int proxyReleaseConch(unixFile *pFile){ int rc = SQLITE_OK; /* Subroutine return code */ proxyLockingContext *pCtx; /* The locking context for the proxy lock */ unixFile *conchFile; /* Name of the conch file */ pCtx = (proxyLockingContext *)pFile->lockingContext; conchFile = pCtx->conchFile; OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h, (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), osGetpid(0))); if( pCtx->conchHeld>0 ){ rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK); } pCtx->conchHeld = 0; OSTRACE(("RELEASECONCH %d %s\n", conchFile->h, (rc==SQLITE_OK ? "ok" : "failed"))); return rc; } /* ** Given the name of a database file, compute the name of its conch file. ** Store the conch filename in memory obtained from sqlite3_malloc64(). ** Make *pConchPath point to the new name. Return SQLITE_OK on success ** or SQLITE_NOMEM if unable to obtain memory. ** ** The caller is responsible for ensuring that the allocated memory ** space is eventually freed. ** ** *pConchPath is set to NULL if a memory allocation error occurs. */ static int proxyCreateConchPathname(char *dbPath, char **pConchPath){ int i; /* Loop counter */ int len = (int)strlen(dbPath); /* Length of database filename - dbPath */ char *conchPath; /* buffer in which to construct conch name */ /* Allocate space for the conch filename and initialize the name to ** the name of the original database file. */ *pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8); if( conchPath==0 ){ return SQLITE_NOMEM; } memcpy(conchPath, dbPath, len+1); /* now insert a "." before the last / character */ for( i=(len-1); i>=0; i-- ){ if( conchPath[i]=='/' ){ i++; break; } } conchPath[i]='.'; while ( ilockingContext; char *oldPath = pCtx->lockProxyPath; int rc = SQLITE_OK; if( pFile->eFileLock!=NO_LOCK ){ return SQLITE_BUSY; } /* nothing to do if the path is NULL, :auto: or matches the existing path */ if( !path || path[0]=='\0' || !strcmp(path, ":auto:") || (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){ return SQLITE_OK; }else{ unixFile *lockProxy = pCtx->lockProxy; pCtx->lockProxy=NULL; pCtx->conchHeld = 0; if( lockProxy!=NULL ){ rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy); if( rc ) return rc; sqlite3_free(lockProxy); } sqlite3_free(oldPath); pCtx->lockProxyPath = sqlite3DbStrDup(0, path); } return rc; } /* ** pFile is a file that has been opened by a prior xOpen call. dbPath ** is a string buffer at least MAXPATHLEN+1 characters in size. ** ** This routine find the filename associated with pFile and writes it ** int dbPath. */ static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){ #if defined(__APPLE__) if( pFile->pMethod == &afpIoMethods ){ /* afp style keeps a reference to the db path in the filePath field ** of the struct */ assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, MAXPATHLEN); } else #endif if( pFile->pMethod == &dotlockIoMethods ){ /* dot lock style uses the locking context to store the dot lock ** file path */ int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX); memcpy(dbPath, (char *)pFile->lockingContext, len + 1); }else{ /* all other styles use the locking context to store the db file path */ assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN ); strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN); } return SQLITE_OK; } /* ** Takes an already filled in unix file and alters it so all file locking ** will be performed on the local proxy lock file. The following fields ** are preserved in the locking context so that they can be restored and ** the unix structure properly cleaned up at close time: ** ->lockingContext ** ->pMethod */ static int proxyTransformUnixFile(unixFile *pFile, const char *path) { proxyLockingContext *pCtx; char dbPath[MAXPATHLEN+1]; /* Name of the database file */ char *lockPath=NULL; int rc = SQLITE_OK; if( pFile->eFileLock!=NO_LOCK ){ return SQLITE_BUSY; } proxyGetDbPathForUnixFile(pFile, dbPath); if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){ lockPath=NULL; }else{ lockPath=(char *)path; } OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h, (lockPath ? lockPath : ":auto:"), osGetpid(0))); pCtx = sqlite3_malloc64( sizeof(*pCtx) ); if( pCtx==0 ){ return SQLITE_NOMEM; } memset(pCtx, 0, sizeof(*pCtx)); rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath); if( rc==SQLITE_OK ){ rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0); if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){ /* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and ** (c) the file system is read-only, then enable no-locking access. ** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts ** that openFlags will have only one of O_RDONLY or O_RDWR. */ struct statfs fsInfo; struct stat conchInfo; int goLockless = 0; if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) { int err = errno; if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){ goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY; } } if( goLockless ){ pCtx->conchHeld = -1; /* read only FS/ lockless */ rc = SQLITE_OK; } } } if( rc==SQLITE_OK && lockPath ){ pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath); } if( rc==SQLITE_OK ){ pCtx->dbPath = sqlite3DbStrDup(0, dbPath); if( pCtx->dbPath==NULL ){ rc = SQLITE_NOMEM; } } if( rc==SQLITE_OK ){ /* all memory is allocated, proxys are created and assigned, ** switch the locking context and pMethod then return. */ pCtx->oldLockingContext = pFile->lockingContext; pFile->lockingContext = pCtx; pCtx->pOldMethod = pFile->pMethod; pFile->pMethod = &proxyIoMethods; }else{ if( pCtx->conchFile ){ pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile); sqlite3_free(pCtx->conchFile); } sqlite3DbFree(0, pCtx->lockProxyPath); sqlite3_free(pCtx->conchFilePath); sqlite3_free(pCtx); } OSTRACE(("TRANSPROXY %d %s\n", pFile->h, (rc==SQLITE_OK ? "ok" : "failed"))); return rc; } /* ** This routine handles sqlite3_file_control() calls that are specific ** to proxy locking. */ static int proxyFileControl(sqlite3_file *id, int op, void *pArg){ switch( op ){ case SQLITE_FCNTL_GET_LOCKPROXYFILE: { unixFile *pFile = (unixFile*)id; if( pFile->pMethod == &proxyIoMethods ){ proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; proxyTakeConch(pFile); if( pCtx->lockProxyPath ){ *(const char **)pArg = pCtx->lockProxyPath; }else{ *(const char **)pArg = ":auto: (not held)"; } } else { *(const char **)pArg = NULL; } return SQLITE_OK; } case SQLITE_FCNTL_SET_LOCKPROXYFILE: { unixFile *pFile = (unixFile*)id; int rc = SQLITE_OK; int isProxyStyle = (pFile->pMethod == &proxyIoMethods); if( pArg==NULL || (const char *)pArg==0 ){ if( isProxyStyle ){ /* turn off proxy locking - not supported. If support is added for ** switching proxy locking mode off then it will need to fail if ** the journal mode is WAL mode. */ rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/; }else{ /* turn off proxy locking - already off - NOOP */ rc = SQLITE_OK; } }else{ const char *proxyPath = (const char *)pArg; if( isProxyStyle ){ proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext; if( !strcmp(pArg, ":auto:") || (pCtx->lockProxyPath && !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN)) ){ rc = SQLITE_OK; }else{ rc = switchLockProxyPath(pFile, proxyPath); } }else{ /* turn on proxy file locking */ rc = proxyTransformUnixFile(pFile, proxyPath); } } return rc; } default: { assert( 0 ); /* The call assures that only valid opcodes are sent */ } } /*NOTREACHED*/ return SQLITE_ERROR; } /* ** Within this division (the proxying locking implementation) the procedures ** above this point are all utilities. The lock-related methods of the ** proxy-locking sqlite3_io_method object follow. */ /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, set *pResOut ** to a non-zero value otherwise *pResOut is set to zero. The return value ** is set to SQLITE_OK unless an I/O error occurs during lock checking. */ static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) { unixFile *pFile = (unixFile*)id; int rc = proxyTakeConch(pFile); if( rc==SQLITE_OK ){ proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; if( pCtx->conchHeld>0 ){ unixFile *proxy = pCtx->lockProxy; return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut); }else{ /* conchHeld < 0 is lockless */ pResOut=0; } } return rc; } /* ** Lock the file with the lock specified by parameter eFileLock - one ** of the following: ** ** (1) SHARED_LOCK ** (2) RESERVED_LOCK ** (3) PENDING_LOCK ** (4) EXCLUSIVE_LOCK ** ** Sometimes when requesting one lock state, additional lock states ** are inserted in between. The locking might fail on one of the later ** transitions leaving the lock state different from what it started but ** still short of its goal. The following chart shows the allowed ** transitions and the inserted intermediate states: ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** ** This routine will only increase a lock. Use the sqlite3OsUnlock() ** routine to lower a locking level. */ static int proxyLock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; int rc = proxyTakeConch(pFile); if( rc==SQLITE_OK ){ proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; if( pCtx->conchHeld>0 ){ unixFile *proxy = pCtx->lockProxy; rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock); pFile->eFileLock = proxy->eFileLock; }else{ /* conchHeld < 0 is lockless */ } } return rc; } /* ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int proxyUnlock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; int rc = proxyTakeConch(pFile); if( rc==SQLITE_OK ){ proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; if( pCtx->conchHeld>0 ){ unixFile *proxy = pCtx->lockProxy; rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock); pFile->eFileLock = proxy->eFileLock; }else{ /* conchHeld < 0 is lockless */ } } return rc; } /* ** Close a file that uses proxy locks. */ static int proxyClose(sqlite3_file *id) { if( id ){ unixFile *pFile = (unixFile*)id; proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext; unixFile *lockProxy = pCtx->lockProxy; unixFile *conchFile = pCtx->conchFile; int rc = SQLITE_OK; if( lockProxy ){ rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK); if( rc ) return rc; rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy); if( rc ) return rc; sqlite3_free(lockProxy); pCtx->lockProxy = 0; } if( conchFile ){ if( pCtx->conchHeld ){ rc = proxyReleaseConch(pFile); if( rc ) return rc; } rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile); if( rc ) return rc; sqlite3_free(conchFile); } sqlite3DbFree(0, pCtx->lockProxyPath); sqlite3_free(pCtx->conchFilePath); sqlite3DbFree(0, pCtx->dbPath); /* restore the original locking context and pMethod then close it */ pFile->lockingContext = pCtx->oldLockingContext; pFile->pMethod = pCtx->pOldMethod; sqlite3_free(pCtx); return pFile->pMethod->xClose(id); } return SQLITE_OK; } #endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */ /* ** The proxy locking style is intended for use with AFP filesystems. ** And since AFP is only supported on MacOSX, the proxy locking is also ** restricted to MacOSX. ** ** ******************* End of the proxy lock implementation ********************** ******************************************************************************/ /* ** Initialize the operating system interface. ** ** This routine registers all VFS implementations for unix-like operating ** systems. This routine, and the sqlite3_os_end() routine that follows, ** should be the only routines in this file that are visible from other ** files. ** ** This routine is called once during SQLite initialization and by a ** single thread. The memory allocation and mutex subsystems have not ** necessarily been initialized when this routine is called, and so they ** should not be used. */ SQLITE_PRIVATE int sqlite3_os_init(void){ /* ** The following macro defines an initializer for an sqlite3_vfs object. ** The name of the VFS is NAME. The pAppData is a pointer to a pointer ** to the "finder" function. (pAppData is a pointer to a pointer because ** silly C90 rules prohibit a void* from being cast to a function pointer ** and so we have to go through the intermediate pointer to avoid problems ** when compiling with -pedantic-errors on GCC.) ** ** The FINDER parameter to this macro is the name of the pointer to the ** finder-function. The finder-function returns a pointer to the ** sqlite_io_methods object that implements the desired locking ** behaviors. See the division above that contains the IOMETHODS ** macro for addition information on finder-functions. ** ** Most finders simply return a pointer to a fixed sqlite3_io_methods ** object. But the "autolockIoFinder" available on MacOSX does a little ** more than that; it looks at the filesystem type that hosts the ** database file and tries to choose an locking method appropriate for ** that filesystem time. */ #define UNIXVFS(VFSNAME, FINDER) { \ 3, /* iVersion */ \ sizeof(unixFile), /* szOsFile */ \ MAX_PATHNAME, /* mxPathname */ \ 0, /* pNext */ \ VFSNAME, /* zName */ \ (void*)&FINDER, /* pAppData */ \ unixOpen, /* xOpen */ \ unixDelete, /* xDelete */ \ unixAccess, /* xAccess */ \ unixFullPathname, /* xFullPathname */ \ unixDlOpen, /* xDlOpen */ \ unixDlError, /* xDlError */ \ unixDlSym, /* xDlSym */ \ unixDlClose, /* xDlClose */ \ unixRandomness, /* xRandomness */ \ unixSleep, /* xSleep */ \ unixCurrentTime, /* xCurrentTime */ \ unixGetLastError, /* xGetLastError */ \ unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \ unixSetSystemCall, /* xSetSystemCall */ \ unixGetSystemCall, /* xGetSystemCall */ \ unixNextSystemCall, /* xNextSystemCall */ \ } /* ** All default VFSes for unix are contained in the following array. ** ** Note that the sqlite3_vfs.pNext field of the VFS object is modified ** by the SQLite core when the VFS is registered. So the following ** array cannot be const. */ static sqlite3_vfs aVfs[] = { #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) UNIXVFS("unix", autolockIoFinder ), #elif OS_VXWORKS UNIXVFS("unix", vxworksIoFinder ), #else UNIXVFS("unix", posixIoFinder ), #endif UNIXVFS("unix-none", nolockIoFinder ), UNIXVFS("unix-dotfile", dotlockIoFinder ), UNIXVFS("unix-excl", posixIoFinder ), #if OS_VXWORKS UNIXVFS("unix-namedsem", semIoFinder ), #endif #if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS UNIXVFS("unix-posix", posixIoFinder ), #endif #if SQLITE_ENABLE_LOCKING_STYLE UNIXVFS("unix-flock", flockIoFinder ), #endif #if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) UNIXVFS("unix-afp", afpIoFinder ), UNIXVFS("unix-nfs", nfsIoFinder ), UNIXVFS("unix-proxy", proxyIoFinder ), #endif }; unsigned int i; /* Loop counter */ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ assert( ArraySize(aSyscall)==25 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ sqlite3_vfs_register(&aVfs[i], i==0); } return SQLITE_OK; } /* ** Shutdown the operating system interface. ** ** Some operating systems might need to do some cleanup in this routine, ** to release dynamically allocated objects. But not on unix. ** This routine is a no-op for unix. */ SQLITE_PRIVATE int sqlite3_os_end(void){ return SQLITE_OK; } #endif /* SQLITE_OS_UNIX */ /************** End of os_unix.c *********************************************/ /************** Begin file os_win.c ******************************************/ /* ** 2004 May 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains code that is specific to Windows. */ /* #include "sqliteInt.h" */ #if SQLITE_OS_WIN /* This file is used for Windows only */ /* ** Include code that is common to all os_*.c files */ /************** Include os_common.h in the middle of os_win.c ****************/ /************** Begin file os_common.h ***************************************/ /* ** 2004 May 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains macros and a little bit of code that is common to ** all of the platform-specific files (os_*.c) and is #included into those ** files. ** ** This file should be #included by the os_*.c files only. It is not a ** general purpose header file. */ #ifndef _OS_COMMON_H_ #define _OS_COMMON_H_ /* ** At least two bugs have slipped in because we changed the MEMORY_DEBUG ** macro to SQLITE_DEBUG and some older makefiles have not yet made the ** switch. The following code should catch this problem at compile-time. */ #ifdef MEMORY_DEBUG # error "The MEMORY_DEBUG macro is obsolete. Use SQLITE_DEBUG instead." #endif /* ** Macros for performance tracing. Normally turned off. Only works ** on i486 hardware. */ #ifdef SQLITE_PERFORMANCE_TRACE /* ** hwtime.h contains inline assembler code for implementing ** high-performance timing routines. */ /* #include "hwtime.h" */ static sqlite_uint64 g_start; static sqlite_uint64 g_elapsed; #define TIMER_START g_start=sqlite3Hwtime() #define TIMER_END g_elapsed=sqlite3Hwtime()-g_start #define TIMER_ELAPSED g_elapsed #else #define TIMER_START #define TIMER_END #define TIMER_ELAPSED ((sqlite_uint64)0) #endif /* ** If we compile with the SQLITE_TEST macro set, then the following block ** of code will give us the ability to simulate a disk I/O error. This ** is used for testing the I/O recovery logic. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_io_error_hit = 0; /* Total number of I/O Errors */ SQLITE_PRIVATE int sqlite3_io_error_hardhit = 0; /* Number of non-benign errors */ SQLITE_PRIVATE int sqlite3_io_error_pending = 0; /* Count down to first I/O error */ SQLITE_PRIVATE int sqlite3_io_error_persist = 0; /* True if I/O errors persist */ SQLITE_PRIVATE int sqlite3_io_error_benign = 0; /* True if errors are benign */ SQLITE_PRIVATE int sqlite3_diskfull_pending = 0; SQLITE_PRIVATE int sqlite3_diskfull = 0; #define SimulateIOErrorBenign(X) sqlite3_io_error_benign=(X) #define SimulateIOError(CODE) \ if( (sqlite3_io_error_persist && sqlite3_io_error_hit) \ || sqlite3_io_error_pending-- == 1 ) \ { local_ioerr(); CODE; } static void local_ioerr(){ IOTRACE(("IOERR\n")); sqlite3_io_error_hit++; if( !sqlite3_io_error_benign ) sqlite3_io_error_hardhit++; } #define SimulateDiskfullError(CODE) \ if( sqlite3_diskfull_pending ){ \ if( sqlite3_diskfull_pending == 1 ){ \ local_ioerr(); \ sqlite3_diskfull = 1; \ sqlite3_io_error_hit = 1; \ CODE; \ }else{ \ sqlite3_diskfull_pending--; \ } \ } #else #define SimulateIOErrorBenign(X) #define SimulateIOError(A) #define SimulateDiskfullError(A) #endif /* ** When testing, keep a count of the number of open files. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_open_file_count = 0; #define OpenCounter(X) sqlite3_open_file_count+=(X) #else #define OpenCounter(X) #endif #endif /* !defined(_OS_COMMON_H_) */ /************** End of os_common.h *******************************************/ /************** Continuing where we left off in os_win.c *********************/ /* ** Include the header file for the Windows VFS. */ /* #include "os_win.h" */ /* ** Compiling and using WAL mode requires several APIs that are only ** available in Windows platforms based on the NT kernel. */ #if !SQLITE_OS_WINNT && !defined(SQLITE_OMIT_WAL) # error "WAL mode requires support from the Windows NT kernel, compile\ with SQLITE_OMIT_WAL." #endif #if !SQLITE_OS_WINNT && SQLITE_MAX_MMAP_SIZE>0 # error "Memory mapped files require support from the Windows NT kernel,\ compile with SQLITE_MAX_MMAP_SIZE=0." #endif /* ** Are most of the Win32 ANSI APIs available (i.e. with certain exceptions ** based on the sub-platform)? */ #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(SQLITE_WIN32_NO_ANSI) # define SQLITE_WIN32_HAS_ANSI #endif /* ** Are most of the Win32 Unicode APIs available (i.e. with certain exceptions ** based on the sub-platform)? */ #if (SQLITE_OS_WINCE || SQLITE_OS_WINNT || SQLITE_OS_WINRT) && \ !defined(SQLITE_WIN32_NO_WIDE) # define SQLITE_WIN32_HAS_WIDE #endif /* ** Make sure at least one set of Win32 APIs is available. */ #if !defined(SQLITE_WIN32_HAS_ANSI) && !defined(SQLITE_WIN32_HAS_WIDE) # error "At least one of SQLITE_WIN32_HAS_ANSI and SQLITE_WIN32_HAS_WIDE\ must be defined." #endif /* ** Define the required Windows SDK version constants if they are not ** already available. */ #ifndef NTDDI_WIN8 # define NTDDI_WIN8 0x06020000 #endif #ifndef NTDDI_WINBLUE # define NTDDI_WINBLUE 0x06030000 #endif /* ** Check to see if the GetVersionEx[AW] functions are deprecated on the ** target system. GetVersionEx was first deprecated in Win8.1. */ #ifndef SQLITE_WIN32_GETVERSIONEX # if defined(NTDDI_VERSION) && NTDDI_VERSION >= NTDDI_WINBLUE # define SQLITE_WIN32_GETVERSIONEX 0 /* GetVersionEx() is deprecated */ # else # define SQLITE_WIN32_GETVERSIONEX 1 /* GetVersionEx() is current */ # endif #endif /* ** This constant should already be defined (in the "WinDef.h" SDK file). */ #ifndef MAX_PATH # define MAX_PATH (260) #endif /* ** Maximum pathname length (in chars) for Win32. This should normally be ** MAX_PATH. */ #ifndef SQLITE_WIN32_MAX_PATH_CHARS # define SQLITE_WIN32_MAX_PATH_CHARS (MAX_PATH) #endif /* ** This constant should already be defined (in the "WinNT.h" SDK file). */ #ifndef UNICODE_STRING_MAX_CHARS # define UNICODE_STRING_MAX_CHARS (32767) #endif /* ** Maximum pathname length (in chars) for WinNT. This should normally be ** UNICODE_STRING_MAX_CHARS. */ #ifndef SQLITE_WINNT_MAX_PATH_CHARS # define SQLITE_WINNT_MAX_PATH_CHARS (UNICODE_STRING_MAX_CHARS) #endif /* ** Maximum pathname length (in bytes) for Win32. The MAX_PATH macro is in ** characters, so we allocate 4 bytes per character assuming worst-case of ** 4-bytes-per-character for UTF8. */ #ifndef SQLITE_WIN32_MAX_PATH_BYTES # define SQLITE_WIN32_MAX_PATH_BYTES (SQLITE_WIN32_MAX_PATH_CHARS*4) #endif /* ** Maximum pathname length (in bytes) for WinNT. This should normally be ** UNICODE_STRING_MAX_CHARS * sizeof(WCHAR). */ #ifndef SQLITE_WINNT_MAX_PATH_BYTES # define SQLITE_WINNT_MAX_PATH_BYTES \ (sizeof(WCHAR) * SQLITE_WINNT_MAX_PATH_CHARS) #endif /* ** Maximum error message length (in chars) for WinRT. */ #ifndef SQLITE_WIN32_MAX_ERRMSG_CHARS # define SQLITE_WIN32_MAX_ERRMSG_CHARS (1024) #endif /* ** Returns non-zero if the character should be treated as a directory ** separator. */ #ifndef winIsDirSep # define winIsDirSep(a) (((a) == '/') || ((a) == '\\')) #endif /* ** This macro is used when a local variable is set to a value that is ** [sometimes] not used by the code (e.g. via conditional compilation). */ #ifndef UNUSED_VARIABLE_VALUE # define UNUSED_VARIABLE_VALUE(x) (void)(x) #endif /* ** Returns the character that should be used as the directory separator. */ #ifndef winGetDirSep # define winGetDirSep() '\\' #endif /* ** Do we need to manually define the Win32 file mapping APIs for use with WAL ** mode or memory mapped files (e.g. these APIs are available in the Windows ** CE SDK; however, they are not present in the header file)? */ #if SQLITE_WIN32_FILEMAPPING_API && \ (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) /* ** Two of the file mapping APIs are different under WinRT. Figure out which ** set we need. */ #if SQLITE_OS_WINRT WINBASEAPI HANDLE WINAPI CreateFileMappingFromApp(HANDLE, \ LPSECURITY_ATTRIBUTES, ULONG, ULONG64, LPCWSTR); WINBASEAPI LPVOID WINAPI MapViewOfFileFromApp(HANDLE, ULONG, ULONG64, SIZE_T); #else #if defined(SQLITE_WIN32_HAS_ANSI) WINBASEAPI HANDLE WINAPI CreateFileMappingA(HANDLE, LPSECURITY_ATTRIBUTES, \ DWORD, DWORD, DWORD, LPCSTR); #endif /* defined(SQLITE_WIN32_HAS_ANSI) */ #if defined(SQLITE_WIN32_HAS_WIDE) WINBASEAPI HANDLE WINAPI CreateFileMappingW(HANDLE, LPSECURITY_ATTRIBUTES, \ DWORD, DWORD, DWORD, LPCWSTR); #endif /* defined(SQLITE_WIN32_HAS_WIDE) */ WINBASEAPI LPVOID WINAPI MapViewOfFile(HANDLE, DWORD, DWORD, DWORD, SIZE_T); #endif /* SQLITE_OS_WINRT */ /* ** These file mapping APIs are common to both Win32 and WinRT. */ WINBASEAPI BOOL WINAPI FlushViewOfFile(LPCVOID, SIZE_T); WINBASEAPI BOOL WINAPI UnmapViewOfFile(LPCVOID); #endif /* SQLITE_WIN32_FILEMAPPING_API */ /* ** Some Microsoft compilers lack this definition. */ #ifndef INVALID_FILE_ATTRIBUTES # define INVALID_FILE_ATTRIBUTES ((DWORD)-1) #endif #ifndef FILE_FLAG_MASK # define FILE_FLAG_MASK (0xFF3C0000) #endif #ifndef FILE_ATTRIBUTE_MASK # define FILE_ATTRIBUTE_MASK (0x0003FFF7) #endif #ifndef SQLITE_OMIT_WAL /* Forward references to structures used for WAL */ typedef struct winShm winShm; /* A connection to shared-memory */ typedef struct winShmNode winShmNode; /* A region of shared-memory */ #endif /* ** WinCE lacks native support for file locking so we have to fake it ** with some code of our own. */ #if SQLITE_OS_WINCE typedef struct winceLock { int nReaders; /* Number of reader locks obtained */ BOOL bPending; /* Indicates a pending lock has been obtained */ BOOL bReserved; /* Indicates a reserved lock has been obtained */ BOOL bExclusive; /* Indicates an exclusive lock has been obtained */ } winceLock; #endif /* ** The winFile structure is a subclass of sqlite3_file* specific to the win32 ** portability layer. */ typedef struct winFile winFile; struct winFile { const sqlite3_io_methods *pMethod; /*** Must be first ***/ sqlite3_vfs *pVfs; /* The VFS used to open this file */ HANDLE h; /* Handle for accessing the file */ u8 locktype; /* Type of lock currently held on this file */ short sharedLockByte; /* Randomly chosen byte used as a shared lock */ u8 ctrlFlags; /* Flags. See WINFILE_* below */ DWORD lastErrno; /* The Windows errno from the last I/O error */ #ifndef SQLITE_OMIT_WAL winShm *pShm; /* Instance of shared memory on this file */ #endif const char *zPath; /* Full pathname of this file */ int szChunk; /* Chunk size configured by FCNTL_CHUNK_SIZE */ #if SQLITE_OS_WINCE LPWSTR zDeleteOnClose; /* Name of file to delete when closing */ HANDLE hMutex; /* Mutex used to control access to shared lock */ HANDLE hShared; /* Shared memory segment used for locking */ winceLock local; /* Locks obtained by this instance of winFile */ winceLock *shared; /* Global shared lock memory for the file */ #endif #if SQLITE_MAX_MMAP_SIZE>0 int nFetchOut; /* Number of outstanding xFetch references */ HANDLE hMap; /* Handle for accessing memory mapping */ void *pMapRegion; /* Area memory mapped */ sqlite3_int64 mmapSize; /* Usable size of mapped region */ sqlite3_int64 mmapSizeActual; /* Actual size of mapped region */ sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ #endif }; /* ** Allowed values for winFile.ctrlFlags */ #define WINFILE_RDONLY 0x02 /* Connection is read only */ #define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ #define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ /* * The value used with sqlite3_win32_set_directory() to specify that * the temporary directory should be changed. */ #ifndef SQLITE_WIN32_TEMP_DIRECTORY_TYPE # define SQLITE_WIN32_TEMP_DIRECTORY_TYPE (2) #endif /* * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the * various Win32 API heap functions instead of our own. */ #ifdef SQLITE_WIN32_MALLOC /* * If this is non-zero, an isolated heap will be created by the native Win32 * allocator subsystem; otherwise, the default process heap will be used. This * setting has no effect when compiling for WinRT. By default, this is enabled * and an isolated heap will be created to store all allocated data. * ****************************************************************************** * WARNING: It is important to note that when this setting is non-zero and the * winMemShutdown function is called (e.g. by the sqlite3BtreeShutdown * function), all data that was allocated using the isolated heap will * be freed immediately and any attempt to access any of that freed * data will almost certainly result in an immediate access violation. ****************************************************************************** */ #ifndef SQLITE_WIN32_HEAP_CREATE # define SQLITE_WIN32_HEAP_CREATE (TRUE) #endif /* * The initial size of the Win32-specific heap. This value may be zero. */ #ifndef SQLITE_WIN32_HEAP_INIT_SIZE # define SQLITE_WIN32_HEAP_INIT_SIZE ((SQLITE_DEFAULT_CACHE_SIZE) * \ (SQLITE_DEFAULT_PAGE_SIZE) + 4194304) #endif /* * The maximum size of the Win32-specific heap. This value may be zero. */ #ifndef SQLITE_WIN32_HEAP_MAX_SIZE # define SQLITE_WIN32_HEAP_MAX_SIZE (0) #endif /* * The extra flags to use in calls to the Win32 heap APIs. This value may be * zero for the default behavior. */ #ifndef SQLITE_WIN32_HEAP_FLAGS # define SQLITE_WIN32_HEAP_FLAGS (0) #endif /* ** The winMemData structure stores information required by the Win32-specific ** sqlite3_mem_methods implementation. */ typedef struct winMemData winMemData; struct winMemData { #ifndef NDEBUG u32 magic1; /* Magic number to detect structure corruption. */ #endif HANDLE hHeap; /* The handle to our heap. */ BOOL bOwned; /* Do we own the heap (i.e. destroy it on shutdown)? */ #ifndef NDEBUG u32 magic2; /* Magic number to detect structure corruption. */ #endif }; #ifndef NDEBUG #define WINMEM_MAGIC1 0x42b2830b #define WINMEM_MAGIC2 0xbd4d7cf4 #endif static struct winMemData win_mem_data = { #ifndef NDEBUG WINMEM_MAGIC1, #endif NULL, FALSE #ifndef NDEBUG ,WINMEM_MAGIC2 #endif }; #ifndef NDEBUG #define winMemAssertMagic1() assert( win_mem_data.magic1==WINMEM_MAGIC1 ) #define winMemAssertMagic2() assert( win_mem_data.magic2==WINMEM_MAGIC2 ) #define winMemAssertMagic() winMemAssertMagic1(); winMemAssertMagic2(); #else #define winMemAssertMagic() #endif #define winMemGetDataPtr() &win_mem_data #define winMemGetHeap() win_mem_data.hHeap #define winMemGetOwned() win_mem_data.bOwned static void *winMemMalloc(int nBytes); static void winMemFree(void *pPrior); static void *winMemRealloc(void *pPrior, int nBytes); static int winMemSize(void *p); static int winMemRoundup(int n); static int winMemInit(void *pAppData); static void winMemShutdown(void *pAppData); SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void); #endif /* SQLITE_WIN32_MALLOC */ /* ** The following variable is (normally) set once and never changes ** thereafter. It records whether the operating system is Win9x ** or WinNT. ** ** 0: Operating system unknown. ** 1: Operating system is Win9x. ** 2: Operating system is WinNT. ** ** In order to facilitate testing on a WinNT system, the test fixture ** can manually set this value to 1 to emulate Win98 behavior. */ #ifdef SQLITE_TEST SQLITE_PRIVATE LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0; #else static LONG SQLITE_WIN32_VOLATILE sqlite3_os_type = 0; #endif #ifndef SYSCALL # define SYSCALL sqlite3_syscall_ptr #endif /* ** This function is not available on Windows CE or WinRT. */ #if SQLITE_OS_WINCE || SQLITE_OS_WINRT # define osAreFileApisANSI() 1 #endif /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during ** testing and sandboxing. The following array holds the names and pointers ** to all overrideable system calls. */ static struct win_syscall { const char *zName; /* Name of the system call */ sqlite3_syscall_ptr pCurrent; /* Current value of the system call */ sqlite3_syscall_ptr pDefault; /* Default value */ } aSyscall[] = { #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT { "AreFileApisANSI", (SYSCALL)AreFileApisANSI, 0 }, #else { "AreFileApisANSI", (SYSCALL)0, 0 }, #endif #ifndef osAreFileApisANSI #define osAreFileApisANSI ((BOOL(WINAPI*)(VOID))aSyscall[0].pCurrent) #endif #if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE) { "CharLowerW", (SYSCALL)CharLowerW, 0 }, #else { "CharLowerW", (SYSCALL)0, 0 }, #endif #define osCharLowerW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[1].pCurrent) #if SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_WIDE) { "CharUpperW", (SYSCALL)CharUpperW, 0 }, #else { "CharUpperW", (SYSCALL)0, 0 }, #endif #define osCharUpperW ((LPWSTR(WINAPI*)(LPWSTR))aSyscall[2].pCurrent) { "CloseHandle", (SYSCALL)CloseHandle, 0 }, #define osCloseHandle ((BOOL(WINAPI*)(HANDLE))aSyscall[3].pCurrent) #if defined(SQLITE_WIN32_HAS_ANSI) { "CreateFileA", (SYSCALL)CreateFileA, 0 }, #else { "CreateFileA", (SYSCALL)0, 0 }, #endif #define osCreateFileA ((HANDLE(WINAPI*)(LPCSTR,DWORD,DWORD, \ LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[4].pCurrent) #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) { "CreateFileW", (SYSCALL)CreateFileW, 0 }, #else { "CreateFileW", (SYSCALL)0, 0 }, #endif #define osCreateFileW ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD, \ LPSECURITY_ATTRIBUTES,DWORD,DWORD,HANDLE))aSyscall[5].pCurrent) #if (!SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_ANSI) && \ (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) { "CreateFileMappingA", (SYSCALL)CreateFileMappingA, 0 }, #else { "CreateFileMappingA", (SYSCALL)0, 0 }, #endif #define osCreateFileMappingA ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \ DWORD,DWORD,DWORD,LPCSTR))aSyscall[6].pCurrent) #if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \ (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) { "CreateFileMappingW", (SYSCALL)CreateFileMappingW, 0 }, #else { "CreateFileMappingW", (SYSCALL)0, 0 }, #endif #define osCreateFileMappingW ((HANDLE(WINAPI*)(HANDLE,LPSECURITY_ATTRIBUTES, \ DWORD,DWORD,DWORD,LPCWSTR))aSyscall[7].pCurrent) #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) { "CreateMutexW", (SYSCALL)CreateMutexW, 0 }, #else { "CreateMutexW", (SYSCALL)0, 0 }, #endif #define osCreateMutexW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,BOOL, \ LPCWSTR))aSyscall[8].pCurrent) #if defined(SQLITE_WIN32_HAS_ANSI) { "DeleteFileA", (SYSCALL)DeleteFileA, 0 }, #else { "DeleteFileA", (SYSCALL)0, 0 }, #endif #define osDeleteFileA ((BOOL(WINAPI*)(LPCSTR))aSyscall[9].pCurrent) #if defined(SQLITE_WIN32_HAS_WIDE) { "DeleteFileW", (SYSCALL)DeleteFileW, 0 }, #else { "DeleteFileW", (SYSCALL)0, 0 }, #endif #define osDeleteFileW ((BOOL(WINAPI*)(LPCWSTR))aSyscall[10].pCurrent) #if SQLITE_OS_WINCE { "FileTimeToLocalFileTime", (SYSCALL)FileTimeToLocalFileTime, 0 }, #else { "FileTimeToLocalFileTime", (SYSCALL)0, 0 }, #endif #define osFileTimeToLocalFileTime ((BOOL(WINAPI*)(CONST FILETIME*, \ LPFILETIME))aSyscall[11].pCurrent) #if SQLITE_OS_WINCE { "FileTimeToSystemTime", (SYSCALL)FileTimeToSystemTime, 0 }, #else { "FileTimeToSystemTime", (SYSCALL)0, 0 }, #endif #define osFileTimeToSystemTime ((BOOL(WINAPI*)(CONST FILETIME*, \ LPSYSTEMTIME))aSyscall[12].pCurrent) { "FlushFileBuffers", (SYSCALL)FlushFileBuffers, 0 }, #define osFlushFileBuffers ((BOOL(WINAPI*)(HANDLE))aSyscall[13].pCurrent) #if defined(SQLITE_WIN32_HAS_ANSI) { "FormatMessageA", (SYSCALL)FormatMessageA, 0 }, #else { "FormatMessageA", (SYSCALL)0, 0 }, #endif #define osFormatMessageA ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPSTR, \ DWORD,va_list*))aSyscall[14].pCurrent) #if defined(SQLITE_WIN32_HAS_WIDE) { "FormatMessageW", (SYSCALL)FormatMessageW, 0 }, #else { "FormatMessageW", (SYSCALL)0, 0 }, #endif #define osFormatMessageW ((DWORD(WINAPI*)(DWORD,LPCVOID,DWORD,DWORD,LPWSTR, \ DWORD,va_list*))aSyscall[15].pCurrent) { "FreeLibrary", (SYSCALL)0, 0 }, #define osFreeLibrary ((BOOL(WINAPI*)(HMODULE))aSyscall[16].pCurrent) { "GetCurrentProcessId", (SYSCALL)GetCurrentProcessId, 0 }, #define osGetCurrentProcessId ((DWORD(WINAPI*)(VOID))aSyscall[17].pCurrent) #if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI) { "GetDiskFreeSpaceA", (SYSCALL)GetDiskFreeSpaceA, 0 }, #else { "GetDiskFreeSpaceA", (SYSCALL)0, 0 }, #endif #define osGetDiskFreeSpaceA ((BOOL(WINAPI*)(LPCSTR,LPDWORD,LPDWORD,LPDWORD, \ LPDWORD))aSyscall[18].pCurrent) #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) { "GetDiskFreeSpaceW", (SYSCALL)GetDiskFreeSpaceW, 0 }, #else { "GetDiskFreeSpaceW", (SYSCALL)0, 0 }, #endif #define osGetDiskFreeSpaceW ((BOOL(WINAPI*)(LPCWSTR,LPDWORD,LPDWORD,LPDWORD, \ LPDWORD))aSyscall[19].pCurrent) #if defined(SQLITE_WIN32_HAS_ANSI) { "GetFileAttributesA", (SYSCALL)GetFileAttributesA, 0 }, #else { "GetFileAttributesA", (SYSCALL)0, 0 }, #endif #define osGetFileAttributesA ((DWORD(WINAPI*)(LPCSTR))aSyscall[20].pCurrent) #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) { "GetFileAttributesW", (SYSCALL)GetFileAttributesW, 0 }, #else { "GetFileAttributesW", (SYSCALL)0, 0 }, #endif #define osGetFileAttributesW ((DWORD(WINAPI*)(LPCWSTR))aSyscall[21].pCurrent) #if defined(SQLITE_WIN32_HAS_WIDE) { "GetFileAttributesExW", (SYSCALL)GetFileAttributesExW, 0 }, #else { "GetFileAttributesExW", (SYSCALL)0, 0 }, #endif #define osGetFileAttributesExW ((BOOL(WINAPI*)(LPCWSTR,GET_FILEEX_INFO_LEVELS, \ LPVOID))aSyscall[22].pCurrent) #if !SQLITE_OS_WINRT { "GetFileSize", (SYSCALL)GetFileSize, 0 }, #else { "GetFileSize", (SYSCALL)0, 0 }, #endif #define osGetFileSize ((DWORD(WINAPI*)(HANDLE,LPDWORD))aSyscall[23].pCurrent) #if !SQLITE_OS_WINCE && defined(SQLITE_WIN32_HAS_ANSI) { "GetFullPathNameA", (SYSCALL)GetFullPathNameA, 0 }, #else { "GetFullPathNameA", (SYSCALL)0, 0 }, #endif #define osGetFullPathNameA ((DWORD(WINAPI*)(LPCSTR,DWORD,LPSTR, \ LPSTR*))aSyscall[24].pCurrent) #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) { "GetFullPathNameW", (SYSCALL)GetFullPathNameW, 0 }, #else { "GetFullPathNameW", (SYSCALL)0, 0 }, #endif #define osGetFullPathNameW ((DWORD(WINAPI*)(LPCWSTR,DWORD,LPWSTR, \ LPWSTR*))aSyscall[25].pCurrent) { "GetLastError", (SYSCALL)GetLastError, 0 }, #define osGetLastError ((DWORD(WINAPI*)(VOID))aSyscall[26].pCurrent) { "GetProcAddressA", (SYSCALL)0, 0 }, #define osGetProcAddressA ((FARPROC(WINAPI*)(HMODULE, \ LPCSTR))aSyscall[27].pCurrent) #if !SQLITE_OS_WINRT { "GetSystemInfo", (SYSCALL)GetSystemInfo, 0 }, #else { "GetSystemInfo", (SYSCALL)0, 0 }, #endif #define osGetSystemInfo ((VOID(WINAPI*)(LPSYSTEM_INFO))aSyscall[28].pCurrent) { "GetSystemTime", (SYSCALL)GetSystemTime, 0 }, #define osGetSystemTime ((VOID(WINAPI*)(LPSYSTEMTIME))aSyscall[29].pCurrent) #if !SQLITE_OS_WINCE { "GetSystemTimeAsFileTime", (SYSCALL)GetSystemTimeAsFileTime, 0 }, #else { "GetSystemTimeAsFileTime", (SYSCALL)0, 0 }, #endif #define osGetSystemTimeAsFileTime ((VOID(WINAPI*)( \ LPFILETIME))aSyscall[30].pCurrent) #if defined(SQLITE_WIN32_HAS_ANSI) { "GetTempPathA", (SYSCALL)GetTempPathA, 0 }, #else { "GetTempPathA", (SYSCALL)0, 0 }, #endif #define osGetTempPathA ((DWORD(WINAPI*)(DWORD,LPSTR))aSyscall[31].pCurrent) #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) { "GetTempPathW", (SYSCALL)GetTempPathW, 0 }, #else { "GetTempPathW", (SYSCALL)0, 0 }, #endif #define osGetTempPathW ((DWORD(WINAPI*)(DWORD,LPWSTR))aSyscall[32].pCurrent) #if !SQLITE_OS_WINRT { "GetTickCount", (SYSCALL)GetTickCount, 0 }, #else { "GetTickCount", (SYSCALL)0, 0 }, #endif #define osGetTickCount ((DWORD(WINAPI*)(VOID))aSyscall[33].pCurrent) #if defined(SQLITE_WIN32_HAS_ANSI) && defined(SQLITE_WIN32_GETVERSIONEX) && \ SQLITE_WIN32_GETVERSIONEX { "GetVersionExA", (SYSCALL)GetVersionExA, 0 }, #else { "GetVersionExA", (SYSCALL)0, 0 }, #endif #define osGetVersionExA ((BOOL(WINAPI*)( \ LPOSVERSIONINFOA))aSyscall[34].pCurrent) #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \ defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX { "GetVersionExW", (SYSCALL)GetVersionExW, 0 }, #else { "GetVersionExW", (SYSCALL)0, 0 }, #endif #define osGetVersionExW ((BOOL(WINAPI*)( \ LPOSVERSIONINFOW))aSyscall[35].pCurrent) { "HeapAlloc", (SYSCALL)HeapAlloc, 0 }, #define osHeapAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD, \ SIZE_T))aSyscall[36].pCurrent) #if !SQLITE_OS_WINRT { "HeapCreate", (SYSCALL)HeapCreate, 0 }, #else { "HeapCreate", (SYSCALL)0, 0 }, #endif #define osHeapCreate ((HANDLE(WINAPI*)(DWORD,SIZE_T, \ SIZE_T))aSyscall[37].pCurrent) #if !SQLITE_OS_WINRT { "HeapDestroy", (SYSCALL)HeapDestroy, 0 }, #else { "HeapDestroy", (SYSCALL)0, 0 }, #endif #define osHeapDestroy ((BOOL(WINAPI*)(HANDLE))aSyscall[38].pCurrent) { "HeapFree", (SYSCALL)HeapFree, 0 }, #define osHeapFree ((BOOL(WINAPI*)(HANDLE,DWORD,LPVOID))aSyscall[39].pCurrent) { "HeapReAlloc", (SYSCALL)HeapReAlloc, 0 }, #define osHeapReAlloc ((LPVOID(WINAPI*)(HANDLE,DWORD,LPVOID, \ SIZE_T))aSyscall[40].pCurrent) { "HeapSize", (SYSCALL)HeapSize, 0 }, #define osHeapSize ((SIZE_T(WINAPI*)(HANDLE,DWORD, \ LPCVOID))aSyscall[41].pCurrent) #if !SQLITE_OS_WINRT { "HeapValidate", (SYSCALL)HeapValidate, 0 }, #else { "HeapValidate", (SYSCALL)0, 0 }, #endif #define osHeapValidate ((BOOL(WINAPI*)(HANDLE,DWORD, \ LPCVOID))aSyscall[42].pCurrent) #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT { "HeapCompact", (SYSCALL)HeapCompact, 0 }, #else { "HeapCompact", (SYSCALL)0, 0 }, #endif #define osHeapCompact ((UINT(WINAPI*)(HANDLE,DWORD))aSyscall[43].pCurrent) { "LoadLibraryA", (SYSCALL)0, 0 }, #define osLoadLibraryA ((HMODULE(WINAPI*)(LPCSTR))aSyscall[44].pCurrent) #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) { "LoadLibraryW", (SYSCALL)LoadLibraryW, 0 }, #else { "LoadLibraryW", (SYSCALL)0, 0 }, #endif #define osLoadLibraryW ((HMODULE(WINAPI*)(LPCWSTR))aSyscall[45].pCurrent) #if !SQLITE_OS_WINRT { "LocalFree", (SYSCALL)LocalFree, 0 }, #else { "LocalFree", (SYSCALL)0, 0 }, #endif #define osLocalFree ((HLOCAL(WINAPI*)(HLOCAL))aSyscall[46].pCurrent) #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT { "LockFile", (SYSCALL)LockFile, 0 }, #else { "LockFile", (SYSCALL)0, 0 }, #endif #ifndef osLockFile #define osLockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ DWORD))aSyscall[47].pCurrent) #endif #if !SQLITE_OS_WINCE { "LockFileEx", (SYSCALL)LockFileEx, 0 }, #else { "LockFileEx", (SYSCALL)0, 0 }, #endif #ifndef osLockFileEx #define osLockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD,DWORD, \ LPOVERLAPPED))aSyscall[48].pCurrent) #endif #if SQLITE_OS_WINCE || (!SQLITE_OS_WINRT && \ (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)) { "MapViewOfFile", (SYSCALL)MapViewOfFile, 0 }, #else { "MapViewOfFile", (SYSCALL)0, 0 }, #endif #define osMapViewOfFile ((LPVOID(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ SIZE_T))aSyscall[49].pCurrent) { "MultiByteToWideChar", (SYSCALL)MultiByteToWideChar, 0 }, #define osMultiByteToWideChar ((int(WINAPI*)(UINT,DWORD,LPCSTR,int,LPWSTR, \ int))aSyscall[50].pCurrent) { "QueryPerformanceCounter", (SYSCALL)QueryPerformanceCounter, 0 }, #define osQueryPerformanceCounter ((BOOL(WINAPI*)( \ LARGE_INTEGER*))aSyscall[51].pCurrent) { "ReadFile", (SYSCALL)ReadFile, 0 }, #define osReadFile ((BOOL(WINAPI*)(HANDLE,LPVOID,DWORD,LPDWORD, \ LPOVERLAPPED))aSyscall[52].pCurrent) { "SetEndOfFile", (SYSCALL)SetEndOfFile, 0 }, #define osSetEndOfFile ((BOOL(WINAPI*)(HANDLE))aSyscall[53].pCurrent) #if !SQLITE_OS_WINRT { "SetFilePointer", (SYSCALL)SetFilePointer, 0 }, #else { "SetFilePointer", (SYSCALL)0, 0 }, #endif #define osSetFilePointer ((DWORD(WINAPI*)(HANDLE,LONG,PLONG, \ DWORD))aSyscall[54].pCurrent) #if !SQLITE_OS_WINRT { "Sleep", (SYSCALL)Sleep, 0 }, #else { "Sleep", (SYSCALL)0, 0 }, #endif #define osSleep ((VOID(WINAPI*)(DWORD))aSyscall[55].pCurrent) { "SystemTimeToFileTime", (SYSCALL)SystemTimeToFileTime, 0 }, #define osSystemTimeToFileTime ((BOOL(WINAPI*)(CONST SYSTEMTIME*, \ LPFILETIME))aSyscall[56].pCurrent) #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT { "UnlockFile", (SYSCALL)UnlockFile, 0 }, #else { "UnlockFile", (SYSCALL)0, 0 }, #endif #ifndef osUnlockFile #define osUnlockFile ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ DWORD))aSyscall[57].pCurrent) #endif #if !SQLITE_OS_WINCE { "UnlockFileEx", (SYSCALL)UnlockFileEx, 0 }, #else { "UnlockFileEx", (SYSCALL)0, 0 }, #endif #define osUnlockFileEx ((BOOL(WINAPI*)(HANDLE,DWORD,DWORD,DWORD, \ LPOVERLAPPED))aSyscall[58].pCurrent) #if SQLITE_OS_WINCE || !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 { "UnmapViewOfFile", (SYSCALL)UnmapViewOfFile, 0 }, #else { "UnmapViewOfFile", (SYSCALL)0, 0 }, #endif #define osUnmapViewOfFile ((BOOL(WINAPI*)(LPCVOID))aSyscall[59].pCurrent) { "WideCharToMultiByte", (SYSCALL)WideCharToMultiByte, 0 }, #define osWideCharToMultiByte ((int(WINAPI*)(UINT,DWORD,LPCWSTR,int,LPSTR,int, \ LPCSTR,LPBOOL))aSyscall[60].pCurrent) { "WriteFile", (SYSCALL)WriteFile, 0 }, #define osWriteFile ((BOOL(WINAPI*)(HANDLE,LPCVOID,DWORD,LPDWORD, \ LPOVERLAPPED))aSyscall[61].pCurrent) #if SQLITE_OS_WINRT { "CreateEventExW", (SYSCALL)CreateEventExW, 0 }, #else { "CreateEventExW", (SYSCALL)0, 0 }, #endif #define osCreateEventExW ((HANDLE(WINAPI*)(LPSECURITY_ATTRIBUTES,LPCWSTR, \ DWORD,DWORD))aSyscall[62].pCurrent) #if !SQLITE_OS_WINRT { "WaitForSingleObject", (SYSCALL)WaitForSingleObject, 0 }, #else { "WaitForSingleObject", (SYSCALL)0, 0 }, #endif #define osWaitForSingleObject ((DWORD(WINAPI*)(HANDLE, \ DWORD))aSyscall[63].pCurrent) #if !SQLITE_OS_WINCE { "WaitForSingleObjectEx", (SYSCALL)WaitForSingleObjectEx, 0 }, #else { "WaitForSingleObjectEx", (SYSCALL)0, 0 }, #endif #define osWaitForSingleObjectEx ((DWORD(WINAPI*)(HANDLE,DWORD, \ BOOL))aSyscall[64].pCurrent) #if SQLITE_OS_WINRT { "SetFilePointerEx", (SYSCALL)SetFilePointerEx, 0 }, #else { "SetFilePointerEx", (SYSCALL)0, 0 }, #endif #define osSetFilePointerEx ((BOOL(WINAPI*)(HANDLE,LARGE_INTEGER, \ PLARGE_INTEGER,DWORD))aSyscall[65].pCurrent) #if SQLITE_OS_WINRT { "GetFileInformationByHandleEx", (SYSCALL)GetFileInformationByHandleEx, 0 }, #else { "GetFileInformationByHandleEx", (SYSCALL)0, 0 }, #endif #define osGetFileInformationByHandleEx ((BOOL(WINAPI*)(HANDLE, \ FILE_INFO_BY_HANDLE_CLASS,LPVOID,DWORD))aSyscall[66].pCurrent) #if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) { "MapViewOfFileFromApp", (SYSCALL)MapViewOfFileFromApp, 0 }, #else { "MapViewOfFileFromApp", (SYSCALL)0, 0 }, #endif #define osMapViewOfFileFromApp ((LPVOID(WINAPI*)(HANDLE,ULONG,ULONG64, \ SIZE_T))aSyscall[67].pCurrent) #if SQLITE_OS_WINRT { "CreateFile2", (SYSCALL)CreateFile2, 0 }, #else { "CreateFile2", (SYSCALL)0, 0 }, #endif #define osCreateFile2 ((HANDLE(WINAPI*)(LPCWSTR,DWORD,DWORD,DWORD, \ LPCREATEFILE2_EXTENDED_PARAMETERS))aSyscall[68].pCurrent) { "LoadPackagedLibrary", (SYSCALL)0, 0 }, #define osLoadPackagedLibrary ((HMODULE(WINAPI*)(LPCWSTR, \ DWORD))aSyscall[69].pCurrent) #if SQLITE_OS_WINRT { "GetTickCount64", (SYSCALL)GetTickCount64, 0 }, #else { "GetTickCount64", (SYSCALL)0, 0 }, #endif #define osGetTickCount64 ((ULONGLONG(WINAPI*)(VOID))aSyscall[70].pCurrent) #if SQLITE_OS_WINRT { "GetNativeSystemInfo", (SYSCALL)GetNativeSystemInfo, 0 }, #else { "GetNativeSystemInfo", (SYSCALL)0, 0 }, #endif #define osGetNativeSystemInfo ((VOID(WINAPI*)( \ LPSYSTEM_INFO))aSyscall[71].pCurrent) #if defined(SQLITE_WIN32_HAS_ANSI) { "OutputDebugStringA", (SYSCALL)OutputDebugStringA, 0 }, #else { "OutputDebugStringA", (SYSCALL)0, 0 }, #endif #define osOutputDebugStringA ((VOID(WINAPI*)(LPCSTR))aSyscall[72].pCurrent) #if defined(SQLITE_WIN32_HAS_WIDE) { "OutputDebugStringW", (SYSCALL)OutputDebugStringW, 0 }, #else { "OutputDebugStringW", (SYSCALL)0, 0 }, #endif #define osOutputDebugStringW ((VOID(WINAPI*)(LPCWSTR))aSyscall[73].pCurrent) { "GetProcessHeap", (SYSCALL)GetProcessHeap, 0 }, #define osGetProcessHeap ((HANDLE(WINAPI*)(VOID))aSyscall[74].pCurrent) #if SQLITE_OS_WINRT && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0) { "CreateFileMappingFromApp", (SYSCALL)CreateFileMappingFromApp, 0 }, #else { "CreateFileMappingFromApp", (SYSCALL)0, 0 }, #endif #define osCreateFileMappingFromApp ((HANDLE(WINAPI*)(HANDLE, \ LPSECURITY_ATTRIBUTES,ULONG,ULONG64,LPCWSTR))aSyscall[75].pCurrent) /* ** NOTE: On some sub-platforms, the InterlockedCompareExchange "function" ** is really just a macro that uses a compiler intrinsic (e.g. x64). ** So do not try to make this is into a redefinable interface. */ #if defined(InterlockedCompareExchange) { "InterlockedCompareExchange", (SYSCALL)0, 0 }, #define osInterlockedCompareExchange InterlockedCompareExchange #else { "InterlockedCompareExchange", (SYSCALL)InterlockedCompareExchange, 0 }, #define osInterlockedCompareExchange ((LONG(WINAPI*)(LONG \ SQLITE_WIN32_VOLATILE*, LONG,LONG))aSyscall[76].pCurrent) #endif /* defined(InterlockedCompareExchange) */ #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID { "UuidCreate", (SYSCALL)UuidCreate, 0 }, #else { "UuidCreate", (SYSCALL)0, 0 }, #endif #define osUuidCreate ((RPC_STATUS(RPC_ENTRY*)(UUID*))aSyscall[77].pCurrent) #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID { "UuidCreateSequential", (SYSCALL)UuidCreateSequential, 0 }, #else { "UuidCreateSequential", (SYSCALL)0, 0 }, #endif #define osUuidCreateSequential \ ((RPC_STATUS(RPC_ENTRY*)(UUID*))aSyscall[78].pCurrent) #if !defined(SQLITE_NO_SYNC) && SQLITE_MAX_MMAP_SIZE>0 { "FlushViewOfFile", (SYSCALL)FlushViewOfFile, 0 }, #else { "FlushViewOfFile", (SYSCALL)0, 0 }, #endif #define osFlushViewOfFile \ ((BOOL(WINAPI*)(LPCVOID,SIZE_T))aSyscall[79].pCurrent) }; /* End of the overrideable system calls */ /* ** This is the xSetSystemCall() method of sqlite3_vfs for all of the ** "win32" VFSes. Return SQLITE_OK opon successfully updating the ** system call pointer, or SQLITE_NOTFOUND if there is no configurable ** system call named zName. */ static int winSetSystemCall( sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */ const char *zName, /* Name of system call to override */ sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */ ){ unsigned int i; int rc = SQLITE_NOTFOUND; UNUSED_PARAMETER(pNotUsed); if( zName==0 ){ /* If no zName is given, restore all system calls to their default ** settings and return NULL */ rc = SQLITE_OK; for(i=0; i0 && !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && \ SQLITE_THREADSAFE>0 SQLITE_PRIVATE DWORD sqlite3Win32Wait(HANDLE hObject){ DWORD rc; while( (rc = osWaitForSingleObjectEx(hObject, INFINITE, TRUE))==WAIT_IO_COMPLETION ){} return rc; } #endif /* ** Return true (non-zero) if we are running under WinNT, Win2K, WinXP, ** or WinCE. Return false (zero) for Win95, Win98, or WinME. ** ** Here is an interesting observation: Win95, Win98, and WinME lack ** the LockFileEx() API. But we can still statically link against that ** API as long as we don't call it when running Win95/98/ME. A call to ** this routine is used to determine if the host is Win95/98/ME or ** WinNT/2K/XP so that we will know whether or not we can safely call ** the LockFileEx() API. */ #if !defined(SQLITE_WIN32_GETVERSIONEX) || !SQLITE_WIN32_GETVERSIONEX # define osIsNT() (1) #elif SQLITE_OS_WINCE || SQLITE_OS_WINRT || !defined(SQLITE_WIN32_HAS_ANSI) # define osIsNT() (1) #elif !defined(SQLITE_WIN32_HAS_WIDE) # define osIsNT() (0) #else # define osIsNT() ((sqlite3_os_type==2) || sqlite3_win32_is_nt()) #endif /* ** This function determines if the machine is running a version of Windows ** based on the NT kernel. */ SQLITE_PRIVATE int sqlite3_win32_is_nt(void){ #if SQLITE_OS_WINRT /* ** NOTE: The WinRT sub-platform is always assumed to be based on the NT ** kernel. */ return 1; #elif defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX if( osInterlockedCompareExchange(&sqlite3_os_type, 0, 0)==0 ){ #if defined(SQLITE_WIN32_HAS_ANSI) OSVERSIONINFOA sInfo; sInfo.dwOSVersionInfoSize = sizeof(sInfo); osGetVersionExA(&sInfo); osInterlockedCompareExchange(&sqlite3_os_type, (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0); #elif defined(SQLITE_WIN32_HAS_WIDE) OSVERSIONINFOW sInfo; sInfo.dwOSVersionInfoSize = sizeof(sInfo); osGetVersionExW(&sInfo); osInterlockedCompareExchange(&sqlite3_os_type, (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0); #endif } return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2; #elif SQLITE_TEST return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2; #else /* ** NOTE: All sub-platforms where the GetVersionEx[AW] functions are ** deprecated are always assumed to be based on the NT kernel. */ return 1; #endif } #ifdef SQLITE_WIN32_MALLOC /* ** Allocate nBytes of memory. */ static void *winMemMalloc(int nBytes){ HANDLE hHeap; void *p; winMemAssertMagic(); hHeap = winMemGetHeap(); assert( hHeap!=0 ); assert( hHeap!=INVALID_HANDLE_VALUE ); #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); #endif assert( nBytes>=0 ); p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); if( !p ){ sqlite3_log(SQLITE_NOMEM, "failed to HeapAlloc %u bytes (%lu), heap=%p", nBytes, osGetLastError(), (void*)hHeap); } return p; } /* ** Free memory. */ static void winMemFree(void *pPrior){ HANDLE hHeap; winMemAssertMagic(); hHeap = winMemGetHeap(); assert( hHeap!=0 ); assert( hHeap!=INVALID_HANDLE_VALUE ); #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); #endif if( !pPrior ) return; /* Passing NULL to HeapFree is undefined. */ if( !osHeapFree(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ){ sqlite3_log(SQLITE_NOMEM, "failed to HeapFree block %p (%lu), heap=%p", pPrior, osGetLastError(), (void*)hHeap); } } /* ** Change the size of an existing memory allocation */ static void *winMemRealloc(void *pPrior, int nBytes){ HANDLE hHeap; void *p; winMemAssertMagic(); hHeap = winMemGetHeap(); assert( hHeap!=0 ); assert( hHeap!=INVALID_HANDLE_VALUE ); #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ); #endif assert( nBytes>=0 ); if( !pPrior ){ p = osHeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes); }else{ p = osHeapReAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior, (SIZE_T)nBytes); } if( !p ){ sqlite3_log(SQLITE_NOMEM, "failed to %s %u bytes (%lu), heap=%p", pPrior ? "HeapReAlloc" : "HeapAlloc", nBytes, osGetLastError(), (void*)hHeap); } return p; } /* ** Return the size of an outstanding allocation, in bytes. */ static int winMemSize(void *p){ HANDLE hHeap; SIZE_T n; winMemAssertMagic(); hHeap = winMemGetHeap(); assert( hHeap!=0 ); assert( hHeap!=INVALID_HANDLE_VALUE ); #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) assert( osHeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, p) ); #endif if( !p ) return 0; n = osHeapSize(hHeap, SQLITE_WIN32_HEAP_FLAGS, p); if( n==(SIZE_T)-1 ){ sqlite3_log(SQLITE_NOMEM, "failed to HeapSize block %p (%lu), heap=%p", p, osGetLastError(), (void*)hHeap); return 0; } return (int)n; } /* ** Round up a request size to the next valid allocation size. */ static int winMemRoundup(int n){ return n; } /* ** Initialize this module. */ static int winMemInit(void *pAppData){ winMemData *pWinMemData = (winMemData *)pAppData; if( !pWinMemData ) return SQLITE_ERROR; assert( pWinMemData->magic1==WINMEM_MAGIC1 ); assert( pWinMemData->magic2==WINMEM_MAGIC2 ); #if !SQLITE_OS_WINRT && SQLITE_WIN32_HEAP_CREATE if( !pWinMemData->hHeap ){ DWORD dwInitialSize = SQLITE_WIN32_HEAP_INIT_SIZE; DWORD dwMaximumSize = (DWORD)sqlite3GlobalConfig.nHeap; if( dwMaximumSize==0 ){ dwMaximumSize = SQLITE_WIN32_HEAP_MAX_SIZE; }else if( dwInitialSize>dwMaximumSize ){ dwInitialSize = dwMaximumSize; } pWinMemData->hHeap = osHeapCreate(SQLITE_WIN32_HEAP_FLAGS, dwInitialSize, dwMaximumSize); if( !pWinMemData->hHeap ){ sqlite3_log(SQLITE_NOMEM, "failed to HeapCreate (%lu), flags=%u, initSize=%lu, maxSize=%lu", osGetLastError(), SQLITE_WIN32_HEAP_FLAGS, dwInitialSize, dwMaximumSize); return SQLITE_NOMEM; } pWinMemData->bOwned = TRUE; assert( pWinMemData->bOwned ); } #else pWinMemData->hHeap = osGetProcessHeap(); if( !pWinMemData->hHeap ){ sqlite3_log(SQLITE_NOMEM, "failed to GetProcessHeap (%lu)", osGetLastError()); return SQLITE_NOMEM; } pWinMemData->bOwned = FALSE; assert( !pWinMemData->bOwned ); #endif assert( pWinMemData->hHeap!=0 ); assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); #endif return SQLITE_OK; } /* ** Deinitialize this module. */ static void winMemShutdown(void *pAppData){ winMemData *pWinMemData = (winMemData *)pAppData; if( !pWinMemData ) return; assert( pWinMemData->magic1==WINMEM_MAGIC1 ); assert( pWinMemData->magic2==WINMEM_MAGIC2 ); if( pWinMemData->hHeap ){ assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE ); #if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_MALLOC_VALIDATE) assert( osHeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) ); #endif if( pWinMemData->bOwned ){ if( !osHeapDestroy(pWinMemData->hHeap) ){ sqlite3_log(SQLITE_NOMEM, "failed to HeapDestroy (%lu), heap=%p", osGetLastError(), (void*)pWinMemData->hHeap); } pWinMemData->bOwned = FALSE; } pWinMemData->hHeap = NULL; } } /* ** Populate the low-level memory allocation function pointers in ** sqlite3GlobalConfig.m with pointers to the routines in this file. The ** arguments specify the block of memory to manage. ** ** This routine is only called by sqlite3_config(), and therefore ** is not required to be threadsafe (it is not). */ SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void){ static const sqlite3_mem_methods winMemMethods = { winMemMalloc, winMemFree, winMemRealloc, winMemSize, winMemRoundup, winMemInit, winMemShutdown, &win_mem_data }; return &winMemMethods; } SQLITE_PRIVATE void sqlite3MemSetDefault(void){ sqlite3_config(SQLITE_CONFIG_MALLOC, sqlite3MemGetWin32()); } #endif /* SQLITE_WIN32_MALLOC */ /* ** Convert a UTF-8 string to Microsoft Unicode (UTF-16?). ** ** Space to hold the returned string is obtained from malloc. */ static LPWSTR winUtf8ToUnicode(const char *zFilename){ int nChar; LPWSTR zWideFilename; nChar = osMultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0); if( nChar==0 ){ return 0; } zWideFilename = sqlite3MallocZero( nChar*sizeof(zWideFilename[0]) ); if( zWideFilename==0 ){ return 0; } nChar = osMultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nChar); if( nChar==0 ){ sqlite3_free(zWideFilename); zWideFilename = 0; } return zWideFilename; } /* ** Convert Microsoft Unicode to UTF-8. Space to hold the returned string is ** obtained from sqlite3_malloc(). */ static char *winUnicodeToUtf8(LPCWSTR zWideFilename){ int nByte; char *zFilename; nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0); if( nByte == 0 ){ return 0; } zFilename = sqlite3MallocZero( nByte ); if( zFilename==0 ){ return 0; } nByte = osWideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte, 0, 0); if( nByte == 0 ){ sqlite3_free(zFilename); zFilename = 0; } return zFilename; } /* ** Convert an ANSI string to Microsoft Unicode, based on the ** current codepage settings for file apis. ** ** Space to hold the returned string is obtained ** from sqlite3_malloc. */ static LPWSTR winMbcsToUnicode(const char *zFilename){ int nByte; LPWSTR zMbcsFilename; int codepage = osAreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = osMultiByteToWideChar(codepage, 0, zFilename, -1, NULL, 0)*sizeof(WCHAR); if( nByte==0 ){ return 0; } zMbcsFilename = sqlite3MallocZero( nByte*sizeof(zMbcsFilename[0]) ); if( zMbcsFilename==0 ){ return 0; } nByte = osMultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, nByte); if( nByte==0 ){ sqlite3_free(zMbcsFilename); zMbcsFilename = 0; } return zMbcsFilename; } /* ** Convert Microsoft Unicode to multi-byte character string, based on the ** user's ANSI codepage. ** ** Space to hold the returned string is obtained from ** sqlite3_malloc(). */ static char *winUnicodeToMbcs(LPCWSTR zWideFilename){ int nByte; char *zFilename; int codepage = osAreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = osWideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0); if( nByte == 0 ){ return 0; } zFilename = sqlite3MallocZero( nByte ); if( zFilename==0 ){ return 0; } nByte = osWideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename, nByte, 0, 0); if( nByte == 0 ){ sqlite3_free(zFilename); zFilename = 0; } return zFilename; } /* ** Convert multibyte character string to UTF-8. Space to hold the ** returned string is obtained from sqlite3_malloc(). */ SQLITE_PRIVATE char *sqlite3_win32_mbcs_to_utf8(const char *zFilename){ char *zFilenameUtf8; LPWSTR zTmpWide; zTmpWide = winMbcsToUnicode(zFilename); if( zTmpWide==0 ){ return 0; } zFilenameUtf8 = winUnicodeToUtf8(zTmpWide); sqlite3_free(zTmpWide); return zFilenameUtf8; } /* ** Convert UTF-8 to multibyte character string. Space to hold the ** returned string is obtained from sqlite3_malloc(). */ SQLITE_PRIVATE char *sqlite3_win32_utf8_to_mbcs(const char *zFilename){ char *zFilenameMbcs; LPWSTR zTmpWide; zTmpWide = winUtf8ToUnicode(zFilename); if( zTmpWide==0 ){ return 0; } zFilenameMbcs = winUnicodeToMbcs(zTmpWide); sqlite3_free(zTmpWide); return zFilenameMbcs; } /* ** The return value of winGetLastErrorMsg ** is zero if the error message fits in the buffer, or non-zero ** otherwise (if the message was truncated). */ static int winGetLastErrorMsg(DWORD lastErrno, int nBuf, char *zBuf){ /* FormatMessage returns 0 on failure. Otherwise it ** returns the number of TCHARs written to the output ** buffer, excluding the terminating null char. */ DWORD dwLen = 0; char *zOut = 0; if( osIsNT() ){ #if SQLITE_OS_WINRT WCHAR zTempWide[SQLITE_WIN32_MAX_ERRMSG_CHARS+1]; dwLen = osFormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, lastErrno, 0, zTempWide, SQLITE_WIN32_MAX_ERRMSG_CHARS, 0); #else LPWSTR zTempWide = NULL; dwLen = osFormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, lastErrno, 0, (LPWSTR) &zTempWide, 0, 0); #endif if( dwLen > 0 ){ /* allocate a buffer and convert to UTF8 */ zOut = winUnicodeToUtf8(zTempWide); #if !SQLITE_OS_WINRT /* free the system buffer allocated by FormatMessage */ osLocalFree(zTempWide); #endif } } #ifdef SQLITE_WIN32_HAS_ANSI else{ char *zTemp = NULL; dwLen = osFormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, lastErrno, 0, (LPSTR) &zTemp, 0, 0); if( dwLen > 0 ){ /* allocate a buffer and convert to UTF8 */ zOut = sqlite3_win32_mbcs_to_utf8(zTemp); /* free the system buffer allocated by FormatMessage */ osLocalFree(zTemp); } } #endif if( 0 == dwLen ){ sqlite3_snprintf(nBuf, zBuf, "OsError 0x%lx (%lu)", lastErrno, lastErrno); }else{ /* copy a maximum of nBuf chars to output buffer */ sqlite3_snprintf(nBuf, zBuf, "%s", zOut); /* free the UTF8 buffer */ sqlite3_free(zOut); } return 0; } /* ** ** This function - winLogErrorAtLine() - is only ever called via the macro ** winLogError(). ** ** This routine is invoked after an error occurs in an OS function. ** It logs a message using sqlite3_log() containing the current value of ** error code and, if possible, the human-readable equivalent from ** FormatMessage. ** ** The first argument passed to the macro should be the error code that ** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN). ** The two subsequent arguments should be the name of the OS function that ** failed and the associated file-system path, if any. */ #define winLogError(a,b,c,d) winLogErrorAtLine(a,b,c,d,__LINE__) static int winLogErrorAtLine( int errcode, /* SQLite error code */ DWORD lastErrno, /* Win32 last error */ const char *zFunc, /* Name of OS function that failed */ const char *zPath, /* File path associated with error */ int iLine /* Source line number where error occurred */ ){ char zMsg[500]; /* Human readable error text */ int i; /* Loop counter */ zMsg[0] = 0; winGetLastErrorMsg(lastErrno, sizeof(zMsg), zMsg); assert( errcode!=SQLITE_OK ); if( zPath==0 ) zPath = ""; for(i=0; zMsg[i] && zMsg[i]!='\r' && zMsg[i]!='\n'; i++){} zMsg[i] = 0; sqlite3_log(errcode, "os_win.c:%d: (%lu) %s(%s) - %s", iLine, lastErrno, zFunc, zPath, zMsg ); return errcode; } /* ** The number of times that a ReadFile(), WriteFile(), and DeleteFile() ** will be retried following a locking error - probably caused by ** antivirus software. Also the initial delay before the first retry. ** The delay increases linearly with each retry. */ #ifndef SQLITE_WIN32_IOERR_RETRY # define SQLITE_WIN32_IOERR_RETRY 10 #endif #ifndef SQLITE_WIN32_IOERR_RETRY_DELAY # define SQLITE_WIN32_IOERR_RETRY_DELAY 25 #endif static int winIoerrRetry = SQLITE_WIN32_IOERR_RETRY; static int winIoerrRetryDelay = SQLITE_WIN32_IOERR_RETRY_DELAY; /* ** The "winIoerrCanRetry1" macro is used to determine if a particular I/O ** error code obtained via GetLastError() is eligible to be retried. It ** must accept the error code DWORD as its only argument and should return ** non-zero if the error code is transient in nature and the operation ** responsible for generating the original error might succeed upon being ** retried. The argument to this macro should be a variable. ** ** Additionally, a macro named "winIoerrCanRetry2" may be defined. If it ** is defined, it will be consulted only when the macro "winIoerrCanRetry1" ** returns zero. The "winIoerrCanRetry2" macro is completely optional and ** may be used to include additional error codes in the set that should ** result in the failing I/O operation being retried by the caller. If ** defined, the "winIoerrCanRetry2" macro must exhibit external semantics ** identical to those of the "winIoerrCanRetry1" macro. */ #if !defined(winIoerrCanRetry1) #define winIoerrCanRetry1(a) (((a)==ERROR_ACCESS_DENIED) || \ ((a)==ERROR_SHARING_VIOLATION) || \ ((a)==ERROR_LOCK_VIOLATION) || \ ((a)==ERROR_DEV_NOT_EXIST) || \ ((a)==ERROR_NETNAME_DELETED) || \ ((a)==ERROR_SEM_TIMEOUT) || \ ((a)==ERROR_NETWORK_UNREACHABLE)) #endif /* ** If a ReadFile() or WriteFile() error occurs, invoke this routine ** to see if it should be retried. Return TRUE to retry. Return FALSE ** to give up with an error. */ static int winRetryIoerr(int *pnRetry, DWORD *pError){ DWORD e = osGetLastError(); if( *pnRetry>=winIoerrRetry ){ if( pError ){ *pError = e; } return 0; } if( winIoerrCanRetry1(e) ){ sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry)); ++*pnRetry; return 1; } #if defined(winIoerrCanRetry2) else if( winIoerrCanRetry2(e) ){ sqlite3_win32_sleep(winIoerrRetryDelay*(1+*pnRetry)); ++*pnRetry; return 1; } #endif if( pError ){ *pError = e; } return 0; } /* ** Log a I/O error retry episode. */ static void winLogIoerr(int nRetry, int lineno){ if( nRetry ){ sqlite3_log(SQLITE_NOTICE, "delayed %dms for lock/sharing conflict at line %d", winIoerrRetryDelay*nRetry*(nRetry+1)/2, lineno ); } } #if SQLITE_OS_WINCE /************************************************************************* ** This section contains code for WinCE only. */ #if !defined(SQLITE_MSVC_LOCALTIME_API) || !SQLITE_MSVC_LOCALTIME_API /* ** The MSVC CRT on Windows CE may not have a localtime() function. So ** create a substitute. */ /* #include */ struct tm *__cdecl localtime(const time_t *t) { static struct tm y; FILETIME uTm, lTm; SYSTEMTIME pTm; sqlite3_int64 t64; t64 = *t; t64 = (t64 + 11644473600)*10000000; uTm.dwLowDateTime = (DWORD)(t64 & 0xFFFFFFFF); uTm.dwHighDateTime= (DWORD)(t64 >> 32); osFileTimeToLocalFileTime(&uTm,&lTm); osFileTimeToSystemTime(&lTm,&pTm); y.tm_year = pTm.wYear - 1900; y.tm_mon = pTm.wMonth - 1; y.tm_wday = pTm.wDayOfWeek; y.tm_mday = pTm.wDay; y.tm_hour = pTm.wHour; y.tm_min = pTm.wMinute; y.tm_sec = pTm.wSecond; return &y; } #endif #define HANDLE_TO_WINFILE(a) (winFile*)&((char*)a)[-(int)offsetof(winFile,h)] /* ** Acquire a lock on the handle h */ static void winceMutexAcquire(HANDLE h){ DWORD dwErr; do { dwErr = osWaitForSingleObject(h, INFINITE); } while (dwErr != WAIT_OBJECT_0 && dwErr != WAIT_ABANDONED); } /* ** Release a lock acquired by winceMutexAcquire() */ #define winceMutexRelease(h) ReleaseMutex(h) /* ** Create the mutex and shared memory used for locking in the file ** descriptor pFile */ static int winceCreateLock(const char *zFilename, winFile *pFile){ LPWSTR zTok; LPWSTR zName; DWORD lastErrno; BOOL bLogged = FALSE; BOOL bInit = TRUE; zName = winUtf8ToUnicode(zFilename); if( zName==0 ){ /* out of memory */ return SQLITE_IOERR_NOMEM; } /* Initialize the local lockdata */ memset(&pFile->local, 0, sizeof(pFile->local)); /* Replace the backslashes from the filename and lowercase it ** to derive a mutex name. */ zTok = osCharLowerW(zName); for (;*zTok;zTok++){ if (*zTok == '\\') *zTok = '_'; } /* Create/open the named mutex */ pFile->hMutex = osCreateMutexW(NULL, FALSE, zName); if (!pFile->hMutex){ pFile->lastErrno = osGetLastError(); sqlite3_free(zName); return winLogError(SQLITE_IOERR, pFile->lastErrno, "winceCreateLock1", zFilename); } /* Acquire the mutex before continuing */ winceMutexAcquire(pFile->hMutex); /* Since the names of named mutexes, semaphores, file mappings etc are ** case-sensitive, take advantage of that by uppercasing the mutex name ** and using that as the shared filemapping name. */ osCharUpperW(zName); pFile->hShared = osCreateFileMappingW(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, sizeof(winceLock), zName); /* Set a flag that indicates we're the first to create the memory so it ** must be zero-initialized */ lastErrno = osGetLastError(); if (lastErrno == ERROR_ALREADY_EXISTS){ bInit = FALSE; } sqlite3_free(zName); /* If we succeeded in making the shared memory handle, map it. */ if( pFile->hShared ){ pFile->shared = (winceLock*)osMapViewOfFile(pFile->hShared, FILE_MAP_READ|FILE_MAP_WRITE, 0, 0, sizeof(winceLock)); /* If mapping failed, close the shared memory handle and erase it */ if( !pFile->shared ){ pFile->lastErrno = osGetLastError(); winLogError(SQLITE_IOERR, pFile->lastErrno, "winceCreateLock2", zFilename); bLogged = TRUE; osCloseHandle(pFile->hShared); pFile->hShared = NULL; } } /* If shared memory could not be created, then close the mutex and fail */ if( pFile->hShared==NULL ){ if( !bLogged ){ pFile->lastErrno = lastErrno; winLogError(SQLITE_IOERR, pFile->lastErrno, "winceCreateLock3", zFilename); bLogged = TRUE; } winceMutexRelease(pFile->hMutex); osCloseHandle(pFile->hMutex); pFile->hMutex = NULL; return SQLITE_IOERR; } /* Initialize the shared memory if we're supposed to */ if( bInit ){ memset(pFile->shared, 0, sizeof(winceLock)); } winceMutexRelease(pFile->hMutex); return SQLITE_OK; } /* ** Destroy the part of winFile that deals with wince locks */ static void winceDestroyLock(winFile *pFile){ if (pFile->hMutex){ /* Acquire the mutex */ winceMutexAcquire(pFile->hMutex); /* The following blocks should probably assert in debug mode, but they are to cleanup in case any locks remained open */ if (pFile->local.nReaders){ pFile->shared->nReaders --; } if (pFile->local.bReserved){ pFile->shared->bReserved = FALSE; } if (pFile->local.bPending){ pFile->shared->bPending = FALSE; } if (pFile->local.bExclusive){ pFile->shared->bExclusive = FALSE; } /* De-reference and close our copy of the shared memory handle */ osUnmapViewOfFile(pFile->shared); osCloseHandle(pFile->hShared); /* Done with the mutex */ winceMutexRelease(pFile->hMutex); osCloseHandle(pFile->hMutex); pFile->hMutex = NULL; } } /* ** An implementation of the LockFile() API of Windows for CE */ static BOOL winceLockFile( LPHANDLE phFile, DWORD dwFileOffsetLow, DWORD dwFileOffsetHigh, DWORD nNumberOfBytesToLockLow, DWORD nNumberOfBytesToLockHigh ){ winFile *pFile = HANDLE_TO_WINFILE(phFile); BOOL bReturn = FALSE; UNUSED_PARAMETER(dwFileOffsetHigh); UNUSED_PARAMETER(nNumberOfBytesToLockHigh); if (!pFile->hMutex) return TRUE; winceMutexAcquire(pFile->hMutex); /* Wanting an exclusive lock? */ if (dwFileOffsetLow == (DWORD)SHARED_FIRST && nNumberOfBytesToLockLow == (DWORD)SHARED_SIZE){ if (pFile->shared->nReaders == 0 && pFile->shared->bExclusive == 0){ pFile->shared->bExclusive = TRUE; pFile->local.bExclusive = TRUE; bReturn = TRUE; } } /* Want a read-only lock? */ else if (dwFileOffsetLow == (DWORD)SHARED_FIRST && nNumberOfBytesToLockLow == 1){ if (pFile->shared->bExclusive == 0){ pFile->local.nReaders ++; if (pFile->local.nReaders == 1){ pFile->shared->nReaders ++; } bReturn = TRUE; } } /* Want a pending lock? */ else if (dwFileOffsetLow == (DWORD)PENDING_BYTE && nNumberOfBytesToLockLow == 1){ /* If no pending lock has been acquired, then acquire it */ if (pFile->shared->bPending == 0) { pFile->shared->bPending = TRUE; pFile->local.bPending = TRUE; bReturn = TRUE; } } /* Want a reserved lock? */ else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE && nNumberOfBytesToLockLow == 1){ if (pFile->shared->bReserved == 0) { pFile->shared->bReserved = TRUE; pFile->local.bReserved = TRUE; bReturn = TRUE; } } winceMutexRelease(pFile->hMutex); return bReturn; } /* ** An implementation of the UnlockFile API of Windows for CE */ static BOOL winceUnlockFile( LPHANDLE phFile, DWORD dwFileOffsetLow, DWORD dwFileOffsetHigh, DWORD nNumberOfBytesToUnlockLow, DWORD nNumberOfBytesToUnlockHigh ){ winFile *pFile = HANDLE_TO_WINFILE(phFile); BOOL bReturn = FALSE; UNUSED_PARAMETER(dwFileOffsetHigh); UNUSED_PARAMETER(nNumberOfBytesToUnlockHigh); if (!pFile->hMutex) return TRUE; winceMutexAcquire(pFile->hMutex); /* Releasing a reader lock or an exclusive lock */ if (dwFileOffsetLow == (DWORD)SHARED_FIRST){ /* Did we have an exclusive lock? */ if (pFile->local.bExclusive){ assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE); pFile->local.bExclusive = FALSE; pFile->shared->bExclusive = FALSE; bReturn = TRUE; } /* Did we just have a reader lock? */ else if (pFile->local.nReaders){ assert(nNumberOfBytesToUnlockLow == (DWORD)SHARED_SIZE || nNumberOfBytesToUnlockLow == 1); pFile->local.nReaders --; if (pFile->local.nReaders == 0) { pFile->shared->nReaders --; } bReturn = TRUE; } } /* Releasing a pending lock */ else if (dwFileOffsetLow == (DWORD)PENDING_BYTE && nNumberOfBytesToUnlockLow == 1){ if (pFile->local.bPending){ pFile->local.bPending = FALSE; pFile->shared->bPending = FALSE; bReturn = TRUE; } } /* Releasing a reserved lock */ else if (dwFileOffsetLow == (DWORD)RESERVED_BYTE && nNumberOfBytesToUnlockLow == 1){ if (pFile->local.bReserved) { pFile->local.bReserved = FALSE; pFile->shared->bReserved = FALSE; bReturn = TRUE; } } winceMutexRelease(pFile->hMutex); return bReturn; } /* ** End of the special code for wince *****************************************************************************/ #endif /* SQLITE_OS_WINCE */ /* ** Lock a file region. */ static BOOL winLockFile( LPHANDLE phFile, DWORD flags, DWORD offsetLow, DWORD offsetHigh, DWORD numBytesLow, DWORD numBytesHigh ){ #if SQLITE_OS_WINCE /* ** NOTE: Windows CE is handled differently here due its lack of the Win32 ** API LockFile. */ return winceLockFile(phFile, offsetLow, offsetHigh, numBytesLow, numBytesHigh); #else if( osIsNT() ){ OVERLAPPED ovlp; memset(&ovlp, 0, sizeof(OVERLAPPED)); ovlp.Offset = offsetLow; ovlp.OffsetHigh = offsetHigh; return osLockFileEx(*phFile, flags, 0, numBytesLow, numBytesHigh, &ovlp); }else{ return osLockFile(*phFile, offsetLow, offsetHigh, numBytesLow, numBytesHigh); } #endif } /* ** Unlock a file region. */ static BOOL winUnlockFile( LPHANDLE phFile, DWORD offsetLow, DWORD offsetHigh, DWORD numBytesLow, DWORD numBytesHigh ){ #if SQLITE_OS_WINCE /* ** NOTE: Windows CE is handled differently here due its lack of the Win32 ** API UnlockFile. */ return winceUnlockFile(phFile, offsetLow, offsetHigh, numBytesLow, numBytesHigh); #else if( osIsNT() ){ OVERLAPPED ovlp; memset(&ovlp, 0, sizeof(OVERLAPPED)); ovlp.Offset = offsetLow; ovlp.OffsetHigh = offsetHigh; return osUnlockFileEx(*phFile, 0, numBytesLow, numBytesHigh, &ovlp); }else{ return osUnlockFile(*phFile, offsetLow, offsetHigh, numBytesLow, numBytesHigh); } #endif } /***************************************************************************** ** The next group of routines implement the I/O methods specified ** by the sqlite3_io_methods object. ******************************************************************************/ /* ** Some Microsoft compilers lack this definition. */ #ifndef INVALID_SET_FILE_POINTER # define INVALID_SET_FILE_POINTER ((DWORD)-1) #endif /* ** Move the current position of the file handle passed as the first ** argument to offset iOffset within the file. If successful, return 0. ** Otherwise, set pFile->lastErrno and return non-zero. */ static int winSeekFile(winFile *pFile, sqlite3_int64 iOffset){ #if !SQLITE_OS_WINRT LONG upperBits; /* Most sig. 32 bits of new offset */ LONG lowerBits; /* Least sig. 32 bits of new offset */ DWORD dwRet; /* Value returned by SetFilePointer() */ DWORD lastErrno; /* Value returned by GetLastError() */ OSTRACE(("SEEK file=%p, offset=%lld\n", pFile->h, iOffset)); upperBits = (LONG)((iOffset>>32) & 0x7fffffff); lowerBits = (LONG)(iOffset & 0xffffffff); /* API oddity: If successful, SetFilePointer() returns a dword ** containing the lower 32-bits of the new file-offset. Or, if it fails, ** it returns INVALID_SET_FILE_POINTER. However according to MSDN, ** INVALID_SET_FILE_POINTER may also be a valid new offset. So to determine ** whether an error has actually occurred, it is also necessary to call ** GetLastError(). */ dwRet = osSetFilePointer(pFile->h, lowerBits, &upperBits, FILE_BEGIN); if( (dwRet==INVALID_SET_FILE_POINTER && ((lastErrno = osGetLastError())!=NO_ERROR)) ){ pFile->lastErrno = lastErrno; winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, "winSeekFile", pFile->zPath); OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); return 1; } OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); return 0; #else /* ** Same as above, except that this implementation works for WinRT. */ LARGE_INTEGER x; /* The new offset */ BOOL bRet; /* Value returned by SetFilePointerEx() */ x.QuadPart = iOffset; bRet = osSetFilePointerEx(pFile->h, x, 0, FILE_BEGIN); if(!bRet){ pFile->lastErrno = osGetLastError(); winLogError(SQLITE_IOERR_SEEK, pFile->lastErrno, "winSeekFile", pFile->zPath); OSTRACE(("SEEK file=%p, rc=SQLITE_IOERR_SEEK\n", pFile->h)); return 1; } OSTRACE(("SEEK file=%p, rc=SQLITE_OK\n", pFile->h)); return 0; #endif } #if SQLITE_MAX_MMAP_SIZE>0 /* Forward references to VFS helper methods used for memory mapped files */ static int winMapfile(winFile*, sqlite3_int64); static int winUnmapfile(winFile*); #endif /* ** Close a file. ** ** It is reported that an attempt to close a handle might sometimes ** fail. This is a very unreasonable result, but Windows is notorious ** for being unreasonable so I do not doubt that it might happen. If ** the close fails, we pause for 100 milliseconds and try again. As ** many as MX_CLOSE_ATTEMPT attempts to close the handle are made before ** giving up and returning an error. */ #define MX_CLOSE_ATTEMPT 3 static int winClose(sqlite3_file *id){ int rc, cnt = 0; winFile *pFile = (winFile*)id; assert( id!=0 ); #ifndef SQLITE_OMIT_WAL assert( pFile->pShm==0 ); #endif assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE ); OSTRACE(("CLOSE pid=%lu, pFile=%p, file=%p\n", osGetCurrentProcessId(), pFile, pFile->h)); #if SQLITE_MAX_MMAP_SIZE>0 winUnmapfile(pFile); #endif do{ rc = osCloseHandle(pFile->h); /* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */ }while( rc==0 && ++cnt < MX_CLOSE_ATTEMPT && (sqlite3_win32_sleep(100), 1) ); #if SQLITE_OS_WINCE #define WINCE_DELETION_ATTEMPTS 3 winceDestroyLock(pFile); if( pFile->zDeleteOnClose ){ int cnt = 0; while( osDeleteFileW(pFile->zDeleteOnClose)==0 && osGetFileAttributesW(pFile->zDeleteOnClose)!=0xffffffff && cnt++ < WINCE_DELETION_ATTEMPTS ){ sqlite3_win32_sleep(100); /* Wait a little before trying again */ } sqlite3_free(pFile->zDeleteOnClose); } #endif if( rc ){ pFile->h = NULL; } OpenCounter(-1); OSTRACE(("CLOSE pid=%lu, pFile=%p, file=%p, rc=%s\n", osGetCurrentProcessId(), pFile, pFile->h, rc ? "ok" : "failed")); return rc ? SQLITE_OK : winLogError(SQLITE_IOERR_CLOSE, osGetLastError(), "winClose", pFile->zPath); } /* ** Read data from a file into a buffer. Return SQLITE_OK if all ** bytes were read successfully and SQLITE_IOERR if anything goes ** wrong. */ static int winRead( sqlite3_file *id, /* File to read from */ void *pBuf, /* Write content into this buffer */ int amt, /* Number of bytes to read */ sqlite3_int64 offset /* Begin reading at this offset */ ){ #if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) OVERLAPPED overlapped; /* The offset for ReadFile. */ #endif winFile *pFile = (winFile*)id; /* file handle */ DWORD nRead; /* Number of bytes actually read from file */ int nRetry = 0; /* Number of retrys */ assert( id!=0 ); assert( amt>0 ); assert( offset>=0 ); SimulateIOError(return SQLITE_IOERR_READ); OSTRACE(("READ pid=%lu, pFile=%p, file=%p, buffer=%p, amount=%d, " "offset=%lld, lock=%d\n", osGetCurrentProcessId(), pFile, pFile->h, pBuf, amt, offset, pFile->locktype)); #if SQLITE_MAX_MMAP_SIZE>0 /* Deal with as much of this read request as possible by transfering ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); OSTRACE(("READ-MMAP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_OK; }else{ int nCopy = (int)(pFile->mmapSize - offset); memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); pBuf = &((u8 *)pBuf)[nCopy]; amt -= nCopy; offset += nCopy; } } #endif #if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) if( winSeekFile(pFile, offset) ){ OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_FULL\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_FULL; } while( !osReadFile(pFile->h, pBuf, amt, &nRead, 0) ){ #else memset(&overlapped, 0, sizeof(OVERLAPPED)); overlapped.Offset = (LONG)(offset & 0xffffffff); overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); while( !osReadFile(pFile->h, pBuf, amt, &nRead, &overlapped) && osGetLastError()!=ERROR_HANDLE_EOF ){ #endif DWORD lastErrno; if( winRetryIoerr(&nRetry, &lastErrno) ) continue; pFile->lastErrno = lastErrno; OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_READ\n", osGetCurrentProcessId(), pFile, pFile->h)); return winLogError(SQLITE_IOERR_READ, pFile->lastErrno, "winRead", pFile->zPath); } winLogIoerr(nRetry, __LINE__); if( nRead<(DWORD)amt ){ /* Unread parts of the buffer must be zero-filled */ memset(&((char*)pBuf)[nRead], 0, amt-nRead); OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_SHORT_READ\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_IOERR_SHORT_READ; } OSTRACE(("READ pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_OK; } /* ** Write data from a buffer into a file. Return SQLITE_OK on success ** or some other error code on failure. */ static int winWrite( sqlite3_file *id, /* File to write into */ const void *pBuf, /* The bytes to be written */ int amt, /* Number of bytes to write */ sqlite3_int64 offset /* Offset into the file to begin writing at */ ){ int rc = 0; /* True if error has occurred, else false */ winFile *pFile = (winFile*)id; /* File handle */ int nRetry = 0; /* Number of retries */ assert( amt>0 ); assert( pFile ); SimulateIOError(return SQLITE_IOERR_WRITE); SimulateDiskfullError(return SQLITE_FULL); OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, buffer=%p, amount=%d, " "offset=%lld, lock=%d\n", osGetCurrentProcessId(), pFile, pFile->h, pBuf, amt, offset, pFile->locktype)); #if SQLITE_MAX_MMAP_SIZE>0 /* Deal with as much of this write request as possible by transfering ** data from the memory mapping using memcpy(). */ if( offsetmmapSize ){ if( offset+amt <= pFile->mmapSize ){ memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); OSTRACE(("WRITE-MMAP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_OK; }else{ int nCopy = (int)(pFile->mmapSize - offset); memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); pBuf = &((u8 *)pBuf)[nCopy]; amt -= nCopy; offset += nCopy; } } #endif #if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) rc = winSeekFile(pFile, offset); if( rc==0 ){ #else { #endif #if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) OVERLAPPED overlapped; /* The offset for WriteFile. */ #endif u8 *aRem = (u8 *)pBuf; /* Data yet to be written */ int nRem = amt; /* Number of bytes yet to be written */ DWORD nWrite; /* Bytes written by each WriteFile() call */ DWORD lastErrno = NO_ERROR; /* Value returned by GetLastError() */ #if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) memset(&overlapped, 0, sizeof(OVERLAPPED)); overlapped.Offset = (LONG)(offset & 0xffffffff); overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); #endif while( nRem>0 ){ #if SQLITE_OS_WINCE || defined(SQLITE_WIN32_NO_OVERLAPPED) if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, 0) ){ #else if( !osWriteFile(pFile->h, aRem, nRem, &nWrite, &overlapped) ){ #endif if( winRetryIoerr(&nRetry, &lastErrno) ) continue; break; } assert( nWrite==0 || nWrite<=(DWORD)nRem ); if( nWrite==0 || nWrite>(DWORD)nRem ){ lastErrno = osGetLastError(); break; } #if !SQLITE_OS_WINCE && !defined(SQLITE_WIN32_NO_OVERLAPPED) offset += nWrite; overlapped.Offset = (LONG)(offset & 0xffffffff); overlapped.OffsetHigh = (LONG)((offset>>32) & 0x7fffffff); #endif aRem += nWrite; nRem -= nWrite; } if( nRem>0 ){ pFile->lastErrno = lastErrno; rc = 1; } } if( rc ){ if( ( pFile->lastErrno==ERROR_HANDLE_DISK_FULL ) || ( pFile->lastErrno==ERROR_DISK_FULL )){ OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_FULL\n", osGetCurrentProcessId(), pFile, pFile->h)); return winLogError(SQLITE_FULL, pFile->lastErrno, "winWrite1", pFile->zPath); } OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_WRITE\n", osGetCurrentProcessId(), pFile, pFile->h)); return winLogError(SQLITE_IOERR_WRITE, pFile->lastErrno, "winWrite2", pFile->zPath); }else{ winLogIoerr(nRetry, __LINE__); } OSTRACE(("WRITE pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_OK; } /* ** Truncate an open file to a specified size */ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ winFile *pFile = (winFile*)id; /* File handle object */ int rc = SQLITE_OK; /* Return code for this function */ DWORD lastErrno; assert( pFile ); SimulateIOError(return SQLITE_IOERR_TRUNCATE); OSTRACE(("TRUNCATE pid=%lu, pFile=%p, file=%p, size=%lld, lock=%d\n", osGetCurrentProcessId(), pFile, pFile->h, nByte, pFile->locktype)); /* If the user has configured a chunk-size for this file, truncate the ** file so that it consists of an integer number of chunks (i.e. the ** actual file size after the operation may be larger than the requested ** size). */ if( pFile->szChunk>0 ){ nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; } /* SetEndOfFile() returns non-zero when successful, or zero when it fails. */ if( winSeekFile(pFile, nByte) ){ rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, "winTruncate1", pFile->zPath); }else if( 0==osSetEndOfFile(pFile->h) && ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){ pFile->lastErrno = lastErrno; rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, "winTruncate2", pFile->zPath); } #if SQLITE_MAX_MMAP_SIZE>0 /* If the file was truncated to a size smaller than the currently ** mapped region, reduce the effective mapping size as well. SQLite will ** use read() and write() to access data beyond this point from now on. */ if( pFile->pMapRegion && nBytemmapSize ){ pFile->mmapSize = nByte; } #endif OSTRACE(("TRUNCATE pid=%lu, pFile=%p, file=%p, rc=%s\n", osGetCurrentProcessId(), pFile, pFile->h, sqlite3ErrName(rc))); return rc; } #ifdef SQLITE_TEST /* ** Count the number of fullsyncs and normal syncs. This is used to test ** that syncs and fullsyncs are occuring at the right times. */ SQLITE_PRIVATE int sqlite3_sync_count = 0; SQLITE_PRIVATE int sqlite3_fullsync_count = 0; #endif /* ** Make sure all writes to a particular file are committed to disk. */ static int winSync(sqlite3_file *id, int flags){ #ifndef SQLITE_NO_SYNC /* ** Used only when SQLITE_NO_SYNC is not defined. */ BOOL rc; #endif #if !defined(NDEBUG) || !defined(SQLITE_NO_SYNC) || \ defined(SQLITE_HAVE_OS_TRACE) /* ** Used when SQLITE_NO_SYNC is not defined and by the assert() and/or ** OSTRACE() macros. */ winFile *pFile = (winFile*)id; #else UNUSED_PARAMETER(id); #endif assert( pFile ); /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */ assert((flags&0x0F)==SQLITE_SYNC_NORMAL || (flags&0x0F)==SQLITE_SYNC_FULL ); /* Unix cannot, but some systems may return SQLITE_FULL from here. This ** line is to test that doing so does not cause any problems. */ SimulateDiskfullError( return SQLITE_FULL ); OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, flags=%x, lock=%d\n", osGetCurrentProcessId(), pFile, pFile->h, flags, pFile->locktype)); #ifndef SQLITE_TEST UNUSED_PARAMETER(flags); #else if( (flags&0x0F)==SQLITE_SYNC_FULL ){ sqlite3_fullsync_count++; } sqlite3_sync_count++; #endif /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a ** no-op */ #ifdef SQLITE_NO_SYNC OSTRACE(("SYNC-NOP pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_OK; #else #if SQLITE_MAX_MMAP_SIZE>0 if( pFile->pMapRegion ){ if( osFlushViewOfFile(pFile->pMapRegion, 0) ){ OSTRACE(("SYNC-MMAP pid=%lu, pFile=%p, pMapRegion=%p, " "rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile, pFile->pMapRegion)); }else{ pFile->lastErrno = osGetLastError(); OSTRACE(("SYNC-MMAP pid=%lu, pFile=%p, pMapRegion=%p, " "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile, pFile->pMapRegion)); return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, "winSync1", pFile->zPath); } } #endif rc = osFlushFileBuffers(pFile->h); SimulateIOError( rc=FALSE ); if( rc ){ OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile, pFile->h)); return SQLITE_OK; }else{ pFile->lastErrno = osGetLastError(); OSTRACE(("SYNC pid=%lu, pFile=%p, file=%p, rc=SQLITE_IOERR_FSYNC\n", osGetCurrentProcessId(), pFile, pFile->h)); return winLogError(SQLITE_IOERR_FSYNC, pFile->lastErrno, "winSync2", pFile->zPath); } #endif } /* ** Determine the current size of a file in bytes */ static int winFileSize(sqlite3_file *id, sqlite3_int64 *pSize){ winFile *pFile = (winFile*)id; int rc = SQLITE_OK; assert( id!=0 ); assert( pSize!=0 ); SimulateIOError(return SQLITE_IOERR_FSTAT); OSTRACE(("SIZE file=%p, pSize=%p\n", pFile->h, pSize)); #if SQLITE_OS_WINRT { FILE_STANDARD_INFO info; if( osGetFileInformationByHandleEx(pFile->h, FileStandardInfo, &info, sizeof(info)) ){ *pSize = info.EndOfFile.QuadPart; }else{ pFile->lastErrno = osGetLastError(); rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno, "winFileSize", pFile->zPath); } } #else { DWORD upperBits; DWORD lowerBits; DWORD lastErrno; lowerBits = osGetFileSize(pFile->h, &upperBits); *pSize = (((sqlite3_int64)upperBits)<<32) + lowerBits; if( (lowerBits == INVALID_FILE_SIZE) && ((lastErrno = osGetLastError())!=NO_ERROR) ){ pFile->lastErrno = lastErrno; rc = winLogError(SQLITE_IOERR_FSTAT, pFile->lastErrno, "winFileSize", pFile->zPath); } } #endif OSTRACE(("SIZE file=%p, pSize=%p, *pSize=%lld, rc=%s\n", pFile->h, pSize, *pSize, sqlite3ErrName(rc))); return rc; } /* ** LOCKFILE_FAIL_IMMEDIATELY is undefined on some Windows systems. */ #ifndef LOCKFILE_FAIL_IMMEDIATELY # define LOCKFILE_FAIL_IMMEDIATELY 1 #endif #ifndef LOCKFILE_EXCLUSIVE_LOCK # define LOCKFILE_EXCLUSIVE_LOCK 2 #endif /* ** Historically, SQLite has used both the LockFile and LockFileEx functions. ** When the LockFile function was used, it was always expected to fail ** immediately if the lock could not be obtained. Also, it always expected to ** obtain an exclusive lock. These flags are used with the LockFileEx function ** and reflect those expectations; therefore, they should not be changed. */ #ifndef SQLITE_LOCKFILE_FLAGS # define SQLITE_LOCKFILE_FLAGS (LOCKFILE_FAIL_IMMEDIATELY | \ LOCKFILE_EXCLUSIVE_LOCK) #endif /* ** Currently, SQLite never calls the LockFileEx function without wanting the ** call to fail immediately if the lock cannot be obtained. */ #ifndef SQLITE_LOCKFILEEX_FLAGS # define SQLITE_LOCKFILEEX_FLAGS (LOCKFILE_FAIL_IMMEDIATELY) #endif /* ** Acquire a reader lock. ** Different API routines are called depending on whether or not this ** is Win9x or WinNT. */ static int winGetReadLock(winFile *pFile){ int res; OSTRACE(("READ-LOCK file=%p, lock=%d\n", pFile->h, pFile->locktype)); if( osIsNT() ){ #if SQLITE_OS_WINCE /* ** NOTE: Windows CE is handled differently here due its lack of the Win32 ** API LockFileEx. */ res = winceLockFile(&pFile->h, SHARED_FIRST, 0, 1, 0); #else res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS, SHARED_FIRST, 0, SHARED_SIZE, 0); #endif } #ifdef SQLITE_WIN32_HAS_ANSI else{ int lk; sqlite3_randomness(sizeof(lk), &lk); pFile->sharedLockByte = (short)((lk & 0x7fffffff)%(SHARED_SIZE - 1)); res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0); } #endif if( res == 0 ){ pFile->lastErrno = osGetLastError(); /* No need to log a failure to lock */ } OSTRACE(("READ-LOCK file=%p, result=%d\n", pFile->h, res)); return res; } /* ** Undo a readlock */ static int winUnlockReadLock(winFile *pFile){ int res; DWORD lastErrno; OSTRACE(("READ-UNLOCK file=%p, lock=%d\n", pFile->h, pFile->locktype)); if( osIsNT() ){ res = winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); } #ifdef SQLITE_WIN32_HAS_ANSI else{ res = winUnlockFile(&pFile->h, SHARED_FIRST+pFile->sharedLockByte, 0, 1, 0); } #endif if( res==0 && ((lastErrno = osGetLastError())!=ERROR_NOT_LOCKED) ){ pFile->lastErrno = lastErrno; winLogError(SQLITE_IOERR_UNLOCK, pFile->lastErrno, "winUnlockReadLock", pFile->zPath); } OSTRACE(("READ-UNLOCK file=%p, result=%d\n", pFile->h, res)); return res; } /* ** Lock the file with the lock specified by parameter locktype - one ** of the following: ** ** (1) SHARED_LOCK ** (2) RESERVED_LOCK ** (3) PENDING_LOCK ** (4) EXCLUSIVE_LOCK ** ** Sometimes when requesting one lock state, additional lock states ** are inserted in between. The locking might fail on one of the later ** transitions leaving the lock state different from what it started but ** still short of its goal. The following chart shows the allowed ** transitions and the inserted intermediate states: ** ** UNLOCKED -> SHARED ** SHARED -> RESERVED ** SHARED -> (PENDING) -> EXCLUSIVE ** RESERVED -> (PENDING) -> EXCLUSIVE ** PENDING -> EXCLUSIVE ** ** This routine will only increase a lock. The winUnlock() routine ** erases all locks at once and returns us immediately to locking level 0. ** It is not possible to lower the locking level one step at a time. You ** must go straight to locking level 0. */ static int winLock(sqlite3_file *id, int locktype){ int rc = SQLITE_OK; /* Return code from subroutines */ int res = 1; /* Result of a Windows lock call */ int newLocktype; /* Set pFile->locktype to this value before exiting */ int gotPendingLock = 0;/* True if we acquired a PENDING lock this time */ winFile *pFile = (winFile*)id; DWORD lastErrno = NO_ERROR; assert( id!=0 ); OSTRACE(("LOCK file=%p, oldLock=%d(%d), newLock=%d\n", pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); /* If there is already a lock of this type or more restrictive on the ** OsFile, do nothing. Don't use the end_lock: exit path, as ** sqlite3OsEnterMutex() hasn't been called yet. */ if( pFile->locktype>=locktype ){ OSTRACE(("LOCK-HELD file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } /* Do not allow any kind of write-lock on a read-only database */ if( (pFile->ctrlFlags & WINFILE_RDONLY)!=0 && locktype>=RESERVED_LOCK ){ return SQLITE_IOERR_LOCK; } /* Make sure the locking sequence is correct */ assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); assert( locktype!=PENDING_LOCK ); assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK ); /* Lock the PENDING_LOCK byte if we need to acquire a PENDING lock or ** a SHARED lock. If we are acquiring a SHARED lock, the acquisition of ** the PENDING_LOCK byte is temporary. */ newLocktype = pFile->locktype; if( (pFile->locktype==NO_LOCK) || ( (locktype==EXCLUSIVE_LOCK) && (pFile->locktype==RESERVED_LOCK)) ){ int cnt = 3; while( cnt-->0 && (res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, PENDING_BYTE, 0, 1, 0))==0 ){ /* Try 3 times to get the pending lock. This is needed to work ** around problems caused by indexing and/or anti-virus software on ** Windows systems. ** If you are using this code as a model for alternative VFSes, do not ** copy this retry logic. It is a hack intended for Windows only. */ lastErrno = osGetLastError(); OSTRACE(("LOCK-PENDING-FAIL file=%p, count=%d, result=%d\n", pFile->h, cnt, res)); if( lastErrno==ERROR_INVALID_HANDLE ){ pFile->lastErrno = lastErrno; rc = SQLITE_IOERR_LOCK; OSTRACE(("LOCK-FAIL file=%p, count=%d, rc=%s\n", pFile->h, cnt, sqlite3ErrName(rc))); return rc; } if( cnt ) sqlite3_win32_sleep(1); } gotPendingLock = res; if( !res ){ lastErrno = osGetLastError(); } } /* Acquire a shared lock */ if( locktype==SHARED_LOCK && res ){ assert( pFile->locktype==NO_LOCK ); res = winGetReadLock(pFile); if( res ){ newLocktype = SHARED_LOCK; }else{ lastErrno = osGetLastError(); } } /* Acquire a RESERVED lock */ if( locktype==RESERVED_LOCK && res ){ assert( pFile->locktype==SHARED_LOCK ); res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, RESERVED_BYTE, 0, 1, 0); if( res ){ newLocktype = RESERVED_LOCK; }else{ lastErrno = osGetLastError(); } } /* Acquire a PENDING lock */ if( locktype==EXCLUSIVE_LOCK && res ){ newLocktype = PENDING_LOCK; gotPendingLock = 0; } /* Acquire an EXCLUSIVE lock */ if( locktype==EXCLUSIVE_LOCK && res ){ assert( pFile->locktype>=SHARED_LOCK ); res = winUnlockReadLock(pFile); res = winLockFile(&pFile->h, SQLITE_LOCKFILE_FLAGS, SHARED_FIRST, 0, SHARED_SIZE, 0); if( res ){ newLocktype = EXCLUSIVE_LOCK; }else{ lastErrno = osGetLastError(); winGetReadLock(pFile); } } /* If we are holding a PENDING lock that ought to be released, then ** release it now. */ if( gotPendingLock && locktype==SHARED_LOCK ){ winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); } /* Update the state of the lock has held in the file descriptor then ** return the appropriate result code. */ if( res ){ rc = SQLITE_OK; }else{ pFile->lastErrno = lastErrno; rc = SQLITE_BUSY; OSTRACE(("LOCK-FAIL file=%p, wanted=%d, got=%d\n", pFile->h, locktype, newLocktype)); } pFile->locktype = (u8)newLocktype; OSTRACE(("LOCK file=%p, lock=%d, rc=%s\n", pFile->h, pFile->locktype, sqlite3ErrName(rc))); return rc; } /* ** This routine checks if there is a RESERVED lock held on the specified ** file by this or any other process. If such a lock is held, return ** non-zero, otherwise zero. */ static int winCheckReservedLock(sqlite3_file *id, int *pResOut){ int res; winFile *pFile = (winFile*)id; SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p\n", pFile->h, pResOut)); assert( id!=0 ); if( pFile->locktype>=RESERVED_LOCK ){ res = 1; OSTRACE(("TEST-WR-LOCK file=%p, result=%d (local)\n", pFile->h, res)); }else{ res = winLockFile(&pFile->h, SQLITE_LOCKFILEEX_FLAGS,RESERVED_BYTE, 0, 1, 0); if( res ){ winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); } res = !res; OSTRACE(("TEST-WR-LOCK file=%p, result=%d (remote)\n", pFile->h, res)); } *pResOut = res; OSTRACE(("TEST-WR-LOCK file=%p, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", pFile->h, pResOut, *pResOut)); return SQLITE_OK; } /* ** Lower the locking level on file descriptor id to locktype. locktype ** must be either NO_LOCK or SHARED_LOCK. ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. ** ** It is not possible for this routine to fail if the second argument ** is NO_LOCK. If the second argument is SHARED_LOCK then this routine ** might return SQLITE_IOERR; */ static int winUnlock(sqlite3_file *id, int locktype){ int type; winFile *pFile = (winFile*)id; int rc = SQLITE_OK; assert( pFile!=0 ); assert( locktype<=SHARED_LOCK ); OSTRACE(("UNLOCK file=%p, oldLock=%d(%d), newLock=%d\n", pFile->h, pFile->locktype, pFile->sharedLockByte, locktype)); type = pFile->locktype; if( type>=EXCLUSIVE_LOCK ){ winUnlockFile(&pFile->h, SHARED_FIRST, 0, SHARED_SIZE, 0); if( locktype==SHARED_LOCK && !winGetReadLock(pFile) ){ /* This should never happen. We should always be able to ** reacquire the read lock */ rc = winLogError(SQLITE_IOERR_UNLOCK, osGetLastError(), "winUnlock", pFile->zPath); } } if( type>=RESERVED_LOCK ){ winUnlockFile(&pFile->h, RESERVED_BYTE, 0, 1, 0); } if( locktype==NO_LOCK && type>=SHARED_LOCK ){ winUnlockReadLock(pFile); } if( type>=PENDING_LOCK ){ winUnlockFile(&pFile->h, PENDING_BYTE, 0, 1, 0); } pFile->locktype = (u8)locktype; OSTRACE(("UNLOCK file=%p, lock=%d, rc=%s\n", pFile->h, pFile->locktype, sqlite3ErrName(rc))); return rc; } /* ** If *pArg is initially negative then this is a query. Set *pArg to ** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. ** ** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. */ static void winModeBit(winFile *pFile, unsigned char mask, int *pArg){ if( *pArg<0 ){ *pArg = (pFile->ctrlFlags & mask)!=0; }else if( (*pArg)==0 ){ pFile->ctrlFlags &= ~mask; }else{ pFile->ctrlFlags |= mask; } } /* Forward references to VFS helper methods used for temporary files */ static int winGetTempname(sqlite3_vfs *, char **); static int winIsDir(const void *); static BOOL winIsDriveLetterAndColon(const char *); /* ** Control and query of the open file handle. */ static int winFileControl(sqlite3_file *id, int op, void *pArg){ winFile *pFile = (winFile*)id; OSTRACE(("FCNTL file=%p, op=%d, pArg=%p\n", pFile->h, op, pArg)); switch( op ){ case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->locktype; OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_LAST_ERRNO: { *(int*)pArg = (int)pFile->lastErrno; OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_CHUNK_SIZE: { pFile->szChunk = *(int *)pArg; OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { if( pFile->szChunk>0 ){ sqlite3_int64 oldSz; int rc = winFileSize(id, &oldSz); if( rc==SQLITE_OK ){ sqlite3_int64 newSz = *(sqlite3_int64*)pArg; if( newSz>oldSz ){ SimulateIOErrorBenign(1); rc = winTruncate(id, newSz); SimulateIOErrorBenign(0); } } OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); return rc; } OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_PERSIST_WAL: { winModeBit(pFile, WINFILE_PERSIST_WAL, (int*)pArg); OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { winModeBit(pFile, WINFILE_PSOW, (int*)pArg); OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_VFSNAME: { *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } case SQLITE_FCNTL_WIN32_AV_RETRY: { int *a = (int*)pArg; if( a[0]>0 ){ winIoerrRetry = a[0]; }else{ a[0] = winIoerrRetry; } if( a[1]>0 ){ winIoerrRetryDelay = a[1]; }else{ a[1] = winIoerrRetryDelay; } OSTRACE(("FCNTL file=%p, rc=SQLITE_OK\n", pFile->h)); return SQLITE_OK; } #ifdef SQLITE_TEST case SQLITE_FCNTL_WIN32_SET_HANDLE: { LPHANDLE phFile = (LPHANDLE)pArg; HANDLE hOldFile = pFile->h; pFile->h = *phFile; *phFile = hOldFile; OSTRACE(("FCNTL oldFile=%p, newFile=%p, rc=SQLITE_OK\n", hOldFile, pFile->h)); return SQLITE_OK; } #endif case SQLITE_FCNTL_TEMPFILENAME: { char *zTFile = 0; int rc = winGetTempname(pFile->pVfs, &zTFile); if( rc==SQLITE_OK ){ *(char**)pArg = zTFile; } OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); return rc; } #if SQLITE_MAX_MMAP_SIZE>0 case SQLITE_FCNTL_MMAP_SIZE: { i64 newLimit = *(i64*)pArg; int rc = SQLITE_OK; if( newLimit>sqlite3GlobalConfig.mxMmap ){ newLimit = sqlite3GlobalConfig.mxMmap; } *(i64*)pArg = pFile->mmapSizeMax; if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ pFile->mmapSizeMax = newLimit; if( pFile->mmapSize>0 ){ winUnmapfile(pFile); rc = winMapfile(pFile, -1); } } OSTRACE(("FCNTL file=%p, rc=%s\n", pFile->h, sqlite3ErrName(rc))); return rc; } #endif } OSTRACE(("FCNTL file=%p, rc=SQLITE_NOTFOUND\n", pFile->h)); return SQLITE_NOTFOUND; } /* ** Return the sector size in bytes of the underlying block device for ** the specified file. This is almost always 512 bytes, but may be ** larger for some devices. ** ** SQLite code assumes this function cannot fail. It also assumes that ** if two files are created in the same file-system directory (i.e. ** a database and its journal file) that the sector size will be the ** same for both. */ static int winSectorSize(sqlite3_file *id){ (void)id; return SQLITE_DEFAULT_SECTOR_SIZE; } /* ** Return a vector of device characteristics. */ static int winDeviceCharacteristics(sqlite3_file *id){ winFile *p = (winFile*)id; return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } /* ** Windows will only let you create file view mappings ** on allocation size granularity boundaries. ** During sqlite3_os_init() we do a GetSystemInfo() ** to get the granularity size. */ static SYSTEM_INFO winSysInfo; #ifndef SQLITE_OMIT_WAL /* ** Helper functions to obtain and relinquish the global mutex. The ** global mutex is used to protect the winLockInfo objects used by ** this file, all of which may be shared by multiple threads. ** ** Function winShmMutexHeld() is used to assert() that the global mutex ** is held when required. This function is only used as part of assert() ** statements. e.g. ** ** winShmEnterMutex() ** assert( winShmMutexHeld() ); ** winShmLeaveMutex() */ static void winShmEnterMutex(void){ sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } static void winShmLeaveMutex(void){ sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #ifndef NDEBUG static int winShmMutexHeld(void) { return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #endif /* ** Object used to represent a single file opened and mmapped to provide ** shared memory. When multiple threads all reference the same ** log-summary, each thread has its own winFile object, but they all ** point to a single instance of this object. In other words, each ** log-summary is opened only once per process. ** ** winShmMutexHeld() must be true when creating or destroying ** this object or while reading or writing the following fields: ** ** nRef ** pNext ** ** The following fields are read-only after the object is created: ** ** fid ** zFilename ** ** Either winShmNode.mutex must be held or winShmNode.nRef==0 and ** winShmMutexHeld() is true when reading or writing any other field ** in this structure. ** */ struct winShmNode { sqlite3_mutex *mutex; /* Mutex to access this object */ char *zFilename; /* Name of the file */ winFile hFile; /* File handle from winOpen */ int szRegion; /* Size of shared-memory regions */ int nRegion; /* Size of array apRegion */ struct ShmRegion { HANDLE hMap; /* File handle from CreateFileMapping */ void *pMap; } *aRegion; DWORD lastErrno; /* The Windows errno from the last I/O error */ int nRef; /* Number of winShm objects pointing to this */ winShm *pFirst; /* All winShm objects pointing to this */ winShmNode *pNext; /* Next in list of all winShmNode objects */ #if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) u8 nextShmId; /* Next available winShm.id value */ #endif }; /* ** A global array of all winShmNode objects. ** ** The winShmMutexHeld() must be true while reading or writing this list. */ static winShmNode *winShmNodeList = 0; /* ** Structure used internally by this VFS to record the state of an ** open shared memory connection. ** ** The following fields are initialized when this object is created and ** are read-only thereafter: ** ** winShm.pShmNode ** winShm.id ** ** All other fields are read/write. The winShm.pShmNode->mutex must be held ** while accessing any read/write fields. */ struct winShm { winShmNode *pShmNode; /* The underlying winShmNode object */ winShm *pNext; /* Next winShm with the same winShmNode */ u8 hasMutex; /* True if holding the winShmNode mutex */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ #if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) u8 id; /* Id of this connection with its winShmNode */ #endif }; /* ** Constants used for locking */ #define WIN_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */ #define WIN_SHM_DMS (WIN_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ /* ** Apply advisory locks for all n bytes beginning at ofst. */ #define _SHM_UNLCK 1 #define _SHM_RDLCK 2 #define _SHM_WRLCK 3 static int winShmSystemLock( winShmNode *pFile, /* Apply locks to this open shared-memory segment */ int lockType, /* _SHM_UNLCK, _SHM_RDLCK, or _SHM_WRLCK */ int ofst, /* Offset to first byte to be locked/unlocked */ int nByte /* Number of bytes to lock or unlock */ ){ int rc = 0; /* Result code form Lock/UnlockFileEx() */ /* Access to the winShmNode object is serialized by the caller */ assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 ); OSTRACE(("SHM-LOCK file=%p, lock=%d, offset=%d, size=%d\n", pFile->hFile.h, lockType, ofst, nByte)); /* Release/Acquire the system-level lock */ if( lockType==_SHM_UNLCK ){ rc = winUnlockFile(&pFile->hFile.h, ofst, 0, nByte, 0); }else{ /* Initialize the locking parameters */ DWORD dwFlags = LOCKFILE_FAIL_IMMEDIATELY; if( lockType == _SHM_WRLCK ) dwFlags |= LOCKFILE_EXCLUSIVE_LOCK; rc = winLockFile(&pFile->hFile.h, dwFlags, ofst, 0, nByte, 0); } if( rc!= 0 ){ rc = SQLITE_OK; }else{ pFile->lastErrno = osGetLastError(); rc = SQLITE_BUSY; } OSTRACE(("SHM-LOCK file=%p, func=%s, errno=%lu, rc=%s\n", pFile->hFile.h, (lockType == _SHM_UNLCK) ? "winUnlockFile" : "winLockFile", pFile->lastErrno, sqlite3ErrName(rc))); return rc; } /* Forward references to VFS methods */ static int winOpen(sqlite3_vfs*,const char*,sqlite3_file*,int,int*); static int winDelete(sqlite3_vfs *,const char*,int); /* ** Purge the winShmNodeList list of all entries with winShmNode.nRef==0. ** ** This is not a VFS shared-memory method; it is a utility function called ** by VFS shared-memory methods. */ static void winShmPurge(sqlite3_vfs *pVfs, int deleteFlag){ winShmNode **pp; winShmNode *p; assert( winShmMutexHeld() ); OSTRACE(("SHM-PURGE pid=%lu, deleteFlag=%d\n", osGetCurrentProcessId(), deleteFlag)); pp = &winShmNodeList; while( (p = *pp)!=0 ){ if( p->nRef==0 ){ int i; if( p->mutex ){ sqlite3_mutex_free(p->mutex); } for(i=0; inRegion; i++){ BOOL bRc = osUnmapViewOfFile(p->aRegion[i].pMap); OSTRACE(("SHM-PURGE-UNMAP pid=%lu, region=%d, rc=%s\n", osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); UNUSED_VARIABLE_VALUE(bRc); bRc = osCloseHandle(p->aRegion[i].hMap); OSTRACE(("SHM-PURGE-CLOSE pid=%lu, region=%d, rc=%s\n", osGetCurrentProcessId(), i, bRc ? "ok" : "failed")); UNUSED_VARIABLE_VALUE(bRc); } if( p->hFile.h!=NULL && p->hFile.h!=INVALID_HANDLE_VALUE ){ SimulateIOErrorBenign(1); winClose((sqlite3_file *)&p->hFile); SimulateIOErrorBenign(0); } if( deleteFlag ){ SimulateIOErrorBenign(1); winDelete(pVfs, p->zFilename, 0); SimulateIOErrorBenign(0); } *pp = p->pNext; sqlite3_free(p->aRegion); sqlite3_free(p); }else{ pp = &p->pNext; } } } /* ** Open the shared-memory area associated with database file pDbFd. ** ** When opening a new shared-memory file, if no other instances of that ** file are currently open, in this process or in other processes, then ** the file must be truncated to zero length or have its header cleared. */ static int winOpenSharedMemory(winFile *pDbFd){ struct winShm *p; /* The connection to be opened */ struct winShmNode *pShmNode = 0; /* The underlying mmapped file */ int rc; /* Result code */ struct winShmNode *pNew; /* Newly allocated winShmNode */ int nName; /* Size of zName in bytes */ assert( pDbFd->pShm==0 ); /* Not previously opened */ /* Allocate space for the new sqlite3_shm object. Also speculatively ** allocate space for a new winShmNode and filename. */ p = sqlite3MallocZero( sizeof(*p) ); if( p==0 ) return SQLITE_IOERR_NOMEM; nName = sqlite3Strlen30(pDbFd->zPath); pNew = sqlite3MallocZero( sizeof(*pShmNode) + nName + 17 ); if( pNew==0 ){ sqlite3_free(p); return SQLITE_IOERR_NOMEM; } pNew->zFilename = (char*)&pNew[1]; sqlite3_snprintf(nName+15, pNew->zFilename, "%s-shm", pDbFd->zPath); /* Look to see if there is an existing winShmNode that can be used. ** If no matching winShmNode currently exists, create a new one. */ winShmEnterMutex(); for(pShmNode = winShmNodeList; pShmNode; pShmNode=pShmNode->pNext){ /* TBD need to come up with better match here. Perhaps ** use FILE_ID_BOTH_DIR_INFO Structure. */ if( sqlite3StrICmp(pShmNode->zFilename, pNew->zFilename)==0 ) break; } if( pShmNode ){ sqlite3_free(pNew); }else{ pShmNode = pNew; pNew = 0; ((winFile*)(&pShmNode->hFile))->h = INVALID_HANDLE_VALUE; pShmNode->pNext = winShmNodeList; winShmNodeList = pShmNode; pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); if( pShmNode->mutex==0 ){ rc = SQLITE_IOERR_NOMEM; goto shm_open_err; } rc = winOpen(pDbFd->pVfs, pShmNode->zFilename, /* Name of the file (UTF-8) */ (sqlite3_file*)&pShmNode->hFile, /* File handle here */ SQLITE_OPEN_WAL | SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, 0); if( SQLITE_OK!=rc ){ goto shm_open_err; } /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ if( winShmSystemLock(pShmNode, _SHM_WRLCK, WIN_SHM_DMS, 1)==SQLITE_OK ){ rc = winTruncate((sqlite3_file *)&pShmNode->hFile, 0); if( rc!=SQLITE_OK ){ rc = winLogError(SQLITE_IOERR_SHMOPEN, osGetLastError(), "winOpenShm", pDbFd->zPath); } } if( rc==SQLITE_OK ){ winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1); rc = winShmSystemLock(pShmNode, _SHM_RDLCK, WIN_SHM_DMS, 1); } if( rc ) goto shm_open_err; } /* Make the new connection a child of the winShmNode */ p->pShmNode = pShmNode; #if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) p->id = pShmNode->nextShmId++; #endif pShmNode->nRef++; pDbFd->pShm = p; winShmLeaveMutex(); /* The reference count on pShmNode has already been incremented under ** the cover of the winShmEnterMutex() mutex and the pointer from the ** new (struct winShm) object to the pShmNode has been set. All that is ** left to do is to link the new object into the linked list starting ** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex ** mutex. */ sqlite3_mutex_enter(pShmNode->mutex); p->pNext = pShmNode->pFirst; pShmNode->pFirst = p; sqlite3_mutex_leave(pShmNode->mutex); return SQLITE_OK; /* Jump here on any error */ shm_open_err: winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1); winShmPurge(pDbFd->pVfs, 0); /* This call frees pShmNode if required */ sqlite3_free(p); sqlite3_free(pNew); winShmLeaveMutex(); return rc; } /* ** Close a connection to shared-memory. Delete the underlying ** storage if deleteFlag is true. */ static int winShmUnmap( sqlite3_file *fd, /* Database holding shared memory */ int deleteFlag /* Delete after closing if true */ ){ winFile *pDbFd; /* Database holding shared-memory */ winShm *p; /* The connection to be closed */ winShmNode *pShmNode; /* The underlying shared-memory file */ winShm **pp; /* For looping over sibling connections */ pDbFd = (winFile*)fd; p = pDbFd->pShm; if( p==0 ) return SQLITE_OK; pShmNode = p->pShmNode; /* Remove connection p from the set of connections associated ** with pShmNode */ sqlite3_mutex_enter(pShmNode->mutex); for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){} *pp = p->pNext; /* Free the connection p */ sqlite3_free(p); pDbFd->pShm = 0; sqlite3_mutex_leave(pShmNode->mutex); /* If pShmNode->nRef has reached 0, then close the underlying ** shared-memory file, too */ winShmEnterMutex(); assert( pShmNode->nRef>0 ); pShmNode->nRef--; if( pShmNode->nRef==0 ){ winShmPurge(pDbFd->pVfs, deleteFlag); } winShmLeaveMutex(); return SQLITE_OK; } /* ** Change the lock state for a shared-memory segment. */ static int winShmLock( sqlite3_file *fd, /* Database file holding the shared memory */ int ofst, /* First lock to acquire or release */ int n, /* Number of locks to acquire or release */ int flags /* What to do with the lock */ ){ winFile *pDbFd = (winFile*)fd; /* Connection holding shared memory */ winShm *p = pDbFd->pShm; /* The shared memory being locked */ winShm *pX; /* For looping over all siblings */ winShmNode *pShmNode = p->pShmNode; int rc = SQLITE_OK; /* Result code */ u16 mask; /* Mask of locks to take or release */ assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); assert( n>=1 ); assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); mask = (u16)((1U<<(ofst+n)) - (1U<1 || mask==(1<mutex); if( flags & SQLITE_SHM_UNLOCK ){ u16 allMask = 0; /* Mask of locks held by siblings */ /* See if any siblings hold this same lock */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( pX==p ) continue; assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); allMask |= pX->sharedMask; } /* Unlock the system-level locks */ if( (mask & allMask)==0 ){ rc = winShmSystemLock(pShmNode, _SHM_UNLCK, ofst+WIN_SHM_BASE, n); }else{ rc = SQLITE_OK; } /* Undo the local locks */ if( rc==SQLITE_OK ){ p->exclMask &= ~mask; p->sharedMask &= ~mask; } }else if( flags & SQLITE_SHM_SHARED ){ u16 allShared = 0; /* Union of locks held by connections other than "p" */ /* Find out which shared locks are already held by sibling connections. ** If any sibling already holds an exclusive lock, go ahead and return ** SQLITE_BUSY. */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( (pX->exclMask & mask)!=0 ){ rc = SQLITE_BUSY; break; } allShared |= pX->sharedMask; } /* Get shared locks at the system level, if necessary */ if( rc==SQLITE_OK ){ if( (allShared & mask)==0 ){ rc = winShmSystemLock(pShmNode, _SHM_RDLCK, ofst+WIN_SHM_BASE, n); }else{ rc = SQLITE_OK; } } /* Get the local shared locks */ if( rc==SQLITE_OK ){ p->sharedMask |= mask; } }else{ /* Make sure no sibling connections hold locks that will block this ** lock. If any do, return SQLITE_BUSY right away. */ for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ rc = SQLITE_BUSY; break; } } /* Get the exclusive locks at the system level. Then if successful ** also mark the local connection as being locked. */ if( rc==SQLITE_OK ){ rc = winShmSystemLock(pShmNode, _SHM_WRLCK, ofst+WIN_SHM_BASE, n); if( rc==SQLITE_OK ){ assert( (p->sharedMask & mask)==0 ); p->exclMask |= mask; } } } sqlite3_mutex_leave(pShmNode->mutex); OSTRACE(("SHM-LOCK pid=%lu, id=%d, sharedMask=%03x, exclMask=%03x, rc=%s\n", osGetCurrentProcessId(), p->id, p->sharedMask, p->exclMask, sqlite3ErrName(rc))); return rc; } /* ** Implement a memory barrier or memory fence on shared memory. ** ** All loads and stores begun before the barrier must complete before ** any load or store begun after the barrier. */ static void winShmBarrier( sqlite3_file *fd /* Database holding the shared memory */ ){ UNUSED_PARAMETER(fd); sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ winShmEnterMutex(); /* Also mutex, for redundancy */ winShmLeaveMutex(); } /* ** This function is called to obtain a pointer to region iRegion of the ** shared-memory associated with the database file fd. Shared-memory regions ** are numbered starting from zero. Each shared-memory region is szRegion ** bytes in size. ** ** If an error occurs, an error code is returned and *pp is set to NULL. ** ** Otherwise, if the isWrite parameter is 0 and the requested shared-memory ** region has not been allocated (by any client, including one running in a ** separate process), then *pp is set to NULL and SQLITE_OK returned. If ** isWrite is non-zero and the requested shared-memory region has not yet ** been allocated, it is allocated by this function. ** ** If the shared-memory region has already been allocated or is allocated by ** this call as described above, then it is mapped into this processes ** address space (if it is not already), *pp is set to point to the mapped ** memory and SQLITE_OK returned. */ static int winShmMap( sqlite3_file *fd, /* Handle open on database file */ int iRegion, /* Region to retrieve */ int szRegion, /* Size of regions */ int isWrite, /* True to extend file if necessary */ void volatile **pp /* OUT: Mapped memory */ ){ winFile *pDbFd = (winFile*)fd; winShm *pShm = pDbFd->pShm; winShmNode *pShmNode; int rc = SQLITE_OK; if( !pShm ){ rc = winOpenSharedMemory(pDbFd); if( rc!=SQLITE_OK ) return rc; pShm = pDbFd->pShm; } pShmNode = pShm->pShmNode; sqlite3_mutex_enter(pShmNode->mutex); assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); if( pShmNode->nRegion<=iRegion ){ struct ShmRegion *apNew; /* New aRegion[] array */ int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ sqlite3_int64 sz; /* Current size of wal-index file */ pShmNode->szRegion = szRegion; /* The requested region is not mapped into this processes address space. ** Check to see if it has been allocated (i.e. if the wal-index file is ** large enough to contain the requested region). */ rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); if( rc!=SQLITE_OK ){ rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(), "winShmMap1", pDbFd->zPath); goto shmpage_out; } if( szhFile, nByte); if( rc!=SQLITE_OK ){ rc = winLogError(SQLITE_IOERR_SHMSIZE, osGetLastError(), "winShmMap2", pDbFd->zPath); goto shmpage_out; } } /* Map the requested memory region into this processes address space. */ apNew = (struct ShmRegion *)sqlite3_realloc64( pShmNode->aRegion, (iRegion+1)*sizeof(apNew[0]) ); if( !apNew ){ rc = SQLITE_IOERR_NOMEM; goto shmpage_out; } pShmNode->aRegion = apNew; while( pShmNode->nRegion<=iRegion ){ HANDLE hMap = NULL; /* file-mapping handle */ void *pMap = 0; /* Mapped memory region */ #if SQLITE_OS_WINRT hMap = osCreateFileMappingFromApp(pShmNode->hFile.h, NULL, PAGE_READWRITE, nByte, NULL ); #elif defined(SQLITE_WIN32_HAS_WIDE) hMap = osCreateFileMappingW(pShmNode->hFile.h, NULL, PAGE_READWRITE, 0, nByte, NULL ); #elif defined(SQLITE_WIN32_HAS_ANSI) hMap = osCreateFileMappingA(pShmNode->hFile.h, NULL, PAGE_READWRITE, 0, nByte, NULL ); #endif OSTRACE(("SHM-MAP-CREATE pid=%lu, region=%d, size=%d, rc=%s\n", osGetCurrentProcessId(), pShmNode->nRegion, nByte, hMap ? "ok" : "failed")); if( hMap ){ int iOffset = pShmNode->nRegion*szRegion; int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity; #if SQLITE_OS_WINRT pMap = osMapViewOfFileFromApp(hMap, FILE_MAP_WRITE | FILE_MAP_READ, iOffset - iOffsetShift, szRegion + iOffsetShift ); #else pMap = osMapViewOfFile(hMap, FILE_MAP_WRITE | FILE_MAP_READ, 0, iOffset - iOffsetShift, szRegion + iOffsetShift ); #endif OSTRACE(("SHM-MAP-MAP pid=%lu, region=%d, offset=%d, size=%d, rc=%s\n", osGetCurrentProcessId(), pShmNode->nRegion, iOffset, szRegion, pMap ? "ok" : "failed")); } if( !pMap ){ pShmNode->lastErrno = osGetLastError(); rc = winLogError(SQLITE_IOERR_SHMMAP, pShmNode->lastErrno, "winShmMap3", pDbFd->zPath); if( hMap ) osCloseHandle(hMap); goto shmpage_out; } pShmNode->aRegion[pShmNode->nRegion].pMap = pMap; pShmNode->aRegion[pShmNode->nRegion].hMap = hMap; pShmNode->nRegion++; } } shmpage_out: if( pShmNode->nRegion>iRegion ){ int iOffset = iRegion*szRegion; int iOffsetShift = iOffset % winSysInfo.dwAllocationGranularity; char *p = (char *)pShmNode->aRegion[iRegion].pMap; *pp = (void *)&p[iOffsetShift]; }else{ *pp = 0; } sqlite3_mutex_leave(pShmNode->mutex); return rc; } #else # define winShmMap 0 # define winShmLock 0 # define winShmBarrier 0 # define winShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ /* ** Cleans up the mapped region of the specified file, if any. */ #if SQLITE_MAX_MMAP_SIZE>0 static int winUnmapfile(winFile *pFile){ assert( pFile!=0 ); OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, pMapRegion=%p, " "mmapSize=%lld, mmapSizeActual=%lld, mmapSizeMax=%lld\n", osGetCurrentProcessId(), pFile, pFile->hMap, pFile->pMapRegion, pFile->mmapSize, pFile->mmapSizeActual, pFile->mmapSizeMax)); if( pFile->pMapRegion ){ if( !osUnmapViewOfFile(pFile->pMapRegion) ){ pFile->lastErrno = osGetLastError(); OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, pMapRegion=%p, " "rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile, pFile->pMapRegion)); return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, "winUnmapfile1", pFile->zPath); } pFile->pMapRegion = 0; pFile->mmapSize = 0; pFile->mmapSizeActual = 0; } if( pFile->hMap!=NULL ){ if( !osCloseHandle(pFile->hMap) ){ pFile->lastErrno = osGetLastError(); OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, hMap=%p, rc=SQLITE_IOERR_MMAP\n", osGetCurrentProcessId(), pFile, pFile->hMap)); return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, "winUnmapfile2", pFile->zPath); } pFile->hMap = NULL; } OSTRACE(("UNMAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFile)); return SQLITE_OK; } /* ** Memory map or remap the file opened by file-descriptor pFd (if the file ** is already mapped, the existing mapping is replaced by the new). Or, if ** there already exists a mapping for this file, and there are still ** outstanding xFetch() references to it, this function is a no-op. ** ** If parameter nByte is non-negative, then it is the requested size of ** the mapping to create. Otherwise, if nByte is less than zero, then the ** requested size is the size of the file on disk. The actual size of the ** created mapping is either the requested size or the value configured ** using SQLITE_FCNTL_MMAP_SIZE, whichever is smaller. ** ** SQLITE_OK is returned if no error occurs (even if the mapping is not ** recreated as a result of outstanding references) or an SQLite error ** code otherwise. */ static int winMapfile(winFile *pFd, sqlite3_int64 nByte){ sqlite3_int64 nMap = nByte; int rc; assert( nMap>=0 || pFd->nFetchOut==0 ); OSTRACE(("MAP-FILE pid=%lu, pFile=%p, size=%lld\n", osGetCurrentProcessId(), pFd, nByte)); if( pFd->nFetchOut>0 ) return SQLITE_OK; if( nMap<0 ){ rc = winFileSize((sqlite3_file*)pFd, &nMap); if( rc ){ OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_IOERR_FSTAT\n", osGetCurrentProcessId(), pFd)); return SQLITE_IOERR_FSTAT; } } if( nMap>pFd->mmapSizeMax ){ nMap = pFd->mmapSizeMax; } nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1); if( nMap==0 && pFd->mmapSize>0 ){ winUnmapfile(pFd); } if( nMap!=pFd->mmapSize ){ void *pNew = 0; DWORD protect = PAGE_READONLY; DWORD flags = FILE_MAP_READ; winUnmapfile(pFd); if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){ protect = PAGE_READWRITE; flags |= FILE_MAP_WRITE; } #if SQLITE_OS_WINRT pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL); #elif defined(SQLITE_WIN32_HAS_WIDE) pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect, (DWORD)((nMap>>32) & 0xffffffff), (DWORD)(nMap & 0xffffffff), NULL); #elif defined(SQLITE_WIN32_HAS_ANSI) pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect, (DWORD)((nMap>>32) & 0xffffffff), (DWORD)(nMap & 0xffffffff), NULL); #endif if( pFd->hMap==NULL ){ pFd->lastErrno = osGetLastError(); rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, "winMapfile1", pFd->zPath); /* Log the error, but continue normal operation using xRead/xWrite */ OSTRACE(("MAP-FILE-CREATE pid=%lu, pFile=%p, rc=%s\n", osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); return SQLITE_OK; } assert( (nMap % winSysInfo.dwPageSize)==0 ); assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff ); #if SQLITE_OS_WINRT pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, (SIZE_T)nMap); #else pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap); #endif if( pNew==NULL ){ osCloseHandle(pFd->hMap); pFd->hMap = NULL; pFd->lastErrno = osGetLastError(); rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, "winMapfile2", pFd->zPath); /* Log the error, but continue normal operation using xRead/xWrite */ OSTRACE(("MAP-FILE-MAP pid=%lu, pFile=%p, rc=%s\n", osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); return SQLITE_OK; } pFd->pMapRegion = pNew; pFd->mmapSize = nMap; pFd->mmapSizeActual = nMap; } OSTRACE(("MAP-FILE pid=%lu, pFile=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), pFd)); return SQLITE_OK; } #endif /* SQLITE_MAX_MMAP_SIZE>0 */ /* ** If possible, return a pointer to a mapping of file fd starting at offset ** iOff. The mapping must be valid for at least nAmt bytes. ** ** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. ** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. ** Finally, if an error does occur, return an SQLite error code. The final ** value of *pp is undefined in this case. ** ** If this function does return a pointer, the caller must eventually ** release the reference by calling winUnfetch(). */ static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ #if SQLITE_MAX_MMAP_SIZE>0 winFile *pFd = (winFile*)fd; /* The underlying database file */ #endif *pp = 0; OSTRACE(("FETCH pid=%lu, pFile=%p, offset=%lld, amount=%d, pp=%p\n", osGetCurrentProcessId(), fd, iOff, nAmt, pp)); #if SQLITE_MAX_MMAP_SIZE>0 if( pFd->mmapSizeMax>0 ){ if( pFd->pMapRegion==0 ){ int rc = winMapfile(pFd, -1); if( rc!=SQLITE_OK ){ OSTRACE(("FETCH pid=%lu, pFile=%p, rc=%s\n", osGetCurrentProcessId(), pFd, sqlite3ErrName(rc))); return rc; } } if( pFd->mmapSize >= iOff+nAmt ){ *pp = &((u8 *)pFd->pMapRegion)[iOff]; pFd->nFetchOut++; } } #endif OSTRACE(("FETCH pid=%lu, pFile=%p, pp=%p, *pp=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), fd, pp, *pp)); return SQLITE_OK; } /* ** If the third argument is non-NULL, then this function releases a ** reference obtained by an earlier call to winFetch(). The second ** argument passed to this function must be the same as the corresponding ** argument that was passed to the winFetch() invocation. ** ** Or, if the third argument is NULL, then this function is being called ** to inform the VFS layer that, according to POSIX, any existing mapping ** may now be invalid and should be unmapped. */ static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){ #if SQLITE_MAX_MMAP_SIZE>0 winFile *pFd = (winFile*)fd; /* The underlying database file */ /* If p==0 (unmap the entire file) then there must be no outstanding ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), ** then there must be at least one outstanding. */ assert( (p==0)==(pFd->nFetchOut==0) ); /* If p!=0, it must match the iOff value. */ assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); OSTRACE(("UNFETCH pid=%lu, pFile=%p, offset=%lld, p=%p\n", osGetCurrentProcessId(), pFd, iOff, p)); if( p ){ pFd->nFetchOut--; }else{ /* FIXME: If Windows truly always prevents truncating or deleting a ** file while a mapping is held, then the following winUnmapfile() call ** is unnecessary can be omitted - potentially improving ** performance. */ winUnmapfile(pFd); } assert( pFd->nFetchOut>=0 ); #endif OSTRACE(("UNFETCH pid=%lu, pFile=%p, rc=SQLITE_OK\n", osGetCurrentProcessId(), fd)); return SQLITE_OK; } /* ** Here ends the implementation of all sqlite3_file methods. ** ********************** End sqlite3_file Methods ******************************* ******************************************************************************/ /* ** This vector defines all the methods that can operate on an ** sqlite3_file for win32. */ static const sqlite3_io_methods winIoMethod = { 3, /* iVersion */ winClose, /* xClose */ winRead, /* xRead */ winWrite, /* xWrite */ winTruncate, /* xTruncate */ winSync, /* xSync */ winFileSize, /* xFileSize */ winLock, /* xLock */ winUnlock, /* xUnlock */ winCheckReservedLock, /* xCheckReservedLock */ winFileControl, /* xFileControl */ winSectorSize, /* xSectorSize */ winDeviceCharacteristics, /* xDeviceCharacteristics */ winShmMap, /* xShmMap */ winShmLock, /* xShmLock */ winShmBarrier, /* xShmBarrier */ winShmUnmap, /* xShmUnmap */ winFetch, /* xFetch */ winUnfetch /* xUnfetch */ }; /**************************************************************************** **************************** sqlite3_vfs methods **************************** ** ** This division contains the implementation of methods on the ** sqlite3_vfs object. */ #if defined(__CYGWIN__) /* ** Convert a filename from whatever the underlying operating system ** supports for filenames into UTF-8. Space to hold the result is ** obtained from malloc and must be freed by the calling function. */ static char *winConvertToUtf8Filename(const void *zFilename){ char *zConverted = 0; if( osIsNT() ){ zConverted = winUnicodeToUtf8(zFilename); } #ifdef SQLITE_WIN32_HAS_ANSI else{ zConverted = sqlite3_win32_mbcs_to_utf8(zFilename); } #endif /* caller will handle out of memory */ return zConverted; } #endif /* ** Convert a UTF-8 filename into whatever form the underlying ** operating system wants filenames in. Space to hold the result ** is obtained from malloc and must be freed by the calling ** function. */ static void *winConvertFromUtf8Filename(const char *zFilename){ void *zConverted = 0; if( osIsNT() ){ zConverted = winUtf8ToUnicode(zFilename); } #ifdef SQLITE_WIN32_HAS_ANSI else{ zConverted = sqlite3_win32_utf8_to_mbcs(zFilename); } #endif /* caller will handle out of memory */ return zConverted; } /* ** This function returns non-zero if the specified UTF-8 string buffer ** ends with a directory separator character or one was successfully ** added to it. */ static int winMakeEndInDirSep(int nBuf, char *zBuf){ if( zBuf ){ int nLen = sqlite3Strlen30(zBuf); if( nLen>0 ){ if( winIsDirSep(zBuf[nLen-1]) ){ return 1; }else if( nLen+1mxPathname; nBuf = nMax + 2; zBuf = sqlite3MallocZero( nBuf ); if( !zBuf ){ OSTRACE(("TEMP-FILENAME rc=SQLITE_IOERR_NOMEM\n")); return SQLITE_IOERR_NOMEM; } /* Figure out the effective temporary directory. First, check if one ** has been explicitly set by the application; otherwise, use the one ** configured by the operating system. */ nDir = nMax - (nPre + 15); assert( nDir>0 ); if( sqlite3_temp_directory ){ int nDirLen = sqlite3Strlen30(sqlite3_temp_directory); if( nDirLen>0 ){ if( !winIsDirSep(sqlite3_temp_directory[nDirLen-1]) ){ nDirLen++; } if( nDirLen>nDir ){ sqlite3_free(zBuf); OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); return winLogError(SQLITE_ERROR, 0, "winGetTempname1", 0); } sqlite3_snprintf(nMax, zBuf, "%s", sqlite3_temp_directory); } } #if defined(__CYGWIN__) else{ static const char *azDirs[] = { 0, /* getenv("SQLITE_TMPDIR") */ 0, /* getenv("TMPDIR") */ 0, /* getenv("TMP") */ 0, /* getenv("TEMP") */ 0, /* getenv("USERPROFILE") */ "/var/tmp", "/usr/tmp", "/tmp", ".", 0 /* List terminator */ }; unsigned int i; const char *zDir = 0; if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR"); if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); if( !azDirs[2] ) azDirs[2] = getenv("TMP"); if( !azDirs[3] ) azDirs[3] = getenv("TEMP"); if( !azDirs[4] ) azDirs[4] = getenv("USERPROFILE"); for(i=0; i/etilqs_XXXXXXXXXXXXXXX\0\0" ** ** If not, return SQLITE_ERROR. The number 17 is used here in order to ** account for the space used by the 15 character random suffix and the ** two trailing NUL characters. The final directory separator character ** has already added if it was not already present. */ nLen = sqlite3Strlen30(zBuf); if( (nLen + nPre + 17) > nBuf ){ sqlite3_free(zBuf); OSTRACE(("TEMP-FILENAME rc=SQLITE_ERROR\n")); return winLogError(SQLITE_ERROR, 0, "winGetTempname5", 0); } sqlite3_snprintf(nBuf-16-nLen, zBuf+nLen, SQLITE_TEMP_FILE_PREFIX); j = sqlite3Strlen30(zBuf); sqlite3_randomness(15, &zBuf[j]); for(i=0; i<15; i++, j++){ zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; } zBuf[j] = 0; zBuf[j+1] = 0; *pzBuf = zBuf; OSTRACE(("TEMP-FILENAME name=%s, rc=SQLITE_OK\n", zBuf)); return SQLITE_OK; } /* ** Return TRUE if the named file is really a directory. Return false if ** it is something other than a directory, or if there is any kind of memory ** allocation failure. */ static int winIsDir(const void *zConverted){ DWORD attr; int rc = 0; DWORD lastErrno; if( osIsNT() ){ int cnt = 0; WIN32_FILE_ATTRIBUTE_DATA sAttrData; memset(&sAttrData, 0, sizeof(sAttrData)); while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted, GetFileExInfoStandard, &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){} if( !rc ){ return 0; /* Invalid name? */ } attr = sAttrData.dwFileAttributes; #if SQLITE_OS_WINCE==0 }else{ attr = osGetFileAttributesA((char*)zConverted); #endif } return (attr!=INVALID_FILE_ATTRIBUTES) && (attr&FILE_ATTRIBUTE_DIRECTORY); } /* ** Open a file. */ static int winOpen( sqlite3_vfs *pVfs, /* Used to get maximum path name length */ const char *zName, /* Name of the file (UTF-8) */ sqlite3_file *id, /* Write the SQLite file handle here */ int flags, /* Open mode flags */ int *pOutFlags /* Status return flags */ ){ HANDLE h; DWORD lastErrno = 0; DWORD dwDesiredAccess; DWORD dwShareMode; DWORD dwCreationDisposition; DWORD dwFlagsAndAttributes = 0; #if SQLITE_OS_WINCE int isTemp = 0; #endif winFile *pFile = (winFile*)id; void *zConverted; /* Filename in OS encoding */ const char *zUtf8Name = zName; /* Filename in UTF-8 encoding */ int cnt = 0; /* If argument zPath is a NULL pointer, this function is required to open ** a temporary file. Use this buffer to store the file name in. */ char *zTmpname = 0; /* For temporary filename, if necessary. */ int rc = SQLITE_OK; /* Function Return Code */ #if !defined(NDEBUG) || SQLITE_OS_WINCE int eType = flags&0xFFFFFF00; /* Type of file to open */ #endif int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); int isCreate = (flags & SQLITE_OPEN_CREATE); int isReadonly = (flags & SQLITE_OPEN_READONLY); int isReadWrite = (flags & SQLITE_OPEN_READWRITE); #ifndef NDEBUG int isOpenJournal = (isCreate && ( eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_WAL )); #endif OSTRACE(("OPEN name=%s, pFile=%p, flags=%x, pOutFlags=%p\n", zUtf8Name, id, flags, pOutFlags)); /* Check the following statements are true: ** ** (a) Exactly one of the READWRITE and READONLY flags must be set, and ** (b) if CREATE is set, then READWRITE must also be set, and ** (c) if EXCLUSIVE is set, then CREATE must also be set. ** (d) if DELETEONCLOSE is set, then CREATE must also be set. */ assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly)); assert(isCreate==0 || isReadWrite); assert(isExclusive==0 || isCreate); assert(isDelete==0 || isCreate); /* The main DB, main journal, WAL file and master journal are never ** automatically deleted. Nor are they ever temporary files. */ assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB ); assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL ); assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL ); assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL ); /* Assert that the upper layer has set one of the "file-type" flags. */ assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL ); assert( pFile!=0 ); memset(pFile, 0, sizeof(winFile)); pFile->h = INVALID_HANDLE_VALUE; #if SQLITE_OS_WINRT if( !zUtf8Name && !sqlite3_temp_directory ){ sqlite3_log(SQLITE_ERROR, "sqlite3_temp_directory variable should be set for WinRT"); } #endif /* If the second argument to this function is NULL, generate a ** temporary file name to use */ if( !zUtf8Name ){ assert( isDelete && !isOpenJournal ); rc = winGetTempname(pVfs, &zTmpname); if( rc!=SQLITE_OK ){ OSTRACE(("OPEN name=%s, rc=%s", zUtf8Name, sqlite3ErrName(rc))); return rc; } zUtf8Name = zTmpname; } /* Database filenames are double-zero terminated if they are not ** URIs with parameters. Hence, they can always be passed into ** sqlite3_uri_parameter(). */ assert( (eType!=SQLITE_OPEN_MAIN_DB) || (flags & SQLITE_OPEN_URI) || zUtf8Name[sqlite3Strlen30(zUtf8Name)+1]==0 ); /* Convert the filename to the system encoding. */ zConverted = winConvertFromUtf8Filename(zUtf8Name); if( zConverted==0 ){ sqlite3_free(zTmpname); OSTRACE(("OPEN name=%s, rc=SQLITE_IOERR_NOMEM", zUtf8Name)); return SQLITE_IOERR_NOMEM; } if( winIsDir(zConverted) ){ sqlite3_free(zConverted); sqlite3_free(zTmpname); OSTRACE(("OPEN name=%s, rc=SQLITE_CANTOPEN_ISDIR", zUtf8Name)); return SQLITE_CANTOPEN_ISDIR; } if( isReadWrite ){ dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; }else{ dwDesiredAccess = GENERIC_READ; } /* SQLITE_OPEN_EXCLUSIVE is used to make sure that a new file is ** created. SQLite doesn't use it to indicate "exclusive access" ** as it is usually understood. */ if( isExclusive ){ /* Creates a new file, only if it does not already exist. */ /* If the file exists, it fails. */ dwCreationDisposition = CREATE_NEW; }else if( isCreate ){ /* Open existing file, or create if it doesn't exist */ dwCreationDisposition = OPEN_ALWAYS; }else{ /* Opens a file, only if it exists. */ dwCreationDisposition = OPEN_EXISTING; } dwShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE; if( isDelete ){ #if SQLITE_OS_WINCE dwFlagsAndAttributes = FILE_ATTRIBUTE_HIDDEN; isTemp = 1; #else dwFlagsAndAttributes = FILE_ATTRIBUTE_TEMPORARY | FILE_ATTRIBUTE_HIDDEN | FILE_FLAG_DELETE_ON_CLOSE; #endif }else{ dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; } /* Reports from the internet are that performance is always ** better if FILE_FLAG_RANDOM_ACCESS is used. Ticket #2699. */ #if SQLITE_OS_WINCE dwFlagsAndAttributes |= FILE_FLAG_RANDOM_ACCESS; #endif if( osIsNT() ){ #if SQLITE_OS_WINRT CREATEFILE2_EXTENDED_PARAMETERS extendedParameters; extendedParameters.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS); extendedParameters.dwFileAttributes = dwFlagsAndAttributes & FILE_ATTRIBUTE_MASK; extendedParameters.dwFileFlags = dwFlagsAndAttributes & FILE_FLAG_MASK; extendedParameters.dwSecurityQosFlags = SECURITY_ANONYMOUS; extendedParameters.lpSecurityAttributes = NULL; extendedParameters.hTemplateFile = NULL; while( (h = osCreateFile2((LPCWSTR)zConverted, dwDesiredAccess, dwShareMode, dwCreationDisposition, &extendedParameters))==INVALID_HANDLE_VALUE && winRetryIoerr(&cnt, &lastErrno) ){ /* Noop */ } #else while( (h = osCreateFileW((LPCWSTR)zConverted, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwFlagsAndAttributes, NULL))==INVALID_HANDLE_VALUE && winRetryIoerr(&cnt, &lastErrno) ){ /* Noop */ } #endif } #ifdef SQLITE_WIN32_HAS_ANSI else{ while( (h = osCreateFileA((LPCSTR)zConverted, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwFlagsAndAttributes, NULL))==INVALID_HANDLE_VALUE && winRetryIoerr(&cnt, &lastErrno) ){ /* Noop */ } } #endif winLogIoerr(cnt, __LINE__); OSTRACE(("OPEN file=%p, name=%s, access=%lx, rc=%s\n", h, zUtf8Name, dwDesiredAccess, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); if( h==INVALID_HANDLE_VALUE ){ pFile->lastErrno = lastErrno; winLogError(SQLITE_CANTOPEN, pFile->lastErrno, "winOpen", zUtf8Name); sqlite3_free(zConverted); sqlite3_free(zTmpname); if( isReadWrite && !isExclusive ){ return winOpen(pVfs, zName, id, ((flags|SQLITE_OPEN_READONLY) & ~(SQLITE_OPEN_CREATE|SQLITE_OPEN_READWRITE)), pOutFlags); }else{ return SQLITE_CANTOPEN_BKPT; } } if( pOutFlags ){ if( isReadWrite ){ *pOutFlags = SQLITE_OPEN_READWRITE; }else{ *pOutFlags = SQLITE_OPEN_READONLY; } } OSTRACE(("OPEN file=%p, name=%s, access=%lx, pOutFlags=%p, *pOutFlags=%d, " "rc=%s\n", h, zUtf8Name, dwDesiredAccess, pOutFlags, pOutFlags ? *pOutFlags : 0, (h==INVALID_HANDLE_VALUE) ? "failed" : "ok")); #if SQLITE_OS_WINCE if( isReadWrite && eType==SQLITE_OPEN_MAIN_DB && (rc = winceCreateLock(zName, pFile))!=SQLITE_OK ){ osCloseHandle(h); sqlite3_free(zConverted); sqlite3_free(zTmpname); OSTRACE(("OPEN-CE-LOCK name=%s, rc=%s\n", zName, sqlite3ErrName(rc))); return rc; } if( isTemp ){ pFile->zDeleteOnClose = zConverted; }else #endif { sqlite3_free(zConverted); } sqlite3_free(zTmpname); pFile->pMethod = &winIoMethod; pFile->pVfs = pVfs; pFile->h = h; if( isReadonly ){ pFile->ctrlFlags |= WINFILE_RDONLY; } pFile->lastErrno = NO_ERROR; pFile->zPath = zName; #if SQLITE_MAX_MMAP_SIZE>0 pFile->hMap = NULL; pFile->pMapRegion = 0; pFile->mmapSize = 0; pFile->mmapSizeActual = 0; pFile->mmapSizeMax = sqlite3GlobalConfig.szMmap; #endif OpenCounter(+1); return rc; } /* ** Delete the named file. ** ** Note that Windows does not allow a file to be deleted if some other ** process has it open. Sometimes a virus scanner or indexing program ** will open a journal file shortly after it is created in order to do ** whatever it does. While this other process is holding the ** file open, we will be unable to delete it. To work around this ** problem, we delay 100 milliseconds and try to delete again. Up ** to MX_DELETION_ATTEMPTs deletion attempts are run before giving ** up and returning an error. */ static int winDelete( sqlite3_vfs *pVfs, /* Not used on win32 */ const char *zFilename, /* Name of file to delete */ int syncDir /* Not used on win32 */ ){ int cnt = 0; int rc; DWORD attr; DWORD lastErrno = 0; void *zConverted; UNUSED_PARAMETER(pVfs); UNUSED_PARAMETER(syncDir); SimulateIOError(return SQLITE_IOERR_DELETE); OSTRACE(("DELETE name=%s, syncDir=%d\n", zFilename, syncDir)); zConverted = winConvertFromUtf8Filename(zFilename); if( zConverted==0 ){ OSTRACE(("DELETE name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); return SQLITE_IOERR_NOMEM; } if( osIsNT() ){ do { #if SQLITE_OS_WINRT WIN32_FILE_ATTRIBUTE_DATA sAttrData; memset(&sAttrData, 0, sizeof(sAttrData)); if ( osGetFileAttributesExW(zConverted, GetFileExInfoStandard, &sAttrData) ){ attr = sAttrData.dwFileAttributes; }else{ lastErrno = osGetLastError(); if( lastErrno==ERROR_FILE_NOT_FOUND || lastErrno==ERROR_PATH_NOT_FOUND ){ rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ }else{ rc = SQLITE_ERROR; } break; } #else attr = osGetFileAttributesW(zConverted); #endif if ( attr==INVALID_FILE_ATTRIBUTES ){ lastErrno = osGetLastError(); if( lastErrno==ERROR_FILE_NOT_FOUND || lastErrno==ERROR_PATH_NOT_FOUND ){ rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ }else{ rc = SQLITE_ERROR; } break; } if ( attr&FILE_ATTRIBUTE_DIRECTORY ){ rc = SQLITE_ERROR; /* Files only. */ break; } if ( osDeleteFileW(zConverted) ){ rc = SQLITE_OK; /* Deleted OK. */ break; } if ( !winRetryIoerr(&cnt, &lastErrno) ){ rc = SQLITE_ERROR; /* No more retries. */ break; } } while(1); } #ifdef SQLITE_WIN32_HAS_ANSI else{ do { attr = osGetFileAttributesA(zConverted); if ( attr==INVALID_FILE_ATTRIBUTES ){ lastErrno = osGetLastError(); if( lastErrno==ERROR_FILE_NOT_FOUND || lastErrno==ERROR_PATH_NOT_FOUND ){ rc = SQLITE_IOERR_DELETE_NOENT; /* Already gone? */ }else{ rc = SQLITE_ERROR; } break; } if ( attr&FILE_ATTRIBUTE_DIRECTORY ){ rc = SQLITE_ERROR; /* Files only. */ break; } if ( osDeleteFileA(zConverted) ){ rc = SQLITE_OK; /* Deleted OK. */ break; } if ( !winRetryIoerr(&cnt, &lastErrno) ){ rc = SQLITE_ERROR; /* No more retries. */ break; } } while(1); } #endif if( rc && rc!=SQLITE_IOERR_DELETE_NOENT ){ rc = winLogError(SQLITE_IOERR_DELETE, lastErrno, "winDelete", zFilename); }else{ winLogIoerr(cnt, __LINE__); } sqlite3_free(zConverted); OSTRACE(("DELETE name=%s, rc=%s\n", zFilename, sqlite3ErrName(rc))); return rc; } /* ** Check the existence and status of a file. */ static int winAccess( sqlite3_vfs *pVfs, /* Not used on win32 */ const char *zFilename, /* Name of file to check */ int flags, /* Type of test to make on this file */ int *pResOut /* OUT: Result */ ){ DWORD attr; int rc = 0; DWORD lastErrno = 0; void *zConverted; UNUSED_PARAMETER(pVfs); SimulateIOError( return SQLITE_IOERR_ACCESS; ); OSTRACE(("ACCESS name=%s, flags=%x, pResOut=%p\n", zFilename, flags, pResOut)); zConverted = winConvertFromUtf8Filename(zFilename); if( zConverted==0 ){ OSTRACE(("ACCESS name=%s, rc=SQLITE_IOERR_NOMEM\n", zFilename)); return SQLITE_IOERR_NOMEM; } if( osIsNT() ){ int cnt = 0; WIN32_FILE_ATTRIBUTE_DATA sAttrData; memset(&sAttrData, 0, sizeof(sAttrData)); while( !(rc = osGetFileAttributesExW((LPCWSTR)zConverted, GetFileExInfoStandard, &sAttrData)) && winRetryIoerr(&cnt, &lastErrno) ){} if( rc ){ /* For an SQLITE_ACCESS_EXISTS query, treat a zero-length file ** as if it does not exist. */ if( flags==SQLITE_ACCESS_EXISTS && sAttrData.nFileSizeHigh==0 && sAttrData.nFileSizeLow==0 ){ attr = INVALID_FILE_ATTRIBUTES; }else{ attr = sAttrData.dwFileAttributes; } }else{ winLogIoerr(cnt, __LINE__); if( lastErrno!=ERROR_FILE_NOT_FOUND && lastErrno!=ERROR_PATH_NOT_FOUND ){ sqlite3_free(zConverted); return winLogError(SQLITE_IOERR_ACCESS, lastErrno, "winAccess", zFilename); }else{ attr = INVALID_FILE_ATTRIBUTES; } } } #ifdef SQLITE_WIN32_HAS_ANSI else{ attr = osGetFileAttributesA((char*)zConverted); } #endif sqlite3_free(zConverted); switch( flags ){ case SQLITE_ACCESS_READ: case SQLITE_ACCESS_EXISTS: rc = attr!=INVALID_FILE_ATTRIBUTES; break; case SQLITE_ACCESS_READWRITE: rc = attr!=INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_READONLY)==0; break; default: assert(!"Invalid flags argument"); } *pResOut = rc; OSTRACE(("ACCESS name=%s, pResOut=%p, *pResOut=%d, rc=SQLITE_OK\n", zFilename, pResOut, *pResOut)); return SQLITE_OK; } /* ** Returns non-zero if the specified path name starts with a drive letter ** followed by a colon character. */ static BOOL winIsDriveLetterAndColon( const char *zPathname ){ return ( sqlite3Isalpha(zPathname[0]) && zPathname[1]==':' ); } /* ** Turn a relative pathname into a full pathname. Write the full ** pathname into zOut[]. zOut[] will be at least pVfs->mxPathname ** bytes in size. */ static int winFullPathname( sqlite3_vfs *pVfs, /* Pointer to vfs object */ const char *zRelative, /* Possibly relative input path */ int nFull, /* Size of output buffer in bytes */ char *zFull /* Output buffer */ ){ #if defined(__CYGWIN__) SimulateIOError( return SQLITE_ERROR ); UNUSED_PARAMETER(nFull); assert( nFull>=pVfs->mxPathname ); char *zOut = sqlite3MallocZero( pVfs->mxPathname+1 ); if( !zOut ){ return SQLITE_IOERR_NOMEM; } if( cygwin_conv_path( (osIsNT() ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_A), zRelative, zOut, pVfs->mxPathname+1)<0 ){ sqlite3_free(zOut); return winLogError(SQLITE_CANTOPEN_CONVPATH, (DWORD)errno, "winFullPathname2", zRelative); }else{ char *zUtf8 = winConvertToUtf8Filename(zOut); if( !zUtf8 ){ sqlite3_free(zOut); return SQLITE_IOERR_NOMEM; } sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zUtf8); sqlite3_free(zUtf8); sqlite3_free(zOut); } return SQLITE_OK; #endif #if (SQLITE_OS_WINCE || SQLITE_OS_WINRT) && !defined(__CYGWIN__) SimulateIOError( return SQLITE_ERROR ); /* WinCE has no concept of a relative pathname, or so I am told. */ /* WinRT has no way to convert a relative path to an absolute one. */ sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zRelative); return SQLITE_OK; #endif #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && !defined(__CYGWIN__) DWORD nByte; void *zConverted; char *zOut; /* If this path name begins with "/X:", where "X" is any alphabetic ** character, discard the initial "/" from the pathname. */ if( zRelative[0]=='/' && winIsDriveLetterAndColon(zRelative+1) ){ zRelative++; } /* It's odd to simulate an io-error here, but really this is just ** using the io-error infrastructure to test that SQLite handles this ** function failing. This function could fail if, for example, the ** current working directory has been unlinked. */ SimulateIOError( return SQLITE_ERROR ); zConverted = winConvertFromUtf8Filename(zRelative); if( zConverted==0 ){ return SQLITE_IOERR_NOMEM; } if( osIsNT() ){ LPWSTR zTemp; nByte = osGetFullPathNameW((LPCWSTR)zConverted, 0, 0, 0); if( nByte==0 ){ sqlite3_free(zConverted); return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), "winFullPathname1", zRelative); } nByte += 3; zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) ); if( zTemp==0 ){ sqlite3_free(zConverted); return SQLITE_IOERR_NOMEM; } nByte = osGetFullPathNameW((LPCWSTR)zConverted, nByte, zTemp, 0); if( nByte==0 ){ sqlite3_free(zConverted); sqlite3_free(zTemp); return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), "winFullPathname2", zRelative); } sqlite3_free(zConverted); zOut = winUnicodeToUtf8(zTemp); sqlite3_free(zTemp); } #ifdef SQLITE_WIN32_HAS_ANSI else{ char *zTemp; nByte = osGetFullPathNameA((char*)zConverted, 0, 0, 0); if( nByte==0 ){ sqlite3_free(zConverted); return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), "winFullPathname3", zRelative); } nByte += 3; zTemp = sqlite3MallocZero( nByte*sizeof(zTemp[0]) ); if( zTemp==0 ){ sqlite3_free(zConverted); return SQLITE_IOERR_NOMEM; } nByte = osGetFullPathNameA((char*)zConverted, nByte, zTemp, 0); if( nByte==0 ){ sqlite3_free(zConverted); sqlite3_free(zTemp); return winLogError(SQLITE_CANTOPEN_FULLPATH, osGetLastError(), "winFullPathname4", zRelative); } sqlite3_free(zConverted); zOut = sqlite3_win32_mbcs_to_utf8(zTemp); sqlite3_free(zTemp); } #endif if( zOut ){ sqlite3_snprintf(MIN(nFull, pVfs->mxPathname), zFull, "%s", zOut); sqlite3_free(zOut); return SQLITE_OK; }else{ return SQLITE_IOERR_NOMEM; } #endif } #define winDlOpen 0 #define winDlError 0 #define winDlSym 0 #define winDlClose 0 /* ** Write up to nBuf bytes of randomness into zBuf. */ static int winRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ int n = 0; UNUSED_PARAMETER(pVfs); #if defined(SQLITE_TEST) || defined(SQLITE_OMIT_RANDOMNESS) n = nBuf; memset(zBuf, 0, nBuf); #else if( sizeof(SYSTEMTIME)<=nBuf-n ){ SYSTEMTIME x; osGetSystemTime(&x); memcpy(&zBuf[n], &x, sizeof(x)); n += sizeof(x); } if( sizeof(DWORD)<=nBuf-n ){ DWORD pid = osGetCurrentProcessId(); memcpy(&zBuf[n], &pid, sizeof(pid)); n += sizeof(pid); } #if SQLITE_OS_WINRT if( sizeof(ULONGLONG)<=nBuf-n ){ ULONGLONG cnt = osGetTickCount64(); memcpy(&zBuf[n], &cnt, sizeof(cnt)); n += sizeof(cnt); } #else if( sizeof(DWORD)<=nBuf-n ){ DWORD cnt = osGetTickCount(); memcpy(&zBuf[n], &cnt, sizeof(cnt)); n += sizeof(cnt); } #endif if( sizeof(LARGE_INTEGER)<=nBuf-n ){ LARGE_INTEGER i; osQueryPerformanceCounter(&i); memcpy(&zBuf[n], &i, sizeof(i)); n += sizeof(i); } #if !SQLITE_OS_WINCE && !SQLITE_OS_WINRT && SQLITE_WIN32_USE_UUID if( sizeof(UUID)<=nBuf-n ){ UUID id; memset(&id, 0, sizeof(UUID)); osUuidCreate(&id); memcpy(&zBuf[n], &id, sizeof(UUID)); n += sizeof(UUID); } if( sizeof(UUID)<=nBuf-n ){ UUID id; memset(&id, 0, sizeof(UUID)); osUuidCreateSequential(&id); memcpy(&zBuf[n], &id, sizeof(UUID)); n += sizeof(UUID); } #endif #endif /* defined(SQLITE_TEST) || defined(SQLITE_ZERO_PRNG_SEED) */ return n; } /* ** Sleep for a little while. Return the amount of time slept. */ static int winSleep(sqlite3_vfs *pVfs, int microsec){ sqlite3_win32_sleep((microsec+999)/1000); UNUSED_PARAMETER(pVfs); return ((microsec+999)/1000)*1000; } /* ** The following variable, if set to a non-zero value, is interpreted as ** the number of seconds since 1970 and is used to set the result of ** sqlite3OsCurrentTime() during testing. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */ #endif /* ** Find the current time (in Universal Coordinated Time). Write into *piNow ** the current time and date as a Julian Day number times 86_400_000. In ** other words, write into *piNow the number of milliseconds since the Julian ** epoch of noon in Greenwich on November 24, 4714 B.C according to the ** proleptic Gregorian calendar. ** ** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date ** cannot be found. */ static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){ /* FILETIME structure is a 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (= JD 2305813.5). */ FILETIME ft; static const sqlite3_int64 winFiletimeEpoch = 23058135*(sqlite3_int64)8640000; #ifdef SQLITE_TEST static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000; #endif /* 2^32 - to avoid use of LL and warnings in gcc */ static const sqlite3_int64 max32BitValue = (sqlite3_int64)2000000000 + (sqlite3_int64)2000000000 + (sqlite3_int64)294967296; #if SQLITE_OS_WINCE SYSTEMTIME time; osGetSystemTime(&time); /* if SystemTimeToFileTime() fails, it returns zero. */ if (!osSystemTimeToFileTime(&time,&ft)){ return SQLITE_ERROR; } #else osGetSystemTimeAsFileTime( &ft ); #endif *piNow = winFiletimeEpoch + ((((sqlite3_int64)ft.dwHighDateTime)*max32BitValue) + (sqlite3_int64)ft.dwLowDateTime)/(sqlite3_int64)10000; #ifdef SQLITE_TEST if( sqlite3_current_time ){ *piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch; } #endif UNUSED_PARAMETER(pVfs); return SQLITE_OK; } /* ** Find the current time (in Universal Coordinated Time). Write the ** current time and date as a Julian Day number into *prNow and ** return 0. Return 1 if the time and date cannot be found. */ static int winCurrentTime(sqlite3_vfs *pVfs, double *prNow){ int rc; sqlite3_int64 i; rc = winCurrentTimeInt64(pVfs, &i); if( !rc ){ *prNow = i/86400000.0; } return rc; } /* ** The idea is that this function works like a combination of ** GetLastError() and FormatMessage() on Windows (or errno and ** strerror_r() on Unix). After an error is returned by an OS ** function, SQLite calls this function with zBuf pointing to ** a buffer of nBuf bytes. The OS layer should populate the ** buffer with a nul-terminated UTF-8 encoded error message ** describing the last IO error to have occurred within the calling ** thread. ** ** If the error message is too large for the supplied buffer, ** it should be truncated. The return value of xGetLastError ** is zero if the error message fits in the buffer, or non-zero ** otherwise (if the message was truncated). If non-zero is returned, ** then it is not necessary to include the nul-terminator character ** in the output buffer. ** ** Not supplying an error message will have no adverse effect ** on SQLite. It is fine to have an implementation that never ** returns an error message: ** ** int xGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ ** assert(zBuf[0]=='\0'); ** return 0; ** } ** ** However if an error message is supplied, it will be incorporated ** by sqlite into the error message available to the user using ** sqlite3_errmsg(), possibly making IO errors easier to debug. */ static int winGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ UNUSED_PARAMETER(pVfs); return winGetLastErrorMsg(osGetLastError(), nBuf, zBuf); } /* ** Initialize and deinitialize the operating system interface. */ SQLITE_PRIVATE int sqlite3_os_init(void){ static sqlite3_vfs winVfs = { 3, /* iVersion */ sizeof(winFile), /* szOsFile */ SQLITE_WIN32_MAX_PATH_BYTES, /* mxPathname */ 0, /* pNext */ "win32", /* zName */ 0, /* pAppData */ winOpen, /* xOpen */ winDelete, /* xDelete */ winAccess, /* xAccess */ winFullPathname, /* xFullPathname */ winDlOpen, /* xDlOpen */ winDlError, /* xDlError */ winDlSym, /* xDlSym */ winDlClose, /* xDlClose */ winRandomness, /* xRandomness */ winSleep, /* xSleep */ winCurrentTime, /* xCurrentTime */ winGetLastError, /* xGetLastError */ winCurrentTimeInt64, /* xCurrentTimeInt64 */ winSetSystemCall, /* xSetSystemCall */ winGetSystemCall, /* xGetSystemCall */ winNextSystemCall, /* xNextSystemCall */ }; #if defined(SQLITE_WIN32_HAS_WIDE) static sqlite3_vfs winLongPathVfs = { 3, /* iVersion */ sizeof(winFile), /* szOsFile */ SQLITE_WINNT_MAX_PATH_BYTES, /* mxPathname */ 0, /* pNext */ "win32-longpath", /* zName */ 0, /* pAppData */ winOpen, /* xOpen */ winDelete, /* xDelete */ winAccess, /* xAccess */ winFullPathname, /* xFullPathname */ winDlOpen, /* xDlOpen */ winDlError, /* xDlError */ winDlSym, /* xDlSym */ winDlClose, /* xDlClose */ winRandomness, /* xRandomness */ winSleep, /* xSleep */ winCurrentTime, /* xCurrentTime */ winGetLastError, /* xGetLastError */ winCurrentTimeInt64, /* xCurrentTimeInt64 */ winSetSystemCall, /* xSetSystemCall */ winGetSystemCall, /* xGetSystemCall */ winNextSystemCall, /* xNextSystemCall */ }; #endif /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ assert( ArraySize(aSyscall)==80 ); /* get memory map allocation granularity */ memset(&winSysInfo, 0, sizeof(SYSTEM_INFO)); #if SQLITE_OS_WINRT osGetNativeSystemInfo(&winSysInfo); #else osGetSystemInfo(&winSysInfo); #endif assert( winSysInfo.dwAllocationGranularity>0 ); assert( winSysInfo.dwPageSize>0 ); sqlite3_vfs_register(&winVfs, 1); #if defined(SQLITE_WIN32_HAS_WIDE) sqlite3_vfs_register(&winLongPathVfs, 0); #endif return SQLITE_OK; } SQLITE_PRIVATE int sqlite3_os_end(void){ #if SQLITE_OS_WINRT if( sleepObj!=NULL ){ osCloseHandle(sleepObj); sleepObj = NULL; } #endif return SQLITE_OK; } #endif /* SQLITE_OS_WIN */ /************** End of os_win.c **********************************************/ /************** Begin file bitvec.c ******************************************/ /* ** 2008 February 16 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file implements an object that represents a fixed-length ** bitmap. Bits are numbered starting with 1. ** ** A bitmap is used to record which pages of a database file have been ** journalled during a transaction, or which pages have the "dont-write" ** property. Usually only a few pages are meet either condition. ** So the bitmap is usually sparse and has low cardinality. ** But sometimes (for example when during a DROP of a large table) most ** or all of the pages in a database can get journalled. In those cases, ** the bitmap becomes dense with high cardinality. The algorithm needs ** to handle both cases well. ** ** The size of the bitmap is fixed when the object is created. ** ** All bits are clear when the bitmap is created. Individual bits ** may be set or cleared one at a time. ** ** Test operations are about 100 times more common that set operations. ** Clear operations are exceedingly rare. There are usually between ** 5 and 500 set operations per Bitvec object, though the number of sets can ** sometimes grow into tens of thousands or larger. The size of the ** Bitvec object is the number of pages in the database file at the ** start of a transaction, and is thus usually less than a few thousand, ** but can be as large as 2 billion for a really big database. */ /* #include "sqliteInt.h" */ /* Size of the Bitvec structure in bytes. */ #define BITVEC_SZ 512 /* Round the union size down to the nearest pointer boundary, since that's how ** it will be aligned within the Bitvec struct. */ #define BITVEC_USIZE (((BITVEC_SZ-(3*sizeof(u32)))/sizeof(Bitvec*))*sizeof(Bitvec*)) /* Type of the array "element" for the bitmap representation. ** Should be a power of 2, and ideally, evenly divide into BITVEC_USIZE. ** Setting this to the "natural word" size of your CPU may improve ** performance. */ #define BITVEC_TELEM u8 /* Size, in bits, of the bitmap element. */ #define BITVEC_SZELEM 8 /* Number of elements in a bitmap array. */ #define BITVEC_NELEM (BITVEC_USIZE/sizeof(BITVEC_TELEM)) /* Number of bits in the bitmap array. */ #define BITVEC_NBIT (BITVEC_NELEM*BITVEC_SZELEM) /* Number of u32 values in hash table. */ #define BITVEC_NINT (BITVEC_USIZE/sizeof(u32)) /* Maximum number of entries in hash table before ** sub-dividing and re-hashing. */ #define BITVEC_MXHASH (BITVEC_NINT/2) /* Hashing function for the aHash representation. ** Empirical testing showed that the *37 multiplier ** (an arbitrary prime)in the hash function provided ** no fewer collisions than the no-op *1. */ #define BITVEC_HASH(X) (((X)*1)%BITVEC_NINT) #define BITVEC_NPTR (BITVEC_USIZE/sizeof(Bitvec *)) /* ** A bitmap is an instance of the following structure. ** ** This bitmap records the existence of zero or more bits ** with values between 1 and iSize, inclusive. ** ** There are three possible representations of the bitmap. ** If iSize<=BITVEC_NBIT, then Bitvec.u.aBitmap[] is a straight ** bitmap. The least significant bit is bit 1. ** ** If iSize>BITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is ** a hash table that will hold up to BITVEC_MXHASH distinct values. ** ** Otherwise, the value i is redirected into one of BITVEC_NPTR ** sub-bitmaps pointed to by Bitvec.u.apSub[]. Each subbitmap ** handles up to iDivisor separate values of i. apSub[0] holds ** values between 1 and iDivisor. apSub[1] holds values between ** iDivisor+1 and 2*iDivisor. apSub[N] holds values between ** N*iDivisor+1 and (N+1)*iDivisor. Each subbitmap is normalized ** to hold deal with values between 1 and iDivisor. */ struct Bitvec { u32 iSize; /* Maximum bit index. Max iSize is 4,294,967,296. */ u32 nSet; /* Number of bits that are set - only valid for aHash ** element. Max is BITVEC_NINT. For BITVEC_SZ of 512, ** this would be 125. */ u32 iDivisor; /* Number of bits handled by each apSub[] entry. */ /* Should >=0 for apSub element. */ /* Max iDivisor is max(u32) / BITVEC_NPTR + 1. */ /* For a BITVEC_SZ of 512, this would be 34,359,739. */ union { BITVEC_TELEM aBitmap[BITVEC_NELEM]; /* Bitmap representation */ u32 aHash[BITVEC_NINT]; /* Hash table representation */ Bitvec *apSub[BITVEC_NPTR]; /* Recursive representation */ } u; }; /* ** Create a new bitmap object able to handle bits between 0 and iSize, ** inclusive. Return a pointer to the new object. Return NULL if ** malloc fails. */ SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32 iSize){ Bitvec *p; assert( sizeof(*p)==BITVEC_SZ ); p = sqlite3MallocZero( sizeof(*p) ); if( p ){ p->iSize = iSize; } return p; } /* ** Check to see if the i-th bit is set. Return true or false. ** If p is NULL (if the bitmap has not been created) or if ** i is out of range, then return false. */ SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){ assert( p!=0 ); i--; if( i>=p->iSize ) return 0; while( p->iDivisor ){ u32 bin = i/p->iDivisor; i = i%p->iDivisor; p = p->u.apSub[bin]; if (!p) { return 0; } } if( p->iSize<=BITVEC_NBIT ){ return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0; } else{ u32 h = BITVEC_HASH(i++); while( p->u.aHash[h] ){ if( p->u.aHash[h]==i ) return 1; h = (h+1) % BITVEC_NINT; } return 0; } } SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ return p!=0 && sqlite3BitvecTestNotNull(p,i); } /* ** Set the i-th bit. Return 0 on success and an error code if ** anything goes wrong. ** ** This routine might cause sub-bitmaps to be allocated. Failing ** to get the memory needed to hold the sub-bitmap is the only ** that can go wrong with an insert, assuming p and i are valid. ** ** The calling function must ensure that p is a valid Bitvec object ** and that the value for "i" is within range of the Bitvec object. ** Otherwise the behavior is undefined. */ SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec *p, u32 i){ u32 h; if( p==0 ) return SQLITE_OK; assert( i>0 ); assert( i<=p->iSize ); i--; while((p->iSize > BITVEC_NBIT) && p->iDivisor) { u32 bin = i/p->iDivisor; i = i%p->iDivisor; if( p->u.apSub[bin]==0 ){ p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor ); if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM; } p = p->u.apSub[bin]; } if( p->iSize<=BITVEC_NBIT ){ p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1)); return SQLITE_OK; } h = BITVEC_HASH(i++); /* if there wasn't a hash collision, and this doesn't */ /* completely fill the hash, then just add it without */ /* worring about sub-dividing and re-hashing. */ if( !p->u.aHash[h] ){ if (p->nSet<(BITVEC_NINT-1)) { goto bitvec_set_end; } else { goto bitvec_set_rehash; } } /* there was a collision, check to see if it's already */ /* in hash, if not, try to find a spot for it */ do { if( p->u.aHash[h]==i ) return SQLITE_OK; h++; if( h>=BITVEC_NINT ) h = 0; } while( p->u.aHash[h] ); /* we didn't find it in the hash. h points to the first */ /* available free spot. check to see if this is going to */ /* make our hash too "full". */ bitvec_set_rehash: if( p->nSet>=BITVEC_MXHASH ){ unsigned int j; int rc; u32 *aiValues = sqlite3StackAllocRaw(0, sizeof(p->u.aHash)); if( aiValues==0 ){ return SQLITE_NOMEM; }else{ memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); memset(p->u.apSub, 0, sizeof(p->u.apSub)); p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR; rc = sqlite3BitvecSet(p, i); for(j=0; jnSet++; p->u.aHash[h] = i; return SQLITE_OK; } /* ** Clear the i-th bit. ** ** pBuf must be a pointer to at least BITVEC_SZ bytes of temporary storage ** that BitvecClear can use to rebuilt its hash table. */ SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec *p, u32 i, void *pBuf){ if( p==0 ) return; assert( i>0 ); i--; while( p->iDivisor ){ u32 bin = i/p->iDivisor; i = i%p->iDivisor; p = p->u.apSub[bin]; if (!p) { return; } } if( p->iSize<=BITVEC_NBIT ){ p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1))); }else{ unsigned int j; u32 *aiValues = pBuf; memcpy(aiValues, p->u.aHash, sizeof(p->u.aHash)); memset(p->u.aHash, 0, sizeof(p->u.aHash)); p->nSet = 0; for(j=0; jnSet++; while( p->u.aHash[h] ){ h++; if( h>=BITVEC_NINT ) h = 0; } p->u.aHash[h] = aiValues[j]; } } } } /* ** Destroy a bitmap object. Reclaim all memory used. */ SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec *p){ if( p==0 ) return; if( p->iDivisor ){ unsigned int i; for(i=0; iu.apSub[i]); } } sqlite3_free(p); } /* ** Return the value of the iSize parameter specified when Bitvec *p ** was created. */ SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec *p){ return p->iSize; } /************** End of bitvec.c **********************************************/ /************** Begin file pcache.c ******************************************/ /* ** 2008 August 05 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file implements that page cache. */ /* #include "sqliteInt.h" */ /* ** A complete page cache is an instance of this structure. */ struct PCache { PgHdr *pDirty, *pDirtyTail; /* List of dirty pages in LRU order */ PgHdr *pSynced; /* Last synced page in dirty page list */ int nRefSum; /* Sum of ref counts over all pages */ int szCache; /* Configured cache size */ int szPage; /* Size of every page in this cache */ int szExtra; /* Size of extra space for each page */ u8 bPurgeable; /* True if pages are on backing store */ u8 eCreate; /* eCreate value for for xFetch() */ int (*xStress)(void*,PgHdr*); /* Call to try make a page clean */ void *pStress; /* Argument to xStress */ sqlite3_pcache *pCache; /* Pluggable cache module */ }; /********************************** Linked List Management ********************/ /* Allowed values for second argument to pcacheManageDirtyList() */ #define PCACHE_DIRTYLIST_REMOVE 1 /* Remove pPage from dirty list */ #define PCACHE_DIRTYLIST_ADD 2 /* Add pPage to the dirty list */ #define PCACHE_DIRTYLIST_FRONT 3 /* Move pPage to the front of the list */ /* ** Manage pPage's participation on the dirty list. Bits of the addRemove ** argument determines what operation to do. The 0x01 bit means first ** remove pPage from the dirty list. The 0x02 means add pPage back to ** the dirty list. Doing both moves pPage to the front of the dirty list. */ static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){ PCache *p = pPage->pCache; if( addRemove & PCACHE_DIRTYLIST_REMOVE ){ assert( pPage->pDirtyNext || pPage==p->pDirtyTail ); assert( pPage->pDirtyPrev || pPage==p->pDirty ); /* Update the PCache1.pSynced variable if necessary. */ if( p->pSynced==pPage ){ PgHdr *pSynced = pPage->pDirtyPrev; while( pSynced && (pSynced->flags&PGHDR_NEED_SYNC) ){ pSynced = pSynced->pDirtyPrev; } p->pSynced = pSynced; } if( pPage->pDirtyNext ){ pPage->pDirtyNext->pDirtyPrev = pPage->pDirtyPrev; }else{ assert( pPage==p->pDirtyTail ); p->pDirtyTail = pPage->pDirtyPrev; } if( pPage->pDirtyPrev ){ pPage->pDirtyPrev->pDirtyNext = pPage->pDirtyNext; }else{ assert( pPage==p->pDirty ); p->pDirty = pPage->pDirtyNext; if( p->pDirty==0 && p->bPurgeable ){ assert( p->eCreate==1 ); p->eCreate = 2; } } pPage->pDirtyNext = 0; pPage->pDirtyPrev = 0; } if( addRemove & PCACHE_DIRTYLIST_ADD ){ assert( pPage->pDirtyNext==0 && pPage->pDirtyPrev==0 && p->pDirty!=pPage ); pPage->pDirtyNext = p->pDirty; if( pPage->pDirtyNext ){ assert( pPage->pDirtyNext->pDirtyPrev==0 ); pPage->pDirtyNext->pDirtyPrev = pPage; }else{ p->pDirtyTail = pPage; if( p->bPurgeable ){ assert( p->eCreate==2 ); p->eCreate = 1; } } p->pDirty = pPage; if( !p->pSynced && 0==(pPage->flags&PGHDR_NEED_SYNC) ){ p->pSynced = pPage; } } } /* ** Wrapper around the pluggable caches xUnpin method. If the cache is ** being used for an in-memory database, this function is a no-op. */ static void pcacheUnpin(PgHdr *p){ if( p->pCache->bPurgeable ){ sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0); } } /* ** Compute the number of pages of cache requested. p->szCache is the ** cache size requested by the "PRAGMA cache_size" statement. ** ** */ static int numberOfCachePages(PCache *p){ if( p->szCache>=0 ){ /* IMPLEMENTATION-OF: R-42059-47211 If the argument N is positive then the ** suggested cache size is set to N. */ return p->szCache; }else{ /* IMPLEMENTATION-OF: R-61436-13639 If the argument N is negative, then ** the number of cache pages is adjusted to use approximately abs(N*1024) ** bytes of memory. */ return (int)((-1024*(i64)p->szCache)/(p->szPage+p->szExtra)); } } /*************************************************** General Interfaces ****** ** ** Initialize and shutdown the page cache subsystem. Neither of these ** functions are threadsafe. */ SQLITE_PRIVATE int sqlite3PcacheInitialize(void){ if( sqlite3GlobalConfig.pcache2.xInit==0 ){ /* IMPLEMENTATION-OF: R-26801-64137 If the xInit() method is NULL, then the ** built-in default page cache is used instead of the application defined ** page cache. */ sqlite3PCacheSetDefault(); } return sqlite3GlobalConfig.pcache2.xInit(sqlite3GlobalConfig.pcache2.pArg); } SQLITE_PRIVATE void sqlite3PcacheShutdown(void){ if( sqlite3GlobalConfig.pcache2.xShutdown ){ /* IMPLEMENTATION-OF: R-26000-56589 The xShutdown() method may be NULL. */ sqlite3GlobalConfig.pcache2.xShutdown(sqlite3GlobalConfig.pcache2.pArg); } } /* ** Return the size in bytes of a PCache object. */ SQLITE_PRIVATE int sqlite3PcacheSize(void){ return sizeof(PCache); } /* ** Create a new PCache object. Storage space to hold the object ** has already been allocated and is passed in as the p pointer. ** The caller discovers how much space needs to be allocated by ** calling sqlite3PcacheSize(). */ SQLITE_PRIVATE int sqlite3PcacheOpen( int szPage, /* Size of every page */ int szExtra, /* Extra space associated with each page */ int bPurgeable, /* True if pages are on backing store */ int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */ void *pStress, /* Argument to xStress */ PCache *p /* Preallocated space for the PCache */ ){ memset(p, 0, sizeof(PCache)); p->szPage = 1; p->szExtra = szExtra; p->bPurgeable = bPurgeable; p->eCreate = 2; p->xStress = xStress; p->pStress = pStress; p->szCache = 100; return sqlite3PcacheSetPageSize(p, szPage); } /* ** Change the page size for PCache object. The caller must ensure that there ** are no outstanding page references when this function is called. */ SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){ assert( pCache->nRefSum==0 && pCache->pDirty==0 ); if( pCache->szPage ){ sqlite3_pcache *pNew; pNew = sqlite3GlobalConfig.pcache2.xCreate( szPage, pCache->szExtra + ROUND8(sizeof(PgHdr)), pCache->bPurgeable ); if( pNew==0 ) return SQLITE_NOMEM; sqlite3GlobalConfig.pcache2.xCachesize(pNew, numberOfCachePages(pCache)); if( pCache->pCache ){ sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); } pCache->pCache = pNew; pCache->szPage = szPage; } return SQLITE_OK; } /* ** Try to obtain a page from the cache. ** ** This routine returns a pointer to an sqlite3_pcache_page object if ** such an object is already in cache, or if a new one is created. ** This routine returns a NULL pointer if the object was not in cache ** and could not be created. ** ** The createFlags should be 0 to check for existing pages and should ** be 3 (not 1, but 3) to try to create a new page. ** ** If the createFlag is 0, then NULL is always returned if the page ** is not already in the cache. If createFlag is 1, then a new page ** is created only if that can be done without spilling dirty pages ** and without exceeding the cache size limit. ** ** The caller needs to invoke sqlite3PcacheFetchFinish() to properly ** initialize the sqlite3_pcache_page object and convert it into a ** PgHdr object. The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish() ** routines are split this way for performance reasons. When separated ** they can both (usually) operate without having to push values to ** the stack on entry and pop them back off on exit, which saves a ** lot of pushing and popping. */ SQLITE_PRIVATE sqlite3_pcache_page *sqlite3PcacheFetch( PCache *pCache, /* Obtain the page from this cache */ Pgno pgno, /* Page number to obtain */ int createFlag /* If true, create page if it does not exist already */ ){ int eCreate; assert( pCache!=0 ); assert( pCache->pCache!=0 ); assert( createFlag==3 || createFlag==0 ); assert( pgno>0 ); /* eCreate defines what to do if the page does not exist. ** 0 Do not allocate a new page. (createFlag==0) ** 1 Allocate a new page if doing so is inexpensive. ** (createFlag==1 AND bPurgeable AND pDirty) ** 2 Allocate a new page even it doing so is difficult. ** (createFlag==1 AND !(bPurgeable AND pDirty) */ eCreate = createFlag & pCache->eCreate; assert( eCreate==0 || eCreate==1 || eCreate==2 ); assert( createFlag==0 || pCache->eCreate==eCreate ); assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) ); return sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate); } /* ** If the sqlite3PcacheFetch() routine is unable to allocate a new ** page because new clean pages are available for reuse and the cache ** size limit has been reached, then this routine can be invoked to ** try harder to allocate a page. This routine might invoke the stress ** callback to spill dirty pages to the journal. It will then try to ** allocate the new page and will only fail to allocate a new page on ** an OOM error. ** ** This routine should be invoked only after sqlite3PcacheFetch() fails. */ SQLITE_PRIVATE int sqlite3PcacheFetchStress( PCache *pCache, /* Obtain the page from this cache */ Pgno pgno, /* Page number to obtain */ sqlite3_pcache_page **ppPage /* Write result here */ ){ PgHdr *pPg; if( pCache->eCreate==2 ) return 0; /* Find a dirty page to write-out and recycle. First try to find a ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC ** cleared), but if that is not possible settle for any other ** unreferenced dirty page. */ for(pPg=pCache->pSynced; pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); pPg=pPg->pDirtyPrev ); pCache->pSynced = pPg; if( !pPg ){ for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev); } if( pPg ){ int rc; #ifdef SQLITE_LOG_CACHE_SPILL sqlite3_log(SQLITE_FULL, "spill page %d making room for %d - cache used: %d/%d", pPg->pgno, pgno, sqlite3GlobalConfig.pcache.xPagecount(pCache->pCache), numberOfCachePages(pCache)); #endif rc = pCache->xStress(pCache->pStress, pPg); if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){ return rc; } } *ppPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2); return *ppPage==0 ? SQLITE_NOMEM : SQLITE_OK; } /* ** This is a helper routine for sqlite3PcacheFetchFinish() ** ** In the uncommon case where the page being fetched has not been ** initialized, this routine is invoked to do the initialization. ** This routine is broken out into a separate function since it ** requires extra stack manipulation that can be avoided in the common ** case. */ static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit( PCache *pCache, /* Obtain the page from this cache */ Pgno pgno, /* Page number obtained */ sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ ){ PgHdr *pPgHdr; assert( pPage!=0 ); pPgHdr = (PgHdr*)pPage->pExtra; assert( pPgHdr->pPage==0 ); memset(pPgHdr, 0, sizeof(PgHdr)); pPgHdr->pPage = pPage; pPgHdr->pData = pPage->pBuf; pPgHdr->pExtra = (void *)&pPgHdr[1]; memset(pPgHdr->pExtra, 0, pCache->szExtra); pPgHdr->pCache = pCache; pPgHdr->pgno = pgno; pPgHdr->flags = PGHDR_CLEAN; return sqlite3PcacheFetchFinish(pCache,pgno,pPage); } /* ** This routine converts the sqlite3_pcache_page object returned by ** sqlite3PcacheFetch() into an initialized PgHdr object. This routine ** must be called after sqlite3PcacheFetch() in order to get a usable ** result. */ SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish( PCache *pCache, /* Obtain the page from this cache */ Pgno pgno, /* Page number obtained */ sqlite3_pcache_page *pPage /* Page obtained by prior PcacheFetch() call */ ){ PgHdr *pPgHdr; assert( pPage!=0 ); pPgHdr = (PgHdr *)pPage->pExtra; if( !pPgHdr->pPage ){ return pcacheFetchFinishWithInit(pCache, pgno, pPage); } pCache->nRefSum++; pPgHdr->nRef++; return pPgHdr; } /* ** Decrement the reference count on a page. If the page is clean and the ** reference count drops to 0, then it is made eligible for recycling. */ SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ assert( p->nRef>0 ); p->pCache->nRefSum--; if( (--p->nRef)==0 ){ if( p->flags&PGHDR_CLEAN ){ pcacheUnpin(p); }else if( p->pDirtyPrev!=0 ){ /* Move the page to the head of the dirty list. */ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); } } } /* ** Increase the reference count of a supplied page by 1. */ SQLITE_PRIVATE void sqlite3PcacheRef(PgHdr *p){ assert(p->nRef>0); p->nRef++; p->pCache->nRefSum++; } /* ** Drop a page from the cache. There must be exactly one reference to the ** page. This function deletes that reference, so after it returns the ** page pointed to by p is invalid. */ SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){ assert( p->nRef==1 ); if( p->flags&PGHDR_DIRTY ){ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); } p->pCache->nRefSum--; sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); } /* ** Make sure the page is marked as dirty. If it isn't dirty already, ** make it so. */ SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ assert( p->nRef>0 ); if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){ p->flags &= ~PGHDR_DONT_WRITE; if( p->flags & PGHDR_CLEAN ){ p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN); assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY ); pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); } } } /* ** Make sure the page is marked as clean. If it isn't clean already, ** make it so. */ SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr *p){ if( (p->flags & PGHDR_DIRTY) ){ assert( (p->flags & PGHDR_CLEAN)==0 ); pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE); p->flags |= PGHDR_CLEAN; if( p->nRef==0 ){ pcacheUnpin(p); } } } /* ** Make every page in the cache clean. */ SQLITE_PRIVATE void sqlite3PcacheCleanAll(PCache *pCache){ PgHdr *p; while( (p = pCache->pDirty)!=0 ){ sqlite3PcacheMakeClean(p); } } /* ** Clear the PGHDR_NEED_SYNC flag from all dirty pages. */ SQLITE_PRIVATE void sqlite3PcacheClearSyncFlags(PCache *pCache){ PgHdr *p; for(p=pCache->pDirty; p; p=p->pDirtyNext){ p->flags &= ~PGHDR_NEED_SYNC; } pCache->pSynced = pCache->pDirtyTail; } /* ** Change the page number of page p to newPgno. */ SQLITE_PRIVATE void sqlite3PcacheMove(PgHdr *p, Pgno newPgno){ PCache *pCache = p->pCache; assert( p->nRef>0 ); assert( newPgno>0 ); sqlite3GlobalConfig.pcache2.xRekey(pCache->pCache, p->pPage, p->pgno,newPgno); p->pgno = newPgno; if( (p->flags&PGHDR_DIRTY) && (p->flags&PGHDR_NEED_SYNC) ){ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_FRONT); } } /* ** Drop every cache entry whose page number is greater than "pgno". The ** caller must ensure that there are no outstanding references to any pages ** other than page 1 with a page number greater than pgno. ** ** If there is a reference to page 1 and the pgno parameter passed to this ** function is 0, then the data area associated with page 1 is zeroed, but ** the page object is not dropped. */ SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){ if( pCache->pCache ){ PgHdr *p; PgHdr *pNext; for(p=pCache->pDirty; p; p=pNext){ pNext = p->pDirtyNext; /* This routine never gets call with a positive pgno except right ** after sqlite3PcacheCleanAll(). So if there are dirty pages, ** it must be that pgno==0. */ assert( p->pgno>0 ); if( ALWAYS(p->pgno>pgno) ){ assert( p->flags&PGHDR_DIRTY ); sqlite3PcacheMakeClean(p); } } if( pgno==0 && pCache->nRefSum ){ sqlite3_pcache_page *pPage1; pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0); if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because ** pCache->nRefSum>0 */ memset(pPage1->pBuf, 0, pCache->szPage); pgno = 1; } } sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1); } } /* ** Close a cache. */ SQLITE_PRIVATE void sqlite3PcacheClose(PCache *pCache){ assert( pCache->pCache!=0 ); sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); } /* ** Discard the contents of the cache. */ SQLITE_PRIVATE void sqlite3PcacheClear(PCache *pCache){ sqlite3PcacheTruncate(pCache, 0); } /* ** Merge two lists of pages connected by pDirty and in pgno order. ** Do not both fixing the pDirtyPrev pointers. */ static PgHdr *pcacheMergeDirtyList(PgHdr *pA, PgHdr *pB){ PgHdr result, *pTail; pTail = &result; while( pA && pB ){ if( pA->pgnopgno ){ pTail->pDirty = pA; pTail = pA; pA = pA->pDirty; }else{ pTail->pDirty = pB; pTail = pB; pB = pB->pDirty; } } if( pA ){ pTail->pDirty = pA; }else if( pB ){ pTail->pDirty = pB; }else{ pTail->pDirty = 0; } return result.pDirty; } /* ** Sort the list of pages in accending order by pgno. Pages are ** connected by pDirty pointers. The pDirtyPrev pointers are ** corrupted by this sort. ** ** Since there cannot be more than 2^31 distinct pages in a database, ** there cannot be more than 31 buckets required by the merge sorter. ** One extra bucket is added to catch overflow in case something ** ever changes to make the previous sentence incorrect. */ #define N_SORT_BUCKET 32 static PgHdr *pcacheSortDirtyList(PgHdr *pIn){ PgHdr *a[N_SORT_BUCKET], *p; int i; memset(a, 0, sizeof(a)); while( pIn ){ p = pIn; pIn = p->pDirty; p->pDirty = 0; for(i=0; ALWAYS(ipDirty; p; p=p->pDirtyNext){ p->pDirty = p->pDirtyNext; } return pcacheSortDirtyList(pCache->pDirty); } /* ** Return the total number of references to all pages held by the cache. ** ** This is not the total number of pages referenced, but the sum of the ** reference count for all pages. */ SQLITE_PRIVATE int sqlite3PcacheRefCount(PCache *pCache){ return pCache->nRefSum; } /* ** Return the number of references to the page supplied as an argument. */ SQLITE_PRIVATE int sqlite3PcachePageRefcount(PgHdr *p){ return p->nRef; } #ifdef SQLITE_TEST /* ** Return the total number of pages in the cache. */ SQLITE_PRIVATE int sqlite3PcachePagecount(PCache *pCache){ assert( pCache->pCache!=0 ); return sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache); } /* ** Get the suggested cache-size value. */ SQLITE_PRIVATE int sqlite3PcacheGetCachesize(PCache *pCache){ return numberOfCachePages(pCache); } #endif /* ** Set the suggested cache-size value. */ SQLITE_PRIVATE void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage){ assert( pCache->pCache!=0 ); pCache->szCache = mxPage; sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache, numberOfCachePages(pCache)); } #if 0 /* ** Free up as much memory as possible from the page cache. */ SQLITE_PRIVATE void sqlite3PcacheShrink(PCache *pCache){ assert( pCache->pCache!=0 ); sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache); } #endif /* ** Return the size of the header added by this middleware layer ** in the page-cache hierarchy. */ SQLITE_PRIVATE int sqlite3HeaderSizePcache(void){ return ROUND8(sizeof(PgHdr)); } #if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG) /* ** For all dirty pages currently in the cache, invoke the specified ** callback. This is only used if the SQLITE_CHECK_PAGES macro is ** defined. */ SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHdr *)){ PgHdr *pDirty; for(pDirty=pCache->pDirty; pDirty; pDirty=pDirty->pDirtyNext){ xIter(pDirty); } } #endif /************** End of pcache.c **********************************************/ /************** Begin file pcache1.c *****************************************/ /* ** 2008 November 05 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file implements the default page cache implementation (the ** sqlite3_pcache interface). It also contains part of the implementation ** of the SQLITE_CONFIG_PAGECACHE and sqlite3_release_memory() features. ** If the default page cache implementation is overridden, then neither of ** these two features are available. ** ** A Page cache line looks like this: ** ** ------------------------------------------------------------- ** | database page content | PgHdr1 | MemPage | PgHdr | ** ------------------------------------------------------------- ** ** The database page content is up front (so that buffer overreads tend to ** flow harmlessly into the PgHdr1, MemPage, and PgHdr extensions). MemPage ** is the extension added by the btree.c module containing information such ** as the database page number and how that database page is used. PgHdr ** is added by the pcache.c layer and contains information used to keep track ** of which pages are "dirty". PgHdr1 is an extension added by this ** module (pcache1.c). The PgHdr1 header is a subclass of sqlite3_pcache_page. ** PgHdr1 contains information needed to look up a page by its page number. ** The superclass sqlite3_pcache_page.pBuf points to the start of the ** database page content and sqlite3_pcache_page.pExtra points to PgHdr. ** ** The size of the extension (MemPage+PgHdr+PgHdr1) can be determined at ** runtime using sqlite3_config(SQLITE_CONFIG_PCACHE_HDRSZ, &size). The ** sizes of the extensions sum to 272 bytes on x64 for 3.8.10, but this ** size can vary according to architecture, compile-time options, and ** SQLite library version number. ** ** If SQLITE_PCACHE_SEPARATE_HEADER is defined, then the extension is obtained ** using a separate memory allocation from the database page content. This ** seeks to overcome the "clownshoe" problem (also called "internal ** fragmentation" in academic literature) of allocating a few bytes more ** than a power of two with the memory allocator rounding up to the next ** power of two, and leaving the rounded-up space unused. ** ** This module tracks pointers to PgHdr1 objects. Only pcache.c communicates ** with this module. Information is passed back and forth as PgHdr1 pointers. ** ** The pcache.c and pager.c modules deal pointers to PgHdr objects. ** The btree.c module deals with pointers to MemPage objects. ** ** SOURCE OF PAGE CACHE MEMORY: ** ** Memory for a page might come from any of three sources: ** ** (1) The general-purpose memory allocator - sqlite3Malloc() ** (2) Global page-cache memory provided using sqlite3_config() with ** SQLITE_CONFIG_PAGECACHE. ** (3) PCache-local bulk allocation. ** ** The third case is a chunk of heap memory (defaulting to 100 pages worth) ** that is allocated when the page cache is created. The size of the local ** bulk allocation can be adjusted using ** ** sqlite3_config(SQLITE_CONFIG_PAGECACHE, 0, 0, N). ** ** If N is positive, then N pages worth of memory are allocated using a single ** sqlite3Malloc() call and that memory is used for the first N pages allocated. ** Or if N is negative, then -1024*N bytes of memory are allocated and used ** for as many pages as can be accomodated. ** ** Only one of (2) or (3) can be used. Once the memory available to (2) or ** (3) is exhausted, subsequent allocations fail over to the general-purpose ** memory allocator (1). ** ** Earlier versions of SQLite used only methods (1) and (2). But experiments ** show that method (3) with N==100 provides about a 5% performance boost for ** common workloads. */ /* #include "sqliteInt.h" */ typedef struct PCache1 PCache1; typedef struct PgHdr1 PgHdr1; typedef struct PgFreeslot PgFreeslot; typedef struct PGroup PGroup; /* ** Each cache entry is represented by an instance of the following ** structure. Unless SQLITE_PCACHE_SEPARATE_HEADER is defined, a buffer of ** PgHdr1.pCache->szPage bytes is allocated directly before this structure ** in memory. */ struct PgHdr1 { sqlite3_pcache_page page; /* Base class. Must be first. pBuf & pExtra */ unsigned int iKey; /* Key value (page number) */ u8 isPinned; /* Page in use, not on the LRU list */ u8 isBulkLocal; /* This page from bulk local storage */ u8 isAnchor; /* This is the PGroup.lru element */ PgHdr1 *pNext; /* Next in hash table chain */ PCache1 *pCache; /* Cache that currently owns this page */ PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ PgHdr1 *pLruPrev; /* Previous in LRU list of unpinned pages */ }; /* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set ** of one or more PCaches that are able to recycle each other's unpinned ** pages when they are under memory pressure. A PGroup is an instance of ** the following object. ** ** This page cache implementation works in one of two modes: ** ** (1) Every PCache is the sole member of its own PGroup. There is ** one PGroup per PCache. ** ** (2) There is a single global PGroup that all PCaches are a member ** of. ** ** Mode 1 uses more memory (since PCache instances are not able to rob ** unused pages from other PCaches) but it also operates without a mutex, ** and is therefore often faster. Mode 2 requires a mutex in order to be ** threadsafe, but recycles pages more efficiently. ** ** For mode (1), PGroup.mutex is NULL. For mode (2) there is only a single ** PGroup which is the pcache1.grp global variable and its mutex is ** SQLITE_MUTEX_STATIC_LRU. */ struct PGroup { sqlite3_mutex *mutex; /* MUTEX_STATIC_LRU or NULL */ unsigned int nMaxPage; /* Sum of nMax for purgeable caches */ unsigned int nMinPage; /* Sum of nMin for purgeable caches */ unsigned int mxPinned; /* nMaxpage + 10 - nMinPage */ unsigned int nCurrentPage; /* Number of purgeable pages allocated */ PgHdr1 lru; /* The beginning and end of the LRU list */ }; /* Each page cache is an instance of the following object. Every ** open database file (including each in-memory database and each ** temporary or transient database) has a single page cache which ** is an instance of this object. ** ** Pointers to structures of this type are cast and returned as ** opaque sqlite3_pcache* handles. */ struct PCache1 { /* Cache configuration parameters. Page size (szPage) and the purgeable ** flag (bPurgeable) are set when the cache is created. nMax may be ** modified at any time by a call to the pcache1Cachesize() method. ** The PGroup mutex must be held when accessing nMax. */ PGroup *pGroup; /* PGroup this cache belongs to */ int szPage; /* Size of database content section */ int szExtra; /* sizeof(MemPage)+sizeof(PgHdr) */ int szAlloc; /* Total size of one pcache line */ int bPurgeable; /* True if cache is purgeable */ unsigned int nMin; /* Minimum number of pages reserved */ unsigned int nMax; /* Configured "cache_size" value */ unsigned int n90pct; /* nMax*9/10 */ unsigned int iMaxKey; /* Largest key seen since xTruncate() */ /* Hash table of all pages. The following variables may only be accessed ** when the accessor is holding the PGroup mutex. */ unsigned int nRecyclable; /* Number of pages in the LRU list */ unsigned int nPage; /* Total number of pages in apHash */ unsigned int nHash; /* Number of slots in apHash[] */ PgHdr1 **apHash; /* Hash table for fast lookup by key */ PgHdr1 *pFree; /* List of unused pcache-local pages */ void *pBulk; /* Bulk memory used by pcache-local */ }; /* ** Free slots in the allocator used to divide up the global page cache ** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism. */ struct PgFreeslot { PgFreeslot *pNext; /* Next free slot */ }; /* ** Global data used by this cache. */ static SQLITE_WSD struct PCacheGlobal { PGroup grp; /* The global PGroup for mode (2) */ /* Variables related to SQLITE_CONFIG_PAGECACHE settings. The ** szSlot, nSlot, pStart, pEnd, nReserve, and isInit values are all ** fixed at sqlite3BtreeInitialize() time and do not require mutex protection. ** The nFreeSlot and pFree values do require mutex protection. */ int isInit; /* True if initialized */ int separateCache; /* Use a new PGroup for each PCache */ int nInitPage; /* Initial bulk allocation size */ int szSlot; /* Size of each free slot */ int nSlot; /* The number of pcache slots */ int nReserve; /* Try to keep nFreeSlot above this */ void *pStart, *pEnd; /* Bounds of global page cache memory */ /* Above requires no mutex. Use mutex below for variable that follow. */ sqlite3_mutex *mutex; /* Mutex for accessing the following: */ PgFreeslot *pFree; /* Free page blocks */ int nFreeSlot; /* Number of unused pcache slots */ /* The following value requires a mutex to change. We skip the mutex on ** reading because (1) most platforms read a 32-bit integer atomically and ** (2) even if an incorrect value is read, no great harm is done since this ** is really just an optimization. */ int bUnderPressure; /* True if low on PAGECACHE memory */ } pcache1_g; /* ** All code in this file should access the global structure above via the ** alias "pcache1". This ensures that the WSD emulation is used when ** compiling for systems that do not support real WSD. */ #define pcache1 (GLOBAL(struct PCacheGlobal, pcache1_g)) /* ** Macros to enter and leave the PCache LRU mutex. */ #if !defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0 # define pcache1EnterMutex(X) assert((X)->mutex==0) # define pcache1LeaveMutex(X) assert((X)->mutex==0) # define PCACHE1_MIGHT_USE_GROUP_MUTEX 0 #else # define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) # define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex) # define PCACHE1_MIGHT_USE_GROUP_MUTEX 1 #endif /******************************************************************************/ /******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/ /* ** This function is called during initialization if a static buffer is ** supplied to use for the page-cache by passing the SQLITE_CONFIG_PAGECACHE ** verb to sqlite3_config(). Parameter pBuf points to an allocation large ** enough to contain 'n' buffers of 'sz' bytes each. ** ** This routine is called from sqlite3_initialize() and so it is guaranteed ** to be serialized already. There is no need for further mutexing. */ SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){ if( pcache1.isInit ){ PgFreeslot *p; if( pBuf==0 ) sz = n = 0; sz = ROUNDDOWN8(sz); pcache1.szSlot = sz; pcache1.nSlot = pcache1.nFreeSlot = n; pcache1.nReserve = n>90 ? 10 : (n/10 + 1); pcache1.pStart = pBuf; pcache1.pFree = 0; pcache1.bUnderPressure = 0; while( n-- ){ p = (PgFreeslot*)pBuf; p->pNext = pcache1.pFree; pcache1.pFree = p; pBuf = (void*)&((char*)pBuf)[sz]; } pcache1.pEnd = pBuf; } } /* ** Try to initialize the pCache->pFree and pCache->pBulk fields. Return ** true if pCache->pFree ends up containing one or more free pages. */ static int pcache1InitBulk(PCache1 *pCache){ i64 szBulk; char *zBulk; if( pcache1.nInitPage==0 ) return 0; /* Do not bother with a bulk allocation if the cache size very small */ if( pCache->nMax<3 ) return 0; if( pcache1.nInitPage>0 ){ szBulk = pCache->szAlloc * (i64)pcache1.nInitPage; }else{ szBulk = -1024 * (i64)pcache1.nInitPage; } if( szBulk > pCache->szAlloc*(i64)pCache->nMax ){ szBulk = pCache->szAlloc*pCache->nMax; } zBulk = pCache->pBulk = sqlite3Malloc( szBulk ); if( zBulk ){ int nBulk = sqlite3MallocSize(zBulk)/pCache->szAlloc; int i; for(i=0; iszPage]; pX->page.pBuf = zBulk; pX->page.pExtra = &pX[1]; pX->isBulkLocal = 1; pX->isAnchor = 0; pX->pNext = pCache->pFree; pCache->pFree = pX; zBulk += pCache->szAlloc; } } return pCache->pFree!=0; } /* ** Malloc function used within this file to allocate space from the buffer ** configured using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no ** such buffer exists or there is no space left in it, this function falls ** back to sqlite3Malloc(). ** ** Multiple threads can run this routine at the same time. Global variables ** in pcache1 need to be protected via mutex. */ static void *pcache1Alloc(int nByte){ void *p = 0; assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); if( nByte<=pcache1.szSlot ){ sqlite3_mutex_enter(pcache1.mutex); p = (PgHdr1 *)pcache1.pFree; if( p ){ pcache1.pFree = pcache1.pFree->pNext; pcache1.nFreeSlot--; pcache1.bUnderPressure = pcache1.nFreeSlot=0 ); sqlite3StatusSet(SQLITE_STATUS_PAGECACHE_SIZE, nByte); sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_USED, 1); } sqlite3_mutex_leave(pcache1.mutex); } if( p==0 ){ /* Memory is not available in the SQLITE_CONFIG_PAGECACHE pool. Get ** it from sqlite3Malloc instead. */ p = sqlite3Malloc(nByte); #ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS if( p ){ int sz = sqlite3MallocSize(p); sqlite3_mutex_enter(pcache1.mutex); sqlite3StatusSet(SQLITE_STATUS_PAGECACHE_SIZE, nByte); sqlite3StatusUp(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz); sqlite3_mutex_leave(pcache1.mutex); } #endif sqlite3MemdebugSetType(p, MEMTYPE_PCACHE); } return p; } /* ** Free an allocated buffer obtained from pcache1Alloc(). */ static void pcache1Free(void *p){ int nFreed = 0; if( p==0 ) return; if( p>=pcache1.pStart && ppNext = pcache1.pFree; pcache1.pFree = pSlot; pcache1.nFreeSlot++; pcache1.bUnderPressure = pcache1.nFreeSlot=pcache1.pStart && ppGroup->mutex) ); if( pCache->pFree || (pCache->nPage==0 && pcache1InitBulk(pCache)) ){ p = pCache->pFree; pCache->pFree = p->pNext; p->pNext = 0; }else{ #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT /* The group mutex must be released before pcache1Alloc() is called. This ** is because it might call sqlite3_release_memory(), which assumes that ** this mutex is not held. */ assert( pcache1.separateCache==0 ); assert( pCache->pGroup==&pcache1.grp ); pcache1LeaveMutex(pCache->pGroup); #endif #ifdef SQLITE_PCACHE_SEPARATE_HEADER pPg = pcache1Alloc(pCache->szPage); p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); if( !pPg || !p ){ pcache1Free(pPg); sqlite3_free(p); pPg = 0; } #else pPg = pcache1Alloc(pCache->szAlloc); p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; #endif #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT pcache1EnterMutex(pCache->pGroup); #endif if( pPg==0 ) return 0; p->page.pBuf = pPg; p->page.pExtra = &p[1]; p->isBulkLocal = 0; p->isAnchor = 0; } if( pCache->bPurgeable ){ pCache->pGroup->nCurrentPage++; } return p; } /* ** Free a page object allocated by pcache1AllocPage(). */ static void pcache1FreePage(PgHdr1 *p){ PCache1 *pCache; assert( p!=0 ); pCache = p->pCache; assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); if( p->isBulkLocal ){ p->pNext = pCache->pFree; pCache->pFree = p; }else{ pcache1Free(p->page.pBuf); #ifdef SQLITE_PCACHE_SEPARATE_HEADER sqlite3_free(p); #endif } if( pCache->bPurgeable ){ pCache->pGroup->nCurrentPage--; } } /* ** Malloc function used by SQLite to obtain space from the buffer configured ** using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no such buffer ** exists, this function falls back to sqlite3Malloc(). */ SQLITE_PRIVATE void *sqlite3PageMalloc(int sz){ return pcache1Alloc(sz); } /* ** Free an allocated buffer obtained from sqlite3PageMalloc(). */ SQLITE_PRIVATE void sqlite3PageFree(void *p){ pcache1Free(p); } /* ** Return true if it desirable to avoid allocating a new page cache ** entry. ** ** If memory was allocated specifically to the page cache using ** SQLITE_CONFIG_PAGECACHE but that memory has all been used, then ** it is desirable to avoid allocating a new page cache entry because ** presumably SQLITE_CONFIG_PAGECACHE was suppose to be sufficient ** for all page cache needs and we should not need to spill the ** allocation onto the heap. ** ** Or, the heap is used for all page cache memory but the heap is ** under memory pressure, then again it is desirable to avoid ** allocating a new page cache entry in order to avoid stressing ** the heap even further. */ static int pcache1UnderMemoryPressure(PCache1 *pCache){ if( pcache1.nSlot && (pCache->szPage+pCache->szExtra)<=pcache1.szSlot ){ return pcache1.bUnderPressure; }else{ return sqlite3HeapNearlyFull(); } } /******************************************************************************/ /******** General Implementation Functions ************************************/ /* ** This function is used to resize the hash table used by the cache passed ** as the first argument. ** ** The PCache mutex must be held when this function is called. */ static void pcache1ResizeHash(PCache1 *p){ PgHdr1 **apNew; unsigned int nNew; unsigned int i; assert( sqlite3_mutex_held(p->pGroup->mutex) ); nNew = p->nHash*2; if( nNew<256 ){ nNew = 256; } pcache1LeaveMutex(p->pGroup); apNew = (PgHdr1 **)sqlite3MallocZero(sizeof(PgHdr1 *)*nNew); pcache1EnterMutex(p->pGroup); if( apNew ){ for(i=0; inHash; i++){ PgHdr1 *pPage; PgHdr1 *pNext = p->apHash[i]; while( (pPage = pNext)!=0 ){ unsigned int h = pPage->iKey % nNew; pNext = pPage->pNext; pPage->pNext = apNew[h]; apNew[h] = pPage; } } sqlite3_free(p->apHash); p->apHash = apNew; p->nHash = nNew; } } /* ** This function is used internally to remove the page pPage from the ** PGroup LRU list, if is part of it. If pPage is not part of the PGroup ** LRU list, then this function is a no-op. ** ** The PGroup mutex must be held when this function is called. */ static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){ PCache1 *pCache; assert( pPage!=0 ); assert( pPage->isPinned==0 ); pCache = pPage->pCache; assert( pPage->pLruNext ); assert( pPage->pLruPrev ); assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); pPage->pLruPrev->pLruNext = pPage->pLruNext; pPage->pLruNext->pLruPrev = pPage->pLruPrev; pPage->pLruNext = 0; pPage->pLruPrev = 0; pPage->isPinned = 1; assert( pPage->isAnchor==0 ); assert( pCache->pGroup->lru.isAnchor==1 ); pCache->nRecyclable--; return pPage; } /* ** Remove the page supplied as an argument from the hash table ** (PCache1.apHash structure) that it is currently stored in. ** Also free the page if freePage is true. ** ** The PGroup mutex must be held when this function is called. */ static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){ unsigned int h; PCache1 *pCache = pPage->pCache; PgHdr1 **pp; assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); h = pPage->iKey % pCache->nHash; for(pp=&pCache->apHash[h]; (*pp)!=pPage; pp=&(*pp)->pNext); *pp = (*pp)->pNext; pCache->nPage--; if( freeFlag ) pcache1FreePage(pPage); } /* ** If there are currently more than nMaxPage pages allocated, try ** to recycle pages to reduce the number allocated to nMaxPage. */ static void pcache1EnforceMaxPage(PCache1 *pCache){ PGroup *pGroup = pCache->pGroup; PgHdr1 *p; assert( sqlite3_mutex_held(pGroup->mutex) ); while( pGroup->nCurrentPage>pGroup->nMaxPage && (p=pGroup->lru.pLruPrev)->isAnchor==0 ){ assert( p->pCache->pGroup==pGroup ); assert( p->isPinned==0 ); pcache1PinPage(p); pcache1RemoveFromHash(p, 1); } if( pCache->nPage==0 && pCache->pBulk ){ sqlite3_free(pCache->pBulk); pCache->pBulk = pCache->pFree = 0; } } /* ** Discard all pages from cache pCache with a page number (key value) ** greater than or equal to iLimit. Any pinned pages that meet this ** criteria are unpinned before they are discarded. ** ** The PCache mutex must be held when this function is called. */ static void pcache1TruncateUnsafe( PCache1 *pCache, /* The cache to truncate */ unsigned int iLimit /* Drop pages with this pgno or larger */ ){ TESTONLY( unsigned int nPage = 0; ) /* To assert pCache->nPage is correct */ unsigned int h; assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); for(h=0; hnHash; h++){ PgHdr1 **pp = &pCache->apHash[h]; PgHdr1 *pPage; while( (pPage = *pp)!=0 ){ if( pPage->iKey>=iLimit ){ pCache->nPage--; *pp = pPage->pNext; if( !pPage->isPinned ) pcache1PinPage(pPage); pcache1FreePage(pPage); }else{ pp = &pPage->pNext; TESTONLY( nPage++; ) } } } assert( pCache->nPage==nPage ); } /******************************************************************************/ /******** sqlite3_pcache Methods **********************************************/ /* ** Implementation of the sqlite3_pcache.xInit method. */ static int pcache1Init(void *NotUsed){ UNUSED_PARAMETER(NotUsed); assert( pcache1.isInit==0 ); memset(&pcache1, 0, sizeof(pcache1)); /* ** The pcache1.separateCache variable is true if each PCache has its own ** private PGroup (mode-1). pcache1.separateCache is false if the single ** PGroup in pcache1.grp is used for all page caches (mode-2). ** ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT ** ** * Use a unified cache in single-threaded applications that have ** configured a start-time buffer for use as page-cache memory using ** sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, sz, N) with non-NULL ** pBuf argument. ** ** * Otherwise use separate caches (mode-1) */ #if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) pcache1.separateCache = 0; #elif SQLITE_THREADSAFE pcache1.separateCache = sqlite3GlobalConfig.pPage==0 || sqlite3GlobalConfig.bCoreMutex>0; #else pcache1.separateCache = sqlite3GlobalConfig.pPage==0; #endif #if SQLITE_THREADSAFE if( sqlite3GlobalConfig.bCoreMutex ){ pcache1.grp.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU); pcache1.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_PMEM); } #endif if( pcache1.separateCache && sqlite3GlobalConfig.nPage!=0 && sqlite3GlobalConfig.pPage==0 ){ pcache1.nInitPage = sqlite3GlobalConfig.nPage; }else{ pcache1.nInitPage = 0; } pcache1.grp.mxPinned = 10; pcache1.isInit = 1; return SQLITE_OK; } /* ** Implementation of the sqlite3_pcache.xShutdown method. ** Note that the static mutex allocated in xInit does ** not need to be freed. */ static void pcache1Shutdown(void *NotUsed){ UNUSED_PARAMETER(NotUsed); assert( pcache1.isInit!=0 ); memset(&pcache1, 0, sizeof(pcache1)); } /* forward declaration */ static void pcache1Destroy(sqlite3_pcache *p); /* ** Implementation of the sqlite3_pcache.xCreate method. ** ** Allocate a new cache. */ static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){ PCache1 *pCache; /* The newly created page cache */ PGroup *pGroup; /* The group the new page cache will belong to */ int sz; /* Bytes of memory required to allocate the new cache */ assert( (szPage & (szPage-1))==0 && szPage>=512 && szPage<=65536 ); assert( szExtra < 300 ); sz = sizeof(PCache1) + sizeof(PGroup)*pcache1.separateCache; pCache = (PCache1 *)sqlite3MallocZero(sz); if( pCache ){ if( pcache1.separateCache ){ pGroup = (PGroup*)&pCache[1]; pGroup->mxPinned = 10; }else{ pGroup = &pcache1.grp; } if( pGroup->lru.isAnchor==0 ){ pGroup->lru.isAnchor = 1; pGroup->lru.pLruPrev = pGroup->lru.pLruNext = &pGroup->lru; } pCache->pGroup = pGroup; pCache->szPage = szPage; pCache->szExtra = szExtra; pCache->szAlloc = szPage + szExtra + ROUND8(sizeof(PgHdr1)); pCache->bPurgeable = (bPurgeable ? 1 : 0); pcache1EnterMutex(pGroup); pcache1ResizeHash(pCache); if( bPurgeable ){ pCache->nMin = 10; pGroup->nMinPage += pCache->nMin; pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; } pcache1LeaveMutex(pGroup); if( pCache->nHash==0 ){ pcache1Destroy((sqlite3_pcache*)pCache); pCache = 0; } } return (sqlite3_pcache *)pCache; } /* ** Implementation of the sqlite3_pcache.xCachesize method. ** ** Configure the cache_size limit for a cache. */ static void pcache1Cachesize(sqlite3_pcache *p, int nMax){ PCache1 *pCache = (PCache1 *)p; if( pCache->bPurgeable ){ PGroup *pGroup = pCache->pGroup; pcache1EnterMutex(pGroup); pGroup->nMaxPage += (nMax - pCache->nMax); pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; pCache->nMax = nMax; pCache->n90pct = pCache->nMax*9/10; pcache1EnforceMaxPage(pCache); pcache1LeaveMutex(pGroup); } } /* ** Implementation of the sqlite3_pcache.xShrink method. ** ** Free up as much memory as possible. */ static void pcache1Shrink(sqlite3_pcache *p){ PCache1 *pCache = (PCache1*)p; if( pCache->bPurgeable ){ PGroup *pGroup = pCache->pGroup; int savedMaxPage; pcache1EnterMutex(pGroup); savedMaxPage = pGroup->nMaxPage; pGroup->nMaxPage = 0; pcache1EnforceMaxPage(pCache); pGroup->nMaxPage = savedMaxPage; pcache1LeaveMutex(pGroup); } } /* ** Implementation of the sqlite3_pcache.xPagecount method. */ static int pcache1Pagecount(sqlite3_pcache *p){ int n; PCache1 *pCache = (PCache1*)p; pcache1EnterMutex(pCache->pGroup); n = pCache->nPage; pcache1LeaveMutex(pCache->pGroup); return n; } /* ** Implement steps 3, 4, and 5 of the pcache1Fetch() algorithm described ** in the header of the pcache1Fetch() procedure. ** ** This steps are broken out into a separate procedure because they are ** usually not needed, and by avoiding the stack initialization required ** for these steps, the main pcache1Fetch() procedure can run faster. */ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( PCache1 *pCache, unsigned int iKey, int createFlag ){ unsigned int nPinned; PGroup *pGroup = pCache->pGroup; PgHdr1 *pPage = 0; /* Step 3: Abort if createFlag is 1 but the cache is nearly full */ assert( pCache->nPage >= pCache->nRecyclable ); nPinned = pCache->nPage - pCache->nRecyclable; assert( pGroup->mxPinned == pGroup->nMaxPage + 10 - pGroup->nMinPage ); assert( pCache->n90pct == pCache->nMax*9/10 ); if( createFlag==1 && ( nPinned>=pGroup->mxPinned || nPinned>=pCache->n90pct || (pcache1UnderMemoryPressure(pCache) && pCache->nRecyclablenPage>=pCache->nHash ) pcache1ResizeHash(pCache); assert( pCache->nHash>0 && pCache->apHash ); /* Step 4. Try to recycle a page. */ if( pCache->bPurgeable && !pGroup->lru.pLruPrev->isAnchor && ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache)) ){ PCache1 *pOther; pPage = pGroup->lru.pLruPrev; assert( pPage->isPinned==0 ); pcache1RemoveFromHash(pPage, 0); pcache1PinPage(pPage); pOther = pPage->pCache; if( pOther->szAlloc != pCache->szAlloc ){ pcache1FreePage(pPage); pPage = 0; }else{ pGroup->nCurrentPage -= (pOther->bPurgeable - pCache->bPurgeable); } } /* Step 5. If a usable page buffer has still not been found, ** attempt to allocate a new one. */ if( !pPage ){ pPage = pcache1AllocPage(pCache, createFlag==1); } if( pPage ){ unsigned int h = iKey % pCache->nHash; pCache->nPage++; pPage->iKey = iKey; pPage->pNext = pCache->apHash[h]; pPage->pCache = pCache; pPage->pLruPrev = 0; pPage->pLruNext = 0; pPage->isPinned = 1; *(void **)pPage->page.pExtra = 0; pCache->apHash[h] = pPage; if( iKey>pCache->iMaxKey ){ pCache->iMaxKey = iKey; } } return pPage; } /* ** Implementation of the sqlite3_pcache.xFetch method. ** ** Fetch a page by key value. ** ** Whether or not a new page may be allocated by this function depends on ** the value of the createFlag argument. 0 means do not allocate a new ** page. 1 means allocate a new page if space is easily available. 2 ** means to try really hard to allocate a new page. ** ** For a non-purgeable cache (a cache used as the storage for an in-memory ** database) there is really no difference between createFlag 1 and 2. So ** the calling function (pcache.c) will never have a createFlag of 1 on ** a non-purgeable cache. ** ** There are three different approaches to obtaining space for a page, ** depending on the value of parameter createFlag (which may be 0, 1 or 2). ** ** 1. Regardless of the value of createFlag, the cache is searched for a ** copy of the requested page. If one is found, it is returned. ** ** 2. If createFlag==0 and the page is not already in the cache, NULL is ** returned. ** ** 3. If createFlag is 1, and the page is not already in the cache, then ** return NULL (do not allocate a new page) if any of the following ** conditions are true: ** ** (a) the number of pages pinned by the cache is greater than ** PCache1.nMax, or ** ** (b) the number of pages pinned by the cache is greater than ** the sum of nMax for all purgeable caches, less the sum of ** nMin for all other purgeable caches, or ** ** 4. If none of the first three conditions apply and the cache is marked ** as purgeable, and if one of the following is true: ** ** (a) The number of pages allocated for the cache is already ** PCache1.nMax, or ** ** (b) The number of pages allocated for all purgeable caches is ** already equal to or greater than the sum of nMax for all ** purgeable caches, ** ** (c) The system is under memory pressure and wants to avoid ** unnecessary pages cache entry allocations ** ** then attempt to recycle a page from the LRU list. If it is the right ** size, return the recycled buffer. Otherwise, free the buffer and ** proceed to step 5. ** ** 5. Otherwise, allocate and return a new page buffer. ** ** There are two versions of this routine. pcache1FetchWithMutex() is ** the general case. pcache1FetchNoMutex() is a faster implementation for ** the common case where pGroup->mutex is NULL. The pcache1Fetch() wrapper ** invokes the appropriate routine. */ static PgHdr1 *pcache1FetchNoMutex( sqlite3_pcache *p, unsigned int iKey, int createFlag ){ PCache1 *pCache = (PCache1 *)p; PgHdr1 *pPage = 0; /* Step 1: Search the hash table for an existing entry. */ pPage = pCache->apHash[iKey % pCache->nHash]; while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; } /* Step 2: If the page was found in the hash table, then return it. ** If the page was not in the hash table and createFlag is 0, abort. ** Otherwise (page not in hash and createFlag!=0) continue with ** subsequent steps to try to create the page. */ if( pPage ){ if( !pPage->isPinned ){ return pcache1PinPage(pPage); }else{ return pPage; } }else if( createFlag ){ /* Steps 3, 4, and 5 implemented by this subroutine */ return pcache1FetchStage2(pCache, iKey, createFlag); }else{ return 0; } } #if PCACHE1_MIGHT_USE_GROUP_MUTEX static PgHdr1 *pcache1FetchWithMutex( sqlite3_pcache *p, unsigned int iKey, int createFlag ){ PCache1 *pCache = (PCache1 *)p; PgHdr1 *pPage; pcache1EnterMutex(pCache->pGroup); pPage = pcache1FetchNoMutex(p, iKey, createFlag); assert( pPage==0 || pCache->iMaxKey>=iKey ); pcache1LeaveMutex(pCache->pGroup); return pPage; } #endif static sqlite3_pcache_page *pcache1Fetch( sqlite3_pcache *p, unsigned int iKey, int createFlag ){ #if PCACHE1_MIGHT_USE_GROUP_MUTEX || defined(SQLITE_DEBUG) PCache1 *pCache = (PCache1 *)p; #endif assert( offsetof(PgHdr1,page)==0 ); assert( pCache->bPurgeable || createFlag!=1 ); assert( pCache->bPurgeable || pCache->nMin==0 ); assert( pCache->bPurgeable==0 || pCache->nMin==10 ); assert( pCache->nMin==0 || pCache->bPurgeable ); assert( pCache->nHash>0 ); #if PCACHE1_MIGHT_USE_GROUP_MUTEX if( pCache->pGroup->mutex ){ return (sqlite3_pcache_page*)pcache1FetchWithMutex(p, iKey, createFlag); }else #endif { return (sqlite3_pcache_page*)pcache1FetchNoMutex(p, iKey, createFlag); } } /* ** Implementation of the sqlite3_pcache.xUnpin method. ** ** Mark a page as unpinned (eligible for asynchronous recycling). */ static void pcache1Unpin( sqlite3_pcache *p, sqlite3_pcache_page *pPg, int reuseUnlikely ){ PCache1 *pCache = (PCache1 *)p; PgHdr1 *pPage = (PgHdr1 *)pPg; PGroup *pGroup = pCache->pGroup; assert( pPage->pCache==pCache ); pcache1EnterMutex(pGroup); /* It is an error to call this function if the page is already ** part of the PGroup LRU list. */ assert( pPage->pLruPrev==0 && pPage->pLruNext==0 ); assert( pPage->isPinned==1 ); if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){ pcache1RemoveFromHash(pPage, 1); }else{ /* Add the page to the PGroup LRU list. */ PgHdr1 **ppFirst = &pGroup->lru.pLruNext; pPage->pLruPrev = &pGroup->lru; (pPage->pLruNext = *ppFirst)->pLruPrev = pPage; *ppFirst = pPage; pCache->nRecyclable++; pPage->isPinned = 0; } pcache1LeaveMutex(pCache->pGroup); } /* ** Implementation of the sqlite3_pcache.xRekey method. */ static void pcache1Rekey( sqlite3_pcache *p, sqlite3_pcache_page *pPg, unsigned int iOld, unsigned int iNew ){ PCache1 *pCache = (PCache1 *)p; PgHdr1 *pPage = (PgHdr1 *)pPg; PgHdr1 **pp; unsigned int h; assert( pPage->iKey==iOld ); assert( pPage->pCache==pCache ); pcache1EnterMutex(pCache->pGroup); h = iOld%pCache->nHash; pp = &pCache->apHash[h]; while( (*pp)!=pPage ){ pp = &(*pp)->pNext; } *pp = pPage->pNext; h = iNew%pCache->nHash; pPage->iKey = iNew; pPage->pNext = pCache->apHash[h]; pCache->apHash[h] = pPage; if( iNew>pCache->iMaxKey ){ pCache->iMaxKey = iNew; } pcache1LeaveMutex(pCache->pGroup); } /* ** Implementation of the sqlite3_pcache.xTruncate method. ** ** Discard all unpinned pages in the cache with a page number equal to ** or greater than parameter iLimit. Any pinned pages with a page number ** equal to or greater than iLimit are implicitly unpinned. */ static void pcache1Truncate(sqlite3_pcache *p, unsigned int iLimit){ PCache1 *pCache = (PCache1 *)p; pcache1EnterMutex(pCache->pGroup); if( iLimit<=pCache->iMaxKey ){ pcache1TruncateUnsafe(pCache, iLimit); pCache->iMaxKey = iLimit-1; } pcache1LeaveMutex(pCache->pGroup); } /* ** Implementation of the sqlite3_pcache.xDestroy method. ** ** Destroy a cache allocated using pcache1Create(). */ static void pcache1Destroy(sqlite3_pcache *p){ PCache1 *pCache = (PCache1 *)p; PGroup *pGroup = pCache->pGroup; assert( pCache->bPurgeable || (pCache->nMax==0 && pCache->nMin==0) ); pcache1EnterMutex(pGroup); pcache1TruncateUnsafe(pCache, 0); assert( pGroup->nMaxPage >= pCache->nMax ); pGroup->nMaxPage -= pCache->nMax; assert( pGroup->nMinPage >= pCache->nMin ); pGroup->nMinPage -= pCache->nMin; pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; pcache1EnforceMaxPage(pCache); pcache1LeaveMutex(pGroup); sqlite3_free(pCache->pBulk); sqlite3_free(pCache->apHash); sqlite3_free(pCache); } /* ** This function is called during initialization (sqlite3BtreeInitialize()) to ** install the default pluggable cache module, assuming the user has not ** already provided an alternative. */ SQLITE_PRIVATE void sqlite3PCacheSetDefault(void){ static const sqlite3_pcache_methods2 defaultMethods = { 1, /* iVersion */ 0, /* pArg */ pcache1Init, /* xInit */ pcache1Shutdown, /* xShutdown */ pcache1Create, /* xCreate */ pcache1Cachesize, /* xCachesize */ pcache1Pagecount, /* xPagecount */ pcache1Fetch, /* xFetch */ pcache1Unpin, /* xUnpin */ pcache1Rekey, /* xRekey */ pcache1Truncate, /* xTruncate */ pcache1Destroy, /* xDestroy */ pcache1Shrink /* xShrink */ }; sqlite3_config(SQLITE_CONFIG_PCACHE2, &defaultMethods); } /* ** Return the size of the header on each page of this PCACHE implementation. */ SQLITE_PRIVATE int sqlite3HeaderSizePcache1(void){ return ROUND8(sizeof(PgHdr1)); } /* ** Return the global mutex used by this PCACHE implementation. The ** sqlite3_status() routine needs access to this mutex. */ #ifndef NDEBUG SQLITE_PRIVATE sqlite3_mutex *sqlite3Pcache1Mutex(void){ return pcache1.mutex; } #endif #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT /* ** This function is called to free superfluous dynamically allocated memory ** held by the pager system. Memory in use by any SQLite pager allocated ** by the current thread may be sqlite3_free()ed. ** ** nReq is the number of bytes of memory required. Once this much has ** been released, the function returns. The return value is the total number ** of bytes of memory released. */ SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){ int nFree = 0; assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); assert( sqlite3_mutex_notheld(pcache1.mutex) ); if( sqlite3GlobalConfig.nPage==0 ){ PgHdr1 *p; pcache1EnterMutex(&pcache1.grp); while( (nReq<0 || nFreeisAnchor==0 ){ nFree += pcache1MemSize(p->page.pBuf); #ifdef SQLITE_PCACHE_SEPARATE_HEADER nFree += sqlite3MemSize(p); #endif assert( p->isPinned==0 ); pcache1PinPage(p); pcache1RemoveFromHash(p, 1); } pcache1LeaveMutex(&pcache1.grp); } return nFree; } #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ #ifdef SQLITE_TEST /* ** This function is used by test procedures to inspect the internal state ** of the global cache. */ SQLITE_PRIVATE void sqlite3PcacheStats( int *pnCurrent, /* OUT: Total number of pages cached */ int *pnMax, /* OUT: Global maximum cache size */ int *pnMin, /* OUT: Sum of PCache1.nMin for purgeable caches */ int *pnRecyclable /* OUT: Total number of pages available for recycling */ ){ PgHdr1 *p; int nRecyclable = 0; for(p=pcache1.grp.lru.pLruNext; p && !p->isAnchor; p=p->pLruNext){ assert( p->isPinned==0 ); nRecyclable++; } *pnCurrent = pcache1.grp.nCurrentPage; *pnMax = (int)pcache1.grp.nMaxPage; *pnMin = (int)pcache1.grp.nMinPage; *pnRecyclable = nRecyclable; } #endif /************** End of pcache1.c *********************************************/ /************** Begin file pager.c *******************************************/ /* ** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This is the implementation of the page cache subsystem or "pager". ** ** The pager is used to access a database disk file. It implements ** atomic commit and rollback through the use of a journal file that ** is separate from the database file. The pager also implements file ** locking to prevent two processes from writing the same database ** file simultaneously, or one process from reading the database while ** another is writing. */ #ifndef SQLITE_OMIT_DISKIO /* #include "sqliteInt.h" */ /************** Include wal.h in the middle of pager.c ***********************/ /************** Begin file wal.h *********************************************/ /* ** 2010 February 1 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This header file defines the interface to the write-ahead logging ** system. Refer to the comments below and the header comment attached to ** the implementation of each function in log.c for further details. */ #ifndef _WAL_H_ #define _WAL_H_ /* #include "sqliteInt.h" */ /* Additional values that can be added to the sync_flags argument of ** sqlite3WalFrames(): */ #define WAL_SYNC_TRANSACTIONS 0x20 /* Sync at the end of each transaction */ #define SQLITE_SYNC_MASK 0x13 /* Mask off the SQLITE_SYNC_* values */ #ifdef SQLITE_OMIT_WAL # define sqlite3WalOpen(x,y,z) 0 #if 0 # define sqlite3WalLimit(x,y) #endif # define sqlite3WalClose(w,x,y,z) 0 # define sqlite3WalBeginReadTransaction(y,z) 0 # define sqlite3WalEndReadTransaction(z) # define sqlite3WalDbsize(y) 0 # define sqlite3WalBeginWriteTransaction(y) 0 # define sqlite3WalEndWriteTransaction(x) 0 # define sqlite3WalUndo(x,y,z) 0 # define sqlite3WalSavepoint(y,z) # define sqlite3WalSavepointUndo(y,z) 0 # define sqlite3WalFrames(u,v,w,x,y,z) 0 # define sqlite3WalCheckpoint(r,s,t,u,v,w,x,y,z) 0 #if 0 # define sqlite3WalCallback(z) 0 #endif # define sqlite3WalExclusiveMode(y,z) 0 #if 0 # define sqlite3WalHeapMemory(z) 0 #endif # define sqlite3WalFramesize(z) 0 # define sqlite3WalFindFrame(x,y,z) 0 #else #define WAL_SAVEPOINT_NDATA 4 /* Connection to a write-ahead log (WAL) file. ** There is one object of this type for each pager. */ typedef struct Wal Wal; /* Open and close a connection to a write-ahead log. */ SQLITE_PRIVATE int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**); SQLITE_PRIVATE int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *); #if 0 /* Set the limiting size of a WAL file. */ SQLITE_PRIVATE void sqlite3WalLimit(Wal*, i64); #endif /* Used by readers to open (lock) and close (unlock) a snapshot. A ** snapshot is like a read-transaction. It is the state of the database ** at an instant in time. sqlite3WalOpenSnapshot gets a read lock and ** preserves the current state even if the other threads or processes ** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the ** transaction and releases the lock. */ SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *); SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal); /* Read a page from the write-ahead log, if it is present. */ SQLITE_PRIVATE int sqlite3WalFindFrame(Wal *, Pgno, u32 *); SQLITE_PRIVATE int sqlite3WalReadFrame(Wal *, u32, int, u8 *); /* If the WAL is not empty, return the size of the database. */ SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal); /* Obtain or release the WRITER lock. */ SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal); SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal); /* Undo any frames written (but not committed) to the log */ SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); /* Return an integer that records the current (uncommitted) write ** position in the WAL */ SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData); /* Move the write position of the WAL back to iFrame. Called in ** response to a ROLLBACK TO command. */ SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData); /* Write a frame or frames to the log. */ SQLITE_PRIVATE int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); /* Copy pages from the log to the database file */ SQLITE_PRIVATE int sqlite3WalCheckpoint( Wal *pWal, /* Write-ahead log connection */ int eMode, /* One of PASSIVE, FULL and RESTART */ int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags to sync db file with (or 0) */ int nBuf, /* Size of buffer nBuf */ u8 *zBuf, /* Temporary buffer to use */ int *pnLog, /* OUT: Number of frames in WAL */ int *pnCkpt /* OUT: Number of backfilled frames in WAL */ ); #if 0 /* Return the value to pass to a sqlite3_wal_hook callback, the ** number of frames in the WAL at the point of the last commit since ** sqlite3WalCallback() was called. If no commits have occurred since ** the last call, then return 0. */ SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal); #endif /* Tell the wal layer that an EXCLUSIVE lock has been obtained (or released) ** by the pager layer on the database file. */ SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op); #if 0 /* Return true if the argument is non-NULL and the WAL module is using ** heap-memory for the wal-index. Otherwise, if the argument is NULL or the ** WAL module is using shared-memory, return false. */ SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal); #endif #ifdef SQLITE_ENABLE_ZIPVFS /* If the WAL file is not empty, return the number of bytes of content ** stored in each frame (i.e. the db page-size when the WAL was created). */ SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal); #endif #endif /* ifndef SQLITE_OMIT_WAL */ #endif /* _WAL_H_ */ /************** End of wal.h *************************************************/ /************** Continuing where we left off in pager.c **********************/ /******************* NOTES ON THE DESIGN OF THE PAGER ************************ ** ** This comment block describes invariants that hold when using a rollback ** journal. These invariants do not apply for journal_mode=WAL, ** journal_mode=MEMORY, or journal_mode=OFF. ** ** Within this comment block, a page is deemed to have been synced ** automatically as soon as it is written when PRAGMA synchronous=OFF. ** Otherwise, the page is not synced until the xSync method of the VFS ** is called successfully on the file containing the page. ** ** Definition: A page of the database file is said to be "overwriteable" if ** one or more of the following are true about the page: ** ** (a) The original content of the page as it was at the beginning of ** the transaction has been written into the rollback journal and ** synced. ** ** (b) The page was a freelist leaf page at the start of the transaction. ** ** (c) The page number is greater than the largest page that existed in ** the database file at the start of the transaction. ** ** (1) A page of the database file is never overwritten unless one of the ** following are true: ** ** (a) The page and all other pages on the same sector are overwriteable. ** ** (b) The atomic page write optimization is enabled, and the entire ** transaction other than the update of the transaction sequence ** number consists of a single page change. ** ** (2) The content of a page written into the rollback journal exactly matches ** both the content in the database when the rollback journal was written ** and the content in the database at the beginning of the current ** transaction. ** ** (3) Writes to the database file are an integer multiple of the page size ** in length and are aligned on a page boundary. ** ** (4) Reads from the database file are either aligned on a page boundary and ** an integer multiple of the page size in length or are taken from the ** first 100 bytes of the database file. ** ** (5) All writes to the database file are synced prior to the rollback journal ** being deleted, truncated, or zeroed. ** ** (6) If a master journal file is used, then all writes to the database file ** are synced prior to the master journal being deleted. ** ** Definition: Two databases (or the same database at two points it time) ** are said to be "logically equivalent" if they give the same answer to ** all queries. Note in particular the content of freelist leaf ** pages can be changed arbitrarily without affecting the logical equivalence ** of the database. ** ** (7) At any time, if any subset, including the empty set and the total set, ** of the unsynced changes to a rollback journal are removed and the ** journal is rolled back, the resulting database file will be logically ** equivalent to the database file at the beginning of the transaction. ** ** (8) When a transaction is rolled back, the xTruncate method of the VFS ** is called to restore the database file to the same size it was at ** the beginning of the transaction. (In some VFSes, the xTruncate ** method is a no-op, but that does not change the fact the SQLite will ** invoke it.) ** ** (9) Whenever the database file is modified, at least one bit in the range ** of bytes from 24 through 39 inclusive will be changed prior to releasing ** the EXCLUSIVE lock, thus signaling other connections on the same ** database to flush their caches. ** ** (10) The pattern of bits in bytes 24 through 39 shall not repeat in less ** than one billion transactions. ** ** (11) A database file is well-formed at the beginning and at the conclusion ** of every transaction. ** ** (12) An EXCLUSIVE lock is held on the database file when writing to ** the database file. ** ** (13) A SHARED lock is held on the database file while reading any ** content out of the database file. ** ******************************************************************************/ /* ** Macros for troubleshooting. Normally turned off */ #if 0 int sqlite3PagerTrace=1; /* True to enable tracing */ #define sqlite3DebugPrintf printf #define PAGERTRACE(X) if( sqlite3PagerTrace ){ sqlite3DebugPrintf X; } #else #define PAGERTRACE(X) #endif /* ** The following two macros are used within the PAGERTRACE() macros above ** to print out file-descriptors. ** ** PAGERID() takes a pointer to a Pager struct as its argument. The ** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file ** struct as its argument. */ #define PAGERID(p) ((int)(p->fd)) #define FILEHANDLEID(fd) ((int)fd) /* ** The Pager.eState variable stores the current 'state' of a pager. A ** pager may be in any one of the seven states shown in the following ** state diagram. ** ** OPEN <------+------+ ** | | | ** V | | ** +---------> READER-------+ | ** | | | ** | V | ** |<-------WRITER_LOCKED------> ERROR ** | | ^ ** | V | ** |<------WRITER_CACHEMOD-------->| ** | | | ** | V | ** |<-------WRITER_DBMOD---------->| ** | | | ** | V | ** +<------WRITER_FINISHED-------->+ ** ** ** List of state transitions and the C [function] that performs each: ** ** OPEN -> READER [sqlite3PagerSharedLock] ** READER -> OPEN [pager_unlock] ** ** READER -> WRITER_LOCKED [sqlite3PagerBegin] ** WRITER_LOCKED -> WRITER_CACHEMOD [pager_open_journal] ** WRITER_CACHEMOD -> WRITER_DBMOD [syncJournal] ** WRITER_DBMOD -> WRITER_FINISHED [sqlite3PagerCommitPhaseOne] ** WRITER_*** -> READER [pager_end_transaction] ** ** WRITER_*** -> ERROR [pager_error] ** ERROR -> OPEN [pager_unlock] ** ** ** OPEN: ** ** The pager starts up in this state. Nothing is guaranteed in this ** state - the file may or may not be locked and the database size is ** unknown. The database may not be read or written. ** ** * No read or write transaction is active. ** * Any lock, or no lock at all, may be held on the database file. ** * The dbSize, dbOrigSize and dbFileSize variables may not be trusted. ** ** READER: ** ** In this state all the requirements for reading the database in ** rollback (non-WAL) mode are met. Unless the pager is (or recently ** was) in exclusive-locking mode, a user-level read transaction is ** open. The database size is known in this state. ** ** A connection running with locking_mode=normal enters this state when ** it opens a read-transaction on the database and returns to state ** OPEN after the read-transaction is completed. However a connection ** running in locking_mode=exclusive (including temp databases) remains in ** this state even after the read-transaction is closed. The only way ** a locking_mode=exclusive connection can transition from READER to OPEN ** is via the ERROR state (see below). ** ** * A read transaction may be active (but a write-transaction cannot). ** * A SHARED or greater lock is held on the database file. ** * The dbSize variable may be trusted (even if a user-level read ** transaction is not active). The dbOrigSize and dbFileSize variables ** may not be trusted at this point. ** * If the database is a WAL database, then the WAL connection is open. ** * Even if a read-transaction is not open, it is guaranteed that ** there is no hot-journal in the file-system. ** ** WRITER_LOCKED: ** ** The pager moves to this state from READER when a write-transaction ** is first opened on the database. In WRITER_LOCKED state, all locks ** required to start a write-transaction are held, but no actual ** modifications to the cache or database have taken place. ** ** In rollback mode, a RESERVED or (if the transaction was opened with ** BEGIN EXCLUSIVE) EXCLUSIVE lock is obtained on the database file when ** moving to this state, but the journal file is not written to or opened ** to in this state. If the transaction is committed or rolled back while ** in WRITER_LOCKED state, all that is required is to unlock the database ** file. ** ** IN WAL mode, WalBeginWriteTransaction() is called to lock the log file. ** If the connection is running with locking_mode=exclusive, an attempt ** is made to obtain an EXCLUSIVE lock on the database file. ** ** * A write transaction is active. ** * If the connection is open in rollback-mode, a RESERVED or greater ** lock is held on the database file. ** * If the connection is open in WAL-mode, a WAL write transaction ** is open (i.e. sqlite3WalBeginWriteTransaction() has been successfully ** called). ** * The dbSize, dbOrigSize and dbFileSize variables are all valid. ** * The contents of the pager cache have not been modified. ** * The journal file may or may not be open. ** * Nothing (not even the first header) has been written to the journal. ** ** WRITER_CACHEMOD: ** ** A pager moves from WRITER_LOCKED state to this state when a page is ** first modified by the upper layer. In rollback mode the journal file ** is opened (if it is not already open) and a header written to the ** start of it. The database file on disk has not been modified. ** ** * A write transaction is active. ** * A RESERVED or greater lock is held on the database file. ** * The journal file is open and the first header has been written ** to it, but the header has not been synced to disk. ** * The contents of the page cache have been modified. ** ** WRITER_DBMOD: ** ** The pager transitions from WRITER_CACHEMOD into WRITER_DBMOD state ** when it modifies the contents of the database file. WAL connections ** never enter this state (since they do not modify the database file, ** just the log file). ** ** * A write transaction is active. ** * An EXCLUSIVE or greater lock is held on the database file. ** * The journal file is open and the first header has been written ** and synced to disk. ** * The contents of the page cache have been modified (and possibly ** written to disk). ** ** WRITER_FINISHED: ** ** It is not possible for a WAL connection to enter this state. ** ** A rollback-mode pager changes to WRITER_FINISHED state from WRITER_DBMOD ** state after the entire transaction has been successfully written into the ** database file. In this state the transaction may be committed simply ** by finalizing the journal file. Once in WRITER_FINISHED state, it is ** not possible to modify the database further. At this point, the upper ** layer must either commit or rollback the transaction. ** ** * A write transaction is active. ** * An EXCLUSIVE or greater lock is held on the database file. ** * All writing and syncing of journal and database data has finished. ** If no error occurred, all that remains is to finalize the journal to ** commit the transaction. If an error did occur, the caller will need ** to rollback the transaction. ** ** ERROR: ** ** The ERROR state is entered when an IO or disk-full error (including ** SQLITE_IOERR_NOMEM) occurs at a point in the code that makes it ** difficult to be sure that the in-memory pager state (cache contents, ** db size etc.) are consistent with the contents of the file-system. ** ** Temporary pager files may enter the ERROR state, but in-memory pagers ** cannot. ** ** For example, if an IO error occurs while performing a rollback, ** the contents of the page-cache may be left in an inconsistent state. ** At this point it would be dangerous to change back to READER state ** (as usually happens after a rollback). Any subsequent readers might ** report database corruption (due to the inconsistent cache), and if ** they upgrade to writers, they may inadvertently corrupt the database ** file. To avoid this hazard, the pager switches into the ERROR state ** instead of READER following such an error. ** ** Once it has entered the ERROR state, any attempt to use the pager ** to read or write data returns an error. Eventually, once all ** outstanding transactions have been abandoned, the pager is able to ** transition back to OPEN state, discarding the contents of the ** page-cache and any other in-memory state at the same time. Everything ** is reloaded from disk (and, if necessary, hot-journal rollback peformed) ** when a read-transaction is next opened on the pager (transitioning ** the pager into READER state). At that point the system has recovered ** from the error. ** ** Specifically, the pager jumps into the ERROR state if: ** ** 1. An error occurs while attempting a rollback. This happens in ** function sqlite3PagerRollback(). ** ** 2. An error occurs while attempting to finalize a journal file ** following a commit in function sqlite3PagerCommitPhaseTwo(). ** ** 3. An error occurs while attempting to write to the journal or ** database file in function pagerStress() in order to free up ** memory. ** ** In other cases, the error is returned to the b-tree layer. The b-tree ** layer then attempts a rollback operation. If the error condition ** persists, the pager enters the ERROR state via condition (1) above. ** ** Condition (3) is necessary because it can be triggered by a read-only ** statement executed within a transaction. In this case, if the error ** code were simply returned to the user, the b-tree layer would not ** automatically attempt a rollback, as it assumes that an error in a ** read-only statement cannot leave the pager in an internally inconsistent ** state. ** ** * The Pager.errCode variable is set to something other than SQLITE_OK. ** * There are one or more outstanding references to pages (after the ** last reference is dropped the pager should move back to OPEN state). ** * The pager is not an in-memory pager. ** ** ** Notes: ** ** * A pager is never in WRITER_DBMOD or WRITER_FINISHED state if the ** connection is open in WAL mode. A WAL connection is always in one ** of the first four states. ** ** * Normally, a connection open in exclusive mode is never in PAGER_OPEN ** state. There are two exceptions: immediately after exclusive-mode has ** been turned on (and before any read or write transactions are ** executed), and when the pager is leaving the "error state". ** ** * See also: assert_pager_state(). */ #define PAGER_OPEN 0 #define PAGER_READER 1 #define PAGER_WRITER_LOCKED 2 #define PAGER_WRITER_CACHEMOD 3 #define PAGER_WRITER_DBMOD 4 #define PAGER_WRITER_FINISHED 5 #define PAGER_ERROR 6 /* ** The Pager.eLock variable is almost always set to one of the ** following locking-states, according to the lock currently held on ** the database file: NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK. ** This variable is kept up to date as locks are taken and released by ** the pagerLockDb() and pagerUnlockDb() wrappers. ** ** If the VFS xLock() or xUnlock() returns an error other than SQLITE_BUSY ** (i.e. one of the SQLITE_IOERR subtypes), it is not clear whether or not ** the operation was successful. In these circumstances pagerLockDb() and ** pagerUnlockDb() take a conservative approach - eLock is always updated ** when unlocking the file, and only updated when locking the file if the ** VFS call is successful. This way, the Pager.eLock variable may be set ** to a less exclusive (lower) value than the lock that is actually held ** at the system level, but it is never set to a more exclusive value. ** ** This is usually safe. If an xUnlock fails or appears to fail, there may ** be a few redundant xLock() calls or a lock may be held for longer than ** required, but nothing really goes wrong. ** ** The exception is when the database file is unlocked as the pager moves ** from ERROR to OPEN state. At this point there may be a hot-journal file ** in the file-system that needs to be rolled back (as part of an OPEN->SHARED ** transition, by the same pager or any other). If the call to xUnlock() ** fails at this point and the pager is left holding an EXCLUSIVE lock, this ** can confuse the call to xCheckReservedLock() call made later as part ** of hot-journal detection. ** ** xCheckReservedLock() is defined as returning true "if there is a RESERVED ** lock held by this process or any others". So xCheckReservedLock may ** return true because the caller itself is holding an EXCLUSIVE lock (but ** doesn't know it because of a previous error in xUnlock). If this happens ** a hot-journal may be mistaken for a journal being created by an active ** transaction in another process, causing SQLite to read from the database ** without rolling it back. ** ** To work around this, if a call to xUnlock() fails when unlocking the ** database in the ERROR state, Pager.eLock is set to UNKNOWN_LOCK. It ** is only changed back to a real locking state after a successful call ** to xLock(EXCLUSIVE). Also, the code to do the OPEN->SHARED state transition ** omits the check for a hot-journal if Pager.eLock is set to UNKNOWN_LOCK ** lock. Instead, it assumes a hot-journal exists and obtains an EXCLUSIVE ** lock on the database file before attempting to roll it back. See function ** PagerSharedLock() for more detail. ** ** Pager.eLock may only be set to UNKNOWN_LOCK when the pager is in ** PAGER_OPEN state. */ #define UNKNOWN_LOCK (EXCLUSIVE_LOCK+1) /* ** A macro used for invoking the codec if there is one */ #ifdef SQLITE_HAS_CODEC # define CODEC1(P,D,N,X,E) \ if( P->xCodec && P->xCodec(P->pCodec,D,N,X)==0 ){ E; } # define CODEC2(P,D,N,X,E,O) \ if( P->xCodec==0 ){ O=(char*)D; }else \ if( (O=(char*)(P->xCodec(P->pCodec,D,N,X)))==0 ){ E; } #else # define CODEC1(P,D,N,X,E) /* NO-OP */ # define CODEC2(P,D,N,X,E,O) O=(char*)D #endif /* ** The maximum allowed sector size. 64KiB. If the xSectorsize() method ** returns a value larger than this, then MAX_SECTOR_SIZE is used instead. ** This could conceivably cause corruption following a power failure on ** such a system. This is currently an undocumented limit. */ #define MAX_SECTOR_SIZE 0x10000 /* ** An instance of the following structure is allocated for each active ** savepoint and statement transaction in the system. All such structures ** are stored in the Pager.aSavepoint[] array, which is allocated and ** resized using sqlite3Realloc(). ** ** When a savepoint is created, the PagerSavepoint.iHdrOffset field is ** set to 0. If a journal-header is written into the main journal while ** the savepoint is active, then iHdrOffset is set to the byte offset ** immediately following the last journal record written into the main ** journal before the journal-header. This is required during savepoint ** rollback (see pagerPlaybackSavepoint()). */ typedef struct PagerSavepoint PagerSavepoint; struct PagerSavepoint { i64 iOffset; /* Starting offset in main journal */ i64 iHdrOffset; /* See above */ Bitvec *pInSavepoint; /* Set of pages in this savepoint */ Pgno nOrig; /* Original number of pages in file */ Pgno iSubRec; /* Index of first record in sub-journal */ #ifndef SQLITE_OMIT_WAL u32 aWalData[WAL_SAVEPOINT_NDATA]; /* WAL savepoint context */ #endif }; /* ** Bits of the Pager.doNotSpill flag. See further description below. */ #define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ #define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ #define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ /* ** An open page cache is an instance of struct Pager. A description of ** some of the more important member variables follows: ** ** eState ** ** The current 'state' of the pager object. See the comment and state ** diagram above for a description of the pager state. ** ** eLock ** ** For a real on-disk database, the current lock held on the database file - ** NO_LOCK, SHARED_LOCK, RESERVED_LOCK or EXCLUSIVE_LOCK. ** ** For a temporary or in-memory database (neither of which require any ** locks), this variable is always set to EXCLUSIVE_LOCK. Since such ** databases always have Pager.exclusiveMode==1, this tricks the pager ** logic into thinking that it already has all the locks it will ever ** need (and no reason to release them). ** ** In some (obscure) circumstances, this variable may also be set to ** UNKNOWN_LOCK. See the comment above the #define of UNKNOWN_LOCK for ** details. ** ** changeCountDone ** ** This boolean variable is used to make sure that the change-counter ** (the 4-byte header field at byte offset 24 of the database file) is ** not updated more often than necessary. ** ** It is set to true when the change-counter field is updated, which ** can only happen if an exclusive lock is held on the database file. ** It is cleared (set to false) whenever an exclusive lock is ** relinquished on the database file. Each time a transaction is committed, ** The changeCountDone flag is inspected. If it is true, the work of ** updating the change-counter is omitted for the current transaction. ** ** This mechanism means that when running in exclusive mode, a connection ** need only update the change-counter once, for the first transaction ** committed. ** ** setMaster ** ** When PagerCommitPhaseOne() is called to commit a transaction, it may ** (or may not) specify a master-journal name to be written into the ** journal file before it is synced to disk. ** ** Whether or not a journal file contains a master-journal pointer affects ** the way in which the journal file is finalized after the transaction is ** committed or rolled back when running in "journal_mode=PERSIST" mode. ** If a journal file does not contain a master-journal pointer, it is ** finalized by overwriting the first journal header with zeroes. If ** it does contain a master-journal pointer the journal file is finalized ** by truncating it to zero bytes, just as if the connection were ** running in "journal_mode=truncate" mode. ** ** Journal files that contain master journal pointers cannot be finalized ** simply by overwriting the first journal-header with zeroes, as the ** master journal pointer could interfere with hot-journal rollback of any ** subsequently interrupted transaction that reuses the journal file. ** ** The flag is cleared as soon as the journal file is finalized (either ** by PagerCommitPhaseTwo or PagerRollback). If an IO error prevents the ** journal file from being successfully finalized, the setMaster flag ** is cleared anyway (and the pager will move to ERROR state). ** ** doNotSpill ** ** This variables control the behavior of cache-spills (calls made by ** the pcache module to the pagerStress() routine to write cached data ** to the file-system in order to free up memory). ** ** When bits SPILLFLAG_OFF or SPILLFLAG_ROLLBACK of doNotSpill are set, ** writing to the database from pagerStress() is disabled altogether. ** The SPILLFLAG_ROLLBACK case is done in a very obscure case that ** comes up during savepoint rollback that requires the pcache module ** to allocate a new page to prevent the journal file from being written ** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF ** case is a user preference. ** ** If the SPILLFLAG_NOSYNC bit is set, writing to the database from ** pagerStress() is permitted, but syncing the journal file is not. ** This flag is set by sqlite3PagerWrite() when the file-system sector-size ** is larger than the database page-size in order to prevent a journal sync ** from happening in between the journalling of two pages on the same sector. ** ** subjInMemory ** ** This is a boolean variable. If true, then any required sub-journal ** is opened as an in-memory journal file. If false, then in-memory ** sub-journals are only used for in-memory pager files. ** ** This variable is updated by the upper layer each time a new ** write-transaction is opened. ** ** dbSize, dbOrigSize, dbFileSize ** ** Variable dbSize is set to the number of pages in the database file. ** It is valid in PAGER_READER and higher states (all states except for ** OPEN and ERROR). ** ** dbSize is set based on the size of the database file, which may be ** larger than the size of the database (the value stored at offset ** 28 of the database header by the btree). If the size of the file ** is not an integer multiple of the page-size, the value stored in ** dbSize is rounded down (i.e. a 5KB file with 2K page-size has dbSize==2). ** Except, any file that is greater than 0 bytes in size is considered ** to have at least one page. (i.e. a 1KB file with 2K page-size leads ** to dbSize==1). ** ** During a write-transaction, if pages with page-numbers greater than ** dbSize are modified in the cache, dbSize is updated accordingly. ** Similarly, if the database is truncated using PagerTruncateImage(), ** dbSize is updated. ** ** Variables dbOrigSize and dbFileSize are valid in states ** PAGER_WRITER_LOCKED and higher. dbOrigSize is a copy of the dbSize ** variable at the start of the transaction. It is used during rollback, ** and to determine whether or not pages need to be journalled before ** being modified. ** ** Throughout a write-transaction, dbFileSize contains the size of ** the file on disk in pages. It is set to a copy of dbSize when the ** write-transaction is first opened, and updated when VFS calls are made ** to write or truncate the database file on disk. ** ** The only reason the dbFileSize variable is required is to suppress ** unnecessary calls to xTruncate() after committing a transaction. If, ** when a transaction is committed, the dbFileSize variable indicates ** that the database file is larger than the database image (Pager.dbSize), ** pager_truncate() is called. The pager_truncate() call uses xFilesize() ** to measure the database file on disk, and then truncates it if required. ** dbFileSize is not used when rolling back a transaction. In this case ** pager_truncate() is called unconditionally (which means there may be ** a call to xFilesize() that is not strictly required). In either case, ** pager_truncate() may cause the file to become smaller or larger. ** ** dbHintSize ** ** The dbHintSize variable is used to limit the number of calls made to ** the VFS xFileControl(FCNTL_SIZE_HINT) method. ** ** dbHintSize is set to a copy of the dbSize variable when a ** write-transaction is opened (at the same time as dbFileSize and ** dbOrigSize). If the xFileControl(FCNTL_SIZE_HINT) method is called, ** dbHintSize is increased to the number of pages that correspond to the ** size-hint passed to the method call. See pager_write_pagelist() for ** details. ** ** errCode ** ** The Pager.errCode variable is only ever used in PAGER_ERROR state. It ** is set to zero in all other states. In PAGER_ERROR state, Pager.errCode ** is always set to SQLITE_FULL, SQLITE_IOERR or one of the SQLITE_IOERR_XXX ** sub-codes. */ struct Pager { sqlite3_vfs *pVfs; /* OS functions to use for IO */ u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */ u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */ u8 useJournal; /* Use a rollback journal on this file */ u8 noSync; /* Do not sync the journal if true */ u8 fullSync; /* Do extra syncs of the journal for robustness */ u8 ckptSyncFlags; /* SYNC_NORMAL or SYNC_FULL for checkpoint */ u8 walSyncFlags; /* SYNC_NORMAL or SYNC_FULL for wal writes */ u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */ u8 tempFile; /* zFilename is a temporary or immutable file */ u8 noLock; /* Do not lock (except in WAL mode) */ u8 readOnly; /* True for a read-only database */ u8 memDb; /* True to inhibit all file I/O */ /************************************************************************** ** The following block contains those class members that change during ** routine operation. Class members not in this block are either fixed ** when the pager is first created or else only change when there is a ** significant mode change (such as changing the page_size, locking_mode, ** or the journal_mode). From another view, these class members describe ** the "state" of the pager, while other class members describe the ** "configuration" of the pager. */ u8 eState; /* Pager state (OPEN, READER, WRITER_LOCKED..) */ u8 eLock; /* Current lock held on database file */ u8 changeCountDone; /* Set after incrementing the change-counter */ u8 setMaster; /* True if a m-j name has been written to jrnl */ u8 doNotSpill; /* Do not spill the cache when non-zero */ u8 subjInMemory; /* True to use in-memory sub-journals */ u8 bUseFetch; /* True to use xFetch() */ u8 hasHeldSharedLock; /* True if a shared lock has ever been held */ Pgno dbSize; /* Number of pages in the database */ Pgno dbOrigSize; /* dbSize before the current transaction */ Pgno dbFileSize; /* Number of pages in the database file */ Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */ int errCode; /* One of several kinds of errors */ int nRec; /* Pages journalled since last j-header written */ u32 cksumInit; /* Quasi-random value added to every checksum */ u32 nSubRec; /* Number of records written to sub-journal */ Bitvec *pInJournal; /* One bit for each page in the database file */ sqlite3_file *fd; /* File descriptor for database */ sqlite3_file *jfd; /* File descriptor for main journal */ sqlite3_file *sjfd; /* File descriptor for sub-journal */ i64 journalOff; /* Current write offset in the journal file */ i64 journalHdr; /* Byte offset to previous journal header */ sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ PagerSavepoint *aSavepoint; /* Array of active savepoints */ int nSavepoint; /* Number of elements in aSavepoint[] */ u32 iDataVersion; /* Changes whenever database content changes */ char dbFileVers[16]; /* Changes whenever database file changes */ int nMmapOut; /* Number of mmap pages currently outstanding */ sqlite3_int64 szMmap; /* Desired maximum mmap size */ PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */ /* ** End of the routinely-changing class members ***************************************************************************/ u16 nExtra; /* Add this many bytes to each in-memory page */ i16 nReserve; /* Number of unused bytes at end of each page */ u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */ u32 sectorSize; /* Assumed sector size during rollback */ int pageSize; /* Number of bytes in a page */ Pgno mxPgno; /* Maximum allowed size of the database */ i64 journalSizeLimit; /* Size limit for persistent journal files */ char *zFilename; /* Name of the database file */ char *zJournal; /* Name of the journal file */ int (*xBusyHandler)(void*); /* Function to call when busy */ void *pBusyHandlerArg; /* Context argument for xBusyHandler */ int aStat[3]; /* Total cache hits, misses and writes */ #ifdef SQLITE_TEST int nRead; /* Database pages read */ #endif void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */ #ifdef SQLITE_HAS_CODEC void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */ void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */ void (*xCodecFree)(void*); /* Destructor for the codec */ void *pCodec; /* First argument to xCodec... methods */ #endif char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ PCache *pPCache; /* Pointer to page cache object */ #ifndef SQLITE_OMIT_WAL Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ char *zWal; /* File name for write-ahead log */ #endif }; /* ** Indexes for use with Pager.aStat[]. The Pager.aStat[] array contains ** the values accessed by passing SQLITE_DBSTATUS_CACHE_HIT, CACHE_MISS ** or CACHE_WRITE to sqlite3_db_status(). */ #define PAGER_STAT_HIT 0 #define PAGER_STAT_MISS 1 #define PAGER_STAT_WRITE 2 /* ** The following global variables hold counters used for ** testing purposes only. These variables do not exist in ** a non-testing build. These variables are not thread-safe. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */ SQLITE_PRIVATE int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */ SQLITE_PRIVATE int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */ # define PAGER_INCR(v) v++ #else # define PAGER_INCR(v) #endif /* ** Journal files begin with the following magic string. The data ** was obtained from /dev/random. It is used only as a sanity check. ** ** Since version 2.8.0, the journal format contains additional sanity ** checking information. If the power fails while the journal is being ** written, semi-random garbage data might appear in the journal ** file after power is restored. If an attempt is then made ** to roll the journal back, the database could be corrupted. The additional ** sanity checking data is an attempt to discover the garbage in the ** journal and ignore it. ** ** The sanity checking information for the new journal format consists ** of a 32-bit checksum on each page of data. The checksum covers both ** the page number and the pPager->pageSize bytes of data for the page. ** This cksum is initialized to a 32-bit random value that appears in the ** journal file right after the header. The random initializer is important, ** because garbage data that appears at the end of a journal is likely ** data that was once in other files that have now been deleted. If the ** garbage data came from an obsolete journal file, the checksums might ** be correct. But by initializing the checksum to random value which ** is different for every journal, we minimize that risk. */ static const unsigned char aJournalMagic[] = { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7, }; /* ** The size of the of each page record in the journal is given by ** the following macro. */ #define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8) /* ** The journal header size for this pager. This is usually the same ** size as a single disk sector. See also setSectorSize(). */ #define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize) /* ** The macro USEFETCH is true if we are allowed to use the xFetch and xUnfetch ** interfaces to access the database using memory-mapped I/O. */ #if SQLITE_MAX_MMAP_SIZE>0 # define USEFETCH(x) ((x)->bUseFetch) #else # define USEFETCH(x) 0 #endif /* ** The maximum legal page number is (2^31 - 1). */ #define PAGER_MAX_PGNO 2147483647 /* ** The argument to this macro is a file descriptor (type sqlite3_file*). ** Return 0 if it is not open, or non-zero (but not 1) if it is. ** ** This is so that expressions can be written as: ** ** if( isOpen(pPager->jfd) ){ ... ** ** instead of ** ** if( pPager->jfd->pMethods ){ ... */ #define isOpen(pFd) ((pFd)->pMethods!=0) /* ** Return true if this pager uses a write-ahead log instead of the usual ** rollback journal. Otherwise false. */ #ifndef SQLITE_OMIT_WAL static int pagerUseWal(Pager *pPager){ return (pPager->pWal!=0); } #else # define pagerUseWal(x) 0 # define pagerRollbackWal(x) 0 # define pagerWalFrames(v,w,x,y) 0 # define pagerOpenWalIfPresent(z) SQLITE_OK # define pagerBeginReadTransaction(z) SQLITE_OK #endif #ifndef NDEBUG /* ** Usage: ** ** assert( assert_pager_state(pPager) ); ** ** This function runs many asserts to try to find inconsistencies in ** the internal state of the Pager object. */ static int assert_pager_state(Pager *p){ Pager *pPager = p; /* State must be valid. */ assert( p->eState==PAGER_OPEN || p->eState==PAGER_READER || p->eState==PAGER_WRITER_LOCKED || p->eState==PAGER_WRITER_CACHEMOD || p->eState==PAGER_WRITER_DBMOD || p->eState==PAGER_WRITER_FINISHED || p->eState==PAGER_ERROR ); /* Regardless of the current state, a temp-file connection always behaves ** as if it has an exclusive lock on the database file. It never updates ** the change-counter field, so the changeCountDone flag is always set. */ assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK ); assert( p->tempFile==0 || pPager->changeCountDone ); /* If the useJournal flag is clear, the journal-mode must be "OFF". ** And if the journal-mode is "OFF", the journal file must not be open. */ assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal ); assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) ); /* If changeCountDone is set, a RESERVED lock or greater must be held ** on the file. */ assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK ); assert( p->eLock!=PENDING_LOCK ); switch( p->eState ){ case PAGER_OPEN: assert( pPager->errCode==SQLITE_OK ); assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile ); break; case PAGER_READER: assert( pPager->errCode==SQLITE_OK ); assert( p->eLock!=UNKNOWN_LOCK ); assert( p->eLock>=SHARED_LOCK ); break; case PAGER_WRITER_LOCKED: assert( p->eLock!=UNKNOWN_LOCK ); assert( pPager->errCode==SQLITE_OK ); if( !pagerUseWal(pPager) ){ assert( p->eLock>=RESERVED_LOCK ); } assert( pPager->dbSize==pPager->dbOrigSize ); assert( pPager->dbOrigSize==pPager->dbFileSize ); assert( pPager->dbOrigSize==pPager->dbHintSize ); assert( pPager->setMaster==0 ); break; case PAGER_WRITER_CACHEMOD: assert( p->eLock!=UNKNOWN_LOCK ); assert( pPager->errCode==SQLITE_OK ); if( !pagerUseWal(pPager) ){ /* It is possible that if journal_mode=wal here that neither the ** journal file nor the WAL file are open. This happens during ** a rollback transaction that switches from journal_mode=off ** to journal_mode=wal. */ assert( p->eLock>=RESERVED_LOCK ); assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL ); } assert( pPager->dbOrigSize==pPager->dbFileSize ); assert( pPager->dbOrigSize==pPager->dbHintSize ); break; case PAGER_WRITER_DBMOD: assert( p->eLock==EXCLUSIVE_LOCK ); assert( pPager->errCode==SQLITE_OK ); assert( !pagerUseWal(pPager) ); assert( p->eLock>=EXCLUSIVE_LOCK ); assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL ); assert( pPager->dbOrigSize<=pPager->dbHintSize ); break; case PAGER_WRITER_FINISHED: assert( p->eLock==EXCLUSIVE_LOCK ); assert( pPager->errCode==SQLITE_OK ); assert( !pagerUseWal(pPager) ); assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL ); break; case PAGER_ERROR: /* There must be at least one outstanding reference to the pager if ** in ERROR state. Otherwise the pager should have already dropped ** back to OPEN state. */ assert( pPager->errCode!=SQLITE_OK ); assert( sqlite3PcacheRefCount(pPager->pPCache)>0 ); break; } return 1; } #endif /* ifndef NDEBUG */ #ifdef SQLITE_DEBUG /* ** Return a pointer to a human readable string in a static buffer ** containing the state of the Pager object passed as an argument. This ** is intended to be used within debuggers. For example, as an alternative ** to "print *pPager" in gdb: ** ** (gdb) printf "%s", print_pager_state(pPager) */ static char *print_pager_state(Pager *p){ static char zRet[1024]; sqlite3_snprintf(1024, zRet, "Filename: %s\n" "State: %s errCode=%d\n" "Lock: %s\n" "Locking mode: locking_mode=%s\n" "Journal mode: journal_mode=%s\n" "Backing store: tempFile=%d memDb=%d useJournal=%d\n" "Journal: journalOff=%lld journalHdr=%lld\n" "Size: dbsize=%d dbOrigSize=%d dbFileSize=%d\n" , p->zFilename , p->eState==PAGER_OPEN ? "OPEN" : p->eState==PAGER_READER ? "READER" : p->eState==PAGER_WRITER_LOCKED ? "WRITER_LOCKED" : p->eState==PAGER_WRITER_CACHEMOD ? "WRITER_CACHEMOD" : p->eState==PAGER_WRITER_DBMOD ? "WRITER_DBMOD" : p->eState==PAGER_WRITER_FINISHED ? "WRITER_FINISHED" : p->eState==PAGER_ERROR ? "ERROR" : "?error?" , (int)p->errCode , p->eLock==NO_LOCK ? "NO_LOCK" : p->eLock==RESERVED_LOCK ? "RESERVED" : p->eLock==EXCLUSIVE_LOCK ? "EXCLUSIVE" : p->eLock==SHARED_LOCK ? "SHARED" : p->eLock==UNKNOWN_LOCK ? "UNKNOWN" : "?error?" , p->exclusiveMode ? "exclusive" : "normal" , p->journalMode==PAGER_JOURNALMODE_MEMORY ? "memory" : p->journalMode==PAGER_JOURNALMODE_OFF ? "off" : p->journalMode==PAGER_JOURNALMODE_DELETE ? "delete" : p->journalMode==PAGER_JOURNALMODE_PERSIST ? "persist" : p->journalMode==PAGER_JOURNALMODE_TRUNCATE ? "truncate" : p->journalMode==PAGER_JOURNALMODE_WAL ? "wal" : "?error?" , (int)p->tempFile, (int)p->memDb, (int)p->useJournal , p->journalOff, p->journalHdr , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize ); return zRet; } #endif /* ** Return true if it is necessary to write page *pPg into the sub-journal. ** A page needs to be written into the sub-journal if there exists one ** or more open savepoints for which: ** ** * The page-number is less than or equal to PagerSavepoint.nOrig, and ** * The bit corresponding to the page-number is not set in ** PagerSavepoint.pInSavepoint. */ static int subjRequiresPage(PgHdr *pPg){ Pager *pPager = pPg->pPager; PagerSavepoint *p; Pgno pgno = pPg->pgno; int i; for(i=0; inSavepoint; i++){ p = &pPager->aSavepoint[i]; if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){ return 1; } } return 0; } #ifdef SQLITE_DEBUG /* ** Return true if the page is already in the journal file. */ static int pageInJournal(Pager *pPager, PgHdr *pPg){ return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno); } #endif /* ** Read a 32-bit integer from the given file descriptor. Store the integer ** that is read in *pRes. Return SQLITE_OK if everything worked, or an ** error code is something goes wrong. ** ** All values are stored on disk as big-endian. */ static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ unsigned char ac[4]; int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); if( rc==SQLITE_OK ){ *pRes = sqlite3Get4byte(ac); } return rc; } /* ** Write a 32-bit integer into a string buffer in big-endian byte order. */ #define put32bits(A,B) sqlite3Put4byte((u8*)A,B) /* ** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK ** on success or an error code is something goes wrong. */ static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ char ac[4]; put32bits(ac, val); return sqlite3OsWrite(fd, ac, 4, offset); } /* ** Unlock the database file to level eLock, which must be either NO_LOCK ** or SHARED_LOCK. Regardless of whether or not the call to xUnlock() ** succeeds, set the Pager.eLock variable to match the (attempted) new lock. ** ** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is ** called, do not modify it. See the comment above the #define of ** UNKNOWN_LOCK for an explanation of this. */ static int pagerUnlockDb(Pager *pPager, int eLock){ int rc = SQLITE_OK; assert( !pPager->exclusiveMode || pPager->eLock==eLock ); assert( eLock==NO_LOCK || eLock==SHARED_LOCK ); assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 ); if( isOpen(pPager->fd) ){ assert( pPager->eLock>=eLock ); rc = pPager->noLock ? SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock); if( pPager->eLock!=UNKNOWN_LOCK ){ pPager->eLock = (u8)eLock; } IOTRACE(("UNLOCK %p %d\n", pPager, eLock)) } return rc; } /* ** Lock the database file to level eLock, which must be either SHARED_LOCK, ** RESERVED_LOCK or EXCLUSIVE_LOCK. If the caller is successful, set the ** Pager.eLock variable to the new locking state. ** ** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is ** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK. ** See the comment above the #define of UNKNOWN_LOCK for an explanation ** of this. */ static int pagerLockDb(Pager *pPager, int eLock){ int rc = SQLITE_OK; assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK ); if( pPager->eLockeLock==UNKNOWN_LOCK ){ rc = pPager->noLock ? SQLITE_OK : sqlite3OsLock(pPager->fd, eLock); if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){ pPager->eLock = (u8)eLock; IOTRACE(("LOCK %p %d\n", pPager, eLock)) } } return rc; } /* ** This function determines whether or not the atomic-write optimization ** can be used with this pager. The optimization can be used if: ** ** (a) the value returned by OsDeviceCharacteristics() indicates that ** a database page may be written atomically, and ** (b) the value returned by OsSectorSize() is less than or equal ** to the page size. ** ** The optimization is also always enabled for temporary files. It is ** an error to call this function if pPager is opened on an in-memory ** database. ** ** If the optimization cannot be used, 0 is returned. If it can be used, ** then the value returned is the size of the journal file when it ** contains rollback data for exactly one page. */ #ifdef SQLITE_ENABLE_ATOMIC_WRITE static int jrnlBufferSize(Pager *pPager){ if( !pPager->tempFile ){ int dc; /* Device characteristics */ int nSector; /* Sector size */ int szPage; /* Page size */ assert( isOpen(pPager->fd) ); dc = sqlite3OsDeviceCharacteristics(pPager->fd); nSector = pPager->sectorSize; szPage = pPager->pageSize; assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){ return 0; } } return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); } #endif /* ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking ** on the cache using a hash function. This is used for testing ** and debugging only. */ #ifdef SQLITE_CHECK_PAGES /* ** Return a 32-bit hash of the page data for pPage. */ static u32 pager_datahash(int nByte, unsigned char *pData){ u32 hash = 0; int i; for(i=0; ipPager->pageSize, (unsigned char *)pPage->pData); } static void pager_set_pagehash(PgHdr *pPage){ pPage->pageHash = pager_pagehash(pPage); } /* ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES ** is defined, and NDEBUG is not defined, an assert() statement checks ** that the page is either dirty or still matches the calculated page-hash. */ #define CHECK_PAGE(x) checkPage(x) static void checkPage(PgHdr *pPg){ Pager *pPager = pPg->pPager; assert( pPager->eState!=PAGER_ERROR ); assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) ); } #else #define pager_datahash(X,Y) 0 #define pager_pagehash(X) 0 #define pager_set_pagehash(X) #define CHECK_PAGE(x) #endif /* SQLITE_CHECK_PAGES */ /* ** When this is called the journal file for pager pPager must be open. ** This function attempts to read a master journal file name from the ** end of the file and, if successful, copies it into memory supplied ** by the caller. See comments above writeMasterJournal() for the format ** used to store a master journal file name at the end of a journal file. ** ** zMaster must point to a buffer of at least nMaster bytes allocated by ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is ** enough space to write the master journal name). If the master journal ** name in the journal is longer than nMaster bytes (including a ** nul-terminator), then this is handled as if no master journal name ** were present in the journal. ** ** If a master journal file name is present at the end of the journal ** file, then it is copied into the buffer pointed to by zMaster. A ** nul-terminator byte is appended to the buffer following the master ** journal file name. ** ** If it is determined that no master journal file name is present ** zMaster[0] is set to 0 and SQLITE_OK returned. ** ** If an error occurs while reading from the journal file, an SQLite ** error code is returned. */ static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){ int rc; /* Return code */ u32 len; /* Length in bytes of master journal name */ i64 szJ; /* Total size in bytes of journal file pJrnl */ u32 cksum; /* MJ checksum value read from journal */ u32 u; /* Unsigned loop counter */ unsigned char aMagic[8]; /* A buffer to hold the magic header */ zMaster[0] = '\0'; if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ)) || szJ<16 || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len)) || len>=nMaster || len==0 || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum)) || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8)) || memcmp(aMagic, aJournalMagic, 8) || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len)) ){ return rc; } /* See if the checksum matches the master journal name */ for(u=0; ujournalOff, assuming a sector ** size of pPager->sectorSize bytes. ** ** i.e for a sector size of 512: ** ** Pager.journalOff Return value ** --------------------------------------- ** 0 0 ** 512 512 ** 100 512 ** 2000 2048 ** */ static i64 journalHdrOffset(Pager *pPager){ i64 offset = 0; i64 c = pPager->journalOff; if( c ){ offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager); } assert( offset%JOURNAL_HDR_SZ(pPager)==0 ); assert( offset>=c ); assert( (offset-c)jfd) ); if( pPager->journalOff ){ const i64 iLimit = pPager->journalSizeLimit; /* Local cache of jsl */ IOTRACE(("JZEROHDR %p\n", pPager)) if( doTruncate || iLimit==0 ){ rc = sqlite3OsTruncate(pPager->jfd, 0); }else{ static const char zeroHdr[28] = {0}; rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0); } if( rc==SQLITE_OK && !pPager->noSync ){ rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags); } /* At this point the transaction is committed but the write lock ** is still held on the file. If there is a size limit configured for ** the persistent journal and the journal file currently consumes more ** space than that limit allows for, truncate it now. There is no need ** to sync the file following this operation. */ if( rc==SQLITE_OK && iLimit>0 ){ i64 sz; rc = sqlite3OsFileSize(pPager->jfd, &sz); if( rc==SQLITE_OK && sz>iLimit ){ rc = sqlite3OsTruncate(pPager->jfd, iLimit); } } } return rc; } /* ** The journal file must be open when this routine is called. A journal ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the ** current location. ** ** The format for the journal header is as follows: ** - 8 bytes: Magic identifying journal format. ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on. ** - 4 bytes: Random number used for page hash. ** - 4 bytes: Initial database page count. ** - 4 bytes: Sector size used by the process that wrote this journal. ** - 4 bytes: Database page size. ** ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space. */ static int writeJournalHdr(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ char *zHeader = pPager->pTmpSpace; /* Temporary space used to build header */ u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */ u32 nWrite; /* Bytes of header sector written */ int ii; /* Loop counter */ assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ if( nHeader>JOURNAL_HDR_SZ(pPager) ){ nHeader = JOURNAL_HDR_SZ(pPager); } /* If there are active savepoints and any of them were created ** since the most recent journal header was written, update the ** PagerSavepoint.iHdrOffset fields now. */ for(ii=0; iinSavepoint; ii++){ if( pPager->aSavepoint[ii].iHdrOffset==0 ){ pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff; } } pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager); /* ** Write the nRec Field - the number of page records that follow this ** journal header. Normally, zero is written to this value at this time. ** After the records are added to the journal (and the journal synced, ** if in full-sync mode), the zero is overwritten with the true number ** of records (see syncJournal()). ** ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When ** reading the journal this value tells SQLite to assume that the ** rest of the journal file contains valid page records. This assumption ** is dangerous, as if a failure occurred whilst writing to the journal ** file it may contain some garbage data. There are two scenarios ** where this risk can be ignored: ** ** * When the pager is in no-sync mode. Corruption can follow a ** power failure in this case anyway. ** ** * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees ** that garbage data is never appended to the journal file. */ assert( isOpen(pPager->fd) || pPager->noSync ); if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY) || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) ){ memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff); }else{ memset(zHeader, 0, sizeof(aJournalMagic)+4); } /* The random check-hash initializer */ sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit); put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit); /* The initial database size */ put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize); /* The assumed sector size for this process */ put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize); /* The page size */ put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize); /* Initializing the tail of the buffer is not necessary. Everything ** works find if the following memset() is omitted. But initializing ** the memory prevents valgrind from complaining, so we are willing to ** take the performance hit. */ memset(&zHeader[sizeof(aJournalMagic)+20], 0, nHeader-(sizeof(aJournalMagic)+20)); /* In theory, it is only necessary to write the 28 bytes that the ** journal header consumes to the journal file here. Then increment the ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next ** record is written to the following sector (leaving a gap in the file ** that will be implicitly filled in by the OS). ** ** However it has been discovered that on some systems this pattern can ** be significantly slower than contiguously writing data to the file, ** even if that means explicitly writing data to the block of ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what ** is done. ** ** The loop is required here in case the sector-size is larger than the ** database page size. Since the zHeader buffer is only Pager.pageSize ** bytes in size, more than one call to sqlite3OsWrite() may be required ** to populate the entire journal header sector. */ for(nWrite=0; rc==SQLITE_OK&&nWritejournalHdr, nHeader)) rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff); assert( pPager->journalHdr <= pPager->journalOff ); pPager->journalOff += nHeader; } return rc; } /* ** The journal file must be open when this is called. A journal header file ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal ** file. The current location in the journal file is given by ** pPager->journalOff. See comments above function writeJournalHdr() for ** a description of the journal header format. ** ** If the header is read successfully, *pNRec is set to the number of ** page records following this header and *pDbSize is set to the size of the ** database before the transaction began, in pages. Also, pPager->cksumInit ** is set to the value read from the journal header. SQLITE_OK is returned ** in this case. ** ** If the journal header file appears to be corrupted, SQLITE_DONE is ** returned and *pNRec and *PDbSize are undefined. If JOURNAL_HDR_SZ bytes ** cannot be read from the journal file an error code is returned. */ static int readJournalHdr( Pager *pPager, /* Pager object */ int isHot, i64 journalSize, /* Size of the open journal file in bytes */ u32 *pNRec, /* OUT: Value read from the nRec field */ u32 *pDbSize /* OUT: Value of original database size field */ ){ int rc; /* Return code */ unsigned char aMagic[8]; /* A buffer to hold the magic header */ i64 iHdrOff; /* Offset of journal header being read */ assert( isOpen(pPager->jfd) ); /* Journal file must be open. */ /* Advance Pager.journalOff to the start of the next sector. If the ** journal file is too small for there to be a header stored at this ** point, return SQLITE_DONE. */ pPager->journalOff = journalHdrOffset(pPager); if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){ return SQLITE_DONE; } iHdrOff = pPager->journalOff; /* Read in the first 8 bytes of the journal header. If they do not match ** the magic string found at the start of each journal header, return ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise, ** proceed. */ if( isHot || iHdrOff!=pPager->journalHdr ){ rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff); if( rc ){ return rc; } if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){ return SQLITE_DONE; } } /* Read the first three 32-bit fields of the journal header: The nRec ** field, the checksum-initializer and the database size at the start ** of the transaction. Return an error code if anything goes wrong. */ if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec)) || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit)) || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize)) ){ return rc; } if( pPager->journalOff==0 ){ u32 iPageSize; /* Page-size field of journal header */ u32 iSectorSize; /* Sector-size field of journal header */ /* Read the page-size and sector-size journal header fields. */ if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize)) || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize)) ){ return rc; } /* Versions of SQLite prior to 3.5.8 set the page-size field of the ** journal header to zero. In this case, assume that the Pager.pageSize ** variable is already set to the correct page size. */ if( iPageSize==0 ){ iPageSize = pPager->pageSize; } /* Check that the values read from the page-size and sector-size fields ** are within range. To be 'in range', both values need to be a power ** of two greater than or equal to 512 or 32, and not greater than their ** respective compile time maximum limits. */ if( iPageSize<512 || iSectorSize<32 || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE || ((iPageSize-1)&iPageSize)!=0 || ((iSectorSize-1)&iSectorSize)!=0 ){ /* If the either the page-size or sector-size in the journal-header is ** invalid, then the process that wrote the journal-header must have ** crashed before the header was synced. In this case stop reading ** the journal file here. */ return SQLITE_DONE; } /* Update the page-size to match the value read from the journal. ** Use a testcase() macro to make sure that malloc failure within ** PagerSetPagesize() is tested. */ rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1); testcase( rc!=SQLITE_OK ); /* Update the assumed sector-size to match the value used by ** the process that created this journal. If this journal was ** created by a process other than this one, then this routine ** is being called from within pager_playback(). The local value ** of Pager.sectorSize is restored at the end of that routine. */ pPager->sectorSize = iSectorSize; } pPager->journalOff += JOURNAL_HDR_SZ(pPager); return rc; } /* ** Write the supplied master journal name into the journal file for pager ** pPager at the current location. The master journal name must be the last ** thing written to a journal file. If the pager is in full-sync mode, the ** journal file descriptor is advanced to the next sector boundary before ** anything is written. The format is: ** ** + 4 bytes: PAGER_MJ_PGNO. ** + N bytes: Master journal filename in utf-8. ** + 4 bytes: N (length of master journal name in bytes, no nul-terminator). ** + 4 bytes: Master journal name checksum. ** + 8 bytes: aJournalMagic[]. ** ** The master journal page checksum is the sum of the bytes in the master ** journal name, where each byte is interpreted as a signed 8-bit integer. ** ** If zMaster is a NULL pointer (occurs for a single database transaction), ** this call is a no-op. */ static int writeMasterJournal(Pager *pPager, const char *zMaster){ int rc; /* Return code */ int nMaster; /* Length of string zMaster */ i64 iHdrOff; /* Offset of header in journal file */ i64 jrnlSize; /* Size of journal file on disk */ u32 cksum = 0; /* Checksum of string zMaster */ assert( pPager->setMaster==0 ); assert( !pagerUseWal(pPager) ); if( !zMaster || pPager->journalMode==PAGER_JOURNALMODE_MEMORY || !isOpen(pPager->jfd) ){ return SQLITE_OK; } pPager->setMaster = 1; assert( pPager->journalHdr <= pPager->journalOff ); /* Calculate the length in bytes and the checksum of zMaster */ for(nMaster=0; zMaster[nMaster]; nMaster++){ cksum += zMaster[nMaster]; } /* If in full-sync mode, advance to the next disk sector before writing ** the master journal name. This is in case the previous page written to ** the journal has already been synced. */ if( pPager->fullSync ){ pPager->journalOff = journalHdrOffset(pPager); } iHdrOff = pPager->journalOff; /* Write the master journal data to the end of the journal file. If ** an error occurs, return the error code to the caller. */ if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager)))) || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum))) || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8))) ){ return rc; } pPager->journalOff += (nMaster+20); /* If the pager is in peristent-journal mode, then the physical ** journal-file may extend past the end of the master-journal name ** and 8 bytes of magic data just written to the file. This is ** dangerous because the code to rollback a hot-journal file ** will not be able to find the master-journal name to determine ** whether or not the journal is hot. ** ** Easiest thing to do in this scenario is to truncate the journal ** file to the required size. */ if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize)) && jrnlSize>pPager->journalOff ){ rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff); } return rc; } /* ** Discard the entire contents of the in-memory page-cache. */ static void pager_reset(Pager *pPager){ pPager->iDataVersion++; sqlite3BackupRestart(pPager->pBackup); sqlite3PcacheClear(pPager->pPCache); } /* ** Return the pPager->iDataVersion value */ SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager *pPager){ assert( pPager->eState>PAGER_OPEN ); return pPager->iDataVersion; } /* ** Free all structures in the Pager.aSavepoint[] array and set both ** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal ** if it is open and the pager is not in exclusive mode. */ static void releaseAllSavepoints(Pager *pPager){ int ii; /* Iterator for looping through Pager.aSavepoint */ for(ii=0; iinSavepoint; ii++){ sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); } if( !pPager->exclusiveMode || sqlite3IsMemJournal(pPager->sjfd) ){ sqlite3OsClose(pPager->sjfd); } sqlite3_free(pPager->aSavepoint); pPager->aSavepoint = 0; pPager->nSavepoint = 0; pPager->nSubRec = 0; } /* ** Set the bit number pgno in the PagerSavepoint.pInSavepoint ** bitvecs of all open savepoints. Return SQLITE_OK if successful ** or SQLITE_NOMEM if a malloc failure occurs. */ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ int ii; /* Loop counter */ int rc = SQLITE_OK; /* Result code */ for(ii=0; iinSavepoint; ii++){ PagerSavepoint *p = &pPager->aSavepoint[ii]; if( pgno<=p->nOrig ){ rc |= sqlite3BitvecSet(p->pInSavepoint, pgno); testcase( rc==SQLITE_NOMEM ); assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); } } return rc; } /* ** This function is a no-op if the pager is in exclusive mode and not ** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN ** state. ** ** If the pager is not in exclusive-access mode, the database file is ** completely unlocked. If the file is unlocked and the file-system does ** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is ** closed (if it is open). ** ** If the pager is in ERROR state when this function is called, the ** contents of the pager cache are discarded before switching back to ** the OPEN state. Regardless of whether the pager is in exclusive-mode ** or not, any journal file left in the file-system will be treated ** as a hot-journal and rolled back the next time a read-transaction ** is opened (by this or by any other connection). */ static void pager_unlock(Pager *pPager){ assert( pPager->eState==PAGER_READER || pPager->eState==PAGER_OPEN || pPager->eState==PAGER_ERROR ); sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; releaseAllSavepoints(pPager); if( pagerUseWal(pPager) ){ assert( !isOpen(pPager->jfd) ); sqlite3WalEndReadTransaction(pPager->pWal); pPager->eState = PAGER_OPEN; }else if( !pPager->exclusiveMode ){ int rc; /* Error code returned by pagerUnlockDb() */ int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0; /* If the operating system support deletion of open files, then ** close the journal file when dropping the database lock. Otherwise ** another connection with journal_mode=delete might delete the file ** out from under us. */ assert( (PAGER_JOURNALMODE_MEMORY & 5)!=1 ); assert( (PAGER_JOURNALMODE_OFF & 5)!=1 ); assert( (PAGER_JOURNALMODE_WAL & 5)!=1 ); assert( (PAGER_JOURNALMODE_DELETE & 5)!=1 ); assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN) || 1!=(pPager->journalMode & 5) ){ sqlite3OsClose(pPager->jfd); } /* If the pager is in the ERROR state and the call to unlock the database ** file fails, set the current lock to UNKNOWN_LOCK. See the comment ** above the #define for UNKNOWN_LOCK for an explanation of why this ** is necessary. */ rc = pagerUnlockDb(pPager, NO_LOCK); if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){ pPager->eLock = UNKNOWN_LOCK; } /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here ** without clearing the error code. This is intentional - the error ** code is cleared and the cache reset in the block below. */ assert( pPager->errCode || pPager->eState!=PAGER_ERROR ); pPager->changeCountDone = 0; pPager->eState = PAGER_OPEN; } /* If Pager.errCode is set, the contents of the pager cache cannot be ** trusted. Now that there are no outstanding references to the pager, ** it can safely move back to PAGER_OPEN state. This happens in both ** normal and exclusive-locking mode. */ if( pPager->errCode ){ pager_reset(pPager); pPager->changeCountDone = pPager->tempFile; pPager->eState = PAGER_OPEN; pPager->errCode = SQLITE_OK; if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); } pPager->journalOff = 0; pPager->journalHdr = 0; pPager->setMaster = 0; } /* ** This function is called whenever an IOERR or FULL error that requires ** the pager to transition into the ERROR state may ahve occurred. ** The first argument is a pointer to the pager structure, the second ** the error-code about to be returned by a pager API function. The ** value returned is a copy of the second argument to this function. ** ** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the ** IOERR sub-codes, the pager enters the ERROR state and the error code ** is stored in Pager.errCode. While the pager remains in the ERROR state, ** all major API calls on the Pager will immediately return Pager.errCode. ** ** The ERROR state indicates that the contents of the pager-cache ** cannot be trusted. This state can be cleared by completely discarding ** the contents of the pager-cache. If a transaction was active when ** the persistent error occurred, then the rollback journal may need ** to be replayed to restore the contents of the database file (as if ** it were a hot-journal). */ static int pager_error(Pager *pPager, int rc){ int rc2 = rc & 0xff; assert( rc==SQLITE_OK ); assert( pPager->errCode==SQLITE_FULL || pPager->errCode==SQLITE_OK || (pPager->errCode & 0xff)==SQLITE_IOERR ); if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){ pPager->errCode = rc; pPager->eState = PAGER_ERROR; } return rc; } static int pager_truncate(Pager *pPager, Pgno nPage); /* ** This routine ends a transaction. A transaction is usually ended by ** either a COMMIT or a ROLLBACK operation. This routine may be called ** after rollback of a hot-journal, or if an error occurs while opening ** the journal file or writing the very first journal-header of a ** database transaction. ** ** This routine is never called in PAGER_ERROR state. If it is called ** in PAGER_NONE or PAGER_SHARED state and the lock held is less ** exclusive than a RESERVED lock, it is a no-op. ** ** Otherwise, any active savepoints are released. ** ** If the journal file is open, then it is "finalized". Once a journal ** file has been finalized it is not possible to use it to roll back a ** transaction. Nor will it be considered to be a hot-journal by this ** or any other database connection. Exactly how a journal is finalized ** depends on whether or not the pager is running in exclusive mode and ** the current journal-mode (Pager.journalMode value), as follows: ** ** journalMode==MEMORY ** Journal file descriptor is simply closed. This destroys an ** in-memory journal. ** ** journalMode==TRUNCATE ** Journal file is truncated to zero bytes in size. ** ** journalMode==PERSIST ** The first 28 bytes of the journal file are zeroed. This invalidates ** the first journal header in the file, and hence the entire journal ** file. An invalid journal file cannot be rolled back. ** ** journalMode==DELETE ** The journal file is closed and deleted using sqlite3OsDelete(). ** ** If the pager is running in exclusive mode, this method of finalizing ** the journal file is never used. Instead, if the journalMode is ** DELETE and the pager is in exclusive mode, the method described under ** journalMode==PERSIST is used instead. ** ** After the journal is finalized, the pager moves to PAGER_READER state. ** If running in non-exclusive rollback mode, the lock on the file is ** downgraded to a SHARED_LOCK. ** ** SQLITE_OK is returned if no error occurs. If an error occurs during ** any of the IO operations to finalize the journal file or unlock the ** database then the IO error code is returned to the user. If the ** operation to finalize the journal file fails, then the code still ** tries to unlock the database file if not in exclusive mode. If the ** unlock operation fails as well, then the first error code related ** to the first error encountered (the journal finalization one) is ** returned. */ static int pager_end_transaction(Pager *pPager, int hasMaster, int bCommit){ int rc = SQLITE_OK; /* Error code from journal finalization operation */ int rc2 = SQLITE_OK; /* Error code from db file unlock operation */ /* Do nothing if the pager does not have an open write transaction ** or at least a RESERVED lock. This function may be called when there ** is no write-transaction active but a RESERVED or greater lock is ** held under two circumstances: ** ** 1. After a successful hot-journal rollback, it is called with ** eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK. ** ** 2. If a connection with locking_mode=exclusive holding an EXCLUSIVE ** lock switches back to locking_mode=normal and then executes a ** read-transaction, this function is called with eState==PAGER_READER ** and eLock==EXCLUSIVE_LOCK when the read-transaction is closed. */ assert( assert_pager_state(pPager) ); assert( pPager->eState!=PAGER_ERROR ); if( pPager->eStateeLockjfd) || pPager->pInJournal==0 ); if( isOpen(pPager->jfd) ){ assert( !pagerUseWal(pPager) ); /* Finalize the journal file. */ if( sqlite3IsMemJournal(pPager->jfd) ){ assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ); sqlite3OsClose(pPager->jfd); }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){ if( pPager->journalOff==0 ){ rc = SQLITE_OK; }else{ rc = sqlite3OsTruncate(pPager->jfd, 0); if( rc==SQLITE_OK && pPager->fullSync ){ /* Make sure the new file size is written into the inode right away. ** Otherwise the journal might resurrect following a power loss and ** cause the last transaction to roll back. See ** https://bugzilla.mozilla.org/show_bug.cgi?id=1072773 */ rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); } } pPager->journalOff = 0; }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL) ){ rc = zeroJournalHdr(pPager, hasMaster); pPager->journalOff = 0; }else{ /* This branch may be executed with Pager.journalMode==MEMORY if ** a hot-journal was just rolled back. In this case the journal ** file should be closed and deleted. If this connection writes to ** the database file, it will do so using an in-memory journal. */ int bDelete = (!pPager->tempFile && sqlite3JournalExists(pPager->jfd)); assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->journalMode==PAGER_JOURNALMODE_WAL ); sqlite3OsClose(pPager->jfd); if( bDelete ){ rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); } } } #ifdef SQLITE_CHECK_PAGES sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash); if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){ PgHdr *p = sqlite3PagerLookup(pPager, 1); if( p ){ p->pageHash = 0; sqlite3PagerUnrefNotNull(p); } } #endif sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize); if( pagerUseWal(pPager) ){ /* Drop the WAL write-lock, if any. Also, if the connection was in ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE ** lock held on the database file. */ rc2 = sqlite3WalEndWriteTransaction(pPager->pWal); assert( rc2==SQLITE_OK ); }else if( rc==SQLITE_OK && bCommit && pPager->dbFileSize>pPager->dbSize ){ /* This branch is taken when committing a transaction in rollback-journal ** mode if the database file on disk is larger than the database image. ** At this point the journal has been finalized and the transaction ** successfully committed, but the EXCLUSIVE lock is still held on the ** file. So it is safe to truncate the database file to its minimum ** required size. */ assert( pPager->eLock==EXCLUSIVE_LOCK ); rc = pager_truncate(pPager, pPager->dbSize); } if( rc==SQLITE_OK && bCommit && isOpen(pPager->fd) ){ rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_COMMIT_PHASETWO, 0); if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; } if( !pPager->exclusiveMode && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0)) ){ rc2 = pagerUnlockDb(pPager, SHARED_LOCK); pPager->changeCountDone = 0; } pPager->eState = PAGER_READER; pPager->setMaster = 0; return (rc==SQLITE_OK?rc2:rc); } /* ** Execute a rollback if a transaction is active and unlock the ** database file. ** ** If the pager has already entered the ERROR state, do not attempt ** the rollback at this time. Instead, pager_unlock() is called. The ** call to pager_unlock() will discard all in-memory pages, unlock ** the database file and move the pager back to OPEN state. If this ** means that there is a hot-journal left in the file-system, the next ** connection to obtain a shared lock on the pager (which may be this one) ** will roll it back. ** ** If the pager has not already entered the ERROR state, but an IO or ** malloc error occurs during a rollback, then this will itself cause ** the pager to enter the ERROR state. Which will be cleared by the ** call to pager_unlock(), as described above. */ static void pagerUnlockAndRollback(Pager *pPager){ if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){ assert( assert_pager_state(pPager) ); if( pPager->eState>=PAGER_WRITER_LOCKED ){ sqlite3PagerRollback(pPager); }else if( !pPager->exclusiveMode ){ assert( pPager->eState==PAGER_READER ); pager_end_transaction(pPager, 0, 0); } } pager_unlock(pPager); } /* ** Parameter aData must point to a buffer of pPager->pageSize bytes ** of data. Compute and return a checksum based ont the contents of the ** page of data and the current value of pPager->cksumInit. ** ** This is not a real checksum. It is really just the sum of the ** random initial value (pPager->cksumInit) and every 200th byte ** of the page data, starting with byte offset (pPager->pageSize%200). ** Each byte is interpreted as an 8-bit unsigned integer. ** ** Changing the formula used to compute this checksum results in an ** incompatible journal file format. ** ** If journal corruption occurs due to a power failure, the most likely ** scenario is that one end or the other of the record will be changed. ** It is much less likely that the two ends of the journal record will be ** correct and the middle be corrupt. Thus, this "checksum" scheme, ** though fast and simple, catches the mostly likely kind of corruption. */ static u32 pager_cksum(Pager *pPager, const u8 *aData){ u32 cksum = pPager->cksumInit; /* Checksum value to return */ int i = pPager->pageSize-200; /* Loop counter */ while( i>0 ){ cksum += aData[i]; i -= 200; } return cksum; } /* ** Report the current page size and number of reserved bytes back ** to the codec. */ #ifdef SQLITE_HAS_CODEC static void pagerReportSize(Pager *pPager){ if( pPager->xCodecSizeChng ){ pPager->xCodecSizeChng(pPager->pCodec, pPager->pageSize, (int)pPager->nReserve); } } #else # define pagerReportSize(X) /* No-op if we do not support a codec */ #endif #ifdef SQLITE_HAS_CODEC /* ** Make sure the number of reserved bits is the same in the destination ** pager as it is in the source. This comes up when a VACUUM changes the ** number of reserved bits to the "optimal" amount. */ SQLITE_PRIVATE void sqlite3PagerAlignReserve(Pager *pDest, Pager *pSrc){ if( pDest->nReserve!=pSrc->nReserve ){ pDest->nReserve = pSrc->nReserve; pagerReportSize(pDest); } } #endif /* ** Read a single page from either the journal file (if isMainJrnl==1) or ** from the sub-journal (if isMainJrnl==0) and playback that page. ** The page begins at offset *pOffset into the file. The *pOffset ** value is increased to the start of the next page in the journal. ** ** The main rollback journal uses checksums - the statement journal does ** not. ** ** If the page number of the page record read from the (sub-)journal file ** is greater than the current value of Pager.dbSize, then playback is ** skipped and SQLITE_OK is returned. ** ** If pDone is not NULL, then it is a record of pages that have already ** been played back. If the page at *pOffset has already been played back ** (if the corresponding pDone bit is set) then skip the playback. ** Make sure the pDone bit corresponding to the *pOffset page is set ** prior to returning. ** ** If the page record is successfully read from the (sub-)journal file ** and played back, then SQLITE_OK is returned. If an IO error occurs ** while reading the record from the (sub-)journal file or while writing ** to the database file, then the IO error code is returned. If data ** is successfully read from the (sub-)journal file but appears to be ** corrupted, SQLITE_DONE is returned. Data is considered corrupted in ** two circumstances: ** ** * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or ** * If the record is being rolled back from the main journal file ** and the checksum field does not match the record content. ** ** Neither of these two scenarios are possible during a savepoint rollback. ** ** If this is a savepoint rollback, then memory may have to be dynamically ** allocated by this function. If this is the case and an allocation fails, ** SQLITE_NOMEM is returned. */ static int pager_playback_one_page( Pager *pPager, /* The pager being played back */ i64 *pOffset, /* Offset of record to playback */ Bitvec *pDone, /* Bitvec of pages already played back */ int isMainJrnl, /* 1 -> main journal. 0 -> sub-journal. */ int isSavepnt /* True for a savepoint rollback */ ){ int rc; PgHdr *pPg; /* An existing page in the cache */ Pgno pgno; /* The page number of a page in journal */ u32 cksum; /* Checksum used for sanity checking */ char *aData; /* Temporary storage for the page */ sqlite3_file *jfd; /* The file descriptor for the journal file */ int isSynced; /* True if journal page is synced */ assert( (isMainJrnl&~1)==0 ); /* isMainJrnl is 0 or 1 */ assert( (isSavepnt&~1)==0 ); /* isSavepnt is 0 or 1 */ assert( isMainJrnl || pDone ); /* pDone always used on sub-journals */ assert( isSavepnt || pDone==0 ); /* pDone never used on non-savepoint */ aData = pPager->pTmpSpace; assert( aData ); /* Temp storage must have already been allocated */ assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) ); /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction ** or savepoint rollback done at the request of the caller) or this is ** a hot-journal rollback. If it is a hot-journal rollback, the pager ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback ** only reads from the main journal, not the sub-journal. */ assert( pPager->eState>=PAGER_WRITER_CACHEMOD || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK) ); assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl ); /* Read the page number and page data from the journal or sub-journal ** file. Return an error code to the caller if an IO error occurs. */ jfd = isMainJrnl ? pPager->jfd : pPager->sjfd; rc = read32bits(jfd, *pOffset, &pgno); if( rc!=SQLITE_OK ) return rc; rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4); if( rc!=SQLITE_OK ) return rc; *pOffset += pPager->pageSize + 4 + isMainJrnl*4; /* Sanity checking on the page. This is more important that I originally ** thought. If a power failure occurs while the journal is being written, ** it could cause invalid data to be written into the journal. We need to ** detect this invalid data (with high probability) and ignore it. */ if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ assert( !isSavepnt ); return SQLITE_DONE; } if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){ return SQLITE_OK; } if( isMainJrnl ){ rc = read32bits(jfd, (*pOffset)-4, &cksum); if( rc ) return rc; if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){ return SQLITE_DONE; } } /* If this page has already been played back before during the current ** rollback, then don't bother to play it back again. */ if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){ return rc; } /* When playing back page 1, restore the nReserve setting */ if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){ pPager->nReserve = ((u8*)aData)[20]; pagerReportSize(pPager); } /* If the pager is in CACHEMOD state, then there must be a copy of this ** page in the pager cache. In this case just update the pager cache, ** not the database file. The page is left marked dirty in this case. ** ** An exception to the above rule: If the database is in no-sync mode ** and a page is moved during an incremental vacuum then the page may ** not be in the pager cache. Later: if a malloc() or IO error occurs ** during a Movepage() call, then the page may not be in the cache ** either. So the condition described in the above paragraph is not ** assert()able. ** ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the ** pager cache if it exists and the main file. The page is then marked ** not dirty. Since this code is only executed in PAGER_OPEN state for ** a hot-journal rollback, it is guaranteed that the page-cache is empty ** if the pager is in OPEN state. ** ** Ticket #1171: The statement journal might contain page content that is ** different from the page content at the start of the transaction. ** This occurs when a page is changed prior to the start of a statement ** then changed again within the statement. When rolling back such a ** statement we must not write to the original database unless we know ** for certain that original page contents are synced into the main rollback ** journal. Otherwise, a power loss might leave modified data in the ** database file without an entry in the rollback journal that can ** restore the database to its original form. Two conditions must be ** met before writing to the database files. (1) the database must be ** locked. (2) we know that the original page content is fully synced ** in the main journal either because the page is not in cache or else ** the page is marked as needSync==0. ** ** 2008-04-14: When attempting to vacuum a corrupt database file, it ** is possible to fail a statement on a database that does not yet exist. ** Do not attempt to write if database file has never been opened. */ if( pagerUseWal(pPager) ){ pPg = 0; }else{ pPg = sqlite3PagerLookup(pPager, pgno); } assert( pPg ); assert( pPager->eState!=PAGER_OPEN || pPg==0 ); PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n", PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData), (isMainJrnl?"main-journal":"sub-journal") )); if( isMainJrnl ){ isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr); }else{ isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC)); } if( isOpen(pPager->fd) && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) && isSynced ){ i64 ofst = (pgno-1)*(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); assert( !pagerUseWal(pPager) ); rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } if( pPager->pBackup ){ CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM); sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData); CODEC2(pPager, aData, pgno, 7, rc=SQLITE_NOMEM, aData); } }else if( !isMainJrnl && pPg==0 ){ /* If this is a rollback of a savepoint and data was not written to ** the database and the page is not in-memory, there is a potential ** problem. When the page is next fetched by the b-tree layer, it ** will be read from the database file, which may or may not be ** current. ** ** There are a couple of different ways this can happen. All are quite ** obscure. When running in synchronous mode, this can only happen ** if the page is on the free-list at the start of the transaction, then ** populated, then moved using sqlite3PagerMovepage(). ** ** The solution is to add an in-memory page to the cache containing ** the data just read from the sub-journal. Mark the page as dirty ** and if the pager requires a journal-sync, then mark the page as ** requiring a journal-sync before it is written. */ assert( isSavepnt ); assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)==0 ); pPager->doNotSpill |= SPILLFLAG_ROLLBACK; rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1); assert( (pPager->doNotSpill & SPILLFLAG_ROLLBACK)!=0 ); pPager->doNotSpill &= ~SPILLFLAG_ROLLBACK; if( rc!=SQLITE_OK ) return rc; pPg->flags &= ~PGHDR_NEED_READ; sqlite3PcacheMakeDirty(pPg); } if( pPg ){ /* No page should ever be explicitly rolled back that is in use, except ** for page 1 which is held in use in order to keep the lock on the ** database active. However such a page may be rolled back as a result ** of an internal error resulting in an automatic call to ** sqlite3PagerRollback(). */ void *pData; pData = pPg->pData; memcpy(pData, (u8*)aData, pPager->pageSize); pPager->xReiniter(pPg); if( isMainJrnl && (!isSavepnt || *pOffset<=pPager->journalHdr) ){ /* If the contents of this page were just restored from the main ** journal file, then its content must be as they were when the ** transaction was first opened. In this case we can mark the page ** as clean, since there will be no need to write it out to the ** database. ** ** There is one exception to this rule. If the page is being rolled ** back as part of a savepoint (or statement) rollback from an ** unsynced portion of the main journal file, then it is not safe ** to mark the page as clean. This is because marking the page as ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is ** already in the journal file (recorded in Pager.pInJournal) and ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to ** again within this transaction, it will be marked as dirty but ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially ** be written out into the database file before its journal file ** segment is synced. If a crash occurs during or following this, ** database corruption may ensue. */ assert( !pagerUseWal(pPager) ); sqlite3PcacheMakeClean(pPg); } pager_set_pagehash(pPg); /* If this was page 1, then restore the value of Pager.dbFileVers. ** Do this before any decoding. */ if( pgno==1 ){ memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers)); } /* Decode the page just read from disk */ CODEC1(pPager, pData, pPg->pgno, 3, rc=SQLITE_NOMEM); sqlite3PcacheRelease(pPg); } return rc; } /* ** Parameter zMaster is the name of a master journal file. A single journal ** file that referred to the master journal file has just been rolled back. ** This routine checks if it is possible to delete the master journal file, ** and does so if it is. ** ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not ** available for use within this function. ** ** When a master journal file is created, it is populated with the names ** of all of its child journals, one after another, formatted as utf-8 ** encoded text. The end of each child journal file is marked with a ** nul-terminator byte (0x00). i.e. the entire contents of a master journal ** file for a transaction involving two databases might be: ** ** "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00" ** ** A master journal file may only be deleted once all of its child ** journals have been rolled back. ** ** This function reads the contents of the master-journal file into ** memory and loops through each of the child journal names. For ** each child journal, it checks if: ** ** * if the child journal exists, and if so ** * if the child journal contains a reference to master journal ** file zMaster ** ** If a child journal can be found that matches both of the criteria ** above, this function returns without doing anything. Otherwise, if ** no such child journal can be found, file zMaster is deleted from ** the file-system using sqlite3OsDelete(). ** ** If an IO error within this function, an error code is returned. This ** function allocates memory by calling sqlite3Malloc(). If an allocation ** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors ** occur, SQLITE_OK is returned. ** ** TODO: This function allocates a single block of memory to load ** the entire contents of the master journal file. This could be ** a couple of kilobytes or so - potentially larger than the page ** size. */ static int pager_delmaster(Pager *pPager, const char *zMaster){ sqlite3_vfs *pVfs = pPager->pVfs; int rc; /* Return code */ sqlite3_file *pMaster; /* Malloc'd master-journal file descriptor */ sqlite3_file *pJournal; /* Malloc'd child-journal file descriptor */ char *zMasterJournal = 0; /* Contents of master journal file */ i64 nMasterJournal; /* Size of master journal file */ char *zJournal; /* Pointer to one journal within MJ file */ char *zMasterPtr; /* Space to hold MJ filename from a journal file */ int nMasterPtr; /* Amount of space allocated to zMasterPtr[] */ /* Allocate space for both the pJournal and pMaster file descriptors. ** If successful, open the master journal file for reading. */ pMaster = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2); pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile); if( !pMaster ){ rc = SQLITE_NOMEM; }else{ const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL); rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0); } if( rc!=SQLITE_OK ) goto delmaster_out; /* Load the entire master journal file into space obtained from ** sqlite3_malloc() and pointed to by zMasterJournal. Also obtain ** sufficient space (in zMasterPtr) to hold the names of master ** journal files extracted from regular rollback-journals. */ rc = sqlite3OsFileSize(pMaster, &nMasterJournal); if( rc!=SQLITE_OK ) goto delmaster_out; nMasterPtr = pVfs->mxPathname+1; zMasterJournal = sqlite3Malloc(nMasterJournal + nMasterPtr + 1); if( !zMasterJournal ){ rc = SQLITE_NOMEM; goto delmaster_out; } zMasterPtr = &zMasterJournal[nMasterJournal+1]; rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0); if( rc!=SQLITE_OK ) goto delmaster_out; zMasterJournal[nMasterJournal] = 0; zJournal = zMasterJournal; while( (zJournal-zMasterJournal)pageSize bytes). ** If the file on disk is currently larger than nPage pages, then use the VFS ** xTruncate() method to truncate it. ** ** Or, it might be the case that the file on disk is smaller than ** nPage pages. Some operating system implementations can get confused if ** you try to truncate a file to some size that is larger than it ** currently is, so detect this case and write a single zero byte to ** the end of the new file instead. ** ** If successful, return SQLITE_OK. If an IO error occurs while modifying ** the database file, return the error code to the caller. */ static int pager_truncate(Pager *pPager, Pgno nPage){ int rc = SQLITE_OK; assert( pPager->eState!=PAGER_ERROR ); assert( pPager->eState!=PAGER_READER ); if( isOpen(pPager->fd) && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) ){ i64 currentSize, newSize; int szPage = pPager->pageSize; assert( pPager->eLock==EXCLUSIVE_LOCK ); /* TODO: Is it safe to use Pager.dbFileSize here? */ rc = sqlite3OsFileSize(pPager->fd, ¤tSize); newSize = szPage*(i64)nPage; if( rc==SQLITE_OK && currentSize!=newSize ){ if( currentSize>newSize ){ rc = sqlite3OsTruncate(pPager->fd, newSize); }else if( (currentSize+szPage)<=newSize ){ char *pTmp = pPager->pTmpSpace; memset(pTmp, 0, szPage); testcase( (newSize-szPage) == currentSize ); testcase( (newSize-szPage) > currentSize ); rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage); } if( rc==SQLITE_OK ){ pPager->dbFileSize = nPage; } } } return rc; } /* ** Return a sanitized version of the sector-size of OS file pFile. The ** return value is guaranteed to lie between 32 and MAX_SECTOR_SIZE. */ SQLITE_PRIVATE int sqlite3SectorSize(sqlite3_file *pFile){ int iRet = sqlite3OsSectorSize(pFile); if( iRet<32 ){ iRet = 512; }else if( iRet>MAX_SECTOR_SIZE ){ assert( MAX_SECTOR_SIZE>=512 ); iRet = MAX_SECTOR_SIZE; } return iRet; } /* ** Set the value of the Pager.sectorSize variable for the given ** pager based on the value returned by the xSectorSize method ** of the open database file. The sector size will be used ** to determine the size and alignment of journal header and ** master journal pointers within created journal files. ** ** For temporary files the effective sector size is always 512 bytes. ** ** Otherwise, for non-temporary files, the effective sector size is ** the value returned by the xSectorSize() method rounded up to 32 if ** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it ** is greater than MAX_SECTOR_SIZE. ** ** If the file has the SQLITE_IOCAP_POWERSAFE_OVERWRITE property, then set ** the effective sector size to its minimum value (512). The purpose of ** pPager->sectorSize is to define the "blast radius" of bytes that ** might change if a crash occurs while writing to a single byte in ** that range. But with POWERSAFE_OVERWRITE, the blast radius is zero ** (that is what POWERSAFE_OVERWRITE means), so we minimize the sector ** size. For backwards compatibility of the rollback journal file format, ** we cannot reduce the effective sector size below 512. */ static void setSectorSize(Pager *pPager){ assert( isOpen(pPager->fd) || pPager->tempFile ); if( pPager->tempFile || (sqlite3OsDeviceCharacteristics(pPager->fd) & SQLITE_IOCAP_POWERSAFE_OVERWRITE)!=0 ){ /* Sector size doesn't matter for temporary files. Also, the file ** may not have been opened yet, in which case the OsSectorSize() ** call will segfault. */ pPager->sectorSize = 512; }else{ pPager->sectorSize = sqlite3SectorSize(pPager->fd); } } /* ** Playback the journal and thus restore the database file to ** the state it was in before we started making changes. ** ** The journal file format is as follows: ** ** (1) 8 byte prefix. A copy of aJournalMagic[]. ** (2) 4 byte big-endian integer which is the number of valid page records ** in the journal. If this value is 0xffffffff, then compute the ** number of page records from the journal size. ** (3) 4 byte big-endian integer which is the initial value for the ** sanity checksum. ** (4) 4 byte integer which is the number of pages to truncate the ** database to during a rollback. ** (5) 4 byte big-endian integer which is the sector size. The header ** is this many bytes in size. ** (6) 4 byte big-endian integer which is the page size. ** (7) zero padding out to the next sector size. ** (8) Zero or more pages instances, each as follows: ** + 4 byte page number. ** + pPager->pageSize bytes of data. ** + 4 byte checksum ** ** When we speak of the journal header, we mean the first 7 items above. ** Each entry in the journal is an instance of the 8th item. ** ** Call the value from the second bullet "nRec". nRec is the number of ** valid page entries in the journal. In most cases, you can compute the ** value of nRec from the size of the journal file. But if a power ** failure occurred while the journal was being written, it could be the ** case that the size of the journal file had already been increased but ** the extra entries had not yet made it safely to disk. In such a case, ** the value of nRec computed from the file size would be too large. For ** that reason, we always use the nRec value in the header. ** ** If the nRec value is 0xffffffff it means that nRec should be computed ** from the file size. This value is used when the user selects the ** no-sync option for the journal. A power failure could lead to corruption ** in this case. But for things like temporary table (which will be ** deleted when the power is restored) we don't care. ** ** If the file opened as the journal file is not a well-formed ** journal file then all pages up to the first corrupted page are rolled ** back (or no pages if the journal header is corrupted). The journal file ** is then deleted and SQLITE_OK returned, just as if no corruption had ** been encountered. ** ** If an I/O or malloc() error occurs, the journal-file is not deleted ** and an error code is returned. ** ** The isHot parameter indicates that we are trying to rollback a journal ** that might be a hot journal. Or, it could be that the journal is ** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE. ** If the journal really is hot, reset the pager cache prior rolling ** back any content. If the journal is merely persistent, no reset is ** needed. */ static int pager_playback(Pager *pPager, int isHot){ sqlite3_vfs *pVfs = pPager->pVfs; i64 szJ; /* Size of the journal file in bytes */ u32 nRec; /* Number of Records in the journal */ u32 u; /* Unsigned loop counter */ Pgno mxPg = 0; /* Size of the original file in pages */ int rc; /* Result code of a subroutine */ int res = 1; /* Value returned by sqlite3OsAccess() */ char *zMaster = 0; /* Name of master journal file if any */ int needPagerReset; /* True to reset page prior to first page rollback */ int nPlayback = 0; /* Total number of pages restored from journal */ /* Figure out how many records are in the journal. Abort early if ** the journal is empty. */ assert( isOpen(pPager->jfd) ); rc = sqlite3OsFileSize(pPager->jfd, &szJ); if( rc!=SQLITE_OK ){ goto end_playback; } /* Read the master journal name from the journal, if it is present. ** If a master journal file name is specified, but the file is not ** present on disk, then the journal is not hot and does not need to be ** played back. ** ** TODO: Technically the following is an error because it assumes that ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c, ** mxPathname is 512, which is the same as the minimum allowable value ** for pageSize. */ zMaster = pPager->pTmpSpace; rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); if( rc==SQLITE_OK && zMaster[0] ){ rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res); } zMaster = 0; if( rc!=SQLITE_OK || !res ){ goto end_playback; } pPager->journalOff = 0; needPagerReset = isHot; /* This loop terminates either when a readJournalHdr() or ** pager_playback_one_page() call returns SQLITE_DONE or an IO error ** occurs. */ while( 1 ){ /* Read the next journal header from the journal file. If there are ** not enough bytes left in the journal file for a complete header, or ** it is corrupted, then a process must have failed while writing it. ** This indicates nothing more needs to be rolled back. */ rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg); if( rc!=SQLITE_OK ){ if( rc==SQLITE_DONE ){ rc = SQLITE_OK; } goto end_playback; } /* If nRec is 0xffffffff, then this journal was created by a process ** working in no-sync mode. This means that the rest of the journal ** file consists of pages, there are no more journal headers. Compute ** the value of nRec based on this assumption. */ if( nRec==0xffffffff ){ assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ); nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager)); } /* If nRec is 0 and this rollback is of a transaction created by this ** process and if this is the final header in the journal, then it means ** that this part of the journal was being filled but has not yet been ** synced to disk. Compute the number of pages based on the remaining ** size of the file. ** ** The third term of the test was added to fix ticket #2565. ** When rolling back a hot journal, nRec==0 always means that the next ** chunk of the journal contains zero pages to be rolled back. But ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in ** the journal, it means that the journal might contain additional ** pages that need to be rolled back and that the number of pages ** should be computed based on the journal file size. */ if( nRec==0 && !isHot && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){ nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager)); } /* If this is the first header read from the journal, truncate the ** database file back to its original size. */ if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){ rc = pager_truncate(pPager, mxPg); if( rc!=SQLITE_OK ){ goto end_playback; } pPager->dbSize = mxPg; } /* Copy original pages out of the journal and back into the ** database file and/or page cache. */ for(u=0; ujournalOff,0,1,0); if( rc==SQLITE_OK ){ nPlayback++; }else{ if( rc==SQLITE_DONE ){ pPager->journalOff = szJ; break; }else if( rc==SQLITE_IOERR_SHORT_READ ){ /* If the journal has been truncated, simply stop reading and ** processing the journal. This might happen if the journal was ** not completely written and synced prior to a crash. In that ** case, the database should have never been written in the ** first place so it is OK to simply abandon the rollback. */ rc = SQLITE_OK; goto end_playback; }else{ /* If we are unable to rollback, quit and return the error ** code. This will cause the pager to enter the error state ** so that no further harm will be done. Perhaps the next ** process to come along will be able to rollback the database. */ goto end_playback; } } } } /*NOTREACHED*/ assert( 0 ); end_playback: /* Following a rollback, the database file should be back in its original ** state prior to the start of the transaction, so invoke the ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the ** assertion that the transaction counter was modified. */ #ifdef SQLITE_DEBUG if( pPager->fd->pMethods ){ sqlite3OsFileControlHint(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0); } #endif /* If this playback is happening automatically as a result of an IO or ** malloc error that occurred after the change-counter was updated but ** before the transaction was committed, then the change-counter ** modification may just have been reverted. If this happens in exclusive ** mode, then subsequent transactions performed by the connection will not ** update the change-counter at all. This may lead to cache inconsistency ** problems for other processes at some point in the future. So, just ** in case this has happened, clear the changeCountDone flag now. */ pPager->changeCountDone = pPager->tempFile; if( rc==SQLITE_OK ){ zMaster = pPager->pTmpSpace; rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1); testcase( rc!=SQLITE_OK ); } if( rc==SQLITE_OK && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) ){ rc = sqlite3PagerSync(pPager, 0); } if( rc==SQLITE_OK ){ rc = pager_end_transaction(pPager, zMaster[0]!='\0', 0); testcase( rc!=SQLITE_OK ); } if( rc==SQLITE_OK && zMaster[0] && res ){ /* If there was a master journal and this routine will return success, ** see if it is possible to delete the master journal. */ rc = pager_delmaster(pPager, zMaster); testcase( rc!=SQLITE_OK ); } if( isHot && nPlayback ){ sqlite3_log(SQLITE_NOTICE_RECOVER_ROLLBACK, "recovered %d pages from %s", nPlayback, pPager->zJournal); } /* The Pager.sectorSize variable may have been updated while rolling ** back a journal created by a process with a different sector size ** value. Reset it to the correct value for this process. */ setSectorSize(pPager); return rc; } /* ** Read the content for page pPg out of the database file and into ** pPg->pData. A shared lock or greater must be held on the database ** file before this function is called. ** ** If page 1 is read, then the value of Pager.dbFileVers[] is set to ** the value read from the database file. ** ** If an IO error occurs, then the IO error is returned to the caller. ** Otherwise, SQLITE_OK is returned. */ static int readDbPage(PgHdr *pPg, u32 iFrame){ Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ Pgno pgno = pPg->pgno; /* Page number to read */ int rc = SQLITE_OK; /* Return code */ int pgsz = pPager->pageSize; /* Number of bytes to read */ assert( pPager->eState>=PAGER_READER ); assert( isOpen(pPager->fd) ); #ifndef SQLITE_OMIT_WAL if( iFrame ){ /* Try to pull the page from the write-ahead log. */ rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData); }else #endif { i64 iOffset = (pgno-1)*(i64)pPager->pageSize; rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset); if( rc==SQLITE_IOERR_SHORT_READ ){ rc = SQLITE_OK; } } if( pgno==1 ){ if( rc ){ /* If the read is unsuccessful, set the dbFileVers[] to something ** that will never be a valid file version. dbFileVers[] is a copy ** of bytes 24..39 of the database. Bytes 28..31 should always be ** zero or the size of the database in page. Bytes 32..35 and 35..39 ** should be page numbers which are never 0xffffffff. So filling ** pPager->dbFileVers[] with all 0xff bytes should suffice. ** ** For an encrypted database, the situation is more complex: bytes ** 24..39 of the database are white noise. But the probability of ** white noise equaling 16 bytes of 0xff is vanishingly small so ** we should still be ok. */ memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers)); }else{ u8 *dbFileVers = &((u8*)pPg->pData)[24]; memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers)); } } CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM); PAGER_INCR(sqlite3_pager_readdb_count); PAGER_INCR(pPager->nRead); IOTRACE(("PGIN %p %d\n", pPager, pgno)); PAGERTRACE(("FETCH %d page %d hash(%08x)\n", PAGERID(pPager), pgno, pager_pagehash(pPg))); return rc; } /* ** Update the value of the change-counter at offsets 24 and 92 in ** the header and the sqlite version number at offset 96. ** ** This is an unconditional update. See also the pager_incr_changecounter() ** routine which only updates the change-counter if the update is actually ** needed, as determined by the pPager->changeCountDone state variable. */ static void pager_write_changecounter(PgHdr *pPg){ u32 change_counter; /* Increment the value just read and write it back to byte 24. */ change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1; put32bits(((char*)pPg->pData)+24, change_counter); /* Also store the SQLite version number in bytes 96..99 and in ** bytes 92..95 store the change counter for which the version number ** is valid. */ put32bits(((char*)pPg->pData)+92, change_counter); put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER); } #ifndef SQLITE_OMIT_WAL /* ** This function is invoked once for each page that has already been ** written into the log file when a WAL transaction is rolled back. ** Parameter iPg is the page number of said page. The pCtx argument ** is actually a pointer to the Pager structure. ** ** If page iPg is present in the cache, and has no outstanding references, ** it is discarded. Otherwise, if there are one or more outstanding ** references, the page content is reloaded from the database. If the ** attempt to reload content from the database is required and fails, ** return an SQLite error code. Otherwise, SQLITE_OK. */ static int pagerUndoCallback(void *pCtx, Pgno iPg){ int rc = SQLITE_OK; Pager *pPager = (Pager *)pCtx; PgHdr *pPg; assert( pagerUseWal(pPager) ); pPg = sqlite3PagerLookup(pPager, iPg); if( pPg ){ if( sqlite3PcachePageRefcount(pPg)==1 ){ sqlite3PcacheDrop(pPg); }else{ u32 iFrame = 0; rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); if( rc==SQLITE_OK ){ rc = readDbPage(pPg, iFrame); } if( rc==SQLITE_OK ){ pPager->xReiniter(pPg); } sqlite3PagerUnrefNotNull(pPg); } } /* Normally, if a transaction is rolled back, any backup processes are ** updated as data is copied out of the rollback journal and into the ** database. This is not generally possible with a WAL database, as ** rollback involves simply truncating the log file. Therefore, if one ** or more frames have already been written to the log (and therefore ** also copied into the backup databases) as part of this transaction, ** the backups must be restarted. */ sqlite3BackupRestart(pPager->pBackup); return rc; } /* ** This function is called to rollback a transaction on a WAL database. */ static int pagerRollbackWal(Pager *pPager){ int rc; /* Return Code */ PgHdr *pList; /* List of dirty pages to revert */ /* For all pages in the cache that are currently dirty or have already ** been written (but not committed) to the log file, do one of the ** following: ** ** + Discard the cached page (if refcount==0), or ** + Reload page content from the database (if refcount>0). */ pPager->dbSize = pPager->dbOrigSize; rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); pList = sqlite3PcacheDirtyList(pPager->pPCache); while( pList && rc==SQLITE_OK ){ PgHdr *pNext = pList->pDirty; rc = pagerUndoCallback((void *)pPager, pList->pgno); pList = pNext; } return rc; } /* ** This function is a wrapper around sqlite3WalFrames(). As well as logging ** the contents of the list of pages headed by pList (connected by pDirty), ** this function notifies any active backup processes that the pages have ** changed. ** ** The list of pages passed into this routine is always sorted by page number. ** Hence, if page 1 appears anywhere on the list, it will be the first page. */ static int pagerWalFrames( Pager *pPager, /* Pager object */ PgHdr *pList, /* List of frames to log */ Pgno nTruncate, /* Database size after this commit */ int isCommit /* True if this is a commit */ ){ int rc; /* Return code */ int nList; /* Number of pages in pList */ PgHdr *p; /* For looping over pages */ assert( pPager->pWal ); assert( pList ); #ifdef SQLITE_DEBUG /* Verify that the page list is in accending order */ for(p=pList; p && p->pDirty; p=p->pDirty){ assert( p->pgno < p->pDirty->pgno ); } #endif assert( pList->pDirty==0 || isCommit ); if( isCommit ){ /* If a WAL transaction is being committed, there is no point in writing ** any pages with page numbers greater than nTruncate into the WAL file. ** They will never be read by any client. So remove them from the pDirty ** list here. */ PgHdr **ppNext = &pList; nList = 0; for(p=pList; (*ppNext = p)!=0; p=p->pDirty){ if( p->pgno<=nTruncate ){ ppNext = &p->pDirty; nList++; } } assert( pList ); }else{ nList = 1; } pPager->aStat[PAGER_STAT_WRITE] += nList; if( pList->pgno==1 ) pager_write_changecounter(pList); rc = sqlite3WalFrames(pPager->pWal, pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags ); if( rc==SQLITE_OK && pPager->pBackup ){ for(p=pList; p; p=p->pDirty){ sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData); } } #ifdef SQLITE_CHECK_PAGES pList = sqlite3PcacheDirtyList(pPager->pPCache); for(p=pList; p; p=p->pDirty){ pager_set_pagehash(p); } #endif return rc; } /* ** Begin a read transaction on the WAL. ** ** This routine used to be called "pagerOpenSnapshot()" because it essentially ** makes a snapshot of the database at the current point in time and preserves ** that snapshot for use by the reader in spite of concurrently changes by ** other writers or checkpointers. */ static int pagerBeginReadTransaction(Pager *pPager){ int rc; /* Return code */ int changed = 0; /* True if cache must be reset */ assert( pagerUseWal(pPager) ); assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); /* sqlite3WalEndReadTransaction() was not called for the previous ** transaction in locking_mode=EXCLUSIVE. So call it now. If we ** are in locking_mode=NORMAL and EndRead() was previously called, ** the duplicate call is harmless. */ sqlite3WalEndReadTransaction(pPager->pWal); rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); if( rc!=SQLITE_OK || changed ){ pager_reset(pPager); if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); } return rc; } #endif /* ** This function is called as part of the transition from PAGER_OPEN ** to PAGER_READER state to determine the size of the database file ** in pages (assuming the page size currently stored in Pager.pageSize). ** ** If no error occurs, SQLITE_OK is returned and the size of the database ** in pages is stored in *pnPage. Otherwise, an error code (perhaps ** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified. */ static int pagerPagecount(Pager *pPager, Pgno *pnPage){ Pgno nPage; /* Value to return via *pnPage */ /* Query the WAL sub-system for the database size. The WalDbsize() ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or ** if the database size is not available. The database size is not ** available from the WAL sub-system if the log file is empty or ** contains no valid committed transactions. */ assert( pPager->eState==PAGER_OPEN ); assert( pPager->eLock>=SHARED_LOCK ); nPage = sqlite3WalDbsize(pPager->pWal); /* If the number of pages in the database is not available from the ** WAL sub-system, determine the page counte based on the size of ** the database file. If the size of the database file is not an ** integer multiple of the page-size, round up the result. */ if( nPage==0 ){ i64 n = 0; /* Size of db file in bytes */ assert( isOpen(pPager->fd) || pPager->tempFile ); if( isOpen(pPager->fd) ){ int rc = sqlite3OsFileSize(pPager->fd, &n); if( rc!=SQLITE_OK ){ return rc; } } nPage = (Pgno)((n+pPager->pageSize-1) / pPager->pageSize); } /* If the current number of pages in the file is greater than the ** configured maximum pager number, increase the allowed limit so ** that the file can be read. */ if( nPage>pPager->mxPgno ){ pPager->mxPgno = (Pgno)nPage; } *pnPage = nPage; return SQLITE_OK; } #ifndef SQLITE_OMIT_WAL /* ** Check if the *-wal file that corresponds to the database opened by pPager ** exists if the database is not empy, or verify that the *-wal file does ** not exist (by deleting it) if the database file is empty. ** ** If the database is not empty and the *-wal file exists, open the pager ** in WAL mode. If the database is empty or if no *-wal file exists and ** if no error occurs, make sure Pager.journalMode is not set to ** PAGER_JOURNALMODE_WAL. ** ** Return SQLITE_OK or an error code. ** ** The caller must hold a SHARED lock on the database file to call this ** function. Because an EXCLUSIVE lock on the db file is required to delete ** a WAL on a none-empty database, this ensures there is no race condition ** between the xAccess() below and an xDelete() being executed by some ** other connection. */ static int pagerOpenWalIfPresent(Pager *pPager){ int rc = SQLITE_OK; assert( pPager->eState==PAGER_OPEN ); assert( pPager->eLock>=SHARED_LOCK ); if( !pPager->tempFile ){ int isWal; /* True if WAL file exists */ Pgno nPage; /* Size of the database file */ rc = pagerPagecount(pPager, &nPage); if( rc ) return rc; if( nPage==0 ){ rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0); if( rc==SQLITE_IOERR_DELETE_NOENT ) rc = SQLITE_OK; isWal = 0; }else{ rc = sqlite3OsAccess( pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal ); } if( rc==SQLITE_OK ){ if( isWal ){ testcase( sqlite3PcachePagecount(pPager->pPCache)==0 ); rc = sqlite3PagerOpenWal(pPager, 0); }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ pPager->journalMode = PAGER_JOURNALMODE_DELETE; } } } return rc; } #endif /* ** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback ** the entire master journal file. The case pSavepoint==NULL occurs when ** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction ** savepoint. ** ** When pSavepoint is not NULL (meaning a non-transaction savepoint is ** being rolled back), then the rollback consists of up to three stages, ** performed in the order specified: ** ** * Pages are played back from the main journal starting at byte ** offset PagerSavepoint.iOffset and continuing to ** PagerSavepoint.iHdrOffset, or to the end of the main journal ** file if PagerSavepoint.iHdrOffset is zero. ** ** * If PagerSavepoint.iHdrOffset is not zero, then pages are played ** back starting from the journal header immediately following ** PagerSavepoint.iHdrOffset to the end of the main journal file. ** ** * Pages are then played back from the sub-journal file, starting ** with the PagerSavepoint.iSubRec and continuing to the end of ** the journal file. ** ** Throughout the rollback process, each time a page is rolled back, the ** corresponding bit is set in a bitvec structure (variable pDone in the ** implementation below). This is used to ensure that a page is only ** rolled back the first time it is encountered in either journal. ** ** If pSavepoint is NULL, then pages are only played back from the main ** journal file. There is no need for a bitvec in this case. ** ** In either case, before playback commences the Pager.dbSize variable ** is reset to the value that it held at the start of the savepoint ** (or transaction). No page with a page-number greater than this value ** is played back. If one is encountered it is simply skipped. */ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ i64 szJ; /* Effective size of the main journal */ i64 iHdrOff; /* End of first segment of main-journal records */ int rc = SQLITE_OK; /* Return code */ Bitvec *pDone = 0; /* Bitvec to ensure pages played back only once */ assert( pPager->eState!=PAGER_ERROR ); assert( pPager->eState>=PAGER_WRITER_LOCKED ); /* Allocate a bitvec to use to store the set of pages rolled back */ if( pSavepoint ){ pDone = sqlite3BitvecCreate(pSavepoint->nOrig); if( !pDone ){ return SQLITE_NOMEM; } } /* Set the database size back to the value it was before the savepoint ** being reverted was opened. */ pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; pPager->changeCountDone = pPager->tempFile; if( !pSavepoint && pagerUseWal(pPager) ){ return pagerRollbackWal(pPager); } /* Use pPager->journalOff as the effective size of the main rollback ** journal. The actual file might be larger than this in ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST. But anything ** past pPager->journalOff is off-limits to us. */ szJ = pPager->journalOff; assert( pagerUseWal(pPager)==0 || szJ==0 ); /* Begin by rolling back records from the main journal starting at ** PagerSavepoint.iOffset and continuing to the next journal header. ** There might be records in the main journal that have a page number ** greater than the current database size (pPager->dbSize) but those ** will be skipped automatically. Pages are added to pDone as they ** are played back. */ if( pSavepoint && !pagerUseWal(pPager) ){ iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ; pPager->journalOff = pSavepoint->iOffset; while( rc==SQLITE_OK && pPager->journalOffjournalOff, pDone, 1, 1); } assert( rc!=SQLITE_DONE ); }else{ pPager->journalOff = 0; } /* Continue rolling back records out of the main journal starting at ** the first journal header seen and continuing until the effective end ** of the main journal file. Continue to skip out-of-range pages and ** continue adding pages rolled back to pDone. */ while( rc==SQLITE_OK && pPager->journalOffjournalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff" ** test is related to ticket #2565. See the discussion in the ** pager_playback() function for additional information. */ if( nJRec==0 && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){ nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager)); } for(ii=0; rc==SQLITE_OK && iijournalOffjournalOff, pDone, 1, 1); } assert( rc!=SQLITE_DONE ); } assert( rc!=SQLITE_OK || pPager->journalOff>=szJ ); /* Finally, rollback pages from the sub-journal. Page that were ** previously rolled back out of the main journal (and are hence in pDone) ** will be skipped. Out-of-range pages are also skipped. */ if( pSavepoint ){ u32 ii; /* Loop counter */ i64 offset = (i64)pSavepoint->iSubRec*(4+pPager->pageSize); if( pagerUseWal(pPager) ){ rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData); } for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && iinSubRec; ii++){ assert( offset==(i64)ii*(4+pPager->pageSize) ); rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1); } assert( rc!=SQLITE_DONE ); } sqlite3BitvecDestroy(pDone); if( rc==SQLITE_OK ){ pPager->journalOff = szJ; } return rc; } /* ** Change the maximum number of in-memory pages that are allowed. */ SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); } /* ** Invoke SQLITE_FCNTL_MMAP_SIZE based on the current value of szMmap. */ static void pagerFixMaplimit(Pager *pPager){ #if SQLITE_MAX_MMAP_SIZE>0 sqlite3_file *fd = pPager->fd; if( isOpen(fd) && fd->pMethods->iVersion>=3 ){ sqlite3_int64 sz; sz = pPager->szMmap; pPager->bUseFetch = (sz>0); sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_SIZE, &sz); } #endif } /* ** Change the maximum size of any memory mapping made of the database file. */ SQLITE_PRIVATE void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 szMmap){ pPager->szMmap = szMmap; pagerFixMaplimit(pPager); } /* ** Adjust settings of the pager to those specified in the pgFlags parameter. ** ** The "level" in pgFlags & PAGER_SYNCHRONOUS_MASK sets the robustness ** of the database to damage due to OS crashes or power failures by ** changing the number of syncs()s when writing the journals. ** There are three levels: ** ** OFF sqlite3OsSync() is never called. This is the default ** for temporary and transient files. ** ** NORMAL The journal is synced once before writes begin on the ** database. This is normally adequate protection, but ** it is theoretically possible, though very unlikely, ** that an inopertune power failure could leave the journal ** in a state which would cause damage to the database ** when it is rolled back. ** ** FULL The journal is synced twice before writes begin on the ** database (with some additional information - the nRec field ** of the journal header - being written in between the two ** syncs). If we assume that writing a ** single disk sector is atomic, then this mode provides ** assurance that the journal will not be corrupted to the ** point of causing damage to the database during rollback. ** ** The above is for a rollback-journal mode. For WAL mode, OFF continues ** to mean that no syncs ever occur. NORMAL means that the WAL is synced ** prior to the start of checkpoint and that the database file is synced ** at the conclusion of the checkpoint if the entire content of the WAL ** was written back into the database. But no sync operations occur for ** an ordinary commit in NORMAL mode with WAL. FULL means that the WAL ** file is synced following each commit operation, in addition to the ** syncs associated with NORMAL. ** ** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL. The ** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync ** using fcntl(F_FULLFSYNC). SQLITE_SYNC_NORMAL means to do an ** ordinary fsync() call. There is no difference between SQLITE_SYNC_FULL ** and SQLITE_SYNC_NORMAL on platforms other than MacOSX. But the ** synchronous=FULL versus synchronous=NORMAL setting determines when ** the xSync primitive is called and is relevant to all platforms. ** ** Numeric values associated with these states are OFF==1, NORMAL=2, ** and FULL=3. */ #ifndef SQLITE_OMIT_PAGER_PRAGMAS SQLITE_PRIVATE void sqlite3PagerSetFlags( Pager *pPager, /* The pager to set safety level for */ unsigned pgFlags /* Various flags */ ){ unsigned level = pgFlags & PAGER_SYNCHRONOUS_MASK; assert( level>=1 && level<=3 ); pPager->noSync = (level==1 || pPager->tempFile) ?1:0; pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0; if( pPager->noSync ){ pPager->syncFlags = 0; pPager->ckptSyncFlags = 0; }else if( pgFlags & PAGER_FULLFSYNC ){ pPager->syncFlags = SQLITE_SYNC_FULL; pPager->ckptSyncFlags = SQLITE_SYNC_FULL; }else if( pgFlags & PAGER_CKPT_FULLFSYNC ){ pPager->syncFlags = SQLITE_SYNC_NORMAL; pPager->ckptSyncFlags = SQLITE_SYNC_FULL; }else{ pPager->syncFlags = SQLITE_SYNC_NORMAL; pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL; } pPager->walSyncFlags = pPager->syncFlags; if( pPager->fullSync ){ pPager->walSyncFlags |= WAL_SYNC_TRANSACTIONS; } if( pgFlags & PAGER_CACHESPILL ){ pPager->doNotSpill &= ~SPILLFLAG_OFF; }else{ pPager->doNotSpill |= SPILLFLAG_OFF; } } #endif /* ** The following global variable is incremented whenever the library ** attempts to open a temporary file. This information is used for ** testing and analysis only. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_opentemp_count = 0; #endif /* ** Open a temporary file. ** ** Write the file descriptor into *pFile. Return SQLITE_OK on success ** or some other error code if we fail. The OS will automatically ** delete the temporary file when it is closed. ** ** The flags passed to the VFS layer xOpen() call are those specified ** by parameter vfsFlags ORed with the following: ** ** SQLITE_OPEN_READWRITE ** SQLITE_OPEN_CREATE ** SQLITE_OPEN_EXCLUSIVE ** SQLITE_OPEN_DELETEONCLOSE */ static int pagerOpentemp( Pager *pPager, /* The pager object */ sqlite3_file *pFile, /* Write the file descriptor here */ int vfsFlags /* Flags passed through to the VFS */ ){ int rc; /* Return code */ #ifdef SQLITE_TEST sqlite3_opentemp_count++; /* Used for testing and analysis only */ #endif vfsFlags |= SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE; rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0); assert( rc!=SQLITE_OK || isOpen(pFile) ); return rc; } /* ** Set the busy handler function. ** ** The pager invokes the busy-handler if sqlite3OsLock() returns ** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock, ** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE ** lock. It does *not* invoke the busy handler when upgrading from ** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE ** (which occurs during hot-journal rollback). Summary: ** ** Transition | Invokes xBusyHandler ** -------------------------------------------------------- ** NO_LOCK -> SHARED_LOCK | Yes ** SHARED_LOCK -> RESERVED_LOCK | No ** SHARED_LOCK -> EXCLUSIVE_LOCK | No ** RESERVED_LOCK -> EXCLUSIVE_LOCK | Yes ** ** If the busy-handler callback returns non-zero, the lock is ** retried. If it returns zero, then the SQLITE_BUSY error is ** returned to the caller of the pager API function. */ SQLITE_PRIVATE void sqlite3PagerSetBusyhandler( Pager *pPager, /* Pager object */ int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ ){ pPager->xBusyHandler = xBusyHandler; pPager->pBusyHandlerArg = pBusyHandlerArg; if( isOpen(pPager->fd) ){ void **ap = (void **)&pPager->xBusyHandler; assert( ((int(*)(void *))(ap[0]))==xBusyHandler ); assert( ap[1]==pBusyHandlerArg ); sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_BUSYHANDLER, (void *)ap); } } /* ** Change the page size used by the Pager object. The new page size ** is passed in *pPageSize. ** ** If the pager is in the error state when this function is called, it ** is a no-op. The value returned is the error state error code (i.e. ** one of SQLITE_IOERR, an SQLITE_IOERR_xxx sub-code or SQLITE_FULL). ** ** Otherwise, if all of the following are true: ** ** * the new page size (value of *pPageSize) is valid (a power ** of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and ** ** * there are no outstanding page references, and ** ** * the database is either not an in-memory database or it is ** an in-memory database that currently consists of zero pages. ** ** then the pager object page size is set to *pPageSize. ** ** If the page size is changed, then this function uses sqlite3PagerMalloc() ** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt ** fails, SQLITE_NOMEM is returned and the page size remains unchanged. ** In all other cases, SQLITE_OK is returned. ** ** If the page size is not changed, either because one of the enumerated ** conditions above is not true, the pager was in error state when this ** function was called, or because the memory allocation attempt failed, ** then *pPageSize is set to the old, retained page size before returning. */ SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){ int rc = SQLITE_OK; /* It is not possible to do a full assert_pager_state() here, as this ** function may be called from within PagerOpen(), before the state ** of the Pager object is internally consistent. ** ** At one point this function returned an error if the pager was in ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that ** there is at least one outstanding page reference, this function ** is a no-op for that case anyhow. */ u32 pageSize = *pPageSize; assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) ); if( (pPager->memDb==0 || pPager->dbSize==0) && sqlite3PcacheRefCount(pPager->pPCache)==0 && pageSize && pageSize!=(u32)pPager->pageSize ){ char *pNew = NULL; /* New temp space */ i64 nByte = 0; if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){ rc = sqlite3OsFileSize(pPager->fd, &nByte); } if( rc==SQLITE_OK ){ pNew = (char *)sqlite3PageMalloc(pageSize); if( !pNew ) rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ pager_reset(pPager); rc = sqlite3PcacheSetPageSize(pPager->pPCache, pageSize); } if( rc==SQLITE_OK ){ sqlite3PageFree(pPager->pTmpSpace); pPager->pTmpSpace = pNew; pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize); pPager->pageSize = pageSize; }else{ sqlite3PageFree(pNew); } } *pPageSize = pPager->pageSize; if( rc==SQLITE_OK ){ if( nReserve<0 ) nReserve = pPager->nReserve; assert( nReserve>=0 && nReserve<1000 ); pPager->nReserve = (i16)nReserve; pagerReportSize(pPager); pagerFixMaplimit(pPager); } return rc; } /* ** Return a pointer to the "temporary page" buffer held internally ** by the pager. This is a buffer that is big enough to hold the ** entire content of a database page. This buffer is used internally ** during rollback and will be overwritten whenever a rollback ** occurs. But other modules are free to use it too, as long as ** no rollbacks are happening. */ SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager *pPager){ return pPager->pTmpSpace; } /* ** Attempt to set the maximum database page count if mxPage is positive. ** Make no changes if mxPage is zero or negative. And never reduce the ** maximum page count below the current size of the database. ** ** Regardless of mxPage, return the current maximum page count. */ SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){ if( mxPage>0 ){ pPager->mxPgno = mxPage; } assert( pPager->eState!=PAGER_OPEN ); /* Called only by OP_MaxPgcnt */ assert( pPager->mxPgno>=pPager->dbSize ); /* OP_MaxPgcnt enforces this */ return pPager->mxPgno; } /* ** The following set of routines are used to disable the simulated ** I/O error mechanism. These routines are used to avoid simulated ** errors in places where we do not care about errors. ** ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops ** and generate no code. */ #ifdef SQLITE_TEST SQLITE_PRIVATE int sqlite3_io_error_pending; SQLITE_PRIVATE int sqlite3_io_error_hit; static int saved_cnt; void disable_simulated_io_errors(void){ saved_cnt = sqlite3_io_error_pending; sqlite3_io_error_pending = -1; } void enable_simulated_io_errors(void){ sqlite3_io_error_pending = saved_cnt; } #else # define disable_simulated_io_errors() # define enable_simulated_io_errors() #endif /* ** Read the first N bytes from the beginning of the file into memory ** that pDest points to. ** ** If the pager was opened on a transient file (zFilename==""), or ** opened on a file less than N bytes in size, the output buffer is ** zeroed and SQLITE_OK returned. The rationale for this is that this ** function is used to read database headers, and a new transient or ** zero sized database has a header than consists entirely of zeroes. ** ** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered, ** the error code is returned to the caller and the contents of the ** output buffer undefined. */ SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){ int rc = SQLITE_OK; memset(pDest, 0, N); assert( isOpen(pPager->fd) || pPager->tempFile ); /* This routine is only called by btree immediately after creating ** the Pager object. There has not been an opportunity to transition ** to WAL mode yet. */ assert( !pagerUseWal(pPager) ); if( isOpen(pPager->fd) ){ IOTRACE(("DBHDR %p 0 %d\n", pPager, N)) rc = sqlite3OsRead(pPager->fd, pDest, N, 0); if( rc==SQLITE_IOERR_SHORT_READ ){ rc = SQLITE_OK; } } return rc; } /* ** This function may only be called when a read-transaction is open on ** the pager. It returns the total number of pages in the database. ** ** However, if the file is between 1 and bytes in size, then ** this is considered a 1 page file. */ SQLITE_PRIVATE void sqlite3PagerPagecount(Pager *pPager, int *pnPage){ assert( pPager->eState>=PAGER_READER ); assert( pPager->eState!=PAGER_WRITER_FINISHED ); *pnPage = (int)pPager->dbSize; } /* ** Try to obtain a lock of type locktype on the database file. If ** a similar or greater lock is already held, this function is a no-op ** (returning SQLITE_OK immediately). ** ** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke ** the busy callback if the lock is currently not available. Repeat ** until the busy callback returns false or until the attempt to ** obtain the lock succeeds. ** ** Return SQLITE_OK on success and an error code if we cannot obtain ** the lock. If the lock is obtained successfully, set the Pager.state ** variable to locktype before returning. */ static int pager_wait_on_lock(Pager *pPager, int locktype){ int rc; /* Return code */ /* Check that this is either a no-op (because the requested lock is ** already held), or one of the transitions that the busy-handler ** may be invoked during, according to the comment above ** sqlite3PagerSetBusyhandler(). */ assert( (pPager->eLock>=locktype) || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK) || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK) ); do { rc = pagerLockDb(pPager, locktype); }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) ); return rc; } /* ** Function assertTruncateConstraint(pPager) checks that one of the ** following is true for all dirty pages currently in the page-cache: ** ** a) The page number is less than or equal to the size of the ** current database image, in pages, OR ** ** b) if the page content were written at this time, it would not ** be necessary to write the current content out to the sub-journal ** (as determined by function subjRequiresPage()). ** ** If the condition asserted by this function were not true, and the ** dirty page were to be discarded from the cache via the pagerStress() ** routine, pagerStress() would not write the current page content to ** the database file. If a savepoint transaction were rolled back after ** this happened, the correct behavior would be to restore the current ** content of the page. However, since this content is not present in either ** the database file or the portion of the rollback journal and ** sub-journal rolled back the content could not be restored and the ** database image would become corrupt. It is therefore fortunate that ** this circumstance cannot arise. */ #if defined(SQLITE_DEBUG) static void assertTruncateConstraintCb(PgHdr *pPg){ assert( pPg->flags&PGHDR_DIRTY ); assert( !subjRequiresPage(pPg) || pPg->pgno<=pPg->pPager->dbSize ); } static void assertTruncateConstraint(Pager *pPager){ sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb); } #else # define assertTruncateConstraint(pPager) #endif /* ** Truncate the in-memory database file image to nPage pages. This ** function does not actually modify the database file on disk. It ** just sets the internal state of the pager object so that the ** truncation will be done when the current transaction is committed. ** ** This function is only called right before committing a transaction. ** Once this function has been called, the transaction must either be ** rolled back or committed. It is not safe to call this function and ** then continue writing to the database. */ SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){ assert( pPager->dbSize>=nPage ); assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); pPager->dbSize = nPage; /* At one point the code here called assertTruncateConstraint() to ** ensure that all pages being truncated away by this operation are, ** if one or more savepoints are open, present in the savepoint ** journal so that they can be restored if the savepoint is rolled ** back. This is no longer necessary as this function is now only ** called right before committing a transaction. So although the ** Pager object may still have open savepoints (Pager.nSavepoint!=0), ** they cannot be rolled back. So the assertTruncateConstraint() call ** is no longer correct. */ } /* ** This function is called before attempting a hot-journal rollback. It ** syncs the journal file to disk, then sets pPager->journalHdr to the ** size of the journal file so that the pager_playback() routine knows ** that the entire journal file has been synced. ** ** Syncing a hot-journal to disk before attempting to roll it back ensures ** that if a power-failure occurs during the rollback, the process that ** attempts rollback following system recovery sees the same journal ** content as this process. ** ** If everything goes as planned, SQLITE_OK is returned. Otherwise, ** an SQLite error code. */ static int pagerSyncHotJournal(Pager *pPager){ int rc = SQLITE_OK; if( !pPager->noSync ){ rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL); } if( rc==SQLITE_OK ){ rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr); } return rc; } /* ** Obtain a reference to a memory mapped page object for page number pgno. ** The new object will use the pointer pData, obtained from xFetch(). ** If successful, set *ppPage to point to the new page reference ** and return SQLITE_OK. Otherwise, return an SQLite error code and set ** *ppPage to zero. ** ** Page references obtained by calling this function should be released ** by calling pagerReleaseMapPage(). */ static int pagerAcquireMapPage( Pager *pPager, /* Pager object */ Pgno pgno, /* Page number */ void *pData, /* xFetch()'d data for this page */ PgHdr **ppPage /* OUT: Acquired page object */ ){ PgHdr *p; /* Memory mapped page to return */ if( pPager->pMmapFreelist ){ *ppPage = p = pPager->pMmapFreelist; pPager->pMmapFreelist = p->pDirty; p->pDirty = 0; memset(p->pExtra, 0, pPager->nExtra); }else{ *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra); if( p==0 ){ sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData); return SQLITE_NOMEM; } p->pExtra = (void *)&p[1]; p->flags = PGHDR_MMAP; p->nRef = 1; p->pPager = pPager; } assert( p->pExtra==(void *)&p[1] ); assert( p->pPage==0 ); assert( p->flags==PGHDR_MMAP ); assert( p->pPager==pPager ); assert( p->nRef==1 ); p->pgno = pgno; p->pData = pData; pPager->nMmapOut++; return SQLITE_OK; } /* ** Release a reference to page pPg. pPg must have been returned by an ** earlier call to pagerAcquireMapPage(). */ static void pagerReleaseMapPage(PgHdr *pPg){ Pager *pPager = pPg->pPager; pPager->nMmapOut--; pPg->pDirty = pPager->pMmapFreelist; pPager->pMmapFreelist = pPg; assert( pPager->fd->pMethods->iVersion>=3 ); sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData); } /* ** Free all PgHdr objects stored in the Pager.pMmapFreelist list. */ static void pagerFreeMapHdrs(Pager *pPager){ PgHdr *p; PgHdr *pNext; for(p=pPager->pMmapFreelist; p; p=pNext){ pNext = p->pDirty; sqlite3_free(p); } } /* ** Shutdown the page cache. Free all memory and close all files. ** ** If a transaction was in progress when this routine is called, that ** transaction is rolled back. All outstanding pages are invalidated ** and their memory is freed. Any attempt to use a page associated ** with this page cache after this function returns will likely ** result in a coredump. ** ** This function always succeeds. If a transaction is active an attempt ** is made to roll it back. If an error occurs during the rollback ** a hot journal may be left in the filesystem but no error is returned ** to the caller. */ SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){ u8 *pTmp = (u8 *)pPager->pTmpSpace; assert( assert_pager_state(pPager) ); disable_simulated_io_errors(); pagerFreeMapHdrs(pPager); /* pPager->errCode = 0; */ pPager->exclusiveMode = 0; #ifndef SQLITE_OMIT_WAL sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp); pPager->pWal = 0; #endif pager_reset(pPager); /* If it is open, sync the journal file before calling UnlockAndRollback. ** If this is not done, then an unsynced portion of the open journal ** file may be played back into the database. If a power failure occurs ** while this is happening, the database could become corrupt. ** ** If an error occurs while trying to sync the journal, shift the pager ** into the ERROR state. This causes UnlockAndRollback to unlock the ** database and close the journal file without attempting to roll it ** back or finalize it. The next database user will have to do hot-journal ** rollback before accessing the database file. */ if( isOpen(pPager->jfd) ){ pager_error(pPager, pagerSyncHotJournal(pPager)); } pagerUnlockAndRollback(pPager); enable_simulated_io_errors(); PAGERTRACE(("CLOSE %d\n", PAGERID(pPager))); IOTRACE(("CLOSE %p\n", pPager)) sqlite3OsClose(pPager->jfd); sqlite3OsClose(pPager->fd); sqlite3PageFree(pTmp); sqlite3PcacheClose(pPager->pPCache); #ifdef SQLITE_HAS_CODEC if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec); #endif assert( !pPager->aSavepoint && !pPager->pInJournal ); assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ); sqlite3_free(pPager); return SQLITE_OK; } #if !defined(NDEBUG) || defined(SQLITE_TEST) /* ** Return the page number for page pPg. */ SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage *pPg){ return pPg->pgno; } #endif /* ** Increment the reference count for page pPg. */ SQLITE_PRIVATE void sqlite3PagerRef(DbPage *pPg){ sqlite3PcacheRef(pPg); } /* ** Sync the journal. In other words, make sure all the pages that have ** been written to the journal have actually reached the surface of the ** disk and can be restored in the event of a hot-journal rollback. ** ** If the Pager.noSync flag is set, then this function is a no-op. ** Otherwise, the actions required depend on the journal-mode and the ** device characteristics of the file-system, as follows: ** ** * If the journal file is an in-memory journal file, no action need ** be taken. ** ** * Otherwise, if the device does not support the SAFE_APPEND property, ** then the nRec field of the most recently written journal header ** is updated to contain the number of journal records that have ** been written following it. If the pager is operating in full-sync ** mode, then the journal file is synced before this field is updated. ** ** * If the device does not support the SEQUENTIAL property, then ** journal file is synced. ** ** Or, in pseudo-code: ** ** if( NOT ){ ** if( NOT SAFE_APPEND ){ ** if( ) xSync(); ** ** } ** if( NOT SEQUENTIAL ) xSync(); ** } ** ** If successful, this routine clears the PGHDR_NEED_SYNC flag of every ** page currently held in memory before returning SQLITE_OK. If an IO ** error is encountered, then the IO error code is returned to the caller. */ static int syncJournal(Pager *pPager, int newHdr){ int rc; /* Return code */ assert( pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD ); assert( assert_pager_state(pPager) ); assert( !pagerUseWal(pPager) ); rc = sqlite3PagerExclusiveLock(pPager); if( rc!=SQLITE_OK ) return rc; if( !pPager->noSync ){ assert( !pPager->tempFile ); if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); assert( isOpen(pPager->jfd) ); if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ /* This block deals with an obscure problem. If the last connection ** that wrote to this database was operating in persistent-journal ** mode, then the journal file may at this point actually be larger ** than Pager.journalOff bytes. If the next thing in the journal ** file happens to be a journal-header (written as part of the ** previous connection's transaction), and a crash or power-failure ** occurs after nRec is updated but before this connection writes ** anything else to the journal file (or commits/rolls back its ** transaction), then SQLite may become confused when doing the ** hot-journal rollback following recovery. It may roll back all ** of this connections data, then proceed to rolling back the old, ** out-of-date data that follows it. Database corruption. ** ** To work around this, if the journal file does appear to contain ** a valid header following Pager.journalOff, then write a 0x00 ** byte to the start of it to prevent it from being recognized. ** ** Variable iNextHdrOffset is set to the offset at which this ** problematic header will occur, if it exists. aMagic is used ** as a temporary buffer to inspect the first couple of bytes of ** the potential journal header. */ i64 iNextHdrOffset; u8 aMagic[8]; u8 zHeader[sizeof(aJournalMagic)+4]; memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic)); put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec); iNextHdrOffset = journalHdrOffset(pPager); rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset); if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){ static const u8 zerobyte = 0; rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset); } if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ return rc; } /* Write the nRec value into the journal file header. If in ** full-synchronous mode, sync the journal first. This ensures that ** all data has really hit the disk before nRec is updated to mark ** it as a candidate for rollback. ** ** This is not required if the persistent media supports the ** SAFE_APPEND property. Because in this case it is not possible ** for garbage data to be appended to the file, the nRec field ** is populated with 0xFFFFFFFF when the journal header is written ** and never needs to be updated. */ if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); IOTRACE(("JSYNC %p\n", pPager)) rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags); if( rc!=SQLITE_OK ) return rc; } IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr)); rc = sqlite3OsWrite( pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr ); if( rc!=SQLITE_OK ) return rc; } if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){ PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager))); IOTRACE(("JSYNC %p\n", pPager)) rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags| (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0) ); if( rc!=SQLITE_OK ) return rc; } pPager->journalHdr = pPager->journalOff; if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){ pPager->nRec = 0; rc = writeJournalHdr(pPager); if( rc!=SQLITE_OK ) return rc; } }else{ pPager->journalHdr = pPager->journalOff; } } /* Unless the pager is in noSync mode, the journal file was just ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on ** all pages. */ sqlite3PcacheClearSyncFlags(pPager->pPCache); pPager->eState = PAGER_WRITER_DBMOD; assert( assert_pager_state(pPager) ); return SQLITE_OK; } /* ** The argument is the first in a linked list of dirty pages connected ** by the PgHdr.pDirty pointer. This function writes each one of the ** in-memory pages in the list to the database file. The argument may ** be NULL, representing an empty list. In this case this function is ** a no-op. ** ** The pager must hold at least a RESERVED lock when this function ** is called. Before writing anything to the database file, this lock ** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained, ** SQLITE_BUSY is returned and no data is written to the database file. ** ** If the pager is a temp-file pager and the actual file-system file ** is not yet open, it is created and opened before any data is ** written out. ** ** Once the lock has been upgraded and, if necessary, the file opened, ** the pages are written out to the database file in list order. Writing ** a page is skipped if it meets either of the following criteria: ** ** * The page number is greater than Pager.dbSize, or ** * The PGHDR_DONT_WRITE flag is set on the page. ** ** If writing out a page causes the database file to grow, Pager.dbFileSize ** is updated accordingly. If page 1 is written out, then the value cached ** in Pager.dbFileVers[] is updated to match the new value stored in ** the database file. ** ** If everything is successful, SQLITE_OK is returned. If an IO error ** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot ** be obtained, SQLITE_BUSY is returned. */ static int pager_write_pagelist(Pager *pPager, PgHdr *pList){ int rc = SQLITE_OK; /* Return code */ /* This function is only called for rollback pagers in WRITER_DBMOD state. */ assert( !pagerUseWal(pPager) ); assert( pPager->eState==PAGER_WRITER_DBMOD ); assert( pPager->eLock==EXCLUSIVE_LOCK ); /* If the file is a temp-file has not yet been opened, open it now. It ** is not possible for rc to be other than SQLITE_OK if this branch ** is taken, as pager_wait_on_lock() is a no-op for temp-files. */ if( !isOpen(pPager->fd) ){ assert( pPager->tempFile && rc==SQLITE_OK ); rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags); } /* Before the first write, give the VFS a hint of what the final ** file size will be. */ assert( rc!=SQLITE_OK || isOpen(pPager->fd) ); if( rc==SQLITE_OK && pPager->dbHintSizedbSize && (pList->pDirty || pList->pgno>pPager->dbHintSize) ){ sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize; sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile); pPager->dbHintSize = pPager->dbSize; } while( rc==SQLITE_OK && pList ){ Pgno pgno = pList->pgno; /* If there are dirty pages in the page cache with page numbers greater ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to ** make the file smaller (presumably by auto-vacuum code). Do not write ** any such pages to the file. ** ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag ** set (set by sqlite3PagerDontWrite()). */ if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){ i64 offset = (pgno-1)*(i64)pPager->pageSize; /* Offset to write */ char *pData; /* Data to write */ assert( (pList->flags&PGHDR_NEED_SYNC)==0 ); if( pList->pgno==1 ) pager_write_changecounter(pList); /* Encode the database */ CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData); /* Write out the page data. */ rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset); /* If page 1 was just written, update Pager.dbFileVers to match ** the value now stored in the database file. If writing this ** page caused the database file to grow, update dbFileSize. */ if( pgno==1 ){ memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers)); } if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } pPager->aStat[PAGER_STAT_WRITE]++; /* Update any backup objects copying the contents of this pager. */ sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData); PAGERTRACE(("STORE %d page %d hash(%08x)\n", PAGERID(pPager), pgno, pager_pagehash(pList))); IOTRACE(("PGOUT %p %d\n", pPager, pgno)); PAGER_INCR(sqlite3_pager_writedb_count); }else{ PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno)); } pager_set_pagehash(pList); pList = pList->pDirty; } return rc; } /* ** Ensure that the sub-journal file is open. If it is already open, this ** function is a no-op. ** ** SQLITE_OK is returned if everything goes according to plan. An ** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen() ** fails. */ static int openSubJournal(Pager *pPager){ int rc = SQLITE_OK; if( !isOpen(pPager->sjfd) ){ if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ sqlite3MemJournalOpen(pPager->sjfd); }else{ rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL); } } return rc; } /* ** Append a record of the current state of page pPg to the sub-journal. ** ** If successful, set the bit corresponding to pPg->pgno in the bitvecs ** for all open savepoints before returning. ** ** This function returns SQLITE_OK if everything is successful, an IO ** error code if the attempt to write to the sub-journal fails, or ** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint ** bitvec. */ static int subjournalPage(PgHdr *pPg){ int rc = SQLITE_OK; Pager *pPager = pPg->pPager; if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ /* Open the sub-journal, if it has not already been opened */ assert( pPager->useJournal ); assert( isOpen(pPager->jfd) || pagerUseWal(pPager) ); assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 ); assert( pagerUseWal(pPager) || pageInJournal(pPager, pPg) || pPg->pgno>pPager->dbOrigSize ); rc = openSubJournal(pPager); /* If the sub-journal was opened successfully (or was already open), ** write the journal record into the file. */ if( rc==SQLITE_OK ){ void *pData = pPg->pData; i64 offset = (i64)pPager->nSubRec*(4+pPager->pageSize); char *pData2; CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); rc = write32bits(pPager->sjfd, offset, pPg->pgno); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4); } } } if( rc==SQLITE_OK ){ pPager->nSubRec++; assert( pPager->nSavepoint>0 ); rc = addToSavepointBitvecs(pPager, pPg->pgno); } return rc; } static int subjournalPageIfRequired(PgHdr *pPg){ if( subjRequiresPage(pPg) ){ return subjournalPage(pPg); }else{ return SQLITE_OK; } } /* ** This function is called by the pcache layer when it has reached some ** soft memory limit. The first argument is a pointer to a Pager object ** (cast as a void*). The pager is always 'purgeable' (not an in-memory ** database). The second argument is a reference to a page that is ** currently dirty but has no outstanding references. The page ** is always associated with the Pager object passed as the first ** argument. ** ** The job of this function is to make pPg clean by writing its contents ** out to the database file, if possible. This may involve syncing the ** journal file. ** ** If successful, sqlite3PcacheMakeClean() is called on the page and ** SQLITE_OK returned. If an IO error occurs while trying to make the ** page clean, the IO error code is returned. If the page cannot be ** made clean for some other reason, but no error occurs, then SQLITE_OK ** is returned by sqlite3PcacheMakeClean() is not called. */ static int pagerStress(void *p, PgHdr *pPg){ Pager *pPager = (Pager *)p; int rc = SQLITE_OK; assert( pPg->pPager==pPager ); assert( pPg->flags&PGHDR_DIRTY ); /* The doNotSpill NOSYNC bit is set during times when doing a sync of ** journal (and adding a new header) is not allowed. This occurs ** during calls to sqlite3PagerWrite() while trying to journal multiple ** pages belonging to the same sector. ** ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling ** regardless of whether or not a sync is required. This is set during ** a rollback or by user request, respectively. ** ** Spilling is also prohibited when in an error state since that could ** lead to database corruption. In the current implementation it ** is impossible for sqlite3PcacheFetch() to be called with createFlag==3 ** while in the error state, hence it is impossible for this routine to ** be called in the error state. Nevertheless, we include a NEVER() ** test for the error state as a safeguard against future changes. */ if( NEVER(pPager->errCode) ) return SQLITE_OK; testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK ); testcase( pPager->doNotSpill & SPILLFLAG_OFF ); testcase( pPager->doNotSpill & SPILLFLAG_NOSYNC ); if( pPager->doNotSpill && ((pPager->doNotSpill & (SPILLFLAG_ROLLBACK|SPILLFLAG_OFF))!=0 || (pPg->flags & PGHDR_NEED_SYNC)!=0) ){ return SQLITE_OK; } pPg->pDirty = 0; if( pagerUseWal(pPager) ){ /* Write a single frame for this page to the log. */ rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0); } }else{ /* Sync the journal file if required. */ if( pPg->flags&PGHDR_NEED_SYNC || pPager->eState==PAGER_WRITER_CACHEMOD ){ rc = syncJournal(pPager, 1); } /* Write the contents of the page out to the database file. */ if( rc==SQLITE_OK ){ assert( (pPg->flags&PGHDR_NEED_SYNC)==0 ); rc = pager_write_pagelist(pPager, pPg); } } /* Mark the page as clean. */ if( rc==SQLITE_OK ){ PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno)); sqlite3PcacheMakeClean(pPg); } return pager_error(pPager, rc); } /* ** Allocate and initialize a new Pager object and put a pointer to it ** in *ppPager. The pager should eventually be freed by passing it ** to sqlite3PagerClose(). ** ** The zFilename argument is the path to the database file to open. ** If zFilename is NULL then a randomly-named temporary file is created ** and used as the file to be cached. Temporary files are be deleted ** automatically when they are closed. If zFilename is ":memory:" then ** all information is held in cache. It is never written to disk. ** This can be used to implement an in-memory database. ** ** The nExtra parameter specifies the number of bytes of space allocated ** along with each page reference. This space is available to the user ** via the sqlite3PagerGetExtra() API. ** ** The flags argument is used to specify properties that affect the ** operation of the pager. It should be passed some bitwise combination ** of the PAGER_* flags. ** ** The vfsFlags parameter is a bitmask to pass to the flags parameter ** of the xOpen() method of the supplied VFS when opening files. ** ** If the pager object is allocated and the specified file opened ** successfully, SQLITE_OK is returned and *ppPager set to point to ** the new pager object. If an error occurs, *ppPager is set to NULL ** and error code returned. This function may return SQLITE_NOMEM ** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or ** various SQLITE_IO_XXX errors. */ SQLITE_PRIVATE int sqlite3PagerOpen( sqlite3_vfs *pVfs, /* The virtual file system to use */ Pager **ppPager, /* OUT: Return the Pager structure here */ const char *zFilename, /* Name of the database file to open */ int nExtra, /* Extra bytes append to each in-memory page */ int flags, /* flags controlling this file */ int vfsFlags, /* flags passed through to sqlite3_vfs.xOpen() */ void (*xReinit)(DbPage*) /* Function to reinitialize pages */ ){ u8 *pPtr; Pager *pPager = 0; /* Pager object to allocate and return */ int rc = SQLITE_OK; /* Return code */ int tempFile = 0; /* True for temp files (incl. in-memory files) */ int memDb = 0; /* True if this is an in-memory file */ int readOnly = 0; /* True if this is a read-only file */ int journalFileSize; /* Bytes to allocate for each journal fd */ char *zPathname = 0; /* Full path to database file */ int nPathname = 0; /* Number of bytes in zPathname */ int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */ int pcacheSize = sqlite3PcacheSize(); /* Bytes to allocate for PCache */ u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE; /* Default page size */ const char *zUri = 0; /* URI args to copy */ int nUri = 0; /* Number of bytes of URI args at *zUri */ /* Figure out how much space is required for each journal file-handle ** (there are two of them, the main journal and the sub-journal). This ** is the maximum space required for an in-memory journal file handle ** and a regular journal file-handle. Note that a "regular journal-handle" ** may be a wrapper capable of caching the first portion of the journal ** file in memory to implement the atomic-write optimization (see ** source file journal.c). */ if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){ journalFileSize = ROUND8(sqlite3JournalSize(pVfs)); }else{ journalFileSize = ROUND8(sqlite3MemJournalSize()); } /* Set the output variable to NULL in case an error occurs. */ *ppPager = 0; /* Compute and store the full pathname in an allocated buffer pointed ** to by zPathname, length nPathname. Or, if this is a temporary file, ** leave both nPathname and zPathname set to 0. */ if( zFilename && zFilename[0] ){ const char *z; nPathname = pVfs->mxPathname+1; zPathname = sqlite3DbMallocRaw(0, nPathname*2); if( zPathname==0 ){ return SQLITE_NOMEM; } zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */ rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname); nPathname = sqlite3Strlen30(zPathname); z = zUri = &zFilename[sqlite3Strlen30(zFilename)+1]; while( *z ){ z += sqlite3Strlen30(z)+1; z += sqlite3Strlen30(z)+1; } nUri = (int)(&z[1] - zUri); assert( nUri>=0 ); if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){ /* This branch is taken when the journal path required by ** the database being opened will be more than pVfs->mxPathname ** bytes in length. This means the database cannot be opened, ** as it will not be possible to open the journal file or even ** check for a hot-journal before reading. */ rc = SQLITE_CANTOPEN_BKPT; } if( rc!=SQLITE_OK ){ sqlite3DbFree(0, zPathname); return rc; } } /* Allocate memory for the Pager structure, PCache object, the ** three file descriptors, the database file name and the journal ** file name. The layout in memory is as follows: ** ** Pager object (sizeof(Pager) bytes) ** PCache object (sqlite3PcacheSize() bytes) ** Database file handle (pVfs->szOsFile bytes) ** Sub-journal file handle (journalFileSize bytes) ** Main journal file handle (journalFileSize bytes) ** Database file name (nPathname+1 bytes) ** Journal file name (nPathname+8+1 bytes) */ pPtr = (u8 *)sqlite3MallocZero( ROUND8(sizeof(*pPager)) + /* Pager structure */ ROUND8(pcacheSize) + /* PCache object */ ROUND8(pVfs->szOsFile) + /* The main db file */ journalFileSize * 2 + /* The two journal files */ nPathname + 1 + nUri + /* zFilename */ nPathname + 8 + 2 /* zJournal */ #ifndef SQLITE_OMIT_WAL + nPathname + 4 + 2 /* zWal */ #endif ); assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) ); if( !pPtr ){ sqlite3DbFree(0, zPathname); return SQLITE_NOMEM; } pPager = (Pager*)(pPtr); pPager->pPCache = (PCache*)(pPtr += ROUND8(sizeof(*pPager))); pPager->fd = (sqlite3_file*)(pPtr += ROUND8(pcacheSize)); pPager->sjfd = (sqlite3_file*)(pPtr += ROUND8(pVfs->szOsFile)); pPager->jfd = (sqlite3_file*)(pPtr += journalFileSize); pPager->zFilename = (char*)(pPtr += journalFileSize); assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) ); /* Fill in the Pager.zFilename and Pager.zJournal buffers, if required. */ if( zPathname ){ assert( nPathname>0 ); pPager->zJournal = (char*)(pPtr += nPathname + 1 + nUri); memcpy(pPager->zFilename, zPathname, nPathname); if( nUri ) memcpy(&pPager->zFilename[nPathname+1], zUri, nUri); memcpy(pPager->zJournal, zPathname, nPathname); memcpy(&pPager->zJournal[nPathname], "-journal\000", 8+2); #ifndef SQLITE_OMIT_WAL pPager->zWal = &pPager->zJournal[nPathname+8+1]; memcpy(pPager->zWal, zPathname, nPathname); memcpy(&pPager->zWal[nPathname], "-wal\000", 4+1); #endif sqlite3DbFree(0, zPathname); } pPager->pVfs = pVfs; pPager->vfsFlags = vfsFlags; /* Open the pager file. */ if( zFilename && zFilename[0] ){ int fout = 0; /* VFS flags returned by xOpen() */ rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout); assert( !memDb ); readOnly = (fout&SQLITE_OPEN_READONLY); /* If the file was successfully opened for read/write access, ** choose a default page size in case we have to create the ** database file. The default page size is the maximum of: ** ** + SQLITE_DEFAULT_PAGE_SIZE, ** + The value returned by sqlite3OsSectorSize() ** + The largest page size that can be written atomically. */ if( rc==SQLITE_OK ){ int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); if( !readOnly ){ setSectorSize(pPager); assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE); if( szPageDfltsectorSize ){ if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){ szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE; }else{ szPageDflt = (u32)pPager->sectorSize; } } #ifdef SQLITE_ENABLE_ATOMIC_WRITE { int ii; assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536); for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){ if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){ szPageDflt = ii; } } } #endif } pPager->noLock = 0; if( (iDc & SQLITE_IOCAP_IMMUTABLE)!=0 ){ vfsFlags |= SQLITE_OPEN_READONLY; goto act_like_temp_file; } } }else{ /* If a temporary file is requested, it is not opened immediately. ** In this case we accept the default page size and delay actually ** opening the file until the first call to OsWrite(). ** ** This branch is also run for an in-memory database. An in-memory ** database is the same as a temp-file that is never written out to ** disk and uses an in-memory rollback journal. ** ** This branch also runs for files marked as immutable. */ act_like_temp_file: tempFile = 1; pPager->eState = PAGER_READER; /* Pretend we already have a lock */ pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE mode */ pPager->noLock = 1; /* Do no locking */ readOnly = (vfsFlags&SQLITE_OPEN_READONLY); } /* The following call to PagerSetPagesize() serves to set the value of ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer. */ if( rc==SQLITE_OK ){ assert( pPager->memDb==0 ); rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1); testcase( rc!=SQLITE_OK ); } /* Initialize the PCache object. */ if( rc==SQLITE_OK ){ assert( nExtra<1000 ); nExtra = ROUND8(nExtra); rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); } /* If an error occurred above, free the Pager structure and close the file. */ if( rc!=SQLITE_OK ){ sqlite3OsClose(pPager->fd); sqlite3PageFree(pPager->pTmpSpace); sqlite3_free(pPager); return rc; } PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename)); IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename)) pPager->useJournal = (u8)useJournal; /* pPager->stmtOpen = 0; */ /* pPager->stmtInUse = 0; */ /* pPager->nRef = 0; */ /* pPager->stmtSize = 0; */ /* pPager->stmtJSize = 0; */ /* pPager->nPage = 0; */ pPager->mxPgno = SQLITE_MAX_PAGE_COUNT; /* pPager->state = PAGER_UNLOCK; */ /* pPager->errMask = 0; */ pPager->tempFile = (u8)tempFile; assert( tempFile==PAGER_LOCKINGMODE_NORMAL || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE ); assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 ); pPager->exclusiveMode = (u8)tempFile; pPager->changeCountDone = pPager->tempFile; pPager->memDb = (u8)memDb; pPager->readOnly = (u8)readOnly; assert( useJournal || pPager->tempFile ); pPager->noSync = pPager->tempFile; if( pPager->noSync ){ assert( pPager->fullSync==0 ); assert( pPager->syncFlags==0 ); assert( pPager->walSyncFlags==0 ); assert( pPager->ckptSyncFlags==0 ); }else{ pPager->fullSync = 1; pPager->syncFlags = SQLITE_SYNC_NORMAL; pPager->walSyncFlags = SQLITE_SYNC_NORMAL | WAL_SYNC_TRANSACTIONS; pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL; } /* pPager->pFirst = 0; */ /* pPager->pFirstSynced = 0; */ /* pPager->pLast = 0; */ pPager->nExtra = (u16)nExtra; pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT; assert( isOpen(pPager->fd) || tempFile ); setSectorSize(pPager); if( !useJournal ){ pPager->journalMode = PAGER_JOURNALMODE_OFF; }else if( memDb ){ pPager->journalMode = PAGER_JOURNALMODE_MEMORY; } /* pPager->xBusyHandler = 0; */ /* pPager->pBusyHandlerArg = 0; */ pPager->xReiniter = xReinit; /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ /* pPager->szMmap = SQLITE_DEFAULT_MMAP_SIZE // will be set by btree.c */ *ppPager = pPager; return SQLITE_OK; } /* Verify that the database file has not be deleted or renamed out from ** under the pager. Return SQLITE_OK if the database is still were it ought ** to be on disk. Return non-zero (SQLITE_READONLY_DBMOVED or some other error ** code from sqlite3OsAccess()) if the database has gone missing. */ static int databaseIsUnmoved(Pager *pPager){ int bHasMoved = 0; int rc; if( pPager->tempFile ) return SQLITE_OK; if( pPager->dbSize==0 ) return SQLITE_OK; assert( pPager->zFilename && pPager->zFilename[0] ); rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_HAS_MOVED, &bHasMoved); if( rc==SQLITE_NOTFOUND ){ /* If the HAS_MOVED file-control is unimplemented, assume that the file ** has not been moved. That is the historical behavior of SQLite: prior to ** version 3.8.3, it never checked */ rc = SQLITE_OK; }else if( rc==SQLITE_OK && bHasMoved ){ rc = SQLITE_READONLY_DBMOVED; } return rc; } /* ** This function is called after transitioning from PAGER_UNLOCK to ** PAGER_SHARED state. It tests if there is a hot journal present in ** the file-system for the given pager. A hot journal is one that ** needs to be played back. According to this function, a hot-journal ** file exists if the following criteria are met: ** ** * The journal file exists in the file system, and ** * No process holds a RESERVED or greater lock on the database file, and ** * The database file itself is greater than 0 bytes in size, and ** * The first byte of the journal file exists and is not 0x00. ** ** If the current size of the database file is 0 but a journal file ** exists, that is probably an old journal left over from a prior ** database with the same name. In this case the journal file is ** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK ** is returned. ** ** This routine does not check if there is a master journal filename ** at the end of the file. If there is, and that master journal file ** does not exist, then the journal file is not really hot. In this ** case this routine will return a false-positive. The pager_playback() ** routine will discover that the journal file is not really hot and ** will not roll it back. ** ** If a hot-journal file is found to exist, *pExists is set to 1 and ** SQLITE_OK returned. If no hot-journal file is present, *pExists is ** set to 0 and SQLITE_OK returned. If an IO error occurs while trying ** to determine whether or not a hot-journal file exists, the IO error ** code is returned and the value of *pExists is undefined. */ static int hasHotJournal(Pager *pPager, int *pExists){ sqlite3_vfs * const pVfs = pPager->pVfs; int rc = SQLITE_OK; /* Return code */ int exists = 1; /* True if a journal file is present */ int jrnlOpen = !!isOpen(pPager->jfd); assert( pPager->useJournal ); assert( isOpen(pPager->fd) ); assert( pPager->eState==PAGER_OPEN ); assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN )); *pExists = 0; if( !jrnlOpen ){ rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists); } if( rc==SQLITE_OK && exists ){ int locked = 0; /* True if some process holds a RESERVED lock */ /* Race condition here: Another process might have been holding the ** the RESERVED lock and have a journal open at the sqlite3OsAccess() ** call above, but then delete the journal and drop the lock before ** we get to the following sqlite3OsCheckReservedLock() call. If that ** is the case, this routine might think there is a hot journal when ** in fact there is none. This results in a false-positive which will ** be dealt with by the playback routine. Ticket #3883. */ rc = sqlite3OsCheckReservedLock(pPager->fd, &locked); if( rc==SQLITE_OK && !locked ){ Pgno nPage; /* Number of pages in database file */ rc = pagerPagecount(pPager, &nPage); if( rc==SQLITE_OK ){ /* If the database is zero pages in size, that means that either (1) the ** journal is a remnant from a prior database with the same name where ** the database file but not the journal was deleted, or (2) the initial ** transaction that populates a new database is being rolled back. ** In either case, the journal file can be deleted. However, take care ** not to delete the journal file if it is already open due to ** journal_mode=PERSIST. */ if( nPage==0 && !jrnlOpen ){ if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){ sqlite3OsDelete(pVfs, pPager->zJournal, 0); if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK); } }else{ /* The journal file exists and no other connection has a reserved ** or greater lock on the database file. Now check that there is ** at least one non-zero bytes at the start of the journal file. ** If there is, then we consider this journal to be hot. If not, ** it can be ignored. */ if( !jrnlOpen ){ int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL; rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f); } if( rc==SQLITE_OK ){ u8 first = 0; rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0); if( rc==SQLITE_IOERR_SHORT_READ ){ rc = SQLITE_OK; } if( !jrnlOpen ){ sqlite3OsClose(pPager->jfd); } *pExists = (first!=0); }else if( rc==SQLITE_CANTOPEN ){ /* If we cannot open the rollback journal file in order to see if ** it has a zero header, that might be due to an I/O error, or ** it might be due to the race condition described above and in ** ticket #3883. Either way, assume that the journal is hot. ** This might be a false positive. But if it is, then the ** automatic journal playback and recovery mechanism will deal ** with it under an EXCLUSIVE lock where we do not need to ** worry so much with race conditions. */ *pExists = 1; rc = SQLITE_OK; } } } } } return rc; } /* ** This function is called to obtain a shared lock on the database file. ** It is illegal to call sqlite3PagerAcquire() until after this function ** has been successfully called. If a shared-lock is already held when ** this function is called, it is a no-op. ** ** The following operations are also performed by this function. ** ** 1) If the pager is currently in PAGER_OPEN state (no lock held ** on the database file), then an attempt is made to obtain a ** SHARED lock on the database file. Immediately after obtaining ** the SHARED lock, the file-system is checked for a hot-journal, ** which is played back if present. Following any hot-journal ** rollback, the contents of the cache are validated by checking ** the 'change-counter' field of the database file header and ** discarded if they are found to be invalid. ** ** 2) If the pager is running in exclusive-mode, and there are currently ** no outstanding references to any pages, and is in the error state, ** then an attempt is made to clear the error state by discarding ** the contents of the page cache and rolling back any open journal ** file. ** ** If everything is successful, SQLITE_OK is returned. If an IO error ** occurs while locking the database, checking for a hot-journal file or ** rolling back a journal file, the IO error code is returned. */ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ /* This routine is only called from b-tree and only when there are no ** outstanding pages. This implies that the pager state should either ** be OPEN or READER. READER is only possible if the pager is or was in ** exclusive access mode. */ assert( sqlite3PcacheRefCount(pPager->pPCache)==0 ); assert( assert_pager_state(pPager) ); assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){ int bHotJournal = 1; /* True if there exists a hot journal-file */ rc = pager_wait_on_lock(pPager, SHARED_LOCK); if( rc!=SQLITE_OK ){ assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK ); goto failed; } /* If a journal file exists, and there is no RESERVED lock on the ** database file, then it either needs to be played back or deleted. */ if( pPager->eLock<=SHARED_LOCK ){ rc = hasHotJournal(pPager, &bHotJournal); } if( rc!=SQLITE_OK ){ goto failed; } if( bHotJournal ){ if( pPager->readOnly ){ rc = SQLITE_READONLY_ROLLBACK; goto failed; } /* Get an EXCLUSIVE lock on the database file. At this point it is ** important that a RESERVED lock is not obtained on the way to the ** EXCLUSIVE lock. If it were, another process might open the ** database file, detect the RESERVED lock, and conclude that the ** database is safe to read while this process is still rolling the ** hot-journal back. ** ** Because the intermediate RESERVED lock is not requested, any ** other process attempting to access the database file will get to ** this point in the code and fail to obtain its own EXCLUSIVE lock ** on the database file. ** ** Unless the pager is in locking_mode=exclusive mode, the lock is ** downgraded to SHARED_LOCK before this function returns. */ rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); if( rc!=SQLITE_OK ){ goto failed; } /* If it is not already open and the file exists on disk, open the ** journal for read/write access. Write access is required because ** in exclusive-access mode the file descriptor will be kept open ** and possibly used for a transaction later on. Also, write-access ** is usually required to finalize the journal in journal_mode=persist ** mode (and also for journal_mode=truncate on some systems). ** ** If the journal does not exist, it usually means that some ** other connection managed to get in and roll it back before ** this connection obtained the exclusive lock above. Or, it ** may mean that the pager was in the error-state when this ** function was called and the journal file does not exist. */ if( !isOpen(pPager->jfd) ){ sqlite3_vfs * const pVfs = pPager->pVfs; int bExists; /* True if journal file exists */ rc = sqlite3OsAccess( pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists); if( rc==SQLITE_OK && bExists ){ int fout = 0; int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; assert( !pPager->tempFile ); rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout); assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){ rc = SQLITE_CANTOPEN_BKPT; sqlite3OsClose(pPager->jfd); } } } /* Playback and delete the journal. Drop the database write ** lock and reacquire the read lock. Purge the cache before ** playing back the hot-journal so that we don't end up with ** an inconsistent cache. Sync the hot journal before playing ** it back since the process that crashed and left the hot journal ** probably did not sync it and we are required to always sync ** the journal before playing it back. */ if( isOpen(pPager->jfd) ){ assert( rc==SQLITE_OK ); rc = pagerSyncHotJournal(pPager); if( rc==SQLITE_OK ){ rc = pager_playback(pPager, 1); pPager->eState = PAGER_OPEN; } }else if( !pPager->exclusiveMode ){ pagerUnlockDb(pPager, SHARED_LOCK); } if( rc!=SQLITE_OK ){ /* This branch is taken if an error occurs while trying to open ** or roll back a hot-journal while holding an EXCLUSIVE lock. The ** pager_unlock() routine will be called before returning to unlock ** the file. If the unlock attempt fails, then Pager.eLock must be ** set to UNKNOWN_LOCK (see the comment above the #define for ** UNKNOWN_LOCK above for an explanation). ** ** In order to get pager_unlock() to do this, set Pager.eState to ** PAGER_ERROR now. This is not actually counted as a transition ** to ERROR state in the state diagram at the top of this file, ** since we know that the same call to pager_unlock() will very ** shortly transition the pager object to the OPEN state. Calling ** assert_pager_state() would fail now, as it should not be possible ** to be in ERROR state when there are zero outstanding page ** references. */ pager_error(pPager, rc); goto failed; } assert( pPager->eState==PAGER_OPEN ); assert( (pPager->eLock==SHARED_LOCK) || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK) ); } if( !pPager->tempFile && pPager->hasHeldSharedLock ){ /* The shared-lock has just been acquired then check to ** see if the database has been modified. If the database has changed, ** flush the cache. The hasHeldSharedLock flag prevents this from ** occurring on the very first access to a file, in order to save a ** single unnecessary sqlite3OsRead() call at the start-up. ** ** Database changes are detected by looking at 15 bytes beginning ** at offset 24 into the file. The first 4 of these 16 bytes are ** a 32-bit counter that is incremented with each change. The ** other bytes change randomly with each file change when ** a codec is in use. ** ** There is a vanishingly small chance that a change will not be ** detected. The chance of an undetected change is so small that ** it can be neglected. */ Pgno nPage = 0; char dbFileVers[sizeof(pPager->dbFileVers)]; rc = pagerPagecount(pPager, &nPage); if( rc ) goto failed; if( nPage>0 ){ IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ goto failed; } }else{ memset(dbFileVers, 0, sizeof(dbFileVers)); } if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ pager_reset(pPager); /* Unmap the database file. It is possible that external processes ** may have truncated the database file and then extended it back ** to its original size while this process was not holding a lock. ** In this case there may exist a Pager.pMap mapping that appears ** to be the right size but is not actually valid. Avoid this ** possibility by unmapping the db here. */ if( USEFETCH(pPager) ){ sqlite3OsUnfetch(pPager->fd, 0, 0); } } } /* If there is a WAL file in the file-system, open this database in WAL ** mode. Otherwise, the following function call is a no-op. */ rc = pagerOpenWalIfPresent(pPager); #ifndef SQLITE_OMIT_WAL assert( pPager->pWal==0 || rc==SQLITE_OK ); #endif } if( pagerUseWal(pPager) ){ assert( rc==SQLITE_OK ); rc = pagerBeginReadTransaction(pPager); } if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){ rc = pagerPagecount(pPager, &pPager->dbSize); } failed: if( rc!=SQLITE_OK ){ pager_unlock(pPager); assert( pPager->eState==PAGER_OPEN ); }else{ pPager->eState = PAGER_READER; pPager->hasHeldSharedLock = 1; } return rc; } /* ** If the reference count has reached zero, rollback any active ** transaction and unlock the pager. ** ** Except, in locking_mode=EXCLUSIVE when there is nothing to in ** the rollback journal, the unlock is not performed and there is ** nothing to rollback, so this routine is a no-op. */ static void pagerUnlockIfUnused(Pager *pPager){ if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ pagerUnlockAndRollback(pPager); } } /* ** Acquire a reference to page number pgno in pager pPager (a page ** reference has type DbPage*). If the requested reference is ** successfully obtained, it is copied to *ppPage and SQLITE_OK returned. ** ** If the requested page is already in the cache, it is returned. ** Otherwise, a new page object is allocated and populated with data ** read from the database file. In some cases, the pcache module may ** choose not to allocate a new page object and may reuse an existing ** object with no outstanding references. ** ** The extra data appended to a page is always initialized to zeros the ** first time a page is loaded into memory. If the page requested is ** already in the cache when this function is called, then the extra ** data is left as it was when the page object was last used. ** ** If the database image is smaller than the requested page or if a ** non-zero value is passed as the noContent parameter and the ** requested page is not already stored in the cache, then no ** actual disk read occurs. In this case the memory image of the ** page is initialized to all zeros. ** ** If noContent is true, it means that we do not care about the contents ** of the page. This occurs in two scenarios: ** ** a) When reading a free-list leaf page from the database, and ** ** b) When a savepoint is being rolled back and we need to load ** a new page into the cache to be filled with the data read ** from the savepoint journal. ** ** If noContent is true, then the data returned is zeroed instead of ** being read from the database. Additionally, the bits corresponding ** to pgno in Pager.pInJournal (bitvec of pages already written to the ** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open ** savepoints are set. This means if the page is made writable at any ** point in the future, using a call to sqlite3PagerWrite(), its contents ** will not be journaled. This saves IO. ** ** The acquisition might fail for several reasons. In all cases, ** an appropriate error code is returned and *ppPage is set to NULL. ** ** See also sqlite3PagerLookup(). Both this routine and Lookup() attempt ** to find a page in the in-memory cache first. If the page is not already ** in memory, this routine goes to disk to read it in whereas Lookup() ** just returns 0. This routine acquires a read-lock the first time it ** has to go to disk, and could also playback an old journal if necessary. ** Since Lookup() never goes to disk, it never has to deal with locks ** or journal files. */ SQLITE_PRIVATE int sqlite3PagerAcquire( Pager *pPager, /* The pager open on the database file */ Pgno pgno, /* Page number to fetch */ DbPage **ppPage, /* Write a pointer to the page here */ int flags /* PAGER_GET_XXX flags */ ){ int rc = SQLITE_OK; PgHdr *pPg = 0; u32 iFrame = 0; /* Frame to read from WAL file */ const int noContent = (flags & PAGER_GET_NOCONTENT); /* It is acceptable to use a read-only (mmap) page for any page except ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY ** flag was specified by the caller. And so long as the db is not a ** temporary or in-memory database. */ const int bMmapOk = (pgno>1 && USEFETCH(pPager) && (pPager->eState==PAGER_READER || (flags & PAGER_GET_READONLY)) #ifdef SQLITE_HAS_CODEC && pPager->xCodec==0 #endif ); /* Optimization note: Adding the "pgno<=1" term before "pgno==0" here ** allows the compiler optimizer to reuse the results of the "pgno>1" ** test in the previous statement, and avoid testing pgno==0 in the ** common case where pgno is large. */ if( pgno<=1 && pgno==0 ){ return SQLITE_CORRUPT_BKPT; } assert( pPager->eState>=PAGER_READER ); assert( assert_pager_state(pPager) ); assert( noContent==0 || bMmapOk==0 ); assert( pPager->hasHeldSharedLock==1 ); /* If the pager is in the error state, return an error immediately. ** Otherwise, request the page from the PCache layer. */ if( pPager->errCode!=SQLITE_OK ){ rc = pPager->errCode; }else{ if( bMmapOk && pagerUseWal(pPager) ){ rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); if( rc!=SQLITE_OK ) goto pager_acquire_err; } if( bMmapOk && iFrame==0 ){ void *pData = 0; rc = sqlite3OsFetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData ); if( rc==SQLITE_OK && pData ){ if( pPager->eState>PAGER_READER ){ pPg = sqlite3PagerLookup(pPager, pgno); } if( pPg==0 ){ rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg); }else{ sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData); } if( pPg ){ assert( rc==SQLITE_OK ); *ppPage = pPg; return SQLITE_OK; } } if( rc!=SQLITE_OK ){ goto pager_acquire_err; } } { sqlite3_pcache_page *pBase; pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3); if( pBase==0 ){ rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase); if( rc!=SQLITE_OK ) goto pager_acquire_err; if( pBase==0 ){ pPg = *ppPage = 0; rc = SQLITE_NOMEM; goto pager_acquire_err; } } pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); assert( pPg!=0 ); } } if( rc!=SQLITE_OK ){ /* Either the call to sqlite3PcacheFetch() returned an error or the ** pager was already in the error-state when this function was called. ** Set pPg to 0 and jump to the exception handler. */ pPg = 0; goto pager_acquire_err; } assert( pPg==(*ppPage) ); assert( pPg->pgno==pgno ); assert( pPg->pPager==pPager || pPg->pPager==0 ); if( pPg->pPager && !noContent ){ /* In this case the pcache already contains an initialized copy of ** the page. Return without further ado. */ assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) ); pPager->aStat[PAGER_STAT_HIT]++; return SQLITE_OK; }else{ /* The pager cache has created a new page. Its content needs to ** be initialized. */ pPg->pPager = pPager; /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page ** number greater than this, or the unused locking-page, is requested. */ if( pgno>PAGER_MAX_PGNO || pgno==PAGER_MJ_PGNO(pPager) ){ rc = SQLITE_CORRUPT_BKPT; goto pager_acquire_err; } if( pPager->dbSizefd) ){ if( pgno>pPager->mxPgno ){ rc = SQLITE_FULL; goto pager_acquire_err; } if( noContent ){ /* Failure to set the bits in the InJournal bit-vectors is benign. ** It merely means that we might do some extra work to journal a ** page that does not need to be journaled. Nevertheless, be sure ** to test the case where a malloc error occurs while trying to set ** a bit in a bit vector. */ if( pgno<=pPager->dbOrigSize ){ TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno); testcase( rc==SQLITE_NOMEM ); } TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno); testcase( rc==SQLITE_NOMEM ); } memset(pPg->pData, 0, pPager->pageSize); IOTRACE(("ZERO %p %d\n", pPager, pgno)); }else{ if( pagerUseWal(pPager) && bMmapOk==0 ){ rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); if( rc!=SQLITE_OK ) goto pager_acquire_err; } assert( pPg->pPager==pPager ); pPager->aStat[PAGER_STAT_MISS]++; rc = readDbPage(pPg, iFrame); if( rc!=SQLITE_OK ){ goto pager_acquire_err; } } pager_set_pagehash(pPg); } return SQLITE_OK; pager_acquire_err: assert( rc!=SQLITE_OK ); if( pPg ){ sqlite3PcacheDrop(pPg); } pagerUnlockIfUnused(pPager); *ppPage = 0; return rc; } /* ** Acquire a page if it is already in the in-memory cache. Do ** not read the page from disk. Return a pointer to the page, ** or 0 if the page is not in cache. ** ** See also sqlite3PagerGet(). The difference between this routine ** and sqlite3PagerGet() is that _get() will go to the disk and read ** in the page if the page is not already in cache. This routine ** returns NULL if the page is not in cache or if a disk I/O error ** has ever happened. */ SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ sqlite3_pcache_page *pPage; assert( pPager!=0 ); assert( pgno!=0 ); assert( pPager->pPCache!=0 ); pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); assert( pPage==0 || pPager->hasHeldSharedLock ); if( pPage==0 ) return 0; return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); } /* ** Release a page reference. ** ** If the number of references to the page drop to zero, then the ** page is added to the LRU list. When all references to all pages ** are released, a rollback occurs and the lock on the database is ** removed. */ SQLITE_PRIVATE void sqlite3PagerUnrefNotNull(DbPage *pPg){ Pager *pPager; assert( pPg!=0 ); pPager = pPg->pPager; if( pPg->flags & PGHDR_MMAP ){ pagerReleaseMapPage(pPg); }else{ sqlite3PcacheRelease(pPg); } pagerUnlockIfUnused(pPager); } SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){ if( pPg ) sqlite3PagerUnrefNotNull(pPg); } /* ** This function is called at the start of every write transaction. ** There must already be a RESERVED or EXCLUSIVE lock on the database ** file when this routine is called. ** ** Open the journal file for pager pPager and write a journal header ** to the start of it. If there are active savepoints, open the sub-journal ** as well. This function is only used when the journal file is being ** opened to write a rollback log for a transaction. It is not used ** when opening a hot journal file to roll it back. ** ** If the journal file is already open (as it may be in exclusive mode), ** then this function just writes a journal header to the start of the ** already open file. ** ** Whether or not the journal file is opened by this function, the ** Pager.pInJournal bitvec structure is allocated. ** ** Return SQLITE_OK if everything is successful. Otherwise, return ** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or ** an IO error code if opening or writing the journal file fails. */ static int pager_open_journal(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ sqlite3_vfs * const pVfs = pPager->pVfs; /* Local cache of vfs pointer */ assert( pPager->eState==PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); assert( pPager->pInJournal==0 ); /* If already in the error state, this function is a no-op. But on ** the other hand, this routine is never called if we are already in ** an error state. */ if( NEVER(pPager->errCode) ) return pPager->errCode; if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){ pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize); if( pPager->pInJournal==0 ){ return SQLITE_NOMEM; } /* Open the journal file if it is not already open. */ if( !isOpen(pPager->jfd) ){ if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){ sqlite3MemJournalOpen(pPager->jfd); }else{ const int flags = /* VFS flags to open journal file */ SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE| (pPager->tempFile ? (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL): (SQLITE_OPEN_MAIN_JOURNAL) ); /* Verify that the database still has the same name as it did when ** it was originally opened. */ rc = databaseIsUnmoved(pPager); if( rc==SQLITE_OK ){ #ifdef SQLITE_ENABLE_ATOMIC_WRITE rc = sqlite3JournalOpen( pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager) ); #else rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0); #endif } } assert( rc!=SQLITE_OK || isOpen(pPager->jfd) ); } /* Write the first journal header to the journal file and open ** the sub-journal if necessary. */ if( rc==SQLITE_OK ){ /* TODO: Check if all of these are really required. */ pPager->nRec = 0; pPager->journalOff = 0; pPager->setMaster = 0; pPager->journalHdr = 0; rc = writeJournalHdr(pPager); } } if( rc!=SQLITE_OK ){ sqlite3BitvecDestroy(pPager->pInJournal); pPager->pInJournal = 0; }else{ assert( pPager->eState==PAGER_WRITER_LOCKED ); pPager->eState = PAGER_WRITER_CACHEMOD; } return rc; } /* ** Begin a write-transaction on the specified pager object. If a ** write-transaction has already been opened, this function is a no-op. ** ** If the exFlag argument is false, then acquire at least a RESERVED ** lock on the database file. If exFlag is true, then acquire at least ** an EXCLUSIVE lock. If such a lock is already held, no locking ** functions need be called. ** ** If the subjInMemory argument is non-zero, then any sub-journal opened ** within this transaction will be opened as an in-memory file. This ** has no effect if the sub-journal is already opened (as it may be when ** running in exclusive mode) or if the transaction does not require a ** sub-journal. If the subjInMemory argument is zero, then any required ** sub-journal is implemented in-memory if pPager is an in-memory database, ** or using a temporary file otherwise. */ SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ int rc = SQLITE_OK; if( pPager->errCode ) return pPager->errCode; assert( pPager->eState>=PAGER_READER && pPager->eStatesubjInMemory = (u8)subjInMemory; if( ALWAYS(pPager->eState==PAGER_READER) ){ assert( pPager->pInJournal==0 ); if( pagerUseWal(pPager) ){ /* If the pager is configured to use locking_mode=exclusive, and an ** exclusive lock on the database is not already held, obtain it now. */ if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){ rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); if( rc!=SQLITE_OK ){ return rc; } sqlite3WalExclusiveMode(pPager->pWal, 1); } /* Grab the write lock on the log file. If successful, upgrade to ** PAGER_RESERVED state. Otherwise, return an error code to the caller. ** The busy-handler is not invoked if another connection already ** holds the write-lock. If possible, the upper layer will call it. */ rc = sqlite3WalBeginWriteTransaction(pPager->pWal); }else{ /* Obtain a RESERVED lock on the database file. If the exFlag parameter ** is true, then immediately upgrade this to an EXCLUSIVE lock. The ** busy-handler callback can be used when upgrading to the EXCLUSIVE ** lock, but not when obtaining the RESERVED lock. */ rc = pagerLockDb(pPager, RESERVED_LOCK); if( rc==SQLITE_OK && exFlag ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } } if( rc==SQLITE_OK ){ /* Change to WRITER_LOCKED state. ** ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD ** when it has an open transaction, but never to DBMOD or FINISHED. ** This is because in those states the code to roll back savepoint ** transactions may copy data from the sub-journal into the database ** file as well as into the page cache. Which would be incorrect in ** WAL mode. */ pPager->eState = PAGER_WRITER_LOCKED; pPager->dbHintSize = pPager->dbSize; pPager->dbFileSize = pPager->dbSize; pPager->dbOrigSize = pPager->dbSize; pPager->journalOff = 0; } assert( rc==SQLITE_OK || pPager->eState==PAGER_READER ); assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); } PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager))); return rc; } /* ** Write page pPg onto the end of the rollback journal. */ static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){ Pager *pPager = pPg->pPager; int rc; u32 cksum; char *pData2; i64 iOff = pPager->journalOff; /* We should never write to the journal file the page that ** contains the database locks. The following assert verifies ** that we do not. */ assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); assert( pPager->journalHdr<=pPager->journalOff ); CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); cksum = pager_cksum(pPager, (u8*)pData2); /* Even if an IO or diskfull error occurs while journalling the ** page in the block above, set the need-sync flag for the page. ** Otherwise, when the transaction is rolled back, the logic in ** playback_one_page() will think that the page needs to be restored ** in the database file. And if an IO error occurs while doing so, ** then corruption may follow. */ pPg->flags |= PGHDR_NEED_SYNC; rc = write32bits(pPager->jfd, iOff, pPg->pgno); if( rc!=SQLITE_OK ) return rc; rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); if( rc!=SQLITE_OK ) return rc; rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); if( rc!=SQLITE_OK ) return rc; IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, pPager->journalOff, pPager->pageSize)); PAGER_INCR(sqlite3_pager_writej_count); PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", PAGERID(pPager), pPg->pgno, ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); pPager->journalOff += 8 + pPager->pageSize; pPager->nRec++; assert( pPager->pInJournal!=0 ); rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); testcase( rc==SQLITE_NOMEM ); assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); rc |= addToSavepointBitvecs(pPager, pPg->pgno); assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); return rc; } /* ** Mark a single data page as writeable. The page is written into the ** main journal or sub-journal as required. If the page is written into ** one of the journals, the corresponding bit is set in the ** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs ** of any open savepoints as appropriate. */ static int pager_write(PgHdr *pPg){ Pager *pPager = pPg->pPager; int rc = SQLITE_OK; /* This routine is not called unless a write-transaction has already ** been started. The journal file may or may not be open at this point. ** It is never called in the ERROR state. */ assert( pPager->eState==PAGER_WRITER_LOCKED || pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD ); assert( assert_pager_state(pPager) ); assert( pPager->errCode==0 ); assert( pPager->readOnly==0 ); CHECK_PAGE(pPg); /* The journal file needs to be opened. Higher level routines have already ** obtained the necessary locks to begin the write-transaction, but the ** rollback journal might not yet be open. Open it now if this is the case. ** ** This is done before calling sqlite3PcacheMakeDirty() on the page. ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then ** an error might occur and the pager would end up in WRITER_LOCKED state ** with pages marked as dirty in the cache. */ if( pPager->eState==PAGER_WRITER_LOCKED ){ rc = pager_open_journal(pPager); if( rc!=SQLITE_OK ) return rc; } assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); assert( assert_pager_state(pPager) ); /* Mark the page that is about to be modified as dirty. */ sqlite3PcacheMakeDirty(pPg); /* If a rollback journal is in use, them make sure the page that is about ** to change is in the rollback journal, or if the page is a new page off ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. */ assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); if( pPager->pInJournal!=0 && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 ){ assert( pagerUseWal(pPager)==0 ); if( pPg->pgno<=pPager->dbOrigSize ){ rc = pagerAddPageToRollbackJournal(pPg); if( rc!=SQLITE_OK ){ return rc; } }else{ if( pPager->eState!=PAGER_WRITER_DBMOD ){ pPg->flags |= PGHDR_NEED_SYNC; } PAGERTRACE(("APPEND %d page %d needSync=%d\n", PAGERID(pPager), pPg->pgno, ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); } } /* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list ** and before writing the page into the rollback journal. Wait until now, ** after the page has been successfully journalled, before setting the ** PGHDR_WRITEABLE bit that indicates that the page can be safely modified. */ pPg->flags |= PGHDR_WRITEABLE; /* If the statement journal is open and the page is not in it, ** then write the page into the statement journal. */ if( pPager->nSavepoint>0 ){ rc = subjournalPageIfRequired(pPg); } /* Update the database size and return. */ if( pPager->dbSizepgno ){ pPager->dbSize = pPg->pgno; } return rc; } /* ** This is a variant of sqlite3PagerWrite() that runs when the sector size ** is larger than the page size. SQLite makes the (reasonable) assumption that ** all bytes of a sector are written together by hardware. Hence, all bytes of ** a sector need to be journalled in case of a power loss in the middle of ** a write. ** ** Usually, the sector size is less than or equal to the page size, in which ** case pages can be individually written. This routine only runs in the ** exceptional case where the page size is smaller than the sector size. */ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ int rc = SQLITE_OK; /* Return code */ Pgno nPageCount; /* Total number of pages in database file */ Pgno pg1; /* First page of the sector pPg is located on. */ int nPage = 0; /* Number of pages starting at pg1 to journal */ int ii; /* Loop counter */ int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ Pager *pPager = pPg->pPager; /* The pager that owns pPg */ Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow ** a journal header to be written between the pages journaled by ** this function. */ assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)==0 ); pPager->doNotSpill |= SPILLFLAG_NOSYNC; /* This trick assumes that both the page-size and sector-size are ** an integer power of 2. It sets variable pg1 to the identifier ** of the first page of the sector pPg is located on. */ pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1; nPageCount = pPager->dbSize; if( pPg->pgno>nPageCount ){ nPage = (pPg->pgno - pg1)+1; }else if( (pg1+nPagePerSector-1)>nPageCount ){ nPage = nPageCount+1-pg1; }else{ nPage = nPagePerSector; } assert(nPage>0); assert(pg1<=pPg->pgno); assert((pg1+nPage)>pPg->pgno); for(ii=0; iipgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){ if( pg!=PAGER_MJ_PGNO(pPager) ){ rc = sqlite3PagerGet(pPager, pg, &pPage); if( rc==SQLITE_OK ){ rc = pager_write(pPage); if( pPage->flags&PGHDR_NEED_SYNC ){ needSync = 1; } sqlite3PagerUnrefNotNull(pPage); } } }else if( (pPage = sqlite3PagerLookup(pPager, pg))!=0 ){ if( pPage->flags&PGHDR_NEED_SYNC ){ needSync = 1; } sqlite3PagerUnrefNotNull(pPage); } } /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages ** starting at pg1, then it needs to be set for all of them. Because ** writing to any of these nPage pages may damage the others, the ** journal file must contain sync()ed copies of all of them ** before any of them can be written out to the database file. */ if( rc==SQLITE_OK && needSync ){ for(ii=0; iiflags |= PGHDR_NEED_SYNC; sqlite3PagerUnrefNotNull(pPage); } } } assert( (pPager->doNotSpill & SPILLFLAG_NOSYNC)!=0 ); pPager->doNotSpill &= ~SPILLFLAG_NOSYNC; return rc; } /* ** Mark a data page as writeable. This routine must be called before ** making changes to a page. The caller must check the return value ** of this function and be careful not to change any page data unless ** this routine returns SQLITE_OK. ** ** The difference between this function and pager_write() is that this ** function also deals with the special case where 2 or more pages ** fit on a single disk sector. In this case all co-resident pages ** must have been written to the journal file before returning. ** ** If an error occurs, SQLITE_NOMEM or an IO error code is returned ** as appropriate. Otherwise, SQLITE_OK. */ SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){ Pager *pPager = pPg->pPager; assert( (pPg->flags & PGHDR_MMAP)==0 ); assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( pPager->eState!=PAGER_ERROR ); assert( assert_pager_state(pPager) ); if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); return SQLITE_OK; }else if( pPager->sectorSize > (u32)pPager->pageSize ){ return pagerWriteLargeSector(pPg); }else{ return pager_write(pPg); } } /* ** Return TRUE if the page given in the argument was previously passed ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok ** to change the content of the page. */ #ifndef NDEBUG SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){ return pPg->flags & PGHDR_WRITEABLE; } #endif /* ** A call to this routine tells the pager that it is not necessary to ** write the information on page pPg back to the disk, even though ** that page might be marked as dirty. This happens, for example, when ** the page has been added as a leaf of the freelist and so its ** content no longer matters. ** ** The overlying software layer calls this routine when all of the data ** on the given page is unused. The pager marks the page as clean so ** that it does not get written to disk. ** ** Tests show that this optimization can quadruple the speed of large ** DELETE operations. */ SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){ Pager *pPager = pPg->pPager; if( (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){ PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) pPg->flags |= PGHDR_DONT_WRITE; pPg->flags &= ~PGHDR_WRITEABLE; pager_set_pagehash(pPg); } } /* ** This routine is called to increment the value of the database file ** change-counter, stored as a 4-byte big-endian integer starting at ** byte offset 24 of the pager file. The secondary change counter at ** 92 is also updated, as is the SQLite version number at offset 96. ** ** But this only happens if the pPager->changeCountDone flag is false. ** To avoid excess churning of page 1, the update only happens once. ** See also the pager_write_changecounter() routine that does an ** unconditional update of the change counters. ** ** If the isDirectMode flag is zero, then this is done by calling ** sqlite3PagerWrite() on page 1, then modifying the contents of the ** page data. In this case the file will be updated when the current ** transaction is committed. ** ** The isDirectMode flag may only be non-zero if the library was compiled ** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case, ** if isDirect is non-zero, then the database file is updated directly ** by writing an updated version of page 1 using a call to the ** sqlite3OsWrite() function. */ static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ int rc = SQLITE_OK; assert( pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD ); assert( assert_pager_state(pPager) ); /* Declare and initialize constant integer 'isDirect'. If the ** atomic-write optimization is enabled in this build, then isDirect ** is initialized to the value passed as the isDirectMode parameter ** to this function. Otherwise, it is always set to zero. ** ** The idea is that if the atomic-write optimization is not ** enabled at compile time, the compiler can omit the tests of ** 'isDirect' below, as well as the block enclosed in the ** "if( isDirect )" condition. */ #ifndef SQLITE_ENABLE_ATOMIC_WRITE # define DIRECT_MODE 0 assert( isDirectMode==0 ); UNUSED_PARAMETER(isDirectMode); #else # define DIRECT_MODE isDirectMode #endif if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){ PgHdr *pPgHdr; /* Reference to page 1 */ assert( !pPager->tempFile && isOpen(pPager->fd) ); /* Open page 1 of the file for writing. */ rc = sqlite3PagerGet(pPager, 1, &pPgHdr); assert( pPgHdr==0 || rc==SQLITE_OK ); /* If page one was fetched successfully, and this function is not ** operating in direct-mode, make page 1 writable. When not in ** direct mode, page 1 is always held in cache and hence the PagerGet() ** above is always successful - hence the ALWAYS on rc==SQLITE_OK. */ if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){ rc = sqlite3PagerWrite(pPgHdr); } if( rc==SQLITE_OK ){ /* Actually do the update of the change counter */ pager_write_changecounter(pPgHdr); /* If running in direct mode, write the contents of page 1 to the file. */ if( DIRECT_MODE ){ const void *zBuf; assert( pPager->dbFileSize>0 ); CODEC2(pPager, pPgHdr->pData, 1, 6, rc=SQLITE_NOMEM, zBuf); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0); pPager->aStat[PAGER_STAT_WRITE]++; } if( rc==SQLITE_OK ){ /* Update the pager's copy of the change-counter. Otherwise, the ** next time a read transaction is opened the cache will be ** flushed (as the change-counter values will not match). */ const void *pCopy = (const void *)&((const char *)zBuf)[24]; memcpy(&pPager->dbFileVers, pCopy, sizeof(pPager->dbFileVers)); pPager->changeCountDone = 1; } }else{ pPager->changeCountDone = 1; } } /* Release the page reference. */ sqlite3PagerUnref(pPgHdr); } return rc; } /* ** Sync the database file to disk. This is a no-op for in-memory databases ** or pages with the Pager.noSync flag set. ** ** If successful, or if called on a pager for which it is a no-op, this ** function returns SQLITE_OK. Otherwise, an IO error code is returned. */ SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zMaster){ int rc = SQLITE_OK; if( isOpen(pPager->fd) ){ void *pArg = (void*)zMaster; rc = sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC, pArg); if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; } if( rc==SQLITE_OK && !pPager->noSync ){ rc = sqlite3OsSync(pPager->fd, pPager->syncFlags); } return rc; } /* ** This function may only be called while a write-transaction is active in ** rollback. If the connection is in WAL mode, this call is a no-op. ** Otherwise, if the connection does not already have an EXCLUSIVE lock on ** the database file, an attempt is made to obtain one. ** ** If the EXCLUSIVE lock is already held or the attempt to obtain it is ** successful, or the connection is in WAL mode, SQLITE_OK is returned. ** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is ** returned. */ SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager){ int rc = SQLITE_OK; assert( pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD || pPager->eState==PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); if( 0==pagerUseWal(pPager) ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } return rc; } /* ** Sync the database file for the pager pPager. zMaster points to the name ** of a master journal file that should be written into the individual ** journal file. zMaster may be NULL, which is interpreted as no master ** journal (a single database transaction). ** ** This routine ensures that: ** ** * The database file change-counter is updated, ** * the journal is synced (unless the atomic-write optimization is used), ** * all dirty pages are written to the database file, ** * the database file is truncated (if required), and ** * the database file synced. ** ** The only thing that remains to commit the transaction is to finalize ** (delete, truncate or zero the first part of) the journal file (or ** delete the master journal file if specified). ** ** Note that if zMaster==NULL, this does not overwrite a previous value ** passed to an sqlite3PagerCommitPhaseOne() call. ** ** If the final parameter - noSync - is true, then the database file itself ** is not synced. The caller must call sqlite3PagerSync() directly to ** sync the database file before calling CommitPhaseTwo() to delete the ** journal file in this case. */ SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( Pager *pPager, /* Pager object */ const char *zMaster, /* If not NULL, the master journal name */ int noSync /* True to omit the xSync on the db file */ ){ int rc = SQLITE_OK; /* Return code */ assert( pPager->eState==PAGER_WRITER_LOCKED || pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD || pPager->eState==PAGER_ERROR ); assert( assert_pager_state(pPager) ); /* If a prior error occurred, report that error again. */ if( NEVER(pPager->errCode) ) return pPager->errCode; PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n", pPager->zFilename, zMaster, pPager->dbSize)); /* If no database changes have been made, return early. */ if( pPager->eStatepPCache); PgHdr *pPageOne = 0; if( pList==0 ){ /* Must have at least one page for the WAL commit flag. ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */ rc = sqlite3PagerGet(pPager, 1, &pPageOne); pList = pPageOne; pList->pDirty = 0; } assert( rc==SQLITE_OK ); if( ALWAYS(pList) ){ rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1); } sqlite3PagerUnref(pPageOne); if( rc==SQLITE_OK ){ sqlite3PcacheCleanAll(pPager->pPCache); } }else{ /* The following block updates the change-counter. Exactly how it ** does this depends on whether or not the atomic-update optimization ** was enabled at compile time, and if this transaction meets the ** runtime criteria to use the operation: ** ** * The file-system supports the atomic-write property for ** blocks of size page-size, and ** * This commit is not part of a multi-file transaction, and ** * Exactly one page has been modified and store in the journal file. ** ** If the optimization was not enabled at compile time, then the ** pager_incr_changecounter() function is called to update the change ** counter in 'indirect-mode'. If the optimization is compiled in but ** is not applicable to this transaction, call sqlite3JournalCreate() ** to make sure the journal file has actually been created, then call ** pager_incr_changecounter() to update the change-counter in indirect ** mode. ** ** Otherwise, if the optimization is both enabled and applicable, ** then call pager_incr_changecounter() to update the change-counter ** in 'direct' mode. In this case the journal file will never be ** created for this transaction. */ #ifdef SQLITE_ENABLE_ATOMIC_WRITE PgHdr *pPg; assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF || pPager->journalMode==PAGER_JOURNALMODE_WAL ); if( !zMaster && isOpen(pPager->jfd) && pPager->journalOff==jrnlBufferSize(pPager) && pPager->dbSize>=pPager->dbOrigSize && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) ){ /* Update the db file change counter via the direct-write method. The ** following call will modify the in-memory representation of page 1 ** to include the updated change counter and then write page 1 ** directly to the database file. Because of the atomic-write ** property of the host file-system, this is safe. */ rc = pager_incr_changecounter(pPager, 1); }else{ rc = sqlite3JournalCreate(pPager->jfd); if( rc==SQLITE_OK ){ rc = pager_incr_changecounter(pPager, 0); } } #else rc = pager_incr_changecounter(pPager, 0); #endif if( rc!=SQLITE_OK ) goto commit_phase_one_exit; /* Write the master journal name into the journal file. If a master ** journal file name has already been written to the journal file, ** or if zMaster is NULL (no master journal), then this call is a no-op. */ rc = writeMasterJournal(pPager, zMaster); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; /* Sync the journal file and write all dirty pages to the database. ** If the atomic-update optimization is being used, this sync will not ** create the journal file or perform any real IO. ** ** Because the change-counter page was just modified, unless the ** atomic-update optimization is used it is almost certain that the ** journal requires a sync here. However, in locking_mode=exclusive ** on a system under memory pressure it is just possible that this is ** not the case. In this case it is likely enough that the redundant ** xSync() call will be changed to a no-op by the OS anyhow. */ rc = syncJournal(pPager, 0); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache)); if( rc!=SQLITE_OK ){ assert( rc!=SQLITE_IOERR_BLOCKED ); goto commit_phase_one_exit; } sqlite3PcacheCleanAll(pPager->pPCache); /* If the file on disk is smaller than the database image, use ** pager_truncate to grow the file here. This can happen if the database ** image was extended as part of the current transaction and then the ** last page in the db image moved to the free-list. In this case the ** last page is never written out to disk, leaving the database file ** undersized. Fix this now if it is the case. */ if( pPager->dbSize>pPager->dbFileSize ){ Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager)); assert( pPager->eState==PAGER_WRITER_DBMOD ); rc = pager_truncate(pPager, nNew); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; } /* Finally, sync the database file. */ if( !noSync ){ rc = sqlite3PagerSync(pPager, zMaster); } IOTRACE(("DBSYNC %p\n", pPager)) } } commit_phase_one_exit: if( rc==SQLITE_OK && !pagerUseWal(pPager) ){ pPager->eState = PAGER_WRITER_FINISHED; } return rc; } /* ** When this function is called, the database file has been completely ** updated to reflect the changes made by the current transaction and ** synced to disk. The journal file still exists in the file-system ** though, and if a failure occurs at this point it will eventually ** be used as a hot-journal and the current transaction rolled back. ** ** This function finalizes the journal file, either by deleting, ** truncating or partially zeroing it, so that it cannot be used ** for hot-journal rollback. Once this is done the transaction is ** irrevocably committed. ** ** If an error occurs, an IO error code is returned and the pager ** moves into the error state. Otherwise, SQLITE_OK is returned. */ SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ /* This routine should not be called if a prior error has occurred. ** But if (due to a coding error elsewhere in the system) it does get ** called, just return the same error code without doing anything. */ if( NEVER(pPager->errCode) ) return pPager->errCode; assert( pPager->eState==PAGER_WRITER_LOCKED || pPager->eState==PAGER_WRITER_FINISHED || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD) ); assert( assert_pager_state(pPager) ); /* An optimization. If the database was not actually modified during ** this transaction, the pager is running in exclusive-mode and is ** using persistent journals, then this function is a no-op. ** ** The start of the journal file currently contains a single journal ** header with the nRec field set to 0. If such a journal is used as ** a hot-journal during hot-journal rollback, 0 changes will be made ** to the database file. So there is no need to zero the journal ** header. Since the pager is in exclusive mode, there is no need ** to drop any locks either. */ if( pPager->eState==PAGER_WRITER_LOCKED && pPager->exclusiveMode && pPager->journalMode==PAGER_JOURNALMODE_PERSIST ){ assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff ); pPager->eState = PAGER_READER; return SQLITE_OK; } PAGERTRACE(("COMMIT %d\n", PAGERID(pPager))); pPager->iDataVersion++; rc = pager_end_transaction(pPager, pPager->setMaster, 1); return pager_error(pPager, rc); } /* ** If a write transaction is open, then all changes made within the ** transaction are reverted and the current write-transaction is closed. ** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR ** state if an error occurs. ** ** If the pager is already in PAGER_ERROR state when this function is called, ** it returns Pager.errCode immediately. No work is performed in this case. ** ** Otherwise, in rollback mode, this function performs two functions: ** ** 1) It rolls back the journal file, restoring all database file and ** in-memory cache pages to the state they were in when the transaction ** was opened, and ** ** 2) It finalizes the journal file, so that it is not used for hot ** rollback at any point in the future. ** ** Finalization of the journal file (task 2) is only performed if the ** rollback is successful. ** ** In WAL mode, all cache-entries containing data modified within the ** current transaction are either expelled from the cache or reverted to ** their pre-transaction state by re-reading data from the database or ** WAL files. The WAL transaction is then closed. */ SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); /* PagerRollback() is a no-op if called in READER or OPEN state. If ** the pager is already in the ERROR state, the rollback is not ** attempted here. Instead, the error code is returned to the caller. */ assert( assert_pager_state(pPager) ); if( pPager->eState==PAGER_ERROR ) return pPager->errCode; if( pPager->eState<=PAGER_READER ) return SQLITE_OK; if( pagerUseWal(pPager) ){ int rc2; rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); rc2 = pager_end_transaction(pPager, pPager->setMaster, 0); if( rc==SQLITE_OK ) rc = rc2; }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){ int eState = pPager->eState; rc = pager_end_transaction(pPager, 0, 0); if( eState>PAGER_WRITER_LOCKED ){ /* This can happen using journal_mode=off. Move the pager to the error ** state to indicate that the contents of the cache may not be trusted. ** Any active readers will get SQLITE_ABORT. */ pPager->errCode = SQLITE_ABORT; pPager->eState = PAGER_ERROR; return rc; } }else{ rc = pager_playback(pPager, 0); } assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK ); assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR || rc==SQLITE_CANTOPEN ); /* If an error occurs during a ROLLBACK, we can no longer trust the pager ** cache. So call pager_error() on the way out to make any error persistent. */ return pager_error(pPager, rc); } /* ** Return TRUE if the database file is opened read-only. Return FALSE ** if the database is (in theory) writable. */ SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){ return pPager->readOnly; } #ifdef SQLITE_DEBUG /* ** Return the sum of the reference counts for all pages held by pPager. */ SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){ return sqlite3PcacheRefCount(pPager->pPCache); } #endif #if 0 /* ** Return the approximate number of bytes of memory currently ** used by the pager and its associated cache. */ SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager *pPager){ int perPageSize = pPager->pageSize + pPager->nExtra + sizeof(PgHdr) + 5*sizeof(void*); return perPageSize*sqlite3PcachePagecount(pPager->pPCache) + sqlite3MallocSize(pPager) + pPager->pageSize; } #endif /* ** Return the number of references to the specified page. */ SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage *pPage){ return sqlite3PcachePageRefcount(pPage); } #ifdef SQLITE_TEST /* ** This routine is used for testing and analysis only. */ SQLITE_PRIVATE int *sqlite3PagerStats(Pager *pPager){ static int a[11]; a[0] = sqlite3PcacheRefCount(pPager->pPCache); a[1] = sqlite3PcachePagecount(pPager->pPCache); a[2] = sqlite3PcacheGetCachesize(pPager->pPCache); a[3] = pPager->eState==PAGER_OPEN ? -1 : (int) pPager->dbSize; a[4] = pPager->eState; a[5] = pPager->errCode; a[6] = pPager->aStat[PAGER_STAT_HIT]; a[7] = pPager->aStat[PAGER_STAT_MISS]; a[8] = 0; /* Used to be pPager->nOvfl */ a[9] = pPager->nRead; a[10] = pPager->aStat[PAGER_STAT_WRITE]; return a; } #endif #if 0 /* ** Parameter eStat must be either SQLITE_DBSTATUS_CACHE_HIT or ** SQLITE_DBSTATUS_CACHE_MISS. Before returning, *pnVal is incremented by the ** current cache hit or miss count, according to the value of eStat. If the ** reset parameter is non-zero, the cache hit or miss count is zeroed before ** returning. */ SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *pPager, int eStat, int reset, int *pnVal){ assert( eStat==SQLITE_DBSTATUS_CACHE_HIT || eStat==SQLITE_DBSTATUS_CACHE_MISS || eStat==SQLITE_DBSTATUS_CACHE_WRITE ); assert( SQLITE_DBSTATUS_CACHE_HIT+1==SQLITE_DBSTATUS_CACHE_MISS ); assert( SQLITE_DBSTATUS_CACHE_HIT+2==SQLITE_DBSTATUS_CACHE_WRITE ); assert( PAGER_STAT_HIT==0 && PAGER_STAT_MISS==1 && PAGER_STAT_WRITE==2 ); *pnVal += pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT]; if( reset ){ pPager->aStat[eStat - SQLITE_DBSTATUS_CACHE_HIT] = 0; } } #endif /* ** Check that there are at least nSavepoint savepoints open. If there are ** currently less than nSavepoints open, then open one or more savepoints ** to make up the difference. If the number of savepoints is already ** equal to nSavepoint, then this function is a no-op. ** ** If a memory allocation fails, SQLITE_NOMEM is returned. If an error ** occurs while opening the sub-journal file, then an IO error code is ** returned. Otherwise, SQLITE_OK. */ static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){ int rc = SQLITE_OK; /* Return code */ int nCurrent = pPager->nSavepoint; /* Current number of savepoints */ int ii; /* Iterator variable */ PagerSavepoint *aNew; /* New Pager.aSavepoint array */ assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); assert( nSavepoint>nCurrent && pPager->useJournal ); /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM ** if the allocation fails. Otherwise, zero the new portion in case a ** malloc failure occurs while populating it in the for(...) loop below. */ aNew = (PagerSavepoint *)sqlite3Realloc( pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint ); if( !aNew ){ return SQLITE_NOMEM; } memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); pPager->aSavepoint = aNew; /* Populate the PagerSavepoint structures just allocated. */ for(ii=nCurrent; iidbSize; if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ aNew[ii].iOffset = pPager->journalOff; }else{ aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); } aNew[ii].iSubRec = pPager->nSubRec; aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); if( !aNew[ii].pInSavepoint ){ return SQLITE_NOMEM; } if( pagerUseWal(pPager) ){ sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); } pPager->nSavepoint = ii+1; } assert( pPager->nSavepoint==nSavepoint ); assertTruncateConstraint(pPager); return rc; } SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ return pagerOpenSavepoint(pPager, nSavepoint); }else{ return SQLITE_OK; } } /* ** This function is called to rollback or release (commit) a savepoint. ** The savepoint to release or rollback need not be the most recently ** created savepoint. ** ** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE. ** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with ** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes ** that have occurred since the specified savepoint was created. ** ** The savepoint to rollback or release is identified by parameter ** iSavepoint. A value of 0 means to operate on the outermost savepoint ** (the first created). A value of (Pager.nSavepoint-1) means operate ** on the most recently created savepoint. If iSavepoint is greater than ** (Pager.nSavepoint-1), then this function is a no-op. ** ** If a negative value is passed to this function, then the current ** transaction is rolled back. This is different to calling ** sqlite3PagerRollback() because this function does not terminate ** the transaction or unlock the database, it just restores the ** contents of the database to its original state. ** ** In any case, all savepoints with an index greater than iSavepoint ** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE), ** then savepoint iSavepoint is also destroyed. ** ** This function may return SQLITE_NOMEM if a memory allocation fails, ** or an IO error code if an IO error occurs while rolling back a ** savepoint. If no errors occur, SQLITE_OK is returned. */ SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ int rc = pPager->errCode; /* Return code */ assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK ); if( rc==SQLITE_OK && iSavepointnSavepoint ){ int ii; /* Iterator variable */ int nNew; /* Number of remaining savepoints after this op. */ /* Figure out how many savepoints will still be active after this ** operation. Store this value in nNew. Then free resources associated ** with any savepoints that are destroyed by this operation. */ nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1); for(ii=nNew; iinSavepoint; ii++){ sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint); } pPager->nSavepoint = nNew; /* If this is a release of the outermost savepoint, truncate ** the sub-journal to zero bytes in size. */ if( op==SAVEPOINT_RELEASE ){ if( nNew==0 && isOpen(pPager->sjfd) ){ /* Only truncate if it is an in-memory sub-journal. */ if( sqlite3IsMemJournal(pPager->sjfd) ){ rc = sqlite3OsTruncate(pPager->sjfd, 0); assert( rc==SQLITE_OK ); } pPager->nSubRec = 0; } } /* Else this is a rollback operation, playback the specified savepoint. ** If this is a temp-file, it is possible that the journal file has ** not yet been opened. In this case there have been no changes to ** the database file, so the playback operation can be skipped. */ else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){ PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; rc = pagerPlaybackSavepoint(pPager, pSavepoint); assert(rc!=SQLITE_DONE); } } return rc; } /* ** Return the full pathname of the database file. ** ** Except, if the pager is in-memory only, then return an empty string if ** nullIfMemDb is true. This routine is called with nullIfMemDb==1 when ** used to report the filename to the user, for compatibility with legacy ** behavior. But when the Btree needs to know the filename for matching to ** shared cache, it uses nullIfMemDb==0 so that in-memory databases can ** participate in shared-cache. */ SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager *pPager, int nullIfMemDb){ return (nullIfMemDb && pPager->memDb) ? "" : pPager->zFilename; } /* ** Return the VFS structure for the pager. */ SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ return pPager->pVfs; } #ifdef SQLITE_DIRECT_OVERFLOW_READ /* ** Return the file handle for the database file associated ** with the pager. This might return NULL if the file has ** not yet been opened. */ SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){ return pPager->fd; } #endif /* ** Return the full pathname of the journal file. */ SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager *pPager){ return pPager->zJournal; } /* ** Return true if fsync() calls are disabled for this pager. Return FALSE ** if fsync()s are executed normally. */ SQLITE_PRIVATE int sqlite3PagerNosync(Pager *pPager){ return pPager->noSync; } #ifdef SQLITE_HAS_CODEC /* ** Set or retrieve the codec for this pager */ SQLITE_PRIVATE void sqlite3PagerSetCodec( Pager *pPager, void *(*xCodec)(void*,void*,Pgno,int), void (*xCodecSizeChng)(void*,int,int), void (*xCodecFree)(void*), void *pCodec ){ if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec); pPager->xCodec = pPager->memDb ? 0 : xCodec; pPager->xCodecSizeChng = xCodecSizeChng; pPager->xCodecFree = xCodecFree; pPager->pCodec = pCodec; pagerReportSize(pPager); } SQLITE_PRIVATE void *sqlite3PagerGetCodec(Pager *pPager){ return pPager->pCodec; } /* ** This function is called by the wal module when writing page content ** into the log file. ** ** This function returns a pointer to a buffer containing the encrypted ** page content. If a malloc fails, this function may return NULL. */ SQLITE_PRIVATE void *sqlite3PagerCodec(PgHdr *pPg){ void *aData = 0; CODEC2(pPg->pPager, pPg->pData, pPg->pgno, 6, return 0, aData); return aData; } /* ** Return the current pager state */ SQLITE_PRIVATE int sqlite3PagerState(Pager *pPager){ return pPager->eState; } #endif /* SQLITE_HAS_CODEC */ #ifndef SQLITE_OMIT_AUTOVACUUM /* ** Move the page pPg to location pgno in the file. ** ** There must be no references to the page previously located at ** pgno (which we call pPgOld) though that page is allowed to be ** in cache. If the page previously located at pgno is not already ** in the rollback journal, it is not put there by by this routine. ** ** References to the page pPg remain valid. Updating any ** meta-data associated with pPg (i.e. data stored in the nExtra bytes ** allocated along with the page) is the responsibility of the caller. ** ** A transaction must be active when this routine is called. It used to be ** required that a statement transaction was not active, but this restriction ** has been removed (CREATE INDEX needs to move a page when a statement ** transaction is active). ** ** If the fourth argument, isCommit, is non-zero, then this page is being ** moved as part of a database reorganization just before the transaction ** is being committed. In this case, it is guaranteed that the database page ** pPg refers to will not be written to again within this transaction. ** ** This function may return SQLITE_NOMEM or an IO error code if an error ** occurs. Otherwise, it returns SQLITE_OK. */ SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){ PgHdr *pPgOld; /* The page being overwritten. */ Pgno needSyncPgno = 0; /* Old value of pPg->pgno, if sync is required */ int rc; /* Return code */ assert( pPg->nRef>0 ); assert( pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD ); assert( assert_pager_state(pPager) ); /* If the page being moved is dirty and has not been saved by the latest ** savepoint, then save the current contents of the page into the ** sub-journal now. This is required to handle the following scenario: ** ** BEGIN; ** ** SAVEPOINT one; ** ** ROLLBACK TO one; ** ** If page X were not written to the sub-journal here, it would not ** be possible to restore its contents when the "ROLLBACK TO one" ** statement were is processed. ** ** subjournalPage() may need to allocate space to store pPg->pgno into ** one or more savepoint bitvecs. This is the reason this function ** may return SQLITE_NOMEM. */ if( (pPg->flags & PGHDR_DIRTY)!=0 && SQLITE_OK!=(rc = subjournalPageIfRequired(pPg)) ){ return rc; } PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno)); IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno)) /* If the journal needs to be sync()ed before page pPg->pgno can ** be written to, store pPg->pgno in local variable needSyncPgno. ** ** If the isCommit flag is set, there is no need to remember that ** the journal needs to be sync()ed before database page pPg->pgno ** can be written to. The caller has already promised not to write to it. */ if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){ needSyncPgno = pPg->pgno; assert( pPager->journalMode==PAGER_JOURNALMODE_OFF || pageInJournal(pPager, pPg) || pPg->pgno>pPager->dbOrigSize ); assert( pPg->flags&PGHDR_DIRTY ); } /* If the cache contains a page with page-number pgno, remove it ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for ** page pgno before the 'move' operation, it needs to be retained ** for the page moved there. */ pPg->flags &= ~PGHDR_NEED_SYNC; pPgOld = sqlite3PagerLookup(pPager, pgno); assert( !pPgOld || pPgOld->nRef==1 ); if( pPgOld ){ pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC); sqlite3PcacheDrop(pPgOld); } sqlite3PcacheMove(pPg, pgno); sqlite3PcacheMakeDirty(pPg); if( needSyncPgno ){ /* If needSyncPgno is non-zero, then the journal file needs to be ** sync()ed before any data is written to database file page needSyncPgno. ** Currently, no such page exists in the page-cache and the ** "is journaled" bitvec flag has been set. This needs to be remedied by ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC ** flag. ** ** If the attempt to load the page into the page-cache fails, (due ** to a malloc() or IO failure), clear the bit in the pInJournal[] ** array. Otherwise, if the page is loaded and written again in ** this transaction, it may be written to the database file before ** it is synced into the journal file. This way, it may end up in ** the journal file twice, but that is not a problem. */ PgHdr *pPgHdr; rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr); if( rc!=SQLITE_OK ){ if( needSyncPgno<=pPager->dbOrigSize ){ assert( pPager->pTmpSpace!=0 ); sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace); } return rc; } pPgHdr->flags |= PGHDR_NEED_SYNC; sqlite3PcacheMakeDirty(pPgHdr); sqlite3PagerUnrefNotNull(pPgHdr); } return SQLITE_OK; } #endif /* ** The page handle passed as the first argument refers to a dirty page ** with a page number other than iNew. This function changes the page's ** page number to iNew and sets the value of the PgHdr.flags field to ** the value passed as the third parameter. */ SQLITE_PRIVATE void sqlite3PagerRekey(DbPage *pPg, Pgno iNew, u16 flags){ assert( pPg->pgno!=iNew ); pPg->flags = flags; sqlite3PcacheMove(pPg, iNew); } /* ** Return a pointer to the data for the specified page. */ SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *pPg){ assert( pPg->nRef>0 || pPg->pPager->memDb ); return pPg->pData; } /* ** Return a pointer to the Pager.nExtra bytes of "extra" space ** allocated along with the specified page. */ SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *pPg){ return pPg->pExtra; } #if 0 /* ** Get/set the locking-mode for this pager. Parameter eMode must be one ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then ** the locking-mode is set to the value specified. ** ** The returned value is either PAGER_LOCKINGMODE_NORMAL or ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated) ** locking-mode. */ SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *pPager, int eMode){ assert( eMode==PAGER_LOCKINGMODE_QUERY || eMode==PAGER_LOCKINGMODE_NORMAL || eMode==PAGER_LOCKINGMODE_EXCLUSIVE ); assert( PAGER_LOCKINGMODE_QUERY<0 ); assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 ); assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) ); if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){ pPager->exclusiveMode = (u8)eMode; } return (int)pPager->exclusiveMode; } /* ** Set the journal-mode for this pager. Parameter eMode must be one of: ** ** PAGER_JOURNALMODE_DELETE ** PAGER_JOURNALMODE_TRUNCATE ** PAGER_JOURNALMODE_PERSIST ** PAGER_JOURNALMODE_OFF ** PAGER_JOURNALMODE_MEMORY ** PAGER_JOURNALMODE_WAL ** ** The journalmode is set to the value specified if the change is allowed. ** The change may be disallowed for the following reasons: ** ** * An in-memory database can only have its journal_mode set to _OFF ** or _MEMORY. ** ** * Temporary databases cannot have _WAL journalmode. ** ** The returned indicate the current (possibly updated) journal-mode. */ SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ u8 eOld = pPager->journalMode; /* Prior journalmode */ #ifdef SQLITE_DEBUG /* The print_pager_state() routine is intended to be used by the debugger ** only. We invoke it once here to suppress a compiler warning. */ print_pager_state(pPager); #endif /* The eMode parameter is always valid */ assert( eMode==PAGER_JOURNALMODE_DELETE || eMode==PAGER_JOURNALMODE_TRUNCATE || eMode==PAGER_JOURNALMODE_PERSIST || eMode==PAGER_JOURNALMODE_OFF || eMode==PAGER_JOURNALMODE_WAL || eMode==PAGER_JOURNALMODE_MEMORY ); /* This routine is only called from the OP_JournalMode opcode, and ** the logic there will never allow a temporary file to be changed ** to WAL mode. */ assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL ); if( eMode!=eOld ){ /* Change the journal mode. */ assert( pPager->eState!=PAGER_ERROR ); pPager->journalMode = (u8)eMode; /* When transistioning from TRUNCATE or PERSIST to any other journal ** mode except WAL, unless the pager is in locking_mode=exclusive mode, ** delete the journal file. */ assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 ); assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 ); assert( (PAGER_JOURNALMODE_DELETE & 5)==0 ); assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 ); assert( (PAGER_JOURNALMODE_OFF & 5)==0 ); assert( (PAGER_JOURNALMODE_WAL & 5)==5 ); assert( isOpen(pPager->fd) || pPager->exclusiveMode ); if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){ /* In this case we would like to delete the journal file. If it is ** not possible, then that is not a problem. Deleting the journal file ** here is an optimization only. ** ** Before deleting the journal file, obtain a RESERVED lock on the ** database file. This ensures that the journal file is not deleted ** while it is in use by some other client. */ sqlite3OsClose(pPager->jfd); if( pPager->eLock>=RESERVED_LOCK ){ sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); }else{ int rc = SQLITE_OK; int state = pPager->eState; assert( state==PAGER_OPEN || state==PAGER_READER ); if( state==PAGER_OPEN ){ rc = sqlite3PagerSharedLock(pPager); } if( pPager->eState==PAGER_READER ){ assert( rc==SQLITE_OK ); rc = pagerLockDb(pPager, RESERVED_LOCK); } if( rc==SQLITE_OK ){ sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0); } if( rc==SQLITE_OK && state==PAGER_READER ){ pagerUnlockDb(pPager, SHARED_LOCK); }else if( state==PAGER_OPEN ){ pager_unlock(pPager); } assert( state==pPager->eState ); } }else if( eMode==PAGER_JOURNALMODE_OFF ){ sqlite3OsClose(pPager->jfd); } } /* Return the new journal mode */ return (int)pPager->journalMode; } /* ** Return the current journal mode. */ SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){ return (int)pPager->journalMode; } /* ** Return TRUE if the pager is in a state where it is OK to change the ** journalmode. Journalmode changes can only happen when the database ** is unmodified. */ SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){ assert( assert_pager_state(pPager) ); if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0; if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0; return 1; } /* ** Get/set the size-limit used for persistent journal files. ** ** Setting the size limit to -1 means no limit is enforced. ** An attempt to set a limit smaller than -1 is a no-op. */ SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){ if( iLimit>=-1 ){ pPager->journalSizeLimit = iLimit; sqlite3WalLimit(pPager->pWal, iLimit); } return pPager->journalSizeLimit; } #endif #ifndef SQLITE_OMIT_VACUUM #if 0 /* ** Clear the pager cache. */ SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *pPager){ if( pPager->tempFile==0 ) pager_reset(pPager); } #endif #endif #ifndef SQLITE_OMIT_WAL /* ** This function is called when the user invokes "PRAGMA wal_checkpoint", ** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint() ** or wal_blocking_checkpoint() API functions. ** ** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. */ SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, int eMode, int *pnLog, int *pnCkpt){ int rc = SQLITE_OK; if( pPager->pWal ){ rc = sqlite3WalCheckpoint(pPager->pWal, eMode, (eMode==SQLITE_CHECKPOINT_PASSIVE ? 0 : pPager->xBusyHandler), pPager->pBusyHandlerArg, pPager->ckptSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace, pnLog, pnCkpt ); } return rc; } #if 0 SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager){ return sqlite3WalCallback(pPager->pWal); } #endif /* ** Return true if the underlying VFS for the given pager supports the ** primitives necessary for write-ahead logging. */ SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager){ const sqlite3_io_methods *pMethods = pPager->fd->pMethods; return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap); } /* ** Attempt to take an exclusive lock on the database file. If a PENDING lock ** is obtained instead, immediately release it. */ static int pagerExclusiveLock(Pager *pPager){ int rc; /* Return code */ assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); rc = pagerLockDb(pPager, EXCLUSIVE_LOCK); if( rc!=SQLITE_OK ){ /* If the attempt to grab the exclusive lock failed, release the ** pending lock that may have been obtained instead. */ pagerUnlockDb(pPager, SHARED_LOCK); } return rc; } /* ** Call sqlite3WalOpen() to open the WAL handle. If the pager is in ** exclusive-locking mode when this function is called, take an EXCLUSIVE ** lock on the database file and use heap-memory to store the wal-index ** in. Otherwise, use the normal shared-memory. */ static int pagerOpenWal(Pager *pPager){ int rc = SQLITE_OK; assert( pPager->pWal==0 && pPager->tempFile==0 ); assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK ); /* If the pager is already in exclusive-mode, the WAL module will use ** heap-memory for the wal-index instead of the VFS shared-memory ** implementation. Take the exclusive lock now, before opening the WAL ** file, to make sure this is safe. */ if( pPager->exclusiveMode ){ rc = pagerExclusiveLock(pPager); } /* Open the connection to the log file. If this operation fails, ** (e.g. due to malloc() failure), return an error code. */ if( rc==SQLITE_OK ){ rc = sqlite3WalOpen(pPager->pVfs, pPager->fd, pPager->zWal, pPager->exclusiveMode, pPager->journalSizeLimit, &pPager->pWal ); } pagerFixMaplimit(pPager); return rc; } /* ** The caller must be holding a SHARED lock on the database file to call ** this function. ** ** If the pager passed as the first argument is open on a real database ** file (not a temp file or an in-memory database), and the WAL file ** is not already open, make an attempt to open it now. If successful, ** return SQLITE_OK. If an error occurs or the VFS used by the pager does ** not support the xShmXXX() methods, return an error code. *pbOpen is ** not modified in either case. ** ** If the pager is open on a temp-file (or in-memory database), or if ** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK ** without doing anything. */ SQLITE_PRIVATE int sqlite3PagerOpenWal( Pager *pPager, /* Pager object */ int *pbOpen /* OUT: Set to true if call is a no-op */ ){ int rc = SQLITE_OK; /* Return code */ assert( assert_pager_state(pPager) ); assert( pPager->eState==PAGER_OPEN || pbOpen ); assert( pPager->eState==PAGER_READER || !pbOpen ); assert( pbOpen==0 || *pbOpen==0 ); assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) ); if( !pPager->tempFile && !pPager->pWal ){ if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN; /* Close any rollback journal previously open */ sqlite3OsClose(pPager->jfd); rc = pagerOpenWal(pPager); if( rc==SQLITE_OK ){ pPager->journalMode = PAGER_JOURNALMODE_WAL; pPager->eState = PAGER_OPEN; } }else{ *pbOpen = 1; } return rc; } /* ** This function is called to close the connection to the log file prior ** to switching from WAL to rollback mode. ** ** Before closing the log file, this function attempts to take an ** EXCLUSIVE lock on the database file. If this cannot be obtained, an ** error (SQLITE_BUSY) is returned and the log connection is not closed. ** If successful, the EXCLUSIVE lock is not released before returning. */ #if 0 SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){ int rc = SQLITE_OK; assert( pPager->journalMode==PAGER_JOURNALMODE_WAL ); /* If the log file is not already open, but does exist in the file-system, ** it may need to be checkpointed before the connection can switch to ** rollback mode. Open it now so this can happen. */ if( !pPager->pWal ){ int logexists = 0; rc = pagerLockDb(pPager, SHARED_LOCK); if( rc==SQLITE_OK ){ rc = sqlite3OsAccess( pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists ); } if( rc==SQLITE_OK && logexists ){ rc = pagerOpenWal(pPager); } } /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on ** the database file, the log and log-summary files will be deleted. */ if( rc==SQLITE_OK && pPager->pWal ){ rc = pagerExclusiveLock(pPager); if( rc==SQLITE_OK ){ rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, (u8*)pPager->pTmpSpace); pPager->pWal = 0; pagerFixMaplimit(pPager); } } return rc; } #endif #endif /* !SQLITE_OMIT_WAL */ #ifdef SQLITE_ENABLE_ZIPVFS /* ** A read-lock must be held on the pager when this function is called. If ** the pager is in WAL mode and the WAL file currently contains one or more ** frames, return the size in bytes of the page images stored within the ** WAL frames. Otherwise, if this is not a WAL database or the WAL file ** is empty, return 0. */ SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager){ assert( pPager->eState>=PAGER_READER ); return sqlite3WalFramesize(pPager->pWal); } #endif #endif /* SQLITE_OMIT_DISKIO */ /************** End of pager.c ***********************************************/ /************** Begin file wal.c *********************************************/ /* ** 2010 February 1 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file contains the implementation of a write-ahead log (WAL) used in ** "journal_mode=WAL" mode. ** ** WRITE-AHEAD LOG (WAL) FILE FORMAT ** ** A WAL file consists of a header followed by zero or more "frames". ** Each frame records the revised content of a single page from the ** database file. All changes to the database are recorded by writing ** frames into the WAL. Transactions commit when a frame is written that ** contains a commit marker. A single WAL can and usually does record ** multiple transactions. Periodically, the content of the WAL is ** transferred back into the database file in an operation called a ** "checkpoint". ** ** A single WAL file can be used multiple times. In other words, the ** WAL can fill up with frames and then be checkpointed and then new ** frames can overwrite the old ones. A WAL always grows from beginning ** toward the end. Checksums and counters attached to each frame are ** used to determine which frames within the WAL are valid and which ** are leftovers from prior checkpoints. ** ** The WAL header is 32 bytes in size and consists of the following eight ** big-endian 32-bit unsigned integer values: ** ** 0: Magic number. 0x377f0682 or 0x377f0683 ** 4: File format version. Currently 3007000 ** 8: Database page size. Example: 1024 ** 12: Checkpoint sequence number ** 16: Salt-1, random integer incremented with each checkpoint ** 20: Salt-2, a different random integer changing with each ckpt ** 24: Checksum-1 (first part of checksum for first 24 bytes of header). ** 28: Checksum-2 (second part of checksum for first 24 bytes of header). ** ** Immediately following the wal-header are zero or more frames. Each ** frame consists of a 24-byte frame-header followed by a bytes ** of page data. The frame-header is six big-endian 32-bit unsigned ** integer values, as follows: ** ** 0: Page number. ** 4: For commit records, the size of the database image in pages ** after the commit. For all other records, zero. ** 8: Salt-1 (copied from the header) ** 12: Salt-2 (copied from the header) ** 16: Checksum-1. ** 20: Checksum-2. ** ** A frame is considered valid if and only if the following conditions are ** true: ** ** (1) The salt-1 and salt-2 values in the frame-header match ** salt values in the wal-header ** ** (2) The checksum values in the final 8 bytes of the frame-header ** exactly match the checksum computed consecutively on the ** WAL header and the first 8 bytes and the content of all frames ** up to and including the current frame. ** ** The checksum is computed using 32-bit big-endian integers if the ** magic number in the first 4 bytes of the WAL is 0x377f0683 and it ** is computed using little-endian if the magic number is 0x377f0682. ** The checksum values are always stored in the frame header in a ** big-endian format regardless of which byte order is used to compute ** the checksum. The checksum is computed by interpreting the input as ** an even number of unsigned 32-bit integers: x[0] through x[N]. The ** algorithm used for the checksum is as follows: ** ** for i from 0 to n-1 step 2: ** s0 += x[i] + s1; ** s1 += x[i+1] + s0; ** endfor ** ** Note that s0 and s1 are both weighted checksums using fibonacci weights ** in reverse order (the largest fibonacci weight occurs on the first element ** of the sequence being summed.) The s1 value spans all 32-bit ** terms of the sequence whereas s0 omits the final term. ** ** On a checkpoint, the WAL is first VFS.xSync-ed, then valid content of the ** WAL is transferred into the database, then the database is VFS.xSync-ed. ** The VFS.xSync operations serve as write barriers - all writes launched ** before the xSync must complete before any write that launches after the ** xSync begins. ** ** After each checkpoint, the salt-1 value is incremented and the salt-2 ** value is randomized. This prevents old and new frames in the WAL from ** being considered valid at the same time and being checkpointing together ** following a crash. ** ** READER ALGORITHM ** ** To read a page from the database (call it page number P), a reader ** first checks the WAL to see if it contains page P. If so, then the ** last valid instance of page P that is a followed by a commit frame ** or is a commit frame itself becomes the value read. If the WAL ** contains no copies of page P that are valid and which are a commit ** frame or are followed by a commit frame, then page P is read from ** the database file. ** ** To start a read transaction, the reader records the index of the last ** valid frame in the WAL. The reader uses this recorded "mxFrame" value ** for all subsequent read operations. New transactions can be appended ** to the WAL, but as long as the reader uses its original mxFrame value ** and ignores the newly appended content, it will see a consistent snapshot ** of the database from a single point in time. This technique allows ** multiple concurrent readers to view different versions of the database ** content simultaneously. ** ** The reader algorithm in the previous paragraphs works correctly, but ** because frames for page P can appear anywhere within the WAL, the ** reader has to scan the entire WAL looking for page P frames. If the ** WAL is large (multiple megabytes is typical) that scan can be slow, ** and read performance suffers. To overcome this problem, a separate ** data structure called the wal-index is maintained to expedite the ** search for frames of a particular page. ** ** WAL-INDEX FORMAT ** ** Conceptually, the wal-index is shared memory, though VFS implementations ** might choose to implement the wal-index using a mmapped file. Because ** the wal-index is shared memory, SQLite does not support journal_mode=WAL ** on a network filesystem. All users of the database must be able to ** share memory. ** ** The wal-index is transient. After a crash, the wal-index can (and should ** be) reconstructed from the original WAL file. In fact, the VFS is required ** to either truncate or zero the header of the wal-index when the last ** connection to it closes. Because the wal-index is transient, it can ** use an architecture-specific format; it does not have to be cross-platform. ** Hence, unlike the database and WAL file formats which store all values ** as big endian, the wal-index can store multi-byte values in the native ** byte order of the host computer. ** ** The purpose of the wal-index is to answer this question quickly: Given ** a page number P and a maximum frame index M, return the index of the ** last frame in the wal before frame M for page P in the WAL, or return ** NULL if there are no frames for page P in the WAL prior to M. ** ** The wal-index consists of a header region, followed by an one or ** more index blocks. ** ** The wal-index header contains the total number of frames within the WAL ** in the mxFrame field. ** ** Each index block except for the first contains information on ** HASHTABLE_NPAGE frames. The first index block contains information on ** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and ** HASHTABLE_NPAGE are selected so that together the wal-index header and ** first index block are the same size as all other index blocks in the ** wal-index. ** ** Each index block contains two sections, a page-mapping that contains the ** database page number associated with each wal frame, and a hash-table ** that allows readers to query an index block for a specific page number. ** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE ** for the first index block) 32-bit page numbers. The first entry in the ** first index-block contains the database page number corresponding to the ** first frame in the WAL file. The first entry in the second index block ** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in ** the log, and so on. ** ** The last index block in a wal-index usually contains less than the full ** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers, ** depending on the contents of the WAL file. This does not change the ** allocated size of the page-mapping array - the page-mapping array merely ** contains unused entries. ** ** Even without using the hash table, the last frame for page P ** can be found by scanning the page-mapping sections of each index block ** starting with the last index block and moving toward the first, and ** within each index block, starting at the end and moving toward the ** beginning. The first entry that equals P corresponds to the frame ** holding the content for that page. ** ** The hash table consists of HASHTABLE_NSLOT 16-bit unsigned integers. ** HASHTABLE_NSLOT = 2*HASHTABLE_NPAGE, and there is one entry in the ** hash table for each page number in the mapping section, so the hash ** table is never more than half full. The expected number of collisions ** prior to finding a match is 1. Each entry of the hash table is an ** 1-based index of an entry in the mapping section of the same ** index block. Let K be the 1-based index of the largest entry in ** the mapping section. (For index blocks other than the last, K will ** always be exactly HASHTABLE_NPAGE (4096) and for the last index block ** K will be (mxFrame%HASHTABLE_NPAGE).) Unused slots of the hash table ** contain a value of 0. ** ** To look for page P in the hash table, first compute a hash iKey on ** P as follows: ** ** iKey = (P * 383) % HASHTABLE_NSLOT ** ** Then start scanning entries of the hash table, starting with iKey ** (wrapping around to the beginning when the end of the hash table is ** reached) until an unused hash slot is found. Let the first unused slot ** be at index iUnused. (iUnused might be less than iKey if there was ** wrap-around.) Because the hash table is never more than half full, ** the search is guaranteed to eventually hit an unused entry. Let ** iMax be the value between iKey and iUnused, closest to iUnused, ** where aHash[iMax]==P. If there is no iMax entry (if there exists ** no hash slot such that aHash[i]==p) then page P is not in the ** current index block. Otherwise the iMax-th mapping entry of the ** current index block corresponds to the last entry that references ** page P. ** ** A hash search begins with the last index block and moves toward the ** first index block, looking for entries corresponding to page P. On ** average, only two or three slots in each index block need to be ** examined in order to either find the last entry for page P, or to ** establish that no such entry exists in the block. Each index block ** holds over 4000 entries. So two or three index blocks are sufficient ** to cover a typical 10 megabyte WAL file, assuming 1K pages. 8 or 10 ** comparisons (on average) suffice to either locate a frame in the ** WAL or to establish that the frame does not exist in the WAL. This ** is much faster than scanning the entire 10MB WAL. ** ** Note that entries are added in order of increasing K. Hence, one ** reader might be using some value K0 and a second reader that started ** at a later time (after additional transactions were added to the WAL ** and to the wal-index) might be using a different value K1, where K1>K0. ** Both readers can use the same hash table and mapping section to get ** the correct result. There may be entries in the hash table with ** K>K0 but to the first reader, those entries will appear to be unused ** slots in the hash table and so the first reader will get an answer as ** if no values greater than K0 had ever been inserted into the hash table ** in the first place - which is what reader one wants. Meanwhile, the ** second reader using K1 will see additional values that were inserted ** later, which is exactly what reader two wants. ** ** When a rollback occurs, the value of K is decreased. Hash table entries ** that correspond to frames greater than the new K value are removed ** from the hash table at this point. */ #ifndef SQLITE_OMIT_WAL /* #include "wal.h" */ /* ** Trace output macros */ #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) SQLITE_PRIVATE int sqlite3WalTrace = 0; # define WALTRACE(X) if(sqlite3WalTrace) sqlite3DebugPrintf X #else # define WALTRACE(X) #endif /* ** The maximum (and only) versions of the wal and wal-index formats ** that may be interpreted by this version of SQLite. ** ** If a client begins recovering a WAL file and finds that (a) the checksum ** values in the wal-header are correct and (b) the version field is not ** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN. ** ** Similarly, if a client successfully reads a wal-index header (i.e. the ** checksum test is successful) and finds that the version field is not ** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite ** returns SQLITE_CANTOPEN. */ #define WAL_MAX_VERSION 3007000 #define WALINDEX_MAX_VERSION 3007000 /* ** Indices of various locking bytes. WAL_NREADER is the number ** of available reader locks and should be at least 3. */ #define WAL_WRITE_LOCK 0 #define WAL_ALL_BUT_WRITE 1 #define WAL_CKPT_LOCK 1 #define WAL_RECOVER_LOCK 2 #define WAL_READ_LOCK(I) (3+(I)) #define WAL_NREADER (SQLITE_SHM_NLOCK-3) /* Object declarations */ typedef struct WalIndexHdr WalIndexHdr; typedef struct WalIterator WalIterator; typedef struct WalCkptInfo WalCkptInfo; /* ** The following object holds a copy of the wal-index header content. ** ** The actual header in the wal-index consists of two copies of this ** object. ** ** The szPage value can be any power of 2 between 512 and 32768, inclusive. ** Or it can be 1 to represent a 65536-byte page. The latter case was ** added in 3.7.1 when support for 64K pages was added. */ struct WalIndexHdr { u32 iVersion; /* Wal-index version */ u32 unused; /* Unused (padding) field */ u32 iChange; /* Counter incremented each transaction */ u8 isInit; /* 1 when initialized */ u8 bigEndCksum; /* True if checksums in WAL are big-endian */ u16 szPage; /* Database page size in bytes. 1==64K */ u32 mxFrame; /* Index of last valid frame in the WAL */ u32 nPage; /* Size of database in pages */ u32 aFrameCksum[2]; /* Checksum of last frame in log */ u32 aSalt[2]; /* Two salt values copied from WAL header */ u32 aCksum[2]; /* Checksum over all prior fields */ }; /* ** A copy of the following object occurs in the wal-index immediately ** following the second copy of the WalIndexHdr. This object stores ** information used by checkpoint. ** ** nBackfill is the number of frames in the WAL that have been written ** back into the database. (We call the act of moving content from WAL to ** database "backfilling".) The nBackfill number is never greater than ** WalIndexHdr.mxFrame. nBackfill can only be increased by threads ** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). ** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from ** mxFrame back to zero when the WAL is reset. ** ** There is one entry in aReadMark[] for each reader lock. If a reader ** holds read-lock K, then the value in aReadMark[K] is no greater than ** the mxFrame for that reader. The value READMARK_NOT_USED (0xffffffff) ** for any aReadMark[] means that entry is unused. aReadMark[0] is ** a special case; its value is never used and it exists as a place-holder ** to avoid having to offset aReadMark[] indexs by one. Readers holding ** WAL_READ_LOCK(0) always ignore the entire WAL and read all content ** directly from the database. ** ** The value of aReadMark[K] may only be changed by a thread that ** is holding an exclusive lock on WAL_READ_LOCK(K). Thus, the value of ** aReadMark[K] cannot changed while there is a reader is using that mark ** since the reader will be holding a shared lock on WAL_READ_LOCK(K). ** ** The checkpointer may only transfer frames from WAL to database where ** the frame numbers are less than or equal to every aReadMark[] that is ** in use (that is, every aReadMark[j] for which there is a corresponding ** WAL_READ_LOCK(j)). New readers (usually) pick the aReadMark[] with the ** largest value and will increase an unused aReadMark[] to mxFrame if there ** is not already an aReadMark[] equal to mxFrame. The exception to the ** previous sentence is when nBackfill equals mxFrame (meaning that everything ** in the WAL has been backfilled into the database) then new readers ** will choose aReadMark[0] which has value 0 and hence such reader will ** get all their all content directly from the database file and ignore ** the WAL. ** ** Writers normally append new frames to the end of the WAL. However, ** if nBackfill equals mxFrame (meaning that all WAL content has been ** written back into the database) and if no readers are using the WAL ** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then ** the writer will first "reset" the WAL back to the beginning and start ** writing new content beginning at frame 1. ** ** We assume that 32-bit loads are atomic and so no locks are needed in ** order to read from any aReadMark[] entries. */ struct WalCkptInfo { u32 nBackfill; /* Number of WAL frames backfilled into DB */ u32 aReadMark[WAL_NREADER]; /* Reader marks */ }; #define READMARK_NOT_USED 0xffffffff /* A block of WALINDEX_LOCK_RESERVED bytes beginning at ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems ** only support mandatory file-locks, we do not read or write data ** from the region of the file on which locks are applied. */ #define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo)) #define WALINDEX_LOCK_RESERVED 16 #define WALINDEX_HDR_SIZE (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED) /* Size of header before each frame in wal */ #define WAL_FRAME_HDRSIZE 24 /* Size of write ahead log header, including checksum. */ /* #define WAL_HDRSIZE 24 */ #define WAL_HDRSIZE 32 /* WAL magic value. Either this value, or the same value with the least ** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit ** big-endian format in the first 4 bytes of a WAL file. ** ** If the LSB is set, then the checksums for each frame within the WAL ** file are calculated by treating all data as an array of 32-bit ** big-endian words. Otherwise, they are calculated by interpreting ** all data as 32-bit little-endian words. */ #define WAL_MAGIC 0x377f0682 /* ** Return the offset of frame iFrame in the write-ahead log file, ** assuming a database page size of szPage bytes. The offset returned ** is to the start of the write-ahead log frame-header. */ #define walFrameOffset(iFrame, szPage) ( \ WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE) \ ) /* ** An open write-ahead log file is represented by an instance of the ** following object. */ struct Wal { sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ sqlite3_file *pDbFd; /* File handle for the database file */ sqlite3_file *pWalFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ i64 mxWalSize; /* Truncate WAL to this size upon reset */ int nWiData; /* Size of array apWiData */ int szFirstBlock; /* Size of first block written to WAL file */ volatile u32 **apWiData; /* Pointer to wal-index content in memory */ u32 szPage; /* Database page size */ i16 readLock; /* Which read lock is being held. -1 for none */ u8 syncFlags; /* Flags to use to sync header writes */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ #ifdef SQLITE_DEBUG u8 lockError; /* True if a locking error has occurred */ #endif }; /* ** Candidate values for Wal.exclusiveMode. */ #define WAL_NORMAL_MODE 0 #define WAL_EXCLUSIVE_MODE 1 #define WAL_HEAPMEMORY_MODE 2 /* ** Possible values for WAL.readOnly */ #define WAL_RDWR 0 /* Normal read/write connection */ #define WAL_RDONLY 1 /* The WAL file is readonly */ #define WAL_SHM_RDONLY 2 /* The SHM file is readonly */ /* ** Each page of the wal-index mapping contains a hash-table made up of ** an array of HASHTABLE_NSLOT elements of the following type. */ typedef u16 ht_slot; /* ** This structure is used to implement an iterator that loops through ** all frames in the WAL in database page order. Where two or more frames ** correspond to the same database page, the iterator visits only the ** frame most recently written to the WAL (in other words, the frame with ** the largest index). ** ** The internals of this structure are only accessed by: ** ** walIteratorInit() - Create a new iterator, ** walIteratorNext() - Step an iterator, ** walIteratorFree() - Free an iterator. ** ** This functionality is used by the checkpoint code (see walCheckpoint()). */ struct WalIterator { int iPrior; /* Last result returned from the iterator */ int nSegment; /* Number of entries in aSegment[] */ struct WalSegment { int iNext; /* Next slot in aIndex[] not yet returned */ ht_slot *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */ u32 *aPgno; /* Array of page numbers. */ int nEntry; /* Nr. of entries in aPgno[] and aIndex[] */ int iZero; /* Frame number associated with aPgno[0] */ } aSegment[1]; /* One for every 32KB page in the wal-index */ }; /* ** Define the parameters of the hash tables in the wal-index file. There ** is a hash-table following every HASHTABLE_NPAGE page numbers in the ** wal-index. ** ** Changing any of these constants will alter the wal-index format and ** create incompatibilities. */ #define HASHTABLE_NPAGE 4096 /* Must be power of 2 */ #define HASHTABLE_HASH_1 383 /* Should be prime */ #define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ /* ** The block of page numbers associated with the first hash-table in a ** wal-index is smaller than usual. This is so that there is a complete ** hash-table on each aligned 32KB page of the wal-index. */ #define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32))) /* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */ #define WALINDEX_PGSZ ( \ sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \ ) /* ** Obtain a pointer to the iPage'th page of the wal-index. The wal-index ** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are ** numbered from zero. ** ** If this call is successful, *ppPage is set to point to the wal-index ** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs, ** then an SQLite error code is returned and *ppPage is set to 0. */ static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){ int rc = SQLITE_OK; /* Enlarge the pWal->apWiData[] array if required */ if( pWal->nWiData<=iPage ){ int nByte = sizeof(u32*)*(iPage+1); volatile u32 **apNew; apNew = (volatile u32 **)sqlite3_realloc64((void *)pWal->apWiData, nByte); if( !apNew ){ *ppPage = 0; return SQLITE_NOMEM; } memset((void*)&apNew[pWal->nWiData], 0, sizeof(u32*)*(iPage+1-pWal->nWiData)); pWal->apWiData = apNew; pWal->nWiData = iPage+1; } /* Request a pointer to the required page from the VFS */ if( pWal->apWiData[iPage]==0 ){ if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ); if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM; }else{ rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, pWal->writeLock, (void volatile **)&pWal->apWiData[iPage] ); if( rc==SQLITE_READONLY ){ pWal->readOnly |= WAL_SHM_RDONLY; rc = SQLITE_OK; } } } *ppPage = pWal->apWiData[iPage]; assert( iPage==0 || *ppPage || rc!=SQLITE_OK ); return rc; } /* ** Return a pointer to the WalCkptInfo structure in the wal-index. */ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ assert( pWal->nWiData>0 && pWal->apWiData[0] ); return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]); } /* ** Return a pointer to the WalIndexHdr structure in the wal-index. */ static volatile WalIndexHdr *walIndexHdr(Wal *pWal){ assert( pWal->nWiData>0 && pWal->apWiData[0] ); return (volatile WalIndexHdr*)pWal->apWiData[0]; } /* ** The argument to this macro must be of type u32. On a little-endian ** architecture, it returns the u32 value that results from interpreting ** the 4 bytes as a big-endian value. On a big-endian architecture, it ** returns the value that would be produced by interpreting the 4 bytes ** of the input value as a little-endian integer. */ #define BYTESWAP32(x) ( \ (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \ + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \ ) /* ** Generate or extend an 8 byte checksum based on the data in ** array aByte[] and the initial values of aIn[0] and aIn[1] (or ** initial values of 0 and 0 if aIn==NULL). ** ** The checksum is written back into aOut[] before returning. ** ** nByte must be a positive multiple of 8. */ static void walChecksumBytes( int nativeCksum, /* True for native byte-order, false for non-native */ u8 *a, /* Content to be checksummed */ int nByte, /* Bytes of content in a[]. Must be a multiple of 8. */ const u32 *aIn, /* Initial checksum value input */ u32 *aOut /* OUT: Final checksum value output */ ){ u32 s1, s2; u32 *aData = (u32 *)a; u32 *aEnd = (u32 *)&a[nByte]; if( aIn ){ s1 = aIn[0]; s2 = aIn[1]; }else{ s1 = s2 = 0; } assert( nByte>=8 ); assert( (nByte&0x00000007)==0 ); if( nativeCksum ){ do { s1 += *aData++ + s2; s2 += *aData++ + s1; }while( aDataexclusiveMode!=WAL_HEAPMEMORY_MODE ){ sqlite3OsShmBarrier(pWal->pDbFd); } } /* ** Write the header information in pWal->hdr into the wal-index. ** ** The checksum on pWal->hdr is updated before it is written. */ static void walIndexWriteHdr(Wal *pWal){ volatile WalIndexHdr *aHdr = walIndexHdr(pWal); const int nCksum = offsetof(WalIndexHdr, aCksum); assert( pWal->writeLock ); pWal->hdr.isInit = 1; pWal->hdr.iVersion = WALINDEX_MAX_VERSION; walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); walShmBarrier(pWal); memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); } /* ** This function encodes a single frame header and writes it to a buffer ** supplied by the caller. A frame-header is made up of a series of ** 4-byte big-endian integers, as follows: ** ** 0: Page number. ** 4: For commit records, the size of the database image in pages ** after the commit. For all other records, zero. ** 8: Salt-1 (copied from the wal-header) ** 12: Salt-2 (copied from the wal-header) ** 16: Checksum-1. ** 20: Checksum-2. */ static void walEncodeFrame( Wal *pWal, /* The write-ahead log */ u32 iPage, /* Database page number for frame */ u32 nTruncate, /* New db size (or 0 for non-commit frames) */ u8 *aData, /* Pointer to page data */ u8 *aFrame /* OUT: Write encoded frame here */ ){ int nativeCksum; /* True for native byte-order checksums */ u32 *aCksum = pWal->hdr.aFrameCksum; assert( WAL_FRAME_HDRSIZE==24 ); sqlite3Put4byte(&aFrame[0], iPage); sqlite3Put4byte(&aFrame[4], nTruncate); memcpy(&aFrame[8], pWal->hdr.aSalt, 8); nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); sqlite3Put4byte(&aFrame[16], aCksum[0]); sqlite3Put4byte(&aFrame[20], aCksum[1]); } /* ** Check to see if the frame with header in aFrame[] and content ** in aData[] is valid. If it is a valid frame, fill *piPage and ** *pnTruncate and return true. Return if the frame is not valid. */ static int walDecodeFrame( Wal *pWal, /* The write-ahead log */ u32 *piPage, /* OUT: Database page number for frame */ u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ u8 *aData, /* Pointer to page data (for checksum) */ u8 *aFrame /* Frame data */ ){ int nativeCksum; /* True for native byte-order checksums */ u32 *aCksum = pWal->hdr.aFrameCksum; u32 pgno; /* Page number of the frame */ assert( WAL_FRAME_HDRSIZE==24 ); /* A frame is only valid if the salt values in the frame-header ** match the salt values in the wal-header. */ if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ){ return 0; } /* A frame is only valid if the page number is creater than zero. */ pgno = sqlite3Get4byte(&aFrame[0]); if( pgno==0 ){ return 0; } /* A frame is only valid if a checksum of the WAL header, ** all prior frams, the first 16 bytes of this frame-header, ** and the frame-data matches the checksum in the last 8 ** bytes of this frame-header. */ nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) ){ /* Checksum failed. */ return 0; } /* If we reach this point, the frame is valid. Return the page number ** and the new database size. */ *piPage = pgno; *pnTruncate = sqlite3Get4byte(&aFrame[4]); return 1; } #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) /* ** Names of locks. This routine is used to provide debugging output and is not ** a part of an ordinary build. */ static const char *walLockName(int lockIdx){ if( lockIdx==WAL_WRITE_LOCK ){ return "WRITE-LOCK"; }else if( lockIdx==WAL_CKPT_LOCK ){ return "CKPT-LOCK"; }else if( lockIdx==WAL_RECOVER_LOCK ){ return "RECOVER-LOCK"; }else{ static char zName[15]; sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]", lockIdx-WAL_READ_LOCK(0)); return zName; } } #endif /*defined(SQLITE_TEST) || defined(SQLITE_DEBUG) */ /* ** Set or release locks on the WAL. Locks are either shared or exclusive. ** A lock cannot be moved directly between shared and exclusive - it must go ** through the unlocked state first. ** ** In locking_mode=EXCLUSIVE, all of these routines become no-ops. */ static int walLockShared(Wal *pWal, int lockIdx){ int rc; if( pWal->exclusiveMode ) return SQLITE_OK; rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal, walLockName(lockIdx), rc ? "failed" : "ok")); VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) return rc; } static void walUnlockShared(Wal *pWal, int lockIdx){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); } static int walLockExclusive(Wal *pWal, int lockIdx, int n, int fBlock){ int rc; if( pWal->exclusiveMode ) return SQLITE_OK; if( fBlock ) sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_WAL_BLOCK, 0); rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, walLockName(lockIdx), n, rc ? "failed" : "ok")); VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); ) return rc; } static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ if( pWal->exclusiveMode ) return; (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, walLockName(lockIdx), n)); } /* ** Compute a hash on a page number. The resulting hash value must land ** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances ** the hash to the next value in the event of a collision. */ static int walHash(u32 iPage){ assert( iPage>0 ); assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 ); return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1); } static int walNextHash(int iPriorHash){ return (iPriorHash+1)&(HASHTABLE_NSLOT-1); } /* ** Return pointers to the hash table and page number array stored on ** page iHash of the wal-index. The wal-index is broken into 32KB pages ** numbered starting from 0. ** ** Set output variable *paHash to point to the start of the hash table ** in the wal-index file. Set *piZero to one less than the frame ** number of the first frame indexed by this hash table. If a ** slot in the hash table is set to N, it refers to frame number ** (*piZero+N) in the log. ** ** Finally, set *paPgno so that *paPgno[1] is the page number of the ** first frame indexed by the hash table, frame (*piZero+1). */ static int walHashGet( Wal *pWal, /* WAL handle */ int iHash, /* Find the iHash'th table */ volatile ht_slot **paHash, /* OUT: Pointer to hash index */ volatile u32 **paPgno, /* OUT: Pointer to page number array */ u32 *piZero /* OUT: Frame associated with *paPgno[0] */ ){ int rc; /* Return code */ volatile u32 *aPgno; rc = walIndexPage(pWal, iHash, &aPgno); assert( rc==SQLITE_OK || iHash>0 ); if( rc==SQLITE_OK ){ u32 iZero; volatile ht_slot *aHash; aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE]; if( iHash==0 ){ aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)]; iZero = 0; }else{ iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE; } *paPgno = &aPgno[-1]; *paHash = aHash; *piZero = iZero; } return rc; } /* ** Return the number of the wal-index page that contains the hash-table ** and page-number array that contain entries corresponding to WAL frame ** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages ** are numbered starting from 0. */ static int walFramePage(u32 iFrame){ int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE; assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE) && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE) && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)) && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE)) ); return iHash; } /* ** Return the page number associated with frame iFrame in this WAL. */ static u32 walFramePgno(Wal *pWal, u32 iFrame){ int iHash = walFramePage(iFrame); if( iHash==0 ){ return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1]; } return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; } /* ** Remove entries from the hash table that point to WAL slots greater ** than pWal->hdr.mxFrame. ** ** This function is called whenever pWal->hdr.mxFrame is decreased due ** to a rollback or savepoint. ** ** At most only the hash table containing pWal->hdr.mxFrame needs to be ** updated. Any later hash tables will be automatically cleared when ** pWal->hdr.mxFrame advances to the point where those hash tables are ** actually needed. */ static void walCleanupHash(Wal *pWal){ volatile ht_slot *aHash = 0; /* Pointer to hash table to clear */ volatile u32 *aPgno = 0; /* Page number array for hash table */ u32 iZero = 0; /* frame == (aHash[x]+iZero) */ int iLimit = 0; /* Zero values greater than this */ int nByte; /* Number of bytes to zero in aPgno[] */ int i; /* Used to iterate through aHash[] */ assert( pWal->writeLock ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 ); if( pWal->hdr.mxFrame==0 ) return; /* Obtain pointers to the hash-table and page-number array containing ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed ** that the page said hash-table and array reside on is already mapped. */ assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) ); assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] ); walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero); /* Zero all hash-table entries that correspond to frame numbers greater ** than pWal->hdr.mxFrame. */ iLimit = pWal->hdr.mxFrame - iZero; assert( iLimit>0 ); for(i=0; iiLimit ){ aHash[i] = 0; } } /* Zero the entries in the aPgno array that correspond to frames with ** frame numbers greater than pWal->hdr.mxFrame. */ nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]); memset((void *)&aPgno[iLimit+1], 0, nByte); #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* Verify that the every entry in the mapping region is still reachable ** via the hash table even after the cleanup. */ if( iLimit ){ int j; /* Loop counter */ int iKey; /* Hash key */ for(j=1; j<=iLimit; j++){ for(iKey=walHash(aPgno[j]); aHash[iKey]; iKey=walNextHash(iKey)){ if( aHash[iKey]==j ) break; } assert( aHash[iKey]==j ); } } #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. */ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ u32 iZero = 0; /* One less than frame number of aPgno[1] */ volatile u32 *aPgno = 0; /* Page number array */ volatile ht_slot *aHash = 0; /* Hash table */ rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero); /* Assuming the wal-index file was successfully mapped, populate the ** page number array and hash table entry. */ if( rc==SQLITE_OK ){ int iKey; /* Hash table key */ int idx; /* Value to write to hash-table slot */ int nCollide; /* Number of hash collisions */ idx = iFrame - iZero; assert( idx <= HASHTABLE_NSLOT/2 + 1 ); /* If this is the first entry to be added to this hash-table, zero the ** entire hash table and aPgno[] array before proceeding. */ if( idx==1 ){ int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]); memset((void*)&aPgno[1], 0, nByte); } /* If the entry in aPgno[] is already set, then the previous writer ** must have exited unexpectedly in the middle of a transaction (after ** writing one or more dirty pages to the WAL to free up memory). ** Remove the remnants of that writers uncommitted transaction from ** the hash-table before writing any new entries. */ if( aPgno[idx] ){ walCleanupHash(pWal); assert( !aPgno[idx] ); } /* Write the aPgno[] array entry and the hash-table slot. */ nCollide = idx; for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){ if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; } aPgno[idx] = iPage; aHash[iKey] = (ht_slot)idx; #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* Verify that the number of entries in the hash table exactly equals ** the number of entries in the mapping region. */ { int i; /* Loop counter */ int nEntry = 0; /* Number of entries in the hash table */ for(i=0; ickptLock==1 || pWal->ckptLock==0 ); assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); assert( pWal->writeLock ); iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; nLock = SQLITE_SHM_NLOCK - iLock; rc = walLockExclusive(pWal, iLock, nLock, 0); if( rc ){ return rc; } WALTRACE(("WAL%p: recovery begin...\n", pWal)); memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); if( rc!=SQLITE_OK ){ goto recovery_error; } if( nSize>WAL_HDRSIZE ){ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ int szFrame; /* Number of bytes in buffer aFrame[] */ u8 *aData; /* Pointer to data part of aFrame buffer */ int iFrame; /* Index of last frame read */ i64 iOffset; /* Next offset to read from log file */ int szPage; /* Page size according to the log */ u32 magic; /* Magic value read from WAL header */ u32 version; /* Magic value read from WAL header */ int isValid; /* True if this frame is valid */ /* Read in the WAL header. */ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ goto recovery_error; } /* If the database page size is not a power of two, or is greater than ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid ** data. Similarly, if the 'magic' value is invalid, ignore the whole ** WAL file. */ magic = sqlite3Get4byte(&aBuf[0]); szPage = sqlite3Get4byte(&aBuf[8]); if( (magic&0xFFFFFFFE)!=WAL_MAGIC || szPage&(szPage-1) || szPage>SQLITE_MAX_PAGE_SIZE || szPage<512 ){ goto finished; } pWal->hdr.bigEndCksum = (u8)(magic&0x00000001); pWal->szPage = szPage; pWal->nCkpt = sqlite3Get4byte(&aBuf[12]); memcpy(&pWal->hdr.aSalt, &aBuf[16], 8); /* Verify that the WAL header checksum is correct */ walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum ); if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24]) || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28]) ){ goto finished; } /* Verify that the version number on the WAL format is one that ** are able to understand */ version = sqlite3Get4byte(&aBuf[4]); if( version!=WAL_MAX_VERSION ){ rc = SQLITE_CANTOPEN_BKPT; goto finished; } /* Malloc a buffer to read frames into. */ szFrame = szPage + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc64(szFrame); if( !aFrame ){ rc = SQLITE_NOMEM; goto recovery_error; } aData = &aFrame[WAL_FRAME_HDRSIZE]; /* Read all frames from the log file. */ iFrame = 0; for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ /* Read and decode the next log frame. */ iFrame++; rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); if( rc!=SQLITE_OK ) break; isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); if( !isValid ) break; rc = walIndexAppend(pWal, iFrame, pgno); if( rc!=SQLITE_OK ) break; /* If nTruncate is non-zero, this is a commit record. */ if( nTruncate ){ pWal->hdr.mxFrame = iFrame; pWal->hdr.nPage = nTruncate; pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); testcase( szPage<=32768 ); testcase( szPage>=65536 ); aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; } } sqlite3_free(aFrame); } finished: if( rc==SQLITE_OK ){ volatile WalCkptInfo *pInfo; int i; pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; walIndexWriteHdr(pWal); /* Reset the checkpoint-header. This is safe because this thread is ** currently holding locks that exclude all other readers, writers and ** checkpointers. */ pInfo = walCkptInfo(pWal); pInfo->nBackfill = 0; pInfo->aReadMark[0] = 0; for(i=1; iaReadMark[i] = READMARK_NOT_USED; if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame; /* If more than one frame was recovered from the log file, report an ** event via sqlite3_log(). This is to help with identifying performance ** problems caused by applications routinely shutting down without ** checkpointing the log file. */ if( pWal->hdr.nPage ){ sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, "recovered %d frames from WAL file %s", pWal->hdr.mxFrame, pWal->zWalName ); } } recovery_error: WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); walUnlockExclusive(pWal, iLock, nLock); return rc; } /* ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){ int i; for(i=0; inWiData; i++){ sqlite3_free((void *)pWal->apWiData[i]); pWal->apWiData[i] = 0; } }else{ sqlite3OsShmUnmap(pWal->pDbFd, isDelete); } } /* ** Open a connection to the WAL file zWalName. The database file must ** already be opened on connection pDbFd. The buffer that zWalName points ** to must remain valid for the lifetime of the returned Wal* handle. ** ** A SHARED lock should be held on the database file when this function ** is called. The purpose of this SHARED lock is to prevent any other ** client from unlinking the WAL or wal-index file. If another process ** were to do this just after this client opened one of these files, the ** system would be badly broken. ** ** If the log file is successfully opened, SQLITE_OK is returned and ** *ppWal is set to point to a new WAL handle. If an error occurs, ** an SQLite error code is returned and *ppWal is left unmodified. */ SQLITE_PRIVATE int sqlite3WalOpen( sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ sqlite3_file *pDbFd, /* The open database file */ const char *zWalName, /* Name of the WAL file */ int bNoShm, /* True to run in heap-memory mode */ i64 mxWalSize, /* Truncate WAL to this size on reset */ Wal **ppWal /* OUT: Allocated Wal handle */ ){ int rc; /* Return Code */ Wal *pRet; /* Object to allocate and return */ int flags; /* Flags passed to OsOpen() */ assert( zWalName && zWalName[0] ); assert( pDbFd ); /* In the amalgamation, the os_unix.c and os_win.c source files come before ** this source file. Verify that the #defines of the locking byte offsets ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value. */ #ifdef WIN_SHM_BASE assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET ); #endif #ifdef UNIX_SHM_BASE assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); #endif /* Allocate an instance of struct Wal to return. */ *ppWal = 0; pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); if( !pRet ){ return SQLITE_NOMEM; } pRet->pVfs = pVfs; pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; pRet->readLock = -1; pRet->mxWalSize = mxWalSize; pRet->zWalName = zWalName; pRet->syncHeader = 1; pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); /* Open file handle on the write-ahead log file. */ flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ pRet->readOnly = WAL_RDONLY; } if( rc!=SQLITE_OK ){ walIndexClose(pRet, 0); sqlite3OsClose(pRet->pWalFd); sqlite3_free(pRet); }else{ int iDC = sqlite3OsDeviceCharacteristics(pDbFd); if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; } if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){ pRet->padToSectorBoundary = 0; } *ppWal = pRet; WALTRACE(("WAL%d: opened\n", pRet)); } return rc; } #if 0 /* ** Change the size to which the WAL file is trucated on each reset. */ SQLITE_PRIVATE void sqlite3WalLimit(Wal *pWal, i64 iLimit){ if( pWal ) pWal->mxWalSize = iLimit; } #endif /* ** Find the smallest page number out of all pages held in the WAL that ** has not been returned by any prior invocation of this method on the ** same WalIterator object. Write into *piFrame the frame index where ** that page was last written into the WAL. Write into *piPage the page ** number. ** ** Return 0 on success. If there are no pages in the WAL with a page ** number larger than *piPage, then return 1. */ static int walIteratorNext( WalIterator *p, /* Iterator */ u32 *piPage, /* OUT: The page number of the next page */ u32 *piFrame /* OUT: Wal frame index of next page */ ){ u32 iMin; /* Result pgno must be greater than iMin */ u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ int i; /* For looping through segments */ iMin = p->iPrior; assert( iMin<0xffffffff ); for(i=p->nSegment-1; i>=0; i--){ struct WalSegment *pSegment = &p->aSegment[i]; while( pSegment->iNextnEntry ){ u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; if( iPg>iMin ){ if( iPgiZero + pSegment->aIndex[pSegment->iNext]; } break; } pSegment->iNext++; } } *piPage = p->iPrior = iRet; return (iRet==0xFFFFFFFF); } /* ** This function merges two sorted lists into a single sorted list. ** ** aLeft[] and aRight[] are arrays of indices. The sort key is ** aContent[aLeft[]] and aContent[aRight[]]. Upon entry, the following ** is guaranteed for all J0 && nRight>0 ); while( iRight=nRight || aContent[aLeft[iLeft]]=nLeft || aContent[aLeft[iLeft]]>dbpage ); assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage ); } *paRight = aLeft; *pnRight = iOut; memcpy(aLeft, aTmp, sizeof(aTmp[0])*iOut); } /* ** Sort the elements in list aList using aContent[] as the sort key. ** Remove elements with duplicate keys, preferring to keep the ** larger aList[] values. ** ** The aList[] entries are indices into aContent[]. The values in ** aList[] are to be sorted so that for all J0 ); assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) ); for(iList=0; iListaList && p->nList<=(1<aList==&aList[iList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); } aSub[iSub].aList = aMerge; aSub[iSub].nList = nMerge; } for(iSub++; iSubnList<=(1<aList==&aList[nList&~((2<aList, p->nList, &aMerge, &nMerge, aBuffer); } } assert( aMerge==aList ); *pnList = nMerge; #ifdef SQLITE_DEBUG { int i; for(i=1; i<*pnList; i++){ assert( aContent[aList[i]] > aContent[aList[i-1]] ); } } #endif } /* ** Free an iterator allocated by walIteratorInit(). */ static void walIteratorFree(WalIterator *p){ sqlite3_free(p); } /* ** Construct a WalInterator object that can be used to loop over all ** pages in the WAL in ascending order. The caller must hold the checkpoint ** lock. ** ** On success, make *pp point to the newly allocated WalInterator object ** return SQLITE_OK. Otherwise, return an error code. If this routine ** returns an error, the value of *pp is undefined. ** ** The calling routine should invoke walIteratorFree() to destroy the ** WalIterator object when it has finished with it. */ static int walIteratorInit(Wal *pWal, WalIterator **pp){ WalIterator *p; /* Return value */ int nSegment; /* Number of segments to merge */ u32 iLast; /* Last frame in log */ int nByte; /* Number of bytes to allocate */ int i; /* Iterator variable */ ht_slot *aTmp; /* Temp space used by merge-sort */ int rc = SQLITE_OK; /* Return Code */ /* This routine only runs while holding the checkpoint lock. And ** it only runs if there is actually content in the log (mxFrame>0). */ assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); iLast = pWal->hdr.mxFrame; /* Allocate space for the WalIterator object. */ nSegment = walFramePage(iLast) + 1; nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + iLast*sizeof(ht_slot); p = (WalIterator *)sqlite3_malloc64(nByte); if( !p ){ return SQLITE_NOMEM; } memset(p, 0, nByte); p->nSegment = nSegment; /* Allocate temporary space used by the merge-sort routine. This block ** of memory will be freed before this function returns. */ aTmp = (ht_slot *)sqlite3_malloc64( sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) ); if( !aTmp ){ rc = SQLITE_NOMEM; } for(i=0; rc==SQLITE_OK && iaSegment[p->nSegment])[iZero]; iZero++; for(j=0; jaSegment[i].iZero = iZero; p->aSegment[i].nEntry = nEntry; p->aSegment[i].aIndex = aIndex; p->aSegment[i].aPgno = (u32 *)aPgno; } } sqlite3_free(aTmp); if( rc!=SQLITE_OK ){ walIteratorFree(p); } *pp = p; return rc; } /* ** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and ** n. If the attempt fails and parameter xBusy is not NULL, then it is a ** busy-handler function. Invoke it and retry the lock until either the ** lock is successfully obtained or the busy-handler returns 0. */ static int walBusyLock( Wal *pWal, /* WAL connection */ int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int lockIdx, /* Offset of first byte to lock */ int n /* Number of bytes to lock */ ){ int rc; do { rc = walLockExclusive(pWal, lockIdx, n, 0); }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) ); return rc; } /* ** The cache of the wal-index header must be valid to call this function. ** Return the page-size in bytes used by the database. */ static int walPagesize(Wal *pWal){ return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); } /* ** The following is guaranteed when this function is called: ** ** a) the WRITER lock is held, ** b) the entire log file has been checkpointed, and ** c) any existing readers are reading exclusively from the database ** file - there are no readers that may attempt to read a frame from ** the log file. ** ** This function updates the shared-memory structures so that the next ** client to write to the database (which may be this one) does so by ** writing frames into the start of the log file. ** ** The value of parameter salt1 is used as the aSalt[1] value in the ** new wal-index header. It should be passed a pseudo-random value (i.e. ** one obtained from sqlite3_randomness()). */ static void walRestartHdr(Wal *pWal, u32 salt1){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); int i; /* Loop counter */ u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ pWal->nCkpt++; pWal->hdr.mxFrame = 0; sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); memcpy(&pWal->hdr.aSalt[1], &salt1, 4); walIndexWriteHdr(pWal); pInfo->nBackfill = 0; pInfo->aReadMark[1] = 0; for(i=2; iaReadMark[i] = READMARK_NOT_USED; assert( pInfo->aReadMark[0]==0 ); } /* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. ** ** The amount of information copies from WAL to database might be limited ** by active readers. This routine will never overwrite a database page ** that a concurrent reader might be using. ** ** All I/O barrier operations (a.k.a fsyncs) occur in this routine when ** SQLite is in WAL-mode in synchronous=NORMAL. That means that if ** checkpoints are always run by a background thread or background ** process, foreground threads will never block on a lengthy fsync call. ** ** Fsync is called on the WAL before writing content out of the WAL and ** into the database. This ensures that if the new content is persistent ** in the WAL and can be recovered following a power-loss or hard reset. ** ** Fsync is also called on the database file if (and only if) the entire ** WAL content is copied into the database file. This second fsync makes ** it safe to delete the WAL since the new content will persist in the ** database file. ** ** This routine uses and updates the nBackfill field of the wal-index header. ** This is the only routine that will increase the value of nBackfill. ** (A WAL reset or recovery will revert nBackfill to zero, but not increase ** its value.) ** ** The caller must be holding sufficient locks to ensure that no other ** checkpoint is running (in any other thread or process) at the same ** time. */ static int walCheckpoint( Wal *pWal, /* Wal connection */ int eMode, /* One of PASSIVE, FULL or RESTART */ int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags for OsSync() (or 0) */ u8 *zBuf /* Temporary buffer to use */ ){ int rc = SQLITE_OK; /* Return code */ int szPage; /* Database page-size */ WalIterator *pIter = 0; /* Wal iterator context */ u32 iDbpage = 0; /* Next database page to write */ u32 iFrame = 0; /* Wal frame containing data for iDbpage */ u32 mxSafeFrame; /* Max frame that can be backfilled */ u32 mxPage; /* Max database page to write */ int i; /* Loop counter */ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ szPage = walPagesize(pWal); testcase( szPage<=32768 ); testcase( szPage>=65536 ); pInfo = walCkptInfo(pWal); if( pInfo->nBackfillhdr.mxFrame ){ /* Allocate the iterator */ rc = walIteratorInit(pWal, &pIter); if( rc!=SQLITE_OK ){ return rc; } assert( pIter ); /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); /* Compute in mxSafeFrame the index of the last frame of the WAL that is ** safe to write into the database. Frames beyond mxSafeFrame might ** overwrite database pages that are in use by active readers and thus ** cannot be backfilled from the WAL. */ mxSafeFrame = pWal->hdr.mxFrame; mxPage = pWal->hdr.nPage; for(i=1; iaReadMark[i]; if( mxSafeFrame>y ){ assert( y<=pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); if( rc==SQLITE_OK ){ pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); }else if( rc==SQLITE_BUSY ){ mxSafeFrame = y; xBusy = 0; }else{ goto walcheckpoint_out; } } } if( pInfo->nBackfillnBackfill; /* Sync the WAL to disk */ if( sync_flags ){ rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } /* If the database may grow as a result of this checkpoint, hint ** about the eventual size of the db file to the VFS layer. */ if( rc==SQLITE_OK ){ i64 nReq = ((i64)mxPage * szPage); rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); if( rc==SQLITE_OK && nSizepDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); } } /* Iterate through the contents of the WAL, copying data to the db file */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ i64 iOffset; assert( walFramePgno(pWal, iFrame)==iDbpage ); if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ continue; } iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); if( rc!=SQLITE_OK ) break; iOffset = (iDbpage-1)*(i64)szPage; testcase( IS_BIG_INT(iOffset) ); rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); if( rc!=SQLITE_OK ) break; } /* If work was actually accomplished... */ if( rc==SQLITE_OK ){ if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ i64 szDb = pWal->hdr.nPage*(i64)szPage; testcase( IS_BIG_INT(szDb) ); rc = sqlite3OsTruncate(pWal->pDbFd, szDb); if( rc==SQLITE_OK && sync_flags ){ rc = sqlite3OsSync(pWal->pDbFd, sync_flags); } } if( rc==SQLITE_OK ){ pInfo->nBackfill = mxSafeFrame; } } /* Release the reader lock held while backfilling */ walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); } if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. */ rc = SQLITE_OK; } } /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the ** entire wal file has been copied into the database file, then block ** until all readers have finished using the wal file. This ensures that ** the next process to write to the database restarts the wal file. */ if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ assert( pWal->writeLock ); if( pInfo->nBackfillhdr.mxFrame ){ rc = SQLITE_BUSY; }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ u32 salt1; sqlite3_randomness(4, &salt1); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as ** SQLITE_CHECKPOINT_RESTART with the addition that it also ** truncates the log file to zero bytes just prior to a ** successful return. ** ** In theory, it might be safe to do this without updating the ** wal-index header in shared memory, as all subsequent reader or ** writer clients should see that the entire log file has been ** checkpointed and behave accordingly. This seems unsafe though, ** as it would leave the system in a state where the contents of ** the wal-index header do not match the contents of the ** file-system. To avoid this, update the wal-index header to ** indicate that the log file contains zero valid frames. */ walRestartHdr(pWal, salt1); rc = sqlite3OsTruncate(pWal->pWalFd, 0); } walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } } } walcheckpoint_out: walIteratorFree(pIter); return rc; } /* ** If the WAL file is currently larger than nMax bytes in size, truncate ** it to exactly nMax bytes. If an error occurs while doing so, ignore it. */ static void walLimitSize(Wal *pWal, i64 nMax){ i64 sz; int rx; rx = sqlite3OsFileSize(pWal->pWalFd, &sz); if( rx==SQLITE_OK && (sz > nMax ) ){ rx = sqlite3OsTruncate(pWal->pWalFd, nMax); } if( rx ){ sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); } } /* ** Close a connection to a log file. */ SQLITE_PRIVATE int sqlite3WalClose( Wal *pWal, /* Wal to close */ int sync_flags, /* Flags to pass to OsSync() (or 0) */ int nBuf, u8 *zBuf /* Buffer of at least nBuf bytes */ ){ int rc = SQLITE_OK; if( pWal ){ int isDelete = 0; /* True to unlink wal and wal-index files */ /* If an EXCLUSIVE lock can be obtained on the database file (using the ** ordinary, rollback-mode locking methods, this guarantees that the ** connection associated with this log file is the only connection to ** the database. In this case checkpoint the database and unlink both ** the wal and wal-index files. ** ** The EXCLUSIVE lock is not released before returning. */ rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); if( rc==SQLITE_OK ){ if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; } rc = sqlite3WalCheckpoint( pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 ); if( rc==SQLITE_OK ){ int bPersist = -1; sqlite3OsFileControlHint( pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist ); if( bPersist!=1 ){ /* Try to delete the WAL file if the checkpoint completed and ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal ** mode (!bPersist) */ isDelete = 1; }else if( pWal->mxWalSize>=0 ){ /* Try to truncate the WAL file to zero bytes if the checkpoint ** completed and fsynced (rc==SQLITE_OK) and we are in persistent ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a ** non-negative value (pWal->mxWalSize>=0). Note that we truncate ** to zero bytes as truncating to the journal_size_limit might ** leave a corrupt WAL file on disk. */ walLimitSize(pWal, 0); } } } walIndexClose(pWal, isDelete); sqlite3OsClose(pWal->pWalFd); if( isDelete ){ sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); } WALTRACE(("WAL%p: closed\n", pWal)); sqlite3_free((void *)pWal->apWiData); sqlite3_free(pWal); } return rc; } /* ** Try to read the wal-index header. Return 0 on success and 1 if ** there is a problem. ** ** The wal-index is in shared memory. Another thread or process might ** be writing the header at the same time this procedure is trying to ** read it, which might result in inconsistency. A dirty read is detected ** by verifying that both copies of the header are the same and also by ** a checksum on the header. ** ** If and only if the read is consistent and the header is different from ** pWal->hdr, then pWal->hdr is updated to the content of the new header ** and *pChanged is set to 1. ** ** If the checksum cannot be verified return non-zero. If the header ** is read successfully and the checksum verified, return zero. */ static int walIndexTryHdr(Wal *pWal, int *pChanged){ u32 aCksum[2]; /* Checksum on the header content */ WalIndexHdr h1, h2; /* Two copies of the header content */ WalIndexHdr volatile *aHdr; /* Header in shared memory */ /* The first page of the wal-index must be mapped at this point. */ assert( pWal->nWiData>0 && pWal->apWiData[0] ); /* Read the header. This might happen concurrently with a write to the ** same area of shared memory on a different CPU in a SMP, ** meaning it is possible that an inconsistent snapshot is read ** from the file. If this happens, return non-zero. ** ** There are two copies of the header at the beginning of the wal-index. ** When reading, read [0] first then [1]. Writes are in the reverse order. ** Memory barriers are used to prevent the compiler or the hardware from ** reordering the reads and writes. */ aHdr = walIndexHdr(pWal); memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); walShmBarrier(pWal); memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ return 1; /* Dirty read */ } if( h1.isInit==0 ){ return 1; /* Malformed header - probably all zeros */ } walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum); if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){ return 1; /* Checksum does not match */ } if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){ *pChanged = 1; memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr)); pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); testcase( pWal->szPage<=32768 ); testcase( pWal->szPage>=65536 ); } /* The header was successfully read. Return zero. */ return 0; } /* ** Read the wal-index header from the wal-index and into pWal->hdr. ** If the wal-header appears to be corrupt, try to reconstruct the ** wal-index from the WAL before returning. ** ** Set *pChanged to 1 if the wal-index header value in pWal->hdr is ** changed by this operation. If pWal->hdr is unchanged, set *pChanged ** to 0. ** ** If the wal-index header is successfully read, return SQLITE_OK. ** Otherwise an SQLite error code. */ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; /* Return code */ int badHdr; /* True if a header read failed */ volatile u32 *page0; /* Chunk of wal-index containing header */ /* Ensure that page 0 of the wal-index (the page that contains the ** wal-index header) is mapped. Return early if an error occurs here. */ assert( pChanged ); rc = walIndexPage(pWal, 0, &page0); if( rc!=SQLITE_OK ){ return rc; }; assert( page0 || pWal->writeLock==0 ); /* If the first page of the wal-index has been mapped, try to read the ** wal-index header immediately, without holding any lock. This usually ** works, but may fail if the wal-index header is corrupt or currently ** being modified by another thread or process. */ badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); /* If the first attempt failed, it might have been due to a race ** with a writer. So get a WRITE lock and try again. */ assert( badHdr==0 || pWal->writeLock==0 ); if( badHdr ){ if( pWal->readOnly & WAL_SHM_RDONLY ){ if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ walUnlockShared(pWal, WAL_WRITE_LOCK); rc = SQLITE_READONLY_RECOVERY; } }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 1)) ){ pWal->writeLock = 1; if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ badHdr = walIndexTryHdr(pWal, pChanged); if( badHdr ){ /* If the wal-index header is still malformed even while holding ** a WRITE lock, it can only mean that the header is corrupted and ** needs to be reconstructed. So run recovery to do exactly that. */ rc = walIndexRecover(pWal); *pChanged = 1; } } pWal->writeLock = 0; walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); } } /* If the header is read successfully, check the version number to make ** sure the wal-index was not constructed with some future format that ** this version of SQLite cannot understand. */ if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ rc = SQLITE_CANTOPEN_BKPT; } return rc; } /* ** This is the value that walTryBeginRead returns when it needs to ** be retried. */ #define WAL_RETRY (-1) /* ** Attempt to start a read transaction. This might fail due to a race or ** other transient condition. When that happens, it returns WAL_RETRY to ** indicate to the caller that it is safe to retry immediately. ** ** On success return SQLITE_OK. On a permanent failure (such an ** I/O error or an SQLITE_BUSY because another process is running ** recovery) return a positive error code. ** ** The useWal parameter is true to force the use of the WAL and disable ** the case where the WAL is bypassed because it has been completely ** checkpointed. If useWal==0 then this routine calls walIndexReadHdr() ** to make a copy of the wal-index header into pWal->hdr. If the ** wal-index header has changed, *pChanged is set to 1 (as an indication ** to the caller that the local paget cache is obsolete and needs to be ** flushed.) When useWal==1, the wal-index header is assumed to already ** be loaded and the pChanged parameter is unused. ** ** The caller must set the cnt parameter to the number of prior calls to ** this routine during the current read attempt that returned WAL_RETRY. ** This routine will start taking more aggressive measures to clear the ** race conditions after multiple WAL_RETRY returns, and after an excessive ** number of errors will ultimately return SQLITE_PROTOCOL. The ** SQLITE_PROTOCOL return indicates that some other process has gone rogue ** and is not honoring the locking protocol. There is a vanishingly small ** chance that SQLITE_PROTOCOL could be returned because of a run of really ** bad luck when there is lots of contention for the wal-index, but that ** possibility is so small that it can be safely neglected, we believe. ** ** On success, this routine obtains a read lock on ** WAL_READ_LOCK(pWal->readLock). The pWal->readLock integer is ** in the range 0 <= pWal->readLock < WAL_NREADER. If pWal->readLock==(-1) ** that means the Wal does not hold any read lock. The reader must not ** access any database page that is modified by a WAL frame up to and ** including frame number aReadMark[pWal->readLock]. The reader will ** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0 ** Or if pWal->readLock==0, then the reader will ignore the WAL ** completely and get all content directly from the database file. ** If the useWal parameter is 1 then the WAL will never be ignored and ** this routine will always set pWal->readLock>0 on success. ** When the read transaction is completed, the caller must release the ** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1. ** ** This routine uses the nBackfill and aReadMark[] fields of the header ** to select a particular WAL_READ_LOCK() that strives to let the ** checkpoint process do as much work as possible. This routine might ** update values of the aReadMark[] array in the header, but if it does ** so it takes care to hold an exclusive lock on the corresponding ** WAL_READ_LOCK() while changing values. */ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ u32 mxReadMark; /* Largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */ int i; /* Loop counter */ int rc = SQLITE_OK; /* Return code */ assert( pWal->readLock<0 ); /* Not currently locked */ /* Take steps to avoid spinning forever if there is a protocol error. ** ** Circumstances that cause a RETRY should only last for the briefest ** instances of time. No I/O or other system calls are done while the ** locks are held, so the locks should not be held for very long. But ** if we are unlucky, another process that is holding a lock might get ** paged out or take a page-fault that is time-consuming to resolve, ** during the few nanoseconds that it is holding the lock. In that case, ** it might take longer than normal for the lock to free. ** ** After 5 RETRYs, we begin calling sqlite3OsSleep(). The first few ** calls to sqlite3OsSleep() have a delay of 1 microsecond. Really this ** is more of a scheduler yield than an actual delay. But on the 10th ** an subsequent retries, the delays start becoming longer and longer, ** so that on the 100th (and last) RETRY we delay for 323 milliseconds. ** The total delay time before giving up is less than 10 seconds. */ if( cnt>5 ){ int nDelay = 1; /* Pause time in microseconds */ if( cnt>100 ){ VVA_ONLY( pWal->lockError = 1; ) return SQLITE_PROTOCOL; } if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; sqlite3OsSleep(pWal->pVfs, nDelay); } if( !useWal ){ rc = walIndexReadHdr(pWal, pChanged); if( rc==SQLITE_BUSY ){ /* If there is not a recovery running in another thread or process ** then convert BUSY errors to WAL_RETRY. If recovery is known to ** be running, convert BUSY to BUSY_RECOVERY. There is a race here ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY ** would be technically correct. But the race is benign since with ** WAL_RETRY this routine will be called again and will probably be ** right on the second iteration. */ if( pWal->apWiData[0]==0 ){ /* This branch is taken when the xShmMap() method returns SQLITE_BUSY. ** We assume this is a transient condition, so return WAL_RETRY. The ** xShmMap() implementation used by the default unix and win32 VFS ** modules may return SQLITE_BUSY due to a race condition in the ** code that determines whether or not the shared-memory region ** must be zeroed before the requested page is returned. */ rc = WAL_RETRY; }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){ walUnlockShared(pWal, WAL_RECOVER_LOCK); rc = WAL_RETRY; }else if( rc==SQLITE_BUSY ){ rc = SQLITE_BUSY_RECOVERY; } } if( rc!=SQLITE_OK ){ return rc; } } pInfo = walCkptInfo(pWal); if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ /* The WAL has been completely backfilled (or it is empty). ** and can be safely ignored. */ rc = walLockShared(pWal, WAL_READ_LOCK(0)); walShmBarrier(pWal); if( rc==SQLITE_OK ){ if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ /* It is not safe to allow the reader to continue here if frames ** may have been appended to the log before READ_LOCK(0) was obtained. ** When holding READ_LOCK(0), the reader ignores the entire log file, ** which implies that the database file contains a trustworthy ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from ** happening, this is usually correct. ** ** However, if frames have been appended to the log (or if the log ** is wrapped and written for that matter) before the READ_LOCK(0) ** is obtained, that is not necessarily true. A checkpointer may ** have started to backfill the appended frames but crashed before ** it finished. Leaving a corrupt image in the database file. */ walUnlockShared(pWal, WAL_READ_LOCK(0)); return WAL_RETRY; } pWal->readLock = 0; return SQLITE_OK; }else if( rc!=SQLITE_BUSY ){ return rc; } } /* If we get this far, it means that the reader will want to use ** the WAL to get at content from recent commits. The job now is ** to select one of the aReadMark[] entries that is closest to ** but not exceeding pWal->hdr.mxFrame and lock that entry. */ mxReadMark = 0; mxI = 0; for(i=1; iaReadMark[i]; if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){ assert( thisMark!=READMARK_NOT_USED ); mxReadMark = thisMark; mxI = i; } } /* There was once an "if" here. The extra "{" is to preserve indentation. */ { if( (pWal->readOnly & WAL_SHM_RDONLY)==0 && (mxReadMarkhdr.mxFrame || mxI==0) ){ for(i=1; iaReadMark[i] = pWal->hdr.mxFrame; mxI = i; walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); break; }else if( rc!=SQLITE_BUSY ){ return rc; } } } if( mxI==0 ){ assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 ); return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK; } rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); if( rc ){ return rc==SQLITE_BUSY ? WAL_RETRY : rc; } /* Now that the read-lock has been obtained, check that neither the ** value in the aReadMark[] array or the contents of the wal-index ** header have changed. ** ** It is necessary to check that the wal-index header did not change ** between the time it was read and when the shared-lock was obtained ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility ** that the log file may have been wrapped by a writer, or that frames ** that occur later in the log than pWal->hdr.mxFrame may have been ** copied into the database by a checkpointer. If either of these things ** happened, then reading the database with the current value of ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry ** instead. ** ** Before checking that the live wal-index header has not changed ** since it was read, set Wal.minFrame to the first frame in the wal ** file that has not yet been checkpointed. This client will not need ** to read any frames earlier than minFrame from the wal file - they ** can be safely read directly from the database file. ** ** Because a ShmBarrier() call is made between taking the copy of ** nBackfill and checking that the wal-header in shared-memory still ** matches the one cached in pWal->hdr, it is guaranteed that the ** checkpointer that set nBackfill was not working with a wal-index ** header newer than that cached in pWal->hdr. If it were, that could ** cause a problem. The checkpointer could omit to checkpoint ** a version of page X that lies before pWal->minFrame (call that version ** A) on the basis that there is a newer version (version B) of the same ** page later in the wal file. But if version B happens to like past ** frame pWal->hdr.mxFrame - then the client would incorrectly assume ** that it can read version A from the database file. However, since ** we can guarantee that the checkpointer that set nBackfill could not ** see any pages past pWal->hdr.mxFrame, this problem does not come up. */ pWal->minFrame = pInfo->nBackfill+1; walShmBarrier(pWal); if( pInfo->aReadMark[mxI]!=mxReadMark || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ walUnlockShared(pWal, WAL_READ_LOCK(mxI)); return WAL_RETRY; }else{ assert( mxReadMark<=pWal->hdr.mxFrame ); pWal->readLock = (i16)mxI; } } return rc; } /* ** Begin a read transaction on the database. ** ** This routine used to be called sqlite3OpenSnapshot() and with good reason: ** it takes a snapshot of the state of the WAL and wal-index for the current ** instant in time. The current thread will continue to use this snapshot. ** Other threads might append new content to the WAL and wal-index but ** that extra content is ignored by the current thread. ** ** If the database contents have changes since the previous read ** transaction, then *pChanged is set to 1 before returning. The ** Pager layer will use this to know that is cache is stale and ** needs to be flushed. */ SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ int rc; /* Return code */ int cnt = 0; /* Number of TryBeginRead attempts */ do{ rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); }while( rc==WAL_RETRY ); testcase( (rc&0xff)==SQLITE_BUSY ); testcase( (rc&0xff)==SQLITE_IOERR ); testcase( rc==SQLITE_PROTOCOL ); testcase( rc==SQLITE_OK ); return rc; } /* ** Finish with a read transaction. All this does is release the ** read-lock. */ SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){ sqlite3WalEndWriteTransaction(pWal); if( pWal->readLock>=0 ){ walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); pWal->readLock = -1; } } /* ** Search the wal file for page pgno. If found, set *piRead to the frame that ** contains the page. Otherwise, if pgno is not in the wal file, set *piRead ** to zero. ** ** Return SQLITE_OK if successful, or an error code if an error occurs. If an ** error does occur, the final value of *piRead is undefined. */ SQLITE_PRIVATE int sqlite3WalFindFrame( Wal *pWal, /* WAL handle */ Pgno pgno, /* Database page number to read data for */ u32 *piRead /* OUT: Frame number (or zero) */ ){ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ int iMinHash; /* This routine is only be called from within a read transaction. */ assert( pWal->readLock>=0 || pWal->lockError ); /* If the "last page" field of the wal-index header snapshot is 0, then ** no data will be read from the wal under any circumstances. Return early ** in this case as an optimization. Likewise, if pWal->readLock==0, ** then the WAL is ignored by the reader so return early, as if the ** WAL were empty. */ if( iLast==0 || pWal->readLock==0 ){ *piRead = 0; return SQLITE_OK; } /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). ** ** This code might run concurrently to the code in walIndexAppend() ** that adds entries to the wal-index (and possibly to this hash ** table). This means the value just read from the hash ** slot (aHash[iKey]) may have been added before or after the ** current read transaction was opened. Values added after the ** read transaction was opened may have been written incorrectly - ** i.e. these slots may contain garbage data. However, we assume ** that any slots written before the current read transaction was ** opened remain unmodified. ** ** For the reasons above, the if(...) condition featured in the inner ** loop of the following block is more stringent that would be required ** if we had exclusive access to the hash-table: ** ** (aPgno[iFrame]==pgno): ** This condition filters out normal hash-table collisions. ** ** (iFrame<=iLast): ** This condition filters out entries that were added to the hash ** table after the current read-transaction had started. */ iMinHash = walFramePage(pWal->minFrame); for(iHash=walFramePage(iLast); iHash>=iMinHash && iRead==0; iHash--){ volatile ht_slot *aHash; /* Pointer to hash table */ volatile u32 *aPgno; /* Pointer to array of page numbers */ u32 iZero; /* Frame number corresponding to aPgno[0] */ int iKey; /* Hash slot index */ int nCollide; /* Number of hash collisions remaining */ int rc; /* Error code */ rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero); if( rc!=SQLITE_OK ){ return rc; } nCollide = HASHTABLE_NSLOT; for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ u32 iFrame = aHash[iKey] + iZero; if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){ assert( iFrame>iRead || CORRUPT_DB ); iRead = iFrame; } if( (nCollide--)==0 ){ return SQLITE_CORRUPT_BKPT; } } } #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search ** of the wal-index file content. Make sure the results agree with the ** result obtained using the hash indexes above. */ { u32 iRead2 = 0; u32 iTest; assert( pWal->minFrame>0 ); for(iTest=iLast; iTest>=pWal->minFrame; iTest--){ if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest; break; } } assert( iRead==iRead2 ); } #endif *piRead = iRead; return SQLITE_OK; } /* ** Read the contents of frame iRead from the wal file into buffer pOut ** (which is nOut bytes in size). Return SQLITE_OK if successful, or an ** error code otherwise. */ SQLITE_PRIVATE int sqlite3WalReadFrame( Wal *pWal, /* WAL handle */ u32 iRead, /* Frame to read */ int nOut, /* Size of buffer pOut in bytes */ u8 *pOut /* Buffer to write page data to */ ){ int sz; i64 iOffset; sz = pWal->hdr.szPage; sz = (sz&0xfe00) + ((sz&0x0001)<<16); testcase( sz<=32768 ); testcase( sz>=65536 ); iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); } /* ** Return the size of the database in pages (or zero, if unknown). */ SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal){ if( pWal && ALWAYS(pWal->readLock>=0) ){ return pWal->hdr.nPage; } return 0; } /* ** This function starts a write transaction on the WAL. ** ** A read transaction must have already been started by a prior call ** to sqlite3WalBeginReadTransaction(). ** ** If another thread or process has written into the database since ** the read transaction was started, then it is not possible for this ** thread to write as doing so would cause a fork. So this routine ** returns SQLITE_BUSY in that case and no write transaction is started. ** ** There can only be a single writer active at a time. */ SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal){ int rc; /* Cannot start a write transaction without first holding a read ** transaction. */ assert( pWal->readLock>=0 ); if( pWal->readOnly ){ return SQLITE_READONLY; } /* Only one writer allowed at a time. Get the write lock. Return ** SQLITE_BUSY if unable. */ rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1, 0); if( rc ){ return rc; } pWal->writeLock = 1; /* If another connection has written to the database file since the ** time the read transaction on this connection was started, then ** the write is disallowed. */ if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; rc = SQLITE_BUSY_SNAPSHOT; } return rc; } /* ** End a write transaction. The commit has already been done. This ** routine merely releases the lock. */ SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal){ if( pWal->writeLock ){ walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); pWal->writeLock = 0; pWal->truncateOnCommit = 0; } return SQLITE_OK; } /* ** If any data has been written (but not committed) to the log file, this ** function moves the write-pointer back to the start of the transaction. ** ** Additionally, the callback function is invoked for each frame written ** to the WAL since the start of the transaction. If the callback returns ** other than SQLITE_OK, it is not invoked again and the error code is ** returned to the caller. ** ** Otherwise, if the callback function does not return an error, this ** function returns SQLITE_OK. */ SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; if( ALWAYS(pWal->writeLock) ){ Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; /* Restore the clients cache of the wal-index header to the state it ** was in before the client began writing to the database. */ memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); for(iFrame=pWal->hdr.mxFrame+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++ ){ /* This call cannot fail. Unless the page for which the page number ** is passed as the second argument is (a) in the cache and ** (b) has an outstanding reference, then xUndo is either a no-op ** (if (a) is false) or simply expels the page from the cache (if (b) ** is false). ** ** If the upper layer is doing a rollback, it is guaranteed that there ** are no outstanding references to any page other than page 1. And ** page 1 is never written to the log until the transaction is ** committed. As a result, the call to xUndo may not fail. */ assert( walFramePgno(pWal, iFrame)!=1 ); rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); } if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); } return rc; } /* ** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 ** values. This function populates the array with values required to ** "rollback" the write position of the WAL handle back to the current ** point in the event of a savepoint rollback (via WalSavepointUndo()). */ SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ assert( pWal->writeLock ); aWalData[0] = pWal->hdr.mxFrame; aWalData[1] = pWal->hdr.aFrameCksum[0]; aWalData[2] = pWal->hdr.aFrameCksum[1]; aWalData[3] = pWal->nCkpt; } /* ** Move the write position of the WAL back to the point identified by ** the values in the aWalData[] array. aWalData must point to an array ** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated ** by a call to WalSavepoint(). */ SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ int rc = SQLITE_OK; assert( pWal->writeLock ); assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame ); if( aWalData[3]!=pWal->nCkpt ){ /* This savepoint was opened immediately after the write-transaction ** was started. Right after that, the writer decided to wrap around ** to the start of the log. Update the savepoint values to match. */ aWalData[0] = 0; aWalData[3] = pWal->nCkpt; } if( aWalData[0]hdr.mxFrame ){ pWal->hdr.mxFrame = aWalData[0]; pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; walCleanupHash(pWal); } return rc; } /* ** This function is called just before writing a set of frames to the log ** file (see sqlite3WalFrames()). It checks to see if, instead of appending ** to the current log file, it is possible to overwrite the start of the ** existing log file with the new frames (i.e. "reset" the log). If so, ** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left ** unchanged. ** ** SQLITE_OK is returned if no error is encountered (regardless of whether ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned ** if an error occurs. */ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; int cnt; if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ u32 salt1; sqlite3_randomness(4, &salt1); rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1, 0); if( rc==SQLITE_OK ){ /* If all readers are using WAL_READ_LOCK(0) (in other words if no ** readers are currently using the WAL), then the transactions ** frames will overwrite the start of the existing log. Update the ** wal-index header to reflect this. ** ** In theory it would be Ok to update the cache of the header only ** at this point. But updating the actual wal-index header is also ** safe and means there is no special case for sqlite3WalUndo() ** to handle if this transaction is rolled back. */ walRestartHdr(pWal, salt1); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); }else if( rc!=SQLITE_BUSY ){ return rc; } } walUnlockShared(pWal, WAL_READ_LOCK(0)); pWal->readLock = -1; cnt = 0; do{ int notUsed; rc = walTryBeginRead(pWal, ¬Used, 1, ++cnt); }while( rc==WAL_RETRY ); assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ testcase( (rc&0xff)==SQLITE_IOERR ); testcase( rc==SQLITE_PROTOCOL ); testcase( rc==SQLITE_OK ); } return rc; } /* ** Information about the current state of the WAL file and where ** the next fsync should occur - passed from sqlite3WalFrames() into ** walWriteToLog(). */ typedef struct WalWriter { Wal *pWal; /* The complete WAL information */ sqlite3_file *pFd; /* The WAL file to which we write */ sqlite3_int64 iSyncPoint; /* Fsync at this offset */ int syncFlags; /* Flags for the fsync */ int szPage; /* Size of one page */ } WalWriter; /* ** Write iAmt bytes of content into the WAL file beginning at iOffset. ** Do a sync when crossing the p->iSyncPoint boundary. ** ** In other words, if iSyncPoint is in between iOffset and iOffset+iAmt, ** first write the part before iSyncPoint, then sync, then write the ** rest. */ static int walWriteToLog( WalWriter *p, /* WAL to write to */ void *pContent, /* Content to be written */ int iAmt, /* Number of bytes to write */ sqlite3_int64 iOffset /* Start writing at this offset */ ){ int rc; if( iOffsetiSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ int iFirstAmt = (int)(p->iSyncPoint - iOffset); rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); if( rc ) return rc; iOffset += iFirstAmt; iAmt -= iFirstAmt; pContent = (void*)(iFirstAmt + (char*)pContent); assert( p->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) ); rc = sqlite3OsSync(p->pFd, p->syncFlags & SQLITE_SYNC_MASK); if( iAmt==0 || rc ) return rc; } rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); return rc; } /* ** Write out a single frame of the WAL */ static int walWriteOneFrame( WalWriter *p, /* Where to write the frame */ PgHdr *pPage, /* The page of the frame to be written */ int nTruncate, /* The commit flag. Usually 0. >0 for commit */ sqlite3_int64 iOffset /* Byte offset at which to write */ ){ int rc; /* Result code from subfunctions */ void *pData; /* Data actually written */ u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ #if defined(SQLITE_HAS_CODEC) if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM; #else pData = pPage->pData; #endif walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); if( rc ) return rc; /* Write the page data */ rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); return rc; } /* ** Write a set of frames to the log. The caller must hold the write-lock ** on the log file (obtained using sqlite3WalBeginWriteTransaction()). */ SQLITE_PRIVATE int sqlite3WalFrames( Wal *pWal, /* Wal handle to write to */ int szPage, /* Database page-size in bytes */ PgHdr *pList, /* List of dirty pages to write */ Pgno nTruncate, /* Database size after this commit */ int isCommit, /* True if this is a commit */ int sync_flags /* Flags to pass to OsSync() (or 0) */ ){ int rc; /* Used to catch return codes */ u32 iFrame; /* Next frame address */ PgHdr *p; /* Iterator to run through pList with. */ PgHdr *pLast = 0; /* Last frame in list */ int nExtra = 0; /* Number of extra copies of last page */ int szFrame; /* The size of a single frame */ i64 iOffset; /* Next byte to write in WAL file */ WalWriter w; /* The writer */ assert( pList ); assert( pWal->writeLock ); /* If this frame set completes a transaction, then nTruncate>0. If ** nTruncate==0 then this frame set does not complete the transaction. */ assert( (isCommit!=0)==(nTruncate!=0) ); #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); } #endif /* See if it is possible to write these frames into the start of the ** log file, instead of appending to it at pWal->hdr.mxFrame. */ if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ return rc; } /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. */ iFrame = pWal->hdr.mxFrame; if( iFrame==0 ){ u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ u32 aCksum[2]; /* Checksum for wal-header */ sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); sqlite3Put4byte(&aWalHdr[8], szPage); sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); sqlite3Put4byte(&aWalHdr[24], aCksum[0]); sqlite3Put4byte(&aWalHdr[28], aCksum[1]); pWal->szPage = szPage; pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN; pWal->hdr.aFrameCksum[0] = aCksum[0]; pWal->hdr.aFrameCksum[1] = aCksum[1]; pWal->truncateOnCommit = 1; rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); if( rc!=SQLITE_OK ){ return rc; } /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless ** all syncing is turned off by PRAGMA synchronous=OFF). Otherwise ** an out-of-order write following a WAL restart could result in ** database corruption. See the ticket: ** ** http://localhost:591/sqlite/info/ff5be73dee */ if( pWal->syncHeader && sync_flags ){ rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK); if( rc ) return rc; } } assert( (int)pWal->szPage==szPage ); /* Setup information needed to write frames into the WAL */ w.pWal = pWal; w.pFd = pWal->pWalFd; w.iSyncPoint = 0; w.syncFlags = sync_flags; w.szPage = szPage; iOffset = walFrameOffset(iFrame+1, szPage); szFrame = szPage + WAL_FRAME_HDRSIZE; /* Write all frames into the log file exactly once */ for(p=pList; p; p=p->pDirty){ int nDbSize; /* 0 normally. Positive == commit flag */ iFrame++; assert( iOffset==walFrameOffset(iFrame, szPage) ); nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0; rc = walWriteOneFrame(&w, p, nDbSize, iOffset); if( rc ) return rc; pLast = p; iOffset += szFrame; } /* If this is the end of a transaction, then we might need to pad ** the transaction and/or sync the WAL file. ** ** Padding and syncing only occur if this set of frames complete a ** transaction and if PRAGMA synchronous=FULL. If synchronous==NORMAL ** or synchronous==OFF, then no padding or syncing are needed. ** ** If SQLITE_IOCAP_POWERSAFE_OVERWRITE is defined, then padding is not ** needed and only the sync is done. If padding is needed, then the ** final frame is repeated (with its commit mark) until the next sector ** boundary is crossed. Only the part of the WAL prior to the last ** sector boundary is synced; the part of the last frame that extends ** past the sector boundary is written after the sync. */ if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){ if( pWal->padToSectorBoundary ){ int sectorSize = sqlite3SectorSize(pWal->pWalFd); w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; while( iOffsettruncateOnCommit && pWal->mxWalSize>=0 ){ i64 sz = pWal->mxWalSize; if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){ sz = walFrameOffset(iFrame+nExtra+1, szPage); } walLimitSize(pWal, sz); pWal->truncateOnCommit = 0; } /* Append data to the wal-index. It is not necessary to lock the ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index ** guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. */ iFrame = pWal->hdr.mxFrame; for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ iFrame++; rc = walIndexAppend(pWal, iFrame, p->pgno); } while( rc==SQLITE_OK && nExtra>0 ){ iFrame++; nExtra--; rc = walIndexAppend(pWal, iFrame, pLast->pgno); } if( rc==SQLITE_OK ){ /* Update the private copy of the header. */ pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); testcase( szPage<=32768 ); testcase( szPage>=65536 ); pWal->hdr.mxFrame = iFrame; if( isCommit ){ pWal->hdr.iChange++; pWal->hdr.nPage = nTruncate; } /* If this is a commit, update the wal-index header too. */ if( isCommit ){ walIndexWriteHdr(pWal); pWal->iCallback = iFrame; } } WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } /* ** This routine is called to implement sqlite3_wal_checkpoint() and ** related interfaces. ** ** Obtain a CHECKPOINT lock and then backfill as much information as ** we can from WAL into the database. ** ** If parameter xBusy is not NULL, it is a pointer to a busy-handler ** callback. In this case this function runs a blocking checkpoint. */ SQLITE_PRIVATE int sqlite3WalCheckpoint( Wal *pWal, /* Wal connection */ int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags to sync db file with (or 0) */ int nBuf, /* Size of temporary buffer */ u8 *zBuf, /* Temporary buffer to use */ int *pnLog, /* OUT: Number of frames in WAL */ int *pnCkpt /* OUT: Number of backfilled frames in WAL */ ){ int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ int (*xBusy2)(void*) = xBusy; /* Busy handler for eMode2 */ assert( pWal->ckptLock==0 ); assert( pWal->writeLock==0 ); /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); if( pWal->readOnly ) return SQLITE_READONLY; WALTRACE(("WAL%p: checkpoint begins\n", pWal)); /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive ** "checkpoint" lock on the database file. */ rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1, 0); if( rc ){ /* EVIDENCE-OF: R-10421-19736 If any other process is running a ** checkpoint operation at the same time, the lock cannot be obtained and ** SQLITE_BUSY is returned. ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, ** it will not be invoked in this case. */ testcase( rc==SQLITE_BUSY ); testcase( xBusy!=0 ); return rc; } pWal->ckptLock = 1; /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and ** TRUNCATE modes also obtain the exclusive "writer" lock on the database ** file. ** ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained ** immediately, and a busy-handler is configured, it is invoked and the ** writer lock retried until either the busy-handler returns 0 or the ** lock is successfully obtained. */ if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); if( rc==SQLITE_OK ){ pWal->writeLock = 1; }else if( rc==SQLITE_BUSY ){ eMode2 = SQLITE_CHECKPOINT_PASSIVE; xBusy2 = 0; rc = SQLITE_OK; } } /* Read the wal-index header. */ if( rc==SQLITE_OK ){ rc = walIndexReadHdr(pWal, &isChanged); if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ sqlite3OsUnfetch(pWal->pDbFd, 0, 0); } } /* Copy data from the log to the database file. */ if( rc==SQLITE_OK ){ if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ rc = SQLITE_CORRUPT_BKPT; }else{ rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); } /* If no error occurred, set the output variables. */ if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); } } if( isChanged ){ /* If a new wal-index header was loaded before the checkpoint was ** performed, then the pager-cache associated with pWal is now ** out of date. So zero the cached wal-index header to ensure that ** next time the pager opens a snapshot on this database it knows that ** the cache needs to be reset. */ memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); } /* Release the locks. */ sqlite3WalEndWriteTransaction(pWal); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); pWal->ckptLock = 0; WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc); } #if 0 /* Return the value to pass to a sqlite3_wal_hook callback, the ** number of frames in the WAL at the point of the last commit since ** sqlite3WalCallback() was called. If no commits have occurred since ** the last call, then return 0. */ SQLITE_PRIVATE int sqlite3WalCallback(Wal *pWal){ u32 ret = 0; if( pWal ){ ret = pWal->iCallback; pWal->iCallback = 0; } return (int)ret; } #endif /* ** This function is called to change the WAL subsystem into or out ** of locking_mode=EXCLUSIVE. ** ** If op is zero, then attempt to change from locking_mode=EXCLUSIVE ** into locking_mode=NORMAL. This means that we must acquire a lock ** on the pWal->readLock byte. If the WAL is already in locking_mode=NORMAL ** or if the acquisition of the lock fails, then return 0. If the ** transition out of exclusive-mode is successful, return 1. This ** operation must occur while the pager is still holding the exclusive ** lock on the main database file. ** ** If op is one, then change from locking_mode=NORMAL into ** locking_mode=EXCLUSIVE. This means that the pWal->readLock must ** be released. Return 1 if the transition is made and 0 if the ** WAL is already in exclusive-locking mode - meaning that this ** routine is a no-op. The pager must already hold the exclusive lock ** on the main database file before invoking this operation. ** ** If op is negative, then do a dry-run of the op==1 case but do ** not actually change anything. The pager uses this to see if it ** should acquire the database exclusive lock prior to invoking ** the op==1 case. */ SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op){ int rc; assert( pWal->writeLock==0 ); assert( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE || op==-1 ); /* pWal->readLock is usually set, but might be -1 if there was a ** prior error while attempting to acquire are read-lock. This cannot ** happen if the connection is actually in exclusive mode (as no xShmLock ** locks are taken in this case). Nor should the pager attempt to ** upgrade to exclusive-mode following such an error. */ assert( pWal->readLock>=0 || pWal->lockError ); assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) ); if( op==0 ){ if( pWal->exclusiveMode ){ pWal->exclusiveMode = 0; if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ pWal->exclusiveMode = 1; } rc = pWal->exclusiveMode==0; }else{ /* Already in locking_mode=NORMAL */ rc = 0; } }else if( op>0 ){ assert( pWal->exclusiveMode==0 ); assert( pWal->readLock>=0 ); walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); pWal->exclusiveMode = 1; rc = 1; }else{ rc = pWal->exclusiveMode==0; } return rc; } #if 0 /* ** Return true if the argument is non-NULL and the WAL module is using ** heap-memory for the wal-index. Otherwise, if the argument is NULL or the ** WAL module is using shared-memory, return false. */ SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal){ return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ); } #endif #ifdef SQLITE_ENABLE_ZIPVFS /* ** If the argument is not NULL, it points to a Wal object that holds a ** read-lock. This function returns the database page-size if it is known, ** or zero if it is not (or if pWal is NULL). */ SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){ assert( pWal==0 || pWal->readLock>=0 ); return (pWal ? pWal->szPage : 0); } #endif #endif /* #ifndef SQLITE_OMIT_WAL */ /************** End of wal.c *************************************************/ /************** Begin file btmutex.c *****************************************/ /* ** 2007 August 27 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file contains code used to implement mutexes on Btree objects. ** This code really belongs in btree.c. But btree.c is getting too ** big and we want to break it down some. This packaged seemed like ** a good breakout. */ /* #include "btreeInt.h" */ #ifndef SQLITE_OMIT_SHARED_CACHE #if SQLITE_THREADSAFE /* ** Obtain the BtShared mutex associated with B-Tree handle p. Also, ** set BtShared.db to the database handle associated with p and the ** p->locked boolean to true. */ static void lockBtreeMutex(Btree *p){ assert( p->locked==0 ); assert( sqlite3_mutex_notheld(p->pBt->mutex) ); assert( sqlite3_mutex_held(p->mutex) ); sqlite3_mutex_enter(p->pBt->mutex); p->pBt->pBt = p; p->locked = 1; } /* ** Release the BtShared mutex associated with B-Tree handle p and ** clear the p->locked boolean. */ static void SQLITE_NOINLINE unlockBtreeMutex(Btree *p){ BtShared *pBt = p->pBt; assert( p->locked==1 ); assert( sqlite3_mutex_held(pBt->mutex) ); assert( sqlite3_mutex_held(p->db->mutex) ); assert( p->db==pBt->db ); sqlite3_mutex_leave(pBt->mutex); p->locked = 0; } /* Forward reference */ static void SQLITE_NOINLINE btreeLockCarefully(Btree *p); /* ** Enter a mutex on the given BTree object. ** ** If the object is not sharable, then no mutex is ever required ** and this routine is a no-op. The underlying mutex is non-recursive. ** But we keep a reference count in Btree.wantToLock so the behavior ** of this interface is recursive. ** ** To avoid deadlocks, multiple Btrees are locked in the same order ** by all database connections. The p->pNext is a list of other ** Btrees belonging to the same database connection as the p Btree ** which need to be locked after p. If we cannot get a lock on ** p, then first unlock all of the others on p->pNext, then wait ** for the lock to become available on p, then relock all of the ** subsequent Btrees that desire a lock. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeEnter(Btree *p){ /* Some basic sanity checking on the Btree. The list of Btrees ** connected by pNext and pPrev should be in sorted order by ** Btree.pBt value. All elements of the list should belong to ** the same connection. Only shared Btrees are on the list. */ assert( p->pNext==0 || p->pNext->pBt>p->pBt ); assert( p->pPrev==0 || p->pPrev->pBtpBt ); assert( p->pNext==0 || p->pNext->db==p->db ); assert( p->pPrev==0 || p->pPrev->db==p->db ); assert( p->sharable || (p->pNext==0 && p->pPrev==0) ); /* Check for locking consistency */ assert( !p->locked || p->wantToLock>0 ); assert( p->sharable || p->wantToLock==0 ); /* We should already hold a lock on the database connection */ assert( sqlite3_mutex_held(p->db->mutex) ); /* Unless the database is sharable and unlocked, then BtShared.db ** should already be set correctly. */ assert( (p->locked==0 && p->sharable) || p->pBt->db==p->db ); if( !p->sharable ) return; p->wantToLock++; if( p->locked ) return; btreeLockCarefully(p); } /* This is a helper function for sqlite3BtreeLock(). By moving ** complex, but seldom used logic, out of sqlite3BtreeLock() and ** into this routine, we avoid unnecessary stack pointer changes ** and thus help the sqlite3BtreeLock() routine to run much faster ** in the common case. */ static void SQLITE_NOINLINE btreeLockCarefully(Btree *p){ Btree *pLater; /* In most cases, we should be able to acquire the lock we ** want without having to go through the ascending lock ** procedure that follows. Just be sure not to block. */ if( sqlite3_mutex_try(p->pBt->mutex)==SQLITE_OK ){ p->pBt->pBt = p; p->locked = 1; return; } /* To avoid deadlock, first release all locks with a larger ** BtShared address. Then acquire our lock. Then reacquire ** the other BtShared locks that we used to hold in ascending ** order. */ for(pLater=p->pNext; pLater; pLater=pLater->pNext){ assert( pLater->sharable ); assert( pLater->pNext==0 || pLater->pNext->pBt>pLater->pBt ); assert( !pLater->locked || pLater->wantToLock>0 ); if( pLater->locked ){ unlockBtreeMutex(pLater); } } lockBtreeMutex(p); for(pLater=p->pNext; pLater; pLater=pLater->pNext){ if( pLater->wantToLock ){ lockBtreeMutex(pLater); } } } /* ** Exit the recursive mutex on a Btree. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeLeave(Btree *p){ assert( sqlite3_mutex_held(p->db->mutex) ); if( p->sharable ){ assert( p->wantToLock>0 ); p->wantToLock--; if( p->wantToLock==0 ){ unlockBtreeMutex(p); } } } #ifndef NDEBUG /* ** Return true if the BtShared mutex is held on the btree, or if the ** B-Tree is not marked as sharable. ** ** This routine is used only from within assert() statements. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeHoldsMutex(Btree *p){ assert( p->sharable==0 || p->locked==0 || p->wantToLock>0 ); assert( p->sharable==0 || p->locked==0 || p->db==p->pBt->db ); assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->pBt->mutex) ); assert( p->sharable==0 || p->locked==0 || sqlite3_mutex_held(p->db->mutex) ); return (p->sharable==0 || p->locked); } #endif #ifndef SQLITE_OMIT_INCRBLOB /* ** Enter and leave a mutex on a Btree given a cursor owned by that ** Btree. These entry points are used by incremental I/O and can be ** omitted if that module is not used. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeEnterCursor(BtCursor *pCur){ sqlite3BtreeEnter(pCur->pBtree); } SQLITE_API void SQLITE_STDCALL sqlite3BtreeLeaveCursor(BtCursor *pCur){ sqlite3BtreeLeave(pCur->pBtree); } #endif /* SQLITE_OMIT_INCRBLOB */ /* ** Return true if a particular Btree requires a lock. Return FALSE if ** no lock is ever required since it is not sharable. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSharable(Btree *p){ return p->sharable; } #else /* SQLITE_THREADSAFE>0 above. SQLITE_THREADSAFE==0 below */ /* ** The following are special cases for mutex enter routines for use ** in single threaded applications that use shared cache. Except for ** these two routines, all mutex operations are no-ops in that case and ** are null #defines in btree.h. ** ** If shared cache is disabled, then all btree mutex routines, including ** the ones below, are no-ops and are null #defines in btree.h. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeEnter(Btree *p){ p->pBt->db = p->db; } #endif /* if SQLITE_THREADSAFE */ #endif /* ifndef SQLITE_OMIT_SHARED_CACHE */ /************** End of btmutex.c *********************************************/ /************** Begin file btree.c *******************************************/ /* ** 2004 April 6 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** This file implements an external (disk-based) database using BTrees. ** See the header comment on "btreeInt.h" for additional information. ** Including a description of file format and an overview of operation. */ /* #include "btreeInt.h" */ /* ** The header string that appears at the beginning of every ** SQLite database. */ static const char zMagicHeader[] = SQLITE_FILE_HEADER; /* ** Set this global variable to 1 to enable tracing using the TRACE ** macro. */ #if 0 int sqlite3BtreeTrace=1; /* True to enable tracing */ # define TRACE(X) if(sqlite3BtreeTrace){printf X;fflush(stdout);} #else # define TRACE(X) #endif /* ** Extract a 2-byte big-endian integer from an array of unsigned bytes. ** But if the value is zero, make it 65536. ** ** This routine is used to extract the "offset to cell content area" value ** from the header of a btree page. If the page size is 65536 and the page ** is empty, the offset should be 65536, but the 2-byte value stores zero. ** This routine makes the necessary adjustment to 65536. */ #define get2byteNotZero(X) (((((int)get2byte(X))-1)&0xffff)+1) /* ** Values passed as the 5th argument to allocateBtreePage() */ #define BTALLOC_ANY 0 /* Allocate any page */ #define BTALLOC_EXACT 1 /* Allocate exact page if possible */ #define BTALLOC_LE 2 /* Allocate any page <= the parameter */ /* ** Macro IfNotOmitAV(x) returns (x) if SQLITE_OMIT_AUTOVACUUM is not ** defined, or 0 if it is. For example: ** ** bIncrVacuum = IfNotOmitAV(pBtShared->incrVacuum); */ #ifndef SQLITE_OMIT_AUTOVACUUM #define IfNotOmitAV(expr) (expr) #else #define IfNotOmitAV(expr) 0 #endif #ifndef SQLITE_OMIT_SHARED_CACHE /* ** A list of BtShared objects that are eligible for participation ** in shared cache. This variable has file scope during normal builds, ** but the test harness needs to access it so we make it global for ** test builds. ** ** Access to this variable is protected by SQLITE_MUTEX_STATIC_MASTER. */ #ifdef SQLITE_TEST SQLITE_PRIVATE BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; #else static BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; #endif #endif /* SQLITE_OMIT_SHARED_CACHE */ #ifdef SQLITE_OMIT_SHARED_CACHE /* ** The functions querySharedCacheTableLock(), setSharedCacheTableLock(), ** and clearAllSharedCacheTableLocks() ** manipulate entries in the BtShared.pLock linked list used to store ** shared-cache table level locks. If the library is compiled with the ** shared-cache feature disabled, then there is only ever one user ** of each BtShared structure and so this locking is not necessary. ** So define the lock related functions as no-ops. */ #define querySharedCacheTableLock(a,b,c) SQLITE_OK #define setSharedCacheTableLock(a,b,c) SQLITE_OK #define clearAllSharedCacheTableLocks(a) #define downgradeAllSharedCacheTableLocks(a) #define hasSharedCacheTableLock(a,b,c,d) 1 #define hasReadConflicts(a, b) 0 #endif #ifndef SQLITE_OMIT_SHARED_CACHE #ifdef SQLITE_DEBUG /* **** This function is only used as part of an assert() statement. *** ** ** Check to see if pBtree holds the required locks to read or write to the ** table with root page iRoot. Return 1 if it does and 0 if not. ** ** For example, when writing to a table with root-page iRoot via ** Btree connection pBtree: ** ** assert( hasSharedCacheTableLock(pBtree, iRoot, 0, WRITE_LOCK) ); ** ** When writing to an index that resides in a sharable database, the ** caller should have first obtained a lock specifying the root page of ** the corresponding table. This makes things a bit more complicated, ** as this module treats each table as a separate structure. To determine ** the table corresponding to the index being written, this ** function has to search through the database schema. ** ** Instead of a lock on the table/index rooted at page iRoot, the caller may ** hold a write-lock on the schema table (root page 1). This is also ** acceptable. */ static int hasSharedCacheTableLock( Btree *pBtree, /* Handle that must hold lock */ Pgno iRoot, /* Root page of b-tree */ int isIndex, /* True if iRoot is the root of an index b-tree */ int eLockType /* Required lock type (READ_LOCK or WRITE_LOCK) */ ){ Schema *pSchema = (Schema *)pBtree->pBt->pSchema; Pgno iTab = 0; BtLock *pLock; /* If this database is not shareable, or if the client is reading ** and has the read-uncommitted flag set, then no lock is required. ** Return true immediately. */ if( (pBtree->sharable==0) || (eLockType==READ_LOCK && (pBtree->flags & SQLITE_ReadUncommitted)) ){ return 1; } /* If the client is reading or writing an index and the schema is ** not loaded, then it is too difficult to actually check to see if ** the correct locks are held. So do not bother - just return true. ** This case does not come up very often anyhow. */ if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){ return 1; } /* Figure out the root-page that the lock should be held on. For table ** b-trees, this is just the root page of the b-tree being read or ** written. For index b-trees, it is the root page of the associated ** table. */ if( isIndex ){ HashElem *p; for(p=sqliteHashFirst(&pSchema->idxHash); p; p=sqliteHashNext(p)){ Index *pIdx = (Index *)sqliteHashData(p); if( pIdx->tnum==(int)iRoot ){ if( iTab ){ /* Two or more indexes share the same root page. There must ** be imposter tables. So just return true. The assert is not ** useful in that case. */ return 1; } iTab = pIdx->pTable->tnum; } } }else{ iTab = iRoot; } /* Search for the required lock. Either a write-lock on root-page iTab, a ** write-lock on the schema table, or (if the client is reading) a ** read-lock on iTab will suffice. Return 1 if any of these are found. */ for(pLock=pBtree->pBt->pLock; pLock; pLock=pLock->pNext){ if( pLock->pBtree==pBtree && (pLock->iTable==iTab || (pLock->eLock==WRITE_LOCK && pLock->iTable==1)) && pLock->eLock>=eLockType ){ return 1; } } /* Failed to find the required lock. */ return 0; } #endif /* SQLITE_DEBUG */ #ifdef SQLITE_DEBUG /* **** This function may be used as part of assert() statements only. **** ** ** Return true if it would be illegal for pBtree to write into the ** table or index rooted at iRoot because other shared connections are ** simultaneously reading that same table or index. ** ** It is illegal for pBtree to write if some other Btree object that ** shares the same BtShared object is currently reading or writing ** the iRoot table. Except, if the other Btree object has the ** read-uncommitted flag set, then it is OK for the other object to ** have a read cursor. ** ** For example, before writing to any part of the table or index ** rooted at page iRoot, one should call: ** ** assert( !hasReadConflicts(pBtree, iRoot) ); */ static int hasReadConflicts(Btree *pBtree, Pgno iRoot){ BtCursor *p; for(p=pBtree->pBt->pCursor; p; p=p->pNext){ if( p->pgnoRoot==iRoot && p->pBtree!=pBtree && 0==(p->pBtree->flags & SQLITE_ReadUncommitted) ){ return 1; } } return 0; } #endif /* #ifdef SQLITE_DEBUG */ /* ** Query to see if Btree handle p may obtain a lock of type eLock ** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return ** SQLITE_OK if the lock may be obtained (by calling ** setSharedCacheTableLock()), or SQLITE_LOCKED if not. */ static int querySharedCacheTableLock(Btree *p, Pgno iTab, u8 eLock){ BtShared *pBt = p->pBt; BtLock *pIter; assert( sqlite3BtreeHoldsMutex(p) ); assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); assert( !(p->flags&SQLITE_ReadUncommitted)||eLock==WRITE_LOCK||iTab==1 ); /* If requesting a write-lock, then the Btree must have an open write ** transaction on this file. And, obviously, for this to be so there ** must be an open write transaction on the file itself. */ assert( eLock==READ_LOCK || (p==pBt->pWriter && p->inTrans==TRANS_WRITE) ); assert( eLock==READ_LOCK || pBt->inTransaction==TRANS_WRITE ); /* This routine is a no-op if the shared-cache is not enabled */ if( !p->sharable ){ return SQLITE_OK; } /* If some other connection is holding an exclusive lock, the ** requested lock may not be obtained. */ if( pBt->pWriter!=p && (pBt->btsFlags & BTS_EXCLUSIVE)!=0 ){ sqlite3ConnectionBlocked(p->db, pBt->pWriter->db); return SQLITE_LOCKED_SHAREDCACHE; } for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ /* The condition (pIter->eLock!=eLock) in the following if(...) ** statement is a simplification of: ** ** (eLock==WRITE_LOCK || pIter->eLock==WRITE_LOCK) ** ** since we know that if eLock==WRITE_LOCK, then no other connection ** may hold a WRITE_LOCK on any table in this file (since there can ** only be a single writer). */ assert( pIter->eLock==READ_LOCK || pIter->eLock==WRITE_LOCK ); assert( eLock==READ_LOCK || pIter->pBtree==p || pIter->eLock==READ_LOCK); if( pIter->pBtree!=p && pIter->iTable==iTab && pIter->eLock!=eLock ){ sqlite3ConnectionBlocked(p->db, pIter->pBtree->db); if( eLock==WRITE_LOCK ){ assert( p==pBt->pWriter ); pBt->btsFlags |= BTS_PENDING; } return SQLITE_LOCKED_SHAREDCACHE; } } return SQLITE_OK; } #endif /* !SQLITE_OMIT_SHARED_CACHE */ #ifndef SQLITE_OMIT_SHARED_CACHE /* ** Add a lock on the table with root-page iTable to the shared-btree used ** by Btree handle p. Parameter eLock must be either READ_LOCK or ** WRITE_LOCK. ** ** This function assumes the following: ** ** (a) The specified Btree object p is connected to a sharable ** database (one with the BtShared.sharable flag set), and ** ** (b) No other Btree objects hold a lock that conflicts ** with the requested lock (i.e. querySharedCacheTableLock() has ** already been called and returned SQLITE_OK). ** ** SQLITE_OK is returned if the lock is added successfully. SQLITE_NOMEM ** is returned if a malloc attempt fails. */ static int setSharedCacheTableLock(Btree *p, Pgno iTable, u8 eLock){ BtShared *pBt = p->pBt; BtLock *pLock = 0; BtLock *pIter; assert( sqlite3BtreeHoldsMutex(p) ); assert( eLock==READ_LOCK || eLock==WRITE_LOCK ); assert( p->db!=0 ); /* A connection with the read-uncommitted flag set will never try to ** obtain a read-lock using this function. The only read-lock obtained ** by a connection in read-uncommitted mode is on the sqlite_master ** table, and that lock is obtained in BtreeBeginTrans(). */ assert( 0==(p->db->flags&SQLITE_ReadUncommitted) || eLock==WRITE_LOCK ); /* This function should only be called on a sharable b-tree after it ** has been determined that no other b-tree holds a conflicting lock. */ assert( p->sharable ); assert( SQLITE_OK==querySharedCacheTableLock(p, iTable, eLock) ); /* First search the list for an existing lock on this table. */ for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ if( pIter->iTable==iTable && pIter->pBtree==p ){ pLock = pIter; break; } } /* If the above search did not find a BtLock struct associating Btree p ** with table iTable, allocate one and link it into the list. */ if( !pLock ){ pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock)); if( !pLock ){ return SQLITE_NOMEM; } pLock->iTable = iTable; pLock->pBtree = p; pLock->pNext = pBt->pLock; pBt->pLock = pLock; } /* Set the BtLock.eLock variable to the maximum of the current lock ** and the requested lock. This means if a write-lock was already held ** and a read-lock requested, we don't incorrectly downgrade the lock. */ assert( WRITE_LOCK>READ_LOCK ); if( eLock>pLock->eLock ){ pLock->eLock = eLock; } return SQLITE_OK; } #endif /* !SQLITE_OMIT_SHARED_CACHE */ #ifndef SQLITE_OMIT_SHARED_CACHE /* ** Release all the table locks (locks obtained via calls to ** the setSharedCacheTableLock() procedure) held by Btree object p. ** ** This function assumes that Btree p has an open read or write ** transaction. If it does not, then the BTS_PENDING flag ** may be incorrectly cleared. */ static void clearAllSharedCacheTableLocks(Btree *p){ BtShared *pBt = p->pBt; BtLock **ppIter = &pBt->pLock; assert( sqlite3BtreeHoldsMutex(p) ); assert( p->sharable || 0==*ppIter ); assert( p->inTrans>0 ); while( *ppIter ){ BtLock *pLock = *ppIter; assert( (pBt->btsFlags & BTS_EXCLUSIVE)==0 || pBt->pWriter==pLock->pBtree ); assert( pLock->pBtree->inTrans>=pLock->eLock ); if( pLock->pBtree==p ){ *ppIter = pLock->pNext; assert( pLock->iTable!=1 || pLock==&p->lock ); if( pLock->iTable!=1 ){ sqlite3_free(pLock); } }else{ ppIter = &pLock->pNext; } } assert( (pBt->btsFlags & BTS_PENDING)==0 || pBt->pWriter ); if( pBt->pWriter==p ){ pBt->pWriter = 0; pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); }else if( pBt->nTransaction==2 ){ /* This function is called when Btree p is concluding its ** transaction. If there currently exists a writer, and p is not ** that writer, then the number of locks held by connections other ** than the writer must be about to drop to zero. In this case ** set the BTS_PENDING flag to 0. ** ** If there is not currently a writer, then BTS_PENDING must ** be zero already. So this next line is harmless in that case. */ pBt->btsFlags &= ~BTS_PENDING; } } /* ** This function changes all write-locks held by Btree p into read-locks. */ static void downgradeAllSharedCacheTableLocks(Btree *p){ BtShared *pBt = p->pBt; if( pBt->pWriter==p ){ BtLock *pLock; pBt->pWriter = 0; pBt->btsFlags &= ~(BTS_EXCLUSIVE|BTS_PENDING); for(pLock=pBt->pLock; pLock; pLock=pLock->pNext){ assert( pLock->eLock==READ_LOCK || pLock->pBtree==p ); pLock->eLock = READ_LOCK; } } } #endif /* SQLITE_OMIT_SHARED_CACHE */ static void releasePage(MemPage *pPage); /* Forward reference */ /* ***** This routine is used inside of assert() only **** ** ** Verify that the cursor holds the mutex on its BtShared */ #ifdef SQLITE_DEBUG static int cursorHoldsMutex(BtCursor *p){ return sqlite3_mutex_held(p->pBt->mutex); } #endif /* ** Invalidate the overflow cache of the cursor passed as the first argument. ** on the shared btree structure pBt. */ #define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl) /* ** Invalidate the overflow page-list cache for all cursors opened ** on the shared btree structure pBt. */ static void invalidateAllOverflowCache(BtShared *pBt){ BtCursor *p; assert( sqlite3_mutex_held(pBt->mutex) ); for(p=pBt->pCursor; p; p=p->pNext){ invalidateOverflowCache(p); } } #ifndef SQLITE_OMIT_INCRBLOB /* ** This function is called before modifying the contents of a table ** to invalidate any incrblob cursors that are open on the ** row or one of the rows being modified. ** ** If argument isClearTable is true, then the entire contents of the ** table is about to be deleted. In this case invalidate all incrblob ** cursors open on any row within the table with root-page pgnoRoot. ** ** Otherwise, if argument isClearTable is false, then the row with ** rowid iRow is being replaced or deleted. In this case invalidate ** only those incrblob cursors open on that specific row. */ static void invalidateIncrblobCursors( Btree *pBtree, /* The database file to check */ i64 iRow, /* The rowid that might be changing */ int isClearTable /* True if all rows are being deleted */ ){ BtCursor *p; if( pBtree->hasIncrblobCur==0 ) return; assert( sqlite3BtreeHoldsMutex(pBtree) ); pBtree->hasIncrblobCur = 0; for(p=pBtree->pBt->pCursor; p; p=p->pNext){ if( (p->curFlags & BTCF_Incrblob)!=0 ){ pBtree->hasIncrblobCur = 1; if( isClearTable || p->info.nKey==iRow ){ p->eState = CURSOR_INVALID; } } } } #else /* Stub function when INCRBLOB is omitted */ #define invalidateIncrblobCursors(x,y,z) #endif /* SQLITE_OMIT_INCRBLOB */ /* ** Set bit pgno of the BtShared.pHasContent bitvec. This is called ** when a page that previously contained data becomes a free-list leaf ** page. ** ** The BtShared.pHasContent bitvec exists to work around an obscure ** bug caused by the interaction of two useful IO optimizations surrounding ** free-list leaf pages: ** ** 1) When all data is deleted from a page and the page becomes ** a free-list leaf page, the page is not written to the database ** (as free-list leaf pages contain no meaningful data). Sometimes ** such a page is not even journalled (as it will not be modified, ** why bother journalling it?). ** ** 2) When a free-list leaf page is reused, its content is not read ** from the database or written to the journal file (why should it ** be, if it is not at all meaningful?). ** ** By themselves, these optimizations work fine and provide a handy ** performance boost to bulk delete or insert operations. However, if ** a page is moved to the free-list and then reused within the same ** transaction, a problem comes up. If the page is not journalled when ** it is moved to the free-list and it is also not journalled when it ** is extracted from the free-list and reused, then the original data ** may be lost. In the event of a rollback, it may not be possible ** to restore the database to its original configuration. ** ** The solution is the BtShared.pHasContent bitvec. Whenever a page is ** moved to become a free-list leaf page, the corresponding bit is ** set in the bitvec. Whenever a leaf page is extracted from the free-list, ** optimization 2 above is omitted if the corresponding bit is already ** set in BtShared.pHasContent. The contents of the bitvec are cleared ** at the end of every transaction. */ static int btreeSetHasContent(BtShared *pBt, Pgno pgno){ int rc = SQLITE_OK; if( !pBt->pHasContent ){ assert( pgno<=pBt->nPage ); pBt->pHasContent = sqlite3BitvecCreate(pBt->nPage); if( !pBt->pHasContent ){ rc = SQLITE_NOMEM; } } if( rc==SQLITE_OK && pgno<=sqlite3BitvecSize(pBt->pHasContent) ){ rc = sqlite3BitvecSet(pBt->pHasContent, pgno); } return rc; } /* ** Query the BtShared.pHasContent vector. ** ** This function is called when a free-list leaf page is removed from the ** free-list for reuse. It returns false if it is safe to retrieve the ** page from the pager layer with the 'no-content' flag set. True otherwise. */ static int btreeGetHasContent(BtShared *pBt, Pgno pgno){ Bitvec *p = pBt->pHasContent; return (p && (pgno>sqlite3BitvecSize(p) || sqlite3BitvecTest(p, pgno))); } /* ** Clear (destroy) the BtShared.pHasContent bitvec. This should be ** invoked at the conclusion of each write-transaction. */ static void btreeClearHasContent(BtShared *pBt){ sqlite3BitvecDestroy(pBt->pHasContent); pBt->pHasContent = 0; } /* ** Release all of the apPage[] pages for a cursor. */ static void btreeReleaseAllCursorPages(BtCursor *pCur){ int i; for(i=0; i<=pCur->iPage; i++){ releasePage(pCur->apPage[i]); pCur->apPage[i] = 0; } pCur->iPage = -1; } /* ** The cursor passed as the only argument must point to a valid entry ** when this function is called (i.e. have eState==CURSOR_VALID). This ** function saves the current cursor key in variables pCur->nKey and ** pCur->pKey. SQLITE_OK is returned if successful or an SQLite error ** code otherwise. ** ** If the cursor is open on an intkey table, then the integer key ** (the rowid) is stored in pCur->nKey and pCur->pKey is left set to ** NULL. If the cursor is open on a non-intkey table, then pCur->pKey is ** set to point to a malloced buffer pCur->nKey bytes in size containing ** the key. */ static int saveCursorKey(BtCursor *pCur){ int rc; assert( CURSOR_VALID==pCur->eState ); assert( 0==pCur->pKey ); assert( cursorHoldsMutex(pCur) ); rc = sqlite3BtreeKeySize(pCur, &pCur->nKey); assert( rc==SQLITE_OK ); /* KeySize() cannot fail */ /* If this is an intKey table, then the above call to BtreeKeySize() ** stores the integer key in pCur->nKey. In this case this value is ** all that is required. Otherwise, if pCur is not open on an intKey ** table, then malloc space for and store the pCur->nKey bytes of key ** data. */ if( 0==pCur->curIntKey ){ void *pKey = sqlite3Malloc( pCur->nKey ); if( pKey ){ rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); if( rc==SQLITE_OK ){ pCur->pKey = pKey; }else{ sqlite3_free(pKey); } }else{ rc = SQLITE_NOMEM; } } assert( !pCur->curIntKey || !pCur->pKey ); return rc; } /* ** Save the current cursor position in the variables BtCursor.nKey ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK. ** ** The caller must ensure that the cursor is valid (has eState==CURSOR_VALID) ** prior to calling this routine. */ static int saveCursorPosition(BtCursor *pCur){ int rc; assert( CURSOR_VALID==pCur->eState || CURSOR_SKIPNEXT==pCur->eState ); assert( 0==pCur->pKey ); assert( cursorHoldsMutex(pCur) ); if( pCur->eState==CURSOR_SKIPNEXT ){ pCur->eState = CURSOR_VALID; }else{ pCur->skipNext = 0; } rc = saveCursorKey(pCur); if( rc==SQLITE_OK ){ btreeReleaseAllCursorPages(pCur); pCur->eState = CURSOR_REQUIRESEEK; } pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl|BTCF_AtLast); return rc; } /* Forward reference */ static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); /* ** Save the positions of all cursors (except pExcept) that are open on ** the table with root-page iRoot. "Saving the cursor position" means that ** the location in the btree is remembered in such a way that it can be ** moved back to the same spot after the btree has been modified. This ** routine is called just before cursor pExcept is used to modify the ** table, for example in BtreeDelete() or BtreeInsert(). ** ** If there are two or more cursors on the same btree, then all such ** cursors should have their BTCF_Multiple flag set. The btreeCursor() ** routine enforces that rule. This routine only needs to be called in ** the uncommon case when pExpect has the BTCF_Multiple flag set. ** ** If pExpect!=NULL and if no other cursors are found on the same root-page, ** then the BTCF_Multiple flag on pExpect is cleared, to avoid another ** pointless call to this routine. ** ** Implementation note: This routine merely checks to see if any cursors ** need to be saved. It calls out to saveCursorsOnList() in the (unusual) ** event that cursors are in need to being saved. */ static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ BtCursor *p; assert( sqlite3_mutex_held(pBt->mutex) ); assert( pExcept==0 || pExcept->pBt==pBt ); for(p=pBt->pCursor; p; p=p->pNext){ if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; } if( p ) return saveCursorsOnList(p, iRoot, pExcept); if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple; return SQLITE_OK; } /* This helper routine to saveAllCursors does the actual work of saving ** the cursors if and when a cursor is found that actually requires saving. ** The common case is that no cursors need to be saved, so this routine is ** broken out from its caller to avoid unnecessary stack pointer movement. */ static int SQLITE_NOINLINE saveCursorsOnList( BtCursor *p, /* The first cursor that needs saving */ Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */ BtCursor *pExcept /* Do not save this cursor */ ){ do{ if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ int rc = saveCursorPosition(p); if( SQLITE_OK!=rc ){ return rc; } }else{ testcase( p->iPage>0 ); btreeReleaseAllCursorPages(p); } } p = p->pNext; }while( p ); return SQLITE_OK; } /* ** Clear the current cursor position. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeClearCursor(BtCursor *pCur){ assert( cursorHoldsMutex(pCur) ); sqlite3_free(pCur->pKey); pCur->pKey = 0; pCur->eState = CURSOR_INVALID; } /* ** In this version of BtreeMoveto, pKey is a packed index record ** such as is generated by the OP_MakeRecord opcode. Unpack the ** record and then call BtreeMovetoUnpacked() to do the work. */ static int btreeMoveto( BtCursor *pCur, /* Cursor open on the btree to be searched */ const void *pKey, /* Packed key if the btree is an index */ i64 nKey, /* Integer key for tables. Size of pKey for indices */ int bias, /* Bias search to the high end */ int *pRes /* Write search results here */ ){ int rc; /* Status code */ UnpackedRecord *pIdxKey; /* Unpacked index key */ char aSpace[200]; /* Temp space for pIdxKey - to avoid a malloc */ char *pFree = 0; if( pKey ){ assert( nKey==(i64)(int)nKey ); pIdxKey = sqlite3VdbeAllocUnpackedRecord( pCur->pKeyInfo, aSpace, sizeof(aSpace), &pFree ); if( pIdxKey==0 ) return SQLITE_NOMEM; sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey); if( pIdxKey->nField==0 ){ sqlite3DbFree(pCur->pKeyInfo->pBtree, pFree); return SQLITE_CORRUPT_BKPT; } }else{ pIdxKey = 0; } rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes); if( pFree ){ sqlite3DbFree(pCur->pKeyInfo->pBtree, pFree); } return rc; } /* ** Restore the cursor to the position it was in (or as close to as possible) ** when saveCursorPosition() was called. Note that this call deletes the ** saved position info stored by saveCursorPosition(), so there can be ** at most one effective restoreCursorPosition() call after each ** saveCursorPosition(). */ static int btreeRestoreCursorPosition(BtCursor *pCur){ int rc; int skipNext; assert( cursorHoldsMutex(pCur) ); assert( pCur->eState>=CURSOR_REQUIRESEEK ); if( pCur->eState==CURSOR_FAULT ){ return pCur->skipNext; } pCur->eState = CURSOR_INVALID; rc = btreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &skipNext); if( rc==SQLITE_OK ){ sqlite3_free(pCur->pKey); pCur->pKey = 0; assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); pCur->skipNext |= skipNext; if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ pCur->eState = CURSOR_SKIPNEXT; } } return rc; } #define restoreCursorPosition(p) \ (p->eState>=CURSOR_REQUIRESEEK ? \ btreeRestoreCursorPosition(p) : \ SQLITE_OK) /* ** Determine whether or not a cursor has moved from the position where ** it was last placed, or has been invalidated for any other reason. ** Cursors can move when the row they are pointing at is deleted out ** from under them, for example. Cursor might also move if a btree ** is rebalanced. ** ** Calling this routine with a NULL cursor pointer returns false. ** ** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor ** back to where it ought to be if this routine returns true. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorHasMoved(BtCursor *pCur){ return pCur->eState!=CURSOR_VALID; } /* ** This routine restores a cursor back to its original position after it ** has been moved by some outside activity (such as a btree rebalance or ** a row having been deleted out from under the cursor). ** ** On success, the *pDifferentRow parameter is false if the cursor is left ** pointing at exactly the same row. *pDifferntRow is the row the cursor ** was pointing to has been deleted, forcing the cursor to point to some ** nearby row. ** ** This routine should only be called for a cursor that just returned ** TRUE from sqlite3BtreeCursorHasMoved(). */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ int rc; assert( pCur!=0 ); assert( pCur->eState!=CURSOR_VALID ); rc = restoreCursorPosition(pCur); if( rc ){ *pDifferentRow = 1; return rc; } if( pCur->eState!=CURSOR_VALID ){ *pDifferentRow = 1; }else{ assert( pCur->skipNext==0 ); *pDifferentRow = 0; } return SQLITE_OK; } #ifndef SQLITE_OMIT_AUTOVACUUM /* ** Given a page number of a regular database page, return the page ** number for the pointer-map page that contains the entry for the ** input page number. ** ** Return 0 (not a valid page) for pgno==1 since there is ** no pointer map associated with page 1. The integrity_check logic ** requires that ptrmapPageno(*,1)!=1. */ static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){ int nPagesPerMapPage; Pgno iPtrMap, ret; assert( sqlite3_mutex_held(pBt->mutex) ); if( pgno<2 ) return 0; nPagesPerMapPage = (pBt->usableSize/5)+1; iPtrMap = (pgno-2)/nPagesPerMapPage; ret = (iPtrMap*nPagesPerMapPage) + 2; if( ret==PENDING_BYTE_PAGE(pBt) ){ ret++; } return ret; } /* ** Write an entry into the pointer map. ** ** This routine updates the pointer map entry for page number 'key' ** so that it maps to type 'eType' and parent page number 'pgno'. ** ** If *pRC is initially non-zero (non-SQLITE_OK) then this routine is ** a no-op. If an error occurs, the appropriate error code is written ** into *pRC. */ static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){ DbPage *pDbPage; /* The pointer map page */ u8 *pPtrmap; /* The pointer map data */ Pgno iPtrmap; /* The pointer map page number */ int offset; /* Offset in pointer map page */ int rc; /* Return code from subfunctions */ if( *pRC ) return; assert( sqlite3_mutex_held(pBt->mutex) ); /* The master-journal page number must never be used as a pointer map page */ assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); assert( pBt->autoVacuum ); if( key==0 ){ *pRC = SQLITE_CORRUPT_BKPT; return; } iPtrmap = PTRMAP_PAGENO(pBt, key); rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); if( rc!=SQLITE_OK ){ *pRC = rc; return; } offset = PTRMAP_PTROFFSET(iPtrmap, key); if( offset<0 ){ *pRC = SQLITE_CORRUPT_BKPT; goto ptrmap_exit; } assert( offset <= (int)pBt->usableSize-5 ); pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){ TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent)); *pRC= rc = sqlite3PagerWrite(pDbPage); if( rc==SQLITE_OK ){ pPtrmap[offset] = eType; put4byte(&pPtrmap[offset+1], parent); } } ptrmap_exit: sqlite3PagerUnref(pDbPage); } /* ** Read an entry from the pointer map. ** ** This routine retrieves the pointer map entry for page 'key', writing ** the type and parent page number to *pEType and *pPgno respectively. ** An error code is returned if something goes wrong, otherwise SQLITE_OK. */ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ DbPage *pDbPage; /* The pointer map page */ int iPtrmap; /* Pointer map page index */ u8 *pPtrmap; /* Pointer map page data */ int offset; /* Offset of entry in pointer map */ int rc; assert( sqlite3_mutex_held(pBt->mutex) ); iPtrmap = PTRMAP_PAGENO(pBt, key); rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage); if( rc!=0 ){ return rc; } pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage); offset = PTRMAP_PTROFFSET(iPtrmap, key); if( offset<0 ){ sqlite3PagerUnref(pDbPage); return SQLITE_CORRUPT_BKPT; } assert( offset <= (int)pBt->usableSize-5 ); assert( pEType!=0 ); *pEType = pPtrmap[offset]; if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]); sqlite3PagerUnref(pDbPage); if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT; return SQLITE_OK; } #else /* if defined SQLITE_OMIT_AUTOVACUUM */ #define ptrmapPut(w,x,y,z,rc) #define ptrmapGet(w,x,y,z) SQLITE_OK #define ptrmapPutOvflPtr(x, y, rc) #endif /* ** Given a btree page and a cell index (0 means the first cell on ** the page, 1 means the second cell, and so forth) return a pointer ** to the cell content. ** ** findCellPastPtr() does the same except it skips past the initial ** 4-byte child pointer found on interior pages, if there is one. ** ** This routine works only for pages that do not contain overflow cells. */ #define findCell(P,I) \ ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) #define findCellPastPtr(P,I) \ ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) /* ** This is common tail processing for btreeParseCellPtr() and ** btreeParseCellPtrIndex() for the case when the cell does not fit entirely ** on a single B-tree page. Make necessary adjustments to the CellInfo ** structure. */ static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow( MemPage *pPage, /* Page containing the cell */ u8 *pCell, /* Pointer to the cell text. */ CellInfo *pInfo /* Fill in this structure */ ){ /* If the payload will not fit completely on the local page, we have ** to decide how much to store locally and how much to spill onto ** overflow pages. The strategy is to minimize the amount of unused ** space on overflow pages while keeping the amount of local storage ** in between minLocal and maxLocal. ** ** Warning: changing the way overflow payload is distributed in any ** way will result in an incompatible file format. */ int minLocal; /* Minimum amount of payload held locally */ int maxLocal; /* Maximum amount of payload held locally */ int surplus; /* Overflow payload available for local storage */ minLocal = pPage->minLocal; maxLocal = pPage->maxLocal; surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4); testcase( surplus==maxLocal ); testcase( surplus==maxLocal+1 ); if( surplus <= maxLocal ){ pInfo->nLocal = (u16)surplus; }else{ pInfo->nLocal = (u16)minLocal; } pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); pInfo->nSize = pInfo->iOverflow + 4; } /* ** The following routines are implementations of the MemPage.xParseCell() ** method. ** ** Parse a cell content block and fill in the CellInfo structure. ** ** btreeParseCellPtr() => table btree leaf nodes ** btreeParseCellNoPayload() => table btree internal nodes ** btreeParseCellPtrIndex() => index btree nodes ** ** There is also a wrapper function btreeParseCell() that works for ** all MemPage types and that references the cell by index rather than ** by pointer. */ static void btreeParseCellPtrNoPayload( MemPage *pPage, /* Page containing the cell */ u8 *pCell, /* Pointer to the cell text. */ CellInfo *pInfo /* Fill in this structure */ ){ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->leaf==0 ); assert( pPage->noPayload ); assert( pPage->childPtrSize==4 ); #ifndef SQLITE_DEBUG UNUSED_PARAMETER(pPage); #endif pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); pInfo->nPayload = 0; pInfo->nLocal = 0; pInfo->iOverflow = 0; pInfo->pPayload = 0; return; } static void btreeParseCellPtr( MemPage *pPage, /* Page containing the cell */ u8 *pCell, /* Pointer to the cell text. */ CellInfo *pInfo /* Fill in this structure */ ){ u8 *pIter; /* For scanning through pCell */ u32 nPayload; /* Number of bytes of cell payload */ u64 iKey; /* Extracted Key value */ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->leaf==0 || pPage->leaf==1 ); assert( pPage->intKeyLeaf || pPage->noPayload ); assert( pPage->noPayload==0 ); assert( pPage->intKeyLeaf ); assert( pPage->childPtrSize==0 ); pIter = pCell; /* The next block of code is equivalent to: ** ** pIter += getVarint32(pIter, nPayload); ** ** The code is inlined to avoid a function call. */ nPayload = *pIter; if( nPayload>=0x80 ){ u8 *pEnd = &pIter[8]; nPayload &= 0x7f; do{ nPayload = (nPayload<<7) | (*++pIter & 0x7f); }while( (*pIter)>=0x80 && pIternKey); ** ** The code is inlined to avoid a function call. */ iKey = *pIter; if( iKey>=0x80 ){ u8 *pEnd = &pIter[7]; iKey &= 0x7f; while(1){ iKey = (iKey<<7) | (*++pIter & 0x7f); if( (*pIter)<0x80 ) break; if( pIter>=pEnd ){ iKey = (iKey<<8) | *++pIter; break; } } } pIter++; pInfo->nKey = *(i64*)&iKey; pInfo->nPayload = nPayload; pInfo->pPayload = pIter; testcase( nPayload==pPage->maxLocal ); testcase( nPayload==pPage->maxLocal+1 ); if( nPayload<=pPage->maxLocal ){ /* This is the (easy) common case where the entire payload fits ** on the local page. No overflow is required. */ pInfo->nSize = nPayload + (u16)(pIter - pCell); if( pInfo->nSize<4 ) pInfo->nSize = 4; pInfo->nLocal = (u16)nPayload; pInfo->iOverflow = 0; }else{ btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); } } static void btreeParseCellPtrIndex( MemPage *pPage, /* Page containing the cell */ u8 *pCell, /* Pointer to the cell text. */ CellInfo *pInfo /* Fill in this structure */ ){ u8 *pIter; /* For scanning through pCell */ u32 nPayload; /* Number of bytes of cell payload */ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->leaf==0 || pPage->leaf==1 ); assert( pPage->intKeyLeaf==0 ); assert( pPage->noPayload==0 ); pIter = pCell + pPage->childPtrSize; nPayload = *pIter; if( nPayload>=0x80 ){ u8 *pEnd = &pIter[8]; nPayload &= 0x7f; do{ nPayload = (nPayload<<7) | (*++pIter & 0x7f); }while( *(pIter)>=0x80 && pIternKey = nPayload; pInfo->nPayload = nPayload; pInfo->pPayload = pIter; testcase( nPayload==pPage->maxLocal ); testcase( nPayload==pPage->maxLocal+1 ); if( nPayload<=pPage->maxLocal ){ /* This is the (easy) common case where the entire payload fits ** on the local page. No overflow is required. */ pInfo->nSize = nPayload + (u16)(pIter - pCell); if( pInfo->nSize<4 ) pInfo->nSize = 4; pInfo->nLocal = (u16)nPayload; pInfo->iOverflow = 0; }else{ btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); } } static void btreeParseCell( MemPage *pPage, /* Page containing the cell */ int iCell, /* The cell index. First cell is 0 */ CellInfo *pInfo /* Fill in this structure */ ){ pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo); } /* ** The following routines are implementations of the MemPage.xCellSize ** method. ** ** Compute the total number of bytes that a Cell needs in the cell ** data area of the btree-page. The return number includes the cell ** data header and the local payload, but not any overflow page or ** the space used by the cell pointer. ** ** cellSizePtrNoPayload() => table internal nodes ** cellSizePtr() => all index nodes & table leaf nodes */ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ u8 *pEnd; /* End mark for a varint */ u32 nSize; /* Size value to return */ #ifdef SQLITE_DEBUG /* The value returned by this function should always be the same as ** the (CellInfo.nSize) value found by doing a full parse of the ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of ** this function verifies that this invariant is not violated. */ CellInfo debuginfo; pPage->xParseCell(pPage, pCell, &debuginfo); #endif assert( pPage->noPayload==0 ); nSize = *pIter; if( nSize>=0x80 ){ pEnd = &pIter[8]; nSize &= 0x7f; do{ nSize = (nSize<<7) | (*++pIter & 0x7f); }while( *(pIter)>=0x80 && pIterintKey ){ /* pIter now points at the 64-bit integer key value, a variable length ** integer. The following block moves pIter to point at the first byte ** past the end of the key value. */ pEnd = &pIter[9]; while( (*pIter++)&0x80 && pItermaxLocal ); testcase( nSize==pPage->maxLocal+1 ); if( nSize<=pPage->maxLocal ){ nSize += (u32)(pIter - pCell); if( nSize<4 ) nSize = 4; }else{ int minLocal = pPage->minLocal; nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4); testcase( nSize==pPage->maxLocal ); testcase( nSize==pPage->maxLocal+1 ); if( nSize>pPage->maxLocal ){ nSize = minLocal; } nSize += 4 + (u16)(pIter - pCell); } assert( nSize==debuginfo.nSize || CORRUPT_DB ); return (u16)nSize; } static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){ u8 *pIter = pCell + 4; /* For looping over bytes of pCell */ u8 *pEnd; /* End mark for a varint */ #ifdef SQLITE_DEBUG /* The value returned by this function should always be the same as ** the (CellInfo.nSize) value found by doing a full parse of the ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of ** this function verifies that this invariant is not violated. */ CellInfo debuginfo; pPage->xParseCell(pPage, pCell, &debuginfo); #else UNUSED_PARAMETER(pPage); #endif assert( pPage->childPtrSize==4 ); pEnd = pIter + 9; while( (*pIter++)&0x80 && pIterxCellSize(pPage, findCell(pPage, iCell)); } #endif #ifndef SQLITE_OMIT_AUTOVACUUM /* ** If the cell pCell, part of page pPage contains a pointer ** to an overflow page, insert an entry into the pointer-map ** for the overflow page. */ static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ CellInfo info; if( *pRC ) return; assert( pCell!=0 ); pPage->xParseCell(pPage, pCell, &info); if( info.iOverflow ){ Pgno ovfl = get4byte(&pCell[info.iOverflow]); ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); } } #endif /* ** Defragment the page given. All Cells are moved to the ** end of the page and all free space is collected into one ** big FreeBlk that occurs in between the header and cell ** pointer array and the cell content area. ** ** EVIDENCE-OF: R-44582-60138 SQLite may from time to time reorganize a ** b-tree page so that there are no freeblocks or fragment bytes, all ** unused bytes are contained in the unallocated space region, and all ** cells are packed tightly at the end of the page. */ static int defragmentPage(MemPage *pPage){ int i; /* Loop counter */ int pc; /* Address of the i-th cell */ int hdr; /* Offset to the page header */ int size; /* Size of a cell */ int usableSize; /* Number of usable bytes on a page */ int cellOffset; /* Offset to the cell pointer array */ int cbrk; /* Offset to the cell content area */ int nCell; /* Number of cells on the page */ unsigned char *data; /* The page data */ unsigned char *temp; /* Temp area for cell content */ unsigned char *src; /* Source of content */ int iCellFirst; /* First allowable cell index */ int iCellLast; /* Last possible cell index */ assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( pPage->pBt!=0 ); assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE ); assert( pPage->nOverflow==0 ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); temp = 0; src = data = pPage->aData; hdr = pPage->hdrOffset; cellOffset = pPage->cellOffset; nCell = pPage->nCell; assert( nCell==get2byte(&data[hdr+3]) ); usableSize = pPage->pBt->usableSize; cbrk = usableSize; iCellFirst = cellOffset + 2*nCell; iCellLast = usableSize - 4; for(i=0; iiCellLast ){ return SQLITE_CORRUPT_BKPT; } assert( pc>=iCellFirst && pc<=iCellLast ); size = pPage->xCellSize(pPage, &src[pc]); cbrk -= size; if( cbrkusableSize ){ return SQLITE_CORRUPT_BKPT; } assert( cbrk+size<=usableSize && cbrk>=iCellFirst ); testcase( cbrk+size==usableSize ); testcase( pc+size==usableSize ); put2byte(pAddr, cbrk); if( temp==0 ){ int x; if( cbrk==pc ) continue; temp = sqlite3PagerTempSpace(pPage->pBt->pPager); x = get2byte(&data[hdr+5]); memcpy(&temp[x], &data[x], (cbrk+size) - x); src = temp; } memcpy(&data[cbrk], &src[pc], size); } assert( cbrk>=iCellFirst ); put2byte(&data[hdr+5], cbrk); data[hdr+1] = 0; data[hdr+2] = 0; data[hdr+7] = 0; memset(&data[iCellFirst], 0, cbrk-iCellFirst); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); if( cbrk-iCellFirst!=pPage->nFree ){ return SQLITE_CORRUPT_BKPT; } return SQLITE_OK; } /* ** Search the free-list on page pPg for space to store a cell nByte bytes in ** size. If one can be found, return a pointer to the space and remove it ** from the free-list. ** ** If no suitable space can be found on the free-list, return NULL. ** ** This function may detect corruption within pPg. If corruption is ** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned. ** ** Slots on the free list that are between 1 and 3 bytes larger than nByte ** will be ignored if adding the extra space to the fragmentation count ** causes the fragmentation count to exceed 60. */ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ const int hdr = pPg->hdrOffset; u8 * const aData = pPg->aData; int iAddr = hdr + 1; int pc = get2byte(&aData[iAddr]); int x; int usableSize = pPg->pBt->usableSize; assert( pc>0 ); do{ int size; /* Size of the free slot */ /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of ** increasing offset. */ if( pc>usableSize-4 || pc=0 ){ testcase( x==4 ); testcase( x==3 ); if( pc < pPg->cellOffset+2*pPg->nCell || size+pc > usableSize ){ *pRc = SQLITE_CORRUPT_BKPT; return 0; }else if( x<4 ){ /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total ** number of bytes in fragments may not exceed 60. */ if( aData[hdr+7]>57 ) return 0; /* Remove the slot from the free-list. Update the number of ** fragmented bytes within the page. */ memcpy(&aData[iAddr], &aData[pc], 2); aData[hdr+7] += (u8)x; }else{ /* The slot remains on the free-list. Reduce its size to account ** for the portion used by the new allocation. */ put2byte(&aData[pc+2], x); } return &aData[pc + x]; } iAddr = pc; pc = get2byte(&aData[pc]); }while( pc ); return 0; } /* ** Allocate nByte bytes of space from within the B-Tree page passed ** as the first argument. Write into *pIdx the index into pPage->aData[] ** of the first byte of allocated space. Return either SQLITE_OK or ** an error code (usually SQLITE_CORRUPT). ** ** The caller guarantees that there is sufficient space to make the ** allocation. This routine might need to defragment in order to bring ** all the space together, however. This routine will avoid using ** the first two bytes past the cell pointer area since presumably this ** allocation is being made in order to insert a new cell, so we will ** also end up needing a new cell pointer. */ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */ u8 * const data = pPage->aData; /* Local cache of pPage->aData */ int top; /* First byte of cell content area */ int rc = SQLITE_OK; /* Integer return code */ int gap; /* First byte of gap between cell pointers and cell content */ assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( pPage->pBt ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( nByte>=0 ); /* Minimum cell size is 4 */ assert( pPage->nFree>=nByte ); assert( pPage->nOverflow==0 ); assert( nByte < (int)(pPage->pBt->usableSize-8) ); assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf ); gap = pPage->cellOffset + 2*pPage->nCell; assert( gap<=65536 ); /* EVIDENCE-OF: R-29356-02391 If the database uses a 65536-byte page size ** and the reserved space is zero (the usual value for reserved space) ** then the cell content offset of an empty page wants to be 65536. ** However, that integer is too large to be stored in a 2-byte unsigned ** integer, so a value of 0 is used in its place. */ top = get2byte(&data[hdr+5]); assert( top<=(int)pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */ if( gap>top ){ if( top==0 && pPage->pBt->usableSize==65536 ){ top = 65536; }else{ return SQLITE_CORRUPT_BKPT; } } /* If there is enough space between gap and top for one more cell pointer ** array entry offset, and if the freelist is not empty, then search the ** freelist looking for a free slot big enough to satisfy the request. */ testcase( gap+2==top ); testcase( gap+1==top ); testcase( gap==top ); if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ u8 *pSpace = pageFindSlot(pPage, nByte, &rc); if( pSpace ){ assert( pSpace>=data && (pSpace - data)<65536 ); *pIdx = (int)(pSpace - data); return SQLITE_OK; }else if( rc ){ return rc; } } /* The request could not be fulfilled using a freelist slot. Check ** to see if defragmentation is necessary. */ testcase( gap+2+nByte==top ); if( gap+2+nByte>top ){ assert( pPage->nCell>0 || CORRUPT_DB ); rc = defragmentPage(pPage); if( rc ) return rc; top = get2byteNotZero(&data[hdr+5]); assert( gap+nByte<=top ); } /* Allocate memory from the gap in between the cell pointer array ** and the cell content area. The btreeInitPage() call has already ** validated the freelist. Given that the freelist is valid, there ** is no way that the allocation can extend off the end of the page. ** The assert() below verifies the previous sentence. */ top -= nByte; put2byte(&data[hdr+5], top); assert( top+nByte <= (int)pPage->pBt->usableSize ); *pIdx = top; return SQLITE_OK; } /* ** Return a section of the pPage->aData to the freelist. ** The first byte of the new free block is pPage->aData[iStart] ** and the size of the block is iSize bytes. ** ** Adjacent freeblocks are coalesced. ** ** Note that even though the freeblock list was checked by btreeInitPage(), ** that routine will not detect overlap between cells or freeblocks. Nor ** does it detect cells or freeblocks that encrouch into the reserved bytes ** at the end of the page. So do additional corruption checks inside this ** routine and return SQLITE_CORRUPT if any problems are found. */ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ u16 iPtr; /* Address of ptr to next freeblock */ u16 iFreeBlk; /* Address of the next freeblock */ u8 hdr; /* Page header size. 0 or 100 */ u8 nFrag = 0; /* Reduction in fragmentation */ u16 iOrigSize = iSize; /* Original value of iSize */ u32 iLast = pPage->pBt->usableSize-4; /* Largest possible freeblock offset */ u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */ unsigned char *data = pPage->aData; /* Page content */ assert( pPage->pBt!=0 ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( iSize>=4 ); /* Minimum cell size is 4 */ assert( iStart<=iLast ); /* Overwrite deleted information with zeros when the secure_delete ** option is enabled */ if( pPage->pBt->btsFlags & BTS_SECURE_DELETE ){ memset(&data[iStart], 0, iSize); } /* The list of freeblocks must be in ascending order. Find the ** spot on the list where iStart should be inserted. */ hdr = pPage->hdrOffset; iPtr = hdr + 1; if( data[iPtr+1]==0 && data[iPtr]==0 ){ iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ }else{ while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlkiLast ) return SQLITE_CORRUPT_BKPT; assert( iFreeBlk>iPtr || iFreeBlk==0 ); /* At this point: ** iFreeBlk: First freeblock after iStart, or zero if none ** iPtr: The address of a pointer to iFreeBlk ** ** Check to see if iFreeBlk should be coalesced onto the end of iStart. */ if( iFreeBlk && iEnd+3>=iFreeBlk ){ nFrag = iFreeBlk - iEnd; if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT; iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); if( iEnd > pPage->pBt->usableSize ) return SQLITE_CORRUPT_BKPT; iSize = iEnd - iStart; iFreeBlk = get2byte(&data[iFreeBlk]); } /* If iPtr is another freeblock (that is, if iPtr is not the freelist ** pointer in the page header) then check to see if iStart should be ** coalesced onto the end of iPtr. */ if( iPtr>hdr+1 ){ int iPtrEnd = iPtr + get2byte(&data[iPtr+2]); if( iPtrEnd+3>=iStart ){ if( iPtrEnd>iStart ) return SQLITE_CORRUPT_BKPT; nFrag += iStart - iPtrEnd; iSize = iEnd - iPtr; iStart = iPtr; } } if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_BKPT; data[hdr+7] -= nFrag; } if( iStart==get2byte(&data[hdr+5]) ){ /* The new freeblock is at the beginning of the cell content area, ** so just extend the cell content area rather than create another ** freelist entry */ if( iPtr!=hdr+1 ) return SQLITE_CORRUPT_BKPT; put2byte(&data[hdr+1], iFreeBlk); put2byte(&data[hdr+5], iEnd); }else{ /* Insert the new freeblock into the freelist */ put2byte(&data[iPtr], iStart); put2byte(&data[iStart], iFreeBlk); put2byte(&data[iStart+2], iSize); } pPage->nFree += iOrigSize; return SQLITE_OK; } /* ** Decode the flags byte (the first byte of the header) for a page ** and initialize fields of the MemPage structure accordingly. ** ** Only the following combinations are supported. Anything different ** indicates a corrupt database files: ** ** PTF_ZERODATA ** PTF_ZERODATA | PTF_LEAF ** PTF_LEAFDATA | PTF_INTKEY ** PTF_LEAFDATA | PTF_INTKEY | PTF_LEAF */ static int decodeFlags(MemPage *pPage, int flagByte){ BtShared *pBt; /* A copy of pPage->pBt */ assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); flagByte &= ~PTF_LEAF; pPage->childPtrSize = 4-4*pPage->leaf; pPage->xCellSize = cellSizePtr; pBt = pPage->pBt; if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior ** table b-tree page. */ assert( (PTF_LEAFDATA|PTF_INTKEY)==5 ); /* EVIDENCE-OF: R-20501-61796 A value of 13 means the page is a leaf ** table b-tree page. */ assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); pPage->intKey = 1; if( pPage->leaf ){ pPage->intKeyLeaf = 1; pPage->noPayload = 0; pPage->xParseCell = btreeParseCellPtr; }else{ pPage->intKeyLeaf = 0; pPage->noPayload = 1; pPage->xCellSize = cellSizePtrNoPayload; pPage->xParseCell = btreeParseCellPtrNoPayload; } pPage->maxLocal = pBt->maxLeaf; pPage->minLocal = pBt->minLeaf; }else if( flagByte==PTF_ZERODATA ){ /* EVIDENCE-OF: R-27225-53936 A value of 2 means the page is an interior ** index b-tree page. */ assert( (PTF_ZERODATA)==2 ); /* EVIDENCE-OF: R-16571-11615 A value of 10 means the page is a leaf ** index b-tree page. */ assert( (PTF_ZERODATA|PTF_LEAF)==10 ); pPage->intKey = 0; pPage->intKeyLeaf = 0; pPage->noPayload = 0; pPage->xParseCell = btreeParseCellPtrIndex; pPage->maxLocal = pBt->maxLocal; pPage->minLocal = pBt->minLocal; }else{ /* EVIDENCE-OF: R-47608-56469 Any other value for the b-tree page type is ** an error. */ return SQLITE_CORRUPT_BKPT; } pPage->max1bytePayload = pBt->max1bytePayload; return SQLITE_OK; } /* ** Initialize the auxiliary information for a disk block. ** ** Return SQLITE_OK on success. If we see that the page does ** not contain a well-formed database page, then return ** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not ** guarantee that the page is well-formed. It only shows that ** we failed to detect any corruption. */ static int btreeInitPage(MemPage *pPage){ assert( pPage->pBt!=0 ); assert( pPage->pBt->db!=0 ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) ); if( !pPage->isInit ){ u16 pc; /* Address of a freeblock within pPage->aData[] */ u8 hdr; /* Offset to beginning of page header */ u8 *data; /* Equal to pPage->aData */ BtShared *pBt; /* The main btree structure */ int usableSize; /* Amount of usable space on each page */ u16 cellOffset; /* Offset from start of page to first cell pointer */ int nFree; /* Number of unused bytes on the page */ int top; /* First byte of the cell content area */ int iCellFirst; /* First allowable cell or freeblock offset */ int iCellLast; /* Last possible cell or freeblock offset */ pBt = pPage->pBt; hdr = pPage->hdrOffset; data = pPage->aData; /* EVIDENCE-OF: R-28594-02890 The one-byte flag at offset 0 indicating ** the b-tree page type. */ if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT; assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); pPage->maskPage = (u16)(pBt->pageSize - 1); pPage->nOverflow = 0; usableSize = pBt->usableSize; pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize; pPage->aDataEnd = &data[usableSize]; pPage->aCellIdx = &data[cellOffset]; pPage->aDataOfst = &data[pPage->childPtrSize]; /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates ** the start of the cell content area. A zero value for this integer is ** interpreted as 65536. */ top = get2byteNotZero(&data[hdr+5]); /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the ** number of cells on the page. */ pPage->nCell = get2byte(&data[hdr+3]); if( pPage->nCell>MX_CELL(pBt) ){ /* To many cells for a single page. The page must be corrupt */ return SQLITE_CORRUPT_BKPT; } testcase( pPage->nCell==MX_CELL(pBt) ); /* EVIDENCE-OF: R-24089-57979 If a page contains no cells (which is only ** possible for a root page of a table that contains no rows) then the ** offset to the cell content area will equal the page size minus the ** bytes of reserved space. */ assert( pPage->nCell>0 || top==usableSize || CORRUPT_DB ); /* A malformed database page might cause us to read past the end ** of page when parsing a cell. ** ** The following block of code checks early to see if a cell extends ** past the end of a page boundary and causes SQLITE_CORRUPT to be ** returned if it does. */ iCellFirst = cellOffset + 2*pPage->nCell; iCellLast = usableSize - 4; if( pBt->pBt->flags & SQLITE_CellSizeCk ){ int i; /* Index into the cell pointer array */ int sz; /* Size of a cell */ if( !pPage->leaf ) iCellLast--; for(i=0; inCell; i++){ pc = get2byteAligned(&data[cellOffset+i*2]); testcase( pc==iCellFirst ); testcase( pc==iCellLast ); if( pciCellLast ){ return SQLITE_CORRUPT_BKPT; } sz = pPage->xCellSize(pPage, &data[pc]); testcase( pc+sz==usableSize ); if( pc+sz>usableSize ){ return SQLITE_CORRUPT_BKPT; } } if( !pPage->leaf ) iCellLast++; } /* Compute the total free space on the page ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the ** start of the first freeblock on the page, or is zero if there are no ** freeblocks. */ pc = get2byte(&data[hdr+1]); nFree = data[hdr+7] + top; /* Init nFree to non-freeblock free space */ while( pc>0 ){ u16 next, size; if( pciCellLast ){ /* EVIDENCE-OF: R-55530-52930 In a well-formed b-tree page, there will ** always be at least one cell before the first freeblock. ** ** Or, the freeblock is off the end of the page */ return SQLITE_CORRUPT_BKPT; } next = get2byte(&data[pc]); size = get2byte(&data[pc+2]); if( (next>0 && next<=pc+size+3) || pc+size>usableSize ){ /* Free blocks must be in ascending order. And the last byte of ** the free-block must lie on the database page. */ return SQLITE_CORRUPT_BKPT; } nFree = nFree + size; pc = next; } /* At this point, nFree contains the sum of the offset to the start ** of the cell-content area plus the number of free bytes within ** the cell-content area. If this is greater than the usable-size ** of the page, then the page must be corrupted. This check also ** serves to verify that the offset to the start of the cell-content ** area, according to the page header, lies within the page. */ if( nFree>usableSize ){ return SQLITE_CORRUPT_BKPT; } pPage->nFree = (u16)(nFree - iCellFirst); pPage->isInit = 1; } return SQLITE_OK; } /* ** Set up a raw page so that it looks like a database page holding ** no entries. */ static void zeroPage(MemPage *pPage, int flags){ unsigned char *data = pPage->aData; BtShared *pBt = pPage->pBt; u8 hdr = pPage->hdrOffset; u16 first; assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno ); assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); assert( sqlite3PagerGetData(pPage->pDbPage) == data ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( sqlite3_mutex_held(pBt->mutex) ); if( pBt->btsFlags & BTS_SECURE_DELETE ){ memset(&data[hdr], 0, pBt->usableSize - hdr); } data[hdr] = (char)flags; first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8); memset(&data[hdr+1], 0, 4); data[hdr+7] = 0; put2byte(&data[hdr+5], pBt->usableSize); pPage->nFree = (u16)(pBt->usableSize - first); decodeFlags(pPage, flags); pPage->cellOffset = first; pPage->aDataEnd = &data[pBt->usableSize]; pPage->aCellIdx = &data[first]; pPage->aDataOfst = &data[pPage->childPtrSize]; pPage->nOverflow = 0; assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); pPage->maskPage = (u16)(pBt->pageSize - 1); pPage->nCell = 0; pPage->isInit = 1; } /* ** Convert a DbPage obtained from the pager into a MemPage used by ** the btree layer. */ static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ MemPage *pPage = (MemPage*)sqlite3PagerGetExtra(pDbPage); pPage->aData = sqlite3PagerGetData(pDbPage); pPage->pDbPage = pDbPage; pPage->pBt = pBt; pPage->pgno = pgno; pPage->hdrOffset = pgno==1 ? 100 : 0; return pPage; } /* ** Get a page from the pager. Initialize the MemPage.pBt and ** MemPage.aData elements if needed. See also: btreeGetUnusedPage(). ** ** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care ** about the content of the page at this time. So do not go to the disk ** to fetch the content. Just fill in the content with zeros for now. ** If in the future we call sqlite3PagerWrite() on this page, that ** means we have started to be concerned about content and the disk ** read should occur at that point. */ static int btreeGetPage( BtShared *pBt, /* The btree */ Pgno pgno, /* Number of the page to fetch */ MemPage **ppPage, /* Return the page in this parameter */ int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ ){ int rc; DbPage *pDbPage; assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY ); assert( sqlite3_mutex_held(pBt->mutex) ); rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); if( rc ) return rc; *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); return SQLITE_OK; } /* ** Retrieve a page from the pager cache. If the requested page is not ** already in the pager cache return NULL. Initialize the MemPage.pBt and ** MemPage.aData elements if needed. */ static MemPage *btreePageLookup(BtShared *pBt, Pgno pgno){ DbPage *pDbPage; assert( sqlite3_mutex_held(pBt->mutex) ); pDbPage = sqlite3PagerLookup(pBt->pPager, pgno); if( pDbPage ){ return btreePageFromDbPage(pDbPage, pgno, pBt); } return 0; } /* ** Return the size of the database file in pages. If there is any kind of ** error, return ((unsigned int)-1). */ static Pgno btreePagecount(BtShared *pBt){ return pBt->nPage; } SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeLastPage(Btree *p){ assert( sqlite3BtreeHoldsMutex(p) ); assert( ((p->pBt->nPage)&0x8000000)==0 ); return btreePagecount(p->pBt); } /* ** Get a page from the pager and initialize it. ** ** If pCur!=0 then the page is being fetched as part of a moveToChild() ** call. Do additional sanity checking on the page in this case. ** And if the fetch fails, this routine must decrement pCur->iPage. ** ** The page is fetched as read-write unless pCur is not NULL and is ** a read-only cursor. ** ** If an error occurs, then *ppPage is undefined. It ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( BtShared *pBt, /* The database file */ Pgno pgno, /* Number of the page to get */ MemPage **ppPage, /* Write the page pointer here */ BtCursor *pCur, /* Cursor to receive the page, or NULL */ int bReadOnly /* True for a read-only page */ ){ int rc; DbPage *pDbPage; assert( sqlite3_mutex_held(pBt->mutex) ); assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] ); assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); assert( pCur==0 || pCur->iPage>0 ); if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; goto getAndInitPage_error; } rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); if( rc ){ goto getAndInitPage_error; } *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); if( (*ppPage)->isInit==0 ){ rc = btreeInitPage(*ppPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); goto getAndInitPage_error; } } /* If obtaining a child page for a cursor, we must verify that the page is ** compatible with the root page. */ if( pCur && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) ){ rc = SQLITE_CORRUPT_BKPT; releasePage(*ppPage); goto getAndInitPage_error; } return SQLITE_OK; getAndInitPage_error: if( pCur ) pCur->iPage--; testcase( pgno==0 ); assert( pgno!=0 || rc==SQLITE_CORRUPT ); return rc; } /* ** Release a MemPage. This should be called once for each prior ** call to btreeGetPage. */ static void releasePageNotNull(MemPage *pPage){ assert( pPage->aData ); assert( pPage->pBt ); assert( pPage->pDbPage!=0 ); assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); sqlite3PagerUnrefNotNull(pPage->pDbPage); } static void releasePage(MemPage *pPage){ if( pPage ) releasePageNotNull(pPage); } /* ** Get an unused page. ** ** This works just like btreeGetPage() with the addition: ** ** * If the page is already in use for some other purpose, immediately ** release it and return an SQLITE_CURRUPT error. ** * Make sure the isInit flag is clear */ static int btreeGetUnusedPage( BtShared *pBt, /* The btree */ Pgno pgno, /* Number of the page to fetch */ MemPage **ppPage, /* Return the page in this parameter */ int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ ){ int rc = btreeGetPage(pBt, pgno, ppPage, flags); if( rc==SQLITE_OK ){ if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ releasePage(*ppPage); *ppPage = 0; return SQLITE_CORRUPT_BKPT; } (*ppPage)->isInit = 0; }else{ *ppPage = 0; } return rc; } /* ** During a rollback, when the pager reloads information into the cache ** so that the cache is restored to its original state at the start of ** the transaction, for each page restored this routine is called. ** ** This routine needs to reset the extra data section at the end of the ** page to agree with the restored data. */ static void pageReinit(DbPage *pData){ MemPage *pPage; pPage = (MemPage *)sqlite3PagerGetExtra(pData); assert( sqlite3PagerPageRefcount(pData)>0 ); if( pPage->isInit ){ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); pPage->isInit = 0; if( sqlite3PagerPageRefcount(pData)>1 ){ /* pPage might not be a btree page; it might be an overflow page ** or ptrmap page or a free page. In those cases, the following ** call to btreeInitPage() will likely return SQLITE_CORRUPT. ** But no harm is done by this. And it is very important that ** btreeInitPage() be called on every btree page so we make ** the call for every page that comes in for re-initing. */ btreeInitPage(pPage); } } } /* ** Invoke the busy handler for a btree. */ static int btreeInvokeBusyHandler(void *pArg){ BtShared *pBt = (BtShared*)pArg; assert( pBt->pBt ); assert( sqlite3_mutex_held(pBt->pBt->mutex) ); return sqlite3InvokeBusyHandler(&pBt->pBt->busyHandler); } /* ** Open a database file. ** ** zFilename is the name of the database file. If zFilename is NULL ** then an ephemeral database is created. The ephemeral database might ** be exclusively in memory, or it might use a disk-based memory cache. ** Either way, the ephemeral database will be automatically deleted ** when sqlite3BtreeClose() is called. ** ** If zFilename is ":memory:" then an in-memory database is created ** that is automatically destroyed when it is closed. ** ** The "flags" parameter is a bitmask that might contain bits like ** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY. ** ** If the database is already opened in the same database connection ** and we are in shared cache mode, then the open will fail with an ** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared ** objects in the same database connection since doing so will lead ** to problems with locking. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeOpen( const char *zVfs, /* VFS to use for this b-tree */ const char *zFilename, /* Name of the file containing the BTree database */ Btree **ppBtree, /* Pointer to new Btree object written here */ int flags, /* Options */ int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ ){ BtShared *pBt = 0; /* Shared part of btree structure */ Btree *p; /* Handle to return */ sqlite3_mutex *mutexOpen = 0; /* Prevents a race condition. Ticket #3537 */ int rc = SQLITE_OK; /* Result code from this function */ u8 nReserve; /* Byte of unused space on each page */ unsigned char zDbHeader[100]; /* Database header content */ int isThreadsafe; /* True for threadsafe connections */ #ifdef SQLITE_ENABLE_API_ARMOR if( ppBtree==0 ) return SQLITE_MISUSE_BKPT; #endif *ppBtree = 0; #ifndef SQLITE_OMIT_AUTOINIT rc = sqlite3BtreeInitialize(); if( rc ) return rc; #endif /* Only allow sensible combinations of bits in the vfsFlags argument. ** Throw an error if any non-sense combination is used. If we ** do not block illegal combinations here, it could trigger ** assert() statements in deeper layers. Sensible combinations ** are: ** ** 1: SQLITE_OPEN_READONLY ** 2: SQLITE_OPEN_READWRITE ** 6: SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE */ assert( SQLITE_OPEN_READONLY == 0x01 ); assert( SQLITE_OPEN_READWRITE == 0x02 ); assert( SQLITE_OPEN_CREATE == 0x04 ); testcase( (1<<(vfsFlags&7))==0x02 ); /* READONLY */ testcase( (1<<(vfsFlags&7))==0x04 ); /* READWRITE */ testcase( (1<<(vfsFlags&7))==0x40 ); /* READWRITE | CREATE */ if( ((1<<(vfsFlags&7)) & 0x46)==0 ){ return SQLITE_MISUSE_BKPT; /* IMP: R-65497-44594 */ } if( sqlite3GlobalConfig.bCoreMutex==0 ){ isThreadsafe = 0; }else if( vfsFlags & SQLITE_OPEN_NOMUTEX ){ isThreadsafe = 0; }else if( vfsFlags & SQLITE_OPEN_FULLMUTEX ){ isThreadsafe = 1; }else{ isThreadsafe = sqlite3GlobalConfig.bFullMutex; } if( vfsFlags & SQLITE_OPEN_PRIVATECACHE ){ vfsFlags &= ~SQLITE_OPEN_SHAREDCACHE; }else if( sqlite3GlobalConfig.sharedCacheEnabled ){ vfsFlags |= SQLITE_OPEN_SHAREDCACHE; } /* Remove harmful bits from the vfsFlags parameter ** ** The SQLITE_OPEN_NOMUTEX and SQLITE_OPEN_FULLMUTEX vfsFlags were ** dealt with in the previous code block. Besides these, the only ** valid input vfsFlags for sqlite3_open_v2() are SQLITE_OPEN_READONLY, ** SQLITE_OPEN_READWRITE, SQLITE_OPEN_CREATE, SQLITE_OPEN_SHAREDCACHE, ** SQLITE_OPEN_PRIVATECACHE, and some reserved bits. Silently mask ** off all other vfsFlags. */ vfsFlags &= ~( SQLITE_OPEN_DELETEONCLOSE | SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_MAIN_DB | SQLITE_OPEN_TEMP_DB | SQLITE_OPEN_TRANSIENT_DB | SQLITE_OPEN_MAIN_JOURNAL | SQLITE_OPEN_TEMP_JOURNAL | SQLITE_OPEN_SUBJOURNAL | SQLITE_OPEN_MASTER_JOURNAL | SQLITE_OPEN_NOMUTEX | SQLITE_OPEN_FULLMUTEX | SQLITE_OPEN_WAL ); assert( (vfsFlags&0xff)==vfsFlags ); /* flags fit in 8 bits */ /* Only a BTREE_SINGLE database can be BTREE_UNORDERED */ assert( (vfsFlags & BTREE_UNORDERED)==0 || (vfsFlags & BTREE_SINGLE)!=0 ); sqlite3_vfs *pVfs = sqlite3_vfs_find(zVfs); if( pVfs==0 ){ return SQLITE_ERROR; } p = sqlite3MallocZero(sizeof(Btree)); if( !p ){ return SQLITE_NOMEM; } p->inTrans = TRANS_NONE; #ifndef SQLITE_OMIT_SHARED_CACHE p->lock.pBtree = p; p->lock.iTable = 1; #endif p->magic = SQLITE_MAGIC_BUSY; if( isThreadsafe ){ p->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); if( p->mutex==0 ){ sqlite3_free(p); p = 0; goto btree_open_out; } } sqlite3_mutex_enter(p->mutex); p->enc = SQLITE_UTF8; p->szMmap = sqlite3GlobalConfig.szMmap; p->flags |= 0 #if SQLITE_DEFAULT_CKPTFULLFSYNC | SQLITE_CkptFullFSync #endif #if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) | SQLITE_CellSizeCk #endif ; #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) /* ** If this Btree is a candidate for shared cache, try to find an ** existing BtShared object that we can share with */ if( (vfsFlags&SQLITE_OPEN_URI)!=0 ){ if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){ int nFilename = sqlite3Strlen30(zFilename)+1; int nFullPathname = pVfs->mxPathname+1; char *zFullPathname = sqlite3Malloc(MAX(nFullPathname,nFilename)); MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) p->sharable = 1; if( !zFullPathname ){ sqlite3_free(p); return SQLITE_NOMEM; } rc = sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname); if( rc ){ sqlite3_free(zFullPathname); sqlite3_free(p); return rc; } #if SQLITE_THREADSAFE mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN); sqlite3_mutex_enter(mutexOpen); mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); sqlite3_mutex_enter(mutexShared); #endif for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){ assert( pBt->nRef>0 ); if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0)) && sqlite3PagerVfs(pBt->pPager)==pVfs ){ p->pBt = pBt; pBt->nRef++; break; } } sqlite3_mutex_leave(mutexShared); sqlite3_free(zFullPathname); } #ifdef SQLITE_DEBUG else{ /* In debug mode, we mark all persistent databases as sharable ** even when they are not. This exercises the locking code and ** gives more opportunity for asserts(sqlite3_mutex_held()) ** statements to find locking problems. */ p->sharable = 1; } #endif } #endif if( pBt==0 ){ /* ** The following asserts make sure that structures used by the btree are ** the right size. This is to guard against size changes that result ** when compiling on a different architecture. */ assert( sizeof(i64)==8 ); assert( sizeof(u64)==8 ); assert( sizeof(u32)==4 ); assert( sizeof(u16)==2 ); assert( sizeof(Pgno)==4 ); pBt = sqlite3MallocZero( sizeof(*pBt) ); if( pBt==0 ){ rc = SQLITE_NOMEM; goto btree_open_out; } rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, EXTRA_SIZE, flags, vfsFlags, pageReinit); if( rc==SQLITE_OK ){ sqlite3PagerSetMmapLimit(pBt->pPager, p->szMmap); rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); } if( rc!=SQLITE_OK ){ goto btree_open_out; } pBt->openFlags = (u8)flags; pBt->pBt = p; sqlite3PagerSetBusyhandler(pBt->pPager, btreeInvokeBusyHandler, pBt); p->pBt = pBt; pBt->pCursor = 0; pBt->pPage1 = 0; if( sqlite3PagerIsreadonly(pBt->pPager) ) pBt->btsFlags |= BTS_READ_ONLY; #ifdef SQLITE_SECURE_DELETE pBt->btsFlags |= BTS_SECURE_DELETE; #endif /* EVIDENCE-OF: R-51873-39618 The page size for a database file is ** determined by the 2-byte integer located at an offset of 16 bytes from ** the beginning of the database file. */ pBt->pageSize = (zDbHeader[16]<<8) | (zDbHeader[17]<<16); if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){ pBt->pageSize = 0; #ifndef SQLITE_OMIT_AUTOVACUUM /* If the magic name ":memory:" will create an in-memory database, then ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if ** SQLITE_DEFAULT_AUTOVACUUM is true. */ if( zFilename ){ pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0); pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0); } #endif nReserve = 0; }else{ /* EVIDENCE-OF: R-37497-42412 The size of the reserved region is ** determined by the one-byte unsigned integer found at an offset of 20 ** into the database file header. */ nReserve = zDbHeader[20]; pBt->btsFlags |= BTS_PAGESIZE_FIXED; #ifndef SQLITE_OMIT_AUTOVACUUM pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0); pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0); #endif } rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); if( rc ) goto btree_open_out; pBt->usableSize = pBt->pageSize - nReserve; assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */ /* ** file_format==1 Version 3.0.0. ** file_format==2 Version 3.1.3. // ALTER TABLE ADD COLUMN ** file_format==3 Version 3.1.4. // ditto but with non-NULL defaults ** file_format==4 Version 3.3.0. // DESC indices. Boolean constants */ pBt->file_format = zDbHeader[18]; if( pBt->file_format==0 ){ pBt->file_format = 1; } if( pBt->file_format>SQLITE_MAX_FILE_FORMAT ){ rc = SQLITE_ERROR; goto btree_open_out; } #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO) /* Add the new BtShared object to the linked list sharable BtShareds. */ if( p->sharable ){ MUTEX_LOGIC( sqlite3_mutex *mutexShared; ) pBt->nRef = 1; MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);) if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){ pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST); if( pBt->mutex==0 ){ rc = SQLITE_NOMEM; p->mallocFailed = 0; goto btree_open_out; } } sqlite3_mutex_enter(mutexShared); pBt->pNext = GLOBAL(BtShared*,sqlite3SharedCacheList); GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt; sqlite3_mutex_leave(mutexShared); } #endif } sqlite3PagerSetCachesize(pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE); p->magic = SQLITE_MAGIC_OPEN; /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking ** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking ** mode. Doing nothing at all also makes NORMAL the default. */ #ifdef SQLITE_DEFAULT_LOCKING_MODE sqlite3PagerLockingMode(sqlite3BtreePager(db->aDb[0].pBt), SQLITE_DEFAULT_LOCKING_MODE); #endif *ppBtree = p; btree_open_out: if( rc!=SQLITE_OK ){ if( pBt && pBt->pPager ){ sqlite3PagerClose(pBt->pPager); } sqlite3_free(pBt); sqlite3_free(p); *ppBtree = 0; }else{ if (p) { assert( p->mutex!=0 || isThreadsafe==0 || sqlite3GlobalConfig.bFullMutex==0 ); sqlite3_mutex_leave(p->mutex); } /* If the B-Tree was successfully opened, set the pager-cache size to the ** default value. Except, when opening on an existing shared pager-cache, ** do not change the pager-cache size. */ if( sqlite3BtreeSchema(p, 0, 0)==0 ){ sqlite3PagerSetCachesize(pBt->pPager, SQLITE_DEFAULT_CACHE_SIZE); } } if( mutexOpen ){ assert( sqlite3_mutex_held(mutexOpen) ); sqlite3_mutex_leave(mutexOpen); } return rc; } /* ** Decrement the BtShared.nRef counter. When it reaches zero, ** remove the BtShared structure from the sharing list. Return ** true if the BtShared.nRef counter reaches zero and return ** false if it is still positive. */ static int removeFromSharingList(BtShared *pBt){ #ifndef SQLITE_OMIT_SHARED_CACHE MUTEX_LOGIC( sqlite3_mutex *pMaster; ) BtShared *pList; int removed = 0; assert( sqlite3_mutex_notheld(pBt->mutex) ); MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) sqlite3_mutex_enter(pMaster); pBt->nRef--; if( pBt->nRef<=0 ){ if( GLOBAL(BtShared*,sqlite3SharedCacheList)==pBt ){ GLOBAL(BtShared*,sqlite3SharedCacheList) = pBt->pNext; }else{ pList = GLOBAL(BtShared*,sqlite3SharedCacheList); while( ALWAYS(pList) && pList->pNext!=pBt ){ pList=pList->pNext; } if( ALWAYS(pList) ){ pList->pNext = pBt->pNext; } } if( SQLITE_THREADSAFE ){ sqlite3_mutex_free(pBt->mutex); } removed = 1; } sqlite3_mutex_leave(pMaster); return removed; #else return 1; #endif } /* ** Make sure pBt->pTmpSpace points to an allocation of ** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child ** pointer. */ static void allocateTempSpace(BtShared *pBt){ if( !pBt->pTmpSpace ){ pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize ); /* One of the uses of pBt->pTmpSpace is to format cells before ** inserting them into a leaf page (function fillInCell()). If ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes ** by the various routines that manipulate binary cells. Which ** can mean that fillInCell() only initializes the first 2 or 3 ** bytes of pTmpSpace, but that the first 4 bytes are copied from ** it into a database page. This is not actually a problem, but it ** does cause a valgrind error when the 1 or 2 bytes of unitialized ** data is passed to system call write(). So to avoid this error, ** zero the first 4 bytes of temp space here. ** ** Also: Provide four bytes of initialized space before the ** beginning of pTmpSpace as an area available to prepend the ** left-child pointer to the beginning of a cell. */ if( pBt->pTmpSpace ){ memset(pBt->pTmpSpace, 0, 8); pBt->pTmpSpace += 4; } } } /* ** Free the pBt->pTmpSpace allocation */ static void freeTempSpace(BtShared *pBt){ if( pBt->pTmpSpace ){ pBt->pTmpSpace -= 4; sqlite3PageFree(pBt->pTmpSpace); pBt->pTmpSpace = 0; } } /* ** Close an open database and invalidate all cursors. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeClose(Btree *p){ if( !p ){ /* EVIDENCE-OF: R-63257-11740 Calling sqlite3BtreeClose() ** with a NULL pointer argument is a harmless no-op. */ return SQLITE_OK; } if( !sqlite3SafetyCheckSickOrOk(p) ){ return SQLITE_MISUSE_BKPT; } sqlite3_mutex_enter(p->mutex); p->magic = SQLITE_MAGIC_ZOMBIE; BtShared *pBt = p->pBt; BtCursor *pCur; /* Close all cursors opened via this handle. */ sqlite3BtreeEnter(p); pCur = pBt->pCursor; while( pCur ){ BtCursor *pTmp = pCur; pCur = pCur->pNext; if( pTmp->pBtree==p ){ sqlite3BtreeCloseCursor(pTmp); } } /* Rollback any active transaction and free the handle structure. ** The call to sqlite3BtreeRollback() drops any table-locks held by ** this handle. */ sqlite3BtreeRollback(p, SQLITE_OK, 0); sqlite3BtreeLeave(p); /* If there are still other outstanding references to the shared-btree ** structure, return now. The remainder of this procedure cleans ** up the shared-btree. */ assert( p->wantToLock==0 && p->locked==0 ); if( !p->sharable || removeFromSharingList(pBt) ){ /* The pBt is no longer on the sharing list, so we can access ** it without having to hold the mutex. ** ** Clean out and delete the BtShared object. */ assert( !pBt->pCursor ); sqlite3PagerClose(pBt->pPager); if( pBt->xFreeSchema && pBt->pSchema ){ pBt->xFreeSchema(pBt->pSchema); } sqlite3DbFree(0, pBt->pSchema); freeTempSpace(pBt); sqlite3_free(pBt); } #ifndef SQLITE_OMIT_SHARED_CACHE assert( p->wantToLock==0 ); assert( p->locked==0 ); if( p->pPrev ) p->pPrev->pNext = p->pNext; if( p->pNext ) p->pNext->pPrev = p->pPrev; #endif sqlite3_mutex_leave(p->mutex); sqlite3_mutex_free(p->mutex); p->magic = SQLITE_MAGIC_CLOSED; sqlite3_free(p); return SQLITE_OK; } /* ** Change the limit on the number of pages allowed in the cache. ** ** The maximum number of cache pages is set to the absolute ** value of mxPage. If mxPage is negative, the pager will ** operate asynchronously - it will not stop to do fsync()s ** to insure data is written to the disk surface before ** continuing. Transactions still work if synchronous is off, ** and the database cannot be corrupted if this program ** crashes. But if the operating system crashes or there is ** an abrupt power failure when synchronous is off, the database ** could be left in an inconsistent and unrecoverable state. ** Synchronous is on by default so database corruption is not ** normally a worry. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ BtShared *pBt = p->pBt; assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); sqlite3PagerSetCachesize(pBt->pPager, mxPage); sqlite3BtreeLeave(p); return SQLITE_OK; } #if SQLITE_MAX_MMAP_SIZE>0 /* ** Change the limit on the amount of the database file that may be ** memory mapped. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ BtShared *pBt = p->pBt; assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); sqlite3PagerSetMmapLimit(pBt->pPager, szMmap); sqlite3BtreeLeave(p); return SQLITE_OK; } #endif /* SQLITE_MAX_MMAP_SIZE>0 */ /* ** Change the way data is synced to disk in order to increase or decrease ** how well the database resists damage due to OS crashes and power ** failures. Level 1 is the same as asynchronous (no syncs() occur and ** there is a high probability of damage) Level 2 is the default. There ** is a very low but non-zero probability of damage. Level 3 reduces the ** probability of damage to near zero but with a write performance reduction. */ #ifndef SQLITE_OMIT_PAGER_PRAGMAS SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetPagerFlags( Btree *p, /* The btree to set the safety level on */ unsigned pgFlags /* Various PAGER_* flags */ ){ BtShared *pBt = p->pBt; assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); sqlite3PagerSetFlags(pBt->pPager, pgFlags); sqlite3BtreeLeave(p); return SQLITE_OK; } #endif /* ** Return TRUE if the given btree is set to safety level 1. In other ** words, return TRUE if no sync() occurs on the disk files. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSyncDisabled(Btree *p){ BtShared *pBt = p->pBt; int rc; assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); assert( pBt && pBt->pPager ); rc = sqlite3PagerNosync(pBt->pPager); sqlite3BtreeLeave(p); return rc; } /* ** Change the default pages size and the number of reserved bytes per page. ** Or, if the page size has already been fixed, return SQLITE_READONLY ** without changing anything. ** ** The page size must be a power of 2 between 512 and 65536. If the page ** size supplied does not meet this constraint then the page size is not ** changed. ** ** Page sizes are constrained to be a power of two so that the region ** of the database file used for locking (beginning at PENDING_BYTE, ** the first byte past the 1GB boundary, 0x40000000) needs to occur ** at the beginning of a page. ** ** If parameter nReserve is less than zero, then the number of reserved ** bytes per page is left unchanged. ** ** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size ** and autovacuum mode can no longer be changed. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve, int iFix){ int rc = SQLITE_OK; BtShared *pBt = p->pBt; assert( nReserve>=-1 && nReserve<=255 ); sqlite3BtreeEnter(p); #if SQLITE_HAS_CODEC if( nReserve>pBt->optimalReserve ) pBt->optimalReserve = (u8)nReserve; #endif if( pBt->btsFlags & BTS_PAGESIZE_FIXED ){ sqlite3BtreeLeave(p); return SQLITE_READONLY; } if( nReserve<0 ){ nReserve = pBt->pageSize - pBt->usableSize; } assert( nReserve>=0 && nReserve<=255 ); if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE && ((pageSize-1)&pageSize)==0 ){ assert( (pageSize & 7)==0 ); assert( !pBt->pCursor ); pBt->pageSize = (u32)pageSize; freeTempSpace(pBt); } rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, nReserve); pBt->usableSize = pBt->pageSize - (u16)nReserve; if( iFix ) pBt->btsFlags |= BTS_PAGESIZE_FIXED; sqlite3BtreeLeave(p); return rc; } /* ** Return the currently defined page size */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetPageSize(Btree *p){ return p->pBt->pageSize; } /* ** This function is similar to sqlite3BtreeGetReserve(), except that it ** may only be called if it is guaranteed that the b-tree mutex is already ** held. ** ** This is useful in one special case in the backup API code where it is ** known that the shared b-tree mutex is held, but the mutex on the ** database handle that owns *p is not. In this case if sqlite3BtreeEnter() ** were to be called, it might collide with some other operation on the ** database handle that owns *p, causing undefined behavior. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetReserveNoMutex(Btree *p){ int n; assert( sqlite3_mutex_held(p->pBt->mutex) ); n = p->pBt->pageSize - p->pBt->usableSize; return n; } /* ** Return the number of bytes of space at the end of every page that ** are intentually left unused. This is the "reserved" space that is ** sometimes used by extensions. ** ** If SQLITE_HAS_MUTEX is defined then the number returned is the ** greater of the current reserved space and the maximum requested ** reserve space. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetOptimalReserve(Btree *p){ int n; sqlite3BtreeEnter(p); n = sqlite3BtreeGetReserveNoMutex(p); #ifdef SQLITE_HAS_CODEC if( npBt->optimalReserve ) n = p->pBt->optimalReserve; #endif sqlite3BtreeLeave(p); return n; } /* ** Set the maximum page count for a database if mxPage is positive. ** No changes are made if mxPage is 0 or negative. ** Regardless of the value of mxPage, return the maximum page count. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeMaxPageCount(Btree *p, int mxPage){ int n; sqlite3BtreeEnter(p); n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage); sqlite3BtreeLeave(p); return n; } /* ** Set the BTS_SECURE_DELETE flag if newFlag is 0 or 1. If newFlag is -1, ** then make no changes. Always return the value of the BTS_SECURE_DELETE ** setting after the change. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSecureDelete(Btree *p, int newFlag){ int b; if( p==0 ) return 0; sqlite3BtreeEnter(p); if( newFlag>=0 ){ p->pBt->btsFlags &= ~BTS_SECURE_DELETE; if( newFlag ) p->pBt->btsFlags |= BTS_SECURE_DELETE; } b = (p->pBt->btsFlags & BTS_SECURE_DELETE)!=0; sqlite3BtreeLeave(p); return b; } /* ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it ** is disabled. The default value for the auto-vacuum property is ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){ #ifdef SQLITE_OMIT_AUTOVACUUM return SQLITE_READONLY; #else BtShared *pBt = p->pBt; int rc = SQLITE_OK; u8 av = (u8)autoVacuum; sqlite3BtreeEnter(p); if( (pBt->btsFlags & BTS_PAGESIZE_FIXED)!=0 && (av ?1:0)!=pBt->autoVacuum ){ rc = SQLITE_READONLY; }else{ pBt->autoVacuum = av ?1:0; pBt->incrVacuum = av==2 ?1:0; } sqlite3BtreeLeave(p); return rc; #endif } /* ** Return the value of the 'auto-vacuum' property. If auto-vacuum is ** enabled 1 is returned. Otherwise 0. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeGetAutoVacuum(Btree *p){ #ifdef SQLITE_OMIT_AUTOVACUUM return BTREE_AUTOVACUUM_NONE; #else int rc; sqlite3BtreeEnter(p); rc = ( (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE: (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL: BTREE_AUTOVACUUM_INCR ); sqlite3BtreeLeave(p); return rc; #endif } /* ** Get a reference to pPage1 of the database file. This will ** also acquire a readlock on that file. ** ** SQLITE_OK is returned on success. If the file is not a ** well-formed database file, then SQLITE_CORRUPT is returned. ** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM ** is returned if we run out of memory. */ static int lockBtree(BtShared *pBt){ int rc; /* Result code from subfunctions */ MemPage *pPage1; /* Page 1 of the database file */ int nPage; /* Number of pages in the database */ int nPageFile = 0; /* Number of pages in the database file */ int nPageHeader; /* Number of pages in the database according to hdr */ assert( sqlite3_mutex_held(pBt->mutex) ); assert( pBt->pPage1==0 ); rc = sqlite3PagerSharedLock(pBt->pPager); if( rc!=SQLITE_OK ) return rc; rc = btreeGetPage(pBt, 1, &pPage1, 0); if( rc!=SQLITE_OK ) return rc; /* Do some checking to help insure the file we opened really is ** a valid database file. */ nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData); sqlite3PagerPagecount(pBt->pPager, &nPageFile); if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){ nPage = nPageFile; } if( nPage>0 ){ u32 pageSize; u32 usableSize; u8 *page1 = pPage1->aData; rc = SQLITE_NOTADB; /* EVIDENCE-OF: R-43737-39999 Every valid SQLite database file begins ** with the following 16 bytes (in hex): 53 51 4c 69 74 65 20 66 6f 72 6d ** 61 74 20 33 00. */ if( memcmp(page1, zMagicHeader, 16)!=0 ){ goto page1_init_failed; } #ifdef SQLITE_OMIT_WAL if( page1[18]>1 ){ pBt->btsFlags |= BTS_READ_ONLY; } if( page1[19]>1 ){ goto page1_init_failed; } #else if( page1[18]>2 ){ pBt->btsFlags |= BTS_READ_ONLY; } if( page1[19]>2 ){ goto page1_init_failed; } /* If the write version is set to 2, this database should be accessed ** in WAL mode. If the log is not already open, open it now. Then ** return SQLITE_OK and return without populating BtShared.pPage1. ** The caller detects this and calls this function again. This is ** required as the version of page 1 currently in the page1 buffer ** may not be the latest version - there may be a newer one in the log ** file. */ if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ int isOpen = 0; rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); if( rc!=SQLITE_OK ){ goto page1_init_failed; }else if( isOpen==0 ){ releasePage(pPage1); return SQLITE_OK; } rc = SQLITE_NOTADB; } #endif /* EVIDENCE-OF: R-15465-20813 The maximum and minimum embedded payload ** fractions and the leaf payload fraction values must be 64, 32, and 32. ** ** The original design allowed these amounts to vary, but as of ** version 3.6.0, we require them to be fixed. */ if( memcmp(&page1[21], "\100\040\040",3)!=0 ){ goto page1_init_failed; } /* EVIDENCE-OF: R-51873-39618 The page size for a database file is ** determined by the 2-byte integer located at an offset of 16 bytes from ** the beginning of the database file. */ pageSize = (page1[16]<<8) | (page1[17]<<16); /* EVIDENCE-OF: R-25008-21688 The size of a page is a power of two ** between 512 and 65536 inclusive. */ if( ((pageSize-1)&pageSize)!=0 || pageSize>SQLITE_MAX_PAGE_SIZE || pageSize<=256 ){ goto page1_init_failed; } assert( (pageSize & 7)==0 ); /* EVIDENCE-OF: R-59310-51205 The "reserved space" size in the 1-byte ** integer at offset 20 is the number of bytes of space at the end of ** each page to reserve for extensions. ** ** EVIDENCE-OF: R-37497-42412 The size of the reserved region is ** determined by the one-byte unsigned integer found at an offset of 20 ** into the database file header. */ usableSize = pageSize - page1[20]; if( (u32)pageSize!=pBt->pageSize ){ /* After reading the first page of the database assuming a page size ** of BtShared.pageSize, we have discovered that the page-size is ** actually pageSize. Unlock the database, leave pBt->pPage1 at ** zero and return SQLITE_OK. The caller will call this function ** again with the correct page-size. */ releasePage(pPage1); pBt->usableSize = usableSize; pBt->pageSize = pageSize; freeTempSpace(pBt); rc = sqlite3PagerSetPagesize(pBt->pPager, &pBt->pageSize, pageSize-usableSize); return rc; } if( (pBt->pBt->flags & SQLITE_RecoveryMode)==0 && nPage>nPageFile ){ rc = SQLITE_CORRUPT_BKPT; goto page1_init_failed; } /* EVIDENCE-OF: R-28312-64704 However, the usable size is not allowed to ** be less than 480. In other words, if the page size is 512, then the ** reserved space size cannot exceed 32. */ if( usableSize<480 ){ goto page1_init_failed; } pBt->pageSize = pageSize; pBt->usableSize = usableSize; #ifndef SQLITE_OMIT_AUTOVACUUM pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0); pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0); #endif } /* maxLocal is the maximum amount of payload to store locally for ** a cell. Make sure it is small enough so that at least minFanout ** cells can will fit on one page. We assume a 10-byte page header. ** Besides the payload, the cell must store: ** 2-byte pointer to the cell ** 4-byte child pointer ** 9-byte nKey value ** 4-byte nData value ** 4-byte overflow page pointer ** So a cell consists of a 2-byte pointer, a header which is as much as ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow ** page pointer. */ pBt->maxLocal = (u16)((pBt->usableSize-12)*64/255 - 23); pBt->minLocal = (u16)((pBt->usableSize-12)*32/255 - 23); pBt->maxLeaf = (u16)(pBt->usableSize - 35); pBt->minLeaf = (u16)((pBt->usableSize-12)*32/255 - 23); if( pBt->maxLocal>127 ){ pBt->max1bytePayload = 127; }else{ pBt->max1bytePayload = (u8)pBt->maxLocal; } assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) ); pBt->pPage1 = pPage1; pBt->nPage = nPage; return SQLITE_OK; page1_init_failed: releasePage(pPage1); pBt->pPage1 = 0; return rc; } #ifndef NDEBUG /* ** Return the number of cursors open on pBt. This is for use ** in assert() expressions, so it is only compiled if NDEBUG is not ** defined. ** ** Only write cursors are counted if wrOnly is true. If wrOnly is ** false then all cursors are counted. ** ** For the purposes of this routine, a cursor is any cursor that ** is capable of reading or writing to the database. Cursors that ** have been tripped into the CURSOR_FAULT state are not counted. */ static int countValidCursors(BtShared *pBt, int wrOnly){ BtCursor *pCur; int r = 0; for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0) && pCur->eState!=CURSOR_FAULT ) r++; } return r; } #endif /* ** If there are no outstanding cursors and we are not in the middle ** of a transaction but there is a read lock on the database, then ** this routine unrefs the first page of the database file which ** has the effect of releasing the read lock. ** ** If there is a transaction in progress, this routine is a no-op. */ static void unlockBtreeIfUnused(BtShared *pBt){ assert( sqlite3_mutex_held(pBt->mutex) ); assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ MemPage *pPage1 = pBt->pPage1; assert( pPage1->aData ); assert( sqlite3PagerRefcount(pBt->pPager)==1 ); pBt->pPage1 = 0; releasePageNotNull(pPage1); } } /* ** If pBt points to an empty file then convert that empty file ** into a new empty database by initializing the first page of ** the database. */ static int newDatabase(BtShared *pBt){ MemPage *pP1; unsigned char *data; int rc; assert( sqlite3_mutex_held(pBt->mutex) ); if( pBt->nPage>0 ){ return SQLITE_OK; } pP1 = pBt->pPage1; assert( pP1!=0 ); data = pP1->aData; rc = sqlite3PagerWrite(pP1->pDbPage); if( rc ) return rc; memcpy(data, zMagicHeader, sizeof(zMagicHeader)); assert( sizeof(zMagicHeader)==16 ); data[16] = (u8)((pBt->pageSize>>8)&0xff); data[17] = (u8)((pBt->pageSize>>16)&0xff); data[18] = 1; data[19] = 1; assert( pBt->usableSize<=pBt->pageSize && pBt->usableSize+255>=pBt->pageSize); data[20] = (u8)(pBt->pageSize - pBt->usableSize); data[21] = 64; data[22] = 32; data[23] = 32; memset(&data[24], 0, 100-24); zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA ); pBt->btsFlags |= BTS_PAGESIZE_FIXED; #ifndef SQLITE_OMIT_AUTOVACUUM assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 ); assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 ); put4byte(&data[36 + 4*4], pBt->autoVacuum); put4byte(&data[36 + 7*4], pBt->incrVacuum); #endif pBt->nPage = 1; data[31] = 1; return SQLITE_OK; } /* ** Initialize the first page of the database file (creating a database ** consisting of a single page and no schema objects). Return SQLITE_OK ** if successful, or an SQLite error code otherwise. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeNewDb(Btree *p){ int rc; sqlite3BtreeEnter(p); p->pBt->nPage = 0; rc = newDatabase(p->pBt); sqlite3BtreeLeave(p); return rc; } /* ** Attempt to start a new transaction. A write-transaction ** is started if the second argument is nonzero, otherwise a read- ** transaction. If the second argument is 2 or more and exclusive ** transaction is started, meaning that no other process is allowed ** to access the database. A preexisting transaction may not be ** upgraded to exclusive by calling this routine a second time - the ** exclusivity flag only works for a new transaction. ** ** A write-transaction must be started before attempting any ** changes to the database. None of the following routines ** will work unless a transaction is started first: ** ** sqlite3BtreeCreateTable() ** sqlite3BtreeCreateIndex() ** sqlite3BtreeClearTable() ** sqlite3BtreeDropTable() ** sqlite3BtreeInsert() ** sqlite3BtreeDelete() ** sqlite3BtreeUpdateMeta() ** ** If an initial attempt to acquire the lock fails because of lock contention ** and the database was previously unlocked, then invoke the busy handler ** if there is one. But if there was previously a read-lock, do not ** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is ** returned when there is already a read-lock in order to avoid a deadlock. ** ** Suppose there are two processes A and B. A has a read lock and B has ** a reserved lock. B tries to promote to exclusive but is blocked because ** of A's read lock. A tries to promote to reserved but is blocked by B. ** One or the other of the two processes must give way or there can be ** no progress. By returning SQLITE_BUSY and not invoking the busy callback ** when A already has a read lock, we encourage A to give up and let B ** proceed. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeBeginTrans(Btree *p, int wrflag){ Btree *pBlock = 0; BtShared *pBt = p->pBt; int rc = SQLITE_OK; sqlite3BtreeEnter(p); btreeIntegrity(p); /* If the btree is already in a write-transaction, or it ** is already in a read-transaction and a read-transaction ** is requested, this is a no-op. */ if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ goto trans_begun; } assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 ); /* Write transactions are not possible on a read-only database */ if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ rc = SQLITE_READONLY; goto trans_begun; } #ifndef SQLITE_OMIT_SHARED_CACHE /* If another database handle has already opened a write transaction ** on this shared-btree structure and a second write transaction is ** requested, return SQLITE_LOCKED. */ if( (wrflag && pBt->inTransaction==TRANS_WRITE) || (pBt->btsFlags & BTS_PENDING)!=0 ){ pBlock = pBt->pWriter; }else if( wrflag>1 ){ BtLock *pIter; for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){ if( pIter->pBtree!=p ){ pBlock = pIter->pBtree; break; } } } if( pBlock ){ sqlite3ConnectionBlocked(p, pBlock); rc = SQLITE_LOCKED_SHAREDCACHE; goto trans_begun; } #endif /* Any read-only or read-write transaction implies a read-lock on ** page 1. So if some other shared-cache client already has a write-lock ** on page 1, the transaction cannot be opened. */ rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK); if( SQLITE_OK!=rc ) goto trans_begun; pBt->btsFlags &= ~BTS_INITIALLY_EMPTY; if( pBt->nPage==0 ) pBt->btsFlags |= BTS_INITIALLY_EMPTY; do { /* Call lockBtree() until either pBt->pPage1 is populated or ** lockBtree() returns something other than SQLITE_OK. lockBtree() ** may return SQLITE_OK but leave pBt->pPage1 set to 0 if after ** reading page 1 it discovers that the page-size of the database ** file is not pBt->pageSize. In this case lockBtree() will update ** pBt->pageSize to the page-size of the file on disk. */ while( pBt->pPage1==0 && SQLITE_OK==(rc = lockBtree(pBt)) ); if( rc==SQLITE_OK && wrflag ){ if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ rc = SQLITE_READONLY; }else{ rc = sqlite3PagerBegin(pBt->pPager,wrflag>1,0); if( rc==SQLITE_OK ){ rc = newDatabase(pBt); } } } if( rc!=SQLITE_OK ){ unlockBtreeIfUnused(pBt); } }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && btreeInvokeBusyHandler(pBt) ); if( rc==SQLITE_OK ){ if( p->inTrans==TRANS_NONE ){ pBt->nTransaction++; #ifndef SQLITE_OMIT_SHARED_CACHE if( p->sharable ){ assert( p->lock.pBtree==p && p->lock.iTable==1 ); p->lock.eLock = READ_LOCK; p->lock.pNext = pBt->pLock; pBt->pLock = &p->lock; } #endif } p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ); if( p->inTrans>pBt->inTransaction ){ pBt->inTransaction = p->inTrans; } if( wrflag ){ MemPage *pPage1 = pBt->pPage1; #ifndef SQLITE_OMIT_SHARED_CACHE assert( !pBt->pWriter ); pBt->pWriter = p; pBt->btsFlags &= ~BTS_EXCLUSIVE; if( wrflag>1 ) pBt->btsFlags |= BTS_EXCLUSIVE; #endif /* If the db-size header field is incorrect (as it may be if an old ** client has been writing the database file), update it now. Doing ** this sooner rather than later means the database size can safely ** re-read the database size from page 1 if a savepoint or transaction ** rollback occurs within the transaction. */ if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){ rc = sqlite3PagerWrite(pPage1->pDbPage); if( rc==SQLITE_OK ){ put4byte(&pPage1->aData[28], pBt->nPage); } } } } trans_begun: if( rc==SQLITE_OK && wrflag ){ /* This call makes sure that the pager has the correct number of ** open savepoints. If the second parameter is greater than 0 and ** the sub-journal is not already open, then it will be opened here. */ rc = sqlite3PagerOpenSavepoint(pBt->pPager, p->nSavepoint); } btreeIntegrity(p); sqlite3BtreeLeave(p); return rc; } #ifndef SQLITE_OMIT_AUTOVACUUM /* ** Set the pointer-map entries for all children of page pPage. Also, if ** pPage contains cells that point to overflow pages, set the pointer ** map entries for the overflow pages as well. */ static int setChildPtrmaps(MemPage *pPage){ int i; /* Counter variable */ int nCell; /* Number of cells in page pPage */ int rc; /* Return code */ BtShared *pBt = pPage->pBt; u8 isInitOrig = pPage->isInit; Pgno pgno = pPage->pgno; assert( sqlite3_mutex_held(pPage->pBt->mutex) ); rc = btreeInitPage(pPage); if( rc!=SQLITE_OK ){ goto set_child_ptrmaps_out; } nCell = pPage->nCell; for(i=0; ileaf ){ Pgno childPgno = get4byte(pCell); ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); } } if( !pPage->leaf ){ Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno, &rc); } set_child_ptrmaps_out: pPage->isInit = isInitOrig; return rc; } /* ** Somewhere on pPage is a pointer to page iFrom. Modify this pointer so ** that it points to iTo. Parameter eType describes the type of pointer to ** be modified, as follows: ** ** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child ** page of pPage. ** ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow ** page pointed to by one of the cells on pPage. ** ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next ** overflow page in the list. */ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); if( eType==PTRMAP_OVERFLOW2 ){ /* The pointer is always the first 4 bytes of the page in this case. */ if( get4byte(pPage->aData)!=iFrom ){ return SQLITE_CORRUPT_BKPT; } put4byte(pPage->aData, iTo); }else{ u8 isInitOrig = pPage->isInit; int i; int nCell; int rc; rc = btreeInitPage(pPage); if( rc ) return rc; nCell = pPage->nCell; for(i=0; ixParseCell(pPage, pCell, &info); if( info.iOverflow && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage && iFrom==get4byte(&pCell[info.iOverflow]) ){ put4byte(&pCell[info.iOverflow], iTo); break; } }else{ if( get4byte(pCell)==iFrom ){ put4byte(pCell, iTo); break; } } } if( i==nCell ){ if( eType!=PTRMAP_BTREE || get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){ return SQLITE_CORRUPT_BKPT; } put4byte(&pPage->aData[pPage->hdrOffset+8], iTo); } pPage->isInit = isInitOrig; } return SQLITE_OK; } /* ** Move the open database page pDbPage to location iFreePage in the ** database. The pDbPage reference remains valid. ** ** The isCommit flag indicates that there is no need to remember that ** the journal needs to be sync()ed before database page pDbPage->pgno ** can be written to. The caller has already promised not to write to that ** page. */ static int relocatePage( BtShared *pBt, /* Btree */ MemPage *pDbPage, /* Open page to move */ u8 eType, /* Pointer map 'type' entry for pDbPage */ Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */ Pgno iFreePage, /* The location to move pDbPage to */ int isCommit /* isCommit flag passed to sqlite3PagerMovepage */ ){ MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */ Pgno iDbPage = pDbPage->pgno; Pager *pPager = pBt->pPager; int rc; assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ); assert( sqlite3_mutex_held(pBt->mutex) ); assert( pDbPage->pBt==pBt ); /* Move page iDbPage from its current location to page number iFreePage */ TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", iDbPage, iFreePage, iPtrPage, eType)); rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage, isCommit); if( rc!=SQLITE_OK ){ return rc; } pDbPage->pgno = iFreePage; /* If pDbPage was a btree-page, then it may have child pages and/or cells ** that point to overflow pages. The pointer map entries for all these ** pages need to be changed. ** ** If pDbPage is an overflow page, then the first 4 bytes may store a ** pointer to a subsequent overflow page. If this is the case, then ** the pointer map needs to be updated for the subsequent overflow page. */ if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){ rc = setChildPtrmaps(pDbPage); if( rc!=SQLITE_OK ){ return rc; } }else{ Pgno nextOvfl = get4byte(pDbPage->aData); if( nextOvfl!=0 ){ ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage, &rc); if( rc!=SQLITE_OK ){ return rc; } } } /* Fix the database pointer on page iPtrPage that pointed at iDbPage so ** that it points at iFreePage. Also fix the pointer map entry for ** iPtrPage. */ if( eType!=PTRMAP_ROOTPAGE ){ rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0); if( rc!=SQLITE_OK ){ return rc; } rc = sqlite3PagerWrite(pPtrPage->pDbPage); if( rc!=SQLITE_OK ){ releasePage(pPtrPage); return rc; } rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType); releasePage(pPtrPage); if( rc==SQLITE_OK ){ ptrmapPut(pBt, iFreePage, eType, iPtrPage, &rc); } } return rc; } /* Forward declaration required by incrVacuumStep(). */ static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); /* ** Perform a single step of an incremental-vacuum. If successful, return ** SQLITE_OK. If there is no work to do (and therefore no point in ** calling this function again), return SQLITE_DONE. Or, if an error ** occurs, return some other error code. ** ** More specifically, this function attempts to re-organize the database so ** that the last page of the file currently in use is no longer in use. ** ** Parameter nFin is the number of pages that this database would contain ** were this function called until it returns SQLITE_DONE. ** ** If the bCommit parameter is non-zero, this function assumes that the ** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE ** or an error. bCommit is passed true for an auto-vacuum-on-commit ** operation, or false for an incremental vacuum. */ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ Pgno nFreeList; /* Number of pages still on the free-list */ int rc; assert( sqlite3_mutex_held(pBt->mutex) ); assert( iLastPg>nFin ); if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ u8 eType; Pgno iPtrPage; nFreeList = get4byte(&pBt->pPage1->aData[36]); if( nFreeList==0 ){ return SQLITE_DONE; } rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage); if( rc!=SQLITE_OK ){ return rc; } if( eType==PTRMAP_ROOTPAGE ){ return SQLITE_CORRUPT_BKPT; } if( eType==PTRMAP_FREEPAGE ){ if( bCommit==0 ){ /* Remove the page from the files free-list. This is not required ** if bCommit is non-zero. In that case, the free-list will be ** truncated to zero after this function returns, so it doesn't ** matter if it still contains some garbage entries. */ Pgno iFreePg; MemPage *pFreePg; rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT); if( rc!=SQLITE_OK ){ return rc; } assert( iFreePg==iLastPg ); releasePage(pFreePg); } } else { Pgno iFreePg; /* Index of free page to move pLastPg to */ MemPage *pLastPg; u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); if( rc!=SQLITE_OK ){ return rc; } /* If bCommit is zero, this loop runs exactly once and page pLastPg ** is swapped with the first free page pulled off the free list. ** ** On the other hand, if bCommit is greater than zero, then keep ** looping until a free-page located within the first nFin pages ** of the file is found. */ if( bCommit==0 ){ eMode = BTALLOC_LE; iNear = nFin; } do { MemPage *pFreePg; rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode); if( rc!=SQLITE_OK ){ releasePage(pLastPg); return rc; } releasePage(pFreePg); }while( bCommit && iFreePg>nFin ); assert( iFreePgbDoTruncate = 1; pBt->nPage = iLastPg; } return SQLITE_OK; } /* ** The database opened by the first argument is an auto-vacuum database ** nOrig pages in size containing nFree free pages. Return the expected ** size of the database in pages following an auto-vacuum operation. */ static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){ int nEntry; /* Number of entries on one ptrmap page */ Pgno nPtrmap; /* Number of PtrMap pages to be freed */ Pgno nFin; /* Return value */ nEntry = pBt->usableSize/5; nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; nFin = nOrig - nFree - nPtrmap; if( nOrig>PENDING_BYTE_PAGE(pBt) && nFinpBt; sqlite3BtreeEnter(p); assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); if( !pBt->autoVacuum ){ rc = SQLITE_DONE; }else{ Pgno nOrig = btreePagecount(pBt); Pgno nFree = get4byte(&pBt->pPage1->aData[36]); Pgno nFin = finalDbSize(pBt, nOrig, nFree); if( nOrig0 ){ rc = saveAllCursors(pBt, 0, 0); if( rc==SQLITE_OK ){ invalidateAllOverflowCache(pBt); rc = incrVacuumStep(pBt, nFin, nOrig, 0); } if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); put4byte(&pBt->pPage1->aData[28], pBt->nPage); } }else{ rc = SQLITE_DONE; } } sqlite3BtreeLeave(p); return rc; } /* ** This routine is called prior to sqlite3PagerCommit when a transaction ** is committed for an auto-vacuum database. ** ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages ** the database file should be truncated to during the commit process. ** i.e. the database has been reorganized so that only the first *pnTrunc ** pages are in use. */ static int autoVacuumCommit(BtShared *pBt){ int rc = SQLITE_OK; Pager *pPager = pBt->pPager; VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager); ) assert( sqlite3_mutex_held(pBt->mutex) ); invalidateAllOverflowCache(pBt); assert(pBt->autoVacuum); if( !pBt->incrVacuum ){ Pgno nFin; /* Number of pages in database after autovacuuming */ Pgno nFree; /* Number of pages on the freelist initially */ Pgno iFree; /* The next page to be freed */ Pgno nOrig; /* Database size before freeing */ nOrig = btreePagecount(pBt); if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){ /* It is not possible to create a database for which the final page ** is either a pointer-map page or the pending-byte page. If one ** is encountered, this indicates corruption. */ return SQLITE_CORRUPT_BKPT; } nFree = get4byte(&pBt->pPage1->aData[36]); nFin = finalDbSize(pBt, nOrig, nFree); if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; if( nFinnFin && rc==SQLITE_OK; iFree--){ rc = incrVacuumStep(pBt, nFin, iFree, 1); } if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); put4byte(&pBt->pPage1->aData[32], 0); put4byte(&pBt->pPage1->aData[36], 0); put4byte(&pBt->pPage1->aData[28], nFin); pBt->bDoTruncate = 1; pBt->nPage = nFin; } if( rc!=SQLITE_OK ){ sqlite3PagerRollback(pPager); } } assert( nRef>=sqlite3PagerRefcount(pPager) ); return rc; } #else /* ifndef SQLITE_OMIT_AUTOVACUUM */ # define setChildPtrmaps(x) SQLITE_OK #endif /* ** This routine does the first phase of a two-phase commit. This routine ** causes a rollback journal to be created (if it does not already exist) ** and populated with enough information so that if a power loss occurs ** the database can be restored to its original state by playing back ** the journal. Then the contents of the journal are flushed out to ** the disk. After the journal is safely on oxide, the changes to the ** database are written into the database file and flushed to oxide. ** At the end of this call, the rollback journal still exists on the ** disk and we are still holding all locks, so the transaction has not ** committed. See sqlite3BtreeCommitPhaseTwo() for the second phase of the ** commit process. ** ** This call is a no-op if no write-transaction is currently active on pBt. ** ** Otherwise, sync the database file for the btree pBt. zMaster points to ** the name of a master journal file that should be written into the ** individual journal file, or is NULL, indicating no master journal file ** (single database transaction). ** ** When this is called, the master journal should already have been ** created, populated with this journal pointer and synced to disk. ** ** Once this is routine has returned, the only thing required to commit ** the write-transaction for this database file is to delete the journal. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ int rc = SQLITE_OK; if( p->inTrans==TRANS_WRITE ){ BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ rc = autoVacuumCommit(pBt); if( rc!=SQLITE_OK ){ sqlite3BtreeLeave(p); return rc; } } if( pBt->bDoTruncate ){ sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); } #endif rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, 0); sqlite3BtreeLeave(p); } return rc; } /* ** This function is called from both BtreeCommitPhaseTwo() and BtreeRollback() ** at the conclusion of a transaction. */ static void btreeEndTransaction(Btree *p){ BtShared *pBt = p->pBt; assert( sqlite3BtreeHoldsMutex(p) ); #ifndef SQLITE_OMIT_AUTOVACUUM pBt->bDoTruncate = 0; #endif if( p->inTrans>TRANS_NONE && p->nVdbeRead>1 ){ /* If there are other active statements that belong to this database ** handle, downgrade to a read-only transaction. The other statements ** may still be reading from the database. */ downgradeAllSharedCacheTableLocks(p); p->inTrans = TRANS_READ; }else{ /* If the handle had any kind of transaction open, decrement the ** transaction count of the shared btree. If the transaction count ** reaches 0, set the shared state to TRANS_NONE. The unlockBtreeIfUnused() ** call below will unlock the pager. */ if( p->inTrans!=TRANS_NONE ){ clearAllSharedCacheTableLocks(p); pBt->nTransaction--; if( 0==pBt->nTransaction ){ pBt->inTransaction = TRANS_NONE; } } /* Set the current transaction state to TRANS_NONE and unlock the ** pager if this call closed the only read or write transaction. */ p->inTrans = TRANS_NONE; unlockBtreeIfUnused(pBt); } btreeIntegrity(p); } /* ** Commit the transaction currently in progress. ** ** This routine implements the second phase of a 2-phase commit. The ** sqlite3BtreeCommitPhaseOne() routine does the first phase and should ** be invoked prior to calling this routine. The sqlite3BtreeCommitPhaseOne() ** routine did all the work of writing information out to disk and flushing the ** contents so that they are written onto the disk platter. All this ** routine has to do is delete or truncate or zero the header in the ** the rollback journal (which causes the transaction to commit) and ** drop locks. ** ** Normally, if an error occurs while the pager layer is attempting to ** finalize the underlying journal file, this function returns an error and ** the upper layer will attempt a rollback. However, if the second argument ** is non-zero then this b-tree transaction is part of a multi-file ** transaction. In this case, the transaction has already been committed ** (by deleting a master journal file) and the caller will ignore this ** functions return code. So, even if an error occurs in the pager layer, ** reset the b-tree objects internal state to indicate that the write ** transaction has been closed. This is quite safe, as the pager will have ** transitioned to the error state. ** ** This will release the write lock on the database file. If there ** are no active cursors, it also releases the read lock. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ if( p->inTrans==TRANS_NONE ) return SQLITE_OK; sqlite3BtreeEnter(p); btreeIntegrity(p); /* If the handle has a write-transaction open, commit the shared-btrees ** transaction and set the shared state to TRANS_READ. */ if( p->inTrans==TRANS_WRITE ){ int rc; BtShared *pBt = p->pBt; assert( pBt->inTransaction==TRANS_WRITE ); assert( pBt->nTransaction>0 ); rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); if( rc!=SQLITE_OK && bCleanup==0 ){ sqlite3BtreeLeave(p); return rc; } p->iDataVersion--; /* Compensate for pPager->iDataVersion++; */ pBt->inTransaction = TRANS_READ; btreeClearHasContent(pBt); } btreeEndTransaction(p); sqlite3BtreeLeave(p); return SQLITE_OK; } /* ** Do both phases of a commit. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCommit(Btree *p){ int rc; sqlite3BtreeEnter(p); rc = sqlite3BtreeCommitPhaseOne(p, 0); if( rc==SQLITE_OK ){ rc = sqlite3BtreeCommitPhaseTwo(p, 0); } sqlite3BtreeLeave(p); return rc; } /* ** This routine sets the state to CURSOR_FAULT and the error ** code to errCode for every cursor on any BtShared that pBtree ** references. Or if the writeOnly flag is set to 1, then only ** trip write cursors and leave read cursors unchanged. ** ** Every cursor is a candidate to be tripped, including cursors ** that belong to other database connections that happen to be ** sharing the cache with pBtree. ** ** This routine gets called when a rollback occurs. If the writeOnly ** flag is true, then only write-cursors need be tripped - read-only ** cursors save their current positions so that they may continue ** following the rollback. Or, if writeOnly is false, all cursors are ** tripped. In general, writeOnly is false if the transaction being ** rolled back modified the database schema. In this case b-tree root ** pages may be moved or deleted from the database altogether, making ** it unsafe for read cursors to continue. ** ** If the writeOnly flag is true and an error is encountered while ** saving the current position of a read-only cursor, all cursors, ** including all read-cursors are tripped. ** ** SQLITE_OK is returned if successful, or if an error occurs while ** saving a cursor position, an SQLite error code. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ BtCursor *p; int rc = SQLITE_OK; assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 ); if( pBtree ){ sqlite3BtreeEnter(pBtree); for(p=pBtree->pBt->pCursor; p; p=p->pNext){ int i; if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ if( p->eState==CURSOR_VALID || p->eState==CURSOR_SKIPNEXT ){ rc = saveCursorPosition(p); if( rc!=SQLITE_OK ){ (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); break; } } }else{ sqlite3BtreeClearCursor(p); p->eState = CURSOR_FAULT; p->skipNext = errCode; } for(i=0; i<=p->iPage; i++){ releasePage(p->apPage[i]); p->apPage[i] = 0; } } sqlite3BtreeLeave(pBtree); } return rc; } /* ** Rollback the transaction in progress. ** ** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). ** Only write cursors are tripped if writeOnly is true but all cursors are ** tripped if writeOnly is false. Any attempt to use ** a tripped cursor will result in an error. ** ** This will release the write lock on the database file. If there ** are no active cursors, it also releases the read lock. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){ int rc; BtShared *pBt = p->pBt; MemPage *pPage1; assert( writeOnly==1 || writeOnly==0 ); assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK ); sqlite3BtreeEnter(p); if( tripCode==SQLITE_OK ){ rc = tripCode = saveAllCursors(pBt, 0, 0); if( rc ) writeOnly = 0; }else{ rc = SQLITE_OK; } if( tripCode ){ int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly); assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) ); if( rc2!=SQLITE_OK ) rc = rc2; } btreeIntegrity(p); if( p->inTrans==TRANS_WRITE ){ int rc2; assert( TRANS_WRITE==pBt->inTransaction ); rc2 = sqlite3PagerRollback(pBt->pPager); if( rc2!=SQLITE_OK ){ rc = rc2; } /* The rollback may have destroyed the pPage1->aData value. So ** call btreeGetPage() on page 1 again to make ** sure pPage1->aData is set correctly. */ if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ int nPage = get4byte(28+(u8*)pPage1->aData); testcase( nPage==0 ); if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); testcase( pBt->nPage!=nPage ); pBt->nPage = nPage; releasePage(pPage1); } assert( countValidCursors(pBt, 1)==0 ); pBt->inTransaction = TRANS_READ; btreeClearHasContent(pBt); } btreeEndTransaction(p); sqlite3BtreeLeave(p); return rc; } /* ** Start a statement subtransaction. The subtransaction can be rolled ** back independently of the main transaction. You must start a transaction ** before starting a subtransaction. The subtransaction is ended automatically ** if the main transaction commits or rolls back. ** ** Statement subtransactions are used around individual SQL statements ** that are contained within a BEGIN...COMMIT block. If a constraint ** error occurs within the statement, the effect of that one statement ** can be rolled back without having to rollback the entire transaction. ** ** A statement sub-transaction is implemented as an anonymous savepoint. The ** value passed as the second parameter is the total number of savepoints, ** including the new anonymous savepoint, open on the B-Tree. i.e. if there ** are no active savepoints and no other statement-transactions open, ** iStatement is 1. This anonymous savepoint can be released or rolled back ** using the sqlite3BtreeSavepoint() function. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeBeginStmt(Btree *p, int iStatement){ int rc; BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); assert( p->inTrans==TRANS_WRITE ); assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); assert( iStatement>0 ); assert( iStatement>p->nSavepoint ); assert( pBt->inTransaction==TRANS_WRITE ); /* At the pager level, a statement transaction is a savepoint with ** an index greater than all savepoints created explicitly using ** SQL statements. It is illegal to open, release or rollback any ** such savepoints while the statement transaction savepoint is active. */ rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement); sqlite3BtreeLeave(p); return rc; } /* ** The second argument to this function, op, is always SAVEPOINT_ROLLBACK ** or SAVEPOINT_RELEASE. This function either releases or rolls back the ** savepoint identified by parameter iSavepoint, depending on the value ** of op. ** ** Normally, iSavepoint is greater than or equal to zero. However, if op is ** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the ** contents of the entire transaction are rolled back. This is different ** from a normal transaction rollback, as no locks are released and the ** transaction remains open. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){ int rc = SQLITE_OK; if( p && p->inTrans==TRANS_WRITE ){ BtShared *pBt = p->pBt; assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) ); sqlite3BtreeEnter(p); rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); if( rc==SQLITE_OK ){ if( iSavepoint<0 && (pBt->btsFlags & BTS_INITIALLY_EMPTY)!=0 ){ pBt->nPage = 0; } rc = newDatabase(pBt); pBt->nPage = get4byte(28 + pBt->pPage1->aData); /* The database size was written into the offset 28 of the header ** when the transaction started, so we know that the value at offset ** 28 is nonzero. */ assert( pBt->nPage>0 ); } sqlite3BtreeLeave(p); } return rc; } /* ** Create a new cursor for the BTree whose root is on the page ** iTable. If a read-only cursor is requested, it is assumed that ** the caller already has at least a read-only transaction open ** on the database already. If a write-cursor is requested, then ** the caller is assumed to have an open write transaction. ** ** If wrFlag==0, then the cursor can only be used for reading. ** If wrFlag==1, then the cursor can be used for reading or for ** writing if other conditions for writing are also met. These ** are the conditions that must be met in order for writing to ** be allowed: ** ** 1: The cursor must have been opened with wrFlag==1 ** ** 2: Other database connections that share the same pager cache ** but which are not in the READ_UNCOMMITTED state may not have ** cursors open with wrFlag==0 on the same table. Otherwise ** the changes made by this write cursor would be visible to ** the read cursors in the other database connection. ** ** 3: The database must be writable (not on read-only media) ** ** 4: There must be an active transaction. ** ** No checking is done to make sure that page iTable really is the ** root page of a b-tree. If it is not, then the cursor acquired ** will not work correctly. */ static int btreeCursor( Btree *p, /* The btree */ int iTable, /* Root page of table to open */ int wrFlag, /* 1 to write. 0 read-only */ struct KeyInfo *pKeyInfo, /* First arg to comparison function */ BtCursor *pCur /* Space for new cursor */ ){ BtShared *pBt = p->pBt; /* Shared b-tree handle */ BtCursor *pX; /* Looping over other all cursors */ assert( sqlite3BtreeHoldsMutex(p) ); assert( wrFlag==0 || wrFlag==1 ); /* The following assert statements verify that if this is a sharable ** b-tree database, the connection is holding the required table locks, ** and that no other connection has any open cursor that conflicts with ** this lock. */ assert( hasSharedCacheTableLock(p, iTable, pKeyInfo!=0, wrFlag+1) ); assert( wrFlag==0 || !hasReadConflicts(p, iTable) ); /* Assert that the caller has opened the required transaction. */ assert( p->inTrans>TRANS_NONE ); assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); assert( pBt->pPage1 && pBt->pPage1->aData ); assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); if( wrFlag ){ allocateTempSpace(pBt); if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; } if( iTable==1 && btreePagecount(pBt)==0 ){ assert( wrFlag==0 ); iTable = 0; } /* Now that no other errors can occur, finish filling in the BtCursor ** variables and link the cursor into the BtShared list. */ pCur->pgnoRoot = (Pgno)iTable; pCur->iPage = -1; pCur->pKeyInfo = pKeyInfo; pCur->pBtree = p; pCur->pBt = pBt; assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); pCur->curFlags = wrFlag; pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY; /* If there are two or more cursors on the same btree, then all such ** cursors *must* have the BTCF_Multiple flag set. */ for(pX=pBt->pCursor; pX; pX=pX->pNext){ if( pX->pgnoRoot==(Pgno)iTable ){ pX->curFlags |= BTCF_Multiple; pCur->curFlags |= BTCF_Multiple; } } pCur->pNext = pBt->pCursor; pBt->pCursor = pCur; pCur->eState = CURSOR_INVALID; return SQLITE_OK; } SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursor( Btree *p, /* The btree */ int iTable, /* Root page of table to open */ int wrFlag, /* 1 to write. 0 read-only */ int N, int X, /* index of N key columns and X extra columns */ BtCursor **ppCur /* Write new cursor here */ ){ int rc; *ppCur = 0; if( iTable<1 ){ rc = SQLITE_CORRUPT_BKPT; }else{ KeyInfo *pKeyInfo = NULL; if (N > 0 || X > 0) { pKeyInfo = sqlite3DbMallocZero(0, sizeof(KeyInfo) + (N+X)*(sizeof(CollSeq*)+1)); if( pKeyInfo ){ pKeyInfo->aSortOrder = (u8*)&pKeyInfo->aColl[N+X]; pKeyInfo->nField = (u16)N; pKeyInfo->nXField = (u16)X; pKeyInfo->enc = p->enc; pKeyInfo->pBtree = p; pKeyInfo->nRef = 1; }else{ p->mallocFailed = 1; return SQLITE_NOMEM; } } BtCursor *pCur = sqlite3MallocZero(sizeof(BtCursor)); if (!pCur) { p->mallocFailed = 1; sqlite3_free(pKeyInfo); return SQLITE_NOMEM; } sqlite3BtreeEnter(p); rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); sqlite3BtreeLeave(p); if (rc != SQLITE_OK) { sqlite3_free(pKeyInfo); sqlite3_free(pCur); return rc; } *ppCur = pCur; } return rc; } /* ** Return the size of a BtCursor object in bytes. ** ** This interfaces is needed so that users of cursors can preallocate ** sufficient storage to hold a cursor. The BtCursor object is opaque ** to users so they cannot do the sizeof() themselves - they must call ** this routine. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorSize(void){ return ROUND8(sizeof(BtCursor)); } /* ** Close a cursor. The read lock on the database file is released ** when the last cursor is closed. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCloseCursor(BtCursor *pCur){ Btree *pBtree = pCur->pBtree; if( pBtree ){ int i; BtShared *pBt = pCur->pBt; sqlite3BtreeEnter(pBtree); sqlite3BtreeClearCursor(pCur); assert( pBt->pCursor!=0 ); if( pBt->pCursor==pCur ){ pBt->pCursor = pCur->pNext; }else{ BtCursor *pPrev = pBt->pCursor; do{ if( pPrev->pNext==pCur ){ pPrev->pNext = pCur->pNext; break; } pPrev = pPrev->pNext; }while( ALWAYS(pPrev) ); } for(i=0; i<=pCur->iPage; i++){ releasePage(pCur->apPage[i]); } unlockBtreeIfUnused(pBt); sqlite3_free(pCur->aOverflow); /* sqlite3_free(pCur); */ sqlite3BtreeLeave(pBtree); } if( pCur->pKeyInfo ) { assert( pCur->pKeyInfo->nRef>0 ); pCur->pKeyInfo->nRef--; if( pCur->pKeyInfo->nRef==0 ) sqlite3DbFree(0, pCur->pKeyInfo); } sqlite3_free(pCur); return SQLITE_OK; } /* ** Make sure the BtCursor* given in the argument has a valid ** BtCursor.info structure. If it is not already valid, call ** btreeParseCell() to fill it in. ** ** BtCursor.info is a cache of the information in the current cell. ** Using this cache reduces the number of calls to btreeParseCell(). */ #ifndef NDEBUG static void assertCellInfo(BtCursor *pCur){ CellInfo info; int iPage = pCur->iPage; memset(&info, 0, sizeof(info)); btreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info); assert( CORRUPT_DB || memcmp(&info, &pCur->info, sizeof(info))==0 ); } #else #define assertCellInfo(x) #endif static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){ if( pCur->info.nSize==0 ){ int iPage = pCur->iPage; pCur->curFlags |= BTCF_ValidNKey; btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); }else{ assertCellInfo(pCur); } } #ifndef NDEBUG /* The next routine used only within assert() statements */ /* ** Return true if the given BtCursor is valid. A valid cursor is one ** that is currently pointing to a row in a (non-empty) table. ** This is a verification routine is used only within assert() statements. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorIsValid(BtCursor *pCur){ return pCur && pCur->eState==CURSOR_VALID; } #endif /* NDEBUG */ /* ** Set *pSize to the size of the buffer needed to hold the value of ** the key for the current entry. If the cursor is not pointing ** to a valid entry, *pSize is set to 0. ** ** For a table with the INTKEY flag set, this routine returns the key ** itself, not the number of bytes in the key. ** ** The caller must position the cursor prior to invoking this routine. ** ** This routine cannot fail. It always returns SQLITE_OK. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); getCellInfo(pCur); *pSize = pCur->info.nKey; return SQLITE_OK; } /* ** Set *pSize to the number of bytes of data in the entry the ** cursor currently points to. ** ** The caller must guarantee that the cursor is pointing to a non-NULL ** valid entry. In other words, the calling procedure must guarantee ** that the cursor has Cursor.eState==CURSOR_VALID. ** ** Failure is not possible. This function always returns SQLITE_OK. ** It might just as well be a procedure (returning void) but we continue ** to return an integer result code for historical reasons. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPage>=0 ); assert( pCur->iPageapPage[pCur->iPage]->intKeyLeaf==1 ); getCellInfo(pCur); *pSize = pCur->info.nPayload; return SQLITE_OK; } /* ** Given the page number of an overflow page in the database (parameter ** ovfl), this function finds the page number of the next page in the ** linked list of overflow pages. If possible, it uses the auto-vacuum ** pointer-map data instead of reading the content of page ovfl to do so. ** ** If an error occurs an SQLite error code is returned. Otherwise: ** ** The page number of the next overflow page in the linked list is ** written to *pPgnoNext. If page ovfl is the last page in its linked ** list, *pPgnoNext is set to zero. ** ** If ppPage is not NULL, and a reference to the MemPage object corresponding ** to page number pOvfl was obtained, then *ppPage is set to point to that ** reference. It is the responsibility of the caller to call releasePage() ** on *ppPage to free the reference. In no reference was obtained (because ** the pointer-map was used to obtain the value for *pPgnoNext), then ** *ppPage is set to zero. */ static int getOverflowPage( BtShared *pBt, /* The database file */ Pgno ovfl, /* Current overflow page number */ MemPage **ppPage, /* OUT: MemPage handle (may be NULL) */ Pgno *pPgnoNext /* OUT: Next overflow page number */ ){ Pgno next = 0; MemPage *pPage = 0; int rc = SQLITE_OK; assert( sqlite3_mutex_held(pBt->mutex) ); assert(pPgnoNext); #ifndef SQLITE_OMIT_AUTOVACUUM /* Try to find the next page in the overflow list using the ** autovacuum pointer-map pages. Guess that the next page in ** the overflow list is page number (ovfl+1). If that guess turns ** out to be wrong, fall back to loading the data of page ** number ovfl to determine the next page number. */ if( pBt->autoVacuum ){ Pgno pgno; Pgno iGuess = ovfl+1; u8 eType; while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){ iGuess++; } if( iGuess<=btreePagecount(pBt) ){ rc = ptrmapGet(pBt, iGuess, &eType, &pgno); if( rc==SQLITE_OK && eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){ next = iGuess; rc = SQLITE_DONE; } } } #endif assert( next==0 || rc==SQLITE_DONE ); if( rc==SQLITE_OK ){ rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0); assert( rc==SQLITE_OK || pPage==0 ); if( rc==SQLITE_OK ){ next = get4byte(pPage->aData); } } *pPgnoNext = next; if( ppPage ){ *ppPage = pPage; }else{ releasePage(pPage); } return (rc==SQLITE_DONE ? SQLITE_OK : rc); } /* ** Copy data from a buffer to a page, or from a page to a buffer. ** ** pPayload is a pointer to data stored on database page pDbPage. ** If argument eOp is false, then nByte bytes of data are copied ** from pPayload to the buffer pointed at by pBuf. If eOp is true, ** then sqlite3PagerWrite() is called on pDbPage and nByte bytes ** of data are copied from the buffer pBuf to pPayload. ** ** SQLITE_OK is returned on success, otherwise an error code. */ static int copyPayload( void *pPayload, /* Pointer to page data */ void *pBuf, /* Pointer to buffer */ int nByte, /* Number of bytes to copy */ int eOp, /* 0 -> copy from page, 1 -> copy to page */ DbPage *pDbPage /* Page containing pPayload */ ){ if( eOp ){ /* Copy data from buffer to page (a write operation) */ int rc = sqlite3PagerWrite(pDbPage); if( rc!=SQLITE_OK ){ return rc; } memcpy(pPayload, pBuf, nByte); }else{ /* Copy data from page to buffer (a read operation) */ memcpy(pBuf, pPayload, nByte); } return SQLITE_OK; } /* ** This function is used to read or overwrite payload information ** for the entry that the pCur cursor is pointing to. The eOp ** argument is interpreted as follows: ** ** 0: The operation is a read. Populate the overflow cache. ** 1: The operation is a write. Populate the overflow cache. ** 2: The operation is a read. Do not populate the overflow cache. ** ** A total of "amt" bytes are read or written beginning at "offset". ** Data is read to or from the buffer pBuf. ** ** The content being read or written might appear on the main page ** or be scattered out on multiple overflow pages. ** ** If the current cursor entry uses one or more overflow pages and the ** eOp argument is not 2, this function may allocate space for and lazily ** populates the overflow page-list cache array (BtCursor.aOverflow). ** Subsequent calls use this cache to make seeking to the supplied offset ** more efficient. ** ** Once an overflow page-list cache has been allocated, it may be ** invalidated if some other cursor writes to the same table, or if ** the cursor is moved to a different row. Additionally, in auto-vacuum ** mode, the following events may invalidate an overflow page-list cache. ** ** * An incremental vacuum, ** * A commit in auto_vacuum="full" mode, ** * Creating a table (may require moving an overflow page). */ static int accessPayload( BtCursor *pCur, /* Cursor pointing to entry to read from */ u32 offset, /* Begin reading this far into payload */ u32 amt, /* Read this many bytes */ unsigned char *pBuf, /* Write the bytes into this buffer */ int eOp /* zero to read. non-zero to write. */ ){ unsigned char *aPayload; int rc = SQLITE_OK; int iIdx = 0; MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */ BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ #ifdef SQLITE_DIRECT_OVERFLOW_READ unsigned char * const pBufStart = pBuf; int bEnd; /* True if reading to end of data */ #endif assert( pPage ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->aiIdx[pCur->iPage]nCell ); assert( cursorHoldsMutex(pCur) ); assert( eOp!=2 || offset==0 ); /* Always start from beginning for eOp==2 */ getCellInfo(pCur); aPayload = pCur->info.pPayload; #ifdef SQLITE_DIRECT_OVERFLOW_READ bEnd = offset+amt==pCur->info.nPayload; #endif assert( offset+amt <= pCur->info.nPayload ); if( &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] ){ /* Trying to read or write past the end of the data is an error */ return SQLITE_CORRUPT_BKPT; } /* Check if data must be read/written to/from the btree page itself. */ if( offsetinfo.nLocal ){ int a = amt; if( a+offset>pCur->info.nLocal ){ a = pCur->info.nLocal - offset; } rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage); offset = 0; pBuf += a; amt -= a; }else{ offset -= pCur->info.nLocal; } if( rc==SQLITE_OK && amt>0 ){ const u32 ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */ Pgno nextPage; nextPage = get4byte(&aPayload[pCur->info.nLocal]); /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. ** Except, do not allocate aOverflow[] for eOp==2. ** ** The aOverflow[] array is sized at one entry for each overflow page ** in the overflow chain. The page number of the first overflow page is ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array ** means "not yet known" (the cache is lazily populated). */ if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){ int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; if( nOvfl>pCur->nOvflAlloc ){ Pgno *aNew = (Pgno*)sqlite3Realloc( pCur->aOverflow, nOvfl*2*sizeof(Pgno) ); if( aNew==0 ){ rc = SQLITE_NOMEM; }else{ pCur->nOvflAlloc = nOvfl*2; pCur->aOverflow = aNew; } } if( rc==SQLITE_OK ){ memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); pCur->curFlags |= BTCF_ValidOvfl; } } /* If the overflow page-list cache has been allocated and the ** entry for the first required overflow page is valid, skip ** directly to it. */ if( (pCur->curFlags & BTCF_ValidOvfl)!=0 && pCur->aOverflow[offset/ovflSize] ){ iIdx = (offset/ovflSize); nextPage = pCur->aOverflow[iIdx]; offset = (offset%ovflSize); } for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){ /* If required, populate the overflow page-list cache. */ if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){ assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage); pCur->aOverflow[iIdx] = nextPage; } if( offset>=ovflSize ){ /* The only reason to read this page is to obtain the page ** number for the next page in the overflow chain. The page ** data is not required. So first try to lookup the overflow ** page-list cache, if any, then fall back to the getOverflowPage() ** function. ** ** Note that the aOverflow[] array must be allocated because eOp!=2 ** here. If eOp==2, then offset==0 and this branch is never taken. */ assert( eOp!=2 ); assert( pCur->curFlags & BTCF_ValidOvfl ); assert( pCur->pBtree->db==pBt->db ); if( pCur->aOverflow[iIdx+1] ){ nextPage = pCur->aOverflow[iIdx+1]; }else{ rc = getOverflowPage(pBt, nextPage, 0, &nextPage); } offset -= ovflSize; }else{ /* Need to read this page properly. It contains some of the ** range of data that is being read (eOp==0) or written (eOp!=0). */ #ifdef SQLITE_DIRECT_OVERFLOW_READ sqlite3_file *fd; #endif int a = amt; if( a + offset > ovflSize ){ a = ovflSize - offset; } #ifdef SQLITE_DIRECT_OVERFLOW_READ /* If all the following are true: ** ** 1) this is a read operation, and ** 2) data is required from the start of this overflow page, and ** 3) the database is file-backed, and ** 4) there is no open write-transaction, and ** 5) the database is not a WAL database, ** 6) all data from the page is being read. ** 7) at least 4 bytes have already been read into the output buffer ** ** then data can be read directly from the database file into the ** output buffer, bypassing the page-cache altogether. This speeds ** up loading large records that span many overflow pages. */ if( (eOp&0x01)==0 /* (1) */ && offset==0 /* (2) */ && (bEnd || a==ovflSize) /* (6) */ && pBt->inTransaction==TRANS_READ /* (4) */ && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (3) */ && pBt->pPage1->aData[19]==0x01 /* (5) */ && &pBuf[-4]>=pBufStart /* (7) */ ){ u8 aSave[4]; u8 *aWrite = &pBuf[-4]; assert( aWrite>=pBufStart ); /* hence (7) */ memcpy(aSave, aWrite, 4); rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); nextPage = get4byte(aWrite); memcpy(aWrite, aSave, 4); }else #endif { DbPage *pDbPage; rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0) ); if( rc==SQLITE_OK ){ aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage); sqlite3PagerUnref(pDbPage); offset = 0; } } amt -= a; pBuf += a; } } } if( rc==SQLITE_OK && amt>0 ){ return SQLITE_CORRUPT_BKPT; } return rc; } /* ** Read part of the key associated with cursor pCur. Exactly ** "amt" bytes will be transferred into pBuf[]. The transfer ** begins at "offset". ** ** The caller must ensure that pCur is pointing to a valid row ** in the table. ** ** Return SQLITE_OK on success or an error code if anything goes ** wrong. An error is returned if "offset+amt" is larger than ** the available payload. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); return accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0); } /* ** Read part of the data associated with cursor pCur. Exactly ** "amt" bytes will be transfered into pBuf[]. The transfer ** begins at "offset". ** ** Return SQLITE_OK on success or an error code if anything goes ** wrong. An error is returned if "offset+amt" is larger than ** the available payload. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ int rc; #ifndef SQLITE_OMIT_INCRBLOB if ( pCur->eState==CURSOR_INVALID ){ return SQLITE_ABORT; } #endif assert( cursorHoldsMutex(pCur) ); rc = restoreCursorPosition(pCur); if( rc==SQLITE_OK ){ assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] ); assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); rc = accessPayload(pCur, offset, amt, pBuf, 0); } return rc; } /* ** Return a pointer to payload information from the entry that the ** pCur cursor is pointing to. The pointer is to the beginning of ** the key if index btrees (pPage->intKey==0) and is the data for ** table btrees (pPage->intKey==1). The number of bytes of available ** key/data is written into *pAmt. If *pAmt==0, then the value ** returned will not be a valid pointer. ** ** This routine is an optimization. It is common for the entire key ** and data to fit on the local page and for there to be no overflow ** pages. When that is so, this routine can be used to access the ** key and data without making a copy. If the key and/or data spills ** onto overflow pages, then accessPayload() must be used to reassemble ** the key/data and copy it into a preallocated buffer. ** ** The pointer returned by this routine looks directly into the cached ** page of the database. The data might change or move the next time ** any btree routine is called. */ static const void *fetchPayload( BtCursor *pCur, /* Cursor pointing to entry to read from */ u32 *pAmt /* Write the number of available bytes here */ ){ u32 amt; assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); assert( pCur->eState==CURSOR_VALID ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); assert( cursorHoldsMutex(pCur) ); assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); assert( pCur->info.nSize>0 ); assert( pCur->info.pPayload>pCur->apPage[pCur->iPage]->aData || CORRUPT_DB ); assert( pCur->info.pPayloadapPage[pCur->iPage]->aDataEnd ||CORRUPT_DB); amt = (int)(pCur->apPage[pCur->iPage]->aDataEnd - pCur->info.pPayload); if( pCur->info.nLocalinfo.nLocal; *pAmt = amt; return (void*)pCur->info.pPayload; } /* ** For the entry that cursor pCur is point to, return as ** many bytes of the key or data as are available on the local ** b-tree page. Write the number of available bytes into *pAmt. ** ** The pointer returned is ephemeral. The key/data may move ** or be destroyed on the next call to any Btree routine, ** including calls from other threads against the same cache. ** Hence, a mutex on the BtShared should be held prior to calling ** this routine. ** ** These routines is used to get quick access to key and data ** in the common case where no overflow pages are used. */ SQLITE_API const void *SQLITE_STDCALL sqlite3BtreeKeyFetch(BtCursor *pCur, u32 *pAmt){ return fetchPayload(pCur, pAmt); } SQLITE_API const void *SQLITE_STDCALL sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){ return fetchPayload(pCur, pAmt); } /* ** Move the cursor down to a new child page. The newPgno argument is the ** page number of the child page to move to. ** ** This function returns SQLITE_CORRUPT if the page-header flags field of ** the new child page does not match the flags field of the parent (i.e. ** if an intkey page appears to be the parent of a non-intkey page, or ** vice-versa). */ static int moveToChild(BtCursor *pCur, u32 newPgno){ BtShared *pBt = pCur->pBt; assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPageiPage>=0 ); if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); pCur->iPage++; pCur->aiIdx[pCur->iPage] = 0; return getAndInitPage(pBt, newPgno, &pCur->apPage[pCur->iPage], pCur, pCur->curPagerFlags); } #if SQLITE_DEBUG /* ** Page pParent is an internal (non-leaf) tree page. This function ** asserts that page number iChild is the left-child if the iIdx'th ** cell in page pParent. Or, if iIdx is equal to the total number of ** cells in pParent, that page number iChild is the right-child of ** the page. */ static void assertParentIndex(MemPage *pParent, int iIdx, Pgno iChild){ if( CORRUPT_DB ) return; /* The conditions tested below might not be true ** in a corrupt database */ assert( iIdx<=pParent->nCell ); if( iIdx==pParent->nCell ){ assert( get4byte(&pParent->aData[pParent->hdrOffset+8])==iChild ); }else{ assert( get4byte(findCell(pParent, iIdx))==iChild ); } } #else # define assertParentIndex(x,y,z) #endif /* ** Move the cursor up to the parent page. ** ** pCur->idx is set to the cell index that contains the pointer ** to the page we are coming from. If we are coming from the ** right-most child page then pCur->idx is set to one more than ** the largest cell index. */ static void moveToParent(BtCursor *pCur){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPage>0 ); assert( pCur->apPage[pCur->iPage] ); assertParentIndex( pCur->apPage[pCur->iPage-1], pCur->aiIdx[pCur->iPage-1], pCur->apPage[pCur->iPage]->pgno ); testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); releasePageNotNull(pCur->apPage[pCur->iPage--]); } /* ** Move the cursor to point to the root page of its b-tree structure. ** ** If the table has a virtual root page, then the cursor is moved to point ** to the virtual root page instead of the actual root page. A table has a ** virtual root page when the actual root page contains no cells and a ** single child page. This can only happen with the table rooted at page 1. ** ** If the b-tree structure is empty, the cursor state is set to ** CURSOR_INVALID. Otherwise, the cursor is set to point to the first ** cell located on the root (or virtual root) page and the cursor state ** is set to CURSOR_VALID. ** ** If this function returns successfully, it may be assumed that the ** page-header flags indicate that the [virtual] root-page is the expected ** kind of b-tree page (i.e. if when opening the cursor the caller did not ** specify a KeyInfo structure the flags byte is set to 0x05 or 0x0D, ** indicating a table b-tree, or if the caller did specify a KeyInfo ** structure the flags byte is set to 0x02 or 0x0A, indicating an index ** b-tree). */ static int moveToRoot(BtCursor *pCur){ MemPage *pRoot; int rc = SQLITE_OK; assert( cursorHoldsMutex(pCur) ); assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); assert( CURSOR_VALID < CURSOR_REQUIRESEEK ); assert( CURSOR_FAULT > CURSOR_REQUIRESEEK ); if( pCur->eState>=CURSOR_REQUIRESEEK ){ if( pCur->eState==CURSOR_FAULT ){ assert( pCur->skipNext!=SQLITE_OK ); return pCur->skipNext; } sqlite3BtreeClearCursor(pCur); } if( pCur->iPage>=0 ){ while( pCur->iPage ){ assert( pCur->apPage[pCur->iPage]!=0 ); releasePageNotNull(pCur->apPage[pCur->iPage--]); } }else if( pCur->pgnoRoot==0 ){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ assert( pCur->iPage==(-1) ); rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], 0, pCur->curPagerFlags); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; } pCur->iPage = 0; pCur->curIntKey = pCur->apPage[0]->intKey; } pRoot = pCur->apPage[0]; assert( pRoot->pgno==pCur->pgnoRoot ); /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is ** NULL, the caller expects a table b-tree. If this is not the case, ** return an SQLITE_CORRUPT error. ** ** Earlier versions of SQLite assumed that this test could not fail ** if the root page was already loaded when this function was called (i.e. ** if pCur->iPage>=0). But this is not so if the database is corrupted ** in such a way that page pRoot is linked into a second b-tree table ** (or the freelist). */ assert( pRoot->intKey==1 || pRoot->intKey==0 ); if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){ return SQLITE_CORRUPT_BKPT; } pCur->aiIdx[0] = 0; pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl); if( pRoot->nCell>0 ){ pCur->eState = CURSOR_VALID; }else if( !pRoot->leaf ){ Pgno subpage; if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT; subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); pCur->eState = CURSOR_VALID; rc = moveToChild(pCur, subpage); }else{ pCur->eState = CURSOR_INVALID; } return rc; } /* ** Move the cursor down to the left-most leaf entry beneath the ** entry to which it is currently pointing. ** ** The left-most leaf is the one with the smallest key - the first ** in ascending order. */ static int moveToLeftmost(BtCursor *pCur){ Pgno pgno; int rc = SQLITE_OK; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){ assert( pCur->aiIdx[pCur->iPage]nCell ); pgno = get4byte(findCell(pPage, pCur->aiIdx[pCur->iPage])); rc = moveToChild(pCur, pgno); } return rc; } /* ** Move the cursor down to the right-most leaf entry beneath the ** page to which it is currently pointing. Notice the difference ** between moveToLeftmost() and moveToRightmost(). moveToLeftmost() ** finds the left-most entry beneath the *entry* whereas moveToRightmost() ** finds the right-most entry beneath the *page*. ** ** The right-most entry is the one with the largest key - the last ** key in ascending order. */ static int moveToRightmost(BtCursor *pCur){ Pgno pgno; int rc = SQLITE_OK; MemPage *pPage = 0; assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){ pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); pCur->aiIdx[pCur->iPage] = pPage->nCell; rc = moveToChild(pCur, pgno); if( rc ) return rc; } pCur->aiIdx[pCur->iPage] = pPage->nCell-1; assert( pCur->info.nSize==0 ); assert( (pCur->curFlags & BTCF_ValidNKey)==0 ); return SQLITE_OK; } /* Move the cursor to the first entry in the table. Return SQLITE_OK ** on success. Set *pRes to 0 if the cursor actually points to something ** or set *pRes to 1 if the table is empty. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeFirst(BtCursor *pCur, int *pRes){ int rc; assert( cursorHoldsMutex(pCur) ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); rc = moveToRoot(pCur); if( rc==SQLITE_OK ){ if( pCur->eState==CURSOR_INVALID ){ assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); *pRes = 1; }else{ assert( pCur->apPage[pCur->iPage]->nCell>0 ); *pRes = 0; rc = moveToLeftmost(pCur); } } return rc; } /* Move the cursor to the last entry in the table. Return SQLITE_OK ** on success. Set *pRes to 0 if the cursor actually points to something ** or set *pRes to 1 if the table is empty. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeLast(BtCursor *pCur, int *pRes){ int rc; assert( cursorHoldsMutex(pCur) ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); /* If the cursor already points to the last entry, this is a no-op. */ if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){ #ifdef SQLITE_DEBUG /* This block serves to assert() that the cursor really does point ** to the last entry in the b-tree. */ int ii; for(ii=0; iiiPage; ii++){ assert( pCur->aiIdx[ii]==pCur->apPage[ii]->nCell ); } assert( pCur->aiIdx[pCur->iPage]==pCur->apPage[pCur->iPage]->nCell-1 ); assert( pCur->apPage[pCur->iPage]->leaf ); #endif return SQLITE_OK; } rc = moveToRoot(pCur); if( rc==SQLITE_OK ){ if( CURSOR_INVALID==pCur->eState ){ assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); *pRes = 1; }else{ assert( pCur->eState==CURSOR_VALID ); *pRes = 0; rc = moveToRightmost(pCur); if( rc==SQLITE_OK ){ pCur->curFlags |= BTCF_AtLast; }else{ pCur->curFlags &= ~BTCF_AtLast; } } } return rc; } SQLITE_API void SQLITE_STDCALL sqlite3BtreeInitUnpackedRecord( UnpackedRecord *pUnKey, BtCursor* pCur, int nField, int default_rc, Mem* pMem) { pUnKey->pKeyInfo = pCur->pKeyInfo; pUnKey->nField = nField; pUnKey->default_rc = default_rc; pUnKey->aMem = pMem; } /* Move the cursor so that it points to an entry near the key ** specified by pIdxKey or intKey. Return a success code. ** ** For INTKEY tables, the intKey parameter is used. pIdxKey ** must be NULL. For index tables, pIdxKey is used and intKey ** is ignored. ** ** If an exact match is not found, then the cursor is always ** left pointing at a leaf page which would hold the entry if it ** were present. The cursor might point to an entry that comes ** before or after the key. ** ** An integer is written into *pRes which is the result of ** comparing the key with the entry to which the cursor is ** pointing. The meaning of the integer written into ** *pRes is as follows: ** ** *pRes<0 The cursor is left pointing at an entry that ** is smaller than intKey/pIdxKey or if the table is empty ** and the cursor is therefore left point to nothing. ** ** *pRes==0 The cursor is left pointing at an entry that ** exactly matches intKey/pIdxKey. ** ** *pRes>0 The cursor is left pointing at an entry that ** is larger than intKey/pIdxKey. ** */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeMovetoUnpacked( BtCursor *pCur, /* The cursor to be moved */ UnpackedRecord *pIdxKey, /* Unpacked index key */ i64 intKey, /* The table key */ int biasRight, /* If true, bias the search to the high end */ int *pRes /* Write search results here */ ){ int rc; RecordCompare xRecordCompare; assert( cursorHoldsMutex(pCur) ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); assert( pRes ); assert( (pIdxKey==0)==(pCur->pKeyInfo==0) ); /* If the cursor is already positioned at the point we are trying ** to move to, then just return without doing any work */ if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 && pCur->curIntKey ){ if( pCur->info.nKey==intKey ){ *pRes = 0; return SQLITE_OK; } if( (pCur->curFlags & BTCF_AtLast)!=0 && pCur->info.nKeyerrCode = 0; assert( pIdxKey->default_rc==1 || pIdxKey->default_rc==0 || pIdxKey->default_rc==-1 ); }else{ xRecordCompare = 0; /* All keys are integers */ } rc = moveToRoot(pCur); if( rc ){ return rc; } assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] ); assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit ); assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 ); if( pCur->eState==CURSOR_INVALID ){ *pRes = -1; assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); return SQLITE_OK; } assert( pCur->apPage[0]->intKey==pCur->curIntKey ); assert( pCur->curIntKey || pIdxKey ); for(;;){ int lwr, upr, idx, c; Pgno chldPg; MemPage *pPage = pCur->apPage[pCur->iPage]; u8 *pCell; /* Pointer to current cell in pPage */ /* pPage->nCell must be greater than zero. If this is the root-page ** the cursor would have been INVALID above and this for(;;) loop ** not run. If this is not the root-page, then the moveToChild() routine ** would have already detected db corruption. Similarly, pPage must ** be the right kind (index or table) of b-tree page. Otherwise ** a moveToChild() or moveToRoot() call would have detected corruption. */ assert( pPage->nCell>0 ); assert( pPage->intKey==(pIdxKey==0) ); lwr = 0; upr = pPage->nCell-1; assert( biasRight==0 || biasRight==1 ); idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */ pCur->aiIdx[pCur->iPage] = (u16)idx; if( xRecordCompare==0 ){ for(;;){ i64 nCellKey; pCell = findCellPastPtr(pPage, idx); if( pPage->intKeyLeaf ){ while( 0x80 <= *(pCell++) ){ if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; } } getVarint(pCell, (u64*)&nCellKey); if( nCellKeyupr ){ c = -1; break; } }else if( nCellKey>intKey ){ upr = idx-1; if( lwr>upr ){ c = +1; break; } }else{ assert( nCellKey==intKey ); pCur->curFlags |= BTCF_ValidNKey; pCur->info.nKey = nCellKey; pCur->aiIdx[pCur->iPage] = (u16)idx; if( !pPage->leaf ){ lwr = idx; goto moveto_next_layer; }else{ *pRes = 0; rc = SQLITE_OK; goto moveto_finish; } } assert( lwr+upr>=0 ); idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */ } }else{ for(;;){ int nCell; /* Size of the pCell cell in bytes */ pCell = findCellPastPtr(pPage, idx); /* The maximum supported page-size is 65536 bytes. This means that ** the maximum number of record bytes stored on an index B-Tree ** page is less than 16384 bytes and may be stored as a 2-byte ** varint. This information is used to attempt to avoid parsing ** the entire cell by checking for the cases where the record is ** stored entirely within the b-tree page by inspecting the first ** 2 bytes of the cell. */ nCell = pCell[0]; if( nCell<=pPage->max1bytePayload ){ /* This branch runs if the record-size field of the cell is a ** single byte varint and the record fits entirely on the main ** b-tree page. */ testcase( pCell+nCell+1==pPage->aDataEnd ); c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey); }else if( !(pCell[1] & 0x80) && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal ){ /* The record-size field is a 2 byte varint and the record ** fits entirely on the main b-tree page. */ testcase( pCell+nCell+2==pPage->aDataEnd ); c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey); }else{ /* The record flows over onto one or more overflow pages. In ** this case the whole cell needs to be parsed, a buffer allocated ** and accessPayload() used to retrieve the record into the ** buffer before VdbeRecordCompare() can be called. ** ** If the record is corrupt, the xRecordCompare routine may read ** up to two varints past the end of the buffer. An extra 18 ** bytes of padding is allocated at the end of the buffer in ** case this happens. */ void *pCellKey; u8 * const pCellBody = pCell - pPage->childPtrSize; pPage->xParseCell(pPage, pCellBody, &pCur->info); nCell = (int)pCur->info.nKey; testcase( nCell<0 ); /* True if key size is 2^32 or more */ testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */ testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */ testcase( nCell==2 ); /* Minimum legal index key size */ if( nCell<2 ){ rc = SQLITE_CORRUPT_BKPT; goto moveto_finish; } pCellKey = sqlite3Malloc( nCell+18 ); if( pCellKey==0 ){ rc = SQLITE_NOMEM; goto moveto_finish; } pCur->aiIdx[pCur->iPage] = (u16)idx; rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2); if( rc ){ sqlite3_free(pCellKey); goto moveto_finish; } c = xRecordCompare(nCell, pCellKey, pIdxKey); sqlite3_free(pCellKey); } assert( (pIdxKey->errCode!=SQLITE_CORRUPT || c==0) && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->mallocFailed) ); if( c<0 ){ lwr = idx+1; }else if( c>0 ){ upr = idx-1; }else{ assert( c==0 ); *pRes = 0; rc = SQLITE_OK; pCur->aiIdx[pCur->iPage] = (u16)idx; if( pIdxKey->errCode ) rc = SQLITE_CORRUPT; goto moveto_finish; } if( lwr>upr ) break; assert( lwr+upr>=0 ); idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */ } } assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) ); assert( pPage->isInit ); if( pPage->leaf ){ assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); pCur->aiIdx[pCur->iPage] = (u16)idx; *pRes = c; rc = SQLITE_OK; goto moveto_finish; } moveto_next_layer: if( lwr>=pPage->nCell ){ chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); }else{ chldPg = get4byte(findCell(pPage, lwr)); } pCur->aiIdx[pCur->iPage] = (u16)lwr; rc = moveToChild(pCur, chldPg); if( rc ) break; } moveto_finish: pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); return rc; } /* ** Return TRUE if the cursor is not pointing at an entry of the table. ** ** TRUE will be returned after a call to sqlite3BtreeNext() moves ** past the last entry in the table or sqlite3BtreePrev() moves past ** the first entry. TRUE is also returned if the table is empty. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeEof(BtCursor *pCur){ /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries ** have been deleted? This API will need to change to return an error code ** as well as the boolean result value. */ return (CURSOR_VALID!=pCur->eState); } /* ** Advance the cursor to the next entry in the database. If ** successful then set *pRes=0. If the cursor ** was already pointing to the last entry in the database before ** this routine was called, then set *pRes=1. ** ** The main entry point is sqlite3BtreeNext(). That routine is optimized ** for the common case of merely incrementing the cell counter BtCursor.aiIdx ** to the next cell on the current page. The (slower) btreeNext() helper ** routine is called when it is necessary to move to a different page or ** to restore the cursor. ** ** The calling function will set *pRes to 0 or 1. The initial *pRes value ** will be 1 if the cursor being stepped corresponds to an SQL index and ** if this routine could have been skipped if that SQL index had been ** a unique index. Otherwise the caller will have set *pRes to zero. ** Zero is the common case. The btree implementation is free to use the ** initial *pRes value as a hint to improve performance, but the current ** SQLite btree implementation does not. (Note that the comdb2 btree ** implementation does use this hint, however.) */ static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){ int rc; int idx; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); assert( *pRes==0 ); if( pCur->eState!=CURSOR_VALID ){ assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); rc = restoreCursorPosition(pCur); if( rc!=SQLITE_OK ){ return rc; } if( CURSOR_INVALID==pCur->eState ){ *pRes = 1; return SQLITE_OK; } if( pCur->skipNext ){ assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); pCur->eState = CURSOR_VALID; if( pCur->skipNext>0 ){ pCur->skipNext = 0; return SQLITE_OK; } pCur->skipNext = 0; } } pPage = pCur->apPage[pCur->iPage]; idx = ++pCur->aiIdx[pCur->iPage]; assert( pPage->isInit ); /* If the database file is corrupt, it is possible for the value of idx ** to be invalid here. This can only occur if a second cursor modifies ** the page while cursor pCur is holding a reference to it. Which can ** only happen if the database is corrupt in such a way as to link the ** page into more than one b-tree structure. */ testcase( idx>pPage->nCell ); if( idx>=pPage->nCell ){ if( !pPage->leaf ){ rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); if( rc ) return rc; return moveToLeftmost(pCur); } do{ if( pCur->iPage==0 ){ *pRes = 1; pCur->eState = CURSOR_INVALID; return SQLITE_OK; } moveToParent(pCur); pPage = pCur->apPage[pCur->iPage]; }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell ); if( pPage->intKey ){ return sqlite3BtreeNext(pCur, pRes); }else{ return SQLITE_OK; } } if( pPage->leaf ){ return SQLITE_OK; }else{ return moveToLeftmost(pCur); } } SQLITE_API int SQLITE_STDCALL sqlite3BtreeNext(BtCursor *pCur, int *pRes){ MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); assert( *pRes==0 || *pRes==1 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); *pRes = 0; if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes); pPage = pCur->apPage[pCur->iPage]; if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){ pCur->aiIdx[pCur->iPage]--; return btreeNext(pCur, pRes); } if( pPage->leaf ){ return SQLITE_OK; }else{ return moveToLeftmost(pCur); } } /* ** Step the cursor to the back to the previous entry in the database. If ** successful then set *pRes=0. If the cursor ** was already pointing to the first entry in the database before ** this routine was called, then set *pRes=1. ** ** The main entry point is sqlite3BtreePrevious(). That routine is optimized ** for the common case of merely decrementing the cell counter BtCursor.aiIdx ** to the previous cell on the current page. The (slower) btreePrevious() ** helper routine is called when it is necessary to move to a different page ** or to restore the cursor. ** ** The calling function will set *pRes to 0 or 1. The initial *pRes value ** will be 1 if the cursor being stepped corresponds to an SQL index and ** if this routine could have been skipped if that SQL index had been ** a unique index. Otherwise the caller will have set *pRes to zero. ** Zero is the common case. The btree implementation is free to use the ** initial *pRes value as a hint to improve performance, but the current ** SQLite btree implementation does not. (Note that the comdb2 btree ** implementation does use this hint, however.) */ static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){ int rc; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); assert( *pRes==0 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 ); assert( pCur->info.nSize==0 ); if( pCur->eState!=CURSOR_VALID ){ rc = restoreCursorPosition(pCur); if( rc!=SQLITE_OK ){ return rc; } if( CURSOR_INVALID==pCur->eState ){ *pRes = 1; return SQLITE_OK; } if( pCur->skipNext ){ assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); pCur->eState = CURSOR_VALID; if( pCur->skipNext<0 ){ pCur->skipNext = 0; return SQLITE_OK; } pCur->skipNext = 0; } } pPage = pCur->apPage[pCur->iPage]; assert( pPage->isInit ); if( !pPage->leaf ){ int idx = pCur->aiIdx[pCur->iPage]; rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); if( rc ) return rc; rc = moveToRightmost(pCur); }else{ while( pCur->aiIdx[pCur->iPage]==0 ){ if( pCur->iPage==0 ){ pCur->eState = CURSOR_INVALID; *pRes = 1; return SQLITE_OK; } moveToParent(pCur); } assert( pCur->info.nSize==0 ); assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 ); pCur->aiIdx[pCur->iPage]--; pPage = pCur->apPage[pCur->iPage]; if( pPage->intKey && !pPage->leaf ){ rc = sqlite3BtreePrevious(pCur, pRes); }else{ rc = SQLITE_OK; } } return rc; } SQLITE_API int SQLITE_STDCALL sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); assert( *pRes==0 || *pRes==1 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); *pRes = 0; pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey); pCur->info.nSize = 0; if( pCur->eState!=CURSOR_VALID || pCur->aiIdx[pCur->iPage]==0 || pCur->apPage[pCur->iPage]->leaf==0 ){ return btreePrevious(pCur, pRes); } pCur->aiIdx[pCur->iPage]--; return SQLITE_OK; } /* ** Allocate a new page from the database file. ** ** The new page is marked as dirty. (In other words, sqlite3PagerWrite() ** has already been called on the new page.) The new page has also ** been referenced and the calling routine is responsible for calling ** sqlite3PagerUnref() on the new page when it is done. ** ** SQLITE_OK is returned on success. Any other return value indicates ** an error. *ppPage is set to NULL in the event of an error. ** ** If the "nearby" parameter is not 0, then an effort is made to ** locate a page close to the page number "nearby". This can be used in an ** attempt to keep related pages close to each other in the database file, ** which in turn can make database access faster. ** ** If the eMode parameter is BTALLOC_EXACT and the nearby page exists ** anywhere on the free-list, then it is guaranteed to be returned. If ** eMode is BTALLOC_LT then the page returned will be less than or equal ** to nearby if any such page exists. If eMode is BTALLOC_ANY then there ** are no restrictions on which page is returned. */ static int allocateBtreePage( BtShared *pBt, /* The btree */ MemPage **ppPage, /* Store pointer to the allocated page here */ Pgno *pPgno, /* Store the page number here */ Pgno nearby, /* Search for a page near this one */ u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */ ){ MemPage *pPage1; int rc; u32 n; /* Number of pages on the freelist */ u32 k; /* Number of leaves on the trunk of the freelist */ MemPage *pTrunk = 0; MemPage *pPrevTrunk = 0; Pgno mxPage; /* Total size of the database file */ assert( sqlite3_mutex_held(pBt->mutex) ); assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); /* EVIDENCE-OF: R-05119-02637 The 4-byte big-endian integer at offset 36 ** stores stores the total number of pages on the freelist. */ n = get4byte(&pPage1->aData[36]); testcase( n==mxPage-1 ); if( n>=mxPage ){ return SQLITE_CORRUPT_BKPT; } if( n>0 ){ /* There are pages on the freelist. Reuse one of those pages. */ Pgno iTrunk; u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ u32 nSearch = 0; /* Count of the number of search attempts */ /* If eMode==BTALLOC_EXACT and a query of the pointer-map ** shows that the page 'nearby' is somewhere on the free-list, then ** the entire-list will be searched for that page. */ #ifndef SQLITE_OMIT_AUTOVACUUM if( eMode==BTALLOC_EXACT ){ if( nearby<=mxPage ){ u8 eType; assert( nearby>0 ); assert( pBt->autoVacuum ); rc = ptrmapGet(pBt, nearby, &eType, 0); if( rc ) return rc; if( eType==PTRMAP_FREEPAGE ){ searchList = 1; } } }else if( eMode==BTALLOC_LE ){ searchList = 1; } #endif /* Decrement the free-list count by 1. Set iTrunk to the index of the ** first free-list trunk page. iPrevTrunk is initially 1. */ rc = sqlite3PagerWrite(pPage1->pDbPage); if( rc ) return rc; put4byte(&pPage1->aData[36], n-1); /* The code within this loop is run only once if the 'searchList' variable ** is not true. Otherwise, it runs once for each trunk-page on the ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT) ** or until a page less than 'nearby' is located (eMode==BTALLOC_LT) */ do { pPrevTrunk = pTrunk; if( pPrevTrunk ){ /* EVIDENCE-OF: R-01506-11053 The first integer on a freelist trunk page ** is the page number of the next freelist trunk page in the list or ** zero if this is the last freelist trunk page. */ iTrunk = get4byte(&pPrevTrunk->aData[0]); }else{ /* EVIDENCE-OF: R-59841-13798 The 4-byte big-endian integer at offset 32 ** stores the page number of the first page of the freelist, or zero if ** the freelist is empty. */ iTrunk = get4byte(&pPage1->aData[32]); } testcase( iTrunk==mxPage ); if( iTrunk>mxPage || nSearch++ > n ){ rc = SQLITE_CORRUPT_BKPT; }else{ rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); } if( rc ){ pTrunk = 0; goto end_allocate_page; } assert( pTrunk!=0 ); assert( pTrunk->aData!=0 ); /* EVIDENCE-OF: R-13523-04394 The second integer on a freelist trunk page ** is the number of leaf page pointers to follow. */ k = get4byte(&pTrunk->aData[4]); if( k==0 && !searchList ){ /* The trunk has no leaves and the list is not being searched. ** So extract the trunk page itself and use it as the newly ** allocated page */ assert( pPrevTrunk==0 ); rc = sqlite3PagerWrite(pTrunk->pDbPage); if( rc ){ goto end_allocate_page; } *pPgno = iTrunk; memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); *ppPage = pTrunk; pTrunk = 0; TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); }else if( k>(u32)(pBt->usableSize/4 - 2) ){ /* Value of k is out of range. Database corruption */ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; #ifndef SQLITE_OMIT_AUTOVACUUM }else if( searchList && (nearby==iTrunk || (iTrunkpDbPage); if( rc ){ goto end_allocate_page; } if( k==0 ){ if( !pPrevTrunk ){ memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4); }else{ rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); if( rc!=SQLITE_OK ){ goto end_allocate_page; } memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4); } }else{ /* The trunk page is required by the caller but it contains ** pointers to free-list leaves. The first leaf becomes a trunk ** page in this case. */ MemPage *pNewTrunk; Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); if( iNewTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; } testcase( iNewTrunk==mxPage ); rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } rc = sqlite3PagerWrite(pNewTrunk->pDbPage); if( rc!=SQLITE_OK ){ releasePage(pNewTrunk); goto end_allocate_page; } memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4); put4byte(&pNewTrunk->aData[4], k-1); memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4); releasePage(pNewTrunk); if( !pPrevTrunk ){ assert( sqlite3PagerIswriteable(pPage1->pDbPage) ); put4byte(&pPage1->aData[32], iNewTrunk); }else{ rc = sqlite3PagerWrite(pPrevTrunk->pDbPage); if( rc ){ goto end_allocate_page; } put4byte(&pPrevTrunk->aData[0], iNewTrunk); } } pTrunk = 0; TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); #endif }else if( k>0 ){ /* Extract a leaf from the trunk */ u32 closest; Pgno iPage; unsigned char *aData = pTrunk->aData; if( nearby>0 ){ u32 i; closest = 0; if( eMode==BTALLOC_LE ){ for(i=0; imxPage ){ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; } testcase( iPage==mxPage ); if( !searchList || (iPage==nearby || (iPagepgno, n-1)); rc = sqlite3PagerWrite(pTrunk->pDbPage); if( rc ) goto end_allocate_page; if( closestpDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); *ppPage = 0; } } searchList = 0; } } releasePage(pPrevTrunk); pPrevTrunk = 0; }while( searchList ); }else{ /* There are no pages on the freelist, so append a new page to the ** database image. ** ** Normally, new pages allocated by this block can be requested from the ** pager layer with the 'no-content' flag set. This prevents the pager ** from trying to read the pages content from disk. However, if the ** current transaction has already run one or more incremental-vacuum ** steps, then the page we are about to allocate may contain content ** that is required in the event of a rollback. In this case, do ** not set the no-content flag. This causes the pager to load and journal ** the current page content before overwriting it. ** ** Note that the pager will not actually attempt to load or journal ** content for any page that really does lie past the end of the database ** file on disk. So the effects of disabling the no-content optimization ** here are confined to those pages that lie between the end of the ** database image and the end of the database file. */ int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0; rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); if( rc ) return rc; pBt->nPage++; if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){ /* If *pPgno refers to a pointer-map page, allocate two new pages ** at the end of the file instead of one. The first allocated page ** becomes a new pointer-map page, the second is used by the caller. */ MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); } if( rc ) return rc; pBt->nPage++; if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; } } #endif put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage); *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); *ppPage = 0; } TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); } assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); end_allocate_page: releasePage(pTrunk); releasePage(pPrevTrunk); assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 ); assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 ); return rc; } /* ** This function is used to add page iPage to the database file free-list. ** It is assumed that the page is not already a part of the free-list. ** ** The value passed as the second argument to this function is optional. ** If the caller happens to have a pointer to the MemPage object ** corresponding to page iPage handy, it may pass it as the second value. ** Otherwise, it may pass NULL. ** ** If a pointer to a MemPage object is passed as the second argument, ** its reference count is not altered by this function. */ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ MemPage *pTrunk = 0; /* Free-list trunk page */ Pgno iTrunk = 0; /* Page number of free-list trunk page */ MemPage *pPage1 = pBt->pPage1; /* Local reference to page 1 */ MemPage *pPage; /* Page being freed. May be NULL. */ int rc; /* Return Code */ int nFree; /* Initial number of pages on free-list */ assert( sqlite3_mutex_held(pBt->mutex) ); assert( CORRUPT_DB || iPage>1 ); assert( !pMemPage || pMemPage->pgno==iPage ); if( iPage<2 ) return SQLITE_CORRUPT_BKPT; if( pMemPage ){ pPage = pMemPage; sqlite3PagerRef(pPage->pDbPage); }else{ pPage = btreePageLookup(pBt, iPage); } /* Increment the free page count on pPage1 */ rc = sqlite3PagerWrite(pPage1->pDbPage); if( rc ) goto freepage_out; nFree = get4byte(&pPage1->aData[36]); put4byte(&pPage1->aData[36], nFree+1); if( pBt->btsFlags & BTS_SECURE_DELETE ){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) ){ goto freepage_out; } memset(pPage->aData, 0, pPage->pBt->pageSize); } /* If the database supports auto-vacuum, write an entry in the pointer-map ** to indicate that the page is free. */ if( ISAUTOVACUUM ){ ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); if( rc ) goto freepage_out; } /* Now manipulate the actual database free-list structure. There are two ** possibilities. If the free-list is currently empty, or if the first ** trunk page in the free-list is full, then this page will become a ** new free-list trunk page. Otherwise, it will become a leaf of the ** first trunk page in the current free-list. This block tests if it ** is possible to add the page as a new free-list leaf. */ if( nFree!=0 ){ u32 nLeaf; /* Initial number of leaf cells on trunk page */ iTrunk = get4byte(&pPage1->aData[32]); rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); if( rc!=SQLITE_OK ){ goto freepage_out; } nLeaf = get4byte(&pTrunk->aData[4]); assert( pBt->usableSize>32 ); if( nLeaf > (u32)pBt->usableSize/4 - 2 ){ rc = SQLITE_CORRUPT_BKPT; goto freepage_out; } if( nLeaf < (u32)pBt->usableSize/4 - 8 ){ /* In this case there is room on the trunk page to insert the page ** being freed as a new leaf. ** ** Note that the trunk page is not really full until it contains ** usableSize/4 - 2 entries, not usableSize/4 - 8 entries as we have ** coded. But due to a coding error in versions of SQLite prior to ** 3.6.0, databases with freelist trunk pages holding more than ** usableSize/4 - 8 entries will be reported as corrupt. In order ** to maintain backwards compatibility with older versions of SQLite, ** we will continue to restrict the number of entries to usableSize/4 - 8 ** for now. At some point in the future (once everyone has upgraded ** to 3.6.0 or later) we should consider fixing the conditional above ** to read "usableSize/4-2" instead of "usableSize/4-8". ** ** EVIDENCE-OF: R-19920-11576 However, newer versions of SQLite still ** avoid using the last six entries in the freelist trunk page array in ** order that database files created by newer versions of SQLite can be ** read by older versions of SQLite. */ rc = sqlite3PagerWrite(pTrunk->pDbPage); if( rc==SQLITE_OK ){ put4byte(&pTrunk->aData[4], nLeaf+1); put4byte(&pTrunk->aData[8+nLeaf*4], iPage); if( pPage && (pBt->btsFlags & BTS_SECURE_DELETE)==0 ){ sqlite3PagerDontWrite(pPage->pDbPage); } rc = btreeSetHasContent(pBt, iPage); } TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno)); goto freepage_out; } } /* If control flows to this point, then it was not possible to add the ** the page being freed as a leaf page of the first trunk in the free-list. ** Possibly because the free-list is empty, or possibly because the ** first trunk in the free-list is full. Either way, the page being freed ** will become the new first trunk page in the free-list. */ if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ goto freepage_out; } rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ goto freepage_out; } put4byte(pPage->aData, iTrunk); put4byte(&pPage->aData[4], 0); put4byte(&pPage1->aData[32], iPage); TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk)); freepage_out: if( pPage ){ pPage->isInit = 0; } releasePage(pPage); releasePage(pTrunk); return rc; } static void freePage(MemPage *pPage, int *pRC){ if( (*pRC)==SQLITE_OK ){ *pRC = freePage2(pPage->pBt, pPage, pPage->pgno); } } /* ** Free any overflow pages associated with the given Cell. Write the ** local Cell size (the number of bytes on the original page, omitting ** overflow) into *pnSize. */ static int clearCell( MemPage *pPage, /* The page that contains the Cell */ unsigned char *pCell, /* First byte of the Cell */ u16 *pnSize /* Write the size of the Cell here */ ){ BtShared *pBt = pPage->pBt; CellInfo info; Pgno ovflPgno; int rc; int nOvfl; u32 ovflPageSize; assert( sqlite3_mutex_held(pPage->pBt->mutex) ); pPage->xParseCell(pPage, pCell, &info); *pnSize = info.nSize; if( info.iOverflow==0 ){ return SQLITE_OK; /* No overflow pages. Return without doing anything */ } if( pCell+info.iOverflow+3 > pPage->aData+pPage->maskPage ){ return SQLITE_CORRUPT_BKPT; /* Cell extends past end of page */ } ovflPgno = get4byte(&pCell[info.iOverflow]); assert( pBt->usableSize > 4 ); ovflPageSize = pBt->usableSize - 4; nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; assert( nOvfl>0 || (CORRUPT_DB && (info.nPayload + ovflPageSize)btreePagecount(pBt) ){ /* 0 is not a legal page number and page 1 cannot be an ** overflow page. Therefore if ovflPgno<2 or past the end of the ** file the database must be corrupt. */ return SQLITE_CORRUPT_BKPT; } if( nOvfl ){ rc = getOverflowPage(pBt, ovflPgno, &pOvfl, &iNext); if( rc ) return rc; } if( ( pOvfl || ((pOvfl = btreePageLookup(pBt, ovflPgno))!=0) ) && sqlite3PagerPageRefcount(pOvfl->pDbPage)!=1 ){ /* There is no reason any cursor should have an outstanding reference ** to an overflow page belonging to a cell that is being deleted/updated. ** So if there exists more than one reference to this page, then it ** must not really be an overflow page and the database must be corrupt. ** It is helpful to detect this before calling freePage2(), as ** freePage2() may zero the page contents if secure-delete mode is ** enabled. If this 'overflow' page happens to be a page that the ** caller is iterating through or using in some other way, this ** can be problematic. */ rc = SQLITE_CORRUPT_BKPT; }else{ rc = freePage2(pBt, pOvfl, ovflPgno); } if( pOvfl ){ sqlite3PagerUnref(pOvfl->pDbPage); } if( rc ) return rc; ovflPgno = iNext; } return SQLITE_OK; } /* ** Create the byte sequence used to represent a cell on page pPage ** and write that byte sequence into pCell[]. Overflow pages are ** allocated and filled in as necessary. The calling procedure ** is responsible for making sure sufficient space has been allocated ** for pCell[]. ** ** Note that pCell does not necessary need to point to the pPage->aData ** area. pCell might point to some temporary storage. The cell will ** be constructed in this temporary area then copied into pPage->aData ** later. */ static int fillInCell( MemPage *pPage, /* The page that contains the cell */ unsigned char *pCell, /* Complete text of the cell */ const void *pKey, i64 nKey, /* The key */ const void *pData,int nData, /* The data */ int nZero, /* Extra zero bytes to append to pData */ int *pnSize /* Write cell size here */ ){ int nPayload; const u8 *pSrc; int nSrc, n, rc; int spaceLeft; MemPage *pOvfl = 0; MemPage *pToRelease = 0; unsigned char *pPrior; unsigned char *pPayload; BtShared *pBt = pPage->pBt; Pgno pgnoOvfl = 0; int nHeader; assert( sqlite3_mutex_held(pPage->pBt->mutex) ); /* pPage is not necessarily writeable since pCell might be auxiliary ** buffer space that is separate from the pPage buffer area */ assert( pCellaData || pCell>=&pPage->aData[pBt->pageSize] || sqlite3PagerIswriteable(pPage->pDbPage) ); /* Fill in the header. */ nHeader = pPage->childPtrSize; nPayload = nData + nZero; if( pPage->intKeyLeaf ){ nHeader += putVarint32(&pCell[nHeader], nPayload); }else{ assert( nData==0 ); assert( nZero==0 ); } nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); /* Fill in the payload size */ if( pPage->intKey ){ pSrc = pData; nSrc = nData; nData = 0; }else{ assert( nKey<=0x7fffffff && pKey!=0 ); nPayload = (int)nKey; pSrc = pKey; nSrc = (int)nKey; } if( nPayload<=pPage->maxLocal ){ n = nHeader + nPayload; testcase( n==3 ); testcase( n==4 ); if( n<4 ) n = 4; *pnSize = n; spaceLeft = nPayload; pPrior = pCell; }else{ int mn = pPage->minLocal; n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4); testcase( n==pPage->maxLocal ); testcase( n==pPage->maxLocal+1 ); if( n > pPage->maxLocal ) n = mn; spaceLeft = n; *pnSize = n + nHeader + 4; pPrior = &pCell[nHeader+n]; } pPayload = &pCell[nHeader]; /* At this point variables should be set as follows: ** ** nPayload Total payload size in bytes ** pPayload Begin writing payload here ** spaceLeft Space available at pPayload. If nPayload>spaceLeft, ** that means content must spill into overflow pages. ** *pnSize Size of the local cell (not counting overflow pages) ** pPrior Where to write the pgno of the first overflow page ** ** Use a call to btreeParseCellPtr() to verify that the values above ** were computed correctly. */ #if SQLITE_DEBUG { CellInfo info; pPage->xParseCell(pPage, pCell, &info); assert( nHeader=(int)(info.pPayload - pCell) ); assert( info.nKey==nKey ); assert( *pnSize == info.nSize ); assert( spaceLeft == info.nLocal ); assert( pPrior == &pCell[info.iOverflow] ); } #endif /* Write the payload into the local Cell and any extra into overflow pages */ while( nPayload>0 ){ if( spaceLeft==0 ){ #ifndef SQLITE_OMIT_AUTOVACUUM Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */ if( pBt->autoVacuum ){ do{ pgnoOvfl++; } while( PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) ); } #endif rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0); #ifndef SQLITE_OMIT_AUTOVACUUM /* If the database supports auto-vacuum, and the second or subsequent ** overflow page is being allocated, add an entry to the pointer-map ** for that page now. ** ** If this is the first overflow page, then write a partial entry ** to the pointer-map. If we write nothing to this pointer-map slot, ** then the optimistic overflow chain processing in clearCell() ** may misinterpret the uninitialized values and delete the ** wrong pages from the database. */ if( pBt->autoVacuum && rc==SQLITE_OK ){ u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc); if( rc ){ releasePage(pOvfl); } } #endif if( rc ){ releasePage(pToRelease); return rc; } /* If pToRelease is not zero than pPrior points into the data area ** of pToRelease. Make sure pToRelease is still writeable. */ assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); /* If pPrior is part of the data area of pPage, then make sure pPage ** is still writeable */ assert( pPrioraData || pPrior>=&pPage->aData[pBt->pageSize] || sqlite3PagerIswriteable(pPage->pDbPage) ); put4byte(pPrior, pgnoOvfl); releasePage(pToRelease); pToRelease = pOvfl; pPrior = pOvfl->aData; put4byte(pPrior, 0); pPayload = &pOvfl->aData[4]; spaceLeft = pBt->usableSize - 4; } n = nPayload; if( n>spaceLeft ) n = spaceLeft; /* If pToRelease is not zero than pPayload points into the data area ** of pToRelease. Make sure pToRelease is still writeable. */ assert( pToRelease==0 || sqlite3PagerIswriteable(pToRelease->pDbPage) ); /* If pPayload is part of the data area of pPage, then make sure pPage ** is still writeable */ assert( pPayloadaData || pPayload>=&pPage->aData[pBt->pageSize] || sqlite3PagerIswriteable(pPage->pDbPage) ); if( nSrc>0 ){ if( n>nSrc ) n = nSrc; assert( pSrc ); memcpy(pPayload, pSrc, n); }else{ memset(pPayload, 0, n); } nPayload -= n; pPayload += n; pSrc += n; nSrc -= n; spaceLeft -= n; if( nSrc==0 ){ nSrc = nData; pSrc = pData; } } releasePage(pToRelease); return SQLITE_OK; } /* ** Remove the i-th cell from pPage. This routine effects pPage only. ** The cell content is not freed or deallocated. It is assumed that ** the cell content has been copied someplace else. This routine just ** removes the reference to the cell from pPage. ** ** "sz" must be the number of bytes in the cell. */ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ u32 pc; /* Offset to cell content of cell being deleted */ u8 *data; /* pPage->aData */ u8 *ptr; /* Used to move bytes around within data[] */ int rc; /* The return code */ int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */ if( *pRC ) return; assert( idx>=0 && idxnCell ); assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); data = pPage->aData; ptr = &pPage->aCellIdx[2*idx]; pc = get2byte(ptr); hdr = pPage->hdrOffset; testcase( pc==get2byte(&data[hdr+5]) ); testcase( pc+sz==pPage->pBt->usableSize ); if( pc < (u32)get2byte(&data[hdr+5]) || pc+sz > pPage->pBt->usableSize ){ *pRC = SQLITE_CORRUPT_BKPT; return; } rc = freeSpace(pPage, pc, sz); if( rc ){ *pRC = rc; return; } pPage->nCell--; if( pPage->nCell==0 ){ memset(&data[hdr+1], 0, 4); data[hdr+7] = 0; put2byte(&data[hdr+5], pPage->pBt->usableSize); pPage->nFree = pPage->pBt->usableSize - pPage->hdrOffset - pPage->childPtrSize - 8; }else{ memmove(ptr, ptr+2, 2*(pPage->nCell - idx)); put2byte(&data[hdr+3], pPage->nCell); pPage->nFree += 2; } } /* ** Insert a new cell on pPage at cell index "i". pCell points to the ** content of the cell. ** ** If the cell content will fit on the page, then put it there. If it ** will not fit, then make a copy of the cell content into pTemp if ** pTemp is not null. Regardless of pTemp, allocate a new entry ** in pPage->apOvfl[] and make it point to the cell content (either ** in pTemp or the original pCell) and also record its index. ** Allocating a new entry in pPage->aCell[] implies that ** pPage->nOverflow is incremented. */ static void insertCell( MemPage *pPage, /* Page into which we are copying */ int i, /* New cell becomes the i-th cell of the page */ u8 *pCell, /* Content of the new cell */ int sz, /* Bytes of content in pCell */ u8 *pTemp, /* Temp storage space for pCell, if needed */ Pgno iChild, /* If non-zero, replace first 4 bytes with this value */ int *pRC /* Read and write return code from here */ ){ int idx = 0; /* Where to write new cell content in data[] */ int j; /* Loop counter */ u8 *data; /* The content of the whole page */ u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ if( *pRC ) return; assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); assert( MX_CELL(pPage->pBt)<=10921 ); assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); /* The cell should normally be sized correctly. However, when moving a ** malformed cell from a leaf page to an interior page, if the cell size ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size ** might be less than 8 (leaf-size + pointer) on the interior node. Hence ** the term after the || in the following assert(). */ assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) ); if( pPage->nOverflow || sz+2>pPage->nFree ){ if( pTemp ){ memcpy(pTemp, pCell, sz); pCell = pTemp; } if( iChild ){ put4byte(pCell, iChild); } j = pPage->nOverflow++; assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) ); pPage->apOvfl[j] = pCell; pPage->aiOvfl[j] = (u16)i; /* When multiple overflows occur, they are always sequential and in ** sorted order. This invariants arise because multiple overflows can ** only occur when inserting divider cells into the parent page during ** balancing, and the dividers are adjacent and sorted. */ assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ }else{ int rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ *pRC = rc; return; } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); data = pPage->aData; assert( &data[pPage->cellOffset]==pPage->aCellIdx ); rc = allocateSpace(pPage, sz, &idx); if( rc ){ *pRC = rc; return; } /* The allocateSpace() routine guarantees the following properties ** if it returns successfully */ assert( idx >= 0 ); assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); assert( idx+sz <= (int)pPage->pBt->usableSize ); pPage->nFree -= (u16)(2 + sz); memcpy(&data[idx], pCell, sz); if( iChild ){ put4byte(&data[idx], iChild); } pIns = pPage->aCellIdx + i*2; memmove(pIns+2, pIns, 2*(pPage->nCell - i)); put2byte(pIns, idx); pPage->nCell++; /* increment the cell count */ if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell ); #ifndef SQLITE_OMIT_AUTOVACUUM if( pPage->pBt->autoVacuum ){ /* The cell may contain a pointer to an overflow page. If so, write ** the entry for the overflow page into the pointer map. */ ptrmapPutOvflPtr(pPage, pCell, pRC); } #endif } } /* ** A CellArray object contains a cache of pointers and sizes for a ** consecutive sequence of cells that might be held multiple pages. */ typedef struct CellArray CellArray; struct CellArray { int nCell; /* Number of cells in apCell[] */ MemPage *pRef; /* Reference page */ u8 **apCell; /* All cells begin balanced */ u16 *szCell; /* Local size of all cells in apCell[] */ }; /* ** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been ** computed. */ static void populateCellCache(CellArray *p, int idx, int N){ assert( idx>=0 && idx+N<=p->nCell ); while( N>0 ){ assert( p->apCell[idx]!=0 ); if( p->szCell[idx]==0 ){ p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); }else{ assert( CORRUPT_DB || p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); } idx++; N--; } } /* ** Return the size of the Nth element of the cell array */ static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){ assert( N>=0 && NnCell ); assert( p->szCell[N]==0 ); p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]); return p->szCell[N]; } static u16 cachedCellSize(CellArray *p, int N){ assert( N>=0 && NnCell ); if( p->szCell[N] ) return p->szCell[N]; return computeCellSize(p, N); } /* ** Array apCell[] contains pointers to nCell b-tree page cells. The ** szCell[] array contains the size in bytes of each cell. This function ** replaces the current contents of page pPg with the contents of the cell ** array. ** ** Some of the cells in apCell[] may currently be stored in pPg. This ** function works around problems caused by this by making a copy of any ** such cells before overwriting the page data. ** ** The MemPage.nFree field is invalidated by this function. It is the ** responsibility of the caller to set it correctly. */ static int rebuildPage( MemPage *pPg, /* Edit this page */ int nCell, /* Final number of cells on page */ u8 **apCell, /* Array of cells */ u16 *szCell /* Array of cell sizes */ ){ const int hdr = pPg->hdrOffset; /* Offset of header on pPg */ u8 * const aData = pPg->aData; /* Pointer to data for pPg */ const int usableSize = pPg->pBt->usableSize; u8 * const pEnd = &aData[usableSize]; int i; u8 *pCellptr = pPg->aCellIdx; u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); u8 *pData; i = get2byte(&aData[hdr+5]); memcpy(&pTmp[i], &aData[i], usableSize - i); pData = pEnd; for(i=0; iaData && pCellxCellSize(pPg, pCell) || CORRUPT_DB ); testcase( szCell[i]!=pPg->xCellSize(pPg,pCell) ); } /* The pPg->nFree field is now set incorrectly. The caller will fix it. */ pPg->nCell = nCell; pPg->nOverflow = 0; put2byte(&aData[hdr+1], 0); put2byte(&aData[hdr+3], pPg->nCell); put2byte(&aData[hdr+5], pData - aData); aData[hdr+7] = 0x00; return SQLITE_OK; } /* ** Array apCell[] contains nCell pointers to b-tree cells. Array szCell ** contains the size in bytes of each such cell. This function attempts to ** add the cells stored in the array to page pPg. If it cannot (because ** the page needs to be defragmented before the cells will fit), non-zero ** is returned. Otherwise, if the cells are added successfully, zero is ** returned. ** ** Argument pCellptr points to the first entry in the cell-pointer array ** (part of page pPg) to populate. After cell apCell[0] is written to the ** page body, a 16-bit offset is written to pCellptr. And so on, for each ** cell in the array. It is the responsibility of the caller to ensure ** that it is safe to overwrite this part of the cell-pointer array. ** ** When this function is called, *ppData points to the start of the ** content area on page pPg. If the size of the content area is extended, ** *ppData is updated to point to the new start of the content area ** before returning. ** ** Finally, argument pBegin points to the byte immediately following the ** end of the space required by this page for the cell-pointer area (for ** all cells - not just those inserted by the current call). If the content ** area must be extended to before this point in order to accomodate all ** cells in apCell[], then the cells do not fit and non-zero is returned. */ static int pageInsertArray( MemPage *pPg, /* Page to add cells to */ u8 *pBegin, /* End of cell-pointer array */ u8 **ppData, /* IN/OUT: Page content -area pointer */ u8 *pCellptr, /* Pointer to cell-pointer area */ int iFirst, /* Index of first cell to add */ int nCell, /* Number of cells to add to pPg */ CellArray *pCArray /* Array of cells */ ){ int i; u8 *aData = pPg->aData; u8 *pData = *ppData; int iEnd = iFirst + nCell; assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ for(i=iFirst; iapCell[i] will never overlap on a well-formed ** database. But they might for a corrupt database. Hence use memmove() ** since memcpy() sends SIGABORT with overlapping buffers on OpenBSD */ assert( (pSlot+sz)<=pCArray->apCell[i] || pSlot>=(pCArray->apCell[i]+sz) || CORRUPT_DB ); memmove(pSlot, pCArray->apCell[i], sz); put2byte(pCellptr, (pSlot - aData)); pCellptr += 2; } *ppData = pData; return 0; } /* ** Array apCell[] contains nCell pointers to b-tree cells. Array szCell ** contains the size in bytes of each such cell. This function adds the ** space associated with each cell in the array that is currently stored ** within the body of pPg to the pPg free-list. The cell-pointers and other ** fields of the page are not updated. ** ** This function returns the total number of cells added to the free-list. */ static int pageFreeArray( MemPage *pPg, /* Page to edit */ int iFirst, /* First cell to delete */ int nCell, /* Cells to delete */ CellArray *pCArray /* Array of cells */ ){ u8 * const aData = pPg->aData; u8 * const pEnd = &aData[pPg->pBt->usableSize]; u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize]; int nRet = 0; int i; int iEnd = iFirst + nCell; u8 *pFree = 0; int szFree = 0; for(i=iFirst; iapCell[i]; if( pCell>=pStart && pCellszCell[i]; assert( sz>0 ); if( pFree!=(pCell + sz) ){ if( pFree ){ assert( pFree>aData && (pFree - aData)<65536 ); freeSpace(pPg, (u16)(pFree - aData), szFree); } pFree = pCell; szFree = sz; if( pFree+sz>pEnd ) return 0; }else{ pFree = pCell; szFree += sz; } nRet++; } } if( pFree ){ assert( pFree>aData && (pFree - aData)<65536 ); freeSpace(pPg, (u16)(pFree - aData), szFree); } return nRet; } /* ** apCell[] and szCell[] contains pointers to and sizes of all cells in the ** pages being balanced. The current page, pPg, has pPg->nCell cells starting ** with apCell[iOld]. After balancing, this page should hold nNew cells ** starting at apCell[iNew]. ** ** This routine makes the necessary adjustments to pPg so that it contains ** the correct cells after being balanced. ** ** The pPg->nFree field is invalid when this function returns. It is the ** responsibility of the caller to set it correctly. */ static int editPage( MemPage *pPg, /* Edit this page */ int iOld, /* Index of first cell currently on page */ int iNew, /* Index of new first cell on page */ int nNew, /* Final number of cells on page */ CellArray *pCArray /* Array of cells and sizes */ ){ u8 * const aData = pPg->aData; const int hdr = pPg->hdrOffset; u8 *pBegin = &pPg->aCellIdx[nNew * 2]; int nCell = pPg->nCell; /* Cells stored on pPg */ u8 *pData; u8 *pCellptr; int i; int iOldEnd = iOld + pPg->nCell + pPg->nOverflow; int iNewEnd = iNew + nNew; #ifdef SQLITE_DEBUG u8 *pTmp = sqlite3PagerTempSpace(pPg->pBt->pPager); memcpy(pTmp, aData, pPg->pBt->usableSize); #endif /* Remove cells from the start and end of the page */ if( iOldaCellIdx, &pPg->aCellIdx[nShift*2], nCell*2); nCell -= nShift; } if( iNewEnd < iOldEnd ){ nCell -= pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray); } pData = &aData[get2byteNotZero(&aData[hdr+5])]; if( pDataaCellIdx; memmove(&pCellptr[nAdd*2], pCellptr, nCell*2); if( pageInsertArray( pPg, pBegin, &pData, pCellptr, iNew, nAdd, pCArray ) ) goto editpage_fail; nCell += nAdd; } /* Add any overflow cells */ for(i=0; inOverflow; i++){ int iCell = (iOld + pPg->aiOvfl[i]) - iNew; if( iCell>=0 && iCellaCellIdx[iCell * 2]; memmove(&pCellptr[2], pCellptr, (nCell - iCell) * 2); nCell++; if( pageInsertArray( pPg, pBegin, &pData, pCellptr, iCell+iNew, 1, pCArray ) ) goto editpage_fail; } } /* Append cells to the end of the page */ pCellptr = &pPg->aCellIdx[nCell*2]; if( pageInsertArray( pPg, pBegin, &pData, pCellptr, iNew+nCell, nNew-nCell, pCArray ) ) goto editpage_fail; pPg->nCell = nNew; pPg->nOverflow = 0; put2byte(&aData[hdr+3], pPg->nCell); put2byte(&aData[hdr+5], pData - aData); #ifdef SQLITE_DEBUG for(i=0; iapCell[i+iNew]; int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ pCell = &pTmp[pCell - aData]; } assert( 0==memcmp(pCell, &aData[iOff], pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); } #endif return SQLITE_OK; editpage_fail: /* Unable to edit this page. Rebuild it from scratch instead. */ populateCellCache(pCArray, iNew, nNew); return rebuildPage(pPg, nNew, &pCArray->apCell[iNew], &pCArray->szCell[iNew]); } /* ** The following parameters determine how many adjacent pages get involved ** in a balancing operation. NN is the number of neighbors on either side ** of the page that participate in the balancing operation. NB is the ** total number of pages that participate, including the target page and ** NN neighbors on either side. ** ** The minimum value of NN is 1 (of course). Increasing NN above 1 ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance ** in exchange for a larger degradation in INSERT and UPDATE performance. ** The value of NN appears to give the best results overall. */ #define NN 1 /* Number of neighbors on either side of pPage */ #define NB (NN*2+1) /* Total pages involved in the balance */ #ifndef SQLITE_OMIT_QUICKBALANCE /* ** This version of balance() handles the common special case where ** a new entry is being inserted on the extreme right-end of the ** tree, in other words, when the new entry will become the largest ** entry in the tree. ** ** Instead of trying to balance the 3 right-most leaf pages, just add ** a new page to the right-hand side and put the one new entry in ** that page. This leaves the right side of the tree somewhat ** unbalanced. But odds are that we will be inserting new entries ** at the end soon afterwards so the nearly empty page will quickly ** fill up. On average. ** ** pPage is the leaf page which is the right-most page in the tree. ** pParent is its parent. pPage must have a single overflow entry ** which is also the right-most entry on the page. ** ** The pSpace buffer is used to store a temporary copy of the divider ** cell that will be inserted into pParent. Such a cell consists of a 4 ** byte page number followed by a variable length integer. In other ** words, at most 13 bytes. Hence the pSpace buffer must be at ** least 13 bytes in size. */ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ BtShared *const pBt = pPage->pBt; /* B-Tree Database */ MemPage *pNew; /* Newly allocated page */ int rc; /* Return Code */ Pgno pgnoNew; /* Page number of pNew */ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( sqlite3PagerIswriteable(pParent->pDbPage) ); assert( pPage->nOverflow==1 ); /* This error condition is now caught prior to reaching this function */ if( NEVER(pPage->nCell==0) ) return SQLITE_CORRUPT_BKPT; /* Allocate a new page. This page will become the right-sibling of ** pPage. Make the parent page writable, so that the new divider cell ** may be inserted. If both these operations are successful, proceed. */ rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0); if( rc==SQLITE_OK ){ u8 *pOut = &pSpace[4]; u8 *pCell = pPage->apOvfl[0]; u16 szCell = pPage->xCellSize(pPage, pCell); u8 *pStop; assert( sqlite3PagerIswriteable(pNew->pDbPage) ); assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); rc = rebuildPage(pNew, 1, &pCell, &szCell); if( NEVER(rc) ) return rc; pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell; /* If this is an auto-vacuum database, update the pointer map ** with entries for the new page, and any pointer from the ** cell on the page to an overflow page. If either of these ** operations fails, the return code is set, but the contents ** of the parent page are still manipulated by thh code below. ** That is Ok, at this point the parent page is guaranteed to ** be marked as dirty. Returning an error code will cause a ** rollback, undoing any changes made to the parent page. */ if( ISAUTOVACUUM ){ ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc); if( szCell>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pCell, &rc); } } /* Create a divider cell to insert into pParent. The divider cell ** consists of a 4-byte page number (the page number of pPage) and ** a variable length key value (which must be the same value as the ** largest key on pPage). ** ** To find the largest key value on pPage, first find the right-most ** cell on pPage. The first two fields of this cell are the ** record-length (a variable length integer at most 32-bits in size) ** and the key value (a variable length integer, may have any value). ** The first of the while(...) loops below skips over the record-length ** field. The second while(...) loop copies the key value from the ** cell on pPage into the pSpace buffer. */ pCell = findCell(pPage, pPage->nCell-1); pStop = &pCell[9]; while( (*(pCell++)&0x80) && pCellnCell, pSpace, (int)(pOut-pSpace), 0, pPage->pgno, &rc); /* Set the right-child pointer of pParent to point to the new page. */ put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew); /* Release the reference to the new page. */ releasePage(pNew); } return rc; } #endif /* SQLITE_OMIT_QUICKBALANCE */ #if 0 /* ** This function does not contribute anything to the operation of SQLite. ** it is sometimes activated temporarily while debugging code responsible ** for setting pointer-map entries. */ static int ptrmapCheckPages(MemPage **apPage, int nPage){ int i, j; for(i=0; ipBt; assert( pPage->isInit ); for(j=0; jnCell; j++){ CellInfo info; u8 *z; z = findCell(pPage, j); pPage->xParseCell(pPage, z, &info); if( info.iOverflow ){ Pgno ovfl = get4byte(&z[info.iOverflow]); ptrmapGet(pBt, ovfl, &e, &n); assert( n==pPage->pgno && e==PTRMAP_OVERFLOW1 ); } if( !pPage->leaf ){ Pgno child = get4byte(z); ptrmapGet(pBt, child, &e, &n); assert( n==pPage->pgno && e==PTRMAP_BTREE ); } } if( !pPage->leaf ){ Pgno child = get4byte(&pPage->aData[pPage->hdrOffset+8]); ptrmapGet(pBt, child, &e, &n); assert( n==pPage->pgno && e==PTRMAP_BTREE ); } } return 1; } #endif /* ** This function is used to copy the contents of the b-tree node stored ** on page pFrom to page pTo. If page pFrom was not a leaf page, then ** the pointer-map entries for each child page are updated so that the ** parent page stored in the pointer map is page pTo. If pFrom contained ** any cells with overflow page pointers, then the corresponding pointer ** map entries are also updated so that the parent page is page pTo. ** ** If pFrom is currently carrying any overflow cells (entries in the ** MemPage.apOvfl[] array), they are not copied to pTo. ** ** Before returning, page pTo is reinitialized using btreeInitPage(). ** ** The performance of this function is not critical. It is only used by ** the balance_shallower() and balance_deeper() procedures, neither of ** which are called often under normal circumstances. */ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){ if( (*pRC)==SQLITE_OK ){ BtShared * const pBt = pFrom->pBt; u8 * const aFrom = pFrom->aData; u8 * const aTo = pTo->aData; int const iFromHdr = pFrom->hdrOffset; int const iToHdr = ((pTo->pgno==1) ? 100 : 0); int rc; int iData; assert( pFrom->isInit ); assert( pFrom->nFree>=iToHdr ); assert( get2byte(&aFrom[iFromHdr+5]) <= (int)pBt->usableSize ); /* Copy the b-tree node content from page pFrom to page pTo. */ iData = get2byte(&aFrom[iFromHdr+5]); memcpy(&aTo[iData], &aFrom[iData], pBt->usableSize-iData); memcpy(&aTo[iToHdr], &aFrom[iFromHdr], pFrom->cellOffset + 2*pFrom->nCell); /* Reinitialize page pTo so that the contents of the MemPage structure ** match the new data. The initialization of pTo can actually fail under ** fairly obscure circumstances, even though it is a copy of initialized ** page pFrom. */ pTo->isInit = 0; rc = btreeInitPage(pTo); if( rc!=SQLITE_OK ){ *pRC = rc; return; } /* If this is an auto-vacuum database, update the pointer-map entries ** for any b-tree or overflow pages that pTo now contains the pointers to. */ if( ISAUTOVACUUM ){ *pRC = setChildPtrmaps(pTo); } } } /* ** This routine redistributes cells on the iParentIdx'th child of pParent ** (hereafter "the page") and up to 2 siblings so that all pages have about the ** same amount of free space. Usually a single sibling on either side of the ** page are used in the balancing, though both siblings might come from one ** side if the page is the first or last child of its parent. If the page ** has fewer than 2 siblings (something which can only happen if the page ** is a root page or a child of a root page) then all available siblings ** participate in the balancing. ** ** The number of siblings of the page might be increased or decreased by ** one or two in an effort to keep pages nearly full but not over full. ** ** Note that when this routine is called, some of the cells on the page ** might not actually be stored in MemPage.aData[]. This can happen ** if the page is overfull. This routine ensures that all cells allocated ** to the page and its siblings fit into MemPage.aData[] before returning. ** ** In the course of balancing the page and its siblings, cells may be ** inserted into or removed from the parent page (pParent). Doing so ** may cause the parent page to become overfull or underfull. If this ** happens, it is the responsibility of the caller to invoke the correct ** balancing routine to fix this problem (see the balance() routine). ** ** If this routine fails for any reason, it might leave the database ** in a corrupted state. So if this routine fails, the database should ** be rolled back. ** ** The third argument to this function, aOvflSpace, is a pointer to a ** buffer big enough to hold one page. If while inserting cells into the parent ** page (pParent) the parent page becomes overfull, this buffer is ** used to store the parent's overflow cells. Because this function inserts ** a maximum of four divider cells into the parent page, and the maximum ** size of a cell stored within an internal node is always less than 1/4 ** of the page-size, the aOvflSpace[] buffer is guaranteed to be large ** enough for all overflow cells. ** ** If aOvflSpace is set to a null pointer, this function returns ** SQLITE_NOMEM. */ #if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) #pragma optimize("", off) #endif static int balance_nonroot( MemPage *pParent, /* Parent page of siblings being balanced */ int iParentIdx, /* Index of "the page" in pParent */ u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ int isRoot, /* True if pParent is a root-page */ int bBulk /* True if this call is part of a bulk load */ ){ BtShared *pBt; /* The whole database */ int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ int nNew = 0; /* Number of pages in apNew[] */ int nOld; /* Number of pages in apOld[] */ int i, j, k; /* Loop counters */ int nxDiv; /* Next divider slot in pParent->aCell[] */ int rc = SQLITE_OK; /* The return code */ u16 leafCorrection; /* 4 if pPage is a leaf. 0 if not */ int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ int usableSpace; /* Bytes in pPage beyond the header */ int pageFlags; /* Value of pPage->aData[0] */ int iSpace1 = 0; /* First unused byte of aSpace1[] */ int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ int szScratch; /* Size of scratch memory requested */ MemPage *apOld[NB]; /* pPage and up to two siblings */ MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ u8 *pRight; /* Location in parent of right-sibling pointer */ u8 *apDiv[NB-1]; /* Divider cells in pParent */ int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */ int cntOld[NB+2]; /* Old index in b.apCell[] */ int szNew[NB+2]; /* Combined size of cells placed on i-th page */ u8 *aSpace1; /* Space for copies of dividers cells */ Pgno pgno; /* Temp var to store a page number in */ u8 abDone[NB+2]; /* True after i'th new page is populated */ Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ CellArray b; /* Parsed information on cells being balanced */ memset(abDone, 0, sizeof(abDone)); b.nCell = 0; b.apCell = 0; pBt = pParent->pBt; assert( sqlite3_mutex_held(pBt->mutex) ); assert( sqlite3PagerIswriteable(pParent->pDbPage) ); #if 0 TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno)); #endif /* At this point pParent may have at most one overflow cell. And if ** this overflow cell is present, it must be the cell with ** index iParentIdx. This scenario comes about when this function ** is called (indirectly) from sqlite3BtreeDelete(). */ assert( pParent->nOverflow==0 || pParent->nOverflow==1 ); assert( pParent->nOverflow==0 || pParent->aiOvfl[0]==iParentIdx ); if( !aOvflSpace ){ return SQLITE_NOMEM; } /* Find the sibling pages to balance. Also locate the cells in pParent ** that divide the siblings. An attempt is made to find NN siblings on ** either side of pPage. More siblings are taken from one side, however, ** if there are fewer than NN siblings on the other side. If pParent ** has NB or fewer children then all children of pParent are taken. ** ** This loop also drops the divider cells from the parent page. This ** way, the remainder of the function does not have to deal with any ** overflow cells in the parent page, since if any existed they will ** have already been removed. */ i = pParent->nOverflow + pParent->nCell; if( i<2 ){ nxDiv = 0; }else{ assert( bBulk==0 || bBulk==1 ); if( iParentIdx==0 ){ nxDiv = 0; }else if( iParentIdx==i ){ nxDiv = i-2+bBulk; }else{ nxDiv = iParentIdx-1; } i = 2-bBulk; } nOld = i+1; if( (i+nxDiv-pParent->nOverflow)==pParent->nCell ){ pRight = &pParent->aData[pParent->hdrOffset+8]; }else{ pRight = findCell(pParent, i+nxDiv-pParent->nOverflow); } pgno = get4byte(pRight); while( 1 ){ rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; } nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow; if( (i--)==0 ) break; if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){ apDiv[i] = pParent->apOvfl[0]; pgno = get4byte(apDiv[i]); szNew[i] = pParent->xCellSize(pParent, apDiv[i]); pParent->nOverflow = 0; }else{ apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); pgno = get4byte(apDiv[i]); szNew[i] = pParent->xCellSize(pParent, apDiv[i]); /* Drop the cell from the parent page. apDiv[i] still points to ** the cell within the parent, even though it has been dropped. ** This is safe because dropping a cell only overwrites the first ** four bytes of it, and this function does not need the first ** four bytes of the divider cell. So the pointer is safe to use ** later on. ** ** But not if we are in secure-delete mode. In secure-delete mode, ** the dropCell() routine will overwrite the entire cell with zeroes. ** In this case, temporarily copy the cell into the aOvflSpace[] ** buffer. It will be copied out again as soon as the aSpace[] buffer ** is allocated. */ if( pBt->btsFlags & BTS_SECURE_DELETE ){ int iOff; iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData); if( (iOff+szNew[i])>(int)pBt->usableSize ){ rc = SQLITE_CORRUPT_BKPT; memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; }else{ memcpy(&aOvflSpace[iOff], apDiv[i], szNew[i]); apDiv[i] = &aOvflSpace[apDiv[i]-pParent->aData]; } } dropCell(pParent, i+nxDiv-pParent->nOverflow, szNew[i], &rc); } } /* Make nMaxCells a multiple of 4 in order to preserve 8-byte ** alignment */ nMaxCells = (nMaxCells + 3)&~3; /* ** Allocate space for memory structures */ szScratch = nMaxCells*sizeof(u8*) /* b.apCell */ + nMaxCells*sizeof(u16) /* b.szCell */ + pBt->pageSize; /* aSpace1 */ /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer ** that is more than 6 times the database page size. */ assert( szScratch<=6*(int)pBt->pageSize ); b.apCell = sqlite3ScratchMalloc( szScratch ); if( b.apCell==0 ){ rc = SQLITE_NOMEM; goto balance_cleanup; } b.szCell = (u16*)&b.apCell[nMaxCells]; aSpace1 = (u8*)&b.szCell[nMaxCells]; assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); /* ** Load pointers to all cells on sibling pages and the divider cells ** into the local b.apCell[] array. Make copies of the divider cells ** into space obtained from aSpace1[]. The divider cells have already ** been removed from pParent. ** ** If the siblings are on leaf pages, then the child pointers of the ** divider cells are stripped from the cells before they are copied ** into aSpace1[]. In this way, all cells in b.apCell[] are without ** child pointers. If siblings are not leaves, then all cell in ** b.apCell[] include child pointers. Either way, all cells in b.apCell[] ** are alike. ** ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. ** leafData: 1 if pPage holds key+data and pParent holds only keys. */ b.pRef = apOld[0]; leafCorrection = b.pRef->leaf*4; leafData = b.pRef->intKeyLeaf; for(i=0; inCell; u8 *aData = pOld->aData; u16 maskPage = pOld->maskPage; u8 *piCell = aData + pOld->cellOffset; u8 *piEnd; /* Verify that all sibling pages are of the same "type" (table-leaf, ** table-interior, index-leaf, or index-interior). */ if( pOld->aData[0]!=apOld[0]->aData[0] ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } /* Load b.apCell[] with pointers to all cells in pOld. If pOld ** constains overflow cells, include them in the b.apCell[] array ** in the correct spot. ** ** Note that when there are multiple overflow cells, it is always the ** case that they are sequential and adjacent. This invariant arises ** because multiple overflows can only occurs when inserting divider ** cells into a parent on a prior balance, and divider cells are always ** adjacent and are inserted in order. There is an assert() tagged ** with "NOTE 1" in the overflow cell insertion loop to prove this ** invariant. ** ** This must be done in advance. Once the balance starts, the cell ** offset section of the btree page will be overwritten and we will no ** long be able to find the cells if a pointer to each cell is not saved ** first. */ memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit); if( pOld->nOverflow>0 ){ memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow); limit = pOld->aiOvfl[0]; for(j=0; jnOverflow; k++){ assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */ b.apCell[b.nCell] = pOld->apOvfl[k]; b.nCell++; } } piEnd = aData + pOld->cellOffset + 2*pOld->nCell; while( piCellmaxLocal+23 ); assert( iSpace1 <= (int)pBt->pageSize ); memcpy(pTemp, apDiv[i], sz); b.apCell[b.nCell] = pTemp+leafCorrection; assert( leafCorrection==0 || leafCorrection==4 ); b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection; if( !pOld->leaf ){ assert( leafCorrection==0 ); assert( pOld->hdrOffset==0 ); /* The right pointer of the child page pOld becomes the left ** pointer of the divider cell */ memcpy(b.apCell[b.nCell], &pOld->aData[8], 4); }else{ assert( leafCorrection==4 ); while( b.szCell[b.nCell]<4 ){ /* Do not allow any cells smaller than 4 bytes. If a smaller cell ** does exist, pad it with 0x00 bytes. */ assert( b.szCell[b.nCell]==3 || CORRUPT_DB ); assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); aSpace1[iSpace1++] = 0x00; b.szCell[b.nCell]++; } } b.nCell++; } } /* ** Figure out the number of pages needed to hold all b.nCell cells. ** Store this number in "k". Also compute szNew[] which is the total ** size of all cells on the i-th page and cntNew[] which is the index ** in b.apCell[] of the cell that divides page i from page i+1. ** cntNew[k] should equal b.nCell. ** ** Values computed by this block: ** ** k: The total number of sibling pages ** szNew[i]: Spaced used on the i-th sibling page. ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to ** the right of the i-th sibling page. ** usableSpace: Number of bytes of space available on each sibling. ** */ usableSpace = pBt->usableSize - 12 + leafCorrection; for(i=0; inFree; if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } for(j=0; jnOverflow; j++){ szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); } cntNew[i] = cntOld[i]; } k = nOld; for(i=0; iusableSpace ){ if( i+1>=k ){ k = i+2; if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } szNew[k-1] = 0; cntNew[k-1] = b.nCell; } sz = 2 + cachedCellSize(&b, cntNew[i]-1); szNew[i] -= sz; if( !leafData ){ if( cntNew[i]usableSpace ) break; szNew[i] += sz; cntNew[i]++; if( !leafData ){ if( cntNew[i]=b.nCell ){ k = i+1; }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } } /* ** The packing computed by the previous block is biased toward the siblings ** on the left side (siblings with smaller keys). The left siblings are ** always nearly full, while the right-most sibling might be nearly empty. ** The next block of code attempts to adjust the packing of siblings to ** get a better balance. ** ** This adjustment is more than an optimization. The packing above might ** be so out of balance as to be illegal. For example, the right-most ** sibling might be completely empty. This adjustment is not optional. */ for(i=k-1; i>0; i--){ int szRight = szNew[i]; /* Size of sibling on the right */ int szLeft = szNew[i-1]; /* Size of sibling on the left */ int r; /* Index of right-most cell in left sibling */ int d; /* Index of first cell to the left of right sibling */ r = cntNew[i-1] - 1; d = r + 1 - leafData; (void)cachedCellSize(&b, d); do{ assert( d szLeft-(b.szCell[r]+2)) ){ break; } szRight += b.szCell[d] + 2; szLeft -= b.szCell[r] + 2; cntNew[i-1] = r; r--; d--; }while( r>=0 ); szNew[i] = szRight; szNew[i-1] = szLeft; if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } } /* Sanity check: For a non-corrupt database file one of the follwing ** must be true: ** (1) We found one or more cells (cntNew[0])>0), or ** (2) pPage is a virtual root page. A virtual root page is when ** the real root page is page 1 and we are the only child of ** that page. */ assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) || CORRUPT_DB); TRACE(("BALANCE: old: %d(nc=%d) %d(nc=%d) %d(nc=%d)\n", apOld[0]->pgno, apOld[0]->nCell, nOld>=2 ? apOld[1]->pgno : 0, nOld>=2 ? apOld[1]->nCell : 0, nOld>=3 ? apOld[2]->pgno : 0, nOld>=3 ? apOld[2]->nCell : 0 )); /* ** Allocate k new pages. Reuse old pages where possible. */ pageFlags = apOld[0]->aData[0]; for(i=0; ipDbPage); nNew++; if( rc ) goto balance_cleanup; }else{ assert( i>0 ); rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0); if( rc ) goto balance_cleanup; zeroPage(pNew, pageFlags); apNew[i] = pNew; nNew++; cntOld[i] = b.nCell; /* Set the pointer-map entry for the new sibling page. */ if( ISAUTOVACUUM ){ ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); if( rc!=SQLITE_OK ){ goto balance_cleanup; } } } } /* ** Reassign page numbers so that the new pages are in ascending order. ** This helps to keep entries in the disk file in order so that a scan ** of the table is closer to a linear scan through the file. That in turn ** helps the operating system to deliver pages from the disk more rapidly. ** ** An O(n^2) insertion sort algorithm is used, but since n is never more ** than (NB+2) (a small constant), that should not be a problem. ** ** When NB==3, this one optimization makes the database about 25% faster ** for large insertions and deletions. */ for(i=0; ipgno; aPgFlags[i] = apNew[i]->pDbPage->flags; for(j=0; ji ){ sqlite3PagerRekey(apNew[iBest]->pDbPage, pBt->nPage+iBest+1, 0); } sqlite3PagerRekey(apNew[i]->pDbPage, pgno, aPgFlags[iBest]); apNew[i]->pgno = pgno; } } TRACE(("BALANCE: new: %d(%d nc=%d) %d(%d nc=%d) %d(%d nc=%d) " "%d(%d nc=%d) %d(%d nc=%d)\n", apNew[0]->pgno, szNew[0], cntNew[0], nNew>=2 ? apNew[1]->pgno : 0, nNew>=2 ? szNew[1] : 0, nNew>=2 ? cntNew[1] - cntNew[0] - !leafData : 0, nNew>=3 ? apNew[2]->pgno : 0, nNew>=3 ? szNew[2] : 0, nNew>=3 ? cntNew[2] - cntNew[1] - !leafData : 0, nNew>=4 ? apNew[3]->pgno : 0, nNew>=4 ? szNew[3] : 0, nNew>=4 ? cntNew[3] - cntNew[2] - !leafData : 0, nNew>=5 ? apNew[4]->pgno : 0, nNew>=5 ? szNew[4] : 0, nNew>=5 ? cntNew[4] - cntNew[3] - !leafData : 0 )); assert( sqlite3PagerIswriteable(pParent->pDbPage) ); put4byte(pRight, apNew[nNew-1]->pgno); /* If the sibling pages are not leaves, ensure that the right-child pointer ** of the right-most new sibling page is set to the value that was ** originally in the same field of the right-most old sibling page. */ if( (pageFlags & PTF_LEAF)==0 && nOld!=nNew ){ MemPage *pOld = (nNew>nOld ? apNew : apOld)[nOld-1]; memcpy(&apNew[nNew-1]->aData[8], &pOld->aData[8], 4); } /* Make any required updates to pointer map entries associated with ** cells stored on sibling pages following the balance operation. Pointer ** map entries associated with divider cells are set by the insertCell() ** routine. The associated pointer map entries are: ** ** a) if the cell contains a reference to an overflow chain, the ** entry associated with the first page in the overflow chain, and ** ** b) if the sibling pages are not leaves, the child page associated ** with the cell. ** ** If the sibling pages are not leaves, then the pointer map entry ** associated with the right-child of each sibling may also need to be ** updated. This happens below, after the sibling pages have been ** populated, not here. */ if( ISAUTOVACUUM ){ MemPage *pNew = apNew[0]; u8 *aOld = pNew->aData; int cntOldNext = pNew->nCell + pNew->nOverflow; int usableSize = pBt->usableSize; int iNew = 0; int iOld = 0; for(i=0; inCell + pOld->nOverflow + !leafData; aOld = pOld->aData; } if( i==cntNew[iNew] ){ pNew = apNew[++iNew]; if( !leafData ) continue; } /* Cell pCell is destined for new sibling page pNew. Originally, it ** was either part of sibling page iOld (possibly an overflow cell), ** or else the divider cell to the left of sibling page iOld. So, ** if sibling page iOld had the same page number as pNew, and if ** pCell really was a part of sibling page iOld (not a divider or ** overflow cell), we can skip updating the pointer map entries. */ if( iOld>=nNew || pNew->pgno!=aPgno[iOld] || pCell=&aOld[usableSize] ){ if( !leafCorrection ){ ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); } if( cachedCellSize(&b,i)>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pCell, &rc); } if( rc ) goto balance_cleanup; } } } /* Insert new divider cells into pParent. */ for(i=0; ileaf ){ memcpy(&pNew->aData[8], pCell, 4); }else if( leafData ){ /* If the tree is a leaf-data tree, and the siblings are leaves, ** then there is no divider cell in b.apCell[]. Instead, the divider ** cell consists of the integer key for the right-most cell of ** the sibling-page assembled above only. */ CellInfo info; j--; pNew->xParseCell(pNew, b.apCell[j], &info); pCell = pTemp; sz = 4 + putVarint(&pCell[4], info.nKey); pTemp = 0; }else{ pCell -= 4; /* Obscure case for non-leaf-data trees: If the cell at pCell was ** previously stored on a leaf node, and its reported size was 4 ** bytes, then it may actually be smaller than this ** (see btreeParseCellPtr(), 4 bytes is the minimum size of ** any cell). But it is important to pass the correct size to ** insertCell(), so reparse the cell now. ** ** Note that this can never happen in an SQLite data file, as all ** cells are at least 4 bytes. It only happens in b-trees used ** to evaluate "IN (SELECT ...)" and similar clauses. */ if( b.szCell[j]==4 ){ assert(leafCorrection==4); sz = pParent->xCellSize(pParent, pCell); } } iOvflSpace += sz; assert( sz<=pBt->maxLocal+23 ); assert( iOvflSpace <= (int)pBt->pageSize ); insertCell(pParent, nxDiv+i, pCell, sz, pTemp, pNew->pgno, &rc); if( rc!=SQLITE_OK ) goto balance_cleanup; assert( sqlite3PagerIswriteable(pParent->pDbPage) ); } /* Now update the actual sibling pages. The order in which they are updated ** is important, as this code needs to avoid disrupting any page from which ** cells may still to be read. In practice, this means: ** ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1]) ** then it is not safe to update page apNew[iPg] until after ** the left-hand sibling apNew[iPg-1] has been updated. ** ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1]) ** then it is not safe to update page apNew[iPg] until after ** the right-hand sibling apNew[iPg+1] has been updated. ** ** If neither of the above apply, the page is safe to update. ** ** The iPg value in the following loop starts at nNew-1 goes down ** to 0, then back up to nNew-1 again, thus making two passes over ** the pages. On the initial downward pass, only condition (1) above ** needs to be tested because (2) will always be true from the previous ** step. On the upward pass, both conditions are always true, so the ** upwards pass simply processes pages that were missed on the downward ** pass. */ for(i=1-nNew; i=0 && iPg=0 /* On the upwards pass, or... */ || cntOld[iPg-1]>=cntNew[iPg-1] /* Condition (1) is true */ ){ int iNew; int iOld; int nNewCell; /* Verify condition (1): If cells are moving left, update iPg ** only after iPg-1 has already been updated. */ assert( iPg==0 || cntOld[iPg-1]>=cntNew[iPg-1] || abDone[iPg-1] ); /* Verify condition (2): If cells are moving right, update iPg ** only after iPg+1 has already been updated. */ assert( cntNew[iPg]>=cntOld[iPg] || abDone[iPg+1] ); if( iPg==0 ){ iNew = iOld = 0; nNewCell = cntNew[0]; }else{ iOld = iPgnFree = usableSpace-szNew[iPg]; assert( apNew[iPg]->nOverflow==0 ); assert( apNew[iPg]->nCell==nNewCell ); } } /* All pages have been processed exactly once */ assert( memcmp(abDone, "\01\01\01\01\01", nNew)==0 ); assert( nOld>0 ); assert( nNew>0 ); if( isRoot && pParent->nCell==0 && pParent->hdrOffset<=apNew[0]->nFree ){ /* The root page of the b-tree now contains no cells. The only sibling ** page is the right-child of the parent. Copy the contents of the ** child page into the parent, decreasing the overall height of the ** b-tree structure by one. This is described as the "balance-shallower" ** sub-algorithm in some documentation. ** ** If this is an auto-vacuum database, the call to copyNodeContent() ** sets all pointer-map entries corresponding to database image pages ** for which the pointer is stored within the content being copied. ** ** It is critical that the child page be defragmented before being ** copied into the parent, because if the parent is page 1 then it will ** by smaller than the child due to the database header, and so all the ** free space needs to be up front. */ assert( nNew==1 || CORRUPT_DB ); rc = defragmentPage(apNew[0]); testcase( rc!=SQLITE_OK ); assert( apNew[0]->nFree == (get2byte(&apNew[0]->aData[5])-apNew[0]->cellOffset-apNew[0]->nCell*2) || rc!=SQLITE_OK ); copyNodeContent(apNew[0], pParent, &rc); freePage(apNew[0], &rc); }else if( ISAUTOVACUUM && !leafCorrection ){ /* Fix the pointer map entries associated with the right-child of each ** sibling page. All other pointer map entries have already been taken ** care of. */ for(i=0; iaData[8]); ptrmapPut(pBt, key, PTRMAP_BTREE, apNew[i]->pgno, &rc); } } assert( pParent->isInit ); TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", nOld, nNew, b.nCell)); /* Free any old pages that were not reused as new pages. */ for(i=nNew; iisInit ){ /* The ptrmapCheckPages() contains assert() statements that verify that ** all pointer map pages are set correctly. This is helpful while ** debugging. This is usually disabled because a corrupt database may ** cause an assert() statement to fail. */ ptrmapCheckPages(apNew, nNew); ptrmapCheckPages(&pParent, 1); } #endif /* ** Cleanup before returning. */ balance_cleanup: sqlite3ScratchFree(b.apCell); for(i=0; i= 1700 && defined(_M_ARM) #pragma optimize("", on) #endif /* ** This function is called when the root page of a b-tree structure is ** overfull (has one or more overflow pages). ** ** A new child page is allocated and the contents of the current root ** page, including overflow cells, are copied into the child. The root ** page is then overwritten to make it an empty page with the right-child ** pointer pointing to the new page. ** ** Before returning, all pointer-map entries corresponding to pages ** that the new child-page now contains pointers to are updated. The ** entry corresponding to the new right-child pointer of the root ** page is also updated. ** ** If successful, *ppChild is set to contain a reference to the child ** page and SQLITE_OK is returned. In this case the caller is required ** to call releasePage() on *ppChild exactly once. If an error occurs, ** an error code is returned and *ppChild is set to 0. */ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){ int rc; /* Return value from subprocedures */ MemPage *pChild = 0; /* Pointer to a new child page */ Pgno pgnoChild = 0; /* Page number of the new child page */ BtShared *pBt = pRoot->pBt; /* The BTree */ assert( pRoot->nOverflow>0 ); assert( sqlite3_mutex_held(pBt->mutex) ); /* Make pRoot, the root page of the b-tree, writable. Allocate a new ** page that will become the new right-child of pPage. Copy the contents ** of the node stored on pRoot into the new child page. */ rc = sqlite3PagerWrite(pRoot->pDbPage); if( rc==SQLITE_OK ){ rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); copyNodeContent(pRoot, pChild, &rc); if( ISAUTOVACUUM ){ ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); } } if( rc ){ *ppChild = 0; releasePage(pChild); return rc; } assert( sqlite3PagerIswriteable(pChild->pDbPage) ); assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); assert( pChild->nCell==pRoot->nCell ); TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno)); /* Copy the overflow cells from pRoot to pChild */ memcpy(pChild->aiOvfl, pRoot->aiOvfl, pRoot->nOverflow*sizeof(pRoot->aiOvfl[0])); memcpy(pChild->apOvfl, pRoot->apOvfl, pRoot->nOverflow*sizeof(pRoot->apOvfl[0])); pChild->nOverflow = pRoot->nOverflow; /* Zero the contents of pRoot. Then install pChild as the right-child. */ zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF); put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild); *ppChild = pChild; return SQLITE_OK; } /* ** The page that pCur currently points to has just been modified in ** some way. This function figures out if this modification means the ** tree needs to be balanced, and if so calls the appropriate balancing ** routine. Balancing routines are: ** ** balance_quick() ** balance_deeper() ** balance_nonroot() */ static int balance(BtCursor *pCur){ int rc = SQLITE_OK; const int nMin = pCur->pBt->usableSize * 2 / 3; u8 aBalanceQuickSpace[13]; u8 *pFree = 0; TESTONLY( int balance_quick_called = 0 ); TESTONLY( int balance_deeper_called = 0 ); do { int iPage = pCur->iPage; MemPage *pPage = pCur->apPage[iPage]; if( iPage==0 ){ if( pPage->nOverflow ){ /* The root page of the b-tree is overfull. In this case call the ** balance_deeper() function to create a new child for the root-page ** and copy the current contents of the root-page to it. The ** next iteration of the do-loop will balance the child page. */ assert( (balance_deeper_called++)==0 ); rc = balance_deeper(pPage, &pCur->apPage[1]); if( rc==SQLITE_OK ){ pCur->iPage = 1; pCur->aiIdx[0] = 0; pCur->aiIdx[1] = 0; assert( pCur->apPage[1]->nOverflow ); } }else{ break; } }else if( pPage->nOverflow==0 && pPage->nFree<=nMin ){ break; }else{ MemPage * const pParent = pCur->apPage[iPage-1]; int const iIdx = pCur->aiIdx[iPage-1]; rc = sqlite3PagerWrite(pParent->pDbPage); if( rc==SQLITE_OK ){ #ifndef SQLITE_OMIT_QUICKBALANCE if( pPage->intKeyLeaf && pPage->nOverflow==1 && pPage->aiOvfl[0]==pPage->nCell && pParent->pgno!=1 && pParent->nCell==iIdx ){ /* Call balance_quick() to create a new sibling of pPage on which ** to store the overflow cell. balance_quick() inserts a new cell ** into pParent, which may cause pParent overflow. If this ** happens, the next iteration of the do-loop will balance pParent ** use either balance_nonroot() or balance_deeper(). Until this ** happens, the overflow cell is stored in the aBalanceQuickSpace[] ** buffer. ** ** The purpose of the following assert() is to check that only a ** single call to balance_quick() is made for each call to this ** function. If this were not verified, a subtle bug involving reuse ** of the aBalanceQuickSpace[] might sneak in. */ assert( (balance_quick_called++)==0 ); rc = balance_quick(pParent, pPage, aBalanceQuickSpace); }else #endif { /* In this case, call balance_nonroot() to redistribute cells ** between pPage and up to 2 of its sibling pages. This involves ** modifying the contents of pParent, which may cause pParent to ** become overfull or underfull. The next iteration of the do-loop ** will balance the parent page to correct this. ** ** If the parent page becomes overfull, the overflow cell or cells ** are stored in the pSpace buffer allocated immediately below. ** A subsequent iteration of the do-loop will deal with this by ** calling balance_nonroot() (balance_deeper() may be called first, ** but it doesn't deal with overflow cells - just moves them to a ** different page). Once this subsequent call to balance_nonroot() ** has completed, it is safe to release the pSpace buffer used by ** the previous call, as the overflow cell data will have been ** copied either into the body of a database page or into the new ** pSpace buffer passed to the latter call to balance_nonroot(). */ u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, pCur->hints&BTREE_BULKLOAD); if( pFree ){ /* If pFree is not NULL, it points to the pSpace buffer used ** by a previous call to balance_nonroot(). Its contents are ** now stored either on real database pages or within the ** new pSpace buffer, so it may be safely freed here. */ sqlite3PageFree(pFree); } /* The pSpace buffer will be freed after the next call to ** balance_nonroot(), or just before this function returns, whichever ** comes first. */ pFree = pSpace; } } pPage->nOverflow = 0; /* The next iteration of the do-loop balances the parent page. */ releasePage(pPage); pCur->iPage--; assert( pCur->iPage>=0 ); } }while( rc==SQLITE_OK ); if( pFree ){ sqlite3PageFree(pFree); } return rc; } /* ** Insert a new record into the BTree. The key is given by (pKey,nKey) ** and the data is given by (pData,nData). The cursor is used only to ** define what table the record should be inserted into. The cursor ** is left pointing at a random location. ** ** For an INTKEY table, only the nKey value of the key is used. pKey is ** ignored. For a ZERODATA table, the pData and nData are both ignored. ** ** If the seekResult parameter is non-zero, then a successful call to ** MovetoUnpacked() to seek cursor pCur to (pKey, nKey) has already ** been performed. seekResult is the search result returned (a negative ** number if pCur points at an entry that is smaller than (pKey, nKey), or ** a positive value if pCur points at an entry that is larger than ** (pKey, nKey)). ** ** If the seekResult parameter is non-zero, then the caller guarantees that ** cursor pCur is pointing at the existing copy of a row that is to be ** overwritten. If the seekResult parameter is 0, then cursor pCur may ** point to any entry or to no entry at all and so this function has to seek ** the cursor before the new key can be inserted. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeInsert( BtCursor *pCur, /* Insert data into the table of this cursor */ const void *pKey, i64 nKey, /* The key of the new record */ const void *pData, int nData, /* The data of the new record */ int nZero, /* Number of extra 0 bytes to append to data */ int appendBias, /* True if this is likely an append */ int seekResult /* Result of prior MovetoUnpacked() call */ ){ int rc; int loc = seekResult; /* -1: before desired location +1: after */ int szNew = 0; int idx; MemPage *pPage; Btree *p = pCur->pBtree; BtShared *pBt = p->pBt; unsigned char *oldCell; unsigned char *newCell = 0; if( pCur->eState==CURSOR_FAULT ){ assert( pCur->skipNext!=SQLITE_OK ); return pCur->skipNext; } assert( cursorHoldsMutex(pCur) ); assert( (pCur->curFlags & BTCF_WriteFlag)!=0 && pBt->inTransaction==TRANS_WRITE && (pBt->btsFlags & BTS_READ_ONLY)==0 ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); /* Assert that the caller has been consistent. If this cursor was opened ** expecting an index b-tree, then the caller should be inserting blob ** keys with no associated data. If the cursor was opened expecting an ** intkey table, the caller should be inserting integer keys with a ** blob of associated data. */ assert( (pKey==0)==(pCur->pKeyInfo==0) ); /* Save the positions of any other cursors open on this table. ** ** In some cases, the call to btreeMoveto() below is a no-op. For ** example, when inserting data into a table with auto-generated integer ** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the ** integer key to use. It then calls this function to actually insert the ** data into the intkey B-Tree. In this case btreeMoveto() recognizes ** that the cursor is already where it needs to be and returns without ** doing any work. To avoid thwarting these optimizations, it is important ** not to clear the cursor here. */ if( pCur->curFlags & BTCF_Multiple ){ rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); if( rc ) return rc; } if( pCur->pKeyInfo==0 ){ assert( pKey==0 ); /* If this is an insert into a table b-tree, invalidate any incrblob ** cursors open on the row being replaced */ invalidateIncrblobCursors(p, nKey, 0); /* If the cursor is currently on the last row and we are appending a ** new row onto the end, set the "loc" to avoid an unnecessary ** btreeMoveto() call */ if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 && pCur->info.nKey==nKey-1 ){ loc = -1; }else if( loc==0 ){ rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc); if( rc ) return rc; } }else if( loc==0 ){ rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); if( rc ) return rc; } assert( pCur->eState==CURSOR_VALID || (pCur->eState==CURSOR_INVALID && loc) ); pPage = pCur->apPage[pCur->iPage]; assert( pPage->intKey || nKey>=0 ); assert( pPage->leaf || !pPage->intKey ); TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n", pCur->pgnoRoot, nKey, nData, pPage->pgno, loc==0 ? "overwrite" : "new entry")); assert( pPage->isInit ); newCell = pBt->pTmpSpace; assert( newCell!=0 ); rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); if( rc ) goto end_insert; assert( szNew==pPage->xCellSize(pPage, newCell) ); assert( szNew <= MX_CELL_SIZE(pBt) ); idx = pCur->aiIdx[pCur->iPage]; if( loc==0 ){ u16 szOld; assert( idxnCell ); rc = sqlite3PagerWrite(pPage->pDbPage); if( rc ){ goto end_insert; } oldCell = findCell(pPage, idx); if( !pPage->leaf ){ memcpy(newCell, oldCell, 4); } rc = clearCell(pPage, oldCell, &szOld); dropCell(pPage, idx, szOld, &rc); if( rc ) goto end_insert; }else if( loc<0 && pPage->nCell>0 ){ assert( pPage->leaf ); idx = ++pCur->aiIdx[pCur->iPage]; }else{ assert( pPage->leaf ); } insertCell(pPage, idx, newCell, szNew, 0, 0, &rc); assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 ); /* If no error has occurred and pPage has an overflow cell, call balance() ** to redistribute the cells within the tree. Since balance() may move ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey ** variables. ** ** Previous versions of SQLite called moveToRoot() to move the cursor ** back to the root page as balance() used to invalidate the contents ** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that, ** set the cursor state to "invalid". This makes common insert operations ** slightly faster. ** ** There is a subtle but important optimization here too. When inserting ** multiple records into an intkey b-tree using a single cursor (as can ** happen while processing an "INSERT INTO ... SELECT" statement), it ** is advantageous to leave the cursor pointing to the last entry in ** the b-tree if possible. If the cursor is left pointing to the last ** entry in the table, and the next row inserted has an integer key ** larger than the largest existing key, it is possible to insert the ** row without seeking the cursor. This can be a big performance boost. */ pCur->info.nSize = 0; if( rc==SQLITE_OK && pPage->nOverflow ){ pCur->curFlags &= ~(BTCF_ValidNKey); rc = balance(pCur); /* Must make sure nOverflow is reset to zero even if the balance() ** fails. Internal data structure corruption will result otherwise. ** Also, set the cursor state to invalid. This stops saveCursorPosition() ** from trying to save the current position of the cursor. */ pCur->apPage[pCur->iPage]->nOverflow = 0; pCur->eState = CURSOR_INVALID; } assert( pCur->apPage[pCur->iPage]->nOverflow==0 ); end_insert: return rc; } /* ** Delete the entry that the cursor is pointing to. ** ** If the second parameter is zero, then the cursor is left pointing at an ** arbitrary location after the delete. If it is non-zero, then the cursor ** is left in a state such that the next call to BtreeNext() or BtreePrev() ** moves it to the same row as it would if the call to BtreeDelete() had ** been omitted. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeDelete(BtCursor *pCur, int bPreserve){ Btree *p = pCur->pBtree; BtShared *pBt = p->pBt; int rc; /* Return code */ MemPage *pPage; /* Page to delete cell from */ unsigned char *pCell; /* Pointer to cell to delete */ int iCellIdx; /* Index of cell to delete */ int iCellDepth; /* Depth of node containing pCell */ u16 szCell; /* Size of the cell being deleted */ int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */ assert( cursorHoldsMutex(pCur) ); assert( pBt->inTransaction==TRANS_WRITE ); assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); assert( pCur->curFlags & BTCF_WriteFlag ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); assert( !hasReadConflicts(p, pCur->pgnoRoot) ); assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); assert( pCur->eState==CURSOR_VALID ); iCellDepth = pCur->iPage; iCellIdx = pCur->aiIdx[iCellDepth]; pPage = pCur->apPage[iCellDepth]; pCell = findCell(pPage, iCellIdx); /* If the page containing the entry to delete is not a leaf page, move ** the cursor to the largest entry in the tree that is smaller than ** the entry being deleted. This cell will replace the cell being deleted ** from the internal node. The 'previous' entry is used for this instead ** of the 'next' entry, as the previous entry is always a part of the ** sub-tree headed by the child page of the cell being deleted. This makes ** balancing the tree following the delete operation easier. */ if( !pPage->leaf ){ int notUsed = 0; rc = sqlite3BtreePrevious(pCur, ¬Used); if( rc ) return rc; } /* Save the positions of any other cursors open on this table before ** making any modifications. */ if( pCur->curFlags & BTCF_Multiple ){ rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); if( rc ) return rc; } /* If this is a delete operation to remove a row from a table b-tree, ** invalidate any incrblob cursors open on the row being deleted. */ if( pCur->pKeyInfo==0 ){ invalidateIncrblobCursors(p, pCur->info.nKey, 0); } /* If the bPreserve flag is set to true, then the cursor position must ** be preserved following this delete operation. If the current delete ** will cause a b-tree rebalance, then this is done by saving the cursor ** key and leaving the cursor in CURSOR_REQUIRESEEK state before ** returning. ** ** Or, if the current delete will not cause a rebalance, then the cursor ** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately ** before or after the deleted entry. In this case set bSkipnext to true. */ if( bPreserve ){ if( !pPage->leaf || (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3) ){ /* A b-tree rebalance will be required after deleting this entry. ** Save the cursor key. */ rc = saveCursorKey(pCur); if( rc ) return rc; }else{ bSkipnext = 1; } } /* Make the page containing the entry to be deleted writable. Then free any ** overflow pages associated with the entry and finally remove the cell ** itself from within the page. */ rc = sqlite3PagerWrite(pPage->pDbPage); if( rc ) return rc; rc = clearCell(pPage, pCell, &szCell); dropCell(pPage, iCellIdx, szCell, &rc); if( rc ) return rc; /* If the cell deleted was not located on a leaf page, then the cursor ** is currently pointing to the largest entry in the sub-tree headed ** by the child-page of the cell that was just deleted from an internal ** node. The cell from the leaf node needs to be moved to the internal ** node to replace the deleted cell. */ if( !pPage->leaf ){ MemPage *pLeaf = pCur->apPage[pCur->iPage]; int nCell; Pgno n = pCur->apPage[iCellDepth+1]->pgno; unsigned char *pTmp; pCell = findCell(pLeaf, pLeaf->nCell-1); if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT; nCell = pLeaf->xCellSize(pLeaf, pCell); assert( MX_CELL_SIZE(pBt) >= nCell ); pTmp = pBt->pTmpSpace; assert( pTmp!=0 ); rc = sqlite3PagerWrite(pLeaf->pDbPage); insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); if( rc ) return rc; } /* Balance the tree. If the entry deleted was located on a leaf page, ** then the cursor still points to that page. In this case the first ** call to balance() repairs the tree, and the if(...) condition is ** never true. ** ** Otherwise, if the entry deleted was on an internal node page, then ** pCur is pointing to the leaf page from which a cell was removed to ** replace the cell deleted from the internal node. This is slightly ** tricky as the leaf node may be underfull, and the internal node may ** be either under or overfull. In this case run the balancing algorithm ** on the leaf node first. If the balance proceeds far enough up the ** tree that we can be sure that any problem in the internal node has ** been corrected, so be it. Otherwise, after balancing the leaf node, ** walk the cursor up the tree to the internal node and balance it as ** well. */ rc = balance(pCur); if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){ while( pCur->iPage>iCellDepth ){ releasePage(pCur->apPage[pCur->iPage--]); } rc = balance(pCur); } if( rc==SQLITE_OK ){ if( bSkipnext ){ assert( bPreserve && pCur->iPage==iCellDepth ); assert( pPage==pCur->apPage[pCur->iPage] ); assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell ); pCur->eState = CURSOR_SKIPNEXT; if( iCellIdx>=pPage->nCell ){ pCur->skipNext = -1; pCur->aiIdx[iCellDepth] = pPage->nCell-1; }else{ pCur->skipNext = 1; } }else{ rc = moveToRoot(pCur); if( bPreserve ){ pCur->eState = CURSOR_REQUIRESEEK; } } } return rc; } /* ** Create a new BTree table. Write into *piTable the page ** number for the root page of the new table. ** ** The type of type is determined by the flags parameter. Only the ** following values of flags are currently in use. Other values for ** flags might not work: ** ** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys ** BTREE_ZERODATA Used for SQL indices */ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ BtShared *pBt = p->pBt; MemPage *pRoot; Pgno pgnoRoot; int rc; int ptfFlags; /* Page-type flage for the root page of new table */ assert( sqlite3BtreeHoldsMutex(p) ); assert( pBt->inTransaction==TRANS_WRITE ); assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); #ifdef SQLITE_OMIT_AUTOVACUUM rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); if( rc ){ return rc; } #else if( pBt->autoVacuum ){ Pgno pgnoMove; /* Move a page here to make room for the root-page */ MemPage *pPageMove; /* The page to move to. */ /* Creating a new table may probably require moving an existing database ** to make room for the new tables root page. In case this page turns ** out to be an overflow page, delete all overflow page-map caches ** held by open cursors. */ invalidateAllOverflowCache(pBt); /* Read the value of meta[3] from the database to determine where the ** root page of the new table should go. meta[3] is the largest root-page ** created so far, so the new root-page is (meta[3]+1). */ sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &pgnoRoot); pgnoRoot++; /* The new root-page may not be allocated on a pointer-map page, or the ** PENDING_BYTE page. */ while( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) || pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ pgnoRoot++; } assert( pgnoRoot>=3 || CORRUPT_DB ); testcase( pgnoRoot<3 ); /* Allocate a page. The page that currently resides at pgnoRoot will ** be moved to the allocated page (unless the allocated page happens ** to reside at pgnoRoot). */ rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT); if( rc!=SQLITE_OK ){ return rc; } if( pgnoMove!=pgnoRoot ){ /* pgnoRoot is the page that will be used for the root-page of ** the new table (assuming an error did not occur). But we were ** allocated pgnoMove. If required (i.e. if it was not allocated ** by extending the file), the current page at position pgnoMove ** is already journaled. */ u8 eType = 0; Pgno iPtrPage = 0; /* Save the positions of any open cursors. This is required in ** case they are holding a reference to an xFetch reference ** corresponding to page pgnoRoot. */ rc = saveAllCursors(pBt, 0, 0); releasePage(pPageMove); if( rc!=SQLITE_OK ){ return rc; } /* Move the page currently at pgnoRoot to pgnoMove. */ rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); if( rc!=SQLITE_OK ){ return rc; } rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage); if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){ rc = SQLITE_CORRUPT_BKPT; } if( rc!=SQLITE_OK ){ releasePage(pRoot); return rc; } assert( eType!=PTRMAP_ROOTPAGE ); assert( eType!=PTRMAP_FREEPAGE ); rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove, 0); releasePage(pRoot); /* Obtain the page at pgnoRoot */ if( rc!=SQLITE_OK ){ return rc; } rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); if( rc!=SQLITE_OK ){ return rc; } rc = sqlite3PagerWrite(pRoot->pDbPage); if( rc!=SQLITE_OK ){ releasePage(pRoot); return rc; } }else{ pRoot = pPageMove; } /* Update the pointer-map and meta-data with the new root-page number. */ ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0, &rc); if( rc ){ releasePage(pRoot); return rc; } /* When the new root page was allocated, page 1 was made writable in ** order either to increase the database filesize, or to decrement the ** freelist count. Hence, the sqlite3BtreeUpdateMeta() call cannot fail. */ assert( sqlite3PagerIswriteable(pBt->pPage1->pDbPage) ); rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot); if( NEVER(rc) ){ releasePage(pRoot); return rc; } }else{ rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0); if( rc ) return rc; } #endif assert( sqlite3PagerIswriteable(pRoot->pDbPage) ); if( createTabFlags & BTREE_INTKEY ){ ptfFlags = PTF_INTKEY | PTF_LEAFDATA | PTF_LEAF; }else{ ptfFlags = PTF_ZERODATA | PTF_LEAF; } zeroPage(pRoot, ptfFlags); sqlite3PagerUnref(pRoot->pDbPage); assert( (pBt->openFlags & BTREE_SINGLE)==0 || pgnoRoot==2 ); *piTable = (int)pgnoRoot; return SQLITE_OK; } SQLITE_API int SQLITE_STDCALL sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){ int rc; sqlite3BtreeEnter(p); rc = btreeCreateTable(p, piTable, flags); sqlite3BtreeLeave(p); return rc; } /* ** Erase the given database page and all its children. Return ** the page to the freelist. */ static int clearDatabasePage( BtShared *pBt, /* The BTree that contains the table */ Pgno pgno, /* Page number to clear */ int freePageFlag, /* Deallocate page if true */ int *pnChange /* Add number of Cells freed to this counter */ ){ MemPage *pPage; int rc; unsigned char *pCell; int i; int hdr; u16 szCell; assert( sqlite3_mutex_held(pBt->mutex) ); if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); if( rc ) return rc; if( pPage->bBusy ){ rc = SQLITE_CORRUPT_BKPT; goto cleardatabasepage_out; } pPage->bBusy = 1; hdr = pPage->hdrOffset; for(i=0; inCell; i++){ pCell = findCell(pPage, i); if( !pPage->leaf ){ rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); if( rc ) goto cleardatabasepage_out; } rc = clearCell(pPage, pCell, &szCell); if( rc ) goto cleardatabasepage_out; } if( !pPage->leaf ){ rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); if( rc ) goto cleardatabasepage_out; }else if( pnChange ){ assert( pPage->intKey || CORRUPT_DB ); testcase( !pPage->intKey ); *pnChange += pPage->nCell; } if( freePageFlag ){ freePage(pPage, &rc); }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF); } cleardatabasepage_out: pPage->bBusy = 0; releasePage(pPage); return rc; } /* ** Delete all information from a single table in the database. iTable is ** the page number of the root of the table. After this routine returns, ** the root page is empty, but still exists. ** ** This routine will fail with SQLITE_LOCKED if there are any open ** read cursors on the table. Open write cursors are moved to the ** root of the table. ** ** If pnChange is not NULL, then table iTable must be an intkey table. The ** integer value pointed to by pnChange is incremented by the number of ** entries in the table. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){ int rc; BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); assert( p->inTrans==TRANS_WRITE ); rc = saveAllCursors(pBt, (Pgno)iTable, 0); if( SQLITE_OK==rc ){ /* Invalidate all incrblob cursors open on table iTable (assuming iTable ** is the root of a table b-tree - if it is not, the following call is ** a no-op). */ invalidateIncrblobCursors(p, 0, 1); rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange); } sqlite3BtreeLeave(p); return rc; } /* ** Delete all information from the single table that pCur is open on. ** ** This routine only work for pCur on an ephemeral table. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeClearTableOfCursor(BtCursor *pCur){ return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0); } /* ** Erase all information in a table and add the root of the table to ** the freelist. Except, the root of the principle table (the one on ** page 1) is never added to the freelist. ** ** This routine will fail with SQLITE_LOCKED if there are any open ** cursors on the table. ** ** If AUTOVACUUM is enabled and the page at iTable is not the last ** root page in the database file, then the last root page ** in the database file is moved into the slot formerly occupied by ** iTable and that last slot formerly occupied by the last root page ** is added to the freelist instead of iTable. In this say, all ** root pages are kept at the beginning of the database file, which ** is necessary for AUTOVACUUM to work right. *piMoved is set to the ** page number that used to be the last root page in the file before ** the move. If no page gets moved, *piMoved is set to 0. ** The last root page is recorded in meta[3] and the value of ** meta[3] is updated by this procedure. */ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ int rc; MemPage *pPage = 0; BtShared *pBt = p->pBt; assert( sqlite3BtreeHoldsMutex(p) ); assert( p->inTrans==TRANS_WRITE ); /* It is illegal to drop a table if any cursors are open on the ** database. This is because in auto-vacuum mode the backend may ** need to move another root-page to fill a gap left by the deleted ** root page. If an open cursor was using this page a problem would ** occur. ** ** This error is caught long before control reaches this point. */ if( NEVER(pBt->pCursor) ){ sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db); return SQLITE_LOCKED_SHAREDCACHE; } rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); if( rc ) return rc; rc = sqlite3BtreeClearTable(p, iTable, 0); if( rc ){ releasePage(pPage); return rc; } *piMoved = 0; if( iTable>1 ){ #ifdef SQLITE_OMIT_AUTOVACUUM freePage(pPage, &rc); releasePage(pPage); #else if( pBt->autoVacuum ){ Pgno maxRootPgno; sqlite3BtreeGetMeta(p, BTREE_LARGEST_ROOT_PAGE, &maxRootPgno); if( iTable==maxRootPgno ){ /* If the table being dropped is the table with the largest root-page ** number in the database, put the root page on the free list. */ freePage(pPage, &rc); releasePage(pPage); if( rc!=SQLITE_OK ){ return rc; } }else{ /* The table being dropped does not have the largest root-page ** number in the database. So move the page that does into the ** gap left by the deleted root-page. */ MemPage *pMove; releasePage(pPage); rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); if( rc!=SQLITE_OK ){ return rc; } rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0); releasePage(pMove); if( rc!=SQLITE_OK ){ return rc; } pMove = 0; rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); freePage(pMove, &rc); releasePage(pMove); if( rc!=SQLITE_OK ){ return rc; } *piMoved = maxRootPgno; } /* Set the new 'max-root-page' value in the database header. This ** is the old value less one, less one more if that happens to ** be a root-page number, less one again if that is the ** PENDING_BYTE_PAGE. */ maxRootPgno--; while( maxRootPgno==PENDING_BYTE_PAGE(pBt) || PTRMAP_ISPAGE(pBt, maxRootPgno) ){ maxRootPgno--; } assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) ); rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno); }else{ freePage(pPage, &rc); releasePage(pPage); } #endif }else{ /* If sqlite3BtreeDropTable was called on page 1. ** This really never should happen except in a corrupt ** database. */ zeroPage(pPage, PTF_INTKEY|PTF_LEAF ); releasePage(pPage); } return rc; } SQLITE_API int SQLITE_STDCALL sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ int rc; sqlite3BtreeEnter(p); rc = btreeDropTable(p, iTable, piMoved); sqlite3BtreeLeave(p); return rc; } /* ** This function may only be called if the b-tree connection already ** has a read or write transaction open on the database. ** ** Read the meta-information out of a database file. Meta[0] ** is the number of free pages currently in the database. Meta[1] ** through meta[15] are available for use by higher layers. Meta[0] ** is read-only, the others are read/write. ** ** The schema layer numbers meta values differently. At the schema ** layer (and the SetCookie and ReadCookie opcodes) the number of ** free pages is not visible. So Cookie[0] is the same as Meta[1]. ** ** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead ** of reading the value out of the header, it instead loads the "DataVersion" ** from the pager. The BTREE_DATA_VERSION value is not actually stored in the ** database file. It is a number computed by the pager. But its access ** pattern is the same as header meta values, and so it is convenient to ** read it from this routine. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){ BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); assert( p->inTrans>TRANS_NONE ); assert( SQLITE_OK==querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK) ); assert( pBt->pPage1 ); assert( idx>=0 && idx<=15 ); if( idx==BTREE_DATA_VERSION ){ *pMeta = sqlite3PagerDataVersion(pBt->pPager) + p->iDataVersion; }else{ *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]); } /* If auto-vacuum is disabled in this build and this is an auto-vacuum ** database, mark the database as read-only. */ #ifdef SQLITE_OMIT_AUTOVACUUM if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ){ pBt->btsFlags |= BTS_READ_ONLY; } #endif sqlite3BtreeLeave(p); } /* ** Write meta-information back into the database. Meta[0] is ** read-only and may not be written. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){ BtShared *pBt = p->pBt; unsigned char *pP1; int rc; assert( idx>=1 && idx<=15 ); sqlite3BtreeEnter(p); assert( p->inTrans==TRANS_WRITE ); assert( pBt->pPage1!=0 ); pP1 = pBt->pPage1->aData; rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); if( rc==SQLITE_OK ){ put4byte(&pP1[36 + idx*4], iMeta); #ifndef SQLITE_OMIT_AUTOVACUUM if( idx==BTREE_INCR_VACUUM ){ assert( pBt->autoVacuum || iMeta==0 ); assert( iMeta==0 || iMeta==1 ); pBt->incrVacuum = (u8)iMeta; } #endif } sqlite3BtreeLeave(p); return rc; } #ifndef SQLITE_OMIT_BTREECOUNT /* ** The first argument, pCur, is a cursor opened on some b-tree. Count the ** number of entries in the b-tree and write the result to *pnEntry. ** ** SQLITE_OK is returned if the operation is successfully executed. ** Otherwise, if an error is encountered (i.e. an IO error or database ** corruption) an SQLite error code is returned. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCount(BtCursor *pCur, i64 *pnEntry){ i64 nEntry = 0; /* Value to return in *pnEntry */ int rc; /* Return code */ if( pCur->pgnoRoot==0 ){ *pnEntry = 0; return SQLITE_OK; } rc = moveToRoot(pCur); /* Unless an error occurs, the following loop runs one iteration for each ** page in the B-Tree structure (not including overflow pages). */ while( rc==SQLITE_OK ){ int iIdx; /* Index of child node in parent */ MemPage *pPage; /* Current page of the b-tree */ /* If this is a leaf page or the tree is not an int-key tree, then ** this page contains countable entries. Increment the entry counter ** accordingly. */ pPage = pCur->apPage[pCur->iPage]; if( pPage->leaf || !pPage->intKey ){ nEntry += pPage->nCell; } /* pPage is a leaf node. This loop navigates the cursor so that it ** points to the first interior cell that it points to the parent of ** the next page in the tree that has not yet been visited. The ** pCur->aiIdx[pCur->iPage] value is set to the index of the parent cell ** of the page, or to the number of cells in the page if the next page ** to visit is the right-child of its parent. ** ** If all pages in the tree have been visited, return SQLITE_OK to the ** caller. */ if( pPage->leaf ){ do { if( pCur->iPage==0 ){ /* All pages of the b-tree have been visited. Return successfully. */ *pnEntry = nEntry; return moveToRoot(pCur); } moveToParent(pCur); }while ( pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell ); pCur->aiIdx[pCur->iPage]++; pPage = pCur->apPage[pCur->iPage]; } /* Descend to the child node of the cell that the cursor currently ** points at. This is the right-child if (iIdx==pPage->nCell). */ iIdx = pCur->aiIdx[pCur->iPage]; if( iIdx==pPage->nCell ){ rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); }else{ rc = moveToChild(pCur, get4byte(findCell(pPage, iIdx))); } } /* An error has occurred. Return an error code. */ return rc; } #endif /* ** Return the pager associated with a BTree. This routine is used for ** testing and debugging only. */ SQLITE_API Pager *SQLITE_STDCALL sqlite3BtreePager(Btree *p){ return p->pBt->pPager; } #ifndef SQLITE_OMIT_INTEGRITY_CHECK /* ** Append a message to the error message string. */ static void checkAppendMsg( IntegrityCk *pCheck, const char *zFormat, ... ){ va_list ap; if( !pCheck->mxErr ) return; pCheck->mxErr--; pCheck->nErr++; va_start(ap, zFormat); if( pCheck->errMsg.nChar ){ sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); } if( pCheck->zPfx ){ sqlite3XPrintf(&pCheck->errMsg, 0, pCheck->zPfx, pCheck->v1, pCheck->v2); } sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); va_end(ap); if( pCheck->errMsg.accError==STRACCUM_NOMEM ){ pCheck->mallocFailed = 1; } } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ #ifndef SQLITE_OMIT_INTEGRITY_CHECK /* ** Return non-zero if the bit in the IntegrityCk.aPgRef[] array that ** corresponds to page iPg is already set. */ static int getPageReferenced(IntegrityCk *pCheck, Pgno iPg){ assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); return (pCheck->aPgRef[iPg/8] & (1 << (iPg & 0x07))); } /* ** Set the bit in the IntegrityCk.aPgRef[] array that corresponds to page iPg. */ static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ assert( iPg<=pCheck->nPage && sizeof(pCheck->aPgRef[0])==1 ); pCheck->aPgRef[iPg/8] |= (1 << (iPg & 0x07)); } /* ** Add 1 to the reference count for page iPage. If this is the second ** reference to the page, add an error message to pCheck->zErrMsg. ** Return 1 if there are 2 or more references to the page and 0 if ** if this is the first reference to the page. ** ** Also check that the page number is in bounds. */ static int checkRef(IntegrityCk *pCheck, Pgno iPage){ if( iPage==0 ) return 1; if( iPage>pCheck->nPage ){ checkAppendMsg(pCheck, "invalid page number %d", iPage); return 1; } if( getPageReferenced(pCheck, iPage) ){ checkAppendMsg(pCheck, "2nd reference to page %d", iPage); return 1; } setPageReferenced(pCheck, iPage); return 0; } #ifndef SQLITE_OMIT_AUTOVACUUM /* ** Check that the entry in the pointer-map for page iChild maps to ** page iParent, pointer type ptrType. If not, append an error message ** to pCheck. */ static void checkPtrmap( IntegrityCk *pCheck, /* Integrity check context */ Pgno iChild, /* Child page number */ u8 eType, /* Expected pointer map type */ Pgno iParent /* Expected pointer map parent page number */ ){ int rc; u8 ePtrmapType; Pgno iPtrmapParent; rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); if( rc!=SQLITE_OK ){ if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->mallocFailed = 1; checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild); return; } if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ checkAppendMsg(pCheck, "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", iChild, eType, iParent, ePtrmapType, iPtrmapParent); } } #endif /* ** Check the integrity of the freelist or of an overflow page list. ** Verify that the number of pages on the list is N. */ static void checkList( IntegrityCk *pCheck, /* Integrity checking context */ int isFreeList, /* True for a freelist. False for overflow page list */ int iPage, /* Page number for first page in the list */ int N /* Expected number of pages in the list */ ){ int i; int expected = N; int iFirst = iPage; while( N-- > 0 && pCheck->mxErr ){ DbPage *pOvflPage; unsigned char *pOvflData; if( iPage<1 ){ checkAppendMsg(pCheck, "%d of %d pages missing from overflow list starting at %d", N+1, expected, iFirst); break; } if( checkRef(pCheck, iPage) ) break; if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){ checkAppendMsg(pCheck, "failed to get page %d", iPage); break; } pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); if( isFreeList ){ int n = get4byte(&pOvflData[4]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pCheck->pBt->autoVacuum ){ checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0); } #endif if( n>(int)pCheck->pBt->usableSize/4-2 ){ checkAppendMsg(pCheck, "freelist leaf count too big on page %d", iPage); N--; }else{ for(i=0; ipBt->autoVacuum ){ checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0); } #endif checkRef(pCheck, iFreePage); } N -= n; } } #ifndef SQLITE_OMIT_AUTOVACUUM else{ /* If this database supports auto-vacuum and iPage is not the last ** page in this overflow list, check that the pointer-map entry for ** the following page matches iPage. */ if( pCheck->pBt->autoVacuum && N>0 ){ i = get4byte(pOvflData); checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage); } } #endif iPage = get4byte(pOvflData); sqlite3PagerUnref(pOvflPage); if( isFreeList && N<(iPage!=0) ){ checkAppendMsg(pCheck, "free-page count in header is too small"); } } } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ /* ** An implementation of a min-heap. ** ** aHeap[0] is the number of elements on the heap. aHeap[1] is the ** root element. The daughter nodes of aHeap[N] are aHeap[N*2] ** and aHeap[N*2+1]. ** ** The heap property is this: Every node is less than or equal to both ** of its daughter nodes. A consequence of the heap property is that the ** root node aHeap[1] is always the minimum value currently in the heap. ** ** The btreeHeapInsert() routine inserts an unsigned 32-bit number onto ** the heap, preserving the heap property. The btreeHeapPull() routine ** removes the root element from the heap (the minimum value in the heap) ** and then moves other nodes around as necessary to preserve the heap ** property. ** ** This heap is used for cell overlap and coverage testing. Each u32 ** entry represents the span of a cell or freeblock on a btree page. ** The upper 16 bits are the index of the first byte of a range and the ** lower 16 bits are the index of the last byte of that range. */ static void btreeHeapInsert(u32 *aHeap, u32 x){ u32 j, i = ++aHeap[0]; aHeap[i] = x; while( (j = i/2)>0 && aHeap[j]>aHeap[i] ){ x = aHeap[j]; aHeap[j] = aHeap[i]; aHeap[i] = x; i = j; } } static int btreeHeapPull(u32 *aHeap, u32 *pOut){ u32 j, i, x; if( (x = aHeap[0])==0 ) return 0; *pOut = aHeap[1]; aHeap[1] = aHeap[x]; aHeap[x] = 0xffffffff; aHeap[0]--; i = 1; while( (j = i*2)<=aHeap[0] ){ if( aHeap[j]>aHeap[j+1] ) j++; if( aHeap[i]zPfx; int saved_v1 = pCheck->v1; int saved_v2 = pCheck->v2; u8 savedIsInit = 0; /* Check that the page exists */ pBt = pCheck->pBt; usableSize = pBt->usableSize; if( iPage==0 ) return 0; if( checkRef(pCheck, iPage) ) return 0; pCheck->zPfx = "Page %d: "; pCheck->v1 = iPage; if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ checkAppendMsg(pCheck, "unable to get the page. error code=%d", rc); goto end_of_check; } /* Clear MemPage.isInit to make sure the corruption detection code in ** btreeInitPage() is executed. */ savedIsInit = pPage->isInit; pPage->isInit = 0; if( (rc = btreeInitPage(pPage))!=0 ){ assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ checkAppendMsg(pCheck, "btreeInitPage() returns error code %d", rc); goto end_of_check; } data = pPage->aData; hdr = pPage->hdrOffset; /* Set up for cell analysis */ pCheck->zPfx = "On tree page %d cell %d: "; contentOffset = get2byteNotZero(&data[hdr+5]); assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the ** number of cells on the page. */ nCell = get2byte(&data[hdr+3]); assert( pPage->nCell==nCell ); /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page ** immediately follows the b-tree page header. */ cellStart = hdr + 12 - 4*pPage->leaf; assert( pPage->aCellIdx==&data[cellStart] ); pCellIdx = &data[cellStart + 2*(nCell-1)]; if( !pPage->leaf ){ /* Analyze the right-child page of internal pages */ pgno = get4byte(&data[hdr+8]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ pCheck->zPfx = "On page %d at right child: "; checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); } #endif depth = checkTreePage(pCheck, pgno, &maxKey, maxKey); keyCanBeEqual = 0; }else{ /* For leaf pages, the coverage check will occur in the same loop ** as the other cell checks, so initialize the heap. */ heap = pCheck->heap; heap[0] = 0; } /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte ** integer offsets to the cell contents. */ for(i=nCell-1; i>=0 && pCheck->mxErr; i--){ CellInfo info; /* Check cell size */ pCheck->v2 = i; assert( pCellIdx==&data[cellStart + i*2] ); pc = get2byteAligned(pCellIdx); pCellIdx -= 2; if( pcusableSize-4 ){ checkAppendMsg(pCheck, "Offset %d out of range %d..%d", pc, contentOffset, usableSize-4); doCoverageCheck = 0; continue; } pCell = &data[pc]; pPage->xParseCell(pPage, pCell, &info); if( pc+info.nSize>usableSize ){ checkAppendMsg(pCheck, "Extends off end of page"); doCoverageCheck = 0; continue; } /* Check for integer primary key out of range */ if( pPage->intKey ){ if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){ checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey); } maxKey = info.nKey; } /* Check the content overflow list */ if( info.nPayload>info.nLocal ){ int nPage; /* Number of pages on the overflow chain */ Pgno pgnoOvfl; /* First page of the overflow chain */ assert( pc + info.iOverflow <= usableSize ); nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4); pgnoOvfl = get4byte(&pCell[info.iOverflow]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); } #endif checkList(pCheck, 0, pgnoOvfl, nPage); } if( !pPage->leaf ){ /* Check sanity of left child page for internal pages */ pgno = get4byte(pCell); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); } #endif d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey); keyCanBeEqual = 0; if( d2!=depth ){ checkAppendMsg(pCheck, "Child page depth differs"); depth = d2; } }else{ /* Populate the coverage-checking heap for leaf pages */ btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1)); } } *piMinKey = maxKey; /* Check for complete coverage of the page */ pCheck->zPfx = 0; if( doCoverageCheck && pCheck->mxErr>0 ){ /* For leaf pages, the min-heap has already been initialized and the ** cells have already been inserted. But for internal pages, that has ** not yet been done, so do it now */ if( !pPage->leaf ){ heap = pCheck->heap; heap[0] = 0; for(i=nCell-1; i>=0; i--){ u32 size; pc = get2byteAligned(&data[cellStart+i*2]); size = pPage->xCellSize(pPage, &data[pc]); btreeHeapInsert(heap, (pc<<16)|(pc+size-1)); } } /* Add the freeblocks to the min-heap ** ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header ** is the offset of the first freeblock, or zero if there are no ** freeblocks on the page. */ i = get2byte(&data[hdr+1]); while( i>0 ){ int size, j; assert( (u32)i<=usableSize-4 ); /* Enforced by btreeInitPage() */ size = get2byte(&data[i+2]); assert( (u32)(i+size)<=usableSize ); /* Enforced by btreeInitPage() */ btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1)); /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a ** big-endian integer which is the offset in the b-tree page of the next ** freeblock in the chain, or zero if the freeblock is the last on the ** chain. */ j = get2byte(&data[i]); /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of ** increasing offset. */ assert( j==0 || j>i+size ); /* Enforced by btreeInitPage() */ assert( (u32)j<=usableSize-4 ); /* Enforced by btreeInitPage() */ i = j; } /* Analyze the min-heap looking for overlap between cells and/or ** freeblocks, and counting the number of untracked bytes in nFrag. ** ** Each min-heap entry is of the form: (start_address<<16)|end_address. ** There is an implied first entry the covers the page header, the cell ** pointer index, and the gap between the cell pointer index and the start ** of cell content. ** ** The loop below pulls entries from the min-heap in order and compares ** the start_address against the previous end_address. If there is an ** overlap, that means bytes are used multiple times. If there is a gap, ** that gap is added to the fragmentation count. */ nFrag = 0; prev = contentOffset - 1; /* Implied first min-heap entry */ while( btreeHeapPull(heap,&x) ){ if( (prev&0xffff)>=(x>>16) ){ checkAppendMsg(pCheck, "Multiple uses for byte %u of page %d", x>>16, iPage); break; }else{ nFrag += (x>>16) - (prev&0xffff) - 1; prev = x; } } nFrag += usableSize - (prev&0xffff) - 1; /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments ** is stored in the fifth field of the b-tree page header. ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the ** number of fragmented free bytes within the cell content area. */ if( heap[0]==0 && nFrag!=data[hdr+7] ){ checkAppendMsg(pCheck, "Fragmentation of %d bytes reported as %d on page %d", nFrag, data[hdr+7], iPage); } } end_of_check: if( !doCoverageCheck ) pPage->isInit = savedIsInit; releasePage(pPage); pCheck->zPfx = saved_zPfx; pCheck->v1 = saved_v1; pCheck->v2 = saved_v2; return depth+1; } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ #ifndef SQLITE_OMIT_INTEGRITY_CHECK /* ** This routine does a complete check of the given BTree file. aRoot[] is ** an array of pages numbers were each page number is the root page of ** a table. nRoot is the number of entries in aRoot. ** ** A read-only or read-write transaction must be opened before calling ** this function. ** ** Write the number of error seen in *pnErr. Except for some memory ** allocation errors, an error message held in memory obtained from ** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is ** returned. If a memory allocation error occurs, NULL is returned. */ SQLITE_API char *SQLITE_STDCALL sqlite3BtreeIntegrityCheck( Btree *p, /* The btree to be checked */ int *aRoot, /* An array of root pages numbers for individual trees */ int nRoot, /* Number of entries in aRoot[] */ int mxErr, /* Stop reporting errors after this many */ int *pnErr /* Write number of errors seen to this variable */ ){ Pgno i; IntegrityCk sCheck; BtShared *pBt = p->pBt; int savedDbFlags = pBt->pBt->flags; char zErr[100]; VVA_ONLY( int nRef ); sqlite3BtreeEnter(p); assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); assert( (nRef = sqlite3PagerRefcount(pBt->pPager))>=0 ); sCheck.pBt = pBt; sCheck.pPager = pBt->pPager; sCheck.nPage = btreePagecount(sCheck.pBt); sCheck.mxErr = mxErr; sCheck.nErr = 0; sCheck.mallocFailed = 0; sCheck.zPfx = 0; sCheck.v1 = 0; sCheck.v2 = 0; sCheck.aPgRef = 0; sCheck.heap = 0; sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); if( sCheck.nPage==0 ){ goto integrity_ck_cleanup; } sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); if( !sCheck.aPgRef ){ sCheck.mallocFailed = 1; goto integrity_ck_cleanup; } sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); if( sCheck.heap==0 ){ sCheck.mallocFailed = 1; goto integrity_ck_cleanup; } i = PENDING_BYTE_PAGE(pBt); if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); /* Check the integrity of the freelist */ sCheck.zPfx = "Main freelist: "; checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), get4byte(&pBt->pPage1->aData[36])); sCheck.zPfx = 0; /* Check all the tables. */ testcase( pBt->pBt->flags & SQLITE_CellSizeCk ); pBt->pBt->flags &= ~SQLITE_CellSizeCk; for(i=0; (int)iautoVacuum && aRoot[i]>1 ){ checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); } #endif checkTreePage(&sCheck, aRoot[i], ¬Used, LARGEST_INT64); } pBt->pBt->flags = savedDbFlags; /* Make sure every page in the file is referenced */ for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ #ifdef SQLITE_OMIT_AUTOVACUUM if( getPageReferenced(&sCheck, i)==0 ){ checkAppendMsg(&sCheck, "Page %d is never used", i); } #else /* If the database supports auto-vacuum, make sure no tables contain ** references to pointer-map pages. */ if( getPageReferenced(&sCheck, i)==0 && (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ checkAppendMsg(&sCheck, "Page %d is never used", i); } if( getPageReferenced(&sCheck, i)!=0 && (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); } #endif } /* Clean up and report errors. */ integrity_ck_cleanup: sqlite3PageFree(sCheck.heap); sqlite3_free(sCheck.aPgRef); if( sCheck.mallocFailed ){ sqlite3StrAccumReset(&sCheck.errMsg); sCheck.nErr++; } *pnErr = sCheck.nErr; if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg); /* Make sure this analysis did not leave any unref() pages. */ assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); sqlite3BtreeLeave(p); return sqlite3StrAccumFinish(&sCheck.errMsg); } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeFileFormat(Btree *p){ return p->pBt->file_format; } /* ** Return the full pathname of the underlying database file. Return ** an empty string if the database is in-memory or a TEMP database. ** ** The pager filename is invariant as long as the pager is ** open so it is safe to access without the BtShared mutex. */ SQLITE_API const char *SQLITE_STDCALL sqlite3BtreeGetFilename(Btree *p){ assert( p->pBt->pPager!=0 ); return sqlite3PagerFilename(p->pBt->pPager, 1); } /* ** Return the pathname of the journal file for this database. The return ** value of this routine is the same regardless of whether the journal file ** has been created or not. ** ** The pager journal filename is invariant as long as the pager is ** open so it is safe to access without the BtShared mutex. */ SQLITE_API const char *SQLITE_STDCALL sqlite3BtreeGetJournalname(Btree *p){ assert( p->pBt->pPager!=0 ); return sqlite3PagerJournalname(p->pBt->pPager); } /* ** Return non-zero if a transaction is active. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsInTrans(Btree *p){ assert( p==0 || sqlite3_mutex_held(p->db->mutex) ); return (p && (p->inTrans==TRANS_WRITE)); } #ifndef SQLITE_OMIT_WAL /* ** Run a checkpoint on the Btree passed as the first argument. ** ** Return SQLITE_LOCKED if this or any other connection has an open ** transaction on the shared-cache the argument Btree is connected to. ** ** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){ int rc = SQLITE_OK; if( p ){ BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); if( pBt->inTransaction!=TRANS_NONE ){ rc = SQLITE_LOCKED; }else{ rc = sqlite3PagerCheckpoint(pBt->pPager, eMode, pnLog, pnCkpt); } sqlite3BtreeLeave(p); } return rc; } #endif /* ** Return non-zero if a read (or write) transaction is active. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsInReadTrans(Btree *p){ assert( p ); assert( sqlite3_mutex_held(p->db->mutex) ); return p->inTrans!=TRANS_NONE; } SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsInBackup(Btree *p){ assert( p ); assert( sqlite3_mutex_held(p->db->mutex) ); return p->nBackup!=0; } /* ** This function returns a pointer to a blob of memory associated with ** a single shared-btree. The memory is used by client code for its own ** purposes (for example, to store a high-level schema associated with ** the shared-btree). The btree layer manages reference counting issues. ** ** The first time this is called on a shared-btree, nBytes bytes of memory ** are allocated, zeroed, and returned to the caller. For each subsequent ** call the nBytes parameter is ignored and a pointer to the same blob ** of memory returned. ** ** If the nBytes parameter is 0 and the blob of memory has not yet been ** allocated, a null pointer is returned. If the blob has already been ** allocated, it is returned as normal. ** ** Just before the shared-btree is closed, the function passed as the ** xFree argument when the memory allocation was made is invoked on the ** blob of allocated memory. The xFree function should not call sqlite3_free() ** on the memory, the btree layer does that. */ SQLITE_API void *SQLITE_STDCALL sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){ BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); if( !pBt->pSchema && nBytes ){ pBt->pSchema = sqlite3DbMallocZero(0, nBytes); pBt->xFreeSchema = xFree; } sqlite3BtreeLeave(p); return pBt->pSchema; } /* ** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared ** btree as the argument handle holds an exclusive lock on the ** sqlite_master table. Otherwise SQLITE_OK. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSchemaLocked(Btree *p){ int rc; assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); rc = querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK); assert( rc==SQLITE_OK || rc==SQLITE_LOCKED_SHAREDCACHE ); sqlite3BtreeLeave(p); return rc; } #ifndef SQLITE_OMIT_SHARED_CACHE /* ** Obtain a lock on the table whose root page is iTab. The ** lock is a write lock if isWritelock is true or a read lock ** if it is false. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ int rc = SQLITE_OK; assert( p->inTrans!=TRANS_NONE ); if( p->sharable ){ u8 lockType = READ_LOCK + isWriteLock; assert( READ_LOCK+1==WRITE_LOCK ); assert( isWriteLock==0 || isWriteLock==1 ); sqlite3BtreeEnter(p); rc = querySharedCacheTableLock(p, iTab, lockType); if( rc==SQLITE_OK ){ rc = setSharedCacheTableLock(p, iTab, lockType); } sqlite3BtreeLeave(p); } return rc; } #endif #ifndef SQLITE_OMIT_INCRBLOB /* ** Argument pCsr must be a cursor opened for writing on an ** INTKEY table currently pointing at a valid table entry. ** This function modifies the data stored as part of that entry. ** ** Only the data content may only be modified, it is not possible to ** change the length of the data stored. If this function is called with ** parameters that attempt to write past the end of the existing data, ** no modifications are made and SQLITE_CORRUPT is returned. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ int rc; assert( cursorHoldsMutex(pCsr) ); assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); assert( pCsr->curFlags & BTCF_Incrblob ); rc = restoreCursorPosition(pCsr); if( rc!=SQLITE_OK ){ return rc; } assert( pCsr->eState!=CURSOR_REQUIRESEEK ); if( pCsr->eState!=CURSOR_VALID ){ return SQLITE_ABORT; } /* Save the positions of all other cursors open on this table. This is ** required in case any of them are holding references to an xFetch ** version of the b-tree page modified by the accessPayload call below. ** ** Note that pCsr must be open on a INTKEY table and saveCursorPosition() ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence ** saveAllCursors can only return SQLITE_OK. */ VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); assert( rc==SQLITE_OK ); /* Check some assumptions: ** (a) the cursor is open for writing, ** (b) there is a read/write transaction open, ** (c) the connection holds a write-lock on the table (if required), ** (d) there are no conflicting read-locks, and ** (e) the cursor points at a valid row of an intKey table. */ if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){ return SQLITE_READONLY; } assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0 && pCsr->pBt->inTransaction==TRANS_WRITE ); assert( hasSharedCacheTableLock(pCsr->pBtree, pCsr->pgnoRoot, 0, 2) ); assert( !hasReadConflicts(pCsr->pBtree, pCsr->pgnoRoot) ); assert( pCsr->apPage[pCsr->iPage]->intKey ); return accessPayload(pCsr, offset, amt, (unsigned char *)z, 1); } /* ** Mark this cursor as an incremental blob cursor. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeIncrblobCursor(BtCursor *pCur){ pCur->curFlags |= BTCF_Incrblob; pCur->pBtree->hasIncrblobCur = 1; } #endif /* ** Set both the "read version" (single byte at byte offset 18) and ** "write version" (single byte at byte offset 19) fields in the database ** header to iVersion. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ BtShared *pBt = pBtree->pBt; int rc; /* Return code */ assert( iVersion==1 || iVersion==2 ); /* If setting the version fields to 1, do not automatically open the ** WAL connection, even if the version fields are currently set to 2. */ pBt->btsFlags &= ~BTS_NO_WAL; if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL; rc = sqlite3BtreeBeginTrans(pBtree, 0); if( rc==SQLITE_OK ){ u8 *aData = pBt->pPage1->aData; if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){ rc = sqlite3BtreeBeginTrans(pBtree, 2); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); if( rc==SQLITE_OK ){ aData[18] = (u8)iVersion; aData[19] = (u8)iVersion; } } } } pBt->btsFlags &= ~BTS_NO_WAL; return rc; } /* ** set the mask of hint flags for cursor pCsr. */ SQLITE_API void SQLITE_STDCALL sqlite3BtreeCursorHints(BtCursor *pCsr, unsigned int mask){ assert( mask==BTREE_BULKLOAD || mask==BTREE_SEEK_EQ || mask==0 ); pCsr->hints = mask; } #ifdef SQLITE_DEBUG /* ** Return true if the cursor has a hint specified. This routine is ** only used from within assert() statements */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){ return (pCsr->hints & mask)!=0; } #endif /* ** Return true if the given Btree is read-only. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeIsReadonly(Btree *p){ return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; } /* ** Return the size of the header added to each page by this module. */ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } SQLITE_API const char *SQLITE_STDCALL sqlite3BtreeErrName(int rc){ const char *zName = 0; int i, origRc = rc; for(i=0; i<2 && zName==0; i++, rc &= 0xff){ switch( rc ){ case SQLITE_OK: zName = "SQLITE_OK"; break; case SQLITE_ERROR: zName = "SQLITE_ERROR"; break; case SQLITE_INTERNAL: zName = "SQLITE_INTERNAL"; break; case SQLITE_PERM: zName = "SQLITE_PERM"; break; case SQLITE_ABORT: zName = "SQLITE_ABORT"; break; case SQLITE_ABORT_ROLLBACK: zName = "SQLITE_ABORT_ROLLBACK"; break; case SQLITE_BUSY: zName = "SQLITE_BUSY"; break; case SQLITE_BUSY_RECOVERY: zName = "SQLITE_BUSY_RECOVERY"; break; case SQLITE_BUSY_SNAPSHOT: zName = "SQLITE_BUSY_SNAPSHOT"; break; case SQLITE_LOCKED: zName = "SQLITE_LOCKED"; break; case SQLITE_LOCKED_SHAREDCACHE: zName = "SQLITE_LOCKED_SHAREDCACHE";break; case SQLITE_NOMEM: zName = "SQLITE_NOMEM"; break; case SQLITE_READONLY: zName = "SQLITE_READONLY"; break; case SQLITE_READONLY_RECOVERY: zName = "SQLITE_READONLY_RECOVERY"; break; case SQLITE_READONLY_CANTLOCK: zName = "SQLITE_READONLY_CANTLOCK"; break; case SQLITE_READONLY_ROLLBACK: zName = "SQLITE_READONLY_ROLLBACK"; break; case SQLITE_READONLY_DBMOVED: zName = "SQLITE_READONLY_DBMOVED"; break; case SQLITE_INTERRUPT: zName = "SQLITE_INTERRUPT"; break; case SQLITE_IOERR: zName = "SQLITE_IOERR"; break; case SQLITE_IOERR_READ: zName = "SQLITE_IOERR_READ"; break; case SQLITE_IOERR_SHORT_READ: zName = "SQLITE_IOERR_SHORT_READ"; break; case SQLITE_IOERR_WRITE: zName = "SQLITE_IOERR_WRITE"; break; case SQLITE_IOERR_FSYNC: zName = "SQLITE_IOERR_FSYNC"; break; case SQLITE_IOERR_DIR_FSYNC: zName = "SQLITE_IOERR_DIR_FSYNC"; break; case SQLITE_IOERR_TRUNCATE: zName = "SQLITE_IOERR_TRUNCATE"; break; case SQLITE_IOERR_FSTAT: zName = "SQLITE_IOERR_FSTAT"; break; case SQLITE_IOERR_UNLOCK: zName = "SQLITE_IOERR_UNLOCK"; break; case SQLITE_IOERR_RDLOCK: zName = "SQLITE_IOERR_RDLOCK"; break; case SQLITE_IOERR_DELETE: zName = "SQLITE_IOERR_DELETE"; break; case SQLITE_IOERR_NOMEM: zName = "SQLITE_IOERR_NOMEM"; break; case SQLITE_IOERR_ACCESS: zName = "SQLITE_IOERR_ACCESS"; break; case SQLITE_IOERR_CHECKRESERVEDLOCK: zName = "SQLITE_IOERR_CHECKRESERVEDLOCK"; break; case SQLITE_IOERR_LOCK: zName = "SQLITE_IOERR_LOCK"; break; case SQLITE_IOERR_CLOSE: zName = "SQLITE_IOERR_CLOSE"; break; case SQLITE_IOERR_DIR_CLOSE: zName = "SQLITE_IOERR_DIR_CLOSE"; break; case SQLITE_IOERR_SHMOPEN: zName = "SQLITE_IOERR_SHMOPEN"; break; case SQLITE_IOERR_SHMSIZE: zName = "SQLITE_IOERR_SHMSIZE"; break; case SQLITE_IOERR_SHMLOCK: zName = "SQLITE_IOERR_SHMLOCK"; break; case SQLITE_IOERR_SHMMAP: zName = "SQLITE_IOERR_SHMMAP"; break; case SQLITE_IOERR_SEEK: zName = "SQLITE_IOERR_SEEK"; break; case SQLITE_IOERR_DELETE_NOENT: zName = "SQLITE_IOERR_DELETE_NOENT";break; case SQLITE_IOERR_MMAP: zName = "SQLITE_IOERR_MMAP"; break; case SQLITE_IOERR_GETTEMPPATH: zName = "SQLITE_IOERR_GETTEMPPATH"; break; case SQLITE_IOERR_CONVPATH: zName = "SQLITE_IOERR_CONVPATH"; break; case SQLITE_CORRUPT: zName = "SQLITE_CORRUPT"; break; case SQLITE_CORRUPT_VTAB: zName = "SQLITE_CORRUPT_VTAB"; break; case SQLITE_NOTFOUND: zName = "SQLITE_NOTFOUND"; break; case SQLITE_FULL: zName = "SQLITE_FULL"; break; case SQLITE_CANTOPEN: zName = "SQLITE_CANTOPEN"; break; case SQLITE_CANTOPEN_NOTEMPDIR: zName = "SQLITE_CANTOPEN_NOTEMPDIR";break; case SQLITE_CANTOPEN_ISDIR: zName = "SQLITE_CANTOPEN_ISDIR"; break; case SQLITE_CANTOPEN_FULLPATH: zName = "SQLITE_CANTOPEN_FULLPATH"; break; case SQLITE_CANTOPEN_CONVPATH: zName = "SQLITE_CANTOPEN_CONVPATH"; break; case SQLITE_PROTOCOL: zName = "SQLITE_PROTOCOL"; break; case SQLITE_EMPTY: zName = "SQLITE_EMPTY"; break; case SQLITE_SCHEMA: zName = "SQLITE_SCHEMA"; break; case SQLITE_TOOBIG: zName = "SQLITE_TOOBIG"; break; case SQLITE_CONSTRAINT: zName = "SQLITE_CONSTRAINT"; break; case SQLITE_CONSTRAINT_UNIQUE: zName = "SQLITE_CONSTRAINT_UNIQUE"; break; case SQLITE_CONSTRAINT_TRIGGER: zName = "SQLITE_CONSTRAINT_TRIGGER";break; case SQLITE_CONSTRAINT_FOREIGNKEY: zName = "SQLITE_CONSTRAINT_FOREIGNKEY"; break; case SQLITE_CONSTRAINT_CHECK: zName = "SQLITE_CONSTRAINT_CHECK"; break; case SQLITE_CONSTRAINT_PRIMARYKEY: zName = "SQLITE_CONSTRAINT_PRIMARYKEY";break; case SQLITE_CONSTRAINT_NOTNULL: zName = "SQLITE_CONSTRAINT_NOTNULL";break; case SQLITE_CONSTRAINT_COMMITHOOK: zName = "SQLITE_CONSTRAINT_COMMITHOOK";break; case SQLITE_CONSTRAINT_VTAB: zName = "SQLITE_CONSTRAINT_VTAB"; break; case SQLITE_CONSTRAINT_FUNCTION: zName = "SQLITE_CONSTRAINT_FUNCTION";break; case SQLITE_CONSTRAINT_ROWID: zName = "SQLITE_CONSTRAINT_ROWID"; break; case SQLITE_MISMATCH: zName = "SQLITE_MISMATCH"; break; case SQLITE_MISUSE: zName = "SQLITE_MISUSE"; break; case SQLITE_NOLFS: zName = "SQLITE_NOLFS"; break; case SQLITE_AUTH: zName = "SQLITE_AUTH"; break; case SQLITE_FORMAT: zName = "SQLITE_FORMAT"; break; case SQLITE_RANGE: zName = "SQLITE_RANGE"; break; case SQLITE_NOTADB: zName = "SQLITE_NOTADB"; break; case SQLITE_ROW: zName = "SQLITE_ROW"; break; case SQLITE_NOTICE: zName = "SQLITE_NOTICE"; break; case SQLITE_NOTICE_RECOVER_WAL: zName = "SQLITE_NOTICE_RECOVER_WAL";break; case SQLITE_NOTICE_RECOVER_ROLLBACK: zName = "SQLITE_NOTICE_RECOVER_ROLLBACK"; break; case SQLITE_WARNING: zName = "SQLITE_WARNING"; break; case SQLITE_WARNING_AUTOINDEX: zName = "SQLITE_WARNING_AUTOINDEX"; break; case SQLITE_DONE: zName = "SQLITE_DONE"; break; } } if( zName==0 ){ static char zBuf[50]; sqlite3_snprintf(sizeof(zBuf), zBuf, "SQLITE_UNKNOWN(%d)", origRc); zName = zBuf; } return zName; } /* ** The following functions: ** ** sqlite3BtreeSerialType() ** sqlite3BtreeSerialTypeLen() ** sqlite3BtreeSerialLen() ** sqlite3BtreeSerialPut() ** sqlite3BtreeSerialGet() ** ** encapsulate the code that serializes values for storage in SQLite ** data and index records. Each serialized value consists of a ** 'serial-type' and a blob of data. The serial type is an 8-byte unsigned ** integer, stored as a varint. ** ** In an SQLite index record, the serial type is stored directly before ** the blob of data that it corresponds to. In a table record, all serial ** types are stored at the start of the record, and the blobs of data at ** the end. Hence these functions allow the caller to handle the ** serial-type and data blob separately. ** ** The following table describes the various storage classes for data: ** ** serial type bytes of data type ** -------------- --------------- --------------- ** 0 0 NULL ** 1 1 signed integer ** 2 2 signed integer ** 3 3 signed integer ** 4 4 signed integer ** 5 6 signed integer ** 6 8 signed integer ** 7 8 IEEE float ** 8 0 Integer constant 0 ** 9 0 Integer constant 1 ** 10,11 reserved for expansion ** N>=12 and even (N-12)/2 BLOB ** N>=13 and odd (N-13)/2 text ** ** The 8 and 9 types were added in 3.3.0, file format 4. Prior versions ** of SQLite will not understand those serial types. */ /* ** Return the serial-type for the value stored in pMem. */ SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialType(Mem *pMem, int file_format){ int flags = pMem->flags; u32 n; if( flags&MEM_Null ){ return 0; } if( flags&MEM_Int ){ /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. */ # define MAX_6BYTE ((((i64)0x00008000)<<32)-1) i64 i = pMem->u.i; u64 u; if( i<0 ){ u = ~i; }else{ u = i; } if( u<=127 ){ return ((i&1)==i && file_format>=4) ? 8+(u32)u : 1; } if( u<=32767 ) return 2; if( u<=8388607 ) return 3; if( u<=2147483647 ) return 4; if( u<=MAX_6BYTE ) return 5; return 6; } if( flags&MEM_Real ){ return 7; } assert( pMem->pBtree->mallocFailed || flags&(MEM_Str|MEM_Blob) ); assert( pMem->n>=0 ); n = (u32)pMem->n; if( flags & MEM_Zero ){ n += pMem->u.nZero; } return ((n*2) + 12 + ((flags&MEM_Str)!=0)); } /* ** The sizes for serial types less than 12 */ static const u8 sqlite3SmallTypeSizes[] = { 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 }; /* ** Return the length of the data corresponding to the supplied serial-type. */ SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialTypeLen(u32 serial_type){ if( serial_type>=12 ){ return (serial_type-12)/2; }else{ return sqlite3SmallTypeSizes[serial_type]; } } /* ** If we are on an architecture with mixed-endian floating ** points (ex: ARM7) then swap the lower 4 bytes with the ** upper 4 bytes. Return the result. ** ** For most architectures, this is a no-op. ** ** (later): It is reported to me that the mixed-endian problem ** on ARM7 is an issue with GCC, not with the ARM7 chip. It seems ** that early versions of GCC stored the two words of a 64-bit ** float in the wrong order. And that error has been propagated ** ever since. The blame is not necessarily with GCC, though. ** GCC might have just copying the problem from a prior compiler. ** I am also told that newer versions of GCC that follow a different ** ABI get the byte order right. ** ** Developers using SQLite on an ARM7 should compile and run their ** application using -DSQLITE_DEBUG=1 at least once. With DEBUG ** enabled, some asserts below will ensure that the byte order of ** floating point values is correct. ** ** (2007-08-30) Frank van Vugt has studied this problem closely ** and has send his findings to the SQLite developers. Frank ** writes that some Linux kernels offer floating point hardware ** emulation that uses only 32-bit mantissas instead of a full ** 48-bits as required by the IEEE standard. (This is the ** CONFIG_FPE_FASTFPE option.) On such systems, floating point ** byte swapping becomes very complicated. To avoid problems, ** the necessary byte swapping is carried out using a 64-bit integer ** rather than a 64-bit float. Frank assures us that the code here ** works for him. We, the developers, have no way to independently ** verify this, but Frank seems to know what he is talking about ** so we trust him. */ #ifdef SQLITE_MIXED_ENDIAN_64BIT_FLOAT static u64 floatSwap(u64 in){ union { u64 r; u32 i[2]; } u; u32 t; u.r = in; t = u.i[0]; u.i[0] = u.i[1]; u.i[1] = t; return u.r; } # define swapMixedEndianFloat(X) X = floatSwap(X) #else # define swapMixedEndianFloat(X) #endif /* ** Write the serialized data blob for the value stored in pMem into ** buf. It is assumed that the caller has allocated sufficient space. ** Return the number of bytes written. ** ** nBuf is the amount of space left in buf[]. The caller is responsible ** for allocating enough space to buf[] to hold the entire field, exclusive ** of the pMem->u.nZero bytes for a MEM_Zero value. ** ** Return the number of bytes actually written into buf[]. The number ** of bytes in the zero-filled tail is included in the return value only ** if those bytes were zeroed in buf[]. */ SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ u32 len; /* Integer and Real */ if( serial_type<=7 && serial_type>0 ){ u64 v; u32 i; if( serial_type==7 ){ assert( sizeof(v)==sizeof(pMem->u.r) ); memcpy(&v, &pMem->u.r, sizeof(v)); swapMixedEndianFloat(v); }else{ v = pMem->u.i; } len = i = sqlite3SmallTypeSizes[serial_type]; assert( i>0 ); do{ buf[--i] = (u8)(v&0xFF); v >>= 8; }while( i ); return len; } /* String or blob */ if( serial_type>=12 ){ assert( pMem->n + ((pMem->flags & MEM_Zero)?pMem->u.nZero:0) == (int)sqlite3BtreeSerialTypeLen(serial_type) ); len = pMem->n; memcpy(buf, pMem->z, len); return len; } /* NULL or constants 0 or 1 */ return 0; } /* Input "x" is a sequence of unsigned characters that represent a ** big-endian integer. Return the equivalent native integer */ #define ONE_BYTE_INT(x) ((i8)(x)[0]) #define TWO_BYTE_INT(x) (256*(i8)((x)[0])|(x)[1]) #define THREE_BYTE_INT(x) (65536*(i8)((x)[0])|((x)[1]<<8)|(x)[2]) #define FOUR_BYTE_UINT(x) (((u32)(x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3]) #define FOUR_BYTE_INT(x) (16777216*(i8)((x)[0])|((x)[1]<<16)|((x)[2]<<8)|(x)[3]) /* ** Size of struct Mem not including the Mem.zMalloc member or anything that ** follows. */ #define MEMCELLSIZE offsetof(Mem,zMalloc) #define VdbeMemDynamic(X) \ (((X)->flags&MEM_Dyn)!=0) /* ** Initialize bulk memory to be a consistent Mem object. ** ** The minimum amount of initialization feasible is performed. */ SQLITE_PRIVATE void sqlite3VdbeMemInit(Mem *pMem, Btree *pBtree, u16 flags){ assert( (flags & ~MEM_TypeMask)==0 ); pMem->flags = flags; pMem->pBtree = pBtree; pMem->szMalloc = 0; } /* ** If pMem is an object with a valid string representation, this routine ** ensures the internal encoding for the string representation is ** 'desiredEnc', one of SQLITE_UTF8, SQLITE_UTF16LE or SQLITE_UTF16BE. ** ** If pMem is not a string object, or the encoding of the string ** representation is already stored using the requested encoding, then this ** routine is a no-op. ** ** SQLITE_OK is returned if the conversion is successful (or not required). ** SQLITE_NOMEM may be returned if a malloc() fails during conversion ** between formats. */ SQLITE_PRIVATE int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){ assert( (pMem->flags&MEM_RowSet)==0 ); assert( desiredEnc==SQLITE_UTF8 || desiredEnc==SQLITE_UTF16LE || desiredEnc==SQLITE_UTF16BE ); if( !(pMem->flags&MEM_Str) || pMem->enc==desiredEnc ){ return SQLITE_OK; } assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) ); return SQLITE_ERROR; } /* ** If the memory cell contains a value that must be freed by ** invoking the external callback in Mem.xDel, then this routine ** will free that value. It also sets Mem.flags to MEM_Null. ** ** This is a helper routine for sqlite3VdbeMemSetNull() and ** for sqlite3VdbeMemRelease(). Use those other routines as the ** entry point for releasing Mem resources. */ static SQLITE_NOINLINE void vdbeMemClearExternAndSetNull(Mem *p){ assert( p->db==0 || sqlite3_mutex_held(p->db->mutex) ); assert( VdbeMemDynamic(p) ); if( p->flags&MEM_Dyn ){ assert( p->xDel!=SQLITE_DYNAMIC && p->xDel!=0 ); p->xDel((void *)p->z); } p->flags = MEM_Null; } /* ** Delete any previous value and set the value stored in *pMem to NULL. ** ** This routine calls the Mem.xDel destructor to dispose of values that ** require the destructor. But it preserves the Mem.zMalloc memory allocation. ** To free all resources, use sqlite3VdbeMemRelease(), which both calls this ** routine to invoke the destructor and deallocates Mem.zMalloc. ** ** Use this routine to reset the Mem prior to insert a new value. ** ** Use sqlite3VdbeMemRelease() to complete erase the Mem prior to abandoning it. */ SQLITE_PRIVATE void sqlite3VdbeMemSetNull(Mem *pMem){ if( VdbeMemDynamic(pMem) ){ vdbeMemClearExternAndSetNull(pMem); }else{ pMem->flags = MEM_Null; } } /* ** Make sure pMem->z points to a writable allocation of at least ** min(n,32) bytes. ** ** If the bPreserve argument is true, then copy of the content of ** pMem->z into the new allocation. pMem must be either a string or ** blob if bPreserve is true. If bPreserve is false, any prior content ** in pMem->z is discarded. */ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3VdbeMemGrow(Mem *pMem, int n, int bPreserve){ assert( sqlite3VdbeCheckMemInvariants(pMem) ); assert( (pMem->flags&MEM_RowSet)==0 ); /* If the bPreserve flag is set to true, then the memory cell must already ** contain a valid string or blob value. */ assert( bPreserve==0 || pMem->flags&(MEM_Blob|MEM_Str) ); testcase( bPreserve && pMem->z==0 ); assert( pMem->szMalloc==0 || pMem->szMalloc==sqlite3DbMallocSize(pMem->pBtree, pMem->zMalloc) ); if( pMem->szMallocszMalloc>0 && pMem->z==pMem->zMalloc ){ pMem->z = pMem->zMalloc = sqlite3DbReallocOrFree(pMem->pBtree, pMem->z, n); bPreserve = 0; }else{ if( pMem->szMalloc>0 ) sqlite3DbFree(pMem->pBtree, pMem->zMalloc); pMem->zMalloc = sqlite3DbMallocRaw(pMem->pBtree, n); } if( pMem->zMalloc==0 ){ sqlite3VdbeMemSetNull(pMem); pMem->z = 0; pMem->szMalloc = 0; return SQLITE_NOMEM; }else{ pMem->szMalloc = sqlite3DbMallocSize(pMem->pBtree, pMem->zMalloc); } } if( bPreserve && pMem->z && pMem->z!=pMem->zMalloc ){ memcpy(pMem->zMalloc, pMem->z, pMem->n); } if( (pMem->flags&MEM_Dyn)!=0 ){ assert( pMem->xDel!=0 && pMem->xDel!=SQLITE_DYNAMIC ); pMem->xDel((void *)(pMem->z)); } pMem->z = pMem->zMalloc; pMem->flags &= ~(MEM_Dyn|MEM_Ephem|MEM_Static); return SQLITE_OK; } /* ** Change the pMem->zMalloc allocation to be at least szNew bytes. ** If pMem->zMalloc already meets or exceeds the requested size, this ** routine is a no-op. ** ** Any prior string or blob content in the pMem object may be discarded. ** The pMem->xDel destructor is called, if it exists. Though MEM_Str ** and MEM_Blob values may be discarded, MEM_Int, MEM_Real, and MEM_Null ** values are preserved. ** ** Return SQLITE_OK on success or an error code (probably SQLITE_NOMEM) ** if unable to complete the resizing. */ SQLITE_PRIVATE int sqlite3VdbeMemClearAndResize(Mem *pMem, int szNew){ assert( szNew>0 ); assert( (pMem->flags & MEM_Dyn)==0 || pMem->szMalloc==0 ); if( pMem->szMallocflags & MEM_Dyn)==0 ); pMem->z = pMem->zMalloc; pMem->flags &= (MEM_Null|MEM_Int|MEM_Real); return SQLITE_OK; } #ifndef SQLITE_OMIT_INCRBLOB int sqlite3VdbeMemExpandBlob(Mem *); #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0) #else #define sqlite3VdbeMemExpandBlob(x) SQLITE_OK #define ExpandBlob(P) SQLITE_OK #endif /* ** Change pMem so that its MEM_Str or MEM_Blob value is stored in ** MEM.zMalloc, where it can be safely written. ** ** Return SQLITE_OK on success or SQLITE_NOMEM if malloc fails. */ SQLITE_PRIVATE int sqlite3VdbeMemMakeWriteable(Mem *pMem){ int f; assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); assert( (pMem->flags&MEM_RowSet)==0 ); ExpandBlob(pMem); f = pMem->flags; if( (f&(MEM_Str|MEM_Blob)) && (pMem->szMalloc==0 || pMem->z!=pMem->zMalloc) ){ if( sqlite3VdbeMemGrow(pMem, pMem->n + 2, 1) ){ return SQLITE_NOMEM; } pMem->z[pMem->n] = 0; pMem->z[pMem->n+1] = 0; pMem->flags |= MEM_Term; } pMem->flags &= ~MEM_Ephem; #ifdef SQLITE_DEBUG pMem->pScopyFrom = 0; #endif return SQLITE_OK; } /* ** It is already known that pMem contains an unterminated string. ** Add the zero terminator. */ static SQLITE_NOINLINE int vdbeMemAddTerminator(Mem *pMem){ if( sqlite3VdbeMemGrow(pMem, pMem->n+2, 1) ){ return SQLITE_NOMEM; } pMem->z[pMem->n] = 0; pMem->z[pMem->n+1] = 0; pMem->flags |= MEM_Term; return SQLITE_OK; } /* ** Make sure the given Mem is \u0000 terminated. */ SQLITE_PRIVATE int sqlite3VdbeMemNulTerminate(Mem *pMem){ assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); testcase( (pMem->flags & (MEM_Term|MEM_Str))==(MEM_Term|MEM_Str) ); testcase( (pMem->flags & (MEM_Term|MEM_Str))==0 ); if( (pMem->flags & (MEM_Term|MEM_Str))!=MEM_Str ){ return SQLITE_OK; /* Nothing to do */ }else{ return vdbeMemAddTerminator(pMem); } } /* ** Add MEM_Str to the set of representations for the given Mem. Numbers ** are converted using sqlite3_snprintf(). Converting a BLOB to a string ** is a no-op. ** ** Existing representations MEM_Int and MEM_Real are invalidated if ** bForce is true but are retained if bForce is false. ** ** A MEM_Null value will never be passed to this function. This function is ** used for converting values to text for returning to the user (i.e. via ** sqlite3_value_text()), or for ensuring that values to be used as btree ** keys are strings. In the former case a NULL pointer is returned the ** user and the latter is an internal programming error. */ SQLITE_PRIVATE int sqlite3VdbeMemStringify(Mem *pMem, u8 enc, u8 bForce){ int fg = pMem->flags; const int nByte = 32; assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); assert( !(fg&MEM_Zero) ); assert( !(fg&(MEM_Str|MEM_Blob)) ); assert( fg&(MEM_Int|MEM_Real) ); assert( (pMem->flags&MEM_RowSet)==0 ); assert( EIGHT_BYTE_ALIGNMENT(pMem) ); if( sqlite3VdbeMemClearAndResize(pMem, nByte) ){ return SQLITE_NOMEM; } /* For a Real or Integer, use sqlite3_snprintf() to produce the UTF-8 ** string representation of the value. Then, if the required encoding ** is UTF-16le or UTF-16be do a translation. ** ** FIX ME: It would be better if sqlite3_snprintf() could do UTF-16. */ if( fg & MEM_Int ){ sqlite3_snprintf(nByte, pMem->z, "%lld", pMem->u.i); }else{ assert( fg & MEM_Real ); sqlite3_snprintf(nByte, pMem->z, "%!.15g", pMem->u.r); } pMem->n = sqlite3Strlen30(pMem->z); pMem->enc = SQLITE_UTF8; pMem->flags |= MEM_Str|MEM_Term; if( bForce ) pMem->flags &= ~(MEM_Int|MEM_Real); sqlite3VdbeChangeEncoding(pMem, enc); return SQLITE_OK; } /* ** Release memory held by the Mem p, both external memory cleared ** by p->xDel and memory in p->zMalloc. ** ** This is a helper routine invoked by sqlite3VdbeMemRelease() in ** the unusual case where there really is memory in p that needs ** to be freed. */ static SQLITE_NOINLINE void vdbeMemClear(Mem *p){ if( VdbeMemDynamic(p) ){ vdbeMemClearExternAndSetNull(p); } if( p->szMalloc ){ sqlite3DbFree(p->pBtree, p->zMalloc); p->szMalloc = 0; } p->z = 0; } /* ** Release any memory resources held by the Mem. Both the memory that is ** free by Mem.xDel and the Mem.zMalloc allocation are freed. ** ** Use this routine prior to clean up prior to abandoning a Mem, or to ** reset a Mem back to its minimum memory utilization. ** ** Use sqlite3VdbeMemSetNull() to release just the Mem.xDel space ** prior to inserting new content into the Mem. */ SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p){ assert( sqlite3VdbeCheckMemInvariants(p) ); if( VdbeMemDynamic(p) || p->szMalloc ){ vdbeMemClear(p); } } /* ** Make an shallow copy of pFrom into pTo. Prior contents of ** pTo are freed. The pFrom->z field is not duplicated. If ** pFrom->z is used, then pTo->z points to the same thing as pFrom->z ** and flags gets srcType (either MEM_Ephem or MEM_Static). */ static void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType); static SQLITE_NOINLINE void vdbeClrCopy(Mem *pTo, const Mem *pFrom, int eType){ vdbeMemClearExternAndSetNull(pTo); assert( !VdbeMemDynamic(pTo) ); sqlite3VdbeMemShallowCopy(pTo, pFrom, eType); } static void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){ assert( (pFrom->flags & MEM_RowSet)==0 ); assert( pTo->pBtree==pFrom->pBtree ); if( VdbeMemDynamic(pTo) ){ vdbeClrCopy(pTo,pFrom,srcType); return; } memcpy(pTo, pFrom, MEMCELLSIZE); if( (pFrom->flags&MEM_Static)==0 ){ pTo->flags &= ~(MEM_Dyn|MEM_Static|MEM_Ephem); assert( srcType==MEM_Ephem || srcType==MEM_Static ); pTo->flags |= srcType; } } /* ** Deserialize the data blob pointed to by buf as serial type serial_type ** and store the result in pMem. Return the number of bytes read. ** ** This function is implemented as two separate routines for performance. ** The few cases that require local variables are broken out into a separate ** routine so that in most cases the overhead of moving the stack pointer ** is avoided. */ static u32 SQLITE_NOINLINE serialGet( const unsigned char *buf, /* Buffer to deserialize from */ u32 serial_type, /* Serial type to deserialize */ Mem *pMem /* Memory cell to write value into */ ){ u64 x = FOUR_BYTE_UINT(buf); u32 y = FOUR_BYTE_UINT(buf+4); x = (x<<32) + y; if( serial_type==6 ){ /* EVIDENCE-OF: R-29851-52272 Value is a big-endian 64-bit ** twos-complement integer. */ pMem->u.i = *(i64*)&x; pMem->flags = MEM_Int; testcase( pMem->u.i<0 ); }else{ /* EVIDENCE-OF: R-57343-49114 Value is a big-endian IEEE 754-2008 64-bit ** floating point number. */ #if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT) /* Verify that integers and floating point values use the same ** byte order. Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is ** defined that 64-bit floating point values really are mixed ** endian. */ static const u64 t1 = ((u64)0x3ff00000)<<32; static const double r1 = 1.0; u64 t2 = t1; swapMixedEndianFloat(t2); assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 ); #endif assert( sizeof(x)==8 && sizeof(pMem->u.r)==8 ); swapMixedEndianFloat(x); memcpy(&pMem->u.r, &x, sizeof(x)); pMem->flags = sqlite3IsNaN(pMem->u.r) ? MEM_Null : MEM_Real; } return 8; } SQLITE_API u32 SQLITE_STDCALL sqlite3BtreeSerialGet( const unsigned char *buf, /* Buffer to deserialize from */ u32 serial_type, /* Serial type to deserialize */ Mem *pMem /* Memory cell to write value into */ ){ switch( serial_type ){ case 10: /* Reserved for future use */ case 11: /* Reserved for future use */ case 0: { /* Null */ /* EVIDENCE-OF: R-24078-09375 Value is a NULL. */ pMem->flags = MEM_Null; break; } case 1: { /* EVIDENCE-OF: R-44885-25196 Value is an 8-bit twos-complement ** integer. */ pMem->u.i = ONE_BYTE_INT(buf); pMem->flags = MEM_Int; testcase( pMem->u.i<0 ); return 1; } case 2: { /* 2-byte signed integer */ /* EVIDENCE-OF: R-49794-35026 Value is a big-endian 16-bit ** twos-complement integer. */ pMem->u.i = TWO_BYTE_INT(buf); pMem->flags = MEM_Int; testcase( pMem->u.i<0 ); return 2; } case 3: { /* 3-byte signed integer */ /* EVIDENCE-OF: R-37839-54301 Value is a big-endian 24-bit ** twos-complement integer. */ pMem->u.i = THREE_BYTE_INT(buf); pMem->flags = MEM_Int; testcase( pMem->u.i<0 ); return 3; } case 4: { /* 4-byte signed integer */ /* EVIDENCE-OF: R-01849-26079 Value is a big-endian 32-bit ** twos-complement integer. */ pMem->u.i = FOUR_BYTE_INT(buf); pMem->flags = MEM_Int; testcase( pMem->u.i<0 ); return 4; } case 5: { /* 6-byte signed integer */ /* EVIDENCE-OF: R-50385-09674 Value is a big-endian 48-bit ** twos-complement integer. */ pMem->u.i = FOUR_BYTE_UINT(buf+2) + (((i64)1)<<32)*TWO_BYTE_INT(buf); pMem->flags = MEM_Int; testcase( pMem->u.i<0 ); return 6; } case 6: /* 8-byte signed integer */ case 7: { /* IEEE floating point */ /* These use local variables, so do them in a separate routine ** to avoid having to move the frame pointer in the common case */ return serialGet(buf,serial_type,pMem); } case 8: /* Integer 0 */ case 9: { /* Integer 1 */ /* EVIDENCE-OF: R-12976-22893 Value is the integer 0. */ /* EVIDENCE-OF: R-18143-12121 Value is the integer 1. */ pMem->u.i = serial_type-8; pMem->flags = MEM_Int; return 0; } default: { /* EVIDENCE-OF: R-14606-31564 Value is a BLOB that is (N-12)/2 bytes in ** length. ** EVIDENCE-OF: R-28401-00140 Value is a string in the text encoding and ** (N-13)/2 bytes in length. */ static const u16 aFlag[] = { MEM_Blob|MEM_Ephem, MEM_Str|MEM_Ephem }; pMem->z = (char *)buf; pMem->n = (serial_type-12)/2; pMem->flags = aFlag[serial_type&1]; return pMem->n; } } return 0; } /* ** This routine is used to allocate sufficient space for an UnpackedRecord ** structure large enough to be used with sqlite3VdbeRecordUnpack() if ** the first argument is a pointer to KeyInfo structure pKeyInfo. ** ** The space is either allocated using sqlite3DbMallocRaw() or from within ** the unaligned buffer passed via the second and third arguments (presumably ** stack space). If the former, then *ppFree is set to a pointer that should ** be eventually freed by the caller using sqlite3DbFree(). Or, if the ** allocation comes from the pSpace/szSpace buffer, *ppFree is set to NULL ** before returning. ** ** If an OOM error occurs, NULL is returned. */ SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord( KeyInfo *pKeyInfo, /* Description of the record */ char *pSpace, /* Unaligned space available */ int szSpace, /* Size of pSpace[] in bytes */ char **ppFree /* OUT: Caller should free this pointer */ ){ UnpackedRecord *p; /* Unpacked record to return */ int nOff; /* Increment pSpace by nOff to align it */ int nByte; /* Number of bytes required for *p */ /* We want to shift the pointer pSpace up such that it is 8-byte aligned. ** Thus, we need to calculate a value, nOff, between 0 and 7, to shift ** it by. If pSpace is already 8-byte aligned, nOff should be zero. */ nOff = (8 - (SQLITE_PTR_TO_INT(pSpace) & 7)) & 7; nByte = ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*(pKeyInfo->nField+1); if( nByte>szSpace+nOff ){ p = (UnpackedRecord *)sqlite3DbMallocRaw(pKeyInfo->pBtree, nByte); *ppFree = (char *)p; if( !p ) return 0; }else{ p = (UnpackedRecord*)&pSpace[nOff]; *ppFree = 0; } p->aMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))]; assert( pKeyInfo->aSortOrder!=0 ); p->pKeyInfo = pKeyInfo; p->nField = pKeyInfo->nField + 1; return p; } /* ** Given the nKey-byte encoding of a record in pKey[], populate the ** UnpackedRecord structure indicated by the fourth argument with the ** contents of the decoded record. */ SQLITE_PRIVATE void sqlite3VdbeRecordUnpack( KeyInfo *pKeyInfo, /* Information about the record format */ int nKey, /* Size of the binary record */ const void *pKey, /* The binary record */ UnpackedRecord *p /* Populate this structure before returning. */ ){ const unsigned char *aKey = (const unsigned char *)pKey; int d; u32 idx; /* Offset in aKey[] to read from */ u16 u; /* Unsigned loop counter */ u32 szHdr; Mem *pMem = p->aMem; p->default_rc = 0; assert( EIGHT_BYTE_ALIGNMENT(pMem) ); idx = getVarint32(aKey, szHdr); d = szHdr; u = 0; while( idxenc = pKeyInfo->enc; pMem->pBtree = pKeyInfo->pBtree; /* pMem->flags = 0; // sqlite3BtreeSerialGet() will set this for us */ pMem->szMalloc = 0; d += sqlite3BtreeSerialGet(&aKey[d], serial_type, pMem); pMem++; if( (++u)>=p->nField ) break; } assert( u<=pKeyInfo->nField + 1 ); p->nField = u; } #if SQLITE_DEBUG /* ** Count the number of fields (a.k.a. columns) in the record given by ** pKey,nKey. The verify that this count is less than or equal to the ** limit given by pKeyInfo->nField + pKeyInfo->nXField. ** ** If this constraint is not satisfied, it means that the high-speed ** vdbeRecordCompareInt() and vdbeRecordCompareString() routines will ** not work correctly. If this assert() ever fires, it probably means ** that the KeyInfo.nField or KeyInfo.nXField values were computed ** incorrectly. */ static void vdbeAssertFieldCountWithinLimits( int nKey, const void *pKey, /* The record to verify */ const KeyInfo *pKeyInfo /* Compare size with this KeyInfo */ ){ int nField = 0; u32 szHdr; u32 idx; u32 notUsed; const unsigned char *aKey = (const unsigned char*)pKey; if( CORRUPT_DB ) return; idx = getVarint32(aKey, szHdr); assert( nKey>=0 ); assert( szHdr<=(u32)nKey ); while( idxnField+pKeyInfo->nXField ); } #else # define vdbeAssertFieldCountWithinLimits(A,B,C) #endif /* ** This function is an optimized version of sqlite3BtreeRecordCompare() ** that (a) the first field of pPKey2 is an integer, and (b) the ** size-of-header varint at the start of (pKey1/nKey1) fits in a single ** byte (i.e. is less than 128). ** ** To avoid concerns about buffer overreads, this routine is only used ** on schemas where the maximum valid header size is 63 bytes or less. */ static int vdbeRecordCompareInt( int nKey1, const void *pKey1, /* Left key */ UnpackedRecord *pPKey2 /* Right key */ ){ const u8 *aKey = &((const u8*)pKey1)[*(const u8*)pKey1 & 0x3F]; int serial_type = ((const u8*)pKey1)[1]; int res; u32 y; u64 x; i64 v = pPKey2->aMem[0].u.i; i64 lhs; vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo); assert( (*(u8*)pKey1)<=0x3F || CORRUPT_DB ); switch( serial_type ){ case 1: { /* 1-byte signed integer */ lhs = ONE_BYTE_INT(aKey); testcase( lhs<0 ); break; } case 2: { /* 2-byte signed integer */ lhs = TWO_BYTE_INT(aKey); testcase( lhs<0 ); break; } case 3: { /* 3-byte signed integer */ lhs = THREE_BYTE_INT(aKey); testcase( lhs<0 ); break; } case 4: { /* 4-byte signed integer */ y = FOUR_BYTE_UINT(aKey); lhs = (i64)*(int*)&y; testcase( lhs<0 ); break; } case 5: { /* 6-byte signed integer */ lhs = FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey); testcase( lhs<0 ); break; } case 6: { /* 8-byte signed integer */ x = FOUR_BYTE_UINT(aKey); x = (x<<32) | FOUR_BYTE_UINT(aKey+4); lhs = *(i64*)&x; testcase( lhs<0 ); break; } case 8: lhs = 0; break; case 9: lhs = 1; break; /* This case could be removed without changing the results of running ** this code. Including it causes gcc to generate a faster switch ** statement (since the range of switch targets now starts at zero and ** is contiguous) but does not cause any duplicate code to be generated ** (as gcc is clever enough to combine the two like cases). Other ** compilers might be similar. */ case 0: case 7: return sqlite3BtreeRecordCompare(nKey1, pKey1, pPKey2); default: return sqlite3BtreeRecordCompare(nKey1, pKey1, pPKey2); } if( v>lhs ){ res = pPKey2->r1; }else if( vr2; }else if( pPKey2->nField>1 ){ /* The first fields of the two keys are equal. Compare the trailing ** fields. */ res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); }else{ /* The first fields of the two keys are equal and there are no trailing ** fields. Return pPKey2->default_rc in this case. */ res = pPKey2->default_rc; } assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) ); return res; } /* ** This function is an optimized version of sqlite3BtreeRecordCompare() ** that (a) the first field of pPKey2 is a string, that (b) the first field ** uses the collation sequence BINARY and (c) that the size-of-header varint ** at the start of (pKey1/nKey1) fits in a single byte. */ static int vdbeRecordCompareString( int nKey1, const void *pKey1, /* Left key */ UnpackedRecord *pPKey2 /* Right key */ ){ const u8 *aKey1 = (const u8*)pKey1; int serial_type; int res; vdbeAssertFieldCountWithinLimits(nKey1, pKey1, pPKey2->pKeyInfo); getVarint32(&aKey1[1], serial_type); if( serial_type<12 ){ res = pPKey2->r1; /* (pKey1/nKey1) is a number or a null */ }else if( !(serial_type & 0x01) ){ res = pPKey2->r2; /* (pKey1/nKey1) is a blob */ }else{ int nCmp; int nStr; int szHdr = aKey1[0]; nStr = (serial_type-12) / 2; if( (szHdr + nStr) > nKey1 ){ pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; return 0; /* Corruption */ } nCmp = MIN( pPKey2->aMem[0].n, nStr ); res = memcmp(&aKey1[szHdr], pPKey2->aMem[0].z, nCmp); if( res==0 ){ res = nStr - pPKey2->aMem[0].n; if( res==0 ){ if( pPKey2->nField>1 ){ res = sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 1); }else{ res = pPKey2->default_rc; } }else if( res>0 ){ res = pPKey2->r2; }else{ res = pPKey2->r1; } }else if( res>0 ){ res = pPKey2->r2; }else{ res = pPKey2->r1; } } assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) || CORRUPT_DB || pPKey2->pKeyInfo->pBtree->mallocFailed ); return res; } /* ** If the given Mem* has a zero-filled tail, turn it into an ordinary ** blob stored in dynamically allocated space. */ #ifndef SQLITE_OMIT_INCRBLOB SQLITE_PRIVATE int sqlite3VdbeMemExpandBlob(Mem *pMem){ if( pMem->flags & MEM_Zero ){ int nByte; assert( pMem->flags&MEM_Blob ); assert( (pMem->flags&MEM_RowSet)==0 ); assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); /* Set nByte to the number of bytes required to store the expanded blob. */ nByte = pMem->n + pMem->u.nZero; if( nByte<=0 ){ nByte = 1; } if( sqlite3VdbeMemGrow(pMem, nByte, 1) ){ return SQLITE_NOMEM; } memset(&pMem->z[pMem->n], 0, pMem->u.nZero); pMem->n += pMem->u.nZero; pMem->flags &= ~MEM_Zero; } return SQLITE_OK; } #endif /* ** Convert a 64-bit IEEE double into a 64-bit signed integer. ** If the double is out of range of a 64-bit signed integer then ** return the closest available 64-bit signed integer. */ static i64 doubleToInt64(double r){ #ifdef SQLITE_OMIT_FLOATING_POINT /* When floating-point is omitted, double and int64 are the same thing */ return r; #else /* ** Many compilers we encounter do not define constants for the ** minimum and maximum 64-bit integers, or they define them ** inconsistently. And many do not understand the "LL" notation. ** So we define our own static constants here using nothing ** larger than a 32-bit integer constant. */ static const i64 maxInt = LARGEST_INT64; static const i64 minInt = SMALLEST_INT64; if( r<=(double)minInt ){ return minInt; }else if( r>=(double)maxInt ){ return maxInt; }else{ return (i64)r; } #endif } /* ** Return some kind of integer value which is the best we can do ** at representing the value that *pMem describes as an integer. ** If pMem is an integer, then the value is exact. If pMem is ** a floating-point then the value returned is the integer part. ** If pMem is a string or blob, then we make an attempt to convert ** it into an integer and return that. If pMem represents an ** an SQL-NULL value, return 0. ** ** If pMem represents a string value, its encoding might be changed. */ SQLITE_PRIVATE i64 sqlite3VdbeIntValue(Mem *pMem){ int flags; assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); assert( EIGHT_BYTE_ALIGNMENT(pMem) ); flags = pMem->flags; if( flags & MEM_Int ){ return pMem->u.i; }else if( flags & MEM_Real ){ return doubleToInt64(pMem->u.r); }else if( flags & (MEM_Str|MEM_Blob) ){ i64 value = 0; assert( pMem->z || pMem->n==0 ); sqlite3Atoi64(pMem->z, &value, pMem->n, pMem->enc); return value; }else{ return 0; } } /* ** Return the best representation of pMem that we can get into a ** double. If pMem is already a double or an integer, return its ** value. If it is a string or blob, try to convert it to a double. ** If it is a NULL, return 0.0. */ SQLITE_PRIVATE double sqlite3VdbeRealValue(Mem *pMem){ assert( pMem->pBtree==0 || sqlite3_mutex_held(pMem->pBtree->mutex) ); assert( EIGHT_BYTE_ALIGNMENT(pMem) ); if( pMem->flags & MEM_Real ){ return pMem->u.r; }else if( pMem->flags & MEM_Int ){ return (double)pMem->u.i; }else if( pMem->flags & (MEM_Str|MEM_Blob) ){ /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ double val = (double)0; sqlite3AtoF(pMem->z, &val, pMem->n, pMem->enc); return val; }else{ /* (double)0 In case of SQLITE_OMIT_FLOATING_POINT... */ return (double)0; } } /* ** The pVal argument is known to be a value other than NULL. ** Convert it into a string with encoding enc and return a pointer ** to a zero-terminated version of that string. */ static SQLITE_NOINLINE const void *valueToText(Mem* pVal, u8 enc){ assert( pVal!=0 ); assert( pVal->pBtree==0 || sqlite3_mutex_held(pVal->pBtree->mutex) ); assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) ); assert( (pVal->flags & MEM_RowSet)==0 ); assert( (pVal->flags & (MEM_Null))==0 ); if( pVal->flags & (MEM_Blob|MEM_Str) ){ pVal->flags |= MEM_Str; if( pVal->flags & MEM_Zero ){ sqlite3VdbeMemExpandBlob(pVal); } if( pVal->enc != (enc & ~SQLITE_UTF16_ALIGNED) ){ sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED); } if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){ assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 ); if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){ return 0; } } sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */ }else{ sqlite3VdbeMemStringify(pVal, enc, 0); assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) ); } assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->pBtree==0 || pVal->pBtree->mallocFailed ); if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){ return pVal->z; }else{ return 0; } } /* This function is only available internally, it is not part of the ** external API. It works in a similar way to sqlite3_value_text(), ** except the data returned is in the encoding specified by the second ** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or ** SQLITE_UTF8. ** ** (2006-02-16:) The enc value can be or-ed with SQLITE_UTF16_ALIGNED. ** If that is the case, then the result must be aligned on an even byte ** boundary. */ SQLITE_PRIVATE const void *sqlite3ValueText(Mem* pVal, u8 enc){ if( !pVal ) return 0; assert( pVal->pBtree==0 || sqlite3_mutex_held(pVal->pBtree->mutex) ); assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) ); assert( (pVal->flags & MEM_RowSet)==0 ); if( (pVal->flags&MEM_Str)==MEM_Str && pVal->enc==enc ){ return pVal->z; } if( pVal->flags&MEM_Null ){ return 0; } return valueToText(pVal, enc); } /* ** Both *pMem1 and *pMem2 contain string values. Compare the two values ** using the collation sequence pColl. As usual, return a negative , zero ** or positive value if *pMem1 is less than, equal to or greater than ** *pMem2, respectively. Similar in spirit to "rc = (*pMem1) - (*pMem2);". */ static int vdbeCompareMemString( const Mem *pMem1, const Mem *pMem2, const CollSeq *pColl, u8 *prcErr /* If an OOM occurs, set to SQLITE_NOMEM */ ){ if( pMem1->enc==pColl->enc ){ /* The strings are already in the correct encoding. Call the ** comparison function directly */ return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z); }else{ int rc; const void *v1, *v2; int n1, n2; Mem c1; Mem c2; sqlite3VdbeMemInit(&c1, pMem1->pBtree, MEM_Null); sqlite3VdbeMemInit(&c2, pMem1->pBtree, MEM_Null); sqlite3VdbeMemShallowCopy(&c1, pMem1, MEM_Ephem); sqlite3VdbeMemShallowCopy(&c2, pMem2, MEM_Ephem); v1 = sqlite3ValueText(&c1, pColl->enc); n1 = v1==0 ? 0 : c1.n; v2 = sqlite3ValueText(&c2, pColl->enc); n2 = v2==0 ? 0 : c2.n; rc = pColl->xCmp(pColl->pUser, n1, v1, n2, v2); sqlite3VdbeMemRelease(&c1); sqlite3VdbeMemRelease(&c2); if( (v1==0 || v2==0) && prcErr ) *prcErr = SQLITE_NOMEM; return rc; } } /* ** The first argument passed to this function is a serial-type that ** corresponds to an integer - all values between 1 and 9 inclusive ** except 7. The second points to a buffer containing an integer value ** serialized according to serial_type. This function deserializes ** and returns the value. */ static i64 vdbeRecordDecodeInt(u32 serial_type, const u8 *aKey){ u32 y; assert( CORRUPT_DB || (serial_type>=1 && serial_type<=9 && serial_type!=7) ); switch( serial_type ){ case 0: case 1: testcase( aKey[0]&0x80 ); return ONE_BYTE_INT(aKey); case 2: testcase( aKey[0]&0x80 ); return TWO_BYTE_INT(aKey); case 3: testcase( aKey[0]&0x80 ); return THREE_BYTE_INT(aKey); case 4: { testcase( aKey[0]&0x80 ); y = FOUR_BYTE_UINT(aKey); return (i64)*(int*)&y; } case 5: { testcase( aKey[0]&0x80 ); return FOUR_BYTE_UINT(aKey+2) + (((i64)1)<<32)*TWO_BYTE_INT(aKey); } case 6: { u64 x = FOUR_BYTE_UINT(aKey); testcase( aKey[0]&0x80 ); x = (x<<32) | FOUR_BYTE_UINT(aKey+4); return (i64)*(i64*)&x; } } return (serial_type - 8); } /* ** This function compares the two table rows or index records ** specified by {nKey1, pKey1} and pPKey2. It returns a negative, zero ** or positive integer if key1 is less than, equal to or ** greater than key2. The {nKey1, pKey1} key must be a blob ** created by the OP_MakeRecord opcode of the VDBE. The pPKey2 ** key must be a parsed key such as obtained from ** sqlite3VdbeParseRecord. ** ** If argument bSkip is non-zero, it is assumed that the caller has already ** determined that the first fields of the keys are equal. ** ** Key1 and Key2 do not have to contain the same number of fields. If all ** fields that appear in both keys are equal, then pPKey2->default_rc is ** returned. ** ** If database corruption is discovered, set pPKey2->errCode to ** SQLITE_CORRUPT and return 0. If an OOM error is encountered, ** pPKey2->errCode is set to SQLITE_NOMEM and, if it is not NULL, the ** malloc-failed flag set on database handle (pPKey2->pKeyInfo->pBtree). */ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( int nKey1, const void *pKey1, /* Left key */ UnpackedRecord *pPKey2, /* Right key */ int bSkip /* If true, skip the first field */ ){ u32 d1; /* Offset into aKey[] of next data element */ int i; /* Index of next field to compare */ u32 szHdr1; /* Size of record header in bytes */ u32 idx1; /* Offset of first type in header */ int rc = 0; /* Return value */ Mem *pRhs = pPKey2->aMem; /* Next field of pPKey2 to compare */ KeyInfo *pKeyInfo = pPKey2->pKeyInfo; const unsigned char *aKey1 = (const unsigned char *)pKey1; Mem mem1; /* If bSkip is true, then the caller has already determined that the first ** two elements in the keys are equal. Fix the various stack variables so ** that this routine begins comparing at the second field. */ if( bSkip ){ u32 s1; idx1 = 1 + getVarint32(&aKey1[1], s1); szHdr1 = aKey1[0]; d1 = szHdr1 + sqlite3BtreeSerialTypeLen(s1); i = 1; pRhs++; }else{ idx1 = getVarint32(aKey1, szHdr1); d1 = szHdr1; if( d1>(unsigned)nKey1 ){ pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; return 0; /* Corruption */ } i = 0; } VVA_ONLY( mem1.szMalloc = 0; ) /* Only needed by assert() statements */ assert( pPKey2->pKeyInfo->nField+pPKey2->pKeyInfo->nXField>=pPKey2->nField || CORRUPT_DB ); assert( pPKey2->pKeyInfo->aSortOrder!=0 ); assert( pPKey2->pKeyInfo->nField>0 ); assert( idx1<=szHdr1 || CORRUPT_DB ); do{ u32 serial_type; /* RHS is an integer */ if( pRhs->flags & MEM_Int ){ serial_type = aKey1[idx1]; testcase( serial_type==12 ); if( serial_type>=10 ){ rc = +1; }else if( serial_type==0 ){ rc = -1; }else if( serial_type==7 ){ double rhs = (double)pRhs->u.i; sqlite3BtreeSerialGet(&aKey1[d1], serial_type, &mem1); if( mem1.u.rrhs ){ rc = +1; } }else{ i64 lhs = vdbeRecordDecodeInt(serial_type, &aKey1[d1]); i64 rhs = pRhs->u.i; if( lhsrhs ){ rc = +1; } } } /* RHS is real */ else if( pRhs->flags & MEM_Real ){ serial_type = aKey1[idx1]; if( serial_type>=10 ){ /* Serial types 12 or greater are strings and blobs (greater than ** numbers). Types 10 and 11 are currently "reserved for future ** use", so it doesn't really matter what the results of comparing ** them to numberic values are. */ rc = +1; }else if( serial_type==0 ){ rc = -1; }else{ double rhs = pRhs->u.r; double lhs; sqlite3BtreeSerialGet(&aKey1[d1], serial_type, &mem1); if( serial_type==7 ){ lhs = mem1.u.r; }else{ lhs = (double)mem1.u.i; } if( lhsrhs ){ rc = +1; } } } /* RHS is a string */ else if( pRhs->flags & MEM_Str ){ getVarint32(&aKey1[idx1], serial_type); testcase( serial_type==12 ); if( serial_type<12 ){ rc = -1; }else if( !(serial_type & 0x01) ){ rc = +1; }else{ mem1.n = (serial_type - 12) / 2; testcase( (d1+mem1.n)==(unsigned)nKey1 ); testcase( (d1+mem1.n+1)==(unsigned)nKey1 ); if( (d1+mem1.n) > (unsigned)nKey1 ){ pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; return 0; /* Corruption */ }else if( pKeyInfo->aColl[i] ){ mem1.enc = pKeyInfo->enc; mem1.pBtree = pKeyInfo->pBtree; mem1.flags = MEM_Str; mem1.z = (char*)&aKey1[d1]; rc = vdbeCompareMemString( &mem1, pRhs, pKeyInfo->aColl[i], &pPKey2->errCode ); }else{ int nCmp = MIN(mem1.n, pRhs->n); rc = memcmp(&aKey1[d1], pRhs->z, nCmp); if( rc==0 ) rc = mem1.n - pRhs->n; } } } /* RHS is a blob */ else if( pRhs->flags & MEM_Blob ){ getVarint32(&aKey1[idx1], serial_type); testcase( serial_type==12 ); if( serial_type<12 || (serial_type & 0x01) ){ rc = -1; }else{ int nStr = (serial_type - 12) / 2; testcase( (d1+nStr)==(unsigned)nKey1 ); testcase( (d1+nStr+1)==(unsigned)nKey1 ); if( (d1+nStr) > (unsigned)nKey1 ){ pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT; return 0; /* Corruption */ }else{ int nCmp = MIN(nStr, pRhs->n); rc = memcmp(&aKey1[d1], pRhs->z, nCmp); if( rc==0 ) rc = nStr - pRhs->n; } } } /* RHS is null */ else{ serial_type = aKey1[idx1]; rc = (serial_type!=0); } if( rc!=0 ){ if( pKeyInfo->aSortOrder[i] ){ rc = -rc; } assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, rc) ); assert( mem1.szMalloc==0 ); /* See comment below */ return rc; } i++; pRhs++; d1 += sqlite3BtreeSerialTypeLen(serial_type); idx1 += sqlite3BtreeVarintLen(serial_type); }while( idx1<(unsigned)szHdr1 && inField && d1<=(unsigned)nKey1 ); /* No memory allocation is ever used on mem1. Prove this using ** the following assert(). If the assert() fails, it indicates a ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1). */ assert( mem1.szMalloc==0 ); /* rc==0 here means that one or both of the keys ran out of fields and ** all the fields up to that point were equal. Return the default_rc ** value. */ assert( CORRUPT_DB || vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, pPKey2->default_rc) || pKeyInfo->pBtree->mallocFailed ); return pPKey2->default_rc; } SQLITE_API int SQLITE_STDCALL sqlite3BtreeRecordCompare( int nKey1, const void *pKey1, /* Left key */ UnpackedRecord *pPKey2 /* Right key */ ){ return sqlite3VdbeRecordCompareWithSkip(nKey1, pKey1, pPKey2, 0); } /* ** Return a pointer to an sqlite3BtreeRecordCompare() compatible function ** suitable for comparing serialized records to the unpacked record passed ** as the only argument. */ SQLITE_PRIVATE RecordCompare sqlite3VdbeFindCompare(UnpackedRecord *p){ /* varintRecordCompareInt() and varintRecordCompareString() both assume ** that the size-of-header varint that occurs at the start of each record ** fits in a single byte (i.e. is 127 or less). varintRecordCompareInt() ** also assumes that it is safe to overread a buffer by at least the ** maximum possible legal header size plus 8 bytes. Because there is ** guaranteed to be at least 74 (but not 136) bytes of padding following each ** buffer passed to varintRecordCompareInt() this makes it convenient to ** limit the size of the header to 64 bytes in cases where the first field ** is an integer. ** ** The easiest way to enforce this limit is to consider only records with ** 13 fields or less. If the first field is an integer, the maximum legal ** header size is (12*5 + 1 + 1) bytes. */ if( (p->pKeyInfo->nField + p->pKeyInfo->nXField)<=13 ){ int flags = p->aMem[0].flags; if( p->pKeyInfo->aSortOrder[0] ){ p->r1 = 1; p->r2 = -1; }else{ p->r1 = -1; p->r2 = 1; } if( (flags & MEM_Int) ){ return vdbeRecordCompareInt; } testcase( flags & MEM_Real ); testcase( flags & MEM_Null ); testcase( flags & MEM_Blob ); if( (flags & (MEM_Real|MEM_Null|MEM_Blob))==0 && p->pKeyInfo->aColl[0]==0 ){ assert( flags & MEM_Str ); return vdbeRecordCompareString; } } return sqlite3BtreeRecordCompare; } /* ** Move data out of a btree key or data field and into a Mem structure. ** The data or key is taken from the entry that pCur is currently pointing ** to. offset and amt determine what portion of the data or key to retrieve. ** key is true to get the key or false to get data. The result is written ** into the pMem element. ** ** The pMem object must have been initialized. This routine will use ** pMem->zMalloc to hold the content from the btree, if possible. New ** pMem->zMalloc space will be allocated if necessary. The calling routine ** is responsible for making sure that the pMem object is eventually ** destroyed. ** ** If this routine fails for any reason (malloc returns NULL or unable ** to read from the disk) then the pMem is left in an inconsistent state. */ static SQLITE_NOINLINE int vdbeMemFromBtreeResize( BtCursor *pCur, /* Cursor pointing at record to retrieve. */ u32 offset, /* Offset from the start of data to return bytes from. */ u32 amt, /* Number of bytes to return. */ int key, /* If true, retrieve from the btree key, not data. */ Mem *pMem /* OUT: Return data in this Mem structure. */ ){ int rc; pMem->flags = MEM_Null; if( SQLITE_OK==(rc = sqlite3VdbeMemClearAndResize(pMem, amt+2)) ){ if( key ){ rc = sqlite3BtreeKey(pCur, offset, amt, pMem->z); }else{ rc = sqlite3BtreeData(pCur, offset, amt, pMem->z); } if( rc==SQLITE_OK ){ pMem->z[amt] = 0; pMem->z[amt+1] = 0; pMem->flags = MEM_Blob|MEM_Term; pMem->n = (int)amt; }else{ sqlite3VdbeMemRelease(pMem); } } return rc; } SQLITE_PRIVATE int sqlite3VdbeMemFromBtree( BtCursor *pCur, /* Cursor pointing at record to retrieve. */ u32 offset, /* Offset from the start of data to return bytes from. */ u32 amt, /* Number of bytes to return. */ int key, /* If true, retrieve from the btree key, not data. */ Mem *pMem /* OUT: Return data in this Mem structure. */ ){ char *zData; /* Data from the btree layer */ u32 available = 0; /* Number of bytes available on the local btree page */ int rc = SQLITE_OK; /* Return code */ assert( sqlite3BtreeCursorIsValid(pCur) ); assert( !VdbeMemDynamic(pMem) ); /* Note: the calls to BtreeKeyFetch() and DataFetch() below assert() ** that both the BtShared and database handle mutexes are held. */ assert( (pMem->flags & MEM_RowSet)==0 ); if( key ){ zData = (char *)sqlite3BtreeKeyFetch(pCur, &available); }else{ zData = (char *)sqlite3BtreeDataFetch(pCur, &available); } assert( zData!=0 ); if( offset+amt<=available ){ pMem->z = &zData[offset]; pMem->flags = MEM_Blob|MEM_Ephem; pMem->n = (int)amt; }else{ rc = vdbeMemFromBtreeResize(pCur, offset, amt, key, pMem); } return rc; } /* ** pCur points at an index entry created using the OP_MakeRecord opcode. ** Read the rowid (the last field in the record) and store it in *rowid. ** Return SQLITE_OK if everything works, or an error code otherwise. ** ** pCur might be pointing to text obtained from a corrupt database file. ** So the content cannot be trusted. Do appropriate checks on the content. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeIdxRowid(Btree* pBtree, BtCursor *pCur, i64 *rowid){ i64 nCellKey = 0; int rc; u32 szHdr; /* Size of the header */ u32 typeRowid; /* Serial type of the rowid */ u32 lenRowid; /* Size of the rowid */ Mem m, v; /* Get the size of the index entry. Only indices entries of less ** than 2GiB are support - anything large must be database corruption. ** Any corruption is detected in sqlite3BtreeParseCellPtr(), though, so ** this code can safely assume that nCellKey is 32-bits */ assert( sqlite3BtreeCursorIsValid(pCur) ); VVA_ONLY(rc =) sqlite3BtreeKeySize(pCur, &nCellKey); assert( rc==SQLITE_OK ); /* pCur is always valid so KeySize cannot fail */ assert( (nCellKey & SQLITE_MAX_U32)==(u64)nCellKey ); /* Read in the complete content of the index entry */ sqlite3VdbeMemInit(&m, pBtree, 0); rc = sqlite3VdbeMemFromBtree(pCur, 0, (u32)nCellKey, 1, &m); if( rc ){ return rc; } /* The index entry must begin with a header size */ (void)getVarint32((u8*)m.z, szHdr); testcase( szHdr==3 ); testcase( szHdr==m.n ); if( unlikely(szHdr<3 || (int)szHdr>m.n) ){ goto idx_rowid_corruption; } /* The last field of the index should be an integer - the ROWID. ** Verify that the last entry really is an integer. */ (void)getVarint32((u8*)&m.z[szHdr-1], typeRowid); testcase( typeRowid==1 ); testcase( typeRowid==2 ); testcase( typeRowid==3 ); testcase( typeRowid==4 ); testcase( typeRowid==5 ); testcase( typeRowid==6 ); testcase( typeRowid==8 ); testcase( typeRowid==9 ); if( unlikely(typeRowid<1 || typeRowid>9 || typeRowid==7) ){ goto idx_rowid_corruption; } lenRowid = sqlite3SmallTypeSizes[typeRowid]; testcase( (u32)m.n==szHdr+lenRowid ); if( unlikely((u32)m.npDest); const int nSrcPgsz = sqlite3BtreeGetPageSize(p->pSrc); int nDestPgsz = sqlite3BtreeGetPageSize(p->pDest); const int nCopy = MIN(nSrcPgsz, nDestPgsz); const i64 iEnd = (i64)iSrcPg*(i64)nSrcPgsz; #ifdef SQLITE_HAS_CODEC /* Use BtreeGetReserveNoMutex() for the source b-tree, as although it is ** guaranteed that the shared-mutex is held by this thread, handle ** p->pSrc may not actually be the owner. */ int nSrcReserve = sqlite3BtreeGetReserveNoMutex(p->pSrc); int nDestReserve = sqlite3BtreeGetOptimalReserve(p->pDest); #endif int rc = SQLITE_OK; i64 iOff; assert( sqlite3BtreeGetReserveNoMutex(p->pSrc)>=0 ); assert( p->bDestLocked ); assert( !isFatalError(p->rc) ); assert( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ); assert( zSrcData ); #ifdef SQLITE_HAS_CODEC /* Backup is not possible if the page size of the destination is changing ** and a codec is in use. */ if( nSrcPgsz!=nDestPgsz && sqlite3PagerGetCodec(pDestPager)!=0 ){ rc = SQLITE_READONLY; } /* Backup is not possible if the number of bytes of reserve space differ ** between source and destination. If there is a difference, try to ** fix the destination to agree with the source. If that is not possible, ** then the backup cannot proceed. */ if( nSrcReserve!=nDestReserve ){ u32 newPgsz = nSrcPgsz; rc = sqlite3PagerSetPagesize(pDestPager, &newPgsz, nSrcReserve); if( rc==SQLITE_OK && newPgsz!=nSrcPgsz ) rc = SQLITE_READONLY; } #endif /* This loop runs once for each destination page spanned by the source ** page. For each iteration, variable iOff is set to the byte offset ** of the destination page. */ for(iOff=iEnd-(i64)nSrcPgsz; rc==SQLITE_OK && iOffpDest->pBt) ) continue; if( SQLITE_OK==(rc = sqlite3PagerGet(pDestPager, iDest, &pDestPg)) && SQLITE_OK==(rc = sqlite3PagerWrite(pDestPg)) ){ const u8 *zIn = &zSrcData[iOff%nSrcPgsz]; u8 *zDestData = sqlite3PagerGetData(pDestPg); u8 *zOut = &zDestData[iOff%nDestPgsz]; /* Copy the data from the source page into the destination page. ** Then clear the Btree layer MemPage.isInit flag. Both this module ** and the pager code use this trick (clearing the first byte ** of the page 'extra' space to invalidate the Btree layers ** cached parse of the page). MemPage.isInit is marked ** "MUST BE FIRST" for this purpose. */ memcpy(zOut, zIn, nCopy); ((u8 *)sqlite3PagerGetExtra(pDestPg))[0] = 0; if( iOff==0 && bUpdate==0 ){ sqlite3Put4byte(&zOut[28], sqlite3BtreeLastPage(p->pSrc)); } } sqlite3PagerUnref(pDestPg); } return rc; } /* ** This function is called after the contents of page iPage of the ** source database have been modified. If page iPage has already been ** copied into the destination database, then the data written to the ** destination is now invalidated. The destination copy of iPage needs ** to be updated with the new data before the backup operation is ** complete. ** ** It is assumed that the mutex associated with the BtShared object ** corresponding to the source database is held when this function is ** called. */ static SQLITE_NOINLINE void backupUpdate( sqlite3_backup *p, Pgno iPage, const u8 *aData ){ assert( p!=0 ); do{ assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); if( !isFatalError(p->rc) && iPageiNext ){ /* The backup process p has already copied page iPage. But now it ** has been modified by a transaction on the source pager. Copy ** the new data into the backup. */ int rc; assert( p->pDest ); sqlite3_mutex_enter(p->pDest->mutex); rc = backupOnePage(p, iPage, aData, 1); sqlite3_mutex_leave(p->pDest->mutex); assert( rc!=SQLITE_BUSY && rc!=SQLITE_LOCKED ); if( rc!=SQLITE_OK ){ p->rc = rc; } } }while( (p = p->pNext)!=0 ); } SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ if( pBackup ) backupUpdate(pBackup, iPage, aData); } /* ** Restart the backup process. This is called when the pager layer ** detects that the database has been modified by an external database ** connection. In this case there is no way of knowing which of the ** pages that have been copied into the destination database are still ** valid and which are not, so the entire process needs to be restarted. ** ** It is assumed that the mutex associated with the BtShared object ** corresponding to the source database is held when this function is ** called. */ SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *pBackup){ sqlite3_backup *p; /* Iterator variable */ for(p=pBackup; p; p=p->pNext){ assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); p->iNext = 1; } } /************** End of backup.c **********************************************/ /************** Begin file journal.c *****************************************/ /* ** 2007 August 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file implements a special kind of sqlite3_file object used ** by SQLite to create journal files if the atomic-write optimization ** is enabled. ** ** The distinctive characteristic of this sqlite3_file is that the ** actual on disk file is created lazily. When the file is created, ** the caller specifies a buffer size for an in-memory buffer to ** be used to service read() and write() requests. The actual file ** on disk is not created or populated until either: ** ** 1) The in-memory representation grows too large for the allocated ** buffer, or ** 2) The sqlite3JournalCreate() function is called. */ #ifdef SQLITE_ENABLE_ATOMIC_WRITE /* #include "sqliteInt.h" */ /* ** A JournalFile object is a subclass of sqlite3_file used by ** as an open file handle for journal files. */ struct JournalFile { sqlite3_io_methods *pMethod; /* I/O methods on journal files */ int nBuf; /* Size of zBuf[] in bytes */ char *zBuf; /* Space to buffer journal writes */ int iSize; /* Amount of zBuf[] currently used */ int flags; /* xOpen flags */ sqlite3_vfs *pVfs; /* The "real" underlying VFS */ sqlite3_file *pReal; /* The "real" underlying file descriptor */ const char *zJournal; /* Name of the journal file */ }; typedef struct JournalFile JournalFile; /* ** If it does not already exists, create and populate the on-disk file ** for JournalFile p. */ static int createFile(JournalFile *p){ int rc = SQLITE_OK; if( !p->pReal ){ sqlite3_file *pReal = (sqlite3_file *)&p[1]; rc = sqlite3OsOpen(p->pVfs, p->zJournal, pReal, p->flags, 0); if( rc==SQLITE_OK ){ p->pReal = pReal; if( p->iSize>0 ){ assert(p->iSize<=p->nBuf); rc = sqlite3OsWrite(p->pReal, p->zBuf, p->iSize, 0); } if( rc!=SQLITE_OK ){ /* If an error occurred while writing to the file, close it before ** returning. This way, SQLite uses the in-memory journal data to ** roll back changes made to the internal page-cache before this ** function was called. */ sqlite3OsClose(pReal); p->pReal = 0; } } } return rc; } /* ** Close the file. */ static int jrnlClose(sqlite3_file *pJfd){ JournalFile *p = (JournalFile *)pJfd; if( p->pReal ){ sqlite3OsClose(p->pReal); } sqlite3_free(p->zBuf); return SQLITE_OK; } /* ** Read data from the file. */ static int jrnlRead( sqlite3_file *pJfd, /* The journal file from which to read */ void *zBuf, /* Put the results here */ int iAmt, /* Number of bytes to read */ sqlite_int64 iOfst /* Begin reading at this offset */ ){ int rc = SQLITE_OK; JournalFile *p = (JournalFile *)pJfd; if( p->pReal ){ rc = sqlite3OsRead(p->pReal, zBuf, iAmt, iOfst); }else if( (iAmt+iOfst)>p->iSize ){ rc = SQLITE_IOERR_SHORT_READ; }else{ memcpy(zBuf, &p->zBuf[iOfst], iAmt); } return rc; } /* ** Write data to the file. */ static int jrnlWrite( sqlite3_file *pJfd, /* The journal file into which to write */ const void *zBuf, /* Take data to be written from here */ int iAmt, /* Number of bytes to write */ sqlite_int64 iOfst /* Begin writing at this offset into the file */ ){ int rc = SQLITE_OK; JournalFile *p = (JournalFile *)pJfd; if( !p->pReal && (iOfst+iAmt)>p->nBuf ){ rc = createFile(p); } if( rc==SQLITE_OK ){ if( p->pReal ){ rc = sqlite3OsWrite(p->pReal, zBuf, iAmt, iOfst); }else{ memcpy(&p->zBuf[iOfst], zBuf, iAmt); if( p->iSize<(iOfst+iAmt) ){ p->iSize = (iOfst+iAmt); } } } return rc; } /* ** Truncate the file. */ static int jrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ int rc = SQLITE_OK; JournalFile *p = (JournalFile *)pJfd; if( p->pReal ){ rc = sqlite3OsTruncate(p->pReal, size); }else if( sizeiSize ){ p->iSize = size; } return rc; } /* ** Sync the file. */ static int jrnlSync(sqlite3_file *pJfd, int flags){ int rc; JournalFile *p = (JournalFile *)pJfd; if( p->pReal ){ rc = sqlite3OsSync(p->pReal, flags); }else{ rc = SQLITE_OK; } return rc; } /* ** Query the size of the file in bytes. */ static int jrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ int rc = SQLITE_OK; JournalFile *p = (JournalFile *)pJfd; if( p->pReal ){ rc = sqlite3OsFileSize(p->pReal, pSize); }else{ *pSize = (sqlite_int64) p->iSize; } return rc; } /* ** Table of methods for JournalFile sqlite3_file object. */ static struct sqlite3_io_methods JournalFileMethods = { 1, /* iVersion */ jrnlClose, /* xClose */ jrnlRead, /* xRead */ jrnlWrite, /* xWrite */ jrnlTruncate, /* xTruncate */ jrnlSync, /* xSync */ jrnlFileSize, /* xFileSize */ 0, /* xLock */ 0, /* xUnlock */ 0, /* xCheckReservedLock */ 0, /* xFileControl */ 0, /* xSectorSize */ 0, /* xDeviceCharacteristics */ 0, /* xShmMap */ 0, /* xShmLock */ 0, /* xShmBarrier */ 0 /* xShmUnmap */ }; /* ** Open a journal file. */ SQLITE_PRIVATE int sqlite3JournalOpen( sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */ const char *zName, /* Name of the journal file */ sqlite3_file *pJfd, /* Preallocated, blank file handle */ int flags, /* Opening flags */ int nBuf /* Bytes buffered before opening the file */ ){ JournalFile *p = (JournalFile *)pJfd; memset(p, 0, sqlite3JournalSize(pVfs)); if( nBuf>0 ){ p->zBuf = sqlite3MallocZero(nBuf); if( !p->zBuf ){ return SQLITE_NOMEM; } }else{ return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0); } p->pMethod = &JournalFileMethods; p->nBuf = nBuf; p->flags = flags; p->zJournal = zName; p->pVfs = pVfs; return SQLITE_OK; } /* ** If the argument p points to a JournalFile structure, and the underlying ** file has not yet been created, create it now. */ SQLITE_PRIVATE int sqlite3JournalCreate(sqlite3_file *p){ if( p->pMethods!=&JournalFileMethods ){ return SQLITE_OK; } return createFile((JournalFile *)p); } /* ** The file-handle passed as the only argument is guaranteed to be an open ** file. It may or may not be of class JournalFile. If the file is a ** JournalFile, and the underlying file on disk has not yet been opened, ** return 0. Otherwise, return 1. */ SQLITE_PRIVATE int sqlite3JournalExists(sqlite3_file *p){ return (p->pMethods!=&JournalFileMethods || ((JournalFile *)p)->pReal!=0); } /* ** Return the number of bytes required to store a JournalFile that uses vfs ** pVfs to create the underlying on-disk files. */ SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *pVfs){ return (pVfs->szOsFile+sizeof(JournalFile)); } #endif /************** End of journal.c *********************************************/ /************** Begin file memjournal.c **************************************/ /* ** 2008 October 7 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** ** This file contains code use to implement an in-memory rollback journal. ** The in-memory rollback journal is used to journal transactions for ** ":memory:" databases and when the journal_mode=MEMORY pragma is used. */ /* #include "sqliteInt.h" */ /* Forward references to internal structures */ typedef struct MemJournal MemJournal; typedef struct FilePoint FilePoint; typedef struct FileChunk FileChunk; /* Space to hold the rollback journal is allocated in increments of ** this many bytes. ** ** The size chosen is a little less than a power of two. That way, ** the FileChunk object will have a size that almost exactly fills ** a power-of-two allocation. This minimizes wasted space in power-of-two ** memory allocators. */ #define JOURNAL_CHUNKSIZE ((int)(1024-sizeof(FileChunk*))) /* ** The rollback journal is composed of a linked list of these structures. */ struct FileChunk { FileChunk *pNext; /* Next chunk in the journal */ u8 zChunk[JOURNAL_CHUNKSIZE]; /* Content of this chunk */ }; /* ** An instance of this object serves as a cursor into the rollback journal. ** The cursor can be either for reading or writing. */ struct FilePoint { sqlite3_int64 iOffset; /* Offset from the beginning of the file */ FileChunk *pChunk; /* Specific chunk into which cursor points */ }; /* ** This subclass is a subclass of sqlite3_file. Each open memory-journal ** is an instance of this class. */ struct MemJournal { sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */ FileChunk *pFirst; /* Head of in-memory chunk-list */ FilePoint endpoint; /* Pointer to the end of the file */ FilePoint readpoint; /* Pointer to the end of the last xRead() */ }; /* ** Read data from the in-memory journal file. This is the implementation ** of the sqlite3_vfs.xRead method. */ static int memjrnlRead( sqlite3_file *pJfd, /* The journal file from which to read */ void *zBuf, /* Put the results here */ int iAmt, /* Number of bytes to read */ sqlite_int64 iOfst /* Begin reading at this offset */ ){ MemJournal *p = (MemJournal *)pJfd; u8 *zOut = zBuf; int nRead = iAmt; int iChunkOffset; FileChunk *pChunk; /* SQLite never tries to read past the end of a rollback journal file */ assert( iOfst+iAmt<=p->endpoint.iOffset ); if( p->readpoint.iOffset!=iOfst || iOfst==0 ){ sqlite3_int64 iOff = 0; for(pChunk=p->pFirst; ALWAYS(pChunk) && (iOff+JOURNAL_CHUNKSIZE)<=iOfst; pChunk=pChunk->pNext ){ iOff += JOURNAL_CHUNKSIZE; } }else{ pChunk = p->readpoint.pChunk; } iChunkOffset = (int)(iOfst%JOURNAL_CHUNKSIZE); do { int iSpace = JOURNAL_CHUNKSIZE - iChunkOffset; int nCopy = MIN(nRead, (JOURNAL_CHUNKSIZE - iChunkOffset)); memcpy(zOut, &pChunk->zChunk[iChunkOffset], nCopy); zOut += nCopy; nRead -= iSpace; iChunkOffset = 0; } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 ); p->readpoint.iOffset = iOfst+iAmt; p->readpoint.pChunk = pChunk; return SQLITE_OK; } /* ** Write data to the file. */ static int memjrnlWrite( sqlite3_file *pJfd, /* The journal file into which to write */ const void *zBuf, /* Take data to be written from here */ int iAmt, /* Number of bytes to write */ sqlite_int64 iOfst /* Begin writing at this offset into the file */ ){ MemJournal *p = (MemJournal *)pJfd; int nWrite = iAmt; u8 *zWrite = (u8 *)zBuf; /* An in-memory journal file should only ever be appended to. Random ** access writes are not required by sqlite. */ assert( iOfst==p->endpoint.iOffset ); UNUSED_PARAMETER(iOfst); while( nWrite>0 ){ FileChunk *pChunk = p->endpoint.pChunk; int iChunkOffset = (int)(p->endpoint.iOffset%JOURNAL_CHUNKSIZE); int iSpace = MIN(nWrite, JOURNAL_CHUNKSIZE - iChunkOffset); if( iChunkOffset==0 ){ /* New chunk is required to extend the file. */ FileChunk *pNew = sqlite3_malloc(sizeof(FileChunk)); if( !pNew ){ return SQLITE_IOERR_NOMEM; } pNew->pNext = 0; if( pChunk ){ assert( p->pFirst ); pChunk->pNext = pNew; }else{ assert( !p->pFirst ); p->pFirst = pNew; } p->endpoint.pChunk = pNew; } memcpy(&p->endpoint.pChunk->zChunk[iChunkOffset], zWrite, iSpace); zWrite += iSpace; nWrite -= iSpace; p->endpoint.iOffset += iSpace; } return SQLITE_OK; } /* ** Truncate the file. */ static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){ MemJournal *p = (MemJournal *)pJfd; FileChunk *pChunk; assert(size==0); UNUSED_PARAMETER(size); pChunk = p->pFirst; while( pChunk ){ FileChunk *pTmp = pChunk; pChunk = pChunk->pNext; sqlite3_free(pTmp); } sqlite3MemJournalOpen(pJfd); return SQLITE_OK; } /* ** Close the file. */ static int memjrnlClose(sqlite3_file *pJfd){ memjrnlTruncate(pJfd, 0); return SQLITE_OK; } /* ** Sync the file. ** ** Syncing an in-memory journal is a no-op. And, in fact, this routine ** is never called in a working implementation. This implementation ** exists purely as a contingency, in case some malfunction in some other ** part of SQLite causes Sync to be called by mistake. */ static int memjrnlSync(sqlite3_file *NotUsed, int NotUsed2){ UNUSED_PARAMETER2(NotUsed, NotUsed2); return SQLITE_OK; } /* ** Query the size of the file in bytes. */ static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){ MemJournal *p = (MemJournal *)pJfd; *pSize = (sqlite_int64) p->endpoint.iOffset; return SQLITE_OK; } /* ** Table of methods for MemJournal sqlite3_file object. */ static const struct sqlite3_io_methods MemJournalMethods = { 1, /* iVersion */ memjrnlClose, /* xClose */ memjrnlRead, /* xRead */ memjrnlWrite, /* xWrite */ memjrnlTruncate, /* xTruncate */ memjrnlSync, /* xSync */ memjrnlFileSize, /* xFileSize */ 0, /* xLock */ 0, /* xUnlock */ 0, /* xCheckReservedLock */ 0, /* xFileControl */ 0, /* xSectorSize */ 0, /* xDeviceCharacteristics */ 0, /* xShmMap */ 0, /* xShmLock */ 0, /* xShmBarrier */ 0, /* xShmUnmap */ 0, /* xFetch */ 0 /* xUnfetch */ }; /* ** Open a journal file. */ SQLITE_PRIVATE void sqlite3MemJournalOpen(sqlite3_file *pJfd){ MemJournal *p = (MemJournal *)pJfd; assert( EIGHT_BYTE_ALIGNMENT(p) ); memset(p, 0, sqlite3MemJournalSize()); p->pMethod = (sqlite3_io_methods*)&MemJournalMethods; } /* ** Return true if the file-handle passed as an argument is ** an in-memory journal */ SQLITE_PRIVATE int sqlite3IsMemJournal(sqlite3_file *pJfd){ return pJfd->pMethods==&MemJournalMethods; } /* ** Return the number of bytes required to store a MemJournal file descriptor. */ SQLITE_PRIVATE int sqlite3MemJournalSize(void){ return sizeof(MemJournal); } /************** End of memjournal.c ******************************************/ /************** Begin file main.c ********************************************/ /* #include "sqliteInt.h" */ /* IMPLEMENTATION-OF: R-63124-39300 The sqlite3_sourceid() function returns a ** pointer to a string constant whose value is the same as the ** SQLITE_SOURCE_ID C preprocessor macro. */ SQLITE_PRIVATE const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } /* ** If the following global variable points to a string which is the ** name of a directory, then that directory will be used to store ** temporary files. ** ** See also the "PRAGMA temp_store_directory" SQL command. */ SQLITE_PRIVATE char *sqlite3_temp_directory = 0; /* ** Initialize SQLite. ** ** This routine must be called to initialize the memory allocation, ** VFS, and mutex subsystems prior to doing any serious work with ** SQLite. But as long as you do not compile with SQLITE_OMIT_AUTOINIT ** this routine will be called automatically by key routines such as ** sqlite3_open(). ** ** This routine is a no-op except on its very first call for the process, ** or for the first call after a call to sqlite3BtreeShutdown. ** ** The first thread to call this routine runs the initialization to ** completion. If subsequent threads call this routine before the first ** thread has finished the initialization process, then the subsequent ** threads must block until the first thread finishes with the initialization. ** ** The first thread might call this routine recursively. Recursive ** calls to this routine should not block, of course. Otherwise the ** initialization process would never complete. ** ** Let X be the first thread to enter this routine. Let Y be some other ** thread. Then while the initial invocation of this routine by X is ** incomplete, it is required that: ** ** * Calls to this routine from Y must block until the outer-most ** call by X completes. ** ** * Recursive calls to this routine from thread X return immediately ** without blocking. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeInitialize(void){ MUTEX_LOGIC( sqlite3_mutex *pMaster; ) /* The main static mutex */ int rc; /* Result code */ #ifdef SQLITE_EXTRA_INIT int bRunExtraInit = 0; /* Extra initialization needed */ #endif #ifdef SQLITE_OMIT_WSD rc = sqlite3_wsd_init(4096, 24); if( rc!=SQLITE_OK ){ return rc; } #endif /* If the following assert() fails on some obscure processor/compiler ** combination, the work-around is to set the correct pointer ** size at compile-time using -DSQLITE_PTRSIZE=n compile-time option */ assert( SQLITE_PTRSIZE==sizeof(char*) ); /* If SQLite is already completely initialized, then this call ** to sqlite3BtreeInitialize() should be a no-op. But the initialization ** must be complete. So isInit must not be set until the very end ** of this routine. */ if( sqlite3GlobalConfig.isInit ) return SQLITE_OK; /* Make sure the mutex subsystem is initialized. If unable to ** initialize the mutex subsystem, return early with the error. ** If the system is so sick that we are unable to allocate a mutex, ** there is not much SQLite is going to be able to do. ** ** The mutex subsystem must take care of serializing its own ** initialization. */ rc = sqlite3MutexInit(); if( rc ) return rc; /* Initialize the malloc() system and the recursive pInitMutex mutex. ** This operation is protected by the STATIC_MASTER mutex. Note that ** MutexAlloc() is called for a static mutex prior to initializing the ** malloc subsystem - this implies that the allocation of a static ** mutex must not require support from the malloc subsystem. */ MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); ) sqlite3_mutex_enter(pMaster); sqlite3GlobalConfig.isMutexInit = 1; if( !sqlite3GlobalConfig.isMallocInit ){ rc = sqlite3MallocInit(); } if( rc==SQLITE_OK ){ sqlite3GlobalConfig.isMallocInit = 1; if( !sqlite3GlobalConfig.pInitMutex ){ sqlite3GlobalConfig.pInitMutex = sqlite3MutexAlloc(SQLITE_MUTEX_RECURSIVE); if( sqlite3GlobalConfig.bCoreMutex && !sqlite3GlobalConfig.pInitMutex ){ rc = SQLITE_NOMEM; } } } if( rc==SQLITE_OK ){ sqlite3GlobalConfig.nRefInitMutex++; } sqlite3_mutex_leave(pMaster); /* If rc is not SQLITE_OK at this point, then either the malloc ** subsystem could not be initialized or the system failed to allocate ** the pInitMutex mutex. Return an error in either case. */ if( rc!=SQLITE_OK ){ return rc; } /* Do the rest of the initialization under the recursive mutex so ** that we will be able to handle recursive calls into ** sqlite3BtreeInitialize(). The recursive calls normally come through ** sqlite3_os_init() when it invokes sqlite3_vfs_register(), but other ** recursive calls might also be possible. ** ** IMPLEMENTATION-OF: R-00140-37445 SQLite automatically serializes calls ** to the xInit method, so the xInit method need not be threadsafe. ** ** The following mutex is what serializes access to the appdef pcache xInit ** methods. The sqlite3_pcache_methods.xInit() all is embedded in the ** call to sqlite3PcacheInitialize(). */ sqlite3_mutex_enter(sqlite3GlobalConfig.pInitMutex); if( sqlite3GlobalConfig.isInit==0 && sqlite3GlobalConfig.inProgress==0 ){ sqlite3GlobalConfig.inProgress = 1; if( sqlite3GlobalConfig.isPCacheInit==0 ){ rc = sqlite3PcacheInitialize(); } if( rc==SQLITE_OK ){ sqlite3GlobalConfig.isPCacheInit = 1; rc = sqlite3OsInit(); } if( rc==SQLITE_OK ){ sqlite3PCacheBufferSetup( sqlite3GlobalConfig.pPage, sqlite3GlobalConfig.szPage, sqlite3GlobalConfig.nPage); sqlite3GlobalConfig.isInit = 1; #ifdef SQLITE_EXTRA_INIT bRunExtraInit = 1; #endif } sqlite3GlobalConfig.inProgress = 0; } sqlite3_mutex_leave(sqlite3GlobalConfig.pInitMutex); /* Go back under the static mutex and clean up the recursive ** mutex to prevent a resource leak. */ sqlite3_mutex_enter(pMaster); sqlite3GlobalConfig.nRefInitMutex--; if( sqlite3GlobalConfig.nRefInitMutex<=0 ){ assert( sqlite3GlobalConfig.nRefInitMutex==0 ); sqlite3_mutex_free(sqlite3GlobalConfig.pInitMutex); sqlite3GlobalConfig.pInitMutex = 0; } sqlite3_mutex_leave(pMaster); /* The following is just a sanity check to make sure SQLite has ** been compiled correctly. It is important to run this code, but ** we don't want to run it too often and soak up CPU cycles for no ** reason. So we run it once during initialization. */ #ifndef NDEBUG #ifndef SQLITE_OMIT_FLOATING_POINT /* This section of code's only "output" is via assert() statements. */ if ( rc==SQLITE_OK ){ u64 x = (((u64)1)<<63)-1; double y; assert(sizeof(x)==8); assert(sizeof(x)==sizeof(y)); memcpy(&y, &x, 8); assert( sqlite3IsNaN(y) ); } #endif #endif /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT ** compile-time option. */ #ifdef SQLITE_EXTRA_INIT if( bRunExtraInit ){ int SQLITE_EXTRA_INIT(const char*); rc = SQLITE_EXTRA_INIT(0); } #endif return rc; } /* ** Undo the effects of sqlite3BtreeInitialize(). Must not be called while ** there are outstanding database connections or memory allocations or ** while any part of SQLite is otherwise in use in any thread. This ** routine is not threadsafe. But it is safe to invoke this routine ** on when SQLite is already shut down. If SQLite is already shut down ** when this routine is invoked, then this routine is a harmless no-op. */ SQLITE_API int SQLITE_STDCALL sqlite3BtreeShutdown(void){ #ifdef SQLITE_OMIT_WSD int rc = sqlite3_wsd_init(4096, 24); if( rc!=SQLITE_OK ){ return rc; } #endif if( sqlite3GlobalConfig.isInit ){ #ifdef SQLITE_EXTRA_SHUTDOWN void SQLITE_EXTRA_SHUTDOWN(void); SQLITE_EXTRA_SHUTDOWN(); #endif sqlite3_os_end(); sqlite3GlobalConfig.isInit = 0; } if( sqlite3GlobalConfig.isPCacheInit ){ sqlite3PcacheShutdown(); sqlite3GlobalConfig.isPCacheInit = 0; } if( sqlite3GlobalConfig.isMallocInit ){ sqlite3MallocEnd(); sqlite3GlobalConfig.isMallocInit = 0; } if( sqlite3GlobalConfig.isMutexInit ){ sqlite3MutexEnd(); sqlite3GlobalConfig.isMutexInit = 0; } return SQLITE_OK; } /* ** The following routines are substitutes for constants SQLITE_CORRUPT, ** SQLITE_MISUSE, SQLITE_CANTOPEN, SQLITE_IOERR and possibly other error ** constants. They serve two purposes: ** ** 1. Serve as a convenient place to set a breakpoint in a debugger ** to detect when version error conditions occurs. ** ** 2. Invoke sqlite3_log() to provide the source code location where ** a low-level error is first detected. */ SQLITE_PRIVATE int sqlite3CorruptError(int lineno){ testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(SQLITE_CORRUPT, "database corruption at line %d of [%.10s]", lineno, 20+sqlite3_sourceid()); return SQLITE_CORRUPT; } SQLITE_PRIVATE int sqlite3MisuseError(int lineno){ testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(SQLITE_MISUSE, "misuse at line %d of [%.10s]", lineno, 20+sqlite3_sourceid()); return SQLITE_MISUSE; } SQLITE_PRIVATE int sqlite3CantopenError(int lineno){ testcase( sqlite3GlobalConfig.xLog!=0 ); sqlite3_log(SQLITE_CANTOPEN, "cannot open file at line %d of [%.10s]", lineno, 20+sqlite3_sourceid()); return SQLITE_CANTOPEN; } /* ** Invoke the given busy handler. ** ** This routine is called when an operation failed with a lock. ** If this routine returns non-zero, the lock is retried. If it ** returns 0, the operation aborts with an SQLITE_BUSY error. */ SQLITE_PRIVATE int sqlite3InvokeBusyHandler(BusyHandler *p){ int rc; if( NEVER(p==0) || p->xFunc==0 || p->nBusy<0 ) return 0; rc = p->xFunc(p->pArg, p->nBusy); if( rc==0 ){ p->nBusy = -1; }else{ p->nBusy++; } return rc; } /* ** This API allows applications to modify the global configuration of ** the SQLite library at run-time. ** ** This routine should only be called when there are no outstanding ** database connections or memory allocations. This routine is not ** threadsafe. Failure to heed these warnings can lead to unpredictable ** behavior. */ SQLITE_PRIVATE int sqlite3_config(int op, ...){ va_list ap; int rc = SQLITE_OK; /* sqlite3_config() shall return SQLITE_MISUSE if it is invoked while ** the SQLite library is in use. */ if( sqlite3GlobalConfig.isInit ) return SQLITE_MISUSE_BKPT; va_start(ap, op); switch( op ){ /* Mutex configuration options are only available in a threadsafe ** compile. */ #if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-54466-46756 */ case SQLITE_CONFIG_SINGLETHREAD: { /* EVIDENCE-OF: R-02748-19096 This option sets the threading mode to ** Single-thread. */ sqlite3GlobalConfig.bCoreMutex = 0; /* Disable mutex on core */ sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ break; } #endif #if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-20520-54086 */ case SQLITE_CONFIG_MULTITHREAD: { /* EVIDENCE-OF: R-14374-42468 This option sets the threading mode to ** Multi-thread. */ sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ sqlite3GlobalConfig.bFullMutex = 0; /* Disable mutex on connections */ break; } #endif #if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-59593-21810 */ case SQLITE_CONFIG_SERIALIZED: { /* EVIDENCE-OF: R-41220-51800 This option sets the threading mode to ** Serialized. */ sqlite3GlobalConfig.bCoreMutex = 1; /* Enable mutex on core */ sqlite3GlobalConfig.bFullMutex = 1; /* Enable mutex on connections */ break; } #endif #if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-63666-48755 */ case SQLITE_CONFIG_MUTEX: { /* Specify an alternative mutex implementation */ sqlite3GlobalConfig.mutex = *va_arg(ap, sqlite3_mutex_methods*); break; } #endif #if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE>0 /* IMP: R-14450-37597 */ case SQLITE_CONFIG_GETMUTEX: { /* Retrieve the current mutex implementation */ *va_arg(ap, sqlite3_mutex_methods*) = sqlite3GlobalConfig.mutex; break; } #endif case SQLITE_CONFIG_MALLOC: { /* EVIDENCE-OF: R-55594-21030 The SQLITE_CONFIG_MALLOC option takes a ** single argument which is a pointer to an instance of the ** sqlite3_mem_methods structure. The argument specifies alternative ** low-level memory allocation routines to be used in place of the memory ** allocation routines built into SQLite. */ sqlite3GlobalConfig.m = *va_arg(ap, sqlite3_mem_methods*); break; } case SQLITE_CONFIG_GETMALLOC: { /* EVIDENCE-OF: R-51213-46414 The SQLITE_CONFIG_GETMALLOC option takes a ** single argument which is a pointer to an instance of the ** sqlite3_mem_methods structure. The sqlite3_mem_methods structure is ** filled with the currently defined memory allocation routines. */ if( sqlite3GlobalConfig.m.xMalloc==0 ) sqlite3MemSetDefault(); *va_arg(ap, sqlite3_mem_methods*) = sqlite3GlobalConfig.m; break; } case SQLITE_CONFIG_MEMSTATUS: { /* EVIDENCE-OF: R-61275-35157 The SQLITE_CONFIG_MEMSTATUS option takes ** single argument of type int, interpreted as a boolean, which enables ** or disables the collection of memory allocation statistics. */ sqlite3GlobalConfig.bMemstat = va_arg(ap, int); break; } case SQLITE_CONFIG_SCRATCH: { /* EVIDENCE-OF: R-08404-60887 There are three arguments to ** SQLITE_CONFIG_SCRATCH: A pointer an 8-byte aligned memory buffer from ** which the scratch allocations will be drawn, the size of each scratch ** allocation (sz), and the maximum number of scratch allocations (N). */ sqlite3GlobalConfig.pScratch = va_arg(ap, void*); sqlite3GlobalConfig.szScratch = va_arg(ap, int); sqlite3GlobalConfig.nScratch = va_arg(ap, int); break; } case SQLITE_CONFIG_PAGECACHE: { /* EVIDENCE-OF: R-31408-40510 There are three arguments to ** SQLITE_CONFIG_PAGECACHE: A pointer to 8-byte aligned memory, the size ** of each page buffer (sz), and the number of pages (N). */ sqlite3GlobalConfig.pPage = va_arg(ap, void*); sqlite3GlobalConfig.szPage = va_arg(ap, int); sqlite3GlobalConfig.nPage = va_arg(ap, int); break; } case SQLITE_CONFIG_PCACHE_HDRSZ: { /* EVIDENCE-OF: R-39100-27317 The SQLITE_CONFIG_PCACHE_HDRSZ option takes ** a single parameter which is a pointer to an integer and writes into ** that integer the number of extra bytes per page required for each page ** in SQLITE_CONFIG_PAGECACHE. */ *va_arg(ap, int*) = sqlite3HeaderSizeBtree() + sqlite3HeaderSizePcache() + sqlite3HeaderSizePcache1(); break; } case SQLITE_CONFIG_PCACHE: { /* no-op */ break; } case SQLITE_CONFIG_GETPCACHE: { /* now an error */ rc = SQLITE_ERROR; break; } case SQLITE_CONFIG_PCACHE2: { /* EVIDENCE-OF: R-63325-48378 The SQLITE_CONFIG_PCACHE2 option takes a ** single argument which is a pointer to an sqlite3_pcache_methods2 ** object. This object specifies the interface to a custom page cache ** implementation. */ sqlite3GlobalConfig.pcache2 = *va_arg(ap, sqlite3_pcache_methods2*); break; } case SQLITE_CONFIG_GETPCACHE2: { /* EVIDENCE-OF: R-22035-46182 The SQLITE_CONFIG_GETPCACHE2 option takes a ** single argument which is a pointer to an sqlite3_pcache_methods2 ** object. SQLite copies of the current page cache implementation into ** that object. */ if( sqlite3GlobalConfig.pcache2.xInit==0 ){ sqlite3PCacheSetDefault(); } *va_arg(ap, sqlite3_pcache_methods2*) = sqlite3GlobalConfig.pcache2; break; } /* EVIDENCE-OF: R-06626-12911 The SQLITE_CONFIG_HEAP option is only ** available if SQLite is compiled with either SQLITE_ENABLE_MEMSYS3 or ** SQLITE_ENABLE_MEMSYS5 and returns SQLITE_ERROR if invoked otherwise. */ #if defined(SQLITE_ENABLE_MEMSYS3) || defined(SQLITE_ENABLE_MEMSYS5) case SQLITE_CONFIG_HEAP: { /* EVIDENCE-OF: R-19854-42126 There are three arguments to ** SQLITE_CONFIG_HEAP: An 8-byte aligned pointer to the memory, the ** number of bytes in the memory buffer, and the minimum allocation size. */ sqlite3GlobalConfig.pHeap = va_arg(ap, void*); sqlite3GlobalConfig.nHeap = va_arg(ap, int); sqlite3GlobalConfig.mnReq = va_arg(ap, int); if( sqlite3GlobalConfig.mnReq<1 ){ sqlite3GlobalConfig.mnReq = 1; }else if( sqlite3GlobalConfig.mnReq>(1<<12) ){ /* cap min request size at 2^12 */ sqlite3GlobalConfig.mnReq = (1<<12); } if( sqlite3GlobalConfig.pHeap==0 ){ /* EVIDENCE-OF: R-49920-60189 If the first pointer (the memory pointer) ** is NULL, then SQLite reverts to using its default memory allocator ** (the system malloc() implementation), undoing any prior invocation of ** SQLITE_CONFIG_MALLOC. ** ** Setting sqlite3GlobalConfig.m to all zeros will cause malloc to ** revert to its default implementation when sqlite3BtreeInitialize() is run */ memset(&sqlite3GlobalConfig.m, 0, sizeof(sqlite3GlobalConfig.m)); }else{ /* EVIDENCE-OF: R-61006-08918 If the memory pointer is not NULL then the ** alternative memory allocator is engaged to handle all of SQLites ** memory allocation needs. */ #ifdef SQLITE_ENABLE_MEMSYS3 sqlite3GlobalConfig.m = *sqlite3MemGetMemsys3(); #endif #ifdef SQLITE_ENABLE_MEMSYS5 sqlite3GlobalConfig.m = *sqlite3MemGetMemsys5(); #endif } break; } #endif case SQLITE_CONFIG_LOOKASIDE: { sqlite3GlobalConfig.szLookaside = va_arg(ap, int); sqlite3GlobalConfig.nLookaside = va_arg(ap, int); break; } /* Record a pointer to the logger function and its first argument. ** The default is NULL. Logging is disabled if the function pointer is ** NULL. */ case SQLITE_CONFIG_LOG: { /* MSVC is picky about pulling func ptrs from va lists. ** http://support.microsoft.com/kb/47961 ** sqlite3GlobalConfig.xLog = va_arg(ap, void(*)(void*,int,const char*)); */ typedef void(*LOGFUNC_t)(void*,int,const char*); sqlite3GlobalConfig.xLog = va_arg(ap, LOGFUNC_t); sqlite3GlobalConfig.pLogArg = va_arg(ap, void*); break; } /* EVIDENCE-OF: R-55548-33817 The compile-time setting for URI filenames ** can be changed at start-time using the ** sqlite3_config(SQLITE_CONFIG_URI,1) or ** sqlite3_config(SQLITE_CONFIG_URI,0) configuration calls. */ case SQLITE_CONFIG_URI: { /* EVIDENCE-OF: R-25451-61125 The SQLITE_CONFIG_URI option takes a single ** argument of type int. If non-zero, then URI handling is globally ** enabled. If the parameter is zero, then URI handling is globally ** disabled. */ sqlite3GlobalConfig.bOpenUri = va_arg(ap, int); break; } case SQLITE_CONFIG_COVERING_INDEX_SCAN: { /* EVIDENCE-OF: R-36592-02772 The SQLITE_CONFIG_COVERING_INDEX_SCAN ** option takes a single integer argument which is interpreted as a ** boolean in order to enable or disable the use of covering indices for ** full table scans in the query optimizer. */ sqlite3GlobalConfig.bUseCis = va_arg(ap, int); break; } case SQLITE_CONFIG_MMAP_SIZE: { /* EVIDENCE-OF: R-58063-38258 SQLITE_CONFIG_MMAP_SIZE takes two 64-bit ** integer (sqlite3_int64) values that are the default mmap size limit ** (the default setting for PRAGMA mmap_size) and the maximum allowed ** mmap size limit. */ sqlite3_int64 szMmap = va_arg(ap, sqlite3_int64); sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); /* EVIDENCE-OF: R-53367-43190 If either argument to this option is ** negative, then that argument is changed to its compile-time default. ** ** EVIDENCE-OF: R-34993-45031 The maximum allowed mmap size will be ** silently truncated if necessary so that it does not exceed the ** compile-time maximum mmap size set by the SQLITE_MAX_MMAP_SIZE ** compile-time option. */ if( mxMmap<0 || mxMmap>SQLITE_MAX_MMAP_SIZE ){ mxMmap = SQLITE_MAX_MMAP_SIZE; } if( szMmap<0 ) szMmap = SQLITE_DEFAULT_MMAP_SIZE; if( szMmap>mxMmap) szMmap = mxMmap; sqlite3GlobalConfig.mxMmap = mxMmap; sqlite3GlobalConfig.szMmap = szMmap; break; } #if SQLITE_OS_WIN && defined(SQLITE_WIN32_MALLOC) /* IMP: R-04780-55815 */ case SQLITE_CONFIG_WIN32_HEAPSIZE: { /* EVIDENCE-OF: R-34926-03360 SQLITE_CONFIG_WIN32_HEAPSIZE takes a 32-bit ** unsigned integer value that specifies the maximum size of the created ** heap. */ sqlite3GlobalConfig.nHeap = va_arg(ap, int); break; } #endif case SQLITE_CONFIG_PMASZ: { sqlite3GlobalConfig.szPma = va_arg(ap, unsigned int); break; } default: { rc = SQLITE_ERROR; break; } } va_end(ap); return rc; } /************** End of main.c ************************************************/