diff options
author | Aaron Seigo <aseigo@kde.org> | 2014-12-14 12:00:05 +0100 |
---|---|---|
committer | Aaron Seigo <aseigo@kde.org> | 2014-12-14 12:00:05 +0100 |
commit | 7cc25005b8c46d1fa783d33def2c6923e8ef8469 (patch) | |
tree | 64fa59d17af29838396cf37b912b3babd885e5dd /common/unqlite/os_unix.c | |
parent | bfc32f265e8ad72823db960fed371d72596003b7 (diff) | |
parent | a6ed70495f9f3ecb21c26860dda16aadcdc91c3a (diff) | |
download | sink-7cc25005b8c46d1fa783d33def2c6923e8ef8469.tar.gz sink-7cc25005b8c46d1fa783d33def2c6923e8ef8469.zip |
Merge branch 'unqlite'
Diffstat (limited to 'common/unqlite/os_unix.c')
-rw-r--r-- | common/unqlite/os_unix.c | 1769 |
1 files changed, 1769 insertions, 0 deletions
diff --git a/common/unqlite/os_unix.c b/common/unqlite/os_unix.c new file mode 100644 index 0000000..f578d07 --- /dev/null +++ b/common/unqlite/os_unix.c | |||
@@ -0,0 +1,1769 @@ | |||
1 | /* | ||
2 | * Symisc unQLite: An Embeddable NoSQL (Post Modern) Database Engine. | ||
3 | * Copyright (C) 2012-2013, Symisc Systems http://unqlite.org/ | ||
4 | * Version 1.1.6 | ||
5 | * For information on licensing, redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES | ||
6 | * please contact Symisc Systems via: | ||
7 | * legal@symisc.net | ||
8 | * licensing@symisc.net | ||
9 | * contact@symisc.net | ||
10 | * or visit: | ||
11 | * http://unqlite.org/licensing.html | ||
12 | */ | ||
13 | /* $SymiscID: os_unix.c v1.3 FreeBSD 2013-04-05 01:10 devel <chm@symisc.net> $ */ | ||
14 | #ifndef UNQLITE_AMALGAMATION | ||
15 | #include "unqliteInt.h" | ||
16 | #endif | ||
17 | /* | ||
18 | * Omit the whole layer from the build if compiling for platforms other than Unix (Linux, BSD, Solaris, OS X, etc.). | ||
19 | * Note: Mostly SQLite3 source tree. | ||
20 | */ | ||
21 | #if defined(__UNIXES__) | ||
22 | /** This file contains the VFS implementation for unix-like operating systems | ||
23 | ** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others. | ||
24 | ** | ||
25 | ** There are actually several different VFS implementations in this file. | ||
26 | ** The differences are in the way that file locking is done. The default | ||
27 | ** implementation uses Posix Advisory Locks. Alternative implementations | ||
28 | ** use flock(), dot-files, various proprietary locking schemas, or simply | ||
29 | ** skip locking all together. | ||
30 | ** | ||
31 | ** This source file is organized into divisions where the logic for various | ||
32 | ** subfunctions is contained within the appropriate division. PLEASE | ||
33 | ** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed | ||
34 | ** in the correct division and should be clearly labeled. | ||
35 | ** | ||
36 | */ | ||
37 | /* | ||
38 | ** standard include files. | ||
39 | */ | ||
40 | #include <sys/types.h> | ||
41 | #include <sys/stat.h> | ||
42 | #include <sys/uio.h> | ||
43 | #include <sys/file.h> | ||
44 | #include <fcntl.h> | ||
45 | #include <unistd.h> | ||
46 | #include <time.h> | ||
47 | #include <sys/time.h> | ||
48 | #include <errno.h> | ||
49 | #if defined(__APPLE__) | ||
50 | # include <sys/mount.h> | ||
51 | #endif | ||
52 | /* | ||
53 | ** Allowed values of unixFile.fsFlags | ||
54 | */ | ||
55 | #define UNQLITE_FSFLAGS_IS_MSDOS 0x1 | ||
56 | |||
57 | /* | ||
58 | ** Default permissions when creating a new file | ||
59 | */ | ||
60 | #ifndef UNQLITE_DEFAULT_FILE_PERMISSIONS | ||
61 | # define UNQLITE_DEFAULT_FILE_PERMISSIONS 0644 | ||
62 | #endif | ||
63 | /* | ||
64 | ** Default permissions when creating auto proxy dir | ||
65 | */ | ||
66 | #ifndef UNQLITE_DEFAULT_PROXYDIR_PERMISSIONS | ||
67 | # define UNQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 | ||
68 | #endif | ||
69 | /* | ||
70 | ** Maximum supported path-length. | ||
71 | */ | ||
72 | #define MAX_PATHNAME 512 | ||
/*
** IS_LOCK_ERROR(x) is true when the result code x is a genuine locking
** failure, i.e. anything other than the two expected outcomes UNQLITE_OK
** and UNQLITE_BUSY. Callers use it to decide whether lastErrno should be
** recorded.
**
** The argument is parenthesized so that an expression argument (for
** example "rc | flag") is compared as a whole instead of being split up
** by operator precedence. It is still evaluated twice, so it must not
** have side effects.
*/
#define IS_LOCK_ERROR(x) (((x) != UNQLITE_OK) && ((x) != UNQLITE_BUSY))
78 | /* Forward references */ | ||
79 | typedef struct unixInodeInfo unixInodeInfo; /* An i-node */ | ||
80 | typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */ | ||
81 | /* | ||
82 | ** Sometimes, after a file handle is closed by SQLite, the file descriptor | ||
83 | ** cannot be closed immediately. In these cases, instances of the following | ||
84 | ** structure are used to store the file descriptor while waiting for an | ||
85 | ** opportunity to either close or reuse it. | ||
86 | */ | ||
87 | struct UnixUnusedFd { | ||
88 | int fd; /* File descriptor waiting to be closed (or reused) */ | ||
89 | int flags; /* Flags this file descriptor was opened with */ | ||
90 | UnixUnusedFd *pNext; /* Next entry in the singly linked list of unused descriptors on the same file */ | ||
91 | }; | ||
92 | /* | ||
93 | ** The unixFile structure is a subclass of unqlite_file specific to the unix | ||
94 | ** VFS implementations. | ||
95 | */ | ||
96 | typedef struct unixFile unixFile; | ||
97 | struct unixFile { | ||
98 | const unqlite_io_methods *pMethod; /* Always the first entry */ | ||
99 | unixInodeInfo *pInode; /* Info about locks on this inode (shared by every unixFile on the inode) */ | ||
100 | int h; /* The file descriptor; set to -1 by setPendingFd() */ | ||
101 | int dirfd; /* File descriptor for the directory */ | ||
102 | unsigned char eFileLock; /* The type of lock held on this fd */ | ||
103 | int lastErrno; /* The unix errno from last I/O error */ | ||
104 | void *lockingContext; /* Locking style specific state */ | ||
105 | UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd; moved onto pInode->pUnused by setPendingFd() */ | ||
106 | int fileFlags; /* Miscellaneous flags, e.g. UNQLITE_WHOLE_FILE_LOCKING */ | ||
107 | const char *zPath; /* Name of the file */ | ||
108 | unsigned fsFlags; /* Cached details from statfs(), e.g. UNQLITE_FSFLAGS_IS_MSDOS */ | ||
109 | }; | ||
110 | /* | ||
111 | ** The following macros define bits in unixFile.fileFlags | ||
112 | */ | ||
113 | #define UNQLITE_WHOLE_FILE_LOCKING 0x0001 /* Use whole-file locking */ | ||
114 | /* | ||
115 | ** Define various macros that are missing from some systems. | ||
116 | */ | ||
117 | #ifndef O_LARGEFILE | ||
118 | # define O_LARGEFILE 0 | ||
119 | #endif | ||
120 | #ifndef O_NOFOLLOW | ||
121 | # define O_NOFOLLOW 0 | ||
122 | #endif | ||
123 | #ifndef O_BINARY | ||
124 | # define O_BINARY 0 | ||
125 | #endif | ||
126 | /* | ||
127 | ** Helper functions to obtain and relinquish the global mutex. The | ||
128 | ** global mutex is used to protect the unixInodeInfo and | ||
129 | ** vxworksFileId objects used by this file, all of which may be | ||
130 | ** shared by multiple threads. | ||
131 | ** | ||
132 | ** Function unixMutexHeld() is used to assert() that the global mutex | ||
133 | ** is held when required. This function is only used as part of assert() | ||
134 | ** statements. e.g. | ||
135 | ** | ||
136 | ** unixEnterMutex() | ||
137 | ** assert( unixMutexHeld() ); | ||
138 | ** unixLeaveMutex() | ||
139 | */ | ||
/*
** Acquire the global VFS mutex (static mutex slot SXMUTEX_TYPE_STATIC_2).
** Compiles to an empty function unless UNQLITE_ENABLE_THREADS is defined,
** and does nothing at run time when no mutex subsystem is exported.
*/
static void unixEnterMutex(void){
#ifdef UNQLITE_ENABLE_THREADS
  const SyMutexMethods *pMethods = SyMutexExportMethods();
  SyMutex *pGlobal;
  if( pMethods==0 ){
    /* No mutex implementation registered: nothing to lock */
    return;
  }
  pGlobal = pMethods->xNew(SXMUTEX_TYPE_STATIC_2); /* pre-allocated, never fail */
  SyMutexEnter(pMethods,pGlobal);
#endif /* UNQLITE_ENABLE_THREADS */
}
/*
** Release the global VFS mutex taken by unixEnterMutex(). Compiles to an
** empty function unless UNQLITE_ENABLE_THREADS is defined, and does
** nothing at run time when no mutex subsystem is exported.
*/
static void unixLeaveMutex(void){
#ifdef UNQLITE_ENABLE_THREADS
  const SyMutexMethods *pMethods = SyMutexExportMethods();
  SyMutex *pGlobal;
  if( pMethods==0 ){
    /* No mutex implementation registered: nothing to unlock */
    return;
  }
  pGlobal = pMethods->xNew(SXMUTEX_TYPE_STATIC_2); /* pre-allocated, never fail */
  SyMutexLeave(pMethods,pGlobal);
#endif /* UNQLITE_ENABLE_THREADS */
}
158 | /* | ||
159 | ** This routine translates a standard POSIX errno code into something | ||
160 | ** useful to the clients of the unqlite3 functions. Specifically, it is | ||
161 | ** intended to translate a variety of "try again" errors into UNQLITE_BUSY | ||
162 | ** and a variety of "please close the file descriptor NOW" errors into | ||
163 | ** UNQLITE_IOERR | ||
164 | ** | ||
165 | ** Errors during initialization of locks, or file system support for locks, | ||
166 | ** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately. | ||
167 | */ | ||
168 | static int unqliteErrorFromPosixError(int posixError, int unqliteIOErr) { | ||
169 | switch (posixError) { | ||
170 | case 0: | ||
171 | return UNQLITE_OK; | ||
172 | |||
173 | case EAGAIN: | ||
174 | case ETIMEDOUT: | ||
175 | case EBUSY: | ||
176 | case EINTR: | ||
177 | case ENOLCK: | ||
178 | /* random NFS retry error, unless during file system support | ||
179 | * introspection, in which it actually means what it says */ | ||
180 | return UNQLITE_BUSY; | ||
181 | |||
182 | case EACCES: | ||
183 | /* EACCES is like EAGAIN during locking operations, but not any other time*/ | ||
184 | return UNQLITE_BUSY; | ||
185 | |||
186 | case EPERM: | ||
187 | return UNQLITE_PERM; | ||
188 | |||
189 | case EDEADLK: | ||
190 | return UNQLITE_IOERR; | ||
191 | |||
192 | #if EOPNOTSUPP!=ENOTSUP | ||
193 | case EOPNOTSUPP: | ||
194 | /* something went terribly awry, unless during file system support | ||
195 | * introspection, in which it actually means what it says */ | ||
196 | #endif | ||
197 | #ifdef ENOTSUP | ||
198 | case ENOTSUP: | ||
199 | /* invalid fd, unless during file system support introspection, in which | ||
200 | * it actually means what it says */ | ||
201 | #endif | ||
202 | case EIO: | ||
203 | case EBADF: | ||
204 | case EINVAL: | ||
205 | case ENOTCONN: | ||
206 | case ENODEV: | ||
207 | case ENXIO: | ||
208 | case ENOENT: | ||
209 | case ESTALE: | ||
210 | case ENOSYS: | ||
211 | /* these should force the client to close the file and reconnect */ | ||
212 | |||
213 | default: | ||
214 | return unqliteIOErr; | ||
215 | } | ||
216 | } | ||
217 | /****************************************************************************** | ||
218 | *************************** Posix Advisory Locking **************************** | ||
219 | ** | ||
220 | ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) | ||
221 | ** section 6.5.2.2 lines 483 through 490 specify that when a process | ||
222 | ** sets or clears a lock, that operation overrides any prior locks set | ||
223 | ** by the same process. It does not explicitly say so, but this implies | ||
224 | ** that it overrides locks set by the same process using a different | ||
225 | ** file descriptor. Consider this test case: | ||
226 | ** | ||
227 | ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); | ||
228 | ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); | ||
229 | ** | ||
230 | ** Suppose ./file1 and ./file2 are really the same file (because | ||
231 | ** one is a hard or symbolic link to the other) then if you set | ||
232 | ** an exclusive lock on fd1, then try to get an exclusive lock | ||
233 | ** on fd2, it works. I would have expected the second lock to | ||
234 | ** fail since there was already a lock on the file due to fd1. | ||
235 | ** But not so. Since both locks came from the same process, the | ||
236 | ** second overrides the first, even though they were on different | ||
237 | ** file descriptors opened on different file names. | ||
238 | ** | ||
239 | ** This means that we cannot use POSIX locks to synchronize file access | ||
240 | ** among competing threads of the same process. POSIX locks will work fine | ||
241 | ** to synchronize access for threads in separate processes, but not | ||
242 | ** threads within the same process. | ||
243 | ** | ||
244 | ** To work around the problem, SQLite has to manage file locks internally | ||
245 | ** on its own. Whenever a new database is opened, we have to find the | ||
246 | ** specific inode of the database file (the inode is determined by the | ||
247 | ** st_dev and st_ino fields of the stat structure that fstat() fills in) | ||
248 | ** and check for locks already existing on that inode. When locks are | ||
249 | ** created or removed, we have to look at our own internal record of the | ||
250 | ** locks to see if another thread has previously set a lock on that same | ||
251 | ** inode. | ||
252 | ** | ||
253 | ** (Aside: The use of inode numbers as unique IDs does not work on VxWorks. | ||
254 | ** For VxWorks, we have to use the alternative unique ID system based on | ||
255 | ** canonical filename and implemented in the previous division.) | ||
256 | ** | ||
257 | ** There is one locking structure | ||
258 | ** per inode, so if the same inode is opened twice, both unixFile structures | ||
259 | ** point to the same locking structure. The locking structure keeps | ||
260 | ** a reference count (so we will know when to delete it) and a "cnt" | ||
261 | ** field that tells us its internal lock status. cnt==0 means the | ||
262 | ** file is unlocked. cnt==-1 means the file has an exclusive lock. | ||
263 | ** cnt>0 means there are cnt shared locks on the file. | ||
264 | ** | ||
265 | ** Any attempt to lock or unlock a file first checks the locking | ||
266 | ** structure. The fcntl() system call is only invoked to set a | ||
267 | ** POSIX lock if the internal lock structure transitions between | ||
268 | ** a locked and an unlocked state. | ||
269 | ** | ||
270 | ** But wait: there are yet more problems with POSIX advisory locks. | ||
271 | ** | ||
272 | ** If you close a file descriptor that points to a file that has locks, | ||
273 | ** all locks on that file that are owned by the current process are | ||
274 | ** released. To work around this problem, each unixInodeInfo object | ||
275 | ** maintains a count of the number of pending locks on that inode. | ||
276 | ** When an attempt is made to close an unixFile, if there are | ||
277 | ** other unixFile open on the same inode that are holding locks, the call | ||
278 | ** to close() the file descriptor is deferred until all of the locks clear. | ||
279 | ** The unixInodeInfo structure keeps a list of file descriptors that need to | ||
280 | ** be closed and that list is walked (and cleared) when the last lock | ||
281 | ** clears. | ||
282 | ** | ||
283 | ** Yet another problem: LinuxThreads do not play well with posix locks. | ||
284 | ** | ||
285 | ** Many older versions of linux use the LinuxThreads library which is | ||
286 | ** not posix compliant. Under LinuxThreads, a lock created by thread | ||
287 | ** A cannot be modified or overridden by a different thread B. | ||
288 | ** Only thread A can modify the lock. Locking behavior is correct | ||
289 | ** if the application uses the newer Native Posix Thread Library (NPTL) | ||
290 | ** on linux - with NPTL a lock created by thread A can override locks | ||
291 | ** in thread B. But there is no way to know at compile-time which | ||
292 | ** threading library is being used. So there is no way to know at | ||
293 | ** compile-time whether or not thread A can override locks on thread B. | ||
294 | ** One has to do a run-time check to discover the behavior of the | ||
295 | ** current process. | ||
296 | ** | ||
297 | */ | ||
298 | |||
299 | /* | ||
300 | ** An instance of the following structure serves as the key used | ||
301 | ** to locate a particular unixInodeInfo object. | ||
302 | */ | ||
303 | struct unixFileId { | ||
304 | dev_t dev; /* Device number (st_dev as reported by fstat()) */ | ||
305 | ino_t ino; /* Inode number (st_ino as reported by fstat()) */ | ||
306 | }; | ||
307 | /* | ||
308 | ** An instance of the following structure is allocated for each open | ||
309 | ** inode. Or, on LinuxThreads, there is one of these structures for | ||
310 | ** each inode opened by each thread. | ||
311 | ** | ||
312 | ** A single inode can have multiple file descriptors, so each unixFile | ||
313 | ** structure contains a pointer to an instance of this object and this | ||
314 | ** object keeps a count of the number of unixFile pointing to it. | ||
315 | */ | ||
316 | struct unixInodeInfo { | ||
317 | struct unixFileId fileId; /* The lookup key (device + inode number) */ | ||
318 | int nShared; /* Number of SHARED locks held */ | ||
319 | int eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */ | ||
320 | int nRef; /* Number of unixFile objects pointing to this structure */ | ||
321 | int nLock; /* Number of outstanding file locks */ | ||
322 | UnixUnusedFd *pUnused; /* Unused file descriptors to close (see closePendingFds()) */ | ||
323 | unixInodeInfo *pNext; /* Next entry in the list of all unixInodeInfo objects */ | ||
324 | unixInodeInfo *pPrev; /* Previous entry; the list is doubly linked (0 at the head) */ | ||
325 | }; | ||
326 | |||
327 | static unixInodeInfo *inodeList = 0; /* Head of the list of all unixInodeInfo objects */ | ||
328 | /* | ||
329 | * Local memory allocation stuff. | ||
330 | */ | ||
331 | static void * unqlite_malloc(sxu32 nByte) | ||
332 | { | ||
333 | SyMemBackend *pAlloc; | ||
334 | void *p; | ||
335 | pAlloc = (SyMemBackend *)unqliteExportMemBackend(); | ||
336 | p = SyMemBackendAlloc(pAlloc,nByte); | ||
337 | return p; | ||
338 | } | ||
339 | static void unqlite_free(void *p) | ||
340 | { | ||
341 | SyMemBackend *pAlloc; | ||
342 | pAlloc = (SyMemBackend *)unqliteExportMemBackend(); | ||
343 | SyMemBackendFree(pAlloc,p); | ||
344 | } | ||
345 | /* | ||
346 | ** Close all file descriptors accumulated in the unixInodeInfo->pUnused list. | ||
347 | ** If all such file descriptors are closed without error, the list is | ||
348 | ** cleared and UNQLITE_OK returned. | ||
349 | ** | ||
350 | ** Otherwise, if an error occurs, then successfully closed file descriptor | ||
351 | ** entries are removed from the list, the entries that failed to close are | ||
352 | ** kept on it, and UNQLITE_IOERR is returned. | ||
353 | */ | ||
354 | static int closePendingFds(unixFile *pFile){ | ||
355 | int rc = UNQLITE_OK; | ||
356 | unixInodeInfo *pInode = pFile->pInode; | ||
357 | UnixUnusedFd *pError = 0; | ||
358 | UnixUnusedFd *p; | ||
359 | UnixUnusedFd *pNext; | ||
360 | for(p=pInode->pUnused; p; p=pNext){ | ||
361 | pNext = p->pNext; | ||
362 | if( close(p->fd) ){ | ||
363 | pFile->lastErrno = errno; | ||
364 | rc = UNQLITE_IOERR; | ||
365 | p->pNext = pError; | ||
366 | pError = p; | ||
367 | }else{ | ||
368 | unqlite_free(p); | ||
369 | } | ||
370 | } | ||
371 | pInode->pUnused = pError; | ||
372 | return rc; | ||
373 | } | ||
374 | /* | ||
375 | ** Release a unixInodeInfo structure previously allocated by findInodeInfo(). | ||
376 | ** | ||
377 | ** The mutex entered using the unixEnterMutex() function must be held | ||
378 | ** when this function is called. | ||
379 | */ | ||
380 | static void releaseInodeInfo(unixFile *pFile){ | ||
381 | unixInodeInfo *pInode = pFile->pInode; | ||
382 | if( pInode ){ | ||
383 | pInode->nRef--; | ||
384 | if( pInode->nRef==0 ){ | ||
385 | closePendingFds(pFile); | ||
386 | if( pInode->pPrev ){ | ||
387 | pInode->pPrev->pNext = pInode->pNext; | ||
388 | }else{ | ||
389 | inodeList = pInode->pNext; | ||
390 | } | ||
391 | if( pInode->pNext ){ | ||
392 | pInode->pNext->pPrev = pInode->pPrev; | ||
393 | } | ||
394 | unqlite_free(pInode); | ||
395 | } | ||
396 | } | ||
397 | } | ||
398 | /* | ||
399 | ** Given a file descriptor, locate the unixInodeInfo object that | ||
400 | ** describes that file descriptor. Create a new one if necessary. The | ||
401 | ** return value might be uninitialized if an error occurs. | ||
402 | ** | ||
403 | ** The mutex entered using the unixEnterMutex() function must be held | ||
404 | ** when this function is called. | ||
405 | ** | ||
406 | ** Return an appropriate error code. | ||
407 | */ | ||
408 | static int findInodeInfo( | ||
409 | unixFile *pFile, /* Unix file with file desc used in the key */ | ||
410 | unixInodeInfo **ppInode /* Return the unixInodeInfo object here */ | ||
411 | ){ | ||
412 | int rc; /* System call return code */ | ||
413 | int fd; /* The file descriptor for pFile */ | ||
414 | struct unixFileId fileId; /* Lookup key for the unixInodeInfo */ | ||
415 | struct stat statbuf; /* Low-level file information */ | ||
416 | unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */ | ||
417 | |||
418 | /* Get low-level information about the file that we can used to | ||
419 | ** create a unique name for the file. | ||
420 | */ | ||
421 | fd = pFile->h; | ||
422 | rc = fstat(fd, &statbuf); | ||
423 | if( rc!=0 ){ | ||
424 | pFile->lastErrno = errno; | ||
425 | #ifdef EOVERFLOW | ||
426 | if( pFile->lastErrno==EOVERFLOW ) return UNQLITE_NOTIMPLEMENTED; | ||
427 | #endif | ||
428 | return UNQLITE_IOERR; | ||
429 | } | ||
430 | |||
431 | #ifdef __APPLE__ | ||
432 | /* On OS X on an msdos filesystem, the inode number is reported | ||
433 | ** incorrectly for zero-size files. See ticket #3260. To work | ||
434 | ** around this problem (we consider it a bug in OS X, not SQLite) | ||
435 | ** we always increase the file size to 1 by writing a single byte | ||
436 | ** prior to accessing the inode number. The one byte written is | ||
437 | ** an ASCII 'S' character which also happens to be the first byte | ||
438 | ** in the header of every SQLite database. In this way, if there | ||
439 | ** is a race condition such that another thread has already populated | ||
440 | ** the first page of the database, no damage is done. | ||
441 | */ | ||
442 | if( statbuf.st_size==0 && (pFile->fsFlags & UNQLITE_FSFLAGS_IS_MSDOS)!=0 ){ | ||
443 | rc = write(fd, "S", 1); | ||
444 | if( rc!=1 ){ | ||
445 | pFile->lastErrno = errno; | ||
446 | return UNQLITE_IOERR; | ||
447 | } | ||
448 | rc = fstat(fd, &statbuf); | ||
449 | if( rc!=0 ){ | ||
450 | pFile->lastErrno = errno; | ||
451 | return UNQLITE_IOERR; | ||
452 | } | ||
453 | } | ||
454 | #endif | ||
455 | SyZero(&fileId,sizeof(fileId)); | ||
456 | fileId.dev = statbuf.st_dev; | ||
457 | fileId.ino = statbuf.st_ino; | ||
458 | pInode = inodeList; | ||
459 | while( pInode && SyMemcmp((const void *)&fileId,(const void *)&pInode->fileId, sizeof(fileId)) ){ | ||
460 | pInode = pInode->pNext; | ||
461 | } | ||
462 | if( pInode==0 ){ | ||
463 | pInode = (unixInodeInfo *)unqlite_malloc( sizeof(*pInode) ); | ||
464 | if( pInode==0 ){ | ||
465 | return UNQLITE_NOMEM; | ||
466 | } | ||
467 | SyZero(pInode,sizeof(*pInode)); | ||
468 | SyMemcpy((const void *)&fileId,(void *)&pInode->fileId,sizeof(fileId)); | ||
469 | pInode->nRef = 1; | ||
470 | pInode->pNext = inodeList; | ||
471 | pInode->pPrev = 0; | ||
472 | if( inodeList ) inodeList->pPrev = pInode; | ||
473 | inodeList = pInode; | ||
474 | }else{ | ||
475 | pInode->nRef++; | ||
476 | } | ||
477 | *ppInode = pInode; | ||
478 | return UNQLITE_OK; | ||
479 | } | ||
480 | /* | ||
481 | ** This routine checks if there is a RESERVED lock held on the specified | ||
482 | ** file by this or any other process. If such a lock is held, set *pResOut | ||
483 | ** to a non-zero value otherwise *pResOut is set to zero. The return value | ||
484 | ** is set to UNQLITE_OK unless an I/O error occurs during lock checking. | ||
485 | */ | ||
486 | static int unixCheckReservedLock(unqlite_file *id, int *pResOut){ | ||
487 | int rc = UNQLITE_OK; | ||
488 | int reserved = 0; | ||
489 | unixFile *pFile = (unixFile*)id; | ||
490 | |||
491 | |||
492 | unixEnterMutex(); /* Because pFile->pInode is shared across threads */ | ||
493 | |||
494 | /* Check if a thread in this process holds such a lock */ | ||
495 | if( pFile->pInode->eFileLock>SHARED_LOCK ){ | ||
496 | reserved = 1; | ||
497 | } | ||
498 | |||
499 | /* Otherwise see if some other process holds it. | ||
500 | */ | ||
501 | if( !reserved ){ | ||
502 | struct flock lock; | ||
503 | lock.l_whence = SEEK_SET; | ||
504 | lock.l_start = RESERVED_BYTE; | ||
505 | lock.l_len = 1; | ||
506 | lock.l_type = F_WRLCK; | ||
507 | if (-1 == fcntl(pFile->h, F_GETLK, &lock)) { | ||
508 | int tErrno = errno; | ||
509 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
510 | pFile->lastErrno = tErrno; | ||
511 | } else if( lock.l_type!=F_UNLCK ){ | ||
512 | reserved = 1; | ||
513 | } | ||
514 | } | ||
515 | |||
516 | unixLeaveMutex(); | ||
517 | |||
518 | *pResOut = reserved; | ||
519 | return rc; | ||
520 | } | ||
521 | /* | ||
522 | ** Lock the file with the lock specified by parameter eFileLock - one | ||
523 | ** of the following: | ||
524 | ** | ||
525 | ** (1) SHARED_LOCK | ||
526 | ** (2) RESERVED_LOCK | ||
527 | ** (3) PENDING_LOCK | ||
528 | ** (4) EXCLUSIVE_LOCK | ||
529 | ** | ||
530 | ** Sometimes when requesting one lock state, additional lock states | ||
531 | ** are inserted in between. The locking might fail on one of the later | ||
532 | ** transitions leaving the lock state different from what it started but | ||
533 | ** still short of its goal. The following chart shows the allowed | ||
534 | ** transitions and the inserted intermediate states: | ||
535 | ** | ||
536 | ** UNLOCKED -> SHARED | ||
537 | ** SHARED -> RESERVED | ||
538 | ** SHARED -> (PENDING) -> EXCLUSIVE | ||
539 | ** RESERVED -> (PENDING) -> EXCLUSIVE | ||
540 | ** PENDING -> EXCLUSIVE | ||
541 | ** | ||
542 | ** This routine will only increase a lock. Use the unqliteOsUnlock() | ||
543 | ** routine to lower a locking level. | ||
544 | */ | ||
545 | static int unixLock(unqlite_file *id, int eFileLock){ | ||
546 | /* The following describes the implementation of the various locks and | ||
547 | ** lock transitions in terms of the POSIX advisory shared and exclusive | ||
548 | ** lock primitives (called read-locks and write-locks below, to avoid | ||
549 | ** confusion with SQLite lock names). The algorithms are complicated | ||
550 | ** slightly in order to be compatible with Windows systems simultaneously | ||
551 | ** accessing the same database file, in case that is ever required. | ||
552 | ** | ||
553 | ** Symbols defined in os.h identify the 'pending byte' and the 'reserved | ||
554 | ** byte', each single bytes at well known offsets, and the 'shared byte | ||
555 | ** range', a range of 510 bytes at a well known offset. | ||
556 | ** | ||
557 | ** To obtain a SHARED lock, a read-lock is obtained on the 'pending | ||
558 | ** byte'. If this is successful, a random byte from the 'shared byte | ||
559 | ** range' is read-locked and the lock on the 'pending byte' released. | ||
560 | ** | ||
561 | ** A process may only obtain a RESERVED lock after it has a SHARED lock. | ||
562 | ** A RESERVED lock is implemented by grabbing a write-lock on the | ||
563 | ** 'reserved byte'. | ||
564 | ** | ||
565 | ** A process may only obtain a PENDING lock after it has obtained a | ||
566 | ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock | ||
567 | ** on the 'pending byte'. This ensures that no new SHARED locks can be | ||
568 | ** obtained, but existing SHARED locks are allowed to persist. A process | ||
569 | ** does not have to obtain a RESERVED lock on the way to a PENDING lock. | ||
570 | ** This property is used by the algorithm for rolling back a journal file | ||
571 | ** after a crash. | ||
572 | ** | ||
573 | ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is | ||
574 | ** implemented by obtaining a write-lock on the entire 'shared byte | ||
575 | ** range'. Since all other locks require a read-lock on one of the bytes | ||
576 | ** within this range, this ensures that no other locks are held on the | ||
577 | ** database. | ||
578 | ** | ||
579 | ** The reason a single byte cannot be used instead of the 'shared byte | ||
580 | ** range' is that some versions of Windows do not support read-locks. By | ||
581 | ** locking a random byte from a range, concurrent SHARED locks may exist | ||
582 | ** even if the locking primitive used is always a write-lock. | ||
583 | ** Returns UNQLITE_OK on success, UNQLITE_BUSY when a conflicting lock is recorded for this process, otherwise the code unqliteErrorFromPosixError() derives from the failing fcntl() call. */ | ||
584 | int rc = UNQLITE_OK; | ||
585 | unixFile *pFile = (unixFile*)id; | ||
586 | unixInodeInfo *pInode = pFile->pInode; | ||
587 | struct flock lock; | ||
588 | int s = 0; | ||
589 | int tErrno = 0; | ||
590 | |||
591 | /* If there is already a lock of this type or more restrictive on the | ||
592 | ** unixFile, do nothing. Don't use the end_lock: exit path, as | ||
593 | ** unixEnterMutex() hasn't been called yet. | ||
594 | */ | ||
595 | if( pFile->eFileLock>=eFileLock ){ | ||
596 | return UNQLITE_OK; | ||
597 | } | ||
598 | /* This mutex is needed because pFile->pInode is shared across threads | ||
599 | */ | ||
600 | unixEnterMutex(); | ||
601 | pInode = pFile->pInode; | ||
602 | |||
603 | /* If some thread using this PID has a lock via a different unixFile* | ||
604 | ** handle that precludes the requested lock, return BUSY. | ||
605 | */ | ||
606 | if( (pFile->eFileLock!=pInode->eFileLock && | ||
607 | (pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK)) | ||
608 | ){ | ||
609 | rc = UNQLITE_BUSY; | ||
610 | goto end_lock; | ||
611 | } | ||
612 | |||
613 | /* If a SHARED lock is requested, and some thread using this PID already | ||
614 | ** has a SHARED or RESERVED lock, then increment reference counts and | ||
615 | ** return UNQLITE_OK. No system call is made on this path. | ||
616 | */ | ||
617 | if( eFileLock==SHARED_LOCK && | ||
618 | (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ | ||
619 | pFile->eFileLock = SHARED_LOCK; | ||
620 | pInode->nShared++; | ||
621 | pInode->nLock++; | ||
622 | goto end_lock; | ||
623 | } | ||
624 | /* A PENDING lock is needed before acquiring a SHARED lock and before | ||
625 | ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will | ||
626 | ** be released. | ||
627 | */ | ||
628 | lock.l_len = 1L; | ||
629 | lock.l_whence = SEEK_SET; | ||
630 | if( eFileLock==SHARED_LOCK | ||
631 | || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK) | ||
632 | ){ | ||
633 | lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK); | ||
634 | lock.l_start = PENDING_BYTE; | ||
635 | s = fcntl(pFile->h, F_SETLK, &lock); | ||
636 | if( s==(-1) ){ | ||
637 | tErrno = errno; | ||
638 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
639 | if( IS_LOCK_ERROR(rc) ){ | ||
640 | pFile->lastErrno = tErrno; | ||
641 | } | ||
642 | goto end_lock; | ||
643 | } | ||
644 | } | ||
645 | /* If control gets to this point, then actually go ahead and make | ||
646 | ** operating system calls for the specified lock. | ||
647 | */ | ||
648 | if( eFileLock==SHARED_LOCK ){ | ||
649 | /* Now get the read-lock (l_type is still F_RDLCK from the PENDING step) */ | ||
650 | lock.l_start = SHARED_FIRST; | ||
651 | lock.l_len = SHARED_SIZE; | ||
652 | if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){ | ||
653 | tErrno = errno; | ||
654 | } | ||
655 | /* Drop the temporary PENDING lock */ | ||
656 | lock.l_start = PENDING_BYTE; | ||
657 | lock.l_len = 1L; | ||
658 | lock.l_type = F_UNLCK; | ||
659 | if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){ | ||
660 | if( s != -1 ){ | ||
661 | /* This could happen with a network mount */ | ||
662 | tErrno = errno; | ||
663 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
664 | if( IS_LOCK_ERROR(rc) ){ | ||
665 | pFile->lastErrno = tErrno; | ||
666 | } | ||
667 | goto end_lock; | ||
668 | } | ||
669 | } | ||
670 | if( s==(-1) ){ | ||
671 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
672 | if( IS_LOCK_ERROR(rc) ){ | ||
673 | pFile->lastErrno = tErrno; | ||
674 | } | ||
675 | }else{ | ||
676 | pFile->eFileLock = SHARED_LOCK; | ||
677 | pInode->nLock++; | ||
678 | pInode->nShared = 1; | ||
679 | } | ||
680 | }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){ | ||
681 | /* We are trying for an exclusive lock but another thread in this | ||
682 | ** same process is still holding a shared lock. */ | ||
683 | rc = UNQLITE_BUSY; | ||
684 | }else{ | ||
685 | /* The request was for a RESERVED or EXCLUSIVE lock. It is | ||
686 | ** assumed that there is a SHARED or greater lock on the file | ||
687 | ** already. | ||
688 | */ | ||
689 | lock.l_type = F_WRLCK; | ||
690 | switch( eFileLock ){ | ||
691 | case RESERVED_LOCK: | ||
692 | lock.l_start = RESERVED_BYTE; | ||
693 | break; | ||
694 | case EXCLUSIVE_LOCK: | ||
695 | lock.l_start = SHARED_FIRST; | ||
696 | lock.l_len = SHARED_SIZE; | ||
697 | break; | ||
698 | default: | ||
699 | /* Not reached for RESERVED/EXCLUSIVE requests; any other value (e.g. PENDING_LOCK) would leave lock.l_start unset */ | ||
700 | break; | ||
701 | } | ||
702 | s = fcntl(pFile->h, F_SETLK, &lock); | ||
703 | if( s==(-1) ){ | ||
704 | tErrno = errno; | ||
705 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
706 | if( IS_LOCK_ERROR(rc) ){ | ||
707 | pFile->lastErrno = tErrno; | ||
708 | } | ||
709 | } | ||
710 | } | ||
711 | if( rc==UNQLITE_OK ){ | ||
712 | pFile->eFileLock = eFileLock; | ||
713 | pInode->eFileLock = eFileLock; | ||
714 | }else if( eFileLock==EXCLUSIVE_LOCK ){ | ||
715 | pFile->eFileLock = PENDING_LOCK; | ||
716 | pInode->eFileLock = PENDING_LOCK; | ||
717 | } | ||
718 | end_lock: | ||
719 | unixLeaveMutex(); | ||
720 | return rc; | ||
721 | } | ||
722 | /* | ||
723 | ** Add the file descriptor used by file handle pFile to the corresponding | ||
724 | ** pUnused list. | ||
725 | */ | ||
726 | static void setPendingFd(unixFile *pFile){ | ||
727 | unixInodeInfo *pInode = pFile->pInode; | ||
728 | UnixUnusedFd *p = pFile->pUnused; | ||
729 | p->pNext = pInode->pUnused; | ||
730 | pInode->pUnused = p; | ||
731 | pFile->h = -1; | ||
732 | pFile->pUnused = 0; | ||
733 | } | ||
734 | /* | ||
735 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock | ||
736 | ** must be either NO_LOCK or SHARED_LOCK. | ||
737 | ** | ||
738 | ** If the locking level of the file descriptor is already at or below | ||
739 | ** the requested locking level, this routine is a no-op. | ||
740 | ** | ||
741 | ** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED | ||
742 | ** the byte range is divided into 2 parts and the first part is unlocked then | ||
743 | ** set to a read lock, then the other part is simply unlocked. This works | ||
744 | ** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to | ||
745 | ** remove the write lock on a region when a read lock is set. | ||
746 | */ | ||
747 | static int _posixUnlock(unqlite_file *id, int eFileLock, int handleNFSUnlock){ | ||
748 | unixFile *pFile = (unixFile*)id; | ||
749 | unixInodeInfo *pInode; | ||
750 | struct flock lock; | ||
751 | int rc = UNQLITE_OK; | ||
752 | int h; | ||
753 | int tErrno; /* Error code from system call errors */ | ||
754 | |||
755 | if( pFile->eFileLock<=eFileLock ){ | ||
756 | return UNQLITE_OK; | ||
757 | } | ||
758 | unixEnterMutex(); | ||
759 | |||
760 | h = pFile->h; | ||
761 | pInode = pFile->pInode; | ||
762 | |||
763 | if( pFile->eFileLock>SHARED_LOCK ){ | ||
764 | /* downgrading to a shared lock on NFS involves clearing the write lock | ||
765 | ** before establishing the readlock - to avoid a race condition we downgrade | ||
766 | ** the lock in 2 blocks, so that part of the range will be covered by a | ||
767 | ** write lock until the rest is covered by a read lock: | ||
768 | ** 1: [WWWWW] | ||
769 | ** 2: [....W] | ||
770 | ** 3: [RRRRW] | ||
771 | ** 4: [RRRR.] | ||
772 | */ | ||
773 | if( eFileLock==SHARED_LOCK ){ | ||
774 | if( handleNFSUnlock ){ | ||
775 | off_t divSize = SHARED_SIZE - 1; | ||
776 | |||
777 | lock.l_type = F_UNLCK; | ||
778 | lock.l_whence = SEEK_SET; | ||
779 | lock.l_start = SHARED_FIRST; | ||
780 | lock.l_len = divSize; | ||
781 | if( fcntl(h, F_SETLK, &lock)==(-1) ){ | ||
782 | tErrno = errno; | ||
783 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
784 | if( IS_LOCK_ERROR(rc) ){ | ||
785 | pFile->lastErrno = tErrno; | ||
786 | } | ||
787 | goto end_unlock; | ||
788 | } | ||
789 | lock.l_type = F_RDLCK; | ||
790 | lock.l_whence = SEEK_SET; | ||
791 | lock.l_start = SHARED_FIRST; | ||
792 | lock.l_len = divSize; | ||
793 | if( fcntl(h, F_SETLK, &lock)==(-1) ){ | ||
794 | tErrno = errno; | ||
795 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
796 | if( IS_LOCK_ERROR(rc) ){ | ||
797 | pFile->lastErrno = tErrno; | ||
798 | } | ||
799 | goto end_unlock; | ||
800 | } | ||
801 | lock.l_type = F_UNLCK; | ||
802 | lock.l_whence = SEEK_SET; | ||
803 | lock.l_start = SHARED_FIRST+divSize; | ||
804 | lock.l_len = SHARED_SIZE-divSize; | ||
805 | if( fcntl(h, F_SETLK, &lock)==(-1) ){ | ||
806 | tErrno = errno; | ||
807 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
808 | if( IS_LOCK_ERROR(rc) ){ | ||
809 | pFile->lastErrno = tErrno; | ||
810 | } | ||
811 | goto end_unlock; | ||
812 | } | ||
813 | }else{ | ||
814 | lock.l_type = F_RDLCK; | ||
815 | lock.l_whence = SEEK_SET; | ||
816 | lock.l_start = SHARED_FIRST; | ||
817 | lock.l_len = SHARED_SIZE; | ||
818 | if( fcntl(h, F_SETLK, &lock)==(-1) ){ | ||
819 | tErrno = errno; | ||
820 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
821 | if( IS_LOCK_ERROR(rc) ){ | ||
822 | pFile->lastErrno = tErrno; | ||
823 | } | ||
824 | goto end_unlock; | ||
825 | } | ||
826 | } | ||
827 | } | ||
828 | lock.l_type = F_UNLCK; | ||
829 | lock.l_whence = SEEK_SET; | ||
830 | lock.l_start = PENDING_BYTE; | ||
831 | lock.l_len = 2L; | ||
832 | if( fcntl(h, F_SETLK, &lock)!=(-1) ){ | ||
833 | pInode->eFileLock = SHARED_LOCK; | ||
834 | }else{ | ||
835 | tErrno = errno; | ||
836 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
837 | if( IS_LOCK_ERROR(rc) ){ | ||
838 | pFile->lastErrno = tErrno; | ||
839 | } | ||
840 | goto end_unlock; | ||
841 | } | ||
842 | } | ||
843 | if( eFileLock==NO_LOCK ){ | ||
844 | /* Decrement the shared lock counter. Release the lock using an | ||
845 | ** OS call only when all threads in this same process have released | ||
846 | ** the lock. | ||
847 | */ | ||
848 | pInode->nShared--; | ||
849 | if( pInode->nShared==0 ){ | ||
850 | lock.l_type = F_UNLCK; | ||
851 | lock.l_whence = SEEK_SET; | ||
852 | lock.l_start = lock.l_len = 0L; | ||
853 | |||
854 | if( fcntl(h, F_SETLK, &lock)!=(-1) ){ | ||
855 | pInode->eFileLock = NO_LOCK; | ||
856 | }else{ | ||
857 | tErrno = errno; | ||
858 | rc = unqliteErrorFromPosixError(tErrno, UNQLITE_LOCKERR); | ||
859 | if( IS_LOCK_ERROR(rc) ){ | ||
860 | pFile->lastErrno = tErrno; | ||
861 | } | ||
862 | pInode->eFileLock = NO_LOCK; | ||
863 | pFile->eFileLock = NO_LOCK; | ||
864 | } | ||
865 | } | ||
866 | |||
867 | /* Decrement the count of locks against this same file. When the | ||
868 | ** count reaches zero, close any other file descriptors whose close | ||
869 | ** was deferred because of outstanding locks. | ||
870 | */ | ||
871 | pInode->nLock--; | ||
872 | |||
873 | if( pInode->nLock==0 ){ | ||
874 | int rc2 = closePendingFds(pFile); | ||
875 | if( rc==UNQLITE_OK ){ | ||
876 | rc = rc2; | ||
877 | } | ||
878 | } | ||
879 | } | ||
880 | |||
881 | end_unlock: | ||
882 | |||
883 | unixLeaveMutex(); | ||
884 | |||
885 | if( rc==UNQLITE_OK ) pFile->eFileLock = eFileLock; | ||
886 | return rc; | ||
887 | } | ||
888 | /* | ||
889 | ** Lower the locking level on file descriptor pFile to eFileLock. eFileLock | ||
890 | ** must be either NO_LOCK or SHARED_LOCK. | ||
891 | ** | ||
892 | ** If the locking level of the file descriptor is already at or below | ||
893 | ** the requested locking level, this routine is a no-op. | ||
894 | */ | ||
895 | static int unixUnlock(unqlite_file *id, int eFileLock){ | ||
896 | return _posixUnlock(id, eFileLock, 0); | ||
897 | } | ||
898 | /* | ||
899 | ** This function performs the parts of the "close file" operation | ||
900 | ** common to all locking schemes. It closes the directory and file | ||
901 | ** handles, if they are valid, and sets all fields of the unixFile | ||
902 | ** structure to 0. | ||
903 | ** | ||
904 | */ | ||
905 | static int closeUnixFile(unqlite_file *id){ | ||
906 | unixFile *pFile = (unixFile*)id; | ||
907 | if( pFile ){ | ||
908 | if( pFile->dirfd>=0 ){ | ||
909 | int err = close(pFile->dirfd); | ||
910 | if( err ){ | ||
911 | pFile->lastErrno = errno; | ||
912 | return UNQLITE_IOERR; | ||
913 | }else{ | ||
914 | pFile->dirfd=-1; | ||
915 | } | ||
916 | } | ||
917 | if( pFile->h>=0 ){ | ||
918 | int err = close(pFile->h); | ||
919 | if( err ){ | ||
920 | pFile->lastErrno = errno; | ||
921 | return UNQLITE_IOERR; | ||
922 | } | ||
923 | } | ||
924 | unqlite_free(pFile->pUnused); | ||
925 | SyZero(pFile,sizeof(unixFile)); | ||
926 | } | ||
927 | return UNQLITE_OK; | ||
928 | } | ||
929 | /* | ||
930 | ** Close a file. | ||
931 | */ | ||
932 | static int unixClose(unqlite_file *id){ | ||
933 | int rc = UNQLITE_OK; | ||
934 | if( id ){ | ||
935 | unixFile *pFile = (unixFile *)id; | ||
936 | unixUnlock(id, NO_LOCK); | ||
937 | unixEnterMutex(); | ||
938 | if( pFile->pInode && pFile->pInode->nLock ){ | ||
939 | /* If there are outstanding locks, do not actually close the file just | ||
940 | ** yet because that would clear those locks. Instead, add the file | ||
941 | ** descriptor to pInode->pUnused list. It will be automatically closed | ||
942 | ** when the last lock is cleared. | ||
943 | */ | ||
944 | setPendingFd(pFile); | ||
945 | } | ||
946 | releaseInodeInfo(pFile); | ||
947 | rc = closeUnixFile(id); | ||
948 | unixLeaveMutex(); | ||
949 | } | ||
950 | return rc; | ||
951 | } | ||
952 | /************** End of the posix advisory lock implementation ***************** | ||
953 | ******************************************************************************/ | ||
954 | /* | ||
955 | ** | ||
956 | ** The next division contains implementations for all methods of the | ||
957 | ** unqlite_file object other than the locking methods. The locking | ||
958 | ** methods were defined in divisions above (one locking method per | ||
959 | ** division). Those methods that are common to all locking modes | ||
960 | ** are gathered together into this division. | ||
961 | */ | ||
962 | /* | ||
963 | ** Seek to the offset passed as the second argument, then read cnt | ||
964 | ** bytes into pBuf. Return the number of bytes actually read. | ||
965 | ** | ||
966 | ** NB: If you define USE_PREAD or USE_PREAD64, then it might also | ||
967 | ** be necessary to define _XOPEN_SOURCE to be 500. This varies from | ||
968 | ** one system to another. Since SQLite does not define USE_PREAD | ||
969 | ** any form by default, we will not attempt to define _XOPEN_SOURCE. | ||
970 | ** See tickets #2741 and #2681. | ||
971 | ** | ||
972 | ** To avoid stomping the errno value on a failed read the lastErrno value | ||
973 | ** is set before returning. | ||
974 | */ | ||
975 | static int seekAndRead(unixFile *id, unqlite_int64 offset, void *pBuf, int cnt){ | ||
976 | int got; | ||
977 | #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) | ||
978 | unqlite_int64 newOffset; | ||
979 | #endif | ||
980 | |||
981 | #if defined(USE_PREAD) | ||
982 | got = pread(id->h, pBuf, cnt, offset); | ||
983 | #elif defined(USE_PREAD64) | ||
984 | got = pread64(id->h, pBuf, cnt, offset); | ||
985 | #else | ||
986 | newOffset = lseek(id->h, offset, SEEK_SET); | ||
987 | |||
988 | if( newOffset!=offset ){ | ||
989 | if( newOffset == -1 ){ | ||
990 | ((unixFile*)id)->lastErrno = errno; | ||
991 | }else{ | ||
992 | ((unixFile*)id)->lastErrno = 0; | ||
993 | } | ||
994 | return -1; | ||
995 | } | ||
996 | got = read(id->h, pBuf, cnt); | ||
997 | #endif | ||
998 | if( got<0 ){ | ||
999 | ((unixFile*)id)->lastErrno = errno; | ||
1000 | } | ||
1001 | return got; | ||
1002 | } | ||
1003 | /* | ||
1004 | ** Read data from a file into a buffer. Return UNQLITE_OK if all | ||
1005 | ** bytes were read successfully and UNQLITE_IOERR if anything goes | ||
1006 | ** wrong. | ||
1007 | */ | ||
1008 | static int unixRead( | ||
1009 | unqlite_file *id, | ||
1010 | void *pBuf, | ||
1011 | unqlite_int64 amt, | ||
1012 | unqlite_int64 offset | ||
1013 | ){ | ||
1014 | unixFile *pFile = (unixFile *)id; | ||
1015 | int got; | ||
1016 | |||
1017 | got = seekAndRead(pFile, offset, pBuf, (int)amt); | ||
1018 | if( got==(int)amt ){ | ||
1019 | return UNQLITE_OK; | ||
1020 | }else if( got<0 ){ | ||
1021 | /* lastErrno set by seekAndRead */ | ||
1022 | return UNQLITE_IOERR; | ||
1023 | }else{ | ||
1024 | pFile->lastErrno = 0; /* not a system error */ | ||
1025 | /* Unread parts of the buffer must be zero-filled */ | ||
1026 | SyZero(&((char*)pBuf)[got],(sxu32)amt-got); | ||
1027 | return UNQLITE_IOERR; | ||
1028 | } | ||
1029 | } | ||
1030 | /* | ||
1031 | ** Seek to the offset in id->offset then read cnt bytes into pBuf. | ||
1032 | ** Return the number of bytes actually read. Update the offset. | ||
1033 | ** | ||
1034 | ** To avoid stomping the errno value on a failed write the lastErrno value | ||
1035 | ** is set before returning. | ||
1036 | */ | ||
1037 | static int seekAndWrite(unixFile *id, unqlite_int64 offset, const void *pBuf, unqlite_int64 cnt){ | ||
1038 | int got; | ||
1039 | #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) | ||
1040 | unqlite_int64 newOffset; | ||
1041 | #endif | ||
1042 | |||
1043 | #if defined(USE_PREAD) | ||
1044 | got = pwrite(id->h, pBuf, cnt, offset); | ||
1045 | #elif defined(USE_PREAD64) | ||
1046 | got = pwrite64(id->h, pBuf, cnt, offset); | ||
1047 | #else | ||
1048 | newOffset = lseek(id->h, offset, SEEK_SET); | ||
1049 | if( newOffset!=offset ){ | ||
1050 | if( newOffset == -1 ){ | ||
1051 | ((unixFile*)id)->lastErrno = errno; | ||
1052 | }else{ | ||
1053 | ((unixFile*)id)->lastErrno = 0; | ||
1054 | } | ||
1055 | return -1; | ||
1056 | } | ||
1057 | got = write(id->h, pBuf, cnt); | ||
1058 | #endif | ||
1059 | if( got<0 ){ | ||
1060 | ((unixFile*)id)->lastErrno = errno; | ||
1061 | } | ||
1062 | return got; | ||
1063 | } | ||
1064 | /* | ||
1065 | ** Write data from a buffer into a file. Return UNQLITE_OK on success | ||
1066 | ** or some other error code on failure. | ||
1067 | */ | ||
1068 | static int unixWrite( | ||
1069 | unqlite_file *id, | ||
1070 | const void *pBuf, | ||
1071 | unqlite_int64 amt, | ||
1072 | unqlite_int64 offset | ||
1073 | ){ | ||
1074 | unixFile *pFile = (unixFile*)id; | ||
1075 | int wrote = 0; | ||
1076 | |||
1077 | while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){ | ||
1078 | amt -= wrote; | ||
1079 | offset += wrote; | ||
1080 | pBuf = &((char*)pBuf)[wrote]; | ||
1081 | } | ||
1082 | |||
1083 | if( amt>0 ){ | ||
1084 | if( wrote<0 ){ | ||
1085 | /* lastErrno set by seekAndWrite */ | ||
1086 | return UNQLITE_IOERR; | ||
1087 | }else{ | ||
1088 | pFile->lastErrno = 0; /* not a system error */ | ||
1089 | return UNQLITE_FULL; | ||
1090 | } | ||
1091 | } | ||
1092 | return UNQLITE_OK; | ||
1093 | } | ||
/*
** We do not trust systems to provide a working fdatasync().  Some do,
** others do not.  To be safe, every fdatasync() call in this file is
** redirected to the (slower) fsync() unless the target is Linux
** (__linux__) or a macro named fdatasync is already defined.
** If you know that your system supports fdatasync() correctly, compile
** with -Dfdatasync=fdatasync to suppress the redirection.
*/
#ifndef fdatasync
# ifndef __linux__
#  define fdatasync fsync
# endif
#endif
1103 | |||
/*
** HAVE_FULLFSYNC is always defined: 1 when the F_FULLFSYNC fcntl command is
** available at compile time, 0 otherwise.  F_FULLFSYNC is currently only
** available on Mac OS X, but that could change.
*/
#ifndef F_FULLFSYNC
# define HAVE_FULLFSYNC 0
#else
# define HAVE_FULLFSYNC 1
#endif
/*
** Flush descriptor fd to stable storage.  Returns 0 on success and -1 on
** failure (errno is left as set by the failing system call).
**
** Which primitive is used is decided at compile time:
**   - HAVE_FULLFSYNC: fcntl(F_FULLFSYNC) when fullSync is set, with a
**     fallback to fsync() when that fails or when fullSync is clear.  A
**     failing F_FULLFSYNC is taken to mean the file system does not
**     support it.
**   - otherwise on __APPLE__: fsync(), because fdatasync() on HFS+ does not
**     yet flush a changed file size correctly.
**   - otherwise: fdatasync(), which may itself be a macro for fsync().
**
** dataOnly is accepted for interface compatibility but never consulted: as
** far as this layer is concerned an fdatasync() is always adequate, since
** it also writes the inode when the file size has changed.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int status;

  SXUNUSED(dataOnly);
#if HAVE_FULLFSYNC
  status = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  if( status!=0 ){
    /* F_FULLFSYNC not requested or not supported here: plain fsync() */
    status = fsync(fd);
  }
#else
  SXUNUSED(fullSync);
# if defined(__APPLE__)
  status = fsync(fd);
# else
  status = fdatasync(fd);
# endif
#endif

  /* Normalise any non-failure value to 0 */
  return (status==-1) ? -1 : 0;
}
1174 | /* | ||
1175 | ** Make sure all writes to a particular file are committed to disk. | ||
1176 | ** | ||
1177 | ** If dataOnly==0 then both the file itself and its metadata (file | ||
1178 | ** size, access time, etc) are synced. If dataOnly!=0 then only the | ||
1179 | ** file data is synced. | ||
1180 | ** | ||
1181 | ** Under Unix, also make sure that the directory entry for the file | ||
1182 | ** has been created by fsync-ing the directory that contains the file. | ||
1183 | ** If we do not do this and we encounter a power failure, the directory | ||
1184 | ** entry for the journal might not exist after we reboot. The next | ||
1185 | ** SQLite to access the file will not know that the journal exists (because | ||
1186 | ** the directory entry for the journal was never created) and the transaction | ||
1187 | ** will not roll back - possibly leading to database corruption. | ||
1188 | */ | ||
1189 | static int unixSync(unqlite_file *id, int flags){ | ||
1190 | int rc; | ||
1191 | unixFile *pFile = (unixFile*)id; | ||
1192 | |||
1193 | int isDataOnly = (flags&UNQLITE_SYNC_DATAONLY); | ||
1194 | int isFullsync = (flags&0x0F)==UNQLITE_SYNC_FULL; | ||
1195 | |||
1196 | rc = full_fsync(pFile->h, isFullsync, isDataOnly); | ||
1197 | |||
1198 | if( rc ){ | ||
1199 | pFile->lastErrno = errno; | ||
1200 | return UNQLITE_IOERR; | ||
1201 | } | ||
1202 | if( pFile->dirfd>=0 ){ | ||
1203 | int err; | ||
1204 | #ifndef UNQLITE_DISABLE_DIRSYNC | ||
1205 | /* The directory sync is only attempted if full_fsync is | ||
1206 | ** turned off or unavailable. If a full_fsync occurred above, | ||
1207 | ** then the directory sync is superfluous. | ||
1208 | */ | ||
1209 | if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){ | ||
1210 | /* | ||
1211 | ** We have received multiple reports of fsync() returning | ||
1212 | ** errors when applied to directories on certain file systems. | ||
1213 | ** A failed directory sync is not a big deal. So it seems | ||
1214 | ** better to ignore the error. Ticket #1657 | ||
1215 | */ | ||
1216 | /* pFile->lastErrno = errno; */ | ||
1217 | /* return UNQLITE_IOERR; */ | ||
1218 | } | ||
1219 | #endif | ||
1220 | err = close(pFile->dirfd); /* Only need to sync once, so close the */ | ||
1221 | if( err==0 ){ /* directory when we are done */ | ||
1222 | pFile->dirfd = -1; | ||
1223 | }else{ | ||
1224 | pFile->lastErrno = errno; | ||
1225 | rc = UNQLITE_IOERR; | ||
1226 | } | ||
1227 | } | ||
1228 | return rc; | ||
1229 | } | ||
1230 | /* | ||
1231 | ** Truncate an open file to a specified size | ||
1232 | */ | ||
1233 | static int unixTruncate(unqlite_file *id, sxi64 nByte){ | ||
1234 | unixFile *pFile = (unixFile *)id; | ||
1235 | int rc; | ||
1236 | |||
1237 | rc = ftruncate(pFile->h, (off_t)nByte); | ||
1238 | if( rc ){ | ||
1239 | pFile->lastErrno = errno; | ||
1240 | return UNQLITE_IOERR; | ||
1241 | }else{ | ||
1242 | return UNQLITE_OK; | ||
1243 | } | ||
1244 | } | ||
1245 | /* | ||
1246 | ** Determine the current size of a file in bytes | ||
1247 | */ | ||
1248 | static int unixFileSize(unqlite_file *id,sxi64 *pSize){ | ||
1249 | int rc; | ||
1250 | struct stat buf; | ||
1251 | |||
1252 | rc = fstat(((unixFile*)id)->h, &buf); | ||
1253 | |||
1254 | if( rc!=0 ){ | ||
1255 | ((unixFile*)id)->lastErrno = errno; | ||
1256 | return UNQLITE_IOERR; | ||
1257 | } | ||
1258 | *pSize = buf.st_size; | ||
1259 | |||
1260 | /* When opening a zero-size database, the findInodeInfo() procedure | ||
1261 | ** writes a single byte into that file in order to work around a bug | ||
1262 | ** in the OS-X msdos filesystem. In order to avoid problems with upper | ||
1263 | ** layers, we need to report this file size as zero even though it is | ||
1264 | ** really 1. Ticket #3260. | ||
1265 | */ | ||
1266 | if( *pSize==1 ) *pSize = 0; | ||
1267 | |||
1268 | return UNQLITE_OK; | ||
1269 | } | ||
1270 | /* | ||
1271 | ** Return the sector size in bytes of the underlying block device for | ||
1272 | ** the specified file. This is almost always 512 bytes, but may be | ||
1273 | ** larger for some devices. | ||
1274 | ** | ||
1275 | ** SQLite code assumes this function cannot fail. It also assumes that | ||
1276 | ** if two files are created in the same file-system directory (i.e. | ||
1277 | ** a database and its journal file) that the sector size will be the | ||
1278 | ** same for both. | ||
1279 | */ | ||
1280 | static int unixSectorSize(unqlite_file *NotUsed){ | ||
1281 | SXUNUSED(NotUsed); | ||
1282 | return UNQLITE_DEFAULT_SECTOR_SIZE; | ||
1283 | } | ||
1284 | /* | ||
1285 | ** This vector defines all the methods that can operate on an | ||
1286 | ** unqlite_file for Windows systems. | ||
1287 | */ | ||
1288 | static const unqlite_io_methods unixIoMethod = { | ||
1289 | 1, /* iVersion */ | ||
1290 | unixClose, /* xClose */ | ||
1291 | unixRead, /* xRead */ | ||
1292 | unixWrite, /* xWrite */ | ||
1293 | unixTruncate, /* xTruncate */ | ||
1294 | unixSync, /* xSync */ | ||
1295 | unixFileSize, /* xFileSize */ | ||
1296 | unixLock, /* xLock */ | ||
1297 | unixUnlock, /* xUnlock */ | ||
1298 | unixCheckReservedLock, /* xCheckReservedLock */ | ||
1299 | unixSectorSize, /* xSectorSize */ | ||
1300 | }; | ||
1301 | /**************************************************************************** | ||
1302 | **************************** unqlite_vfs methods **************************** | ||
1303 | ** | ||
1304 | ** This division contains the implementation of methods on the | ||
1305 | ** unqlite_vfs object. | ||
1306 | */ | ||
1307 | /* | ||
1308 | ** Initialize the contents of the unixFile structure pointed to by pId. | ||
1309 | */ | ||
1310 | static int fillInUnixFile( | ||
1311 | unqlite_vfs *pVfs, /* Pointer to vfs object */ | ||
1312 | int h, /* Open file descriptor of file being opened */ | ||
1313 | int dirfd, /* Directory file descriptor */ | ||
1314 | unqlite_file *pId, /* Write to the unixFile structure here */ | ||
1315 | const char *zFilename, /* Name of the file being opened */ | ||
1316 | int noLock, /* Omit locking if true */ | ||
1317 | int isDelete /* Delete on close if true */ | ||
1318 | ){ | ||
1319 | const unqlite_io_methods *pLockingStyle = &unixIoMethod; | ||
1320 | unixFile *pNew = (unixFile *)pId; | ||
1321 | int rc = UNQLITE_OK; | ||
1322 | |||
1323 | /* Parameter isDelete is only used on vxworks. Express this explicitly | ||
1324 | ** here to prevent compiler warnings about unused parameters. | ||
1325 | */ | ||
1326 | SXUNUSED(isDelete); | ||
1327 | SXUNUSED(noLock); | ||
1328 | SXUNUSED(pVfs); | ||
1329 | |||
1330 | pNew->h = h; | ||
1331 | pNew->dirfd = dirfd; | ||
1332 | pNew->fileFlags = 0; | ||
1333 | pNew->zPath = zFilename; | ||
1334 | |||
1335 | unixEnterMutex(); | ||
1336 | rc = findInodeInfo(pNew, &pNew->pInode); | ||
1337 | if( rc!=UNQLITE_OK ){ | ||
1338 | /* If an error occured in findInodeInfo(), close the file descriptor | ||
1339 | ** immediately, before releasing the mutex. findInodeInfo() may fail | ||
1340 | ** in two scenarios: | ||
1341 | ** | ||
1342 | ** (a) A call to fstat() failed. | ||
1343 | ** (b) A malloc failed. | ||
1344 | ** | ||
1345 | ** Scenario (b) may only occur if the process is holding no other | ||
1346 | ** file descriptors open on the same file. If there were other file | ||
1347 | ** descriptors on this file, then no malloc would be required by | ||
1348 | ** findInodeInfo(). If this is the case, it is quite safe to close | ||
1349 | ** handle h - as it is guaranteed that no posix locks will be released | ||
1350 | ** by doing so. | ||
1351 | ** | ||
1352 | ** If scenario (a) caused the error then things are not so safe. The | ||
1353 | ** implicit assumption here is that if fstat() fails, things are in | ||
1354 | ** such bad shape that dropping a lock or two doesn't matter much. | ||
1355 | */ | ||
1356 | close(h); | ||
1357 | h = -1; | ||
1358 | } | ||
1359 | unixLeaveMutex(); | ||
1360 | |||
1361 | pNew->lastErrno = 0; | ||
1362 | if( rc!=UNQLITE_OK ){ | ||
1363 | if( dirfd>=0 ) close(dirfd); /* silent leak if fail, already in error */ | ||
1364 | if( h>=0 ) close(h); | ||
1365 | }else{ | ||
1366 | pNew->pMethod = pLockingStyle; | ||
1367 | } | ||
1368 | return rc; | ||
1369 | } | ||
1370 | /* | ||
1371 | ** Open a file descriptor to the directory containing file zFilename. | ||
1372 | ** If successful, *pFd is set to the opened file descriptor and | ||
1373 | ** UNQLITE_OK is returned. If an error occurs, either UNQLITE_NOMEM | ||
1374 | ** or UNQLITE_CANTOPEN is returned and *pFd is set to an undefined | ||
1375 | ** value. | ||
1376 | ** | ||
1377 | ** If UNQLITE_OK is returned, the caller is responsible for closing | ||
1378 | ** the file descriptor *pFd using close(). | ||
1379 | */ | ||
1380 | static int openDirectory(const char *zFilename, int *pFd){ | ||
1381 | sxu32 ii; | ||
1382 | int fd = -1; | ||
1383 | char zDirname[MAX_PATHNAME+1]; | ||
1384 | sxu32 n; | ||
1385 | n = Systrcpy(zDirname,sizeof(zDirname),zFilename,0); | ||
1386 | for(ii=n; ii>1 && zDirname[ii]!='/'; ii--); | ||
1387 | if( ii>0 ){ | ||
1388 | zDirname[ii] = '\0'; | ||
1389 | fd = open(zDirname, O_RDONLY|O_BINARY, 0); | ||
1390 | if( fd>=0 ){ | ||
1391 | #ifdef FD_CLOEXEC | ||
1392 | fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC); | ||
1393 | #endif | ||
1394 | } | ||
1395 | } | ||
1396 | *pFd = fd; | ||
1397 | return (fd>=0?UNQLITE_OK: UNQLITE_IOERR ); | ||
1398 | } | ||
1399 | /* | ||
1400 | ** Search for an unused file descriptor that was opened on the database | ||
1401 | ** file (not a journal or master-journal file) identified by pathname | ||
1402 | ** zPath with UNQLITE_OPEN_XXX flags matching those passed as the second | ||
1403 | ** argument to this function. | ||
1404 | ** | ||
1405 | ** Such a file descriptor may exist if a database connection was closed | ||
1406 | ** but the associated file descriptor could not be closed because some | ||
1407 | ** other file descriptor open on the same file is holding a file-lock. | ||
1408 | ** Refer to comments in the unixClose() function and the lengthy comment | ||
1409 | ** describing "Posix Advisory Locking" at the start of this file for | ||
1410 | ** further details. Also, ticket #4018. | ||
1411 | ** | ||
1412 | ** If a suitable file descriptor is found, then it is returned. If no | ||
1413 | ** such file descriptor is located, -1 is returned. | ||
1414 | */ | ||
1415 | static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ | ||
1416 | UnixUnusedFd *pUnused = 0; | ||
1417 | struct stat sStat; /* Results of stat() call */ | ||
1418 | /* A stat() call may fail for various reasons. If this happens, it is | ||
1419 | ** almost certain that an open() call on the same path will also fail. | ||
1420 | ** For this reason, if an error occurs in the stat() call here, it is | ||
1421 | ** ignored and -1 is returned. The caller will try to open a new file | ||
1422 | ** descriptor on the same path, fail, and return an error to SQLite. | ||
1423 | ** | ||
1424 | ** Even if a subsequent open() call does succeed, the consequences of | ||
1425 | ** not searching for a resusable file descriptor are not dire. */ | ||
1426 | if( 0==stat(zPath, &sStat) ){ | ||
1427 | unixInodeInfo *pInode; | ||
1428 | |||
1429 | unixEnterMutex(); | ||
1430 | pInode = inodeList; | ||
1431 | while( pInode && (pInode->fileId.dev!=sStat.st_dev | ||
1432 | || pInode->fileId.ino!=sStat.st_ino) ){ | ||
1433 | pInode = pInode->pNext; | ||
1434 | } | ||
1435 | if( pInode ){ | ||
1436 | UnixUnusedFd **pp; | ||
1437 | for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext)); | ||
1438 | pUnused = *pp; | ||
1439 | if( pUnused ){ | ||
1440 | *pp = pUnused->pNext; | ||
1441 | } | ||
1442 | } | ||
1443 | unixLeaveMutex(); | ||
1444 | } | ||
1445 | return pUnused; | ||
1446 | } | ||
1447 | /* | ||
1448 | ** This function is called by unixOpen() to determine the unix permissions | ||
1449 | ** to create new files with. If no error occurs, then UNQLITE_OK is returned | ||
1450 | ** and a value suitable for passing as the third argument to open(2) is | ||
1451 | ** written to *pMode. If an IO error occurs, an SQLite error code is | ||
1452 | ** returned and the value of *pMode is not modified. | ||
1453 | ** | ||
1454 | ** If the file being opened is a temporary file, it is always created with | ||
1455 | ** the octal permissions 0600 (read/writable by owner only). If the file | ||
1456 | ** is a database or master journal file, it is created with the permissions | ||
1457 | ** mask UNQLITE_DEFAULT_FILE_PERMISSIONS. | ||
1458 | ** | ||
1459 | ** Finally, if the file being opened is a WAL or regular journal file, then | ||
1460 | ** this function queries the file-system for the permissions on the | ||
1461 | ** corresponding database file and sets *pMode to this value. Whenever | ||
1462 | ** possible, WAL and journal files are created using the same permissions | ||
1463 | ** as the associated database file. | ||
1464 | */ | ||
1465 | static int findCreateFileMode( | ||
1466 | const char *zPath, /* Path of file (possibly) being created */ | ||
1467 | int flags, /* Flags passed as 4th argument to xOpen() */ | ||
1468 | mode_t *pMode /* OUT: Permissions to open file with */ | ||
1469 | ){ | ||
1470 | int rc = UNQLITE_OK; /* Return Code */ | ||
1471 | if( flags & UNQLITE_OPEN_TEMP_DB ){ | ||
1472 | *pMode = 0600; | ||
1473 | SXUNUSED(zPath); | ||
1474 | }else{ | ||
1475 | *pMode = UNQLITE_DEFAULT_FILE_PERMISSIONS; | ||
1476 | } | ||
1477 | return rc; | ||
1478 | } | ||
1479 | /* | ||
1480 | ** Open the file zPath. | ||
1481 | ** | ||
1482 | ** Previously, the SQLite OS layer used three functions in place of this | ||
1483 | ** one: | ||
1484 | ** | ||
1485 | ** unqliteOsOpenReadWrite(); | ||
1486 | ** unqliteOsOpenReadOnly(); | ||
1487 | ** unqliteOsOpenExclusive(); | ||
1488 | ** | ||
1489 | ** These calls correspond to the following combinations of flags: | ||
1490 | ** | ||
1491 | ** ReadWrite() -> (READWRITE | CREATE) | ||
1492 | ** ReadOnly() -> (READONLY) | ||
1493 | ** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE) | ||
1494 | ** | ||
1495 | ** The old OpenExclusive() accepted a boolean argument - "delFlag". If | ||
1496 | ** true, the file was configured to be automatically deleted when the | ||
1497 | ** file handle closed. To achieve the same effect using this new | ||
1498 | ** interface, add the DELETEONCLOSE flag to those specified above for | ||
1499 | ** OpenExclusive(). | ||
1500 | */ | ||
1501 | static int unixOpen( | ||
1502 | unqlite_vfs *pVfs, /* The VFS for which this is the xOpen method */ | ||
1503 | const char *zPath, /* Pathname of file to be opened */ | ||
1504 | unqlite_file *pFile, /* The file descriptor to be filled in */ | ||
1505 | unsigned int flags /* Input flags to control the opening */ | ||
1506 | ){ | ||
1507 | unixFile *p = (unixFile *)pFile; | ||
1508 | int fd = -1; /* File descriptor returned by open() */ | ||
1509 | int dirfd = -1; /* Directory file descriptor */ | ||
1510 | int openFlags = 0; /* Flags to pass to open() */ | ||
1511 | int noLock; /* True to omit locking primitives */ | ||
1512 | int rc = UNQLITE_OK; /* Function Return Code */ | ||
1513 | UnixUnusedFd *pUnused; | ||
1514 | int isExclusive = (flags & UNQLITE_OPEN_EXCLUSIVE); | ||
1515 | int isDelete = (flags & UNQLITE_OPEN_TEMP_DB); | ||
1516 | int isCreate = (flags & UNQLITE_OPEN_CREATE); | ||
1517 | int isReadonly = (flags & UNQLITE_OPEN_READONLY); | ||
1518 | int isReadWrite = (flags & UNQLITE_OPEN_READWRITE); | ||
1519 | /* If creating a master or main-file journal, this function will open | ||
1520 | ** a file-descriptor on the directory too. The first time unixSync() | ||
1521 | ** is called the directory file descriptor will be fsync()ed and close()d. | ||
1522 | */ | ||
1523 | int isOpenDirectory = isCreate ; | ||
1524 | const char *zName = zPath; | ||
1525 | |||
1526 | SyZero(p,sizeof(unixFile)); | ||
1527 | |||
1528 | pUnused = findReusableFd(zName, flags); | ||
1529 | if( pUnused ){ | ||
1530 | fd = pUnused->fd; | ||
1531 | }else{ | ||
1532 | pUnused = unqlite_malloc(sizeof(*pUnused)); | ||
1533 | if( !pUnused ){ | ||
1534 | return UNQLITE_NOMEM; | ||
1535 | } | ||
1536 | } | ||
1537 | p->pUnused = pUnused; | ||
1538 | |||
1539 | /* Determine the value of the flags parameter passed to POSIX function | ||
1540 | ** open(). These must be calculated even if open() is not called, as | ||
1541 | ** they may be stored as part of the file handle and used by the | ||
1542 | ** 'conch file' locking functions later on. */ | ||
1543 | if( isReadonly ) openFlags |= O_RDONLY; | ||
1544 | if( isReadWrite ) openFlags |= O_RDWR; | ||
1545 | if( isCreate ) openFlags |= O_CREAT; | ||
1546 | if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW); | ||
1547 | openFlags |= (O_LARGEFILE|O_BINARY); | ||
1548 | |||
1549 | if( fd<0 ){ | ||
1550 | mode_t openMode; /* Permissions to create file with */ | ||
1551 | rc = findCreateFileMode(zName, flags, &openMode); | ||
1552 | if( rc!=UNQLITE_OK ){ | ||
1553 | return rc; | ||
1554 | } | ||
1555 | fd = open(zName, openFlags, openMode); | ||
1556 | if( fd<0 ){ | ||
1557 | rc = UNQLITE_IOERR; | ||
1558 | goto open_finished; | ||
1559 | } | ||
1560 | } | ||
1561 | |||
1562 | if( p->pUnused ){ | ||
1563 | p->pUnused->fd = fd; | ||
1564 | p->pUnused->flags = flags; | ||
1565 | } | ||
1566 | |||
1567 | if( isDelete ){ | ||
1568 | unlink(zName); | ||
1569 | } | ||
1570 | |||
1571 | if( isOpenDirectory ){ | ||
1572 | rc = openDirectory(zPath, &dirfd); | ||
1573 | if( rc!=UNQLITE_OK ){ | ||
1574 | /* It is safe to close fd at this point, because it is guaranteed not | ||
1575 | ** to be open on a database file. If it were open on a database file, | ||
1576 | ** it would not be safe to close as this would release any locks held | ||
1577 | ** on the file by this process. */ | ||
1578 | close(fd); /* silently leak if fail, already in error */ | ||
1579 | goto open_finished; | ||
1580 | } | ||
1581 | } | ||
1582 | |||
1583 | #ifdef FD_CLOEXEC | ||
1584 | fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC); | ||
1585 | #endif | ||
1586 | |||
1587 | noLock = 0; | ||
1588 | |||
1589 | #if defined(__APPLE__) | ||
1590 | struct statfs fsInfo; | ||
1591 | if( fstatfs(fd, &fsInfo) == -1 ){ | ||
1592 | ((unixFile*)pFile)->lastErrno = errno; | ||
1593 | if( dirfd>=0 ) close(dirfd); /* silently leak if fail, in error */ | ||
1594 | close(fd); /* silently leak if fail, in error */ | ||
1595 | return UNQLITE_IOERR; | ||
1596 | } | ||
1597 | if (0 == SyStrncmp("msdos", fsInfo.f_fstypename, 5)) { | ||
1598 | ((unixFile*)pFile)->fsFlags |= UNQLITE_FSFLAGS_IS_MSDOS; | ||
1599 | } | ||
1600 | #endif | ||
1601 | |||
1602 | rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, isDelete); | ||
1603 | open_finished: | ||
1604 | if( rc!=UNQLITE_OK ){ | ||
1605 | unqlite_free(p->pUnused); | ||
1606 | } | ||
1607 | return rc; | ||
1608 | } | ||
1609 | /* | ||
1610 | ** Delete the file at zPath. If the dirSync argument is true, fsync() | ||
1611 | ** the directory after deleting the file. | ||
1612 | */ | ||
1613 | static int unixDelete( | ||
1614 | unqlite_vfs *NotUsed, /* VFS containing this as the xDelete method */ | ||
1615 | const char *zPath, /* Name of file to be deleted */ | ||
1616 | int dirSync /* If true, fsync() directory after deleting file */ | ||
1617 | ){ | ||
1618 | int rc = UNQLITE_OK; | ||
1619 | SXUNUSED(NotUsed); | ||
1620 | |||
1621 | if( unlink(zPath)==(-1) && errno!=ENOENT ){ | ||
1622 | return UNQLITE_IOERR; | ||
1623 | } | ||
1624 | #ifndef UNQLITE_DISABLE_DIRSYNC | ||
1625 | if( dirSync ){ | ||
1626 | int fd; | ||
1627 | rc = openDirectory(zPath, &fd); | ||
1628 | if( rc==UNQLITE_OK ){ | ||
1629 | if( fsync(fd) ) | ||
1630 | { | ||
1631 | rc = UNQLITE_IOERR; | ||
1632 | } | ||
1633 | if( close(fd) && !rc ){ | ||
1634 | rc = UNQLITE_IOERR; | ||
1635 | } | ||
1636 | } | ||
1637 | } | ||
1638 | #endif | ||
1639 | return rc; | ||
1640 | } | ||
1641 | /* | ||
1642 | ** Sleep for a little while. Return the amount of time slept. | ||
1643 | ** The argument is the number of microseconds we want to sleep. | ||
1644 | ** The return value is the number of microseconds of sleep actually | ||
1645 | ** requested from the underlying operating system, a number which | ||
1646 | ** might be greater than or equal to the argument, but not less | ||
1647 | ** than the argument. | ||
1648 | */ | ||
1649 | static int unixSleep(unqlite_vfs *NotUsed, int microseconds) | ||
1650 | { | ||
1651 | #if defined(HAVE_USLEEP) && HAVE_USLEEP | ||
1652 | usleep(microseconds); | ||
1653 | SXUNUSED(NotUsed); | ||
1654 | return microseconds; | ||
1655 | #else | ||
1656 | int seconds = (microseconds+999999)/1000000; | ||
1657 | SXUNUSED(NotUsed); | ||
1658 | sleep(seconds); | ||
1659 | return seconds*1000000; | ||
1660 | #endif | ||
1661 | } | ||
1662 | /* | ||
1663 | * Export the current system time. | ||
1664 | */ | ||
1665 | static int unixCurrentTime(unqlite_vfs *pVfs,Sytm *pOut) | ||
1666 | { | ||
1667 | struct tm *pTm; | ||
1668 | time_t tt; | ||
1669 | SXUNUSED(pVfs); | ||
1670 | time(&tt); | ||
1671 | pTm = gmtime(&tt); | ||
1672 | if( pTm ){ /* Yes, it can fail */ | ||
1673 | STRUCT_TM_TO_SYTM(pTm,pOut); | ||
1674 | } | ||
1675 | return UNQLITE_OK; | ||
1676 | } | ||
1677 | /* | ||
1678 | ** Test the existance of or access permissions of file zPath. The | ||
1679 | ** test performed depends on the value of flags: | ||
1680 | ** | ||
1681 | ** UNQLITE_ACCESS_EXISTS: Return 1 if the file exists | ||
1682 | ** UNQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable. | ||
1683 | ** UNQLITE_ACCESS_READONLY: Return 1 if the file is readable. | ||
1684 | ** | ||
1685 | ** Otherwise return 0. | ||
1686 | */ | ||
1687 | static int unixAccess( | ||
1688 | unqlite_vfs *NotUsed, /* The VFS containing this xAccess method */ | ||
1689 | const char *zPath, /* Path of the file to examine */ | ||
1690 | int flags, /* What do we want to learn about the zPath file? */ | ||
1691 | int *pResOut /* Write result boolean here */ | ||
1692 | ){ | ||
1693 | int amode = 0; | ||
1694 | SXUNUSED(NotUsed); | ||
1695 | switch( flags ){ | ||
1696 | case UNQLITE_ACCESS_EXISTS: | ||
1697 | amode = F_OK; | ||
1698 | break; | ||
1699 | case UNQLITE_ACCESS_READWRITE: | ||
1700 | amode = W_OK|R_OK; | ||
1701 | break; | ||
1702 | case UNQLITE_ACCESS_READ: | ||
1703 | amode = R_OK; | ||
1704 | break; | ||
1705 | default: | ||
1706 | /* Can't happen */ | ||
1707 | break; | ||
1708 | } | ||
1709 | *pResOut = (access(zPath, amode)==0); | ||
1710 | if( flags==UNQLITE_ACCESS_EXISTS && *pResOut ){ | ||
1711 | struct stat buf; | ||
1712 | if( 0==stat(zPath, &buf) && buf.st_size==0 ){ | ||
1713 | *pResOut = 0; | ||
1714 | } | ||
1715 | } | ||
1716 | return UNQLITE_OK; | ||
1717 | } | ||
1718 | /* | ||
1719 | ** Turn a relative pathname into a full pathname. The relative path | ||
1720 | ** is stored as a nul-terminated string in the buffer pointed to by | ||
1721 | ** zPath. | ||
1722 | ** | ||
1723 | ** zOut points to a buffer of at least unqlite_vfs.mxPathname bytes | ||
1724 | ** (in this case, MAX_PATHNAME bytes). The full-path is written to | ||
1725 | ** this buffer before returning. | ||
1726 | */ | ||
1727 | static int unixFullPathname( | ||
1728 | unqlite_vfs *pVfs, /* Pointer to vfs object */ | ||
1729 | const char *zPath, /* Possibly relative input path */ | ||
1730 | int nOut, /* Size of output buffer in bytes */ | ||
1731 | char *zOut /* Output buffer */ | ||
1732 | ){ | ||
1733 | if( zPath[0]=='/' ){ | ||
1734 | Systrcpy(zOut,(sxu32)nOut,zPath,0); | ||
1735 | SXUNUSED(pVfs); | ||
1736 | }else{ | ||
1737 | sxu32 nCwd; | ||
1738 | zOut[nOut-1] = '\0'; | ||
1739 | if( getcwd(zOut, nOut-1)==0 ){ | ||
1740 | return UNQLITE_IOERR; | ||
1741 | } | ||
1742 | nCwd = SyStrlen(zOut); | ||
1743 | SyBufferFormat(&zOut[nCwd],(sxu32)nOut-nCwd,"/%s",zPath); | ||
1744 | } | ||
1745 | return UNQLITE_OK; | ||
1746 | } | ||
1747 | /* | ||
1748 | * Export the Unix Vfs. | ||
1749 | */ | ||
1750 | UNQLITE_PRIVATE const unqlite_vfs * unqliteExportBuiltinVfs(void) | ||
1751 | { | ||
1752 | static const unqlite_vfs sUnixvfs = { | ||
1753 | "Unix", /* Vfs name */ | ||
1754 | 1, /* Vfs structure version */ | ||
1755 | sizeof(unixFile), /* szOsFile */ | ||
1756 | MAX_PATHNAME, /* mxPathName */ | ||
1757 | unixOpen, /* xOpen */ | ||
1758 | unixDelete, /* xDelete */ | ||
1759 | unixAccess, /* xAccess */ | ||
1760 | unixFullPathname, /* xFullPathname */ | ||
1761 | 0, /* xTmp */ | ||
1762 | unixSleep, /* xSleep */ | ||
1763 | unixCurrentTime, /* xCurrentTime */ | ||
1764 | 0, /* xGetLastError */ | ||
1765 | }; | ||
1766 | return &sUnixvfs; | ||
1767 | } | ||
1768 | |||
1769 | #endif /* __UNIXES__ */ | ||