Postfix3.3.1
slmdb.c
[詳解]
1 /*++
2 /* NAME
3 /* slmdb 3
4 /* SUMMARY
5 /* Simplified LMDB API
6 /* SYNOPSIS
7 /* #include <slmdb.h>
8 /*
9 /* int slmdb_init(slmdb, curr_limit, size_incr, hard_limit)
10 /* SLMDB *slmdb;
11 /* size_t curr_limit;
12 /* int size_incr;
13 /* size_t hard_limit;
14 /*
15 /* int slmdb_open(slmdb, path, open_flags, lmdb_flags, slmdb_flags)
16 /* SLMDB *slmdb;
17 /* const char *path;
18 /* int open_flags;
19 /* int lmdb_flags;
20 /* int slmdb_flags;
21 /*
22 /* int slmdb_close(slmdb)
23 /* SLMDB *slmdb;
24 /*
25 /* int slmdb_get(slmdb, mdb_key, mdb_value)
26 /* SLMDB *slmdb;
27 /* MDB_val *mdb_key;
28 /* MDB_val *mdb_value;
29 /*
30 /* int slmdb_put(slmdb, mdb_key, mdb_value, flags)
31 /* SLMDB *slmdb;
32 /* MDB_val *mdb_key;
33 /* MDB_val *mdb_value;
34 /* int flags;
35 /*
36 /* int slmdb_del(slmdb, mdb_key)
37 /* SLMDB *slmdb;
38 /* MDB_val *mdb_key;
39 /*
40 /* int slmdb_cursor_get(slmdb, mdb_key, mdb_value, op)
41 /* SLMDB *slmdb;
42 /* MDB_val *mdb_key;
43 /* MDB_val *mdb_value;
44 /* MDB_cursor_op op;
45 /* AUXILIARY FUNCTIONS
46 /* int slmdb_fd(slmdb)
47 /* SLMDB *slmdb;
48 /*
49 /* size_t slmdb_curr_limit(slmdb)
50 /* SLMDB *slmdb;
51 /*
52 /* int slmdb_control(slmdb, request, ...)
53 /* SLMDB *slmdb;
54 /* int request;
55 /* DESCRIPTION
56 /* This module simplifies the LMDB API by hiding recoverable
57 /* errors from the application. Details are given in the
58 /* section "ERROR RECOVERY".
59 /*
60 /* slmdb_init() performs mandatory initialization before opening
61 /* an LMDB database. The result value is an LMDB status code
62 /* (zero in case of success).
63 /*
64 /* slmdb_open() opens an LMDB database. The result value is
65 /* an LMDB status code (zero in case of success).
66 /*
67 /* slmdb_close() finalizes an optional bulk-mode transaction
68 /* and closes a successfully-opened LMDB database. The result
69 /* value is an LMDB status code (zero in case of success).
70 /*
71 /* slmdb_get() is an mdb_get() wrapper with automatic error
72 /* recovery. The result value is an LMDB status code (zero
73 /* in case of success).
74 /*
75 /* slmdb_put() is an mdb_put() wrapper with automatic error
76 /* recovery. The result value is an LMDB status code (zero
77 /* in case of success).
78 /*
79 /* slmdb_del() is an mdb_del() wrapper with automatic error
80 /* recovery. The result value is an LMDB status code (zero
81 /* in case of success).
82 /*
83 /* slmdb_cursor_get() is an mdb_cursor_get() wrapper with
84 /* automatic error recovery. The result value is an LMDB
85 /* status code (zero in case of success). This wrapper supports
86 /* only one cursor per database.
87 /*
88 /* slmdb_fd() returns the file descriptor for the specified
89 /* database. This may be used for file status queries or
90 /* application-controlled locking.
91 /*
92 /* slmdb_curr_limit() returns the current database size limit
93 /* for the specified database.
94 /*
95 /* slmdb_control() specifies optional features. The result is
96 /* an LMDB status code (zero in case of success).
97 /*
98 /* Arguments:
99 /* .IP slmdb
100 /* Pointer to caller-provided storage.
101 /* .IP curr_limit
102 /* The initial memory mapping size limit. This limit is
103 /* automatically increased when the database becomes full.
104 /* .IP size_incr
105 /* An integer factor by which the memory mapping size limit
106 /* is increased when the database becomes full.
107 /* .IP hard_limit
108 /* The upper bound for the memory mapping size limit.
109 /* .IP path
110 /* LMDB database pathname.
111 /* .IP open_flags
112 /* Flags that control file open operations. Do not specify
113 /* locking flags here.
114 /* .IP lmdb_flags
115 /* Flags that control the LMDB environment. If MDB_NOLOCK is
116 /* specified, then each slmdb_get() or slmdb_cursor_get() call
117 /* must be protected with a shared (or exclusive) external lock,
118 /* and each slmdb_put() or slmdb_del() call must be protected
119 /* with an exclusive external lock. A lock may be released
120 /* after the call returns. A writer may atomically downgrade
121 /* an exclusive lock to shared, but it must obtain an exclusive
122 /* lock before making another slmdb(3) write request.
123 /* .sp
124 /* Note: when a database is opened with MDB_NOLOCK, external
125 /* locks such as fcntl() do not protect slmdb(3) requests
126 /* within the same process against each other. If a program
127 /* cannot avoid making simultaneous slmdb(3) requests, then
128 /* it must synchronize these requests with in-process locks,
129 /* in addition to the per-process fcntl(2) locks.
130 /* .IP slmdb_flags
131 /* Bit-wise OR of zero or more of the following:
132 /* .RS
133 /* .IP SLMDB_FLAG_BULK
134 /* Open the database and create a "bulk" transaction that is
135 /* committed when the database is closed. If MDB_NOLOCK is
136 /* specified, then the entire transaction must be protected
137 /* with a persistent external lock. All slmdb_get(), slmdb_put()
138 /* and slmdb_del() requests will be directed to the "bulk"
139 /* transaction.
140 /* .RE
141 /* .IP mdb_key
142 /* Pointer to caller-provided lookup key storage.
143 /* .IP mdb_value
144 /* Pointer to caller-provided value storage.
145 /* .IP op
146 /* LMDB cursor operation.
147 /* .IP request
148 /* The start of a list of (name, value) pairs, terminated with
149 /* CA_SLMDB_CTL_END. The following text enumerates the symbolic
150 /* request names and the corresponding argument types.
151 /* .RS
152 /* .IP "CA_SLMDB_CTL_LONGJMP_FN(void (*)(void *, int))"
153 /* Call-back function pointer. The function is called to repeat
154 /* a failed bulk-mode transaction from the start. The arguments
155 /* are the application context and the setjmp() or sigsetjmp()
156 /* result value.
157 /* .IP "CA_SLMDB_CTL_NOTIFY_FN(void (*)(void *, int, ...))"
158 /* Call-back function pointer. The function is called to report
159 /* successful error recovery. The arguments are the application
160 /* context, the MDB error code, and additional arguments that
161 /* depend on the error code. Details are given in the section
162 /* "ERROR RECOVERY".
163 /* .IP "CA_SLMDB_CTL_ASSERT_FN(void (*)(void *, const char *))"
164 /* Call-back function pointer. The function is called to
165 /* report an LMDB internal assertion failure. The arguments
166 /* are the application context, and text that describes the
167 /* problem.
168 /* .IP "CA_SLMDB_CTL_CB_CONTEXT(void *)"
169 /* Application context that is passed in call-back function
170 /* calls.
171 /* .IP "CA_SLMDB_CTL_API_RETRY_LIMIT(int)"
172 /* How many times to recover from LMDB errors within the
173 /* execution of a single slmdb(3) API call before giving up.
174 /* .IP "CA_SLMDB_CTL_BULK_RETRY_LIMIT(int)"
175 /* How many times to recover from a failed bulk-mode
176 /* transaction before giving up.
177 /* .RE
178 /* ERROR RECOVERY
179 /* .ad
180 /* .fi
181 /* This module automatically repeats failed requests after
182 /* recoverable errors, up to the limits specified with
183 /* slmdb_control().
184 /*
185 /* Recoverable errors are reported through an optional
186 /* notification function specified with slmdb_control(). With
187 /* recoverable MDB_MAP_FULL and MDB_MAP_RESIZED errors, the
188 /* additional argument is a size_t value with the updated
189 /* current database size limit; with recoverable MDB_READERS_FULL
190 /* errors there is no additional argument.
191 /* BUGS
192 /* Recovery from MDB_MAP_FULL involves resizing the database
193 /* memory mapping. According to LMDB documentation this
194 /* requires that there is no concurrent activity in the same
195 /* database by other threads in the same memory address space.
196 /* SEE ALSO
197 /* lmdb(3) API manpage (currently, non-existent).
198 /* AUTHOR(S)
199 /* Howard Chu
200 /* Symas Corporation
201 /*
202 /* Wietse Venema
203 /* IBM T.J. Watson Research
204 /* P.O. Box 704
205 /* Yorktown Heights, NY 10598, USA
206 /*--*/
207 
208  /*
209  * DO NOT include other Postfix-specific header files. This LMDB wrapper
210  * must be usable outside Postfix.
211  */
212 
213 #ifdef HAS_LMDB
214 
215 /* System library. */
216 
217 #include <sys/stat.h>
218 #include <errno.h>
219 #include <fcntl.h>
220 #include <string.h>
221 #include <unistd.h>
222 #include <limits.h>
223 #include <stdarg.h>
224 #include <string.h>
225 #include <stdlib.h>
226 
227 /* Application-specific. */
228 
229 #include <slmdb.h>
230 
231  /*
232  * Minimum LMDB patchlevel.
233  *
234  * LMDB 0.9.11 allows Postfix daemons to log an LMDB error message instead of
235  * falling out of the sky without any explanation. Without such logging,
236  * Postfix with LMDB would be too hard to support.
237  *
238  * LMDB 0.9.10 fixes an information leak where LMDB wrote chunks of up to 4096
239  * bytes of uninitialized heap memory to a database. This was a security
240  * violation because it made information persistent that was not meant to be
241  * persisted, or it was sharing information that was not meant to be shared.
242  *
243  * LMDB 0.9.9 allows Postfix to use external (fcntl()-based) locks, instead of
244  * having to use world-writable LMDB lock files.
245  *
246  * LMDB 0.9.8 allows Postfix to update the database size limit on-the-fly, so
247  * that it can recover from an MDB_MAP_FULL error without having to close
248  * the database. It also allows an application to "pick up" a new database
249  * size limit on-the-fly, so that it can recover from an MDB_MAP_RESIZED
250  * error without having to close the database.
251  *
252  * The database size limit that remains is imposed by the hardware memory
253  * address space (31 or 47 bits, typically) or file system. The LMDB
254  * implementation is supposed to handle databases larger than physical
255  * memory. However, this is not necessarily guaranteed for (bulk)
256  * transactions larger than physical memory.
257  */
258 #if MDB_VERSION_FULL < MDB_VERINT(0, 9, 11)
259 #error "This Postfix version requires LMDB version 0.9.11 or later"
260 #endif
261 
262  /*
263  * Error recovery.
264  *
265  * The purpose of the slmdb(3) API is to hide LMDB quirks (recoverable
266  * MDB_MAP_FULL, MDB_MAP_RESIZED, or MDB_READERS_FULL errors). With these
267  * out of the way, applications can pretend that those quirks don't exist,
268  * and focus on their own job.
269  *
270  * - To recover from a single-transaction LMDB error, each wrapper function
271  * uses tail recursion instead of goto. Since LMDB errors are rare, code
272  * clarity is more important than speed.
273  *
274  * - To recover from a bulk-transaction LMDB error, the error-recovery code
275  * triggers a long jump back into the caller to some pre-arranged point (the
276  * closest thing that C has to exception handling). The application is then
277  * expected to repeat the bulk transaction from scratch.
278  */
279 
/*
 * Default retry budgets. A single slmdb(3) API call outside a bulk
 * transaction gets a small, fixed number of recovery attempts. A bulk-mode
 * transaction gets a budget that scales with the width of size_t, i.e. with
 * the size of the memory address space.
 */
#define SLMDB_DEF_API_RETRY_LIMIT	(30)	/* per slmdb(3) API call */
#define SLMDB_DEF_BULK_RETRY_LIMIT	(2 * CHAR_BIT * sizeof(size_t))
					/* per bulk-mode transaction */
288 
/*
 * Every attempt to recover from an error bumps the per-call retry counter.
 * All public slmdb(3) entry points leave through this macro, so that the
 * counter starts from zero again on the next API call.
 */
#define SLMDB_API_RETURN(slmdb, status) \
    do { \
	(slmdb)->api_retry_count = 0; \
	return (status); \
    } while (0)
298 
299  /*
300  * With MDB_NOLOCK, the application uses an external lock for inter-process
301  * synchronization. Because the caller may release the external lock after
302  * an SLMDB API call, each SLMDB API function must use a short-lived
303  * transaction unless the transaction is a bulk-mode transaction.
304  */
305 
306 /* slmdb_cursor_close - close cursor and its read transaction */
307 
308 static void slmdb_cursor_close(SLMDB *slmdb)
309 {
310  MDB_txn *txn;
311 
312  /*
313  * Close the cursor and its read transaction. We can restore it later
314  * from the saved key information.
315  */
316  txn = mdb_cursor_txn(slmdb->cursor);
317  mdb_cursor_close(slmdb->cursor);
318  slmdb->cursor = 0;
319  mdb_txn_abort(txn);
320 }
321 
322 /* slmdb_saved_key_init - initialize saved key info */
323 
324 static void slmdb_saved_key_init(SLMDB *slmdb)
325 {
326  slmdb->saved_key.mv_data = 0;
327  slmdb->saved_key.mv_size = 0;
328  slmdb->saved_key_size = 0;
329 }
330 
331 /* slmdb_saved_key_free - destroy saved key info */
332 
333 static void slmdb_saved_key_free(SLMDB *slmdb)
334 {
335  free(slmdb->saved_key.mv_data);
336  slmdb_saved_key_init(slmdb);
337 }
338 
/* True when a saved-key buffer is currently allocated. */
#define HAVE_SLMDB_SAVED_KEY(s) (0 != (s)->saved_key.mv_data)
340 
341 /* slmdb_saved_key_assign - copy the saved key */
342 
343 static int slmdb_saved_key_assign(SLMDB *slmdb, MDB_val *key_val)
344 {
345 
346  /*
347  * Extend the buffer to fit the key, so that we can avoid malloc()
348  * overhead most of the time.
349  */
350  if (slmdb->saved_key_size < key_val->mv_size) {
351  if (slmdb->saved_key.mv_data == 0)
352  slmdb->saved_key.mv_data = malloc(key_val->mv_size);
353  else
354  slmdb->saved_key.mv_data =
355  realloc(slmdb->saved_key.mv_data, key_val->mv_size);
356  if (slmdb->saved_key.mv_data == 0) {
357  slmdb_saved_key_init(slmdb);
358  return (ENOMEM);
359  } else {
360  slmdb->saved_key_size = key_val->mv_size;
361  }
362  }
363 
364  /*
365  * Copy the key under the cursor.
366  */
367  memcpy(slmdb->saved_key.mv_data, key_val->mv_data, key_val->mv_size);
368  slmdb->saved_key.mv_size = key_val->mv_size;
369  return (0);
370 }
371 
372 /* slmdb_prepare - LMDB-specific (re)initialization before actual access */
373 
374 static int slmdb_prepare(SLMDB *slmdb)
375 {
376  int status = 0;
377 
378  /*
379  * This is called before accessing the database, or after recovery from
380  * an LMDB error. Note: this code cannot recover from errors itself.
381  * slmdb->txn is either the database open() transaction or a
382  * freshly-created bulk-mode transaction.
383  *
384  * - With O_TRUNC we make a "drop" request before updating the database.
385  *
386  * - With a bulk-mode transaction we commit when the database is closed.
387  */
388  if (slmdb->open_flags & O_TRUNC) {
389  if ((status = mdb_drop(slmdb->txn, slmdb->dbi, 0)) != 0)
390  return (status);
391  if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
392  if ((status = mdb_txn_commit(slmdb->txn)) != 0)
393  return (status);
394  slmdb->txn = 0;
395  }
396  } else if ((slmdb->lmdb_flags & MDB_RDONLY) != 0
397  || (slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
398  mdb_txn_abort(slmdb->txn);
399  slmdb->txn = 0;
400  }
401  slmdb->api_retry_count = 0;
402  return (status);
403 }
404 
405 /* slmdb_recover - recover from LMDB errors */
406 
407 static int slmdb_recover(SLMDB *slmdb, int status)
408 {
409  MDB_envinfo info;
410 
411  /*
412  * This may be needed in non-MDB_NOLOCK mode. Recovery is rare enough
413  * that we don't care about a few wasted cycles.
414  */
415  if (slmdb->cursor != 0)
416  slmdb_cursor_close(slmdb);
417 
418  /*
419  * Recover bulk transactions only if they can be restarted. Limit the
420  * number of recovery attempts per slmdb(3) API request.
421  */
422  if ((slmdb->txn != 0 && slmdb->longjmp_fn == 0)
423  || ((slmdb->api_retry_count += 1) >= slmdb->api_retry_limit))
424  return (status);
425 
426  /*
427  * If we can recover from the error, we clear the error condition and the
428  * caller should retry the failed operation immediately. Otherwise, the
429  * caller should terminate with a fatal run-time error and the program
430  * should be re-run later.
431  *
432  * slmdb->txn must be either null (non-bulk transaction error), or an
433  * aborted bulk-mode transaction.
434  */
435  switch (status) {
436 
437  /*
438  * As of LMDB 0.9.8 when a non-bulk update runs into a "map full"
439  * error, we can resize the environment's memory map and clear the
440  * error condition. The caller should retry immediately.
441  */
442  case MDB_MAP_FULL:
443  /* Can we increase the memory map? Give up if we can't. */
444  if (slmdb->curr_limit < slmdb->hard_limit / slmdb->size_incr) {
445  slmdb->curr_limit = slmdb->curr_limit * slmdb->size_incr;
446  } else if (slmdb->curr_limit < slmdb->hard_limit) {
447  slmdb->curr_limit = slmdb->hard_limit;
448  } else {
449  /* Sorry, we are already maxed out. */
450  break;
451  }
452  if (slmdb->notify_fn)
453  slmdb->notify_fn(slmdb->cb_context, MDB_MAP_FULL,
454  slmdb->curr_limit);
455  status = mdb_env_set_mapsize(slmdb->env, slmdb->curr_limit);
456  break;
457 
458  /*
459  * When a writer resizes the database, read-only applications must
460  * increase their LMDB memory map size limit, too. Otherwise, they
461  * won't be able to read a table after it grows.
462  *
463  * As of LMDB 0.9.8 we can import the new memory map size limit into the
464  * database environment by calling mdb_env_set_mapsize() with a zero
465  * size argument. Then we extract the map size limit for later use.
466  * The caller should retry immediately.
467  */
468  case MDB_MAP_RESIZED:
469  if ((status = mdb_env_set_mapsize(slmdb->env, 0)) == 0) {
470  /* Do not panic. Maps may shrink after bulk update. */
471  mdb_env_info(slmdb->env, &info);
472  slmdb->curr_limit = info.me_mapsize;
473  if (slmdb->notify_fn)
474  slmdb->notify_fn(slmdb->cb_context, MDB_MAP_RESIZED,
475  slmdb->curr_limit);
476  }
477  break;
478 
479  /*
480  * What is it with these built-in hard limits that cause systems to
481  * stop when demand is at its highest? When the system is under
482  * stress it should slow down and keep making progress.
483  */
484  case MDB_READERS_FULL:
485  if (slmdb->notify_fn)
486  slmdb->notify_fn(slmdb->cb_context, MDB_READERS_FULL);
487  sleep(1);
488  status = 0;
489  break;
490 
491  /*
492  * We can't solve this problem. The application should terminate with
493  * a fatal run-time error and the program should be re-run later.
494  */
495  default:
496  break;
497  }
498 
499  /*
500  * If a bulk-transaction error is recoverable, build a new bulk
501  * transaction from scratch, by making a long jump back into the caller
502  * at some pre-arranged point. In MDB_NOLOCK mode, there is no need to
503  * upgrade the lock to "exclusive", because the failed write transaction
504  * has no side effects.
505  */
506  if (slmdb->txn != 0 && status == 0 && slmdb->longjmp_fn != 0
507  && (slmdb->bulk_retry_count += 1) <= slmdb->bulk_retry_limit) {
508  if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0,
509  slmdb->lmdb_flags & MDB_RDONLY,
510  &slmdb->txn)) == 0
511  && (status = slmdb_prepare(slmdb)) == 0)
512  slmdb->longjmp_fn(slmdb->cb_context, 1);
513  }
514  return (status);
515 }
516 
517 /* slmdb_txn_begin - mdb_txn_begin() wrapper with LMDB error recovery */
518 
519 static int slmdb_txn_begin(SLMDB *slmdb, int rdonly, MDB_txn **txn)
520 {
521  int status;
522 
523  if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, rdonly, txn)) != 0
524  && (status = slmdb_recover(slmdb, status)) == 0)
525  status = slmdb_txn_begin(slmdb, rdonly, txn);
526 
527  return (status);
528 }
529 
530 /* slmdb_get - mdb_get() wrapper with LMDB error recovery */
531 
532 int slmdb_get(SLMDB *slmdb, MDB_val *mdb_key, MDB_val *mdb_value)
533 {
534  MDB_txn *txn;
535  int status;
536 
537  /*
538  * Start a read transaction if there's no bulk-mode txn.
539  */
540  if (slmdb->txn)
541  txn = slmdb->txn;
542  else if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
543  SLMDB_API_RETURN(slmdb, status);
544 
545  /*
546  * Do the lookup.
547  */
548  if ((status = mdb_get(txn, slmdb->dbi, mdb_key, mdb_value)) != 0
549  && status != MDB_NOTFOUND) {
550  mdb_txn_abort(txn);
551  if ((status = slmdb_recover(slmdb, status)) == 0)
552  status = slmdb_get(slmdb, mdb_key, mdb_value);
553  SLMDB_API_RETURN(slmdb, status);
554  }
555 
556  /*
557  * Close the read txn if it's not the bulk-mode txn.
558  */
559  if (slmdb->txn == 0)
560  mdb_txn_abort(txn);
561 
562  SLMDB_API_RETURN(slmdb, status);
563 }
564 
565 /* slmdb_put - mdb_put() wrapper with LMDB error recovery */
566 
567 int slmdb_put(SLMDB *slmdb, MDB_val *mdb_key,
568  MDB_val *mdb_value, int flags)
569 {
570  MDB_txn *txn;
571  int status;
572 
573  /*
574  * Start a write transaction if there's no bulk-mode txn.
575  */
576  if (slmdb->txn)
577  txn = slmdb->txn;
578  else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
579  SLMDB_API_RETURN(slmdb, status);
580 
581  /*
582  * Do the update.
583  */
584  if ((status = mdb_put(txn, slmdb->dbi, mdb_key, mdb_value, flags)) != 0) {
585  mdb_txn_abort(txn);
586  if (status != MDB_KEYEXIST) {
587  if ((status = slmdb_recover(slmdb, status)) == 0)
588  status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
589  SLMDB_API_RETURN(slmdb, status);
590  }
591  }
592 
593  /*
594  * Commit the transaction if it's not the bulk-mode txn.
595  */
596  if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
597  && (status = slmdb_recover(slmdb, status)) == 0)
598  status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
599 
600  SLMDB_API_RETURN(slmdb, status);
601 }
602 
603 /* slmdb_del - mdb_del() wrapper with LMDB error recovery */
604 
605 int slmdb_del(SLMDB *slmdb, MDB_val *mdb_key)
606 {
607  MDB_txn *txn;
608  int status;
609 
610  /*
611  * Start a write transaction if there's no bulk-mode txn.
612  */
613  if (slmdb->txn)
614  txn = slmdb->txn;
615  else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
616  SLMDB_API_RETURN(slmdb, status);
617 
618  /*
619  * Do the update.
620  */
621  if ((status = mdb_del(txn, slmdb->dbi, mdb_key, (MDB_val *) 0)) != 0) {
622  mdb_txn_abort(txn);
623  if (status != MDB_NOTFOUND) {
624  if ((status = slmdb_recover(slmdb, status)) == 0)
625  status = slmdb_del(slmdb, mdb_key);
626  SLMDB_API_RETURN(slmdb, status);
627  }
628  }
629 
630  /*
631  * Commit the transaction if it's not the bulk-mode txn.
632  */
633  if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
634  && (status = slmdb_recover(slmdb, status)) == 0)
635  status = slmdb_del(slmdb, mdb_key);
636 
637  SLMDB_API_RETURN(slmdb, status);
638 }
639 
640 /* slmdb_cursor_get - mdb_cursor_get() wrapper with LMDB error recovery */
641 
642 int slmdb_cursor_get(SLMDB *slmdb, MDB_val *mdb_key,
643  MDB_val *mdb_value, MDB_cursor_op op)
644 {
645  MDB_txn *txn;
646  int status = 0;
647 
648  /*
649  * Open a read transaction and cursor if needed.
650  */
651  if (slmdb->cursor == 0) {
652  if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
653  SLMDB_API_RETURN(slmdb, status);
654  if ((status = mdb_cursor_open(txn, slmdb->dbi, &slmdb->cursor)) != 0) {
655  mdb_txn_abort(txn);
656  if ((status = slmdb_recover(slmdb, status)) == 0)
657  status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
658  SLMDB_API_RETURN(slmdb, status);
659  }
660 
661  /*
662  * Restore the cursor position from the saved key information.
663  */
664  if (HAVE_SLMDB_SAVED_KEY(slmdb) && op != MDB_FIRST)
665  status = mdb_cursor_get(slmdb->cursor, &slmdb->saved_key,
666  (MDB_val *) 0, MDB_SET);
667  }
668 
669  /*
670  * Database lookup.
671  */
672  if (status == 0)
673  status = mdb_cursor_get(slmdb->cursor, mdb_key, mdb_value, op);
674 
675  /*
676  * Save the cursor position if successful. This can fail only with
677  * ENOMEM.
678  *
679  * Close the cursor read transaction if in MDB_NOLOCK mode, because the
680  * caller may release the external lock after we return.
681  */
682  if (status == 0) {
683  status = slmdb_saved_key_assign(slmdb, mdb_key);
684  if (slmdb->lmdb_flags & MDB_NOLOCK)
685  slmdb_cursor_close(slmdb);
686  }
687 
688  /*
689  * Handle end-of-database or other error.
690  */
691  else {
692  /* Do not hand-optimize out the slmdb_cursor_close() calls below. */
693  if (status == MDB_NOTFOUND) {
694  slmdb_cursor_close(slmdb);
695  if (HAVE_SLMDB_SAVED_KEY(slmdb))
696  slmdb_saved_key_free(slmdb);
697  } else {
698  slmdb_cursor_close(slmdb);
699  if ((status = slmdb_recover(slmdb, status)) == 0)
700  status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
701  SLMDB_API_RETURN(slmdb, status);
702  /* Do not hand-optimize out the above return statement. */
703  }
704  }
705  SLMDB_API_RETURN(slmdb, status);
706 }
707 
708 /* slmdb_assert_cb - report LMDB assertion failure */
709 
710 static void slmdb_assert_cb(MDB_env *env, const char *text)
711 {
712  SLMDB *slmdb = (SLMDB *) mdb_env_get_userctx(env);
713 
714  if (slmdb->assert_fn)
715  slmdb->assert_fn(slmdb->cb_context, text);
716 }
717 
718 /* slmdb_control - control optional settings */
719 
720 int slmdb_control(SLMDB *slmdb, int first,...)
721 {
722  va_list ap;
723  int status = 0;
724  int reqno;
725  int rc;
726 
727  va_start(ap, first);
728  for (reqno = first; status == 0 && reqno != SLMDB_CTL_END; reqno = va_arg(ap, int)) {
729  switch (reqno) {
731  slmdb->longjmp_fn = va_arg(ap, SLMDB_LONGJMP_FN);
732  break;
733  case SLMDB_CTL_NOTIFY_FN:
734  slmdb->notify_fn = va_arg(ap, SLMDB_NOTIFY_FN);
735  break;
736  case SLMDB_CTL_ASSERT_FN:
737  slmdb->assert_fn = va_arg(ap, SLMDB_ASSERT_FN);
738  if ((rc = mdb_env_set_userctx(slmdb->env, (void *) slmdb)) != 0
739  || (rc = mdb_env_set_assert(slmdb->env, slmdb_assert_cb)) != 0)
740  status = rc;
741  break;
743  slmdb->cb_context = va_arg(ap, void *);
744  break;
746  slmdb->api_retry_limit = va_arg(ap, int);
747  break;
749  slmdb->bulk_retry_limit = va_arg(ap, int);
750  break;
751  default:
752  status = errno = EINVAL;
753  break;
754  }
755  }
756  va_end(ap);
757  return (status);
758 }
759 
760 /* slmdb_close - wrapper with LMDB error recovery */
761 
762 int slmdb_close(SLMDB *slmdb)
763 {
764  int status = 0;
765 
766  /*
767  * Finish an open bulk transaction. If slmdb_recover() returns after a
768  * bulk-transaction error, then it was unable to recover.
769  */
770  if (slmdb->txn != 0
771  && (status = mdb_txn_commit(slmdb->txn)) != 0)
772  status = slmdb_recover(slmdb, status);
773 
774  /*
775  * Clean up after an unfinished sequence() operation.
776  */
777  if (slmdb->cursor != 0)
778  slmdb_cursor_close(slmdb);
779 
780  mdb_env_close(slmdb->env);
781 
782  /*
783  * Clean up the saved key information.
784  */
785  if (HAVE_SLMDB_SAVED_KEY(slmdb))
786  slmdb_saved_key_free(slmdb);
787 
788  SLMDB_API_RETURN(slmdb, status);
789 }
790 
791 /* slmdb_init - mandatory initialization */
792 
793 int slmdb_init(SLMDB *slmdb, size_t curr_limit, int size_incr,
794  size_t hard_limit)
795 {
796 
797  /*
798  * This is a separate operation to keep the slmdb_open() API simple.
799  * Don't allocate resources here. Just store control information,
800  */
801  slmdb->curr_limit = curr_limit;
802  slmdb->size_incr = size_incr;
803  slmdb->hard_limit = hard_limit;
804 
805  return (MDB_SUCCESS);
806 }
807 
808 /* slmdb_open - open wrapped LMDB database */
809 
810 int slmdb_open(SLMDB *slmdb, const char *path, int open_flags,
811  int lmdb_flags, int slmdb_flags)
812 {
813  struct stat st;
814  MDB_env *env;
815  MDB_txn *txn;
816  MDB_dbi dbi;
817  int db_fd;
818  int status;
819 
820  /*
821  * Create LMDB environment.
822  */
823  if ((status = mdb_env_create(&env)) != 0)
824  return (status);
825 
826  /*
827  * Make sure that the memory map has room to store and commit an initial
828  * "drop" transaction as well as fixed database metadata. We have no way
829  * to recover from errors before the first application-level I/O request.
830  */
831 #define SLMDB_FUDGE 10240
832 
833  if (slmdb->curr_limit < SLMDB_FUDGE)
834  slmdb->curr_limit = SLMDB_FUDGE;
835  if (stat(path, &st) == 0
836  && st.st_size > slmdb->curr_limit - SLMDB_FUDGE) {
837  if (st.st_size > slmdb->hard_limit)
838  slmdb->hard_limit = st.st_size;
839  if (st.st_size < slmdb->hard_limit - SLMDB_FUDGE)
840  slmdb->curr_limit = st.st_size + SLMDB_FUDGE;
841  else
842  slmdb->curr_limit = slmdb->hard_limit;
843  }
844 
845  /*
846  * mdb_open() requires a txn, but since the default DB always exists in
847  * an LMDB environment, we usually don't need to do anything else with
848  * the txn. It is currently used for truncate and for bulk transactions.
849  */
850  if ((status = mdb_env_set_mapsize(env, slmdb->curr_limit)) != 0
851  || (status = mdb_env_open(env, path, lmdb_flags, 0644)) != 0
852  || (status = mdb_txn_begin(env, (MDB_txn *) 0,
853  lmdb_flags & MDB_RDONLY, &txn)) != 0
854  || (status = mdb_open(txn, (const char *) 0, 0, &dbi)) != 0
855  || (status = mdb_env_get_fd(env, &db_fd)) != 0) {
856  mdb_env_close(env);
857  return (status);
858  }
859 
860  /*
861  * Bundle up.
862  */
863  slmdb->open_flags = open_flags;
864  slmdb->lmdb_flags = lmdb_flags;
865  slmdb->slmdb_flags = slmdb_flags;
866  slmdb->env = env;
867  slmdb->dbi = dbi;
868  slmdb->db_fd = db_fd;
869  slmdb->cursor = 0;
870  slmdb_saved_key_init(slmdb);
871  slmdb->api_retry_count = 0;
872  slmdb->bulk_retry_count = 0;
873  slmdb->api_retry_limit = SLMDB_DEF_API_RETRY_LIMIT;
874  slmdb->bulk_retry_limit = SLMDB_DEF_BULK_RETRY_LIMIT;
875  slmdb->longjmp_fn = 0;
876  slmdb->notify_fn = 0;
877  slmdb->assert_fn = 0;
878  slmdb->cb_context = 0;
879  slmdb->txn = txn;
880 
881  if ((status = slmdb_prepare(slmdb)) != 0)
882  mdb_env_close(env);
883 
884  return (status);
885 }
886 
887 #endif
int db_fd
Definition: slmdb.h:52
Definition: slmdb.h:42
int size_incr
Definition: slmdb.h:44
int api_retry_limit
Definition: slmdb.h:62
void(* SLMDB_NOTIFY_FN)(void *, int,...)
Definition: slmdb.h:98
#define stat(p, s)
Definition: warn_stat.h:18
int slmdb_del(SLMDB *, MDB_val *)
MDB_cursor * cursor
Definition: slmdb.h:53
MDB_env * env
Definition: slmdb.h:49
int bulk_retry_count
Definition: slmdb.h:61
int slmdb_init(SLMDB *, size_t, int, size_t)
int lmdb_flags
Definition: slmdb.h:47
int slmdb_get(SLMDB *, MDB_val *, MDB_val *)
#define SLMDB_CTL_ASSERT_FN
Definition: slmdb.h:87
void(* longjmp_fn)(void *, int)
Definition: slmdb.h:56
int api_retry_count
Definition: slmdb.h:60
#define SLMDB_CTL_API_RETRY_LIMIT
Definition: slmdb.h:85
MDB_dbi dbi
Definition: slmdb.h:50
size_t curr_limit
Definition: slmdb.h:43
size_t hard_limit
Definition: slmdb.h:45
MDB_val saved_key
Definition: slmdb.h:54
#define SLMDB_CTL_NOTIFY_FN
Definition: slmdb.h:83
#define SLMDB_FLAG_BULK
Definition: slmdb.h:66
int slmdb_flags
Definition: slmdb.h:48
#define SLMDB_CTL_BULK_RETRY_LIMIT
Definition: slmdb.h:86
int open_flags
Definition: slmdb.h:46
int slmdb_close(SLMDB *)
void(* notify_fn)(void *, int,...)
Definition: slmdb.h:57
#define SLMDB_CTL_CB_CONTEXT
Definition: slmdb.h:84
void * cb_context
Definition: slmdb.h:59
void(* SLMDB_LONGJMP_FN)(void *, int)
Definition: slmdb.h:99
int slmdb_cursor_get(SLMDB *, MDB_val *, MDB_val *, MDB_cursor_op)
void(* assert_fn)(void *, const char *)
Definition: slmdb.h:58
MDB_txn * txn
Definition: slmdb.h:51
#define SLMDB_CTL_LONGJMP_FN
Definition: slmdb.h:82
#define SLMDB_CTL_END
Definition: slmdb.h:81
int slmdb_open(SLMDB *, const char *, int, int, int)
int bulk_retry_limit
Definition: slmdb.h:63
void(* SLMDB_ASSERT_FN)(void *, const char *)
Definition: slmdb.h:100
int slmdb_control(SLMDB *, int,...)
int slmdb_put(SLMDB *, MDB_val *, MDB_val *, int)
size_t saved_key_size
Definition: slmdb.h:55