root/trunk/umem.c

1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Portions Copyright 2006 OmniTI, Inc.
28  */
29
30 /* #pragma ident        "@(#)umem.c     1.11    05/06/08 SMI" */
31
32 /*
33  * based on usr/src/uts/common/os/kmem.c r1.64 from 2001/12/18
34  *
35  * The slab allocator, as described in the following two papers:
36  *
37  *      Jeff Bonwick,
38  *      The Slab Allocator: An Object-Caching Kernel Memory Allocator.
39  *      Proceedings of the Summer 1994 Usenix Conference.
40  *      Available as /shared/sac/PSARC/1994/028/materials/kmem.pdf.
41  *
42  *      Jeff Bonwick and Jonathan Adams,
43  *      Magazines and vmem: Extending the Slab Allocator to Many CPUs and
44  *      Arbitrary Resources.
45  *      Proceedings of the 2001 Usenix Conference.
46  *      Available as /shared/sac/PSARC/2000/550/materials/vmem.pdf.
47  *
48  * 1. Overview
49  * -----------
50  * umem is very close to kmem in implementation.  There are four major
51  * areas of divergence:
52  *
53  *      * Initialization
54  *
55  *      * CPU handling
56  *
57  *      * umem_update()
58  *
59  *      * KM_SLEEP vs. UMEM_NOFAIL
60  *
61  *
62  * 2. Initialization
63  * -----------------
64  * kmem is initialized early on in boot, and knows that no one will call
65  * into it before it is ready.  umem does not have these luxuries. Instead,
66  * initialization is divided into two phases:
67  *
68  *      * library initialization, and
69  *
70  *      * first use
71  *
72  * umem's full initialization happens at the time of the first allocation
73  * request (via malloc() and friends, umem_alloc(), or umem_zalloc()),
74  * or the first call to umem_cache_create().
75  *
76  * umem_free() and umem_cache_alloc() do not require special handling,
77  * since the only way to get valid arguments for them is to successfully
78  * call a function from the first group.
79  *
80  * 2.1. Library Initialization: umem_startup()
81  * -------------------------------------------
82  * umem_startup() is libumem.so's .init section.  It calls pthread_atfork()
83  * to install the handlers necessary for umem's Fork1-Safety.  Because of
84  * race condition issues, all other pre-umem_init() initialization is done
85  * statically (i.e. by the dynamic linker).
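 *
 * The registration itself is a single pthread_atfork(3C) call of the form
 * sketched below.  The prepare/parent handler names are illustrative (the
 * real handlers live elsewhere in libumem); umem_release_child() is the
 * child handler discussed in section 4.6:
 *
 *      (void) pthread_atfork(umem_lockup, umem_release,
 *          umem_release_child);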
86  *
87  * For standalone use, umem_startup() returns everything to its initial
88  * state.
89  *
90  * 2.2. First use: umem_init()
91  * ------------------------------
92  * The first time any memory allocation function is used, we have to
93  * create the backing caches and vmem arenas which are needed for it.
94  * umem_init() is the central point for that task.  When it completes,
95  * umem_ready is either UMEM_READY (all set) or UMEM_READY_INIT_FAILED (unable
96  * to initialize, probably due to lack of memory).
97  *
98  * There are four different paths from which umem_init() is called:
99  *
100  *      * from umem_alloc() or umem_zalloc(), with 0 < size < UMEM_MAXBUF,
101  *
102  *      * from umem_alloc() or umem_zalloc(), with size > UMEM_MAXBUF,
103  *
104  *      * from umem_cache_create(), and
105  *
106  *      * from memalign(), with align > UMEM_ALIGN.
107  *
108  * The last three just check if umem is initialized, and call umem_init()
109  * if it is not.  For performance reasons, the first case is more complicated.
110  *
111  * 2.2.1. umem_alloc()/umem_zalloc(), with 0 < size < UMEM_MAXBUF
112  * -----------------------------------------------------------------
113  * In this case, umem_cache_alloc(&umem_null_cache, ...) is called.
114  * There is special case code which causes any allocation on
115  * &umem_null_cache to fail by returning (NULL), regardless of the
116  * flags argument.
117  *
118  * So umem_cache_alloc() returns NULL, and umem_alloc()/umem_zalloc() call
119  * umem_alloc_retry().  umem_alloc_retry() sees that the allocation
120  * was against &umem_null_cache, and calls umem_init().
121  *
122  * If initialization is successful, umem_alloc_retry() returns 1, which
123  * causes umem_alloc()/umem_zalloc() to start over, which causes it to load
124  * the (now valid) cache pointer from umem_alloc_table.
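 *
 * A simplified sketch of that retry loop (not the literal code, but the
 * same shape, using the functions and tables defined in this file):
 *
 *      retry:
 *              cp = umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT];
 *              buf = _umem_cache_alloc(cp, umflag);
 *              if (buf == NULL && umem_alloc_retry(cp, umflag))
 *                      goto retry;
 *              return (buf);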
125  *
126  * 2.2.2. Dealing with race conditions
127  * -----------------------------------
128  * There are a couple race conditions resulting from the initialization
129  * code that we have to guard against:
130  *
131  *      * In umem_cache_create(), there is a special UMC_INTERNAL cflag
132  *      that is passed for caches created during initialization.  It
133  *      is illegal for a user to try to create a UMC_INTERNAL cache.
134  *      This allows initialization to proceed, but any other
135  *      umem_cache_create()s will block by calling umem_init().
136  *
137  *      * Since umem_null_cache has a 1-element cache_cpu, its cache_cpu_mask
138  *      is always zero.  umem_cache_alloc uses cp->cache_cpu_mask to
139  *      mask the cpu number.  This prevents a race between grabbing a
140  *      cache pointer out of umem_alloc_table and growing the cpu array.
141  *
142  *
143  * 3. CPU handling
144  * ---------------
145  * kmem uses the CPU's sequence number to determine which "cpu cache" to
146  * use for an allocation.  Currently, there is no way to get the sequence
147  * number in userspace.
148  *
149  * umem keeps track of cpu information in umem_cpus, an array of umem_max_ncpus
150  * umem_cpu_t structures.  CPUHINT() is a "hint" function, which we then mask
151  * with either umem_cpu_mask or cp->cache_cpu_mask to find the actual "cpu" id.
152  * The mechanics of this are all in the CPU(mask) macro.
153  *
154  * Currently, umem uses _lwp_self() as its hint.
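 *
 * Concretely, picking the per-thread "cpu" entry for a cache amounts to the
 * following (this is exactly what CPU(cp->cache_cpu_mask) expands to, given
 * the macros defined below):
 *
 *      umem_cpu_t *cpup = &umem_cpus[CPUHINT() & cp->cache_cpu_mask];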
155  *
156  *
157  * 4. The update thread
158  * --------------------
159  * kmem uses a task queue, kmem_taskq, to do periodic maintenance on
160  * every kmem cache.  vmem has a periodic timeout for hash table resizing.
161  * The kmem_taskq also provides a separate context for kmem_cache_reap()'s
162  * to be done in, avoiding issues of the context of kmem_reap() callers.
163  *
164  * Instead, umem has the concept of "updates", which are asynchronous requests
165  * for work attached to single caches.  All caches with pending work are
166  * on a doubly linked list rooted at the umem_null_cache.  All update state
167  * is protected by the umem_update_lock mutex, and the umem_update_cv is used
168  * for notification between threads.
169  *
170  * 4.1. Cache states with regards to updates
171  * -----------------------------------------
172  * A given cache is in one of three states:
173  *
174  * Inactive             cache_uflags is zero, cache_u{next,prev} are NULL
175  *
176  * Work Requested       cache_uflags is non-zero (but UMU_ACTIVE is not set),
177  *                      cache_u{next,prev} link the cache onto the global
178  *                      update list
179  *
180  * Active               cache_uflags has UMU_ACTIVE set, cache_u{next,prev}
181  *                      are NULL, and either umem_update_thr or
182  *                      umem_st_update_thr are actively doing work on the
183  *                      cache.
184  *
185  * An update can be added to any cache in any state -- if the cache is
186  * Inactive, it transitions to being Work Requested.  If the cache is
187  * Active, the worker will notice the new update and act on it before
188  * transitioning the cache to the Inactive state.
189  *
190  * If a cache is in the Active state, UMU_NOTIFY can be set, which asks
191  * the worker to broadcast the umem_update_cv when it has finished.
192  *
193  * 4.2. Update interface
194  * ---------------------
195  * umem_add_update() adds an update to a particular cache.
196  * umem_updateall() adds an update to all caches.
197  * umem_remove_updates() returns a cache to the Inactive state.
198  *
199  * umem_process_updates() processes all caches in the Work Requested state.
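 *
 * For example (illustrative usage only), a hash-table rescale can be
 * requested on a single cache with:
 *
 *      umem_add_update(cp, UMU_HASH_RESCALE);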
200  *
201  * 4.3. Reaping
202  * ------------
203  * When umem_reap() is called (at the time of heap growth), it schedules
204  * UMU_REAP updates on every cache.  It then checks to see if the update
205  * thread exists (umem_update_thr != 0).  If it does, it broadcasts
206  * the umem_update_cv to wake the update thread up, and returns.
207  *
208  * If the update thread does not exist (umem_update_thr == 0), and the
209  * program currently has multiple threads, umem_reap() attempts to create
210  * a new update thread.
211  *
212  * If the process is not multithreaded, or the creation fails, umem_reap()
213  * calls umem_st_update() to do an inline update.
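 *
 * A simplified sketch of that decision (multi_threaded and
 * spawn_update_thread() are placeholders, and the real code also throttles
 * via the reap state):
 *
 *      umem_updateall(UMU_REAP);
 *      if (umem_update_thr != 0) {
 *              (void) cond_broadcast(&umem_update_cv);
 *      } else if (!multi_threaded || !spawn_update_thread()) {
 *              umem_st_update();
 *      }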
214  *
215  * 4.4. The update thread
216  * ----------------------
217  * The update thread spends most of its time in cond_timedwait() on the
218  * umem_update_cv.  It wakes up under two conditions:
219  *
220  *      * The timedwait times out, in which case it needs to run a global
221  *      update, or
222  *
223  *      * someone cond_broadcast(3THR)s the umem_update_cv, in which case
224  *      it needs to check if there are any caches in the Work Requested
225  *      state.
226  *
227  * When it is time for another global update, umem calls umem_cache_update()
228  * on every cache, then calls vmem_update(), which tunes the vmem structures.
229  * umem_cache_update() can request further work using umem_add_update().
230  *
231  * After any work from the global update completes, the update timer is
232  * reset to umem_reap_interval seconds in the future.  This makes the
233  * updates self-throttling.
234  *
235  * Reaps are similarly self-throttling.  After a UMU_REAP update has
236  * been scheduled on all caches, umem_reap() sets a flag and wakes up the
237  * update thread.  The update thread notices the flag, and resets the
238  * reap state.
239  *
240  * 4.5. Inline updates
241  * -------------------
242  * If the update thread is not running, umem_st_update() is used instead.  It
243  * immediately does a global update (as above), then calls
244  * umem_process_updates() to process both the reaps that umem_reap() added and
245  * any work generated by the global update.  Afterwards, it resets the reap
246  * state.
247  *
248  * While umem_st_update() is running, umem_st_update_thr holds the thread
249  * id of the thread performing the update.
250  *
251  * 4.6. Updates and fork1()
252  * ------------------------
253  * umem has fork1() pre- and post-handlers which lock up (and release) every
254  * mutex in every cache.  They also lock up the umem_update_lock.  Since
255  * fork1() only copies over a single lwp, other threads (including the update
256  * thread) could have been actively using a cache in the parent.  This
257  * can lead to inconsistencies in the child process.
258  *
259  * Because we locked all of the mutexes, the only possible inconsistencies are:
260  *
261  *      * a umem_cache_alloc() could leak its buffer.
262  *
263  *      * a caller of umem_depot_alloc() could leak a magazine, and all the
264  *      buffers contained in it.
265  *
266  *      * a cache could be in the Active update state.  In the child, there
267  *      would be no thread actually working on it.
268  *
269  *      * a umem_hash_rescale() could leak the new hash table.
270  *
271  *      * a umem_magazine_resize() could be in progress.
272  *
273  *      * a umem_reap() could be in progress.
274  *
275  * The memory leaks we can't do anything about.  umem_release_child() resets
276  * the update state and moves any caches in the Active state to the Work
277  * Requested state.  This might cause some updates to be re-run, but UMU_REAP and
278  * UMU_HASH_RESCALE are effectively idempotent, and the worst that can
279  * happen from umem_magazine_resize() is resizing the magazine twice in close
280  * succession.
281  *
282  * Much of the cleanup in umem_release_child() is skipped if
283  * umem_st_update_thr == thr_self().  This is so that applications which call
284  * fork1() from a cache callback do not break.  Needless to say, any such
285  * application is tremendously broken.
286  *
287  *
288  * 5. KM_SLEEP vs. UMEM_NOFAIL
289  * ----------------------------
290  * Allocations against kmem and vmem have two basic modes:  SLEEP and
291  * NOSLEEP.  A sleeping allocation will go to sleep (waiting for
292  * more memory) instead of failing (returning NULL).
293  *
294  * SLEEP allocations presume an extremely multithreaded model, with
295  * a lot of allocation and deallocation activity.  umem cannot presume
296  * that its clients have any particular type of behavior.  Instead,
297  * it provides two types of allocations:
298  *
299  *      * UMEM_DEFAULT, equivalent to KM_NOSLEEP (i.e. return NULL on
300  *      failure)
301  *
302  *      * UMEM_NOFAIL, which, on failure, calls an optional callback
303  *      (registered with umem_nofail_callback()).
304  *
305  * The callback is invoked with no locks held, and can do an arbitrary
306  * amount of work.  It then has a choice between:
307  *
308  *      * Returning UMEM_CALLBACK_RETRY, which will cause the allocation
309  *      to be restarted.
310  *
311  *      * Returning UMEM_CALLBACK_EXIT(status), which will cause exit(2)
312  *      to be invoked with status.  If multiple threads attempt to do
313  *      this simultaneously, only one will call exit(2).
314  *
315  *      * Doing some kind of non-local exit (thr_exit(3thr), longjmp(3C),
316  *      etc.)
317  *
318  * The default callback returns UMEM_CALLBACK_EXIT(255).
319  *
320  * To have these callbacks without risk of state corruption (in the case of
321  * a non-local exit), we have to ensure that the callbacks get invoked
322  * close to the original allocation, with no inconsistent state or held
323  * locks.  The following steps are taken:
324  *
325  *      * All invocations of vmem are VM_NOSLEEP.
326  *
327  *      * All constructor callbacks (which can themselves do allocations)
328  *      are passed UMEM_DEFAULT as their required allocation argument.  This
329  *      way, the constructor will fail, allowing the highest-level allocation
330  *      to invoke the nofail callback.
331  *
332  *      If a constructor callback _does_ do a UMEM_NOFAIL allocation, and
333  *      the nofail callback does a non-local exit, we will leak the
334  *      partially-constructed buffer.
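 *
 * A minimal example of client-side usage (a sketch; the callback could
 * equally return UMEM_CALLBACK_RETRY after releasing memory):
 *
 *      static int
 *      give_up(void)
 *      {
 *              return (UMEM_CALLBACK_EXIT(1));
 *      }
 *
 *      umem_nofail_callback(give_up);
 *      buf = umem_alloc(sz, UMEM_NOFAIL);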
335  */
336
337 #include "config.h"
338 /* #include "mtlib.h" */
339 #include <umem_impl.h>
340 #include <sys/vmem_impl_user.h>
341 #include "umem_base.h"
342 #include "vmem_base.h"
343
344 #if HAVE_SYS_PROCESSOR_H
345 #include <sys/processor.h>
346 #endif
347 #if HAVE_SYS_SYSMACROS_H
348 #include <sys/sysmacros.h>
349 #endif
350
351 #if HAVE_ALLOCA_H
352 #include <alloca.h>
353 #endif
354 #include <errno.h>
355 #include <limits.h>
356 #include <stdio.h>
357 #include <stdlib.h>
358 #include <string.h>
359 #if HAVE_STRINGS_H
360 #include <strings.h>
361 #endif
362 #include <signal.h>
363 #if HAVE_UNISTD_H
364 #include <unistd.h>
365 #endif
366 #if HAVE_ATOMIC_H
367 #include <atomic.h>
368 #endif
369
370 #include "misc.h"
371
372 #define UMEM_VMFLAGS(umflag)    (VM_NOSLEEP)
373
374 size_t pagesize;
375
376 /*
377  * The default set of caches to back umem_alloc().
378  * These sizes should be reevaluated periodically.
379  *
380  * We want allocations that are multiples of the coherency granularity
381  * (64 bytes) to be satisfied from a cache which is a multiple of 64
382  * bytes, so that it will be 64-byte aligned.  For all multiples of 64,
383  * the next kmem_cache_size greater than or equal to it must be a
384  * multiple of 64.
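 *
 * For example, P2ALIGN(8192 / 7, 64) rounds 8192 / 7 = 1170 down to 1152,
 * the largest multiple of 64 not exceeding it, so roughly seven such
 * buffers pack into 8K while staying 64-byte aligned.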
385  */
386 static const int umem_alloc_sizes[] = {
387 #ifdef _LP64
388         1 * 8,
389         1 * 16,
390         2 * 16,
391         3 * 16,
392 #else
393         1 * 8,
394         2 * 8,
395         3 * 8,
396         4 * 8,          5 * 8,          6 * 8,          7 * 8,
397 #endif
398         4 * 16,         5 * 16,         6 * 16,         7 * 16,
399         4 * 32,         5 * 32,         6 * 32,         7 * 32,
400         4 * 64,         5 * 64,         6 * 64,         7 * 64,
401         4 * 128,        5 * 128,        6 * 128,        7 * 128,
402         P2ALIGN(8192 / 7, 64),
403         P2ALIGN(8192 / 6, 64),
404         P2ALIGN(8192 / 5, 64),
405         P2ALIGN(8192 / 4, 64),
406         P2ALIGN(8192 / 3, 64),
407         P2ALIGN(8192 / 2, 64),
408         P2ALIGN(8192 / 1, 64),
409         4096 * 3,
410         8192 * 2,
411 };
412 #define NUM_ALLOC_SIZES (sizeof (umem_alloc_sizes) / sizeof (*umem_alloc_sizes))
413
414 #define UMEM_MAXBUF     16384
415
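/*
 * Each row gives the magazine size (number of rounds), the magazine
 * alignment, and the buffer-size thresholds used when selecting a magazine
 * type for a cache; see umem_magtype_t in umem_impl.h.  (This description
 * assumes the same layout as kmem's magtype table.)
 */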
416 static umem_magtype_t umem_magtype[] = {
417         { 1,    8,      3200,   65536   },
418         { 3,    16,     256,    32768   },
419         { 7,    32,     64,     16384   },
420         { 15,   64,     0,      8192    },
421         { 31,   64,     0,      4096    },
422         { 47,   64,     0,      2048    },
423         { 63,   64,     0,      1024    },
424         { 95,   64,     0,      512     },
425         { 143,  64,     0,      0       },
426 };
427
428 /*
429  * umem tunables
430  */
431 uint32_t umem_max_ncpus;        /* # of CPU caches. */
432
433 uint32_t umem_stack_depth = 15; /* # stack frames in a bufctl_audit */
434 uint32_t umem_reap_interval = 10; /* max reaping rate (seconds) */
435 uint_t umem_depot_contention = 2; /* max failed trylocks per real interval */
436 uint_t umem_abort = 1;          /* whether to abort on error */
437 uint_t umem_output = 0;         /* whether to write to standard error */
438 uint_t umem_logging = 0;        /* umem_log_enter() override */
439 uint32_t umem_mtbf = 0;         /* mean time between failures [default: off] */
440 size_t umem_transaction_log_size; /* size of transaction log */
441 size_t umem_content_log_size;   /* size of content log */
442 size_t umem_failure_log_size;   /* failure log [4 pages per CPU] */
443 size_t umem_slab_log_size;      /* slab create log [4 pages per CPU] */
444 size_t umem_content_maxsave = 256; /* UMF_CONTENTS max bytes to log */
445 size_t umem_lite_minsize = 0;   /* minimum buffer size for UMF_LITE */
446 size_t umem_lite_maxalign = 1024; /* maximum buffer alignment for UMF_LITE */
447 size_t umem_maxverify;          /* maximum bytes to inspect in debug routines */
448 size_t umem_minfirewall;        /* hardware-enforced redzone threshold */
449
450 uint_t umem_flags = 0;
451
452 mutex_t                 umem_init_lock;         /* locks initialization */
453 cond_t                  umem_init_cv = DEFAULTCV;               /* initialization CV */
454 thread_t                umem_init_thr;          /* thread initializing */
455 int                     umem_init_env_ready;    /* environ pre-initted */
456 int                     umem_ready = UMEM_READY_STARTUP;
457
458 static umem_nofail_callback_t *nofail_callback;
459 static mutex_t          umem_nofail_exit_lock;
460 static thread_t         umem_nofail_exit_thr;
461
462 static umem_cache_t     *umem_slab_cache;
463 static umem_cache_t     *umem_bufctl_cache;
464 static umem_cache_t     *umem_bufctl_audit_cache;
465
466 mutex_t                 umem_flags_lock;
467
468 static vmem_t           *heap_arena;
469 static vmem_alloc_t     *heap_alloc;
470 static vmem_free_t      *heap_free;
471
472 static vmem_t           *umem_internal_arena;
473 static vmem_t           *umem_cache_arena;
474 static vmem_t           *umem_hash_arena;
475 static vmem_t           *umem_log_arena;
476 static vmem_t           *umem_oversize_arena;
477 static vmem_t           *umem_va_arena;
478 static vmem_t           *umem_default_arena;
479 static vmem_t           *umem_firewall_va_arena;
480 static vmem_t           *umem_firewall_arena;
481
482 vmem_t                  *umem_memalign_arena;
483
484 umem_log_header_t *umem_transaction_log;
485 umem_log_header_t *umem_content_log;
486 umem_log_header_t *umem_failure_log;
487 umem_log_header_t *umem_slab_log;
488
489 extern thread_t _thr_self(void);
490 #ifndef CPUHINT
491 #define CPUHINT()               (_thr_self())
492 #endif
493 #define CPUHINT_MAX()           INT_MAX
494
495 #define CPU(mask)               (umem_cpus + (CPUHINT() & (mask)))
496 static umem_cpu_t umem_startup_cpu = {  /* initial, single, cpu */
497         UMEM_CACHE_SIZE(0),
498         0
499 };
500
501 static uint32_t umem_cpu_mask = 0;                      /* global cpu mask */
502 static umem_cpu_t *umem_cpus = &umem_startup_cpu;       /* cpu list */
503
504 volatile uint32_t umem_reaping;
505
506 thread_t                umem_update_thr;
507 struct timeval          umem_update_next;       /* timeofday of next update */
508 volatile thread_t       umem_st_update_thr;     /* only used when single-thd */
509
510 #define IN_UPDATE()     (thr_self() == umem_update_thr || \
511                             thr_self() == umem_st_update_thr)
512 #define IN_REAP()       IN_UPDATE()
513
514 mutex_t                 umem_update_lock;       /* cache_u{next,prev,flags} */
515 cond_t                  umem_update_cv = DEFAULTCV;
516
517 volatile hrtime_t umem_reap_next;       /* min hrtime of next reap */
518
519 mutex_t                 umem_cache_lock;        /* inter-cache linkage only */
520
521 #ifdef UMEM_STANDALONE
522 umem_cache_t            umem_null_cache;
523 static const umem_cache_t umem_null_cache_template = {
524 #else
525 umem_cache_t            umem_null_cache = {
526 #endif
527         0, 0, 0, 0, 0,
528         0, 0,
529         0, 0,
530         0, 0,
531         "invalid_cache",
532         0, 0,
533         NULL, NULL, NULL, NULL,
534         NULL,
535         0, 0, 0, 0,
536         &umem_null_cache, &umem_null_cache,
537         &umem_null_cache, &umem_null_cache,
538         0,
539         DEFAULTMUTEX,                           /* start of slab layer */
540         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
541         &umem_null_cache.cache_nullslab,
542         {
543                 &umem_null_cache,
544                 NULL,
545                 &umem_null_cache.cache_nullslab,
546                 &umem_null_cache.cache_nullslab,
547                 NULL,
548                 -1,
549                 0
550         },
551         NULL,
552         NULL,
553         DEFAULTMUTEX,                           /* start of depot layer */
554         NULL, {
555                 NULL, 0, 0, 0, 0
556         }, {
557                 NULL, 0, 0, 0, 0
558         }, {
559                 {
560                         DEFAULTMUTEX,           /* start of CPU cache */
561                         0, 0, NULL, NULL, -1, -1, 0
562                 }
563         }
564 };
565
566 #define ALLOC_TABLE_4 \
567         &umem_null_cache, &umem_null_cache, &umem_null_cache, &umem_null_cache
568
569 #define ALLOC_TABLE_64 \
570         ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \
571         ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \
572         ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, \
573         ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4, ALLOC_TABLE_4
574
575 #define ALLOC_TABLE_1024 \
576         ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \
577         ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \
578         ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, \
579         ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64, ALLOC_TABLE_64
580
581 static umem_cache_t *umem_alloc_table[UMEM_MAXBUF >> UMEM_ALIGN_SHIFT] = {
582         ALLOC_TABLE_1024,
583         ALLOC_TABLE_1024
584 };
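/*
 * With UMEM_ALIGN_SHIFT == 3 (8-byte alignment), UMEM_MAXBUF >>
 * UMEM_ALIGN_SHIFT is 16384 / 8 == 2048 entries, hence the two
 * ALLOC_TABLE_1024 expansions above: one cache pointer per 8-byte size
 * class up to UMEM_MAXBUF.
 */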
585
586
587 /* Used to constrain audit-log stack traces */
588 caddr_t                 umem_min_stack;
589 caddr_t                 umem_max_stack;
590
591
592 /*
593  * we use the _ versions, since we don't want to be cancelled.
594  * Actually, this is automatically taken care of by including "mtlib.h".
595  */
596 extern int _cond_wait(cond_t *cv, mutex_t *mutex);
597
598 #define UMERR_MODIFIED  0       /* buffer modified while on freelist */
599 #define UMERR_REDZONE   1       /* redzone violation (write past end of buf) */
600 #define UMERR_DUPFREE   2       /* freed a buffer twice */
601 #define UMERR_BADADDR   3       /* freed a bad (unallocated) address */
602 #define UMERR_BADBUFTAG 4       /* buftag corrupted */
603 #define UMERR_BADBUFCTL 5       /* bufctl corrupted */
604 #define UMERR_BADCACHE  6       /* freed a buffer to the wrong cache */
605 #define UMERR_BADSIZE   7       /* alloc size != free size */
606 #define UMERR_BADBASE   8       /* buffer base address wrong */
607
608 struct {
609         hrtime_t        ump_timestamp;  /* timestamp of error */
610         int             ump_error;      /* type of umem error (UMERR_*) */
611         void            *ump_buffer;    /* buffer that induced abort */
612         void            *ump_realbuf;   /* real start address for buffer */
613         umem_cache_t    *ump_cache;     /* buffer's cache according to client */
614         umem_cache_t    *ump_realcache; /* actual cache containing buffer */
615         umem_slab_t     *ump_slab;      /* slab according to umem_findslab() */
616         umem_bufctl_t   *ump_bufctl;    /* bufctl */
617 } umem_abort_info;
618
619 static void
620 copy_pattern(uint64_t pattern, void *buf_arg, size_t size)
621 {
622         uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
623         uint64_t *buf = buf_arg;
624
625         while (buf < bufend)
626                 *buf++ = pattern;
627 }
628
629 static void *
630 verify_pattern(uint64_t pattern, void *buf_arg, size_t size)
631 {
632         uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
633         uint64_t *buf;
634
635         for (buf = buf_arg; buf < bufend; buf++)
636                 if (*buf != pattern)
637                         return (buf);
638         return (NULL);
639 }
640
641 static void *
642 verify_and_copy_pattern(uint64_t old, uint64_t new, void *buf_arg, size_t size)
643 {
644         uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
645         uint64_t *buf;
646
647         for (buf = buf_arg; buf < bufend; buf++) {
648                 if (*buf != old) {
649                         copy_pattern(old, buf_arg,
650                             (char *)buf - (char *)buf_arg);
651                         return (buf);
652                 }
653                 *buf = new;
654         }
655
656         return (NULL);
657 }
658
659 void
660 umem_cache_applyall(void (*func)(umem_cache_t *))
661 {
662         umem_cache_t *cp;
663
664         (void) mutex_lock(&umem_cache_lock);
665         for (cp = umem_null_cache.cache_next; cp != &umem_null_cache;
666             cp = cp->cache_next)
667                 func(cp);
668         (void) mutex_unlock(&umem_cache_lock);
669 }
670
671 static void
672 umem_add_update_unlocked(umem_cache_t *cp, int flags)
673 {
674         umem_cache_t *cnext, *cprev;
675
676         flags &= ~UMU_ACTIVE;
677
678         if (!flags)
679                 return;
680
681         if (cp->cache_uflags & UMU_ACTIVE) {
682                 cp->cache_uflags |= flags;
683         } else {
684                 if (cp->cache_unext != NULL) {
685                         ASSERT(cp->cache_uflags != 0);
686                         cp->cache_uflags |= flags;
687                 } else {
688                         ASSERT(cp->cache_uflags == 0);
689                         cp->cache_uflags = flags;
690                         cp->cache_unext = cnext = &umem_null_cache;
691                         cp->cache_uprev = cprev = umem_null_cache.cache_uprev;
692                         cnext->cache_uprev = cp;
693                         cprev->cache_unext = cp;
694                 }
695         }
696 }
697
698 static void
699 umem_add_update(umem_cache_t *cp, int flags)
700 {
701         (void) mutex_lock(&umem_update_lock);
702
703         umem_add_update_unlocked(cp, flags);
704
705         if (!IN_UPDATE())
706                 (void) cond_broadcast(&umem_update_cv);
707
708         (void) mutex_unlock(&umem_update_lock);
709 }
710
711 /*
712  * Remove a cache from the update list, waiting for any in-progress work to
713  * complete first.
714  */
715 static void
716 umem_remove_updates(umem_cache_t *cp)
717 {
718         (void) mutex_lock(&umem_update_lock);
719
720         /*
721          * Get it out of the active state
722          */
723         while (cp->cache_uflags & UMU_ACTIVE) {
724                 ASSERT(cp->cache_unext == NULL);
725
726                 cp->cache_uflags |= UMU_NOTIFY;
727
728                 /*
729                  * Make sure the update state is sane, before we wait
730                  */
731                 ASSERT(umem_update_thr != 0 || umem_st_update_thr != 0);
732                 ASSERT(umem_update_thr != thr_self() &&
733                     umem_st_update_thr != thr_self());
734
735                 (void) _cond_wait(&umem_update_cv, &umem_update_lock);
736         }
737         /*
738          * Get it out of the Work Requested state
739          */
740         if (cp->cache_unext != NULL) {
741                 cp->cache_uprev->cache_unext = cp->cache_unext;
742                 cp->cache_unext->cache_uprev = cp->cache_uprev;
743                 cp->cache_uprev = cp->cache_unext = NULL;
744                 cp->cache_uflags = 0;
745         }
746         /*
747          * Make sure it is in the Inactive state
748          */
749         ASSERT(cp->cache_unext == NULL && cp->cache_uflags == 0);
750         (void) mutex_unlock(&umem_update_lock);
751 }
752
753 static void
754 umem_updateall(int flags)
755 {
756         umem_cache_t *cp;
757
758         /*
759          * NOTE:  To prevent deadlock, umem_cache_lock is always acquired first.
760          *
761          * (umem_add_update is called from things run via umem_cache_applyall)
762          */
763         (void) mutex_lock(&umem_cache_lock);
764         (void) mutex_lock(&umem_update_lock);
765
766         for (cp = umem_null_cache.cache_next; cp != &umem_null_cache;
767             cp = cp->cache_next)
768                 umem_add_update_unlocked(cp, flags);
769
770         if (!IN_UPDATE())
771                 (void) cond_broadcast(&umem_update_cv);
772
773         (void) mutex_unlock(&umem_update_lock);
774         (void) mutex_unlock(&umem_cache_lock);
775 }
776
777 /*
778  * Debugging support.  Given a buffer address, find its slab.
779  */
780 static umem_slab_t *
781 umem_findslab(umem_cache_t *cp, void *buf)
782 {
783         umem_slab_t *sp;
784
785         (void) mutex_lock(&cp->cache_lock);
786         for (sp = cp->cache_nullslab.slab_next;
787             sp != &cp->cache_nullslab; sp = sp->slab_next) {
788                 if (UMEM_SLAB_MEMBER(sp, buf)) {
789                         (void) mutex_unlock(&cp->cache_lock);
790                         return (sp);
791                 }
792         }
793         (void) mutex_unlock(&cp->cache_lock);
794
795         return (NULL);
796 }
797
798 static void
799 umem_error(int error, umem_cache_t *cparg, void *bufarg)
800 {
801         umem_buftag_t *btp = NULL;
802         umem_bufctl_t *bcp = NULL;
803         umem_cache_t *cp = cparg;
804         umem_slab_t *sp;
805         uint64_t *off;
806         void *buf = bufarg;
807
808         int old_logging = umem_logging;
809
810         umem_logging = 0;       /* stop logging when a bad thing happens */
811
812         umem_abort_info.ump_timestamp = gethrtime();
813
814         sp = umem_findslab(cp, buf);
815         if (sp == NULL) {
816                 for (cp = umem_null_cache.cache_prev; cp != &umem_null_cache;
817                     cp = cp->cache_prev) {
818                         if ((sp = umem_findslab(cp, buf)) != NULL)
819                                 break;
820                 }
821         }
822
823         if (sp == NULL) {
824                 cp = NULL;
825                 error = UMERR_BADADDR;
826         } else {
827                 if (cp != cparg)
828                         error = UMERR_BADCACHE;
829                 else
830                         buf = (char *)bufarg - ((uintptr_t)bufarg -
831                             (uintptr_t)sp->slab_base) % cp->cache_chunksize;
832                 if (buf != bufarg)
833                         error = UMERR_BADBASE;
834                 if (cp->cache_flags & UMF_BUFTAG)
835                         btp = UMEM_BUFTAG(cp, buf);
836                 if (cp->cache_flags & UMF_HASH) {
837                         (void) mutex_lock(&cp->cache_lock);
838                         for (bcp = *UMEM_HASH(cp, buf); bcp; bcp = bcp->bc_next)
839                                 if (bcp->bc_addr == buf)
840                                         break;
841                         (void) mutex_unlock(&cp->cache_lock);
842                         if (bcp == NULL && btp != NULL)
843                                 bcp = btp->bt_bufctl;
844                         if (umem_findslab(cp->cache_bufctl_cache, bcp) ==
845                             NULL || P2PHASE((uintptr_t)bcp, UMEM_ALIGN) ||
846                             bcp->bc_addr != buf) {
847                                 error = UMERR_BADBUFCTL;
848                                 bcp = NULL;
849                         }
850                 }
851         }
852
853         umem_abort_info.ump_error = error;
854         umem_abort_info.ump_buffer = bufarg;
855         umem_abort_info.ump_realbuf = buf;
856         umem_abort_info.ump_cache = cparg;
857         umem_abort_info.ump_realcache = cp;
858         umem_abort_info.ump_slab = sp;
859         umem_abort_info.ump_bufctl = bcp;
860
861         umem_printf("umem allocator: ");
862
863         switch (error) {
864
865         case UMERR_MODIFIED:
866                 umem_printf("buffer modified after being freed\n");
867                 off = verify_pattern(UMEM_FREE_PATTERN, buf, cp->cache_verify);
868                 if (off == NULL)        /* shouldn't happen */
869                         off = buf;
870                 umem_printf("modification occurred at offset 0x%lx "
871                     "(0x%llx replaced by 0x%llx)\n",
872                     (uintptr_t)off - (uintptr_t)buf,
873                     (longlong_t)UMEM_FREE_PATTERN, (longlong_t)*off);
874                 break;
875
876         case UMERR_REDZONE:
877                 umem_printf("redzone violation: write past end of buffer\n");
878                 break;
879
880         case UMERR_BADADDR:
881                 umem_printf("invalid free: buffer not in cache\n");
882                 break;
883
884         case UMERR_DUPFREE:
885                 umem_printf("duplicate free: buffer freed twice\n");
886                 break;
887
888         case UMERR_BADBUFTAG:
889                 umem_printf("boundary tag corrupted\n");
890                 umem_printf("bcp ^ bxstat = %lx, should be %lx\n",
891                     (intptr_t)btp->bt_bufctl ^ btp->bt_bxstat,
892                     UMEM_BUFTAG_FREE);
893                 break;
894
895         case UMERR_BADBUFCTL:
896                 umem_printf("bufctl corrupted\n");
897                 break;
898
899         case UMERR_BADCACHE:
900                 umem_printf("buffer freed to wrong cache\n");
901                 umem_printf("buffer was allocated from %s,\n", cp->cache_name);
902                 umem_printf("caller attempting free to %s.\n",
903                     cparg->cache_name);
904                 break;
905
906         case UMERR_BADSIZE:
907                 umem_printf("bad free: free size (%u) != alloc size (%u)\n",
908                     UMEM_SIZE_DECODE(((uint32_t *)btp)[0]),
909                     UMEM_SIZE_DECODE(((uint32_t *)btp)[1]));
910                 break;
911
912         case UMERR_BADBASE:
913                 umem_printf("bad free: free address (%p) != alloc address "
914                     "(%p)\n", bufarg, buf);
915                 break;
916         }
917
918         umem_printf("buffer=%p  bufctl=%p  cache: %s\n",
919             bufarg, (void *)bcp, cparg->cache_name);
920
921         if (bcp != NULL && (cp->cache_flags & UMF_AUDIT) &&
922             error != UMERR_BADBUFCTL) {
923                 int d;
924                 timespec_t ts;
925                 hrtime_t diff;
926                 umem_bufctl_audit_t *bcap = (umem_bufctl_audit_t *)bcp;
927
928                 diff = umem_abort_info.ump_timestamp - bcap->bc_timestamp;
929                 ts.tv_sec = diff / NANOSEC;
930                 ts.tv_nsec = diff % NANOSEC;
931
932                 umem_printf("previous transaction on buffer %p:\n", buf);
933                 umem_printf("thread=%p  time=T-%ld.%09ld  slab=%p  cache: %s\n",
934                     (void *)(intptr_t)bcap->bc_thread, ts.tv_sec, ts.tv_nsec,
935                     (void *)sp, cp->cache_name);
936                 for (d = 0; d < MIN(bcap->bc_depth, umem_stack_depth); d++) {
937                         (void) print_sym((void *)bcap->bc_stack[d]);
938                         umem_printf("\n");
939                 }
940         }
941
942         umem_err_recoverable("umem: heap corruption detected");
943
944         umem_logging = old_logging;     /* resume logging */
945 }
946
947 void
948 umem_nofail_callback(umem_nofail_callback_t *cb)
949 {
950         nofail_callback = cb;
951 }
952
953 static int
954 umem_alloc_retry(umem_cache_t *cp, int umflag)
955 {
956         if (cp == &umem_null_cache) {
957                 if (umem_init())
958                         return (1);                             /* retry */
959                 /*
960                  * Initialization failed.  Do normal failure processing.
961                  */
962         }
963         if (umflag & UMEM_NOFAIL) {
964                 int def_result = UMEM_CALLBACK_EXIT(255);
965                 int result = def_result;
966                 umem_nofail_callback_t *callback = nofail_callback;
967
968                 if (callback != NULL)
969                         result = callback();
970
971                 if (result == UMEM_CALLBACK_RETRY)
972                         return (1);
973
974                 if ((result & ~0xFF) != UMEM_CALLBACK_EXIT(0)) {
975                         log_message("nofail callback returned %x\n", result);
976                         result = def_result;
977                 }
978
979                 /*
980                  * only one thread will call exit
981                  */
982                 if (umem_nofail_exit_thr == thr_self())
983                         umem_panic("recursive UMEM_CALLBACK_EXIT()\n");
984
985                 (void) mutex_lock(&umem_nofail_exit_lock);
986                 umem_nofail_exit_thr = thr_self();
987                 exit(result & 0xFF);
988                 /*NOTREACHED*/
989         }
990         return (0);
991 }
992
993 static umem_log_header_t *
994 umem_log_init(size_t logsize)
995 {
996         umem_log_header_t *lhp;
997         int nchunks = 4 * umem_max_ncpus;
998         size_t lhsize = offsetof(umem_log_header_t, lh_cpu[umem_max_ncpus]);
999         int i;
1000
1001         if (logsize == 0)
1002                 return (NULL);
1003
1004         /*
1005          * Make sure that lhp->lh_cpu[] is nicely aligned
1006          * to prevent false sharing of cache lines.
1007          */
1008         lhsize = P2ROUNDUP(lhsize, UMEM_ALIGN);
1009         lhp = vmem_xalloc(umem_log_arena, lhsize, 64, P2NPHASE(lhsize, 64), 0,
1010             NULL, NULL, VM_NOSLEEP);
1011         if (lhp == NULL)
1012                 goto fail;
1013
1014         bzero(lhp, lhsize);
1015
1016         (void) mutex_init(&lhp->lh_lock, USYNC_THREAD, NULL);
1017         lhp->lh_nchunks = nchunks;
1018         lhp->lh_chunksize = P2ROUNDUP(logsize / nchunks, PAGESIZE);
1019         if (lhp->lh_chunksize == 0)
1020                 lhp->lh_chunksize = PAGESIZE;
1021
1022         lhp->lh_base = vmem_alloc(umem_log_arena,
1023             lhp->lh_chunksize * nchunks, VM_NOSLEEP);
1024         if (lhp->lh_base == NULL)
1025                 goto fail;
1026
1027         lhp->lh_free = vmem_alloc(umem_log_arena,
1028             nchunks * sizeof (int), VM_NOSLEEP);
1029         if (lhp->lh_free == NULL)
1030                 goto fail;
1031
1032         bzero(lhp->lh_base, lhp->lh_chunksize * nchunks);
1033
1034         for (i = 0; i < umem_max_ncpus; i++) {
1035                 umem_cpu_log_header_t *clhp = &lhp->lh_cpu[i];
1036                 (void) mutex_init(&clhp->clh_lock, USYNC_THREAD, NULL);
1037                 clhp->clh_chunk = i;
1038         }
1039
1040         for (i = umem_max_ncpus; i < nchunks; i++)
1041                 lhp->lh_free[i] = i;
1042
1043         lhp->lh_head = umem_max_ncpus;
1044         lhp->lh_tail = 0;
1045
1046         return (lhp);
1047
1048 fail:
1049         if (lhp != NULL) {
1050                 if (lhp->lh_base != NULL)
1051                         vmem_free(umem_log_arena, lhp->lh_base,
1052                             lhp->lh_chunksize * nchunks);
1053
1054                 vmem_xfree(umem_log_arena, lhp, lhsize);
1055         }
1056         return (NULL);
1057 }
1058
1059 static void *
1060 umem_log_enter(umem_log_header_t *lhp, void *data, size_t size)
1061 {
1062         void *logspace;
1063         umem_cpu_log_header_t *clhp;
1064
1065         if (lhp == NULL || umem_logging == 0)
1066                 return (NULL);
1067         clhp = &lhp->lh_cpu[CPU(umem_cpu_mask)->cpu_number];
1068
1069         (void) mutex_lock(&clhp->clh_lock);
1070         clhp->clh_hits++;
1071         if (size > clhp->clh_avail) {
1072                 (void) mutex_lock(&lhp->lh_lock);
1073                 lhp->lh_hits++;
1074                 lhp->lh_free[lhp->lh_tail] = clhp->clh_chunk;
1075                 lhp->lh_tail = (lhp->lh_tail + 1) % lhp->lh_nchunks;
1076                 clhp->clh_chunk = lhp->lh_free[lhp->lh_head];
1077                 lhp->lh_head = (lhp->lh_head + 1) % lhp->lh_nchunks;
1078                 clhp->clh_current = lhp->lh_base +
1079                     clhp->clh_chunk * lhp->lh_chunksize;
1080                 clhp->clh_avail = lhp->lh_chunksize;
1081                 if (size > lhp->lh_chunksize)
1082                         size = lhp->lh_chunksize;
1083                 (void) mutex_unlock(&lhp->lh_lock);
1084         }
1085         logspace = clhp->clh_current;
1086         clhp->clh_current += size;
1087         clhp->clh_avail -= size;
1088         bcopy(data, logspace, size);
1089         (void) mutex_unlock(&clhp->clh_lock);
1090         return (logspace);
1091 }
1092
1093 #define UMEM_AUDIT(lp, cp, bcp)                                         \
1094 {                                                                       \
1095         umem_bufctl_audit_t *_bcp = (umem_bufctl_audit_t *)(bcp);       \
1096         _bcp->bc_timestamp = gethrtime();                               \
1097         _bcp->bc_thread = thr_self();                                   \
1098         _bcp->bc_depth = getpcstack(_bcp->bc_stack, umem_stack_depth,   \
1099             (cp != NULL) && (cp->cache_flags & UMF_CHECKSIGNAL));       \
1100         _bcp->bc_lastlog = umem_log_enter((lp), _bcp,                   \
1101             UMEM_BUFCTL_AUDIT_SIZE);                                    \
1102 }
1103
1104 static void
1105 umem_log_event(umem_log_header_t *lp, umem_cache_t *cp,
1106         umem_slab_t *sp, void *addr)
1107 {
1108         umem_bufctl_audit_t *bcp;
1109         UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1110
1111         bzero(bcp, UMEM_BUFCTL_AUDIT_SIZE);
1112         bcp->bc_addr = addr;
1113         bcp->bc_slab = sp;
1114         bcp->bc_cache = cp;
1115         UMEM_AUDIT(lp, cp, bcp);
1116 }
1117
1118 /*
1119  * Create a new slab for cache cp.
1120  */
1121 static umem_slab_t *
1122 umem_slab_create(umem_cache_t *cp, int umflag)
1123 {
1124         size_t slabsize = cp->cache_slabsize;
1125         size_t chunksize = cp->cache_chunksize;
1126         int cache_flags = cp->cache_flags;
1127         size_t color, chunks;
1128         char *buf, *slab;
1129         umem_slab_t *sp;
1130         umem_bufctl_t *bcp;
1131         vmem_t *vmp = cp->cache_arena;
1132
1133         color = cp->cache_color + cp->cache_align;
1134         if (color > cp->cache_maxcolor)
1135                 color = cp->cache_mincolor;
1136         cp->cache_color = color;
1137
1138         slab = vmem_alloc(vmp, slabsize, UMEM_VMFLAGS(umflag));
1139
1140         if (slab == NULL)
1141                 goto vmem_alloc_failure;
1142
1143         ASSERT(P2PHASE((uintptr_t)slab, vmp->vm_quantum) == 0);
1144
1145         if (!(cp->cache_cflags & UMC_NOTOUCH) &&
1146             (cp->cache_flags & UMF_DEADBEEF))
1147                 copy_pattern(UMEM_UNINITIALIZED_PATTERN, slab, slabsize);
1148
1149         if (cache_flags & UMF_HASH) {
1150                 if ((sp = _umem_cache_alloc(umem_slab_cache, umflag)) == NULL)
1151                         goto slab_alloc_failure;
1152                 chunks = (slabsize - color) / chunksize;
1153         } else {
1154                 sp = UMEM_SLAB(cp, slab);
1155                 chunks = (slabsize - sizeof (umem_slab_t) - color) / chunksize;
1156         }
1157
1158         sp->slab_cache  = cp;
1159         sp->slab_head   = NULL;
1160         sp->slab_refcnt = 0;
1161         sp->slab_base   = buf = slab + color;
1162         sp->slab_chunks = chunks;
1163
1164         ASSERT(chunks > 0);
1165         while (chunks-- != 0) {
1166                 if (cache_flags & UMF_HASH) {
1167                         bcp = _umem_cache_alloc(cp->cache_bufctl_cache, umflag);
1168                         if (bcp == NULL)
1169                                 goto bufctl_alloc_failure;
1170                         if (cache_flags & UMF_AUDIT) {
1171                                 umem_bufctl_audit_t *bcap =
1172                                     (umem_bufctl_audit_t *)bcp;
1173                                 bzero(bcap, UMEM_BUFCTL_AUDIT_SIZE);
1174                                 bcap->bc_cache = cp;
1175                         }
1176                         bcp->bc_addr = buf;
1177                         bcp->bc_slab = sp;
1178                 } else {
1179                         bcp = UMEM_BUFCTL(cp, buf);
1180                 }
1181                 if (cache_flags & UMF_BUFTAG) {
1182                         umem_buftag_t *btp = UMEM_BUFTAG(cp, buf);
1183                         btp->bt_redzone = UMEM_REDZONE_PATTERN;
1184                         btp->bt_bufctl = bcp;
1185                         btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_FREE;
1186                         if (cache_flags & UMF_DEADBEEF) {
1187                                 copy_pattern(UMEM_FREE_PATTERN, buf,
1188                                     cp->cache_verify);
1189                         }
1190                 }
1191                 bcp->bc_next = sp->slab_head;
1192                 sp->slab_head = bcp;
1193                 buf += chunksize;
1194         }
1195
1196         umem_log_event(umem_slab_log, cp, sp, slab);
1197
1198         return (sp);
1199
1200 bufctl_alloc_failure:
1201
1202         while ((bcp = sp->slab_head) != NULL) {
1203                 sp->slab_head = bcp->bc_next;
1204                 _umem_cache_free(cp->cache_bufctl_cache, bcp);
1205         }
1206         _umem_cache_free(umem_slab_cache, sp);
1207
1208 slab_alloc_failure:
1209
1210         vmem_free(vmp, slab, slabsize);
1211
1212 vmem_alloc_failure:
1213
1214         umem_log_event(umem_failure_log, cp, NULL, NULL);
1215         atomic_add_64(&cp->cache_alloc_fail, 1);
1216
1217         return (NULL);
1218 }
1219
1220 /*
1221  * Destroy a slab.
1222  */
1223 static void
1224 umem_slab_destroy(umem_cache_t *cp, umem_slab_t *sp)
1225 {
1226         vmem_t *vmp = cp->cache_arena;
1227         void *slab = (void *)P2ALIGN((uintptr_t)sp->slab_base, vmp->vm_quantum);
1228
1229         if (cp->cache_flags & UMF_HASH) {
1230                 umem_bufctl_t *bcp;
1231                 while ((bcp = sp->slab_head) != NULL) {
1232                         sp->slab_head = bcp->bc_next;
1233                         _umem_cache_free(cp->cache_bufctl_cache, bcp);
1234                 }
1235                 _umem_cache_free(umem_slab_cache, sp);
1236         }
1237         vmem_free(vmp, slab, cp->cache_slabsize);
1238 }
1239
1240 /*
1241  * Allocate a raw (unconstructed) buffer from cp's slab layer.
1242  */
1243 static void *
1244 umem_slab_alloc(umem_cache_t *cp, int umflag)
1245 {
1246         umem_bufctl_t *bcp, **hash_bucket;
1247         umem_slab_t *sp;
1248         void *buf;
1249
1250         (void) mutex_lock(&cp->cache_lock);
1251         cp->cache_slab_alloc++;
1252         sp = cp->cache_freelist;
1253         ASSERT(sp->slab_cache == cp);
1254         if (sp->slab_head == NULL) {
1255                 /*
1256                  * The freelist is empty.  Create a new slab.
1257                  */
1258                 (void) mutex_unlock(&cp->cache_lock);
1259                 if (cp == &umem_null_cache)
1260                         return (NULL);
1261                 if ((sp = umem_slab_create(cp, umflag)) == NULL)
1262                         return (NULL);
1263                 (void) mutex_lock(&cp->cache_lock);
1264                 cp->cache_slab_create++;
1265                 if ((cp->cache_buftotal += sp->slab_chunks) > cp->cache_bufmax)
1266                         cp->cache_bufmax = cp->cache_buftotal;
1267                 sp->slab_next = cp->cache_freelist;
1268                 sp->slab_prev = cp->cache_freelist->slab_prev;
1269                 sp->slab_next->slab_prev = sp;
1270                 sp->slab_prev->slab_next = sp;
1271                 cp->cache_freelist = sp;
1272         }
1273
1274         sp->slab_refcnt++;
1275         ASSERT(sp->slab_refcnt <= sp->slab_chunks);
1276
1277         /*
1278          * If we're taking the last buffer in the slab,
1279          * remove the slab from the cache's freelist.
1280          */
1281         bcp = sp->slab_head;
1282         if ((sp->slab_head = bcp->bc_next) == NULL) {
1283                 cp->cache_freelist = sp->slab_next;
1284                 ASSERT(sp->slab_refcnt == sp->slab_chunks);
1285         }
1286
1287         if (cp->cache_flags & UMF_HASH) {
1288                 /*
1289                  * Add buffer to allocated-address hash table.
1290                  */
1291                 buf = bcp->bc_addr;
1292                 hash_bucket = UMEM_HASH(cp, buf);
1293                 bcp->bc_next = *hash_bucket;
1294                 *hash_bucket = bcp;
1295                 if ((cp->cache_flags & (UMF_AUDIT | UMF_BUFTAG)) == UMF_AUDIT) {
1296                         UMEM_AUDIT(umem_transaction_log, cp, bcp);
1297                 }
1298         } else {
1299                 buf = UMEM_BUF(cp, bcp);
1300         }
1301
1302         ASSERT(UMEM_SLAB_MEMBER(sp, buf));
1303
1304         (void) mutex_unlock(&cp->cache_lock);
1305
1306         return (buf);
1307 }
1308
1309 /*
1310  * Free a raw (unconstructed) buffer to cp's slab layer.
1311  */
1312 static void
1313 umem_slab_free(umem_cache_t *cp, void *buf)
1314 {
1315         umem_slab_t *sp;
1316         umem_bufctl_t *bcp, **prev_bcpp;
1317
1318         ASSERT(buf != NULL);
1319
1320         (void) mutex_lock(&cp->cache_lock);
1321         cp->cache_slab_free++;
1322
1323         if (cp->cache_flags & UMF_HASH) {
1324                 /*
1325                  * Look up buffer in allocated-address hash table.
1326                  */
1327                 prev_bcpp = UMEM_HASH(cp, buf);
1328                 while ((bcp = *prev_bcpp) != NULL) {
1329                         if (bcp->bc_addr == buf) {
1330                                 *prev_bcpp = bcp->bc_next;
1331                                 sp = bcp->bc_slab;
1332                                 break;
1333                         }
1334                         cp->cache_lookup_depth++;
1335                         prev_bcpp = &bcp->bc_next;
1336                 }
1337         } else {
1338                 bcp = UMEM_BUFCTL(cp, buf);
1339                 sp = UMEM_SLAB(cp, buf);
1340         }
1341
1342         if (bcp == NULL || sp->slab_cache != cp || !UMEM_SLAB_MEMBER(sp, buf)) {
1343                 (void) mutex_unlock(&cp->cache_lock);
1344                 umem_error(UMERR_BADADDR, cp, buf);
1345                 return;
1346         }
1347
1348         if ((cp->cache_flags & (UMF_AUDIT | UMF_BUFTAG)) == UMF_AUDIT) {
1349                 if (cp->cache_flags & UMF_CONTENTS)
1350                         ((umem_bufctl_audit_t *)bcp)->bc_contents =
1351                             umem_log_enter(umem_content_log, buf,
1352                             cp->cache_contents);
1353                 UMEM_AUDIT(umem_transaction_log, cp, bcp);
1354         }
1355
1356         /*
1357          * If this slab isn't currently on the freelist, put it there.
1358          */
1359         if (sp->slab_head == NULL) {
1360                 ASSERT(sp->slab_refcnt == sp->slab_chunks);
1361                 ASSERT(cp->cache_freelist != sp);
1362                 sp->slab_next->slab_prev = sp->slab_prev;
1363                 sp->slab_prev->slab_next = sp->slab_next;
1364                 sp->slab_next = cp->cache_freelist;
1365                 sp->slab_prev = cp->cache_freelist->slab_prev;
1366                 sp->slab_next->slab_prev = sp;
1367                 sp->slab_prev->slab_next = sp;
1368                 cp->cache_freelist = sp;
1369         }
1370
1371         bcp->bc_next = sp->slab_head;
1372         sp->slab_head = bcp;
1373
1374         ASSERT(sp->slab_refcnt >= 1);
1375         if (--sp->slab_refcnt == 0) {
1376                 /*
1377                  * There are no outstanding allocations from this slab,
1378                  * so we can reclaim the memory.
1379                  */
1380                 sp->slab_next->slab_prev = sp->slab_prev;
1381                 sp->slab_prev->slab_next = sp->slab_next;
1382                 if (sp == cp->cache_freelist)
1383                         cp->cache_freelist = sp->slab_next;
1384                 cp->cache_slab_destroy++;
1385                 cp->cache_buftotal -= sp->slab_chunks;
1386                 (void) mutex_unlock(&cp->cache_lock);
1387                 umem_slab_destroy(cp, sp);
1388                 return;
1389         }
1390         (void) mutex_unlock(&cp->cache_lock);
1391 }
1392
1393 static int
1394 umem_cache_alloc_debug(umem_cache_t *cp, void *buf, int umflag)
1395 {
1396         umem_buftag_t *btp = UMEM_BUFTAG(cp, buf);
1397         umem_bufctl_audit_t *bcp = (umem_bufctl_audit_t *)btp->bt_bufctl;
1398         uint32_t mtbf;
1399         int flags_nfatal;
1400
1401         if (btp->bt_bxstat != ((intptr_t)bcp ^ UMEM_BUFTAG_FREE)) {
1402                 umem_error(UMERR_BADBUFTAG, cp, buf);
1403                 return (-1);
1404         }
1405
1406         btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_ALLOC;
1407
1408         if ((cp->cache_flags & UMF_HASH) && bcp->bc_addr != buf) {
1409                 umem_error(UMERR_BADBUFCTL, cp, buf);
1410                 return (-1);
1411         }
1412
1413         btp->bt_redzone = UMEM_REDZONE_PATTERN;
1414
1415         if (cp->cache_flags & UMF_DEADBEEF) {
1416                 if (verify_and_copy_pattern(UMEM_FREE_PATTERN,
1417                     UMEM_UNINITIALIZED_PATTERN, buf, cp->cache_verify)) {
1418                         umem_error(UMERR_MODIFIED, cp, buf);
1419                         return (-1);
1420                 }
1421         }
1422
1423         if ((mtbf = umem_mtbf | cp->cache_mtbf) != 0 &&
1424             gethrtime() % mtbf == 0 &&
1425             (umflag & (UMEM_FATAL_FLAGS)) == 0) {
1426                 umem_log_event(umem_failure_log, cp, NULL, NULL);
1427         } else {
1428                 mtbf = 0;
1429         }
1430
1431         /*
1432          * We do not pass fatal flags on to the constructor.  This prevents
1433          * leaking buffers in the event of a subordinate constructor failing.
1434          */
1435         flags_nfatal = UMEM_DEFAULT;
1436         if (mtbf || (cp->cache_constructor != NULL &&
1437             cp->cache_constructor(buf, cp->cache_private, flags_nfatal) != 0)) {
1438                 atomic_add_64(&cp->cache_alloc_fail, 1);
1439                 btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_FREE;
1440                 copy_pattern(UMEM_FREE_PATTERN, buf, cp->cache_verify);
1441                 umem_slab_free(cp, buf);
1442                 return (-1);
1443         }
1444
1445         if (cp->cache_flags & UMF_AUDIT) {
1446                 UMEM_AUDIT(umem_transaction_log, cp, bcp);
1447         }
1448
1449         return (0);
1450 }
1451
1452 static int
1453 umem_cache_free_debug(umem_cache_t *cp, void *buf)
1454 {
1455         umem_buftag_t *btp = UMEM_BUFTAG(cp, buf);
1456         umem_bufctl_audit_t *bcp = (umem_bufctl_audit_t *)btp->bt_bufctl;
1457         umem_slab_t *sp;
1458
1459         if (btp->bt_bxstat != ((intptr_t)bcp ^ UMEM_BUFTAG_ALLOC)) {
1460                 if (btp->bt_bxstat == ((intptr_t)bcp ^ UMEM_BUFTAG_FREE)) {
1461                         umem_error(UMERR_DUPFREE, cp, buf);
1462                         return (-1);
1463                 }
1464                 sp = umem_findslab(cp, buf);
1465                 if (sp == NULL || sp->slab_cache != cp)
1466                         umem_error(UMERR_BADADDR, cp, buf);
1467                 else
1468                         umem_error(UMERR_REDZONE, cp, buf);
1469                 return (-1);
1470         }
1471
1472         btp->bt_bxstat = (intptr_t)bcp ^ UMEM_BUFTAG_FREE;
1473
1474         if ((cp->cache_flags & UMF_HASH) && bcp->bc_addr != buf) {
1475                 umem_error(UMERR_BADBUFCTL, cp, buf);
1476                 return (-1);
1477         }
1478
1479         if (btp->bt_redzone != UMEM_REDZONE_PATTERN) {
1480                 umem_error(UMERR_REDZONE, cp, buf);
1481                 return (-1);
1482         }
1483
1484         if (cp->cache_flags & UMF_AUDIT) {
1485                 if (cp->cache_flags & UMF_CONTENTS)
1486                         bcp->bc_contents = umem_log_enter(umem_content_log,
1487                             buf, cp->cache_contents);
1488                 UMEM_AUDIT(umem_transaction_log, cp, bcp);
1489         }
1490
1491         if (cp->cache_destructor != NULL)
1492                 cp->cache_destructor(buf, cp->cache_private);
1493
1494         if (cp->cache_flags & UMF_DEADBEEF)
1495                 copy_pattern(UMEM_FREE_PATTERN, buf, cp->cache_verify);
1496
1497         return (0);
1498 }
1499
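/*
 * Illustrative sketch (not part of the original source): bt_bxstat is an
 * XOR "stamp" -- the bufctl pointer combined with an ALLOC or FREE
 * token -- so a single word both records the buffer's state and
 * cross-checks the bufctl pointer.  The idea in miniature, with
 * hypothetical names:
 *
 *	stamp = (intptr_t)ctl ^ TOKEN_ALLOC;		stored at alloc time
 *
 *	if (stamp != ((intptr_t)ctl ^ TOKEN_ALLOC)) {	checked at free time
 *		if (stamp == ((intptr_t)ctl ^ TOKEN_FREE))
 *			report_duplicate_free();	valid stamp, wrong one
 *		else
 *			report_bad_address_or_overwrite();
 *	}
 */
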
1500 /*
1501  * Free each object in magazine mp to cp's slab layer, and free mp itself.
1502  */
1503 static void
1504 umem_magazine_destroy(umem_cache_t *cp, umem_magazine_t *mp, int nrounds)
1505 {
1506         int round;
1507
1508         ASSERT(cp->cache_next == NULL || IN_UPDATE());
1509
1510         for (round = 0; round < nrounds; round++) {
1511                 void *buf = mp->mag_round[round];
1512
1513                 if ((cp->cache_flags & UMF_DEADBEEF) &&
1514                     verify_pattern(UMEM_FREE_PATTERN, buf,
1515                     cp->cache_verify) != NULL) {
1516                         umem_error(UMERR_MODIFIED, cp, buf);
1517                         continue;
1518                 }
1519
1520                 if (!(cp->cache_flags & UMF_BUFTAG) &&
1521                     cp->cache_destructor != NULL)
1522                         cp->cache_destructor(buf, cp->cache_private);
1523
1524                 umem_slab_free(cp, buf);
1525         }
1526         ASSERT(UMEM_MAGAZINE_VALID(cp, mp));
1527         _umem_cache_free(cp->cache_magtype->mt_cache, mp);
1528 }
1529
1530 /*
1531  * Allocate a magazine from the depot.
1532  */
1533 static umem_magazine_t *
1534 umem_depot_alloc(umem_cache_t *cp, umem_maglist_t *mlp)
1535 {
1536         umem_magazine_t *mp;
1537
1538         /*
1539          * If we can't get the depot lock without contention,
1540          * update our contention count.  We use the depot
1541          * contention rate to determine whether we need to
1542          * increase the magazine size for better scalability.
1543          */
1544         if (mutex_trylock(&cp->cache_depot_lock) != 0) {
1545                 (void) mutex_lock(&cp->cache_depot_lock);
1546                 cp->cache_depot_contention++;
1547         }
1548
1549         if ((mp = mlp->ml_list) != NULL) {
1550                 ASSERT(UMEM_MAGAZINE_VALID(cp, mp));
1551                 mlp->ml_list = mp->mag_next;
1552                 if (--mlp->ml_total < mlp->ml_min)
1553                         mlp->ml_min = mlp->ml_total;
1554                 mlp->ml_alloc++;
1555         }
1556
1557         (void) mutex_unlock(&cp->cache_depot_lock);
1558
1559         return (mp);
1560 }
1561
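/*
 * Illustrative sketch (not part of the original source): the
 * trylock-then-lock sequence above is a cheap way to sample lock
 * contention without slowing the uncontended path.  In generic form,
 * with hypothetical names:
 *
 *	if (mutex_trylock(&lock) != 0) {	owner present: we will block
 *		(void) mutex_lock(&lock);
 *		contention_count++;
 *	}
 *	... critical section ...
 *	(void) mutex_unlock(&lock);
 *
 * umem_cache_update() later compares the count against its value at the
 * previous update interval rather than treating it as an absolute number.
 */
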
1562 /*
1563  * Free a magazine to the depot.
1564  */
1565 static void
1566 umem_depot_free(umem_cache_t *cp, umem_maglist_t *mlp, umem_magazine_t *mp)
1567 {
1568         (void) mutex_lock(&cp->cache_depot_lock);
1569         ASSERT(UMEM_MAGAZINE_VALID(cp, mp));
1570         mp->mag_next = mlp->ml_list;
1571         mlp->ml_list = mp;
1572         mlp->ml_total++;
1573         (void) mutex_unlock(&cp->cache_depot_lock);
1574 }
1575
1576 /*
1577  * Update the working set statistics for cp's depot.
1578  */
1579 static void
1580 umem_depot_ws_update(umem_cache_t *cp)
1581 {
1582         (void) mutex_lock(&cp->cache_depot_lock);
1583         cp->cache_full.ml_reaplimit = cp->cache_full.ml_min;
1584         cp->cache_full.ml_min = cp->cache_full.ml_total;
1585         cp->cache_empty.ml_reaplimit = cp->cache_empty.ml_min;
1586         cp->cache_empty.ml_min = cp->cache_empty.ml_total;
1587         (void) mutex_unlock(&cp->cache_depot_lock);
1588 }
1589
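/*
 * Worked example (illustrative, values hypothetical): suppose the
 * full-magazine list held 10 magazines at the previous update and never
 * dipped below 7 in the interval since (ml_min == 7).  The update above
 * records ml_reaplimit = 7 and restarts the minimum at ml_min = ml_total.
 * At least 7 magazines sat idle for an entire interval, so
 * umem_depot_ws_reap() may destroy up to 7 of them; taking
 * MIN(ml_reaplimit, ml_min) at reap time re-checks that they are still
 * idle when the reap actually runs.
 */
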
1590 /*
1591  * Reap all magazines that have fallen out of the depot's working set.
1592  */
1593 static void
1594 umem_depot_ws_reap(umem_cache_t *cp)
1595 {
1596         long reap;
1597         umem_magazine_t *mp;
1598
1599         ASSERT(cp->cache_next == NULL || IN_REAP());
1600
1601         reap = MIN(cp->cache_full.ml_reaplimit, cp->cache_full.ml_min);
1602         while (reap-- && (mp = umem_depot_alloc(cp, &cp->cache_full)) != NULL)
1603                 umem_magazine_destroy(cp, mp, cp->cache_magtype->mt_magsize);
1604
1605         reap = MIN(cp->cache_empty.ml_reaplimit, cp->cache_empty.ml_min);
1606         while (reap-- && (mp = umem_depot_alloc(cp, &cp->cache_empty)) != NULL)
1607                 umem_magazine_destroy(cp, mp, 0);
1608 }
1609
1610 static void
1611 umem_cpu_reload(umem_cpu_cache_t *ccp, umem_magazine_t *mp, int rounds)
1612 {
1613         ASSERT((ccp->cc_loaded == NULL && ccp->cc_rounds == -1) ||
1614             (ccp->cc_loaded && ccp->cc_rounds + rounds == ccp->cc_magsize));
1615         ASSERT(ccp->cc_magsize > 0);
1616
1617         ccp->cc_ploaded = ccp->cc_loaded;
1618         ccp->cc_prounds = ccp->cc_rounds;
1619         ccp->cc_loaded = mp;
1620         ccp->cc_rounds = rounds;
1621 }
1622
1623 /*
1624  * Allocate a constructed object from cache cp.
1625  */
1626 #ifndef NO_WEAK_SYMBOLS
1627 #pragma weak umem_cache_alloc = _umem_cache_alloc
1628 #endif
1629 void *
1630 _umem_cache_alloc(umem_cache_t *cp, int umflag)
1631 {
1632         umem_cpu_cache_t *ccp;
1633         umem_magazine_t *fmp;
1634         void *buf;
1635         int flags_nfatal;
1636
1637 retry:
1638         ccp = UMEM_CPU_CACHE(cp, CPU(cp->cache_cpu_mask));
1639         (void) mutex_lock(&ccp->cc_lock);
1640         for (;;) {
1641                 /*
1642                  * If there's an object available in the current CPU's
1643                  * loaded magazine, just take it and return.
1644                  */
1645                 if (ccp->cc_rounds > 0) {
1646                         buf = ccp->cc_loaded->mag_round[--ccp->cc_rounds];
1647                         ccp->cc_alloc++;
1648                         (void) mutex_unlock(&ccp->cc_lock);
1649                         if ((ccp->cc_flags & UMF_BUFTAG) &&
1650                             umem_cache_alloc_debug(cp, buf, umflag) == -1) {
1651                                 if (umem_alloc_retry(cp, umflag)) {
1652                                         goto retry;
1653                                 }
1654
1655                                 return (NULL);
1656                         }
1657                         return (buf);
1658                 }
1659
1660                 /*
1661                  * The loaded magazine is empty.  If the previously loaded
1662                  * magazine was full, exchange them and try again.
1663                  */
1664                 if (ccp->cc_prounds > 0) {
1665                         umem_cpu_reload(ccp, ccp->cc_ploaded, ccp->cc_prounds);
1666                         continue;
1667                 }
1668
1669                 /*
1670                  * If the magazine layer is disabled, break out now.
1671                  */
1672                 if (ccp->cc_magsize == 0)
1673                         break;
1674
1675                 /*
1676                  * Try to get a full magazine from the depot.
1677                  */
1678                 fmp = umem_depot_alloc(cp, &cp->cache_full);
1679                 if (fmp != NULL) {
1680                         if (ccp->cc_ploaded != NULL)
1681                                 umem_depot_free(cp, &cp->cache_empty,
1682                                     ccp->cc_ploaded);
1683                         umem_cpu_reload(ccp, fmp, ccp->cc_magsize);
1684                         continue;
1685                 }
1686
1687                 /*
1688                  * There are no full magazines in the depot,
1689                  * so fall through to the slab layer.
1690                  */
1691                 break;
1692         }
1693         (void) mutex_unlock(&ccp->cc_lock);
1694
1695         /*
1696          * We couldn't allocate a constructed object from the magazine layer,
1697          * so get a raw buffer from the slab layer and apply its constructor.
1698          */
1699         buf = umem_slab_alloc(cp, umflag);
1700
1701         if (buf == NULL) {
1702                 if (cp == &umem_null_cache)
1703                         return (NULL);
1704                 if (umem_alloc_retry(cp, umflag)) {
1705                         goto retry;
1706                 }
1707
1708                 return (NULL);
1709         }
1710
1711         if (cp->cache_flags & UMF_BUFTAG) {
1712                 /*
1713                  * Let umem_cache_alloc_debug() apply the constructor for us.
1714                  */
1715                 if (umem_cache_alloc_debug(cp, buf, umflag) == -1) {
1716                         if (umem_alloc_retry(cp, umflag)) {
1717                                 goto retry;
1718                         }
1719                         return (NULL);
1720                 }
1721                 return (buf);
1722         }
1723
1724         /*
1725          * We do not pass fatal flags on to the constructor.  This prevents
1726          * leaking buffers in the event of a subordinate constructor failing.
1727          */
1728         flags_nfatal = UMEM_DEFAULT;
1729         if (cp->cache_constructor != NULL &&
1730             cp->cache_constructor(buf, cp->cache_private, flags_nfatal) != 0) {
1731                 atomic_add_64(&cp->cache_alloc_fail, 1);
1732                 umem_slab_free(cp, buf);
1733
1734                 if (umem_alloc_retry(cp, umflag)) {
1735                         goto retry;
1736                 }
1737                 return (NULL);
1738         }
1739
1740         return (buf);
1741 }
1742
1743 /*
1744  * Free a constructed object to cache cp.
1745  */
1746 #ifndef NO_WEAK_SYMBOLS
1747 #pragma weak umem_cache_free = _umem_cache_free
1748 #endif
1749 void
1750 _umem_cache_free(umem_cache_t *cp, void *buf)
1751 {
1752         umem_cpu_cache_t *ccp = UMEM_CPU_CACHE(cp, CPU(cp->cache_cpu_mask));
1753         umem_magazine_t *emp;
1754         umem_magtype_t *mtp;
1755
1756         if (ccp->cc_flags & UMF_BUFTAG)
1757                 if (umem_cache_free_debug(cp, buf) == -1)
1758                         return;
1759
1760         (void) mutex_lock(&ccp->cc_lock);
1761         for (;;) {
1762                 /*
1763                  * If there's a slot available in the current CPU's
1764                  * loaded magazine, just put the object there and return.
1765                  */
1766                 if ((uint_t)ccp->cc_rounds < ccp->cc_magsize) {
1767                         ccp->cc_loaded->mag_round[ccp->cc_rounds++] = buf;
1768                         ccp->cc_free++;
1769                         (void) mutex_unlock(&ccp->cc_lock);
1770                         return;
1771                 }
1772
1773                 /*
1774                  * The loaded magazine is full.  If the previously loaded
1775                  * magazine was empty, exchange them and try again.
1776                  */
1777                 if (ccp->cc_prounds == 0) {
1778                         umem_cpu_reload(ccp, ccp->cc_ploaded, ccp->cc_prounds);
1779                         continue;
1780                 }
1781
1782                 /*
1783                  * If the magazine layer is disabled, break out now.
1784                  */
1785                 if (ccp->cc_magsize == 0)
1786                         break;
1787
1788                 /*
1789                  * Try to get an empty magazine from the depot.
1790                  */
1791                 emp = umem_depot_alloc(cp, &cp->cache_empty);
1792                 if (emp != NULL) {
1793                         if (ccp->cc_ploaded != NULL)
1794                                 umem_depot_free(cp, &cp->cache_full,
1795                                     ccp->cc_ploaded);
1796                         umem_cpu_reload(ccp, emp, 0);
1797                         continue;
1798                 }
1799
1800                 /*
1801                  * There are no empty magazines in the depot,
1802                  * so try to allocate a new one.  We must drop all locks
1803                  * across umem_cache_alloc() because lower layers may
1804                  * attempt to allocate from this cache.
1805                  */
1806                 mtp = cp->cache_magtype;
1807                 (void) mutex_unlock(&ccp->cc_lock);
1808                 emp = _umem_cache_alloc(mtp->mt_cache, UMEM_DEFAULT);
1809                 (void) mutex_lock(&ccp->cc_lock);
1810
1811                 if (emp != NULL) {
1812                         /*
1813                          * We successfully allocated an empty magazine.
1814                          * However, we had to drop ccp->cc_lock to do it,
1815                          * so the cache's magazine size may have changed.
1816                          * If so, free the magazine and try again.
1817                          */
1818                         if (ccp->cc_magsize != mtp->mt_magsize) {
1819                                 (void) mutex_unlock(&ccp->cc_lock);
1820                                 _umem_cache_free(mtp->mt_cache, emp);
1821                                 (void) mutex_lock(&ccp->cc_lock);
1822                                 continue;
1823                         }
1824
1825                         /*
1826                          * We got a magazine of the right size.  Add it to
1827                          * the depot and try the whole dance again.
1828                          */
1829                         umem_depot_free(cp, &cp->cache_empty, emp);
1830                         continue;
1831                 }
1832
1833                 /*
1834                  * We couldn't allocate an empty magazine,
1835                  * so fall through to the slab layer.
1836                  */
1837                 break;
1838         }
1839         (void) mutex_unlock(&ccp->cc_lock);
1840
1841         /*
1842          * We couldn't free our constructed object to the magazine layer,
1843          * so apply its destructor and free it to the slab layer.
1844          * Note that if UMF_BUFTAG is in effect, umem_cache_free_debug()
1845          * will have already applied the destructor.
1846          */
1847         if (!(cp->cache_flags & UMF_BUFTAG) && cp->cache_destructor != NULL)
1848                 cp->cache_destructor(buf, cp->cache_private);
1849
1850         umem_slab_free(cp, buf);
1851 }
1852
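/*
 * Usage sketch (illustrative, not part of the original source; the
 * object type is hypothetical):
 *
 *	#include <umem.h>
 *
 *	typedef struct node { struct node *n_next; int n_value; } node_t;
 *	static umem_cache_t *node_cache;
 *
 *	node_cache = umem_cache_create("node_cache", sizeof (node_t), 0,
 *	    NULL, NULL, NULL, NULL, NULL, 0);
 *
 *	node_t *np = umem_cache_alloc(node_cache, UMEM_DEFAULT);
 *	if (np != NULL) {
 *		np->n_value = 1;
 *		...
 *		umem_cache_free(node_cache, np);
 *	}
 *
 * UMEM_DEFAULT allocations may return NULL, and an object passed to
 * umem_cache_free() must have come from umem_cache_alloc() on the same
 * cache.
 */
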
1853 #ifndef NO_WEAK_SYMBOLS
1854 #pragma weak umem_zalloc = _umem_zalloc
1855 #endif
1856 void *
1857 _umem_zalloc(size_t size, int umflag)
1858 {
1859         size_t index = (size - 1) >> UMEM_ALIGN_SHIFT;
1860         void *buf;
1861
1862 retry:
1863         if (index < UMEM_MAXBUF >> UMEM_ALIGN_SHIFT) {
1864                 umem_cache_t *cp = umem_alloc_table[index];
1865                 buf = _umem_cache_alloc(cp, umflag);
1866                 if (buf != NULL) {
1867                         if (cp->cache_flags & UMF_BUFTAG) {
1868                                 umem_buftag_t *btp = UMEM_BUFTAG(cp, buf);
1869                                 ((uint8_t *)buf)[size] = UMEM_REDZONE_BYTE;
1870                                 ((uint32_t *)btp)[1] = UMEM_SIZE_ENCODE(size);
1871                         }
1872                         bzero(buf, size);
1873                 } else if (umem_alloc_retry(cp, umflag))
1874                         goto retry;
1875         } else {
1876                 buf = _umem_alloc(size, umflag);        /* handles failure */
1877                 if (buf != NULL)
1878                         bzero(buf, size);
1879         }
1880         return (buf);
1881 }
1882
1883 #ifndef NO_WEAK_SYMBOLS
1884 #pragma weak umem_alloc = _umem_alloc
1885 #endif
1886 void *
1887 _umem_alloc(size_t size, int umflag)
1888 {
1889         size_t index = (size - 1) >> UMEM_ALIGN_SHIFT;
1890         void *buf;
1891 umem_alloc_retry:
1892         if (index < UMEM_MAXBUF >> UMEM_ALIGN_SHIFT) {
1893                 umem_cache_t *cp = umem_alloc_table[index];
1894                 buf = _umem_cache_alloc(cp, umflag);
1895                 if ((cp->cache_flags & UMF_BUFTAG) && buf != NULL) {
1896                         umem_buftag_t *btp = UMEM_BUFTAG(cp, buf);
1897                         ((uint8_t *)buf)[size] = UMEM_REDZONE_BYTE;
1898                         ((uint32_t *)btp)[1] = UMEM_SIZE_ENCODE(size);
1899                 }
1900                 if (buf == NULL && umem_alloc_retry(cp, umflag))
1901                         goto umem_alloc_retry;
1902                 return (buf);
1903         }
1904         if (size == 0)
1905                 return (NULL);
1906         if (umem_oversize_arena == NULL) {
1907                 if (umem_init())
1908                         ASSERT(umem_oversize_arena != NULL);
1909                 else
1910                         return (NULL);
1911         }
1912         buf = vmem_alloc(umem_oversize_arena, size, UMEM_VMFLAGS(umflag));
1913         if (buf == NULL) {
1914                 umem_log_event(umem_failure_log, NULL, NULL, (void *)size);
1915                 if (umem_alloc_retry(NULL, umflag))
1916                         goto umem_alloc_retry;
1917         }
1918         return (buf);
1919 }
1920
1921 #ifndef NO_WEAK_SYMBOLS
1922 #pragma weak umem_alloc_align = _umem_alloc_align
1923 #endif
1924 void *
1925 _umem_alloc_align(size_t size, size_t align, int umflag)
1926 {
1927         void *buf;
1928
1929         if (size == 0)
1930                 return (NULL);
1931         if ((align & (align - 1)) != 0)
1932                 return (NULL);
1933         if (align < UMEM_ALIGN)
1934                 align = UMEM_ALIGN;
1935
1936 umem_alloc_align_retry:
1937         if (umem_memalign_arena == NULL) {
1938                 if (umem_init())
1939                         ASSERT(umem_oversize_arena != NULL);
1940                 else
1941                         return (NULL);
1942         }
1943         buf = vmem_xalloc(umem_memalign_arena, size, align, 0, 0, NULL, NULL,
1944             UMEM_VMFLAGS(umflag));
1945         if (buf == NULL) {
1946                 umem_log_event(umem_failure_log, NULL, NULL, (void *)size);
1947                 if (umem_alloc_retry(NULL, umflag))
1948                         goto umem_alloc_align_retry;
1949         }
1950         return (buf);
1951 }
1952
1953 #ifndef NO_WEAK_SYMBOLS
1954 #pragma weak umem_free = _umem_free
1955 #endif
1956 void
1957 _umem_free(void *buf, size_t size)
1958 {
1959         size_t index = (size - 1) >> UMEM_ALIGN_SHIFT;
1960
1961         if (index < UMEM_MAXBUF >> UMEM_ALIGN_SHIFT) {
1962                 umem_cache_t *cp = umem_alloc_table[index];
1963                 if (cp->cache_flags & UMF_BUFTAG) {
1964                         umem_buftag_t *btp = UMEM_BUFTAG(cp, buf);
1965                         uint32_t *ip = (uint32_t *)btp;
1966                         if (ip[1] != UMEM_SIZE_ENCODE(size)) {
1967                                 if (*(uint64_t *)buf == UMEM_FREE_PATTERN) {
1968                                         umem_error(UMERR_DUPFREE, cp, buf);
1969                                         return;
1970                                 }
1971                                 if (UMEM_SIZE_VALID(ip[1])) {
1972                                         ip[0] = UMEM_SIZE_ENCODE(size);
1973                                         umem_error(UMERR_BADSIZE, cp, buf);
1974                                 } else {
1975                                         umem_error(UMERR_REDZONE, cp, buf);
1976                                 }
1977                                 return;
1978                         }
1979                         if (((uint8_t *)buf)[size] != UMEM_REDZONE_BYTE) {
1980                                 umem_error(UMERR_REDZONE, cp, buf);
1981                                 return;
1982                         }
1983                         btp->bt_redzone = UMEM_REDZONE_PATTERN;
1984                 }
1985                 _umem_cache_free(cp, buf);
1986         } else {
1987                 if (buf == NULL && size == 0)
1988                         return;
1989                 vmem_free(umem_oversize_arena, buf, size);
1990         }
1991 }
1992
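/*
 * Usage sketch (illustrative): unlike free(3C), umem_free() requires the
 * caller to pass back the original allocation size; when UMF_BUFTAG
 * debugging is enabled, _umem_free() above reports a mismatch as
 * UMERR_BADSIZE.
 *
 *	#include <umem.h>
 *
 *	char *buf = umem_alloc(128, UMEM_DEFAULT);
 *	if (buf != NULL) {
 *		...
 *		umem_free(buf, 128);		size must match the alloc
 *	}
 */
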
1993 #ifndef NO_WEAK_SYMBOLS
1994 #pragma weak umem_free_align = _umem_free_align
1995 #endif
1996 void
1997 _umem_free_align(void *buf, size_t size)
1998 {
1999         if (buf == NULL && size == 0)
2000                 return;
2001         vmem_xfree(umem_memalign_arena, buf, size);
2002 }
2003
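/*
 * Usage sketch (illustrative): aligned allocations are carved from
 * umem_memalign_arena rather than the ordinary caches or the oversize
 * arena, so they must be released with umem_free_align(), not
 * umem_free():
 *
 *	void *p = umem_alloc_align(1024, 64, UMEM_DEFAULT);
 *	if (p != NULL) {
 *		...
 *		umem_free_align(p, 1024);
 *	}
 *
 * The alignment must be a power of two; umem_alloc_align() returns NULL
 * otherwise.
 */
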
2004 static void *
2005 umem_firewall_va_alloc(vmem_t *vmp, size_t size, int vmflag)
2006 {
2007         size_t realsize = size + vmp->vm_quantum;
2008
2009         /*
2010          * Annoying edge case: if 'size' is just shy of ULONG_MAX, adding
2011          * vm_quantum will cause integer wraparound.  Check for this, and
2012          * blow off the firewall page in this case.  Note that such a
2013          * giant allocation (the entire address space) can never be
2014          * satisfied, so it will either fail immediately (VM_NOSLEEP)
2015          * or sleep forever (VM_SLEEP).  Thus, there is no need for a
2016          * corresponding check in umem_firewall_va_free().
2017          */
2018         if (realsize < size)
2019                 realsize = size;
2020
2021         return (vmem_alloc(vmp, realsize, vmflag | VM_NEXTFIT));
2022 }
2023
2024 static void
2025 umem_firewall_va_free(vmem_t *vmp, void *addr, size_t size)
2026 {
2027         vmem_free(vmp, addr, size + vmp->vm_quantum);
2028 }
2029
2030 /*
2031  * Reclaim all unused memory from a cache.
2032  */
2033 static void
2034 umem_cache_reap(umem_cache_t *cp)
2035 {
2036         /*
2037          * Ask the cache's owner to free some memory if possible.
2038          * The idea is to handle things like the inode cache, which
2039          * typically sits on a bunch of memory that it doesn't truly
2040          * *need*.  Reclaim policy is entirely up to the owner; this
2041          * callback is just an advisory plea for help.
2042          */
2043         if (cp->cache_reclaim != NULL)
2044                 cp->cache_reclaim(cp->cache_private);
2045
2046         umem_depot_ws_reap(cp);
2047 }
2048
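/*
 * Illustrative sketch (hypothetical consumer, not part of the original
 * source): a client that keeps its own list of idle objects can use the
 * reclaim callback to hand them back under memory pressure:
 *
 *	static void
 *	my_reclaim(void *private)
 *	{
 *		my_pool_t *pool = private;
 *		my_obj_t *op;
 *
 *		while ((op = my_pool_trim_one(pool)) != NULL)
 *			umem_cache_free(my_obj_cache, op);
 *	}
 *
 *	... umem_cache_create("my_obj_cache", sizeof (my_obj_t), 0,
 *	    my_ctor, my_dtor, my_reclaim, pool, NULL, 0);
 */
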
2049 /*
2050  * Purge all magazines from a cache and set its magazine limit to zero.
2051  * All calls are serialized by being done by the update thread, except for
2052  * the final call from umem_cache_destroy().
2053  */
2054 static void
2055 umem_cache_magazine_purge(umem_cache_t *cp)
2056 {
2057         umem_cpu_cache_t *ccp;
2058         umem_magazine_t *mp, *pmp;
2059         int rounds, prounds, cpu_seqid;
2060
2061         ASSERT(cp->cache_next == NULL || IN_UPDATE());
2062
2063         for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++) {
2064                 ccp = &cp->cache_cpu[cpu_seqid];
2065
2066                 (void) mutex_lock(&ccp->cc_lock);
2067                 mp = ccp->cc_loaded;
2068                 pmp = ccp->cc_ploaded;
2069                 rounds = ccp->cc_rounds;
2070                 prounds = ccp->cc_prounds;
2071                 ccp->cc_loaded = NULL;
2072                 ccp->cc_ploaded = NULL;
2073                 ccp->cc_rounds = -1;
2074                 ccp->cc_prounds = -1;
2075                 ccp->cc_magsize = 0;
2076                 (void) mutex_unlock(&ccp->cc_lock);
2077
2078                 if (mp)
2079                         umem_magazine_destroy(cp, mp, rounds);
2080                 if (pmp)
2081                         umem_magazine_destroy(cp, pmp, prounds);
2082         }
2083
2084         /*
2085          * Updating the working set statistics twice in a row has the
2086          * effect of setting the working set size to zero, so everything
2087          * is eligible for reaping.
2088          */
2089         umem_depot_ws_update(cp);
2090         umem_depot_ws_update(cp);
2091
2092         umem_depot_ws_reap(cp);
2093 }
2094
2095 /*
2096  * Enable per-cpu magazines on a cache.
2097  */
2098 static void
2099 umem_cache_magazine_enable(umem_cache_t *cp)
2100 {
2101         int cpu_seqid;
2102
2103         if (cp->cache_flags & UMF_NOMAGAZINE)
2104                 return;
2105
2106         for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++) {
2107                 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid];
2108                 (void) mutex_lock(&ccp->cc_lock);
2109                 ccp->cc_magsize = cp->cache_magtype->mt_magsize;
2110                 (void) mutex_unlock(&ccp->cc_lock);
2111         }
2112
2113 }
2114
2115 /*
2116  * Recompute a cache's magazine size.  The trade-off is that larger magazines
2117  * provide a higher transfer rate with the depot, while smaller magazines
2118  * reduce memory consumption.  Magazine resizing is an expensive operation;
2119  * it should not be done frequently.
2120  *
2121  * Changes to the magazine size are serialized by having only one
2122  * thread (the update thread) perform them.
2123  *
2124  * Note: at present this only grows the magazine size.  It might be useful
2125  * to allow shrinkage too.
2126  */
2127 static void
2128 umem_cache_magazine_resize(umem_cache_t *cp)
2129 {
2130         umem_magtype_t *mtp = cp->cache_magtype;
2131
2132         ASSERT(IN_UPDATE());
2133
2134         if (cp->cache_chunksize < mtp->mt_maxbuf) {
2135                 umem_cache_magazine_purge(cp);
2136                 (void) mutex_lock(&cp->cache_depot_lock);
2137                 cp->cache_magtype = ++mtp;
2138                 cp->cache_depot_contention_prev =
2139                     cp->cache_depot_contention + INT_MAX;
2140                 (void) mutex_unlock(&cp->cache_depot_lock);
2141                 umem_cache_magazine_enable(cp);
2142         }
2143 }
2144
2145 /*
2146  * Rescale a cache's hash table, so that the table size is roughly the
2147  * cache size.  We want the average lookup time to be extremely small.
2148  */
2149 static void
2150 umem_hash_rescale(umem_cache_t *cp)
2151 {
2152         umem_bufctl_t **old_table, **new_table, *bcp;
2153         size_t old_size, new_size, h;
2154
2155         ASSERT(IN_UPDATE());
2156
2157         new_size = MAX(UMEM_HASH_INITIAL,
2158             1 << (highbit(3 * cp->cache_buftotal + 4) - 2));
2159         old_size = cp->cache_hash_mask + 1;
2160
2161         if ((old_size >> 1) <= new_size && new_size <= (old_size << 1))
2162                 return;
2163
2164         new_table = vmem_alloc(umem_hash_arena, new_size * sizeof (void *),
2165             VM_NOSLEEP);
2166         if (new_table == NULL)
2167                 return;
2168         bzero(new_table, new_size * sizeof (void *));
2169
2170         (void) mutex_lock(&cp->cache_lock);
2171
2172         old_size = cp->cache_hash_mask + 1;
2173         old_table = cp->cache_hash_table;
2174
2175         cp->cache_hash_mask = new_size - 1;
2176         cp->cache_hash_table = new_table;
2177         cp->cache_rescale++;
2178
2179         for (h = 0; h < old_size; h++) {
2180                 bcp = old_table[h];
2181                 while (bcp != NULL) {
2182                         void *addr = bcp->bc_addr;
2183                         umem_bufctl_t *next_bcp = bcp->bc_next;
2184                         umem_bufctl_t **hash_bucket = UMEM_HASH(cp, addr);
2185                         bcp->bc_next = *hash_bucket;
2186                         *hash_bucket = bcp;
2187                         bcp = next_bcp;
2188                 }
2189         }
2190
2191         (void) mutex_unlock(&cp->cache_lock);
2192
2193         vmem_free(umem_hash_arena, old_table, old_size * sizeof (void *));
2194 }
2195
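/*
 * Worked example (illustrative, values hypothetical): with
 * cache_buftotal == 3000, 3 * 3000 + 4 == 9004 and highbit(9004) == 14,
 * so new_size == 1 << 12 == 4096 buckets -- roughly one bucket per
 * buffer.  The early return above skips the rescale whenever the new
 * size is within a factor of two of the current size, so the table is
 * not churned by small changes in cache population.
 */
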
2196 /*
2197  * Perform periodic maintenance on a cache: hash rescaling,
2198  * depot working-set update, and magazine resizing.
2199  */
2200 void
2201 umem_cache_update(umem_cache_t *cp)
2202 {
2203         int update_flags = 0;
2204
2205         ASSERT(MUTEX_HELD(&umem_cache_lock));
2206
2207         /*
2208          * If the cache has become much larger or smaller than its hash table,
2209          * fire off a request to rescale the hash table.
2210          */
2211         (void) mutex_lock(&cp->cache_lock);
2212
2213         if ((cp->cache_flags & UMF_HASH) &&
2214             (cp->cache_buftotal > (cp->cache_hash_mask << 1) ||
2215             (cp->cache_buftotal < (cp->cache_hash_mask >> 1) &&
2216             cp->cache_hash_mask > UMEM_HASH_INITIAL)))
2217                 update_flags |= UMU_HASH_RESCALE;
2218
2219         (void) mutex_unlock(&cp->cache_lock);
2220
2221         /*
2222          * Update the depot working set statistics.
2223          */
2224         umem_depot_ws_update(cp);
2225
2226         /*
2227          * If there's a lot of contention in the depot,
2228          * increase the magazine size.
2229          */
2230         (void) mutex_lock(&cp->cache_depot_lock);
2231
2232         if (cp->cache_chunksize < cp->cache_magtype->mt_maxbuf &&
2233             (int)(cp->cache_depot_contention -
2234             cp->cache_depot_contention_prev) > umem_depot_contention)
2235                 update_flags |= UMU_MAGAZINE_RESIZE;
2236
2237         cp->cache_depot_contention_prev = cp->cache_depot_contention;
2238
2239         (void) mutex_unlock(&cp->cache_depot_lock);
2240
2241         if (update_flags)
2242                 umem_add_update(cp, update_flags);
2243 }
2244
2245 /*
2246  * Runs all pending updates.
2247  *
2248  * The update lock must be held on entrance, and will be held on exit.
2249  */
2250 void
2251 umem_process_updates(void)
2252 {
2253         ASSERT(MUTEX_HELD(&umem_update_lock));
2254
2255         while (umem_null_cache.cache_unext != &umem_null_cache) {
2256                 int notify = 0;
2257                 umem_cache_t *cp = umem_null_cache.cache_unext;
2258
2259                 cp->cache_uprev->cache_unext = cp->cache_unext;
2260                 cp->cache_unext->cache_uprev = cp->cache_uprev;
2261                 cp->cache_uprev = cp->cache_unext = NULL;
2262
2263                 ASSERT(!(cp->cache_uflags & UMU_ACTIVE));
2264
2265                 while (cp->cache_uflags) {
2266                         int uflags = (cp->cache_uflags |= UMU_ACTIVE);
2267                         (void) mutex_unlock(&umem_update_lock);
2268
2269                         /*
2270                          * The order here is important.  Each step can speed up
2271                          * later steps.
2272                          */
2273
2274                         if (uflags & UMU_HASH_RESCALE)
2275                                 umem_hash_rescale(cp);
2276
2277                         if (uflags & UMU_MAGAZINE_RESIZE)
2278                                 umem_cache_magazine_resize(cp);
2279
2280                         if (uflags & UMU_REAP)
2281                                 umem_cache_reap(cp);
2282
2283                         (void) mutex_lock(&umem_update_lock);
2284
2285                         /*
2286                          * check if anyone has requested notification
2287                          */
2288                         if (cp->cache_uflags & UMU_NOTIFY) {
2289                                 uflags |= UMU_NOTIFY;
2290                                 notify = 1;
2291                         }
2292                         cp->cache_uflags &= ~uflags;
2293                 }
2294                 if (notify)
2295                         (void) cond_broadcast(&umem_update_cv);
2296         }
2297 }
2298
2299 #ifndef UMEM_STANDALONE
2300 static void
2301 umem_st_update(void)
2302 {
2303         ASSERT(MUTEX_HELD(&umem_update_lock));
2304         ASSERT(umem_update_thr == 0 && umem_st_update_thr == 0);
2305
2306         umem_st_update_thr = thr_self();
2307
2308         (void) mutex_unlock(&umem_update_lock);
2309
2310         vmem_update(NULL);
2311         umem_cache_applyall(umem_cache_update);
2312
2313         (void) mutex_lock(&umem_update_lock);
2314
2315         umem_process_updates(); /* does all of the requested work */
2316
2317         umem_reap_next = gethrtime() +
2318             (hrtime_t)umem_reap_interval * NANOSEC;
2319
2320         umem_reaping = UMEM_REAP_DONE;
2321
2322         umem_st_update_thr = 0;
2323 }
2324 #endif
2325
2326 /*
2327  * Reclaim all unused memory from all caches.  Called from vmem when memory
2328  * gets tight.  Must be called with no locks held.
2329  *
2330  * This just requests a reap on all caches, and notifies the update thread.
2331  */
2332 void
2333 umem_reap(void)
2334 {
2335 #ifndef UMEM_STANDALONE
2336         extern int __nthreads(void);
2337 #endif
2338
2339         if (umem_ready != UMEM_READY || umem_reaping != UMEM_REAP_DONE ||
2340             gethrtime() < umem_reap_next)
2341                 return;
2342
2343         (void) mutex_lock(&umem_update_lock);
2344
2345         if (umem_reaping != UMEM_REAP_DONE || gethrtime() < umem_reap_next) {
2346                 (void) mutex_unlock(&umem_update_lock);
2347                 return;
2348         }
2349         umem_reaping = UMEM_REAP_ADDING;        /* lock out other reaps */
2350
2351         (void) mutex_unlock(&umem_update_lock);
2352
2353         umem_updateall(UMU_REAP);
2354
2355         (void) mutex_lock(&umem_update_lock);
2356
2357         umem_reaping = UMEM_REAP_ACTIVE;
2358
2359         /* Standalone is single-threaded */
2360 #ifndef UMEM_STANDALONE
2361         if (umem_update_thr == 0) {
2362                 /*
2363                  * The update thread does not exist.  If the process is
2364                  * multi-threaded, create it.  If not, or the creation fails,
2365                  * do the update processing inline.
2366                  */
2367                 ASSERT(umem_st_update_thr == 0);
2368
2369                 if (__nthreads() <= 1 || umem_create_update_thread() == 0)
2370                         umem_st_update();
2371         }
2372
2373         (void) cond_broadcast(&umem_update_cv); /* wake up the update thread */
2374 #endif
2375
2376         (void) mutex_unlock(&umem_update_lock);
2377 }
2378
2379 umem_cache_t *
2380 umem_cache_create(
2381         char *name,             /* descriptive name for this cache */
2382         size_t bufsize,         /* size of the objects it manages */
2383         size_t align,           /* required object alignment */
2384         umem_constructor_t *constructor, /* object constructor */
2385         umem_destructor_t *destructor, /* object destructor */
2386         umem_reclaim_t *reclaim, /* memory reclaim callback */
2387         void *private,          /* pass-thru arg for constr/destr/reclaim */
2388         vmem_t *vmp,            /* vmem source for slab allocation */
2389         int cflags)             /* cache creation flags */
2390 {
2391         int cpu_seqid;
2392         size_t chunksize;
2393         umem_cache_t *cp, *cnext, *cprev;
2394         umem_magtype_t *mtp;
2395         size_t csize;
2396         size_t phase;
2397
2398         /*
2399          * The init thread is allowed to create internal and quantum caches.
2400          *
2401          * Other threads must wait until initialization is complete.
2402          */
2403         if (umem_init_thr == thr_self())
2404                 ASSERT((cflags & (UMC_INTERNAL | UMC_QCACHE)) != 0);
2405         else {
2406                 ASSERT(!(cflags & UMC_INTERNAL));
2407                 if (umem_ready != UMEM_READY && umem_init() == 0) {
2408                         errno = EAGAIN;
2409                         return (NULL);
2410                 }
2411         }
2412
2413         csize = UMEM_CACHE_SIZE(umem_max_ncpus);
2414         phase = P2NPHASE(csize, UMEM_CPU_CACHE_SIZE);
2415
2416         if (vmp == NULL)
2417                 vmp = umem_default_arena;
2418
2419         ASSERT(P2PHASE(phase, UMEM_ALIGN) == 0);
2420
2421         /*
2422          * Check that the arguments are reasonable
2423          */
2424         if ((align & (align - 1)) != 0 || align > vmp->vm_quantum ||
2425             ((cflags & UMC_NOHASH) && (cflags & UMC_NOTOUCH)) ||
2426             name == NULL || bufsize == 0) {
2427                 errno = EINVAL;
2428                 return (NULL);
2429         }
2430
2431         /*
2432          * If align == 0, we set it to the minimum required alignment.
2433          *
2434          * If align < UMEM_ALIGN, we round it up to UMEM_ALIGN, unless
2435          * UMC_NOTOUCH was passed.
2436          */
2437         if (align == 0) {
2438                 if (P2ROUNDUP(bufsize, UMEM_ALIGN) >= UMEM_SECOND_ALIGN)
2439                         align = UMEM_SECOND_ALIGN;
2440                 else
2441                         align = UMEM_ALIGN;
2442         } else if (align < UMEM_ALIGN && (cflags & UMC_NOTOUCH) == 0)
2443                 align = UMEM_ALIGN;
2444
2445
2446         /*
2447          * Get a umem_cache structure.  We arrange that cp->cache_cpu[]
2448          * is aligned on a UMEM_CPU_CACHE_SIZE boundary to prevent
2449          * false sharing of per-CPU data.
2450          */
2451         cp = vmem_xalloc(umem_cache_arena, csize, UMEM_CPU_CACHE_SIZE, phase,
2452             0, NULL, NULL, VM_NOSLEEP);
2453
2454         if (cp == NULL) {
2455                 errno = EAGAIN;
2456                 return (NULL);
2457         }
2458
2459         bzero(cp, csize);
2460
2461         (void) mutex_lock(&umem_flags_lock);
2462         if (umem_flags & UMF_RANDOMIZE)
2463                 umem_flags = (((umem_flags | ~UMF_RANDOM) + 1) & UMF_RANDOM) |
2464                     UMF_RANDOMIZE;
2465         cp->cache_flags = umem_flags | (cflags & UMF_DEBUG);
2466         (void) mutex_unlock(&umem_flags_lock);
2467
2468         /*
2469          * Make sure all the various flags are reasonable.
2470          */
2471         if (cp->cache_flags & UMF_LITE) {
2472                 if (bufsize >= umem_lite_minsize &&
2473                     align <= umem_lite_maxalign &&
2474                     P2PHASE(bufsize, umem_lite_maxalign) == 0) {
2475                         cp->cache_flags |= UMF_BUFTAG;
2476                         cp->cache_flags &= ~(UMF_AUDIT | UMF_FIREWALL);
2477                 } else {
2478                         cp->cache_flags &= ~UMF_DEBUG;
2479                 }
2480         }
2481
2482         if ((cflags & UMC_QCACHE) && (cp->cache_flags & UMF_AUDIT))
2483                 cp->cache_flags |= UMF_NOMAGAZINE;
2484
2485         if (cflags & UMC_NODEBUG)
2486                 cp->cache_flags &= ~UMF_DEBUG;
2487
2488         if (cflags & UMC_NOTOUCH)
2489                 cp->cache_flags &= ~UMF_TOUCH;
2490
2491         if (cflags & UMC_NOHASH)
2492                 cp->cache_flags &= ~(UMF_AUDIT | UMF_FIREWALL);
2493
2494         if (cflags & UMC_NOMAGAZINE)
2495                 cp->cache_flags |= UMF_NOMAGAZINE;
2496
2497         if ((cp->cache_flags & UMF_AUDIT) && !(cflags & UMC_NOTOUCH))
2498                 cp->cache_flags |= UMF_REDZONE;
2499
2500         if ((cp->cache_flags & UMF_BUFTAG) && bufsize >= umem_minfirewall &&
2501             !(cp->cache_flags & UMF_LITE) && !(cflags & UMC_NOHASH))
2502                 cp->cache_flags |= UMF_FIREWALL;
2503
2504         if (vmp != umem_default_arena || umem_firewall_arena == NULL)
2505                 cp->cache_flags &= ~UMF_FIREWALL;
2506
2507         if (cp->cache_flags & UMF_FIREWALL) {
2508                 cp->cache_flags &= ~UMF_BUFTAG;
2509                 cp->cache_flags |= UMF_NOMAGAZINE;
2510                 ASSERT(vmp == umem_default_arena);
2511                 vmp = umem_firewall_arena;
2512         }
2513
2514         /*
2515          * Set cache properties.
2516          */
2517         (void) strncpy(cp->cache_name, name, sizeof (cp->cache_name) - 1);
2518         cp->cache_bufsize = bufsize;
2519         cp->cache_align = align;
2520         cp->cache_constructor = constructor;
2521         cp->cache_destructor = destructor;
2522         cp->cache_reclaim = reclaim;
2523         cp->cache_private = private;
2524         cp->cache_arena = vmp;
2525         cp->cache_cflags = cflags;
2526         cp->cache_cpu_mask = umem_cpu_mask;
2527
2528         /*
2529          * Determine the chunk size.
2530          */
2531         chunksize = bufsize;
2532
2533         if (align >= UMEM_ALIGN) {
2534                 chunksize = P2ROUNDUP(chunksize, UMEM_ALIGN);
2535                 cp->cache_bufctl = chunksize - UMEM_ALIGN;
2536         }
2537
2538         if (cp->cache_flags & UMF_BUFTAG) {
2539                 cp->cache_bufctl = chunksize;
2540                 cp->cache_buftag = chunksize;
2541                 chunksize += sizeof (umem_buftag_t);
2542         }
2543
2544         if (cp->cache_flags & UMF_DEADBEEF) {
2545                 cp->cache_verify = MIN(cp->cache_buftag, umem_maxverify);
2546                 if (cp->cache_flags & UMF_LITE)
2547                         cp->cache_verify = MIN(cp->cache_verify, UMEM_ALIGN);
2548         }
2549
2550         cp->cache_contents = MIN(cp->cache_bufctl, umem_content_maxsave);
2551
2552         cp->cache_chunksize = chunksize = P2ROUNDUP(chunksize, align);
2553
2554         if (chunksize < bufsize) {
2555                 errno = ENOMEM;
2556                 goto fail;
2557         }
2558
2559         /*
2560          * Now that we know the chunk size, determine the optimal slab size.
2561          */
2562         if (vmp == umem_firewall_arena) {
2563                 cp->cache_slabsize = P2ROUNDUP(chunksize, vmp->vm_quantum);
2564                 cp->cache_mincolor = cp->cache_slabsize - chunksize;
2565                 cp->cache_maxcolor = cp->cache_mincolor;
2566                 cp->cache_flags |= UMF_HASH;
2567                 ASSERT(!(cp->cache_flags & UMF_BUFTAG));
2568         } else if ((cflags & UMC_NOHASH) || (!(cflags & UMC_NOTOUCH) &&
2569             !(cp->cache_flags & UMF_AUDIT) &&
2570             chunksize < vmp->vm_quantum / UMEM_VOID_FRACTION)) {
2571                 cp->cache_slabsize = vmp->vm_quantum;
2572                 cp->cache_mincolor = 0;
2573                 cp->cache_maxcolor =
2574                     (cp->cache_slabsize - sizeof (umem_slab_t)) % chunksize;
2575
2576                 if (chunksize + sizeof (umem_slab_t) > cp->cache_slabsize) {
2577                         errno = EINVAL;
2578                         goto fail;
2579                 }
2580                 ASSERT(!(cp->cache_flags & UMF_AUDIT));
2581         } else {
2582                 size_t chunks, bestfit, waste, slabsize;
2583                 size_t minwaste = LONG_MAX;
2584
2585                 for (chunks = 1; chunks <= UMEM_VOID_FRACTION; chunks++) {
2586                         slabsize = P2ROUNDUP(chunksize * chunks,
2587                             vmp->vm_quantum);
2588                         /*
2589                          * check for overflow
2590                          */
2591                         if ((slabsize / chunks) < chunksize) {
2592                                 errno = ENOMEM;
2593                                 goto fail;
2594                         }
2595                         chunks = slabsize / chunksize;
2596                         waste = (slabsize % chunksize) / chunks;
2597                         if (waste < minwaste) {
2598                                 minwaste = waste;
2599                                 bestfit = slabsize;
2600                         }
2601                 }
2602                 if (cflags & UMC_QCACHE)
2603                         bestfit = MAX(1 << highbit(3 * vmp->vm_qcache_max), 64);
2604                 cp->cache_slabsize = bestfit;
2605                 cp->cache_mincolor = 0;
2606                 cp->cache_maxcolor = bestfit % chunksize;
2607                 cp->cache_flags |= UMF_HASH;
2608         }
2609
2610         if (cp->cache_flags & UMF_HASH) {
2611                 ASSERT(!(cflags & UMC_NOHASH));
2612                 cp->cache_bufctl_cache = (cp->cache_flags & UMF_AUDIT) ?
2613                     umem_bufctl_audit_cache : umem_bufctl_cache;
2614         }
2615
2616         if (cp->cache_maxcolor >= vmp->vm_quantum)
2617                 cp->cache_maxcolor = vmp->vm_quantum - 1;
2618
2619         cp->cache_color = cp->cache_mincolor;
2620
2621         /*
2622          * Initialize the rest of the slab layer.
2623          */
2624         (void) mutex_init(&cp->cache_lock, USYNC_THREAD, NULL);
2625
2626         cp->cache_freelist = &cp->cache_nullslab;
2627         cp->cache_nullslab.slab_cache = cp;
2628         cp->cache_nullslab.slab_refcnt = -1;
2629         cp->cache_nullslab.slab_next = &cp->cache_nullslab;
2630         cp->cache_nullslab.slab_prev = &cp->cache_nullslab;
2631
2632         if (cp->cache_flags & UMF_HASH) {
2633                 cp->cache_hash_table = vmem_alloc(umem_hash_arena,
2634                     UMEM_HASH_INITIAL * sizeof (void *), VM_NOSLEEP);
2635                 if (cp->cache_hash_table == NULL) {
2636                         errno = EAGAIN;
2637                         goto fail_lock;
2638                 }
2639                 bzero(cp->cache_hash_table,
2640                     UMEM_HASH_INITIAL * sizeof (void *));
2641                 cp->cache_hash_mask = UMEM_HASH_INITIAL - 1;
2642                 cp->cache_hash_shift = highbit((ulong_t)chunksize) - 1;
2643         }
2644
2645         /*
2646          * Initialize the depot.
2647          */
2648         (void) mutex_init(&cp->cache_depot_lock, USYNC_THREAD, NULL);
2649
2650         for (mtp = umem_magtype; chunksize <= mtp->mt_minbuf; mtp++)
2651                 continue;
2652
2653         cp->cache_magtype = mtp;
2654
2655         /*
2656          * Initialize the CPU layer.
2657          */
2658         for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++) {
2659                 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu_seqid];
2660                 (void) mutex_init(&ccp->cc_lock, USYNC_THREAD, NULL);
2661                 ccp->cc_flags = cp->cache_flags;
2662                 ccp->cc_rounds = -1;
2663                 ccp->cc_prounds = -1;
2664         }
2665
2666         /*
2667          * Add the cache to the global list.  This makes it visible
2668          * to umem_update(), so the cache must be ready for business.
2669          */
2670         (void) mutex_lock(&umem_cache_lock);
2671         cp->cache_next = cnext = &umem_null_cache;
2672         cp->cache_prev = cprev = umem_null_cache.cache_prev;
2673         cnext->cache_prev = cp;
2674         cprev->cache_next = cp;
2675         (void) mutex_unlock(&umem_cache_lock);
2676
2677         if (umem_ready == UMEM_READY)
2678                 umem_cache_magazine_enable(cp);
2679
2680         return (cp);
2681
2682 fail_lock:
2683         (void) mutex_destroy(&cp->cache_lock);
2684 fail:
2685         vmem_xfree(umem_cache_arena, cp, csize);
2686         return (NULL);
2687 }
2688
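/*
 * Usage sketch (illustrative, not part of the original source; the
 * object type and callbacks are hypothetical).  The constructor returns
 * nonzero to fail the allocation; the destructor must undo whatever the
 * constructor did:
 *
 *	typedef struct conn { mutex_t c_lock; int c_state; } conn_t;
 *	static umem_cache_t *conn_cache;
 *
 *	static int
 *	conn_ctor(void *buf, void *private, int flags)
 *	{
 *		conn_t *cp = buf;
 *
 *		(void) mutex_init(&cp->c_lock, USYNC_THREAD, NULL);
 *		cp->c_state = 0;
 *		return (0);
 *	}
 *
 *	static void
 *	conn_dtor(void *buf, void *private)
 *	{
 *		(void) mutex_destroy(&((conn_t *)buf)->c_lock);
 *	}
 *
 *	conn_cache = umem_cache_create("conn_cache", sizeof (conn_t), 0,
 *	    conn_ctor, conn_dtor, NULL, NULL, NULL, 0);
 *	if (conn_cache == NULL)
 *		...		errno is EINVAL or EAGAIN, as set above
 */
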
2689 void
2690 umem_cache_destroy(umem_cache_t *cp)
2691 {
2692         int cpu_seqid;
2693
2694         /*
2695          * Remove the cache from the global cache list so that no new updates
2696          * will be scheduled on its behalf, wait for any pending tasks to
2697          * complete, purge the cache, and then destroy it.
2698          */
2699         (void) mutex_lock(&umem_cache_lock);
2700         cp->cache_prev->cache_next = cp->cache_next;
2701         cp->cache_next->cache_prev = cp->cache_prev;
2702         cp->cache_prev = cp->cache_next = NULL;
2703         (void) mutex_unlock(&umem_cache_lock);
2704
2705         umem_remove_updates(cp);
2706
2707         umem_cache_magazine_purge(cp);
2708
2709         (void) mutex_lock(&cp->cache_lock);
2710         if (cp->cache_buftotal != 0)
2711                 log_message("umem_cache_destroy: '%s' (%p) not empty\n",
2712                     cp->cache_name, (void *)cp);
2713         cp->cache_reclaim = NULL;
2714         /*
2715          * The cache is now dead.  There should be no further activity.
2716          * We enforce this by setting land mines in the constructor and
2717          * destructor routines that induce a segmentation fault if invoked.
2718          */
2719         cp->cache_constructor = (umem_constructor_t *)1;
2720         cp->cache_destructor = (umem_destructor_t *)2;
2721         (void) mutex_unlock(&cp->cache_lock);
2722
2723         if (cp->cache_hash_table != NULL)
2724                 vmem_free(umem_hash_arena, cp->cache_hash_table,
2725                     (cp->cache_hash_mask + 1) * sizeof (void *));
2726
2727         for (cpu_seqid = 0; cpu_seqid < umem_max_ncpus; cpu_seqid++)
2728                 (void) mutex_destroy(&cp->cache_cpu[cpu_seqid].cc_lock);
2729
2730         (void) mutex_destroy(&cp->cache_depot_lock);
2731         (void) mutex_destroy(&cp->cache_lock);
2732
2733         vmem_free(umem_cache_arena, cp, UMEM_CACHE_SIZE(umem_max_ncpus));
2734 }
2735
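/*
 * Illustrative note: every object is expected to be returned to the
 * cache before it is destroyed; the "not empty" message above indicates
 * leaked buffers.  A typical teardown (hypothetical names):
 *
 *	umem_cache_free(conn_cache, cp);	for each outstanding object
 *	umem_cache_destroy(conn_cache);
 *	conn_cache = NULL;
 */
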
2736 static int
2737 umem_cache_init(void)
2738 {
2739         int i;
2740         size_t size, max_size;
2741         umem_cache_t *cp;
2742         umem_magtype_t *mtp;
2743         char name[UMEM_CACHE_NAMELEN + 1];
2744         umem_cache_t *umem_alloc_caches[NUM_ALLOC_SIZES];
2745
2746         for (i = 0; i < sizeof (umem_magtype) / sizeof (*mtp); i++) {
2747                 mtp = &umem_magtype[i];
2748                 (void) snprintf(name, sizeof (name), "umem_magazine_%d",
2749                     mtp->mt_magsize);
2750                 mtp->mt_cache = umem_cache_create(name,
2751                     (mtp->mt_magsize + 1) * sizeof (void *),
2752                     mtp->mt_align, NULL, NULL, NULL, NULL,
2753                     umem_internal_arena, UMC_NOHASH | UMC_INTERNAL);
2754                 if (mtp->mt_cache == NULL)
2755                         return (0);
2756         }
2757
2758         umem_slab_cache = umem_cache_create("umem_slab_cache",
2759             sizeof (umem_slab_t), 0, NULL, NULL, NULL, NULL,
2760             umem_internal_arena, UMC_NOHASH | UMC_INTERNAL);
2761
2762         if (umem_slab_cache == NULL)
2763                 return (0);
2764
2765         umem_bufctl_cache = umem_cache_create("umem_bufctl_cache",
2766             sizeof (umem_bufctl_t), 0, NULL, NULL, NULL, NULL,
2767             umem_internal_arena, UMC_NOHASH | UMC_INTERNAL);
2768
2769         if (umem_bufctl_cache == NULL)
2770                 return (0);
2771
2772         /*
2773          * The size of the umem_bufctl_audit structure depends upon
2774          * umem_stack_depth.   See umem_impl.h for details on the size
2775          * restrictions.
2776          */
2777
2778         size = UMEM_BUFCTL_AUDIT_SIZE_DEPTH(umem_stack_depth);
2779         max_size = UMEM_BUFCTL_AUDIT_MAX_SIZE;
2780
2781         if (size > max_size) {                  /* too large -- truncate */
2782                 int max_frames = UMEM_MAX_STACK_DEPTH;
2783
2784                 ASSERT(UMEM_BUFCTL_AUDIT_SIZE_DEPTH(max_frames) <= max_size);
2785
2786                 umem_stack_depth = max_frames;
2787                 size = UMEM_BUFCTL_AUDIT_SIZE_DEPTH(umem_stack_depth);
2788         }
2789
2790         umem_bufctl_audit_cache = umem_cache_create("umem_bufctl_audit_cache",
2791             size, 0, NULL, NULL, NULL, NULL, umem_internal_arena,
2792             UMC_NOHASH | UMC_INTERNAL);
2793
2794         if (umem_bufctl_audit_cache == NULL)
2795                 return (0);
2796
2797         if (vmem_backend & VMEM_BACKEND_MMAP)
2798                 umem_va_arena = vmem_create("umem_va",
2799                     NULL, 0, pagesize,
2800                     vmem_alloc, vmem_free, heap_arena,
2801                     8 * pagesize, VM_NOSLEEP);
2802         else
2803                 umem_va_arena = heap_arena;
2804
2805         if (umem_va_arena == NULL)
2806                 return (0);
2807
2808         umem_default_arena = vmem_create("umem_default",
2809             NULL, 0, pagesize,
2810             heap_alloc, heap_free, umem_va_arena,
2811             0, VM_NOSLEEP);
2812
2813         if (umem_default_arena == NULL)
2814                 return (0);
2815
2816         /*
2817          * make sure the umem_alloc table initializer is correct
2818          */
2819         i = sizeof (umem_alloc_table) / sizeof (*umem_alloc_table);
2820         ASSERT(umem_alloc_table[i - 1] == &umem_null_cache);
2821
2822         /*
2823          * Create the default caches to back umem_alloc()
2824          */
2825         for (i = 0; i < NUM_ALLOC_SIZES; i++) {
2826                 size_t cache_size = umem_alloc_sizes[i];
2827                 size_t align = 0;
2828                 /*
2829                  * If they allocate a multiple of the coherency granularity,
2830                  * they get a coherency-granularity-aligned address.
2831                  */
2832                 if (IS_P2ALIGNED(cache_size, 64))
2833                         align = 64;
2834                 if (IS_P2ALIGNED(cache_size, pagesize))
2835                         align = pagesize;
2836                 (void) snprintf(name, sizeof (name), "umem_alloc_%lu",
2837                     (long)cache_size);
2838
2839                 cp = umem_cache_create(name, cache_size, align,
2840                     NULL, NULL, NULL, NULL, NULL, UMC_INTERNAL);
2841                 if (cp == NULL)
2842                         return (0);
2843
2844                 umem_alloc_caches[i] = cp;
2845         }
2846
2847         /*
2848          * Initialization cannot fail at this point.  Make the caches
2849          * visible to umem_alloc() and friends.
2850          */
2851         size = UMEM_ALIGN;
2852         for (i = 0; i < NUM_ALLOC_SIZES; i++) {
2853                 size_t cache_size = umem_alloc_sizes[i];
2854
2855                 cp = umem_alloc_caches[i];
2856
2857                 while (size <= cache_size) {
2858                         umem_alloc_table[(size - 1) >> UMEM_ALIGN_SHIFT] = cp;
2859                         size += UMEM_ALIGN;
2860                 }
2861         }
2862         return (1);
2863 }
2864
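/*
 * Worked example (illustrative; assumes UMEM_ALIGN is 8, so
 * UMEM_ALIGN_SHIFT is 3): after the loop above, umem_alloc_table[i]
 * points at the smallest default cache whose bufsize is at least
 * (i + 1) * 8 bytes.  A umem_alloc(100, ...) request computes index
 * (100 - 1) >> 3 == 12 and is therefore satisfied in constant time by
 * the first cache in umem_alloc_sizes[] of at least 104 bytes.
 */
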
2865 /*
2866  * umem_startup() is called early on, and must be called explicitly if we're
2867  * the standalone version.
2868  */
2869 void
2870 umem_startup(caddr_t start, size_t len, size_t pagesize, caddr_t minstack,
2871     caddr_t maxstack)
2872 {
2873 #ifdef UMEM_STANDALONE
2874         int idx;
2875         /* Standalone doesn't fork */
2876 #else
2877         umem_forkhandler_init(); /* register the fork handler */
2878 #endif
2879
2880 #ifdef __lint
2881         /* make lint happy */
2882         minstack = maxstack;
2883 #endif
2884
2885 #ifdef UMEM_STANDALONE
2886         umem_ready = UMEM_READY_STARTUP;
2887         umem_init_env_ready = 0;
2888
2889         umem_min_stack = minstack;
2890         umem_max_stack = maxstack;
2891
2892         nofail_callback = NULL;
2893         umem_slab_cache = NULL;
2894         umem_bufctl_cache = NULL;
2895         umem_bufctl_audit_cache = NULL;
2896         heap_arena = NULL;
2897         heap_alloc = NULL;
2898         heap_free = NULL;
2899         umem_internal_arena = NULL;
2900         umem_cache_arena = NULL;
2901         umem_hash_arena = NULL;
2902         umem_log_arena = NULL;
2903         umem_oversize_arena = NULL;
2904         umem_va_arena = NULL;
2905         umem_default_arena = NULL;
2906         umem_firewall_va_arena = NULL;
2907         umem_firewall_arena = NULL;
2908         umem_memalign_arena = NULL;
2909         umem_transaction_log = NULL;
2910         umem_content_log = NULL;
2911         umem_failure_log = NULL;
2912         umem_slab_log = NULL;
2913         umem_cpu_mask = 0;
2914
2915         umem_cpus = &umem_startup_cpu;
2916         umem_startup_cpu.cpu_cache_offset = UMEM_CACHE_SIZE(0);
2917         umem_startup_cpu.cpu_number = 0;
2918
2919         bcopy(&umem_null_cache_template, &umem_null_cache,
2920             sizeof (umem_cache_t));
2921
2922         for (idx = 0; idx < (UMEM_MAXBUF >> UMEM_ALIGN_SHIFT); idx++)
2923                 umem_alloc_table[idx] = &umem_null_cache;
2924 #endif
2925
2926         /*
2927          * Perform initialization specific to the way we've been compiled
2928          * (library or standalone)
2929          */
2930         umem_type_init(start, len, pagesize);
2931
2932         vmem_startup();
2933 }
2934
2935 int
2936 umem_init(void)
2937 {
2938         size_t maxverify, minfirewall;
2939         size_t size;
2940         int idx;
2941         umem_cpu_t *new_cpus;
2942
2943         vmem_t *memalign_arena, *oversize_arena;
2944
2945         if (thr_self() != umem_init_thr) {
2946                 /*
2947                  * The usual case -- non-recursive invocation of umem_init().
2948                  */
2949                 (void) mutex_lock(&umem_init_lock);
2950                 if (umem_ready != UMEM_READY_STARTUP) {
2951                         /*
2952                          * someone else beat us to initializing umem.  Wait
2953                          * for them to complete, then return.
2954                          */
2955                         while (umem_ready == UMEM_READY_INITING)
2956                                 (void) _cond_wait(&umem_init_cv,
2957                                     &umem_init_lock);
2958                         ASSERT(umem_ready == UMEM_READY ||
2959                             umem_ready == UMEM_READY_INIT_FAILED);
2960                         (void) mutex_unlock(&umem_init_lock);
2961                         return (umem_ready == UMEM_READY);
2962                 }
2963
2964                 ASSERT(umem_ready == UMEM_READY_STARTUP);
2965                 ASSERT(umem_init_env_ready == 0);
2966
2967                 umem_ready = UMEM_READY_INITING;
2968                 umem_init_thr = thr_self();
2969
2970                 (void) mutex_unlock(&umem_init_lock);
2971                 umem_setup_envvars(0);          /* can recurse -- see below */
2972                 if (umem_init_env_ready) {
2973                         /*
2974                          * initialization was completed already
2975                          */
2976                         ASSERT(umem_ready == UMEM_READY ||
2977                             umem_ready == UMEM_READY_INIT_FAILED);
2978                         ASSERT(umem_init_thr == 0);
2979                         return (umem_ready == UMEM_READY);
2980                 }
2981         } else if (!umem_init_env_ready) {
2982                 /*
2983                  * The umem_setup_envvars() call (above) makes calls into
2984                  * the dynamic linker and directly into user-supplied code.
2985                  * Since we cannot know what that code will do, we could be
2986                  * recursively invoked (by, say, a malloc() call in the code
2987                  * itself, or in a (C++) _init section it causes to be fired).
2988                  *
2989                  * This code is where we end up if such recursion occurs.  We
2990                  * first clean up any partial results in the envvar code, then
2991                  * proceed to finish initialization processing in the recursive
2992                  * call.  The original call will notice this, and return
2993                  * immediately.
2994                  */
2995                 umem_setup_envvars(1);          /* clean up any partial state */
2996         } else {
2997                 umem_panic(
2998                     "recursive allocation while initializing umem\n");
2999         }
3000         umem_init_env_ready = 1;
3001
3002         /*
3003          * From this point until we finish, recursion into umem_init() will
3004          * cause a umem_panic().
3005          */
3006         maxverify = minfirewall = ULONG_MAX;
3007
3008         /* LINTED constant condition */
3009         if (sizeof (umem_cpu_cache_t) != UMEM_CPU_CACHE_SIZE) {
3010                 umem_panic("sizeof (umem_cpu_cache_t) = %d, should be %d\n",
3011                     sizeof (umem_cpu_cache_t), UMEM_CPU_CACHE_SIZE);
3012         }
3013
3014         umem_max_ncpus = umem_get_max_ncpus();
3015
3016         /*
3017          * load tunables from environment
3018          */
3019         umem_process_envvars();
3020
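             /*
              * Ignore fault injection (umem_mtbf) in set-id programs, so a
              * caller's environment cannot perturb privileged processes.
              */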
3021         if (issetugid())
3022                 umem_mtbf = 0;
3023
3024         /*
3025          * set up vmem
3026          */
3027         if (!(umem_flags & UMF_AUDIT))
3028                 vmem_no_debug();
3029
3030         heap_arena = vmem_heap_arena(&heap_alloc, &heap_free);
3031
3032         pagesize = heap_arena->vm_quantum;
3033
3034         umem_internal_arena = vmem_create("umem_internal", NULL, 0, pagesize,
3035             heap_alloc, heap_free, heap_arena, 0, VM_NOSLEEP);
3036
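             /*
              * Until umem_cache_init() creates the real umem_default arena,
              * the internal arena stands in for it so the metadata caches
              * created below have a backing source.
              */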
3037         umem_default_arena = umem_internal_arena;
3038
3039         if (umem_internal_arena == NULL)
3040                 goto fail;
3041
3042         umem_cache_arena = vmem_create("umem_cache", NULL, 0, UMEM_ALIGN,
3043             vmem_alloc, vmem_free, umem_internal_arena, 0, VM_NOSLEEP);
3044
3045         umem_hash_arena = vmem_create("umem_hash", NULL, 0, UMEM_ALIGN,
3046             vmem_alloc, vmem_free, umem_internal_arena, 0, VM_NOSLEEP);
3047
3048         umem_log_arena = vmem_create("umem_log", NULL, 0, UMEM_ALIGN,
3049             heap_alloc, heap_free, heap_arena, 0, VM_NOSLEEP);
3050
3051         umem_firewall_va_arena = vmem_create("umem_firewall_va",
3052             NULL, 0, pagesize,
3053             umem_firewall_va_alloc, umem_firewall_va_free, heap_arena,
3054             0, VM_NOSLEEP);
3055
3056         if (umem_cache_arena == NULL || umem_hash_arena == NULL ||
3057             umem_log_arena == NULL || umem_firewall_va_arena == NULL)
3058                 goto fail;
3059
3060         umem_firewall_arena = vmem_create("umem_firewall", NULL, 0, pagesize,
3061             heap_alloc, heap_free, umem_firewall_va_arena, 0,
3062             VM_NOSLEEP);
3063
3064         if (umem_firewall_arena == NULL)
3065                 goto fail;
3066
3067         oversize_arena = vmem_create("umem_oversize", NULL, 0, pagesize,
3068             heap_alloc, heap_free, minfirewall < ULONG_MAX ?
3069             umem_firewall_va_arena : heap_arena, 0, VM_NOSLEEP);
3070
3071         memalign_arena = vmem_create("umem_memalign", NULL, 0, UMEM_ALIGN,
3072             heap_alloc, heap_free, minfirewall < ULONG_MAX ?
3073             umem_firewall_va_arena : heap_arena, 0, VM_NOSLEEP);
3074
3075         if (oversize_arena == NULL || memalign_arena == NULL)
3076                 goto fail;
3077
3078         if (umem_max_ncpus > CPUHINT_MAX())
3079                 umem_max_ncpus = CPUHINT_MAX();
3080
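             /*
              * Round umem_max_ncpus up to a power of two so that
              * umem_cpu_mask (umem_max_ncpus - 1, set below) works as a
              * bit mask for selecting a CPU cache.
              */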
3081         while ((umem_max_ncpus & (umem_max_ncpus - 1)) != 0)
3082                 umem_max_ncpus++;
3083
3084         if (umem_max_ncpus == 0)
3085                 umem_max_ncpus = 1;
3086
3087         size = umem_max_ncpus * sizeof (umem_cpu_t);
3088         new_cpus = vmem_alloc(umem_internal_arena, size, VM_NOSLEEP);
3089         if (new_cpus == NULL)
3090                 goto fail;
3091
3092         bzero(new_cpus, size);
3093         for (idx = 0; idx < umem_max_ncpus; idx++) {
3094                 new_cpus[idx].cpu_number = idx;
3095                 new_cpus[idx].cpu_cache_offset = UMEM_CACHE_SIZE(idx);
3096         }
3097         umem_cpus = new_cpus;
3098         umem_cpu_mask = (umem_max_ncpus - 1);
3099
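             /*
              * Tunables left at zero (not set via the environment) fall back
              * to the defaults chosen above -- ULONG_MAX, i.e. unlimited
              * verification and firewalling disabled.
              */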
3100         if (umem_maxverify == 0)
3101                 umem_maxverify = maxverify;
3102
3103         if (umem_minfirewall == 0)
3104                 umem_minfirewall = minfirewall;
3105
3106         /*
3107          * Set up updating and reaping
3108          */
3109         umem_reap_next = gethrtime() + NANOSEC;
3110
3111 #ifndef UMEM_STANDALONE
3112         (void) gettimeofday(&umem_update_next, NULL);
3113 #endif
3114
3115         /*
3116          * Set up logging -- failure here is okay, since it will just disable
3117          * the logs
3118          */
3119         if (umem_logging) {
3120                 umem_transaction_log = umem_log_init(umem_transaction_log_size);
3121                 umem_content_log = umem_log_init(umem_content_log_size);
3122                 umem_failure_log = umem_log_init(umem_failure_log_size);
3123                 umem_slab_log = umem_log_init(umem_slab_log_size);
3124         }
3125
3126         /*
3127          * Set up caches -- if successful, initialization cannot fail, since
3128          * allocations from other threads can now succeed.
3129          */
3130         if (umem_cache_init() == 0) {
3131                 log_message("unable to create initial caches\n");
3132                 goto fail;
3133         }
3134         umem_oversize_arena = oversize_arena;
3135         umem_memalign_arena = memalign_arena;
3136
3137         umem_cache_applyall(umem_cache_magazine_enable);
3138
3139         /*
3140          * initialization done, ready to go
3141          */
3142         (void) mutex_lock(&umem_init_lock);
3143         umem_ready = UMEM_READY;
3144         umem_init_thr = 0;
3145         (void) cond_broadcast(&umem_init_cv);
3146         (void) mutex_unlock(&umem_init_lock);
3147         return (1);
3148
3149 fail:
3150         log_message("umem initialization failed\n");
3151
3152         (void) mutex_lock(&umem_init_lock);
3153         umem_ready = UMEM_READY_INIT_FAILED;
3154         umem_init_thr = 0;
3155         (void) cond_broadcast(&umem_init_cv);
3156         (void) mutex_unlock(&umem_init_lock);
3157         return (0);
3158 }