root/src/noit_check.c

Revision 8a26fbab8eeb5faefa109ddedfc52a6e3d4af181, 67.8 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 1 week ago)

Add a sequence number to checks.

If the 'seq' attribute exists, enforce that it increases monotonically.

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  * Copyright (c) 2015, Circonus, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above
13  *       copyright notice, this list of conditions and the following
14  *       disclaimer in the documentation and/or other materials provided
15  *       with the distribution.
16  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
17  *       of its contributors may be used to endorse or promote products
18  *       derived from this software without specific prior written
19  *       permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "noit_config.h"
35 #include <mtev_defines.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <ctype.h>
41 #include <assert.h>
42 #include <errno.h>
43 #include <netinet/in.h>
44 #include <arpa/inet.h>
45 #include <time.h>
46
47 #include <eventer/eventer.h>
48 #include <mtev_memory.h>
49 #include <mtev_log.h>
50 #include <mtev_hash.h>
51 #include <mtev_skiplist.h>
52 #include <mtev_watchdog.h>
53 #include <mtev_conf.h>
54 #include <mtev_console.h>
55
56 #include "noit_mtev_bridge.h"
57 #include "noit_dtrace_probes.h"
58 #include "noit_check.h"
59 #include "noit_module.h"
60 #include "noit_check_tools.h"
61 #include "noit_check_resolver.h"
62
/* Initial value of text_size_limit, and the fallback noit_poller_init()
 * applies when the configured limit is not positive. */
63 #define DEFAULT_TEXT_METRIC_SIZE_LIMIT  512
/* Seconds argument passed to eventer_add_in_s_us() when noit_poller_init()
 * schedules check_recycle_bin_processor. */
64 #define RECYCLE_INTERVAL 60
65
/*
 * Hook points instantiated by this file through the MTEV_HOOK_IMPL macro
 * (the macro itself is defined outside this file).
 * NOTE(review): the argument groups appear to be, in order: hook name,
 * caller-side prototype, closure type, closure name, callback prototype and
 * the argument list forwarded to callbacks -- confirm against the libmtev
 * hooks header.
 */
66 MTEV_HOOK_IMPL(check_config_fixup,
67   (noit_check_t *check),
68   void *, closure,
69   (void *closure, noit_check_t *check),
70   (closure,check))
71
72 MTEV_HOOK_IMPL(check_stats_set_metric,
73   (noit_check_t *check, stats_t *stats, metric_t *m),
74   void *, closure,
75   (void *closure, noit_check_t *check, stats_t *stats, metric_t *m),
76   (closure,check,stats,m))
77
78 MTEV_HOOK_IMPL(check_stats_set_metric_coerce,
79   (noit_check_t *check, stats_t *stats, const char *name,
80    metric_type_t type, const char *v, mtev_boolean success),
81   void *, closure,
82   (void *closure, noit_check_t *check, stats_t *stats, const char *name,
83    metric_type_t type, const char *v, mtev_boolean success),
84   (closure,check,stats,name,type,v,success))
85
86 MTEV_HOOK_IMPL(check_passive_log_stats,
87   (noit_check_t *check),
88   void *, closure,
89   (void *closure, noit_check_t *check),
90   (closure,check))
91
92 MTEV_HOOK_IMPL(check_log_stats,
93   (noit_check_t *check),
94   void *, closure,
95   (void *closure, noit_check_t *check),
96   (closure,check))
97
98 /* Scheduling slots are 20 ms wide and together cover a 60 second window. */
/* Width of one scheduling slot, in milliseconds. */
99 #define SCHEDULE_GRANULARITY 20
/* Number of scheduling slots that make up one second. */
100 #define SLOTS_PER_SECOND (1000/SCHEDULE_GRANULARITY)
/* Capacity of the reg_module_names table. */
101 #define MAX_MODULE_REGISTRATIONS 64
102
103 /* Used to manage per-check generic module metadata: an opaque pointer
 * paired with a function pointer.
 * NOTE(review): freefunc is presumably the routine that releases ptr --
 * confirm at the use sites, which are outside this view. */
104 struct vp_w_free {
105   void *ptr;
106   void (*freefunc)(void *);
107 };
108
/* When false, noit_poller_make_causal_map() returns immediately; that
 * function sets it to true whenever it finds a check with an oncheck. */
109 static mtev_boolean system_needs_causality = mtev_false;
/* Overwritten from the "noit/@text_size_limit" config attribute in
 * noit_poller_init(); non-positive values fall back to the default. */
110 static int text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
/* Upper bound used when iterating reg_module_names and per-module options. */
111 static int reg_module_id = 0;
112 static char *reg_module_names[MAX_MODULE_REGISTRATIONS] = { NULL };
113 static int reg_module_used = -1;
/* Counters returned by noit_check_completion_count() and
 * noit_check_metric_count() respectively. */
114 static u_int64_t check_completion_count = 0ULL;
115 static u_int64_t check_metrics_seen = 0ULL;
/* Held by this file around accesses to polls and polls_by_name. */
116 static pthread_mutex_t polls_lock = PTHREAD_MUTEX_INITIALIZER;
/* Checks keyed by their UUID_SIZE-byte uuid. */
117 static mtev_hash_table polls = MTEV_HASH_EMPTY;
/* Filled by noit_check_dns_ignore_tld(): extension -> ignore string. */
118 static mtev_hash_table dns_ignore_list = MTEV_HASH_EMPTY;
/* Ordered by __watchlist_compare (checkid, then period). */
119 static mtev_skiplist watchlist = { 0 };
/* Ordered by __check_name_compare (target, then name). */
120 static mtev_skiplist polls_by_name = { 0 };
/* Incremented once per noit_poller_process_checks() call. */
121 static u_int32_t __config_load_generation = 0;
/* Counters per SCHEDULE_GRANULARITY-ms slot and per second of a 60 second
 * window; adjusted together by check_slots_adjust_tv(). */
122 static unsigned short check_slots_count[60000 / SCHEDULE_GRANULARITY] = { 0 },
123                       check_slots_seconds_count[60] = { 0 };
124
125 static noit_check_t *
126 noit_poller_lookup__nolock(uuid_t in) {
127   void *vcheck;
128   if(mtev_hash_retrieve(&polls, (char *)in, UUID_SIZE, &vcheck))
129     return (noit_check_t *)vcheck;
130   return NULL;
131 }
132 static noit_check_t *
133 noit_poller_lookup_by_name__nolock(char *target, char *name) {
134   noit_check_t tmp_check;
135   memset(&tmp_check, 0, sizeof(tmp_check));
136   tmp_check.target = target;
137   tmp_check.name = name;
138   return mtev_skiplist_find(&polls_by_name, &tmp_check, NULL);
139 }
140
141 static int
142 noit_console_show_timing_slots(mtev_console_closure_t ncct,
143                                int argc, char **argv,
144                                mtev_console_state_t *dstate,
145                                void *closure) {
146   int i, j;
147   const int upl = (60000 / SCHEDULE_GRANULARITY) / 60;
148   for(i=0;i<60;i++) {
149     nc_printf(ncct, "[%02d] %04d: ", i, check_slots_seconds_count[i]);
150     for(j=i*upl;j<(i+1)*upl;j++) {
151       char cp = '!';
152       if(check_slots_count[j] < 10) cp = '0' + check_slots_count[j];
153       else if(check_slots_count[j] < 36) cp = 'a' + (check_slots_count[j] - 10);
154       nc_printf(ncct, "%c", cp);
155     }
156     nc_printf(ncct, "\n");
157   }
158   return 0;
159 }
160 static int
161 noit_check_add_to_list(noit_check_t *new_check, const char *newname) {
162   char *oldname = NULL, *newnamecopy;
163   if(newname) {
164     /* track this stuff outside the lock to avoid allocs */
165     oldname = new_check->name;
166     newnamecopy = strdup(newname);
167   }
168   pthread_mutex_lock(&polls_lock);
169   if(!(new_check->flags & NP_TRANSIENT)) {
170     assert(new_check->name || newname);
171     /* This remove could fail -- no big deal */
172     if(new_check->name != NULL)
173       mtev_skiplist_remove(&polls_by_name, new_check, NULL);
174
175     /* optional update the name (at the critical point) */
176     if(newname) new_check->name = newnamecopy;
177
178     /* This insert could fail.. which means we have a conflict on
179      * target`name.  That should result in the check being disabled. */
180     if(!mtev_skiplist_insert(&polls_by_name, new_check)) {
181       mtevL(noit_error, "Check %s`%s disabled due to naming conflict\n",
182             new_check->target, new_check->name);
183       new_check->flags |= NP_DISABLED;
184     }
185     if(oldname) free(oldname);
186   }
187   pthread_mutex_unlock(&polls_lock);
188   return 1;
189 }
190
191 u_int64_t noit_check_metric_count() {
192   return check_metrics_seen;
193 }
194 void noit_check_metric_count_add(int add) {
195   mtev_atomic64_t *n = (mtev_atomic64_t *)&check_metrics_seen;
196   mtev_atomic64_t v = (mtev_atomic64_t)add;
197   mtev_atomic_add64(n, v);
198 }
199
200 u_int64_t noit_check_completion_count() {
201   return check_completion_count;
202 }
/* Forward declarations for helpers that noit_poller_init() uses; their
 * definitions are not in this part of the file. */
203 static void register_console_check_commands();
/* Eventer callback: noit_poller_init() names it and schedules it to run
 * RECYCLE_INTERVAL seconds later. */
204 static int check_recycle_bin_processor(eventer_t, int, void *,
205                                        struct timeval *);
206
207 static int
208 check_slots_find_smallest(int sec, struct timeval* period) {
209   int i, j, cyclic, random_offset, jbase = 0, mini = 0, minj = 0;
210   unsigned short min_running_i = 0xffff, min_running_j = 0xffff;
211   int period_seconds = period->tv_sec;
212
213   /* If we're greater than sixty seconds, we should do our
214    * initial scheduling as if the period was sixty seconds. */
215   if (period_seconds > 60)
216     period_seconds = 60;
217
218   for(i=0;i<period_seconds;i++) {
219     int adj_i = (i + sec) % 60;
220     if(check_slots_seconds_count[adj_i] < min_running_i) {
221       min_running_i = check_slots_seconds_count[adj_i];
222       mini = adj_i;
223     }
224   }
225   jbase = mini * (1000/SCHEDULE_GRANULARITY);
226   random_offset = drand48() * SLOTS_PER_SECOND;
227   for(cyclic=0;cyclic<SLOTS_PER_SECOND;cyclic++) {
228     j = jbase + ((random_offset + cyclic) % SLOTS_PER_SECOND);
229     if(check_slots_count[j] < min_running_j) {
230       min_running_j = check_slots_count[j];
231       minj = j;
232     }
233   }
234   return (minj * SCHEDULE_GRANULARITY) + drand48() * SCHEDULE_GRANULARITY;
235 }
236 static void
237 check_slots_adjust_tv(struct timeval *tv, short adj) {
238   int offset_ms, idx;
239   offset_ms = (tv->tv_sec % 60) * 1000 + (tv->tv_usec / 1000);
240   idx = offset_ms / SCHEDULE_GRANULARITY;
241   check_slots_count[idx] += adj;
242   check_slots_seconds_count[offset_ms / 1000] += adj;
243 }
/* Increment the slot counters for the time `tv`. */
void check_slots_inc_tv(struct timeval *tv) {
  const short delta = 1;
  check_slots_adjust_tv(tv, delta);
}
/* Decrement the slot counters for the time `tv`. */
void check_slots_dec_tv(struct timeval *tv) {
  const short delta = -1;
  check_slots_adjust_tv(tv, delta);
}
/*
 * Return 1 when `p` is non-NULL and every byte of it is printable according
 * to isprint(); return 0 otherwise.  An empty string counts as safe.
 */
static int
noit_check_generic_safe_string(const char *p) {
  if(!p) return 0;
  for(;*p;p++) {
    /* <ctype.h> functions require a value representable as unsigned char
     * (or EOF); a plain char with the high bit set may be negative. */
    if(!isprint((unsigned char)*p)) return 0;
  }
  return 1;
}
/* Return 1 if `p` is an acceptable check target (non-NULL and printable
 * per noit_check_generic_safe_string), 0 otherwise. */
int
noit_check_validate_target(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
/* Return 1 if `p` is an acceptable check name (non-NULL and printable
 * per noit_check_generic_safe_string), 0 otherwise. */
int
noit_check_validate_name(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
268 const char *
269 noit_check_available_string(int16_t available) {
270   switch(available) {
271     case NP_AVAILABLE:    return "available";
272     case NP_UNAVAILABLE:  return "unavailable";
273     case NP_UNKNOWN:      return "unknown";
274   }
275   return NULL;
276 }
277 const char *
278 noit_check_state_string(int16_t state) {
279   switch(state) {
280     case NP_GOOD:         return "good";
281     case NP_BAD:          return "bad";
282     case NP_UNKNOWN:      return "unknown";
283   }
284   return NULL;
285 }
286 static int __check_name_compare(const void *a, const void *b) {
287   const noit_check_t *ac = a;
288   const noit_check_t *bc = b;
289   int rv;
290   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
291   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
292   return 0;
293 }
294 static int __watchlist_compare(const void *a, const void *b) {
295   const noit_check_t *ac = a;
296   const noit_check_t *bc = b;
297   int rv;
298   if((rv = memcmp(ac->checkid, bc->checkid, sizeof(ac->checkid))) != 0) return rv;
299   if(ac->period < bc->period) return -1;
300   if(ac->period == bc->period) return 0;
301   return 1;
302 }
303 static int __check_target_ip_compare(const void *a, const void *b) {
304   const noit_check_t *ac = a;
305   const noit_check_t *bc = b;
306   int rv;
307   if((rv = strcmp(ac->target_ip, bc->target_ip)) != 0) return rv;
308   if (ac->name == NULL) return 1;
309   if (bc->name == NULL) return -1;
310   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
311   return 1;
312 }
313 static int __check_target_compare(const void *a, const void *b) {
314   const noit_check_t *ac = a;
315   const noit_check_t *bc = b;
316   int rv;
317   if (ac->target == NULL) return 1;
318   if (bc->target == NULL) return -1;
319   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
320   if (ac->name == NULL) return 1;
321   if (bc->name == NULL) return -1;
322   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
323   return 1;
324 }
325 int
326 noit_calc_rtype_flag(char *resolve_rtype) {
327   int flags = 0;
328   if(resolve_rtype) {
329     flags |= strcmp(resolve_rtype, PREFER_IPV6) == 0 ||
330              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_PREFER_IPV6 : 0;
331     flags |= strcmp(resolve_rtype, FORCE_IPV4) == 0 ||
332              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_SINGLE_RESOLVE : 0;
333   }
334   return flags;
335 }
/*
 * Compute a synthetic "last check" time for `check` and store it in *lc.
 *
 * check: the check whose period and flags drive the computation.
 * lc:    output; always written.
 * _now:  reference time, or NULL to use gettimeofday().
 *
 * *lc starts as (now - period).  For non-transient checks with a non-zero
 * period it is then re-based onto a slot chosen by
 * check_slots_find_smallest() and shifted by whole periods.  For every
 * non-transient check the slot counters are incremented for the chosen
 * (unshifted) time.
 */
336 void
337 noit_check_fake_last_check(noit_check_t *check,
338                            struct timeval *lc, struct timeval *_now) {
339   struct timeval now, period, lc_copy;
340   int balance_ms;
341
342   if(!_now) {
343     gettimeofday(&now, NULL);
344     _now = &now;
345   }
/* check->period is split as milliseconds into a timeval. */
346   period.tv_sec = check->period / 1000;
347   period.tv_usec = (check->period % 1000) * 1000;
348   sub_timeval(*_now, period, lc);
349
350   /* We need to set the last check value based on the period, but
351    * we also need to store a value that is based around the one-minute
352    * time to properly increment the slots; otherwise, the slots will
353    * get all messed up */
354   if(!(check->flags & NP_TRANSIENT) && check->period) {
355     balance_ms = check_slots_find_smallest(_now->tv_sec+1, &period);
/* Truncate lc to the start of its minute, then add the chosen offset. */
356     lc->tv_sec = (lc->tv_sec / 60) * 60 + balance_ms / 1000;
357     lc->tv_usec = (balance_ms % 1000) * 1000;
/* Keep the slot-aligned value; the loops below modify *lc. */
358     memcpy(&lc_copy, lc, sizeof(lc_copy));
/* NOTE(review): compare_timeval(a, b) < 0 is presumably "a earlier than b";
 * under that reading the first branch steps *lc back by whole periods while
 * it lies after now, and the second steps it forward while adding another
 * period would not pass now -- confirm against compare_timeval. */
359     if(compare_timeval(*_now, *lc) < 0) {
360       do {
361         sub_timeval(*lc, period, lc);
362       } while(compare_timeval(*_now, *lc) < 0);
363     }
364     else {
365       struct timeval test;
366       while(1) {
367         add_timeval(*lc, period, &test);
368         if(compare_timeval(*_now, test) < 0) break;
369         memcpy(lc, &test, sizeof(test));
370       }
371     }
372   }
373   else {
374     memcpy(&lc_copy, lc, sizeof(lc_copy));
375   }
376  
377   /* now, we're going to do an even distribution using the slots */
378   if(!(check->flags & NP_TRANSIENT)) check_slots_inc_tv(&lc_copy);
379 }
380 void
381 noit_poller_process_checks(const char *xpath) {
382   int i, flags, cnt = 0, found;
383   mtev_conf_section_t *sec;
384   __config_load_generation++;
385   sec = mtev_conf_get_sections(NULL, xpath, &cnt);
386   for(i=0; i<cnt; i++) {
387     void *vcheck;
388     char uuid_str[37];
389     char target[256] = "";
390     char module[256] = "";
391     char name[256] = "";
392     char filterset[256] = "";
393     char oncheck[1024] = "";
394     char resolve_rtype[16] = "";
395     int ridx;
396     int no_period = 0;
397     int no_oncheck = 0;
398     int period = 0, timeout = 0;
399     mtev_boolean disabled = mtev_false, busted = mtev_false;
400     uuid_t uuid, out_uuid;
401     int64_t config_seq = 0;
402     mtev_hash_table *options;
403     mtev_hash_table **moptions = NULL;
404     mtev_boolean moptions_used = mtev_false, backdated = mtev_false;
405
406     /* We want to heartbeat here... otherwise, if a lot of checks are
407      * configured or if we're running on a slower system, we could
408      * end up getting watchdog killed before we get a chance to run
409      * any checks */
410     mtev_watchdog_child_heartbeat();
411
412     if(reg_module_id > 0) {
413       moptions = alloca(reg_module_id * sizeof(mtev_hash_table *));
414       memset(moptions, 0, reg_module_id * sizeof(mtev_hash_table *));
415       moptions_used = mtev_true;
416     }
417
418 #define NEXT(...) mtevL(noit_stderr, __VA_ARGS__); continue
419 #define MYATTR(type,a,...) mtev_conf_get_##type(sec[i], "@" #a, __VA_ARGS__)
420 #define INHERIT(type,a,...) \
421   mtev_conf_get_##type(sec[i], "ancestor-or-self::node()/@" #a, __VA_ARGS__)
422
423     if(!MYATTR(stringbuf, uuid, uuid_str, sizeof(uuid_str))) {
424       mtevL(noit_stderr, "check %d has no uuid\n", i+1);
425       continue;
426     }
427
428     MYATTR(int64, seq, &config_seq);
429
430     if(uuid_parse(uuid_str, uuid)) {
431       mtevL(noit_stderr, "check uuid: '%s' is invalid\n", uuid_str);
432       continue;
433     }
434
435     if(!INHERIT(stringbuf, target, target, sizeof(target))) {
436       mtevL(noit_stderr, "check uuid: '%s' has no target\n", uuid_str);
437       busted = mtev_true;
438     }
439     if(!noit_check_validate_target(target)) {
440       mtevL(noit_stderr, "check uuid: '%s' has malformed target\n", uuid_str);
441       busted = mtev_true;
442     }
443     if(!INHERIT(stringbuf, module, module, sizeof(module))) {
444       mtevL(noit_stderr, "check uuid: '%s' has no module\n", uuid_str);
445       busted = mtev_true;
446     }
447
448     if(!INHERIT(stringbuf, filterset, filterset, sizeof(filterset)))
449       filterset[0] = '\0';
450    
451     if (!INHERIT(stringbuf, resolve_rtype, resolve_rtype, sizeof(resolve_rtype)))
452       strlcpy(resolve_rtype, PREFER_IPV4, sizeof(resolve_rtype));
453
454     if(!MYATTR(stringbuf, name, name, sizeof(name)))
455       strlcpy(name, module, sizeof(name));
456
457     if(!noit_check_validate_name(name)) {
458       mtevL(noit_stderr, "check uuid: '%s' has malformed name\n", uuid_str);
459       busted = mtev_true;
460     }
461
462     if(!INHERIT(int, period, &period) || period == 0)
463       no_period = 1;
464
465     if(!INHERIT(stringbuf, oncheck, oncheck, sizeof(oncheck)) || !oncheck[0])
466       no_oncheck = 1;
467
468     if(no_period && no_oncheck) {
469       mtevL(noit_stderr, "check uuid: '%s' has neither period nor oncheck\n",
470             uuid_str);
471       busted = mtev_true;
472     }
473     if(!(no_period || no_oncheck)) {
474       mtevL(noit_stderr, "check uuid: '%s' has oncheck and period.\n",
475             uuid_str);
476       busted = mtev_true;
477     }
478     if(!INHERIT(int, timeout, &timeout)) {
479       mtevL(noit_stderr, "check uuid: '%s' has no timeout\n", uuid_str);
480       busted = mtev_true;
481     }
482     if(!no_period && timeout >= period) {
483       mtevL(noit_stderr, "check uuid: '%s' timeout > period\n", uuid_str);
484       timeout = period/2;
485     }
486     options = mtev_conf_get_hash(sec[i], "config");
487     for(ridx=0; ridx<reg_module_id; ridx++) {
488       moptions[ridx] = mtev_conf_get_namespaced_hash(sec[i], "config",
489                                                      reg_module_names[ridx]);
490     }
491
492     INHERIT(boolean, disable, &disabled);
493     flags = 0;
494     if(busted) flags |= (NP_UNCONFIG|NP_DISABLED);
495     else if(disabled) flags |= NP_DISABLED;
496
497     flags |= noit_calc_rtype_flag(resolve_rtype);
498
499     pthread_mutex_lock(&polls_lock);
500     found = mtev_hash_retrieve(&polls, (char *)uuid, UUID_SIZE, &vcheck);
501     if(found) {
502       noit_check_t *check = (noit_check_t *)vcheck;
503       /* Possibly reset the seq */
504       if(config_seq < 0) check->config_seq = 0;
505
506       /* Otherwise note a non-increasing sequence */
507       if(check->config_seq > config_seq) backdated = mtev_true;
508     }
509     pthread_mutex_unlock(&polls_lock);
510     if(found)
511       noit_poller_deschedule(uuid);
512     if(backdated) {
513       mtevL(noit_error, "Check config seq backwards, ignored\n");
514     }
515     else {
516       noit_poller_schedule(target, module, name, filterset, options,
517                            moptions_used ? moptions : NULL,
518                            period, timeout, oncheck[0] ? oncheck : NULL,
519                            config_seq, flags, uuid, out_uuid);
520       mtevL(noit_debug, "loaded uuid: %s\n", uuid_str);
521     }
522     for(ridx=0; ridx<reg_module_id; ridx++) {
523       if(moptions[ridx]) {
524         mtev_hash_destroy(moptions[ridx], free, free);
525         free(moptions[ridx]);
526       }
527     }
528     mtev_hash_destroy(options, free, free);
529     free(options);
530   }
531   if(sec) free(sec);
532 }
533
534 int
535 noit_check_activate(noit_check_t *check) {
536   noit_module_t *mod;
537   if(NOIT_CHECK_LIVE(check)) return 0;
538   mod = noit_module_lookup(check->module);
539   if(mod && mod->initiate_check) {
540     if((check->flags & NP_DISABLED) == 0) {
541       mod->initiate_check(mod, check, 0, NULL);
542       return 1;
543     }
544     else
545       mtevL(noit_debug, "Skipping %s`%s, disabled.\n",
546             check->target, check->name);
547   }
548   else {
549     if(!mod) {
550       mtevL(noit_stderr, "Cannot find module '%s'\n", check->module);
551       check->flags |= NP_DISABLED;
552     }
553   }
554   return 0;
555 }
556
557 void
558 noit_poller_initiate() {
559   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
560   uuid_t key_id;
561   int klen;
562   void *vcheck;
563   /* This is only ever called in the beginning, no lock needed */
564   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
565                        &vcheck)) {
566     noit_check_activate((noit_check_t *)vcheck);
567     mtev_watchdog_child_heartbeat();
568   }
569 }
570
571 void
572 noit_poller_flush_epoch(int oldest_allowed) {
573   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
574   uuid_t key_id;
575   int klen, i;
576   void *vcheck;
577 #define TOFREE_PER_ITER 1024
578   noit_check_t *tofree[TOFREE_PER_ITER];
579
580   /* Cleanup any previous causal map */
581   while(1) {
582     i = 0;
583     pthread_mutex_lock(&polls_lock);
584     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
585                          &vcheck) && i < TOFREE_PER_ITER) {
586       noit_check_t *check = (noit_check_t *)vcheck;
587       if(check->generation < oldest_allowed) {
588         tofree[i++] = check;
589       }
590     }
591     pthread_mutex_unlock(&polls_lock);
592     if(i==0) break;
593     while(i>0) noit_poller_deschedule(tofree[--i]->checkid);
594   }
595 #undef TOFREE_PER_ITER
596 }
597
598 void
599 noit_poller_make_causal_map() {
600   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
601   uuid_t key_id;
602   int klen;
603   void *vcheck;
604
605   if(!system_needs_causality) return;
606
607   /* set it to false, we'll set it to true during the scan if we
608    * find anything causal.  */
609   system_needs_causality = mtev_false;
610
611   /* Cleanup any previous causal map */
612   pthread_mutex_lock(&polls_lock);
613   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
614                        &vcheck)) {
615     noit_check_t *check = (noit_check_t *)vcheck;
616     dep_list_t *dep;
617     while((dep = check->causal_checks) != NULL) {
618       check->causal_checks = dep->next;
619       free(dep);
620     }
621   }
622
623   memset(&iter, 0, sizeof(iter));
624   /* Walk all checks and add check dependencies to their parents */
625   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
626                        &vcheck)) {
627     noit_check_t *check = (noit_check_t *)vcheck, *parent;
628     if(check->oncheck) {
629       /* This service is causally triggered by another service */
630       uuid_t id;
631       char fullcheck[1024];
632       char *name = check->oncheck;
633       char *target = NULL;
634
635       system_needs_causality = mtev_true;
636       mtevL(noit_debug, "Searching for upstream trigger on %s\n", name);
637       parent = NULL;
638       if(uuid_parse(check->oncheck, id) == 0) {
639         target = "";
640         parent = noit_poller_lookup__nolock(id);
641       }
642       else if((target = strchr(check->oncheck, '`')) != NULL) {
643         strlcpy(fullcheck, check->oncheck, target + 1 - check->oncheck);
644         name = target + 1;
645         target = fullcheck;
646         parent = noit_poller_lookup_by_name__nolock(target, name);
647       }
648       else {
649         target = check->target;
650         parent = noit_poller_lookup_by_name__nolock(target, name);
651       }
652
653       if(!parent) {
654         check->flags |= NP_DISABLED;
655         mtevL(noit_stderr, "Disabling check %s`%s, can't find oncheck %s`%s\n",
656               check->target, check->name, target, name);
657       }
658       else {
659         dep_list_t *dep;
660         dep = malloc(sizeof(*dep));
661         dep->check = check;
662         dep->next = parent->causal_checks;
663         parent->causal_checks = dep;
664         mtevL(noit_debug, "Causal map %s`%s --> %s`%s\n",
665               parent->target, parent->name, check->target, check->name);
666       }
667     }
668   }
669   pthread_mutex_unlock(&polls_lock);
670   /* We found some causal checks, so we might need to activate stuff */
671   if(system_needs_causality) noit_poller_initiate();
672 }
673 void
674 noit_poller_reload(const char *xpath)
675 {
676   noit_poller_process_checks(xpath ? xpath : "/noit/checks//check");
677   if(!xpath) {
678     /* Full reload, we need to wipe old checks */
679     noit_poller_flush_epoch(__config_load_generation);
680   }
681   noit_poller_make_causal_map();
682 }
683 void
684 noit_check_dns_ignore_tld(const char* extension, const char* ignore) {
685   mtev_hash_replace(&dns_ignore_list, strdup(extension), strlen(extension), strdup(ignore), NULL, NULL);
686 }
687 static void
688 noit_check_dns_ignore_list_init() {
689   mtev_conf_section_t* dns;
690   int cnt;
691
692   dns = mtev_conf_get_sections(NULL, "/noit/dns/extension", &cnt);
693   if(dns) {
694     int i = 0;
695     for (i = 0; i < cnt; i++) {
696       char* extension;
697       char* ignore;
698       if(!mtev_conf_get_string(dns[i], "self::node()/@value", &extension)) {
699         continue;
700       }
701       if(!mtev_conf_get_string(dns[i], "self::node()/@ignore", &ignore)) {
702         continue;
703       }
704       noit_check_dns_ignore_tld(extension, ignore);
705     }
706   }
707 }
708 void
709 noit_poller_init() {
710   srand48((getpid() << 16) ^ time(NULL));
711   noit_check_resolver_init();
712   noit_check_tools_init();
713   mtev_skiplist_init(&polls_by_name);
714   mtev_skiplist_set_compare(&polls_by_name, __check_name_compare,
715                             __check_name_compare);
716   mtev_skiplist_add_index(&polls_by_name, __check_target_ip_compare,
717                             __check_target_ip_compare);
718   mtev_skiplist_add_index(&polls_by_name, __check_target_compare,
719                             __check_target_compare);
720   mtev_skiplist_init(&watchlist);
721   mtev_skiplist_set_compare(&watchlist, __watchlist_compare,
722                             __watchlist_compare);
723   register_console_check_commands();
724   eventer_name_callback("check_recycle_bin_processor",
725                         check_recycle_bin_processor);
726   eventer_add_in_s_us(check_recycle_bin_processor, NULL, RECYCLE_INTERVAL, 0);
727   mtev_conf_get_int(NULL, "noit/@text_size_limit", &text_size_limit);
728   if (text_size_limit <= 0) {
729     text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
730   }
731   noit_check_dns_ignore_list_init();
732   noit_poller_reload(NULL);
733 }
734
735 int
736 noit_poller_check_count() {
737   return polls_by_name.size;
738 }
739
740 int
741 noit_poller_transient_check_count() {
742   return watchlist.size;
743 }
744
745 noit_check_t *
746 noit_check_clone(uuid_t in) {
747   int i;
748   noit_check_t *checker, *new_check;
749   void *vcheck;
750   if(mtev_hash_retrieve(&polls,
751                         (char *)in, UUID_SIZE,
752                         &vcheck) == 0) {
753     return NULL;
754   }
755   checker = (noit_check_t *)vcheck;
756   if(checker->oncheck) {
757     return NULL;
758   }
759   new_check = calloc(1, sizeof(*new_check));
760   memcpy(new_check, checker, sizeof(*new_check));
761   new_check->target = strdup(new_check->target);
762   new_check->module = strdup(new_check->module);
763   new_check->name = strdup(new_check->name);
764   new_check->filterset = strdup(new_check->filterset);
765   new_check->flags = 0;
766   new_check->fire_event = NULL;
767   memset(&new_check->last_fire_time, 0, sizeof(new_check->last_fire_time));
768   memset(&new_check->stats, 0, sizeof(new_check->stats));
769   new_check->closure = NULL;
770   new_check->config = calloc(1, sizeof(*new_check->config));
771   mtev_hash_merge_as_dict(new_check->config, checker->config);
772   new_check->module_configs = NULL;
773   new_check->module_metadata = NULL;
774
775   for(i=0; i<reg_module_id; i++) {
776     void *src_metadata;
777     mtev_hash_table *src_mconfig;
778     src_mconfig = noit_check_get_module_config(checker, i);
779     if(src_mconfig) {
780       mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
781       mtev_hash_merge_as_dict(t, src_mconfig);
782       noit_check_set_module_config(new_check, i, t);
783     }
784     if(checker->flags & NP_PASSIVE_COLLECTION)
785       if(NULL != (src_metadata = noit_check_get_module_metadata(new_check, i)))
786         noit_check_set_module_metadata(new_check, i, src_metadata, NULL);
787   }
788   return new_check;
789 }
790
791 noit_check_t *
792 noit_check_watch(uuid_t in, int period) {
793   /* First look for a copy that is being watched */
794   int minimum_pi = 1000, granularity_pi = 500;
795   mtev_conf_section_t check_node;
796   char uuid_str[UUID_STR_LEN + 1];
797   char xpath[1024];
798   noit_check_t n, *f;
799
800   uuid_unparse_lower(in, uuid_str);
801
802   mtevL(noit_debug, "noit_check_watch(%s,%d)\n", uuid_str, period);
803   if(period == 0) {
804     return noit_poller_lookup(in);
805   }
806
807   /* Find the check */
808   snprintf(xpath, sizeof(xpath), "//checks//check[@uuid=\"%s\"]", uuid_str);
809   check_node = mtev_conf_get_section(NULL, xpath);
810   mtev_conf_get_int(NULL, "//checks/@transient_min_period", &minimum_pi);
811   mtev_conf_get_int(NULL, "//checks/@transient_period_granularity", &granularity_pi);
812   if(check_node) {
813     mtev_conf_get_int(check_node,
814                       "ancestor-or-self::node()/@transient_min_period",
815                       &minimum_pi);
816     mtev_conf_get_int(check_node,
817                       "ancestor-or-self::node()/@transient_period_granularity",
818                       &granularity_pi);
819   }
820
821   /* apply the bounds */
822   period /= granularity_pi;
823   period *= granularity_pi;
824   period = MAX(period, minimum_pi);
825
826   uuid_copy(n.checkid, in);
827   n.period = period;
828
829   f = mtev_skiplist_find(&watchlist, &n, NULL);
830   if(f) return f;
831   f = noit_check_clone(in);
832   if(!f) return NULL;
833   f->period = period;
834   f->timeout = period - 10;
835   f->flags |= NP_TRANSIENT;
836   mtevL(noit_debug, "Watching %s@%d\n", uuid_str, period);
837   mtev_skiplist_insert(&watchlist, f);
838   return f;
839 }
840
841 noit_check_t *
842 noit_check_get_watch(uuid_t in, int period) {
843   noit_check_t n, *f;
844
845   uuid_copy(n.checkid, in);
846   n.period = period;
847
848   f = mtev_skiplist_find(&watchlist, &n, NULL);
849   return f;
850 }
851
852 void
853 noit_check_transient_add_feed(noit_check_t *check, const char *feed) {
854   char *feedcopy;
855   if(!check->feeds) {
856     check->feeds = calloc(1, sizeof(*check->feeds));
857     mtev_skiplist_init(check->feeds);
858     mtev_skiplist_set_compare(check->feeds,
859                               (mtev_skiplist_comparator_t)strcmp,
860                               (mtev_skiplist_comparator_t)strcmp);
861   }
862   feedcopy = strdup(feed);
863   /* No error on failure -- it's already there */
864   if(mtev_skiplist_insert(check->feeds, feedcopy) == NULL) free(feedcopy);
865   mtevL(noit_debug, "check %s`%s @ %dms has %d feed(s): %s.\n",
866         check->target, check->name, check->period, check->feeds->size, feed);
867 }
868 void
869 noit_check_transient_remove_feed(noit_check_t *check, const char *feed) {
870   if(!check->feeds) return;
871   if(feed) {
872     mtevL(noit_debug, "check %s`%s @ %dms removing 1 of %d feeds: %s.\n",
873           check->target, check->name, check->period, check->feeds->size, feed);
874     mtev_skiplist_remove(check->feeds, feed, free);
875   }
876   if(check->feeds->size == 0) {
877     char uuid_str[UUID_STR_LEN + 1];
878     uuid_unparse_lower(check->checkid, uuid_str);
879     mtevL(noit_debug, "Unwatching %s@%d\n", uuid_str, check->period);
880     mtev_skiplist_remove(&watchlist, check, NULL);
881     mtev_skiplist_destroy(check->feeds, free);
882     free(check->feeds);
883     check->feeds = NULL;
884     if(check->flags & NP_TRANSIENT) {
885       mtevL(noit_debug, "check %s`%s @ %dms has no more listeners.\n",
886             check->target, check->name, check->period);
887       check->flags |= NP_KILLED;
888     }
889     noit_poller_free_check(check);
890   }
891 }
892
893 mtev_boolean
894 noit_check_is_valid_target(const char *target) {
895   int8_t family;
896   int rv;
897   union {
898     struct in_addr addr4;
899     struct in6_addr addr6;
900   } a;
901
902   family = AF_INET;
903   rv = inet_pton(family, target, &a);
904   if(rv != 1) {
905     family = AF_INET6;
906     rv = inet_pton(family, target, &a);
907     if(rv != 1) {
908       return mtev_false;
909     }
910   }
911   return mtev_true;
912 }
913 int
914 noit_check_set_ip(noit_check_t *new_check,
915                   const char *ip_str, const char *newname) {
916   int8_t family;
917   int rv, failed = 0;
918   char old_target_ip[INET6_ADDRSTRLEN];
919   union {
920     struct in_addr addr4;
921     struct in6_addr addr6;
922   } a;
923
924   memset(old_target_ip, 0, INET6_ADDRSTRLEN);
925   strlcpy(old_target_ip, new_check->target_ip, sizeof(old_target_ip));
926
927   family = NOIT_CHECK_PREFER_V6(new_check) ? AF_INET6 : AF_INET;
928   rv = inet_pton(family, ip_str, &a);
929   if(rv != 1) {
930     if (!NOIT_CHECK_SINGLE_RESOLVE(new_check)) {
931       family = family == AF_INET ? AF_INET6 : AF_INET;
932       rv = inet_pton(family, ip_str, &a);
933       if(rv != 1) {
934         family = AF_INET;
935         memset(&a, 0, sizeof(a));
936         failed = -1;
937       }
938     } else {
939       failed = -1;
940     }
941   }
942
943   new_check->target_family = family;
944   memcpy(&new_check->target_addr, &a, sizeof(a));
945   new_check->target_ip[0] = '\0';
946   if(failed == 0)
947     if(inet_ntop(new_check->target_family,
948                  &new_check->target_addr,
949                  new_check->target_ip,
950                  sizeof(new_check->target_ip)) == NULL) {
951       mtevL(noit_error, "inet_ntop failed [%s] -> %d\n", ip_str, errno);
952     }
953   /*
954    * new_check->name could be null if this check is being set for the
955    * first time.  add_to_list will set it.
956    */
957   if (new_check->name == NULL ||
958       strcmp(old_target_ip, new_check->target_ip) != 0) {
959     noit_check_add_to_list(new_check, newname);
960   }
961
962   if(new_check->name == NULL && newname != NULL) {
963     assert(new_check->flags & NP_TRANSIENT);
964     new_check->name = strdup(newname);
965   }
966
967   return failed;
968 }
969 int
970 noit_check_resolve(noit_check_t *check) {
971   uint8_t family_pref = NOIT_CHECK_PREFER_V6(check) ? AF_INET6 : AF_INET;
972   char ipaddr[INET6_ADDRSTRLEN];
973   if(!NOIT_CHECK_SHOULD_RESOLVE(check)) return 1; /* success, not required */
974   noit_check_resolver_remind(check->target);
975   if(noit_check_resolver_fetch(check->target, ipaddr, sizeof(ipaddr),
976                                family_pref) >= 0) {
977     check->flags |= NP_RESOLVED;
978     noit_check_set_ip(check, ipaddr, NULL);
979     return 0;
980   }
981   check->flags &= ~NP_RESOLVED;
982   return -1;
983 }
984 int
985 noit_check_update(noit_check_t *new_check,
986                   const char *target,
987                   const char *name,
988                   const char *filterset,
989                   mtev_hash_table *config,
990                   mtev_hash_table **mconfigs,
991                   u_int32_t period,
992                   u_int32_t timeout,
993                   const char *oncheck,
994           int64_t seq,
995                   int flags) {
996   int mask = NP_DISABLED | NP_UNCONFIG;
997
998   assert(name);
999   if(seq < 0) new_check->config_seq = seq = 0;
1000   if(new_check->config_seq > seq) {
1001     char uuid_str[37];
1002     uuid_unparse_lower(new_check->checkid, uuid_str);
1003     mtevL(mtev_error, "noit_check_update[%s] skipped: seq backwards\n", uuid_str);
1004     return -1;
1005   }
1006
1007   if(NOIT_CHECK_RUNNING(new_check)) {
1008     char module[256];
1009     uuid_t id, dummy;
1010     uuid_copy(id, new_check->checkid);
1011     strlcpy(module, new_check->module, sizeof(module));
1012     noit_poller_deschedule(id);
1013     return noit_poller_schedule(target, module, name, filterset,
1014                                 config, mconfigs, period, timeout, oncheck,
1015                                 seq, flags, id, dummy);
1016   }
1017
1018   new_check->generation = __config_load_generation;
1019   if(new_check->target) free(new_check->target);
1020   new_check->target = strdup(target);
1021
1022   // apply resolution flags to check.
1023   if (flags & NP_PREFER_IPV6)
1024     new_check->flags |= NP_PREFER_IPV6;
1025   else
1026     new_check->flags &= ~NP_PREFER_IPV6;
1027   if (flags & NP_SINGLE_RESOLVE)
1028     new_check->flags |= NP_SINGLE_RESOLVE;
1029   else
1030     new_check->flags &= ~NP_SINGLE_RESOLVE;
1031   if (flags & NP_RESOLVE)
1032     new_check->flags |= NP_RESOLVE;
1033   else
1034     new_check->flags &= ~NP_RESOLVE;
1035
1036   /* This sets both the name and the target_addr */
1037   if(noit_check_set_ip(new_check, target, name)) {
1038     mtev_boolean should_resolve;
1039     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1040     const char *key, *value;
1041     int klen;
1042     char* extension = strrchr(target, '.');
1043     new_check->flags |= NP_RESOLVE;
1044     new_check->flags &= ~NP_RESOLVED;
1045     /* If we match any of the extensions we're supposed to ignore,
1046      * don't resolve */
1047     if (extension && (strlen(extension) > 1)) {
1048       while(mtev_hash_next(&dns_ignore_list, &iter, &key, &klen, (void**)&value)) {
1049         if ((!strcmp("true", value)) && (!strcmp(extension+1, key))) {
1050             new_check->flags &= ~NP_RESOLVE;
1051             break;
1052         }
1053       }
1054     }
1055     if(noit_check_should_resolve_targets(&should_resolve) && !should_resolve)
1056       flags |= NP_DISABLED | NP_UNCONFIG;
1057     noit_check_resolve(new_check);
1058   }
1059
1060   if(new_check->filterset) free(new_check->filterset);
1061   new_check->filterset = filterset ? strdup(filterset): NULL;
1062
1063   if(config != NULL) {
1064     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1065     const char *k;
1066     int klen;
1067     void *data;
1068     if(new_check->config) mtev_hash_delete_all(new_check->config, free, free);
1069     else new_check->config = calloc(1, sizeof(*new_check->config));
1070     while(mtev_hash_next(config, &iter, &k, &klen, &data)) {
1071       mtev_hash_store(new_check->config, strdup(k), klen, strdup((char *)data));
1072     }
1073   }
1074   if(mconfigs != NULL) {
1075     int i;
1076     for(i=0; i<reg_module_id; i++) {
1077       mtev_hash_table *t;
1078       if(NULL != (t = noit_check_get_module_config(new_check, i))) {
1079         noit_check_set_module_config(new_check, i, NULL);
1080         mtev_hash_destroy(t, free, free);
1081         free(t);
1082       }
1083       if(mconfigs[i]) {
1084         mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
1085         mtev_hash_merge_as_dict(t, mconfigs[i]);
1086         noit_check_set_module_config(new_check, i, t);
1087       }
1088     }
1089   }
1090   if(new_check->oncheck) free(new_check->oncheck);
1091   new_check->oncheck = oncheck ? strdup(oncheck) : NULL;
1092   if(new_check->oncheck) system_needs_causality = mtev_true;
1093   new_check->period = period;
1094   new_check->timeout = timeout;
1095   new_check->config_seq = seq;
1096
1097   /* Unset what could be set.. then set what should be set */
1098   new_check->flags = (new_check->flags & ~mask) | flags;
1099
1100   check_config_fixup_hook_invoke(new_check);
1101
1102   if((new_check->flags & NP_TRANSIENT) == 0)
1103     noit_check_activate(new_check);
1104
1105   noit_check_add_to_list(new_check, NULL);
1106   noit_check_log_check(new_check);
1107   return 0;
1108 }
1109 int
1110 noit_poller_schedule(const char *target,
1111                      const char *module,
1112                      const char *name,
1113                      const char *filterset,
1114                      mtev_hash_table *config,
1115                      mtev_hash_table **mconfigs,
1116                      u_int32_t period,
1117                      u_int32_t timeout,
1118                      const char *oncheck,
1119                      int64_t seq,
1120                      int flags,
1121                      uuid_t in,
1122                      uuid_t out) {
1123   noit_check_t *new_check;
1124   new_check = calloc(1, sizeof(*new_check));
1125   if(!new_check) return -1;
1126
1127   /* The module and the UUID can never be changed */
1128   new_check->module = strdup(module);
1129   if(uuid_is_null(in))
1130     uuid_generate(new_check->checkid);
1131   else
1132     uuid_copy(new_check->checkid, in);
1133
1134   noit_check_update(new_check, target, name, filterset, config, mconfigs,
1135                     period, timeout, oncheck, seq, flags);
1136   assert(mtev_hash_store(&polls,
1137                          (char *)new_check->checkid, UUID_SIZE,
1138                          new_check));
1139   uuid_copy(out, new_check->checkid);
1140
1141   return 0;
1142 }
1143
1144 /* A quick little list of recycleable checks.  This list never really
1145  * grows large, so no sense in thinking too hard about the algorithmic
1146  * complexity.
1147  */
1148 struct _checker_rcb {
1149   noit_check_t *checker;
1150   struct _checker_rcb *next;
1151 };
1152 static struct _checker_rcb *checker_rcb = NULL;
1153 static void recycle_check(noit_check_t *checker) {
1154   struct _checker_rcb *n = malloc(sizeof(*n));
1155   n->checker = checker;
1156   n->next = checker_rcb;
1157   checker_rcb = n;
1158 }
1159 void
1160 free_metric(metric_t *m) {
1161   if(!m) return;
1162   if(m->metric_name) mtev_memory_safe_free(m->metric_name);
1163   if(m->metric_value.i) mtev_memory_safe_free(m->metric_value.i);
1164   mtev_memory_safe_free(m);
1165 }
1166 void
1167 noit_poller_free_check(noit_check_t *checker) {
1168   noit_module_t *mod;
1169
1170   if(checker->flags & NP_RUNNING) {
1171     recycle_check(checker);
1172     return;
1173   }
1174
1175   mod = noit_module_lookup(checker->module);
1176   if(mod && mod->cleanup) mod->cleanup(mod, checker);
1177   if(checker->fire_event) {
1178      eventer_remove(checker->fire_event);
1179      free(checker->fire_event->closure);
1180      eventer_free(checker->fire_event);
1181      checker->fire_event = NULL;
1182   }
1183   if(checker->closure) free(checker->closure);
1184   if(checker->target) free(checker->target);
1185   if(checker->module) free(checker->module);
1186   if(checker->name) free(checker->name);
1187   if(checker->config) {
1188     mtev_hash_destroy(checker->config, free, free);
1189     free(checker->config);
1190     checker->config = NULL;
1191   }
1192   if(checker->module_metadata) {
1193     int i;
1194     for(i=0; i<reg_module_id; i++) {
1195       struct vp_w_free *tuple;
1196       tuple = checker->module_metadata[i];
1197       if(tuple) {
1198         if(tuple->freefunc) tuple->freefunc(tuple->ptr);
1199         free(tuple);
1200       }
1201     }
1202     free(checker->module_metadata);
1203   }
1204   if(checker->module_configs) {
1205     int i;
1206     for(i=0; i<reg_module_id; i++) {
1207       if(checker->module_configs[i]) {
1208         mtev_hash_destroy(checker->module_configs[i], free, free);
1209         free(checker->module_configs[i]);
1210       }
1211     }
1212     free(checker->module_configs);
1213   }
1214   if(checker->stats.inprogress.status) free(checker->stats.inprogress.status);
1215   mtev_hash_destroy(&checker->stats.inprogress.metrics, NULL,
1216                     (void (*)(void *))free_metric);
1217   if(checker->stats.current.status) free(checker->stats.current.status);
1218   mtev_hash_destroy(&checker->stats.current.metrics, NULL,
1219                     (void (*)(void *))free_metric);
1220   if(checker->stats.previous.status) free(checker->stats.previous.status);
1221   mtev_hash_destroy(&checker->stats.previous.metrics, NULL,
1222                     (void (*)(void *))free_metric);
1223   free(checker);
1224 }
1225 static int
1226 check_recycle_bin_processor(eventer_t e, int mask, void *closure,
1227                             struct timeval *now) {
1228   static struct timeval one_minute = { RECYCLE_INTERVAL, 0L };
1229   struct _checker_rcb *prev = NULL, *curr = checker_rcb;
1230   mtevL(noit_debug, "Scanning check recycle bin\n");
1231   while(curr) {
1232     if(!(curr->checker->flags & NP_RUNNING)) {
1233       mtevL(noit_debug, "Check is ready to free.\n");
1234       noit_poller_free_check(curr->checker);
1235       if(prev) prev->next = curr->next;
1236       else checker_rcb = curr->next;
1237       free(curr);
1238       curr = prev ? prev->next : checker_rcb;
1239     }
1240     else {
1241       prev = curr;
1242       curr = curr->next;
1243     }
1244   }
1245   add_timeval(*now, one_minute, &e->whence);
1246   return EVENTER_TIMER;
1247 }
1248
1249 int
1250 noit_poller_deschedule(uuid_t in) {
1251   void *vcheck;
1252   noit_check_t *checker;
1253   if(mtev_hash_retrieve(&polls,
1254                         (char *)in, UUID_SIZE,
1255                         &vcheck) == 0) {
1256     return -1;
1257   }
1258   checker = (noit_check_t *)vcheck;
1259   checker->flags |= (NP_DISABLED|NP_KILLED);
1260
1261   noit_check_log_delete(checker);
1262
1263   assert(mtev_skiplist_remove(&polls_by_name, checker, NULL));
1264   assert(mtev_hash_delete(&polls, (char *)in, UUID_SIZE, NULL, NULL));
1265
1266   noit_poller_free_check(checker);
1267   return 0;
1268 }
1269
1270 noit_check_t *
1271 noit_poller_lookup(uuid_t in) {
1272   noit_check_t *check;
1273   pthread_mutex_lock(&polls_lock);
1274   check = noit_poller_lookup__nolock(in);
1275   pthread_mutex_unlock(&polls_lock);
1276   return check;
1277 }
1278 noit_check_t *
1279 noit_poller_lookup_by_name(char *target, char *name) {
1280   noit_check_t *check;
1281   pthread_mutex_lock(&polls_lock);
1282   check = noit_poller_lookup_by_name__nolock(target,name);
1283   pthread_mutex_unlock(&polls_lock);
1284   return check;
1285 }
1286 int
1287 noit_poller_target_ip_do(const char *target_ip,
1288                          int (*f)(noit_check_t *, void *),
1289                          void *closure) {
1290   int i, count = 0, todo_count = 0;
1291   noit_check_t pivot;
1292   mtev_skiplist *tlist;
1293   mtev_skiplist_node *next;
1294   noit_check_t *todo_onstack[8192];
1295   noit_check_t **todo = todo_onstack;
1296
1297   tlist = mtev_skiplist_find(polls_by_name.index,
1298                              __check_target_ip_compare, NULL);
1299
1300   pthread_mutex_lock(&polls_lock);
1301   /* First pass to count */
1302   memset(&pivot, 0, sizeof(pivot));
1303   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1304   pivot.name = "";
1305   pivot.target = "";
1306   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1307   while(next && next->data) {
1308     noit_check_t *check = next->data;
1309     if(strcmp(check->target_ip, target_ip)) break;
1310     todo_count++;
1311     mtev_skiplist_next(tlist, &next);
1312   }
1313
1314   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1315
1316   memset(&pivot, 0, sizeof(pivot));
1317   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1318   pivot.name = "";
1319   pivot.target = "";
1320   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1321   while(next && next->data) {
1322     noit_check_t *check = next->data;
1323     if(strcmp(check->target_ip, target_ip)) break;
1324     if(count < todo_count) todo[count++] = check;
1325     mtev_skiplist_next(tlist, &next);
1326   }
1327   pthread_mutex_unlock(&polls_lock);
1328
1329   todo_count = count;
1330   count = 0;
1331   for(i=0;i<todo_count;i++)
1332     count += f(todo[i],closure);
1333
1334   if(todo != todo_onstack) free(todo);
1335   return count;
1336 }
1337 int
1338 noit_poller_target_do(const char *target, int (*f)(noit_check_t *, void *),
1339                       void *closure) {
1340   int i, todo_count = 0, count = 0;
1341   noit_check_t pivot;
1342   mtev_skiplist *tlist;
1343   mtev_skiplist_node *next;
1344   noit_check_t *todo_onstack[8192];
1345   noit_check_t **todo = todo_onstack;
1346
1347   tlist = mtev_skiplist_find(polls_by_name.index,
1348                              __check_target_compare, NULL);
1349
1350   pthread_mutex_lock(&polls_lock);
1351   memset(&pivot, 0, sizeof(pivot));
1352   pivot.name = "";
1353   pivot.target = (char *)target;
1354   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1355   while(next && next->data) {
1356     noit_check_t *check = next->data;
1357     if(strcmp(check->target, target)) break;
1358     todo_count++;
1359     mtev_skiplist_next(tlist, &next);
1360   }
1361
1362   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1363
1364   memset(&pivot, 0, sizeof(pivot));
1365   pivot.name = "";
1366   pivot.target = (char *)target;
1367   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1368   while(next && next->data) {
1369     noit_check_t *check = next->data;
1370     if(strcmp(check->target, target)) break;
1371     if(count < todo_count) todo[count++] = check;
1372     mtev_skiplist_next(tlist, &next);
1373   }
1374   pthread_mutex_unlock(&polls_lock);
1375
1376   todo_count = count;
1377   count = 0;
1378   for(i=0;i<todo_count;i++)
1379     count += f(todo[i],closure);
1380
1381   if(todo != todo_onstack) free(todo);
1382   return count;
1383 }
1384
1385 int
1386 noit_poller_do(int (*f)(noit_check_t *, void *),
1387                void *closure) {
1388   mtev_skiplist_node *iter;
1389   int i, count = 0, max_count = 0;
1390   noit_check_t **todo;
1391
1392   if(polls_by_name.size == 0) return 0;
1393
1394   max_count = polls_by_name.size;
1395   todo = malloc(max_count * sizeof(*todo));
1396
1397   pthread_mutex_lock(&polls_lock);
1398   for(iter = mtev_skiplist_getlist(&polls_by_name); iter;
1399       mtev_skiplist_next(&polls_by_name, &iter)) {
1400     if(count < max_count) todo[count++] = (noit_check_t *)iter->data;
1401   }
1402   pthread_mutex_unlock(&polls_lock);
1403
1404   max_count = count;
1405   count = 0;
1406   for(i=0;i<max_count;i++)
1407     count += f(todo[i], closure);
1408   free(todo);
1409   return count;
1410 }
1411
1412 struct ip_module_collector_crutch {
1413   noit_check_t **array;
1414   const char *module;
1415   int idx;
1416   int allocd;
1417 };
1418 static int ip_module_collector(noit_check_t *check, void *cl) {
1419   struct ip_module_collector_crutch *c = cl;
1420   if(c->idx >= c->allocd) return 0;
1421   if(strcmp(check->module, c->module)) return 0;
1422   c->array[c->idx++] = check;
1423   return 1;
1424 }
1425 int
1426 noit_poller_lookup_by_ip_module(const char *ip, const char *mod,
1427                                 noit_check_t **checks, int nchecks) {
1428   struct ip_module_collector_crutch crutch;
1429   crutch.array = checks;
1430   crutch.allocd = nchecks;
1431   crutch.idx = 0;
1432   crutch.module = mod;
1433   return noit_poller_target_ip_do(ip, ip_module_collector, &crutch);
1434 }
1435 int
1436 noit_poller_lookup_by_module(const char *ip, const char *mod,
1437                              noit_check_t **checks, int nchecks) {
1438   struct ip_module_collector_crutch crutch;
1439   crutch.array = checks;
1440   crutch.allocd = nchecks;
1441   crutch.idx = 0;
1442   crutch.module = mod;
1443   return noit_poller_target_do(ip, ip_module_collector, &crutch);
1444 }
1445
1446
1447 int
1448 noit_check_xpath(char *xpath, int len,
1449                  const char *base, const char *arg) {
1450   uuid_t checkid;
1451   int base_trailing_slash;
1452   char argcopy[1024], *target, *module, *name;
1453
1454   base_trailing_slash = (base[strlen(base)-1] == '/');
1455   xpath[0] = '\0';
1456   argcopy[0] = '\0';
1457   if(arg) strlcpy(argcopy, arg, sizeof(argcopy));
1458
1459   if(uuid_parse(argcopy, checkid) == 0) {
1460     /* If they kill by uuid, we'll seek and destroy -- find it anywhere */
1461     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1462              base, base_trailing_slash ? "" : "/", argcopy);
1463   }
1464   else if((module = strchr(argcopy, '`')) != NULL) {
1465     noit_check_t *check;
1466     char uuid_str[37];
1467     target = argcopy;
1468     *module++ = '\0';
1469     if((name = strchr(module+1, '`')) == NULL)
1470       name = module;
1471     else
1472       name++;
1473     check = noit_poller_lookup_by_name(target, name);
1474     if(!check) {
1475       return -1;
1476     }
1477     uuid_unparse_lower(check->checkid, uuid_str);
1478     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1479              base, base_trailing_slash ? "" : "/", uuid_str);
1480   }
1481   return strlen(xpath);
1482 }
1483
1484 static int
1485 bad_check_initiate(noit_module_t *self, noit_check_t *check,
1486                    int once, noit_check_t *cause) {
1487   /* self is likely null here -- why it is bad, in fact */
1488   /* this is only suitable to call in one-offs */
1489   stats_t current;
1490   char buff[256];
1491   if(!once) return -1;
1492   if(!check) return -1;
1493   assert(!(check->flags & NP_RUNNING));
1494   check->flags |= NP_RUNNING;
1495   noit_check_stats_clear(check, &current);
1496   gettimeofday(&current.whence, NULL);
1497   current.duration = 0;
1498   current.available = NP_UNKNOWN;
1499   current.state = NP_UNKNOWN;
1500   snprintf(buff, sizeof(buff), "check[%s] implementation offline",
1501            check->module);
1502   current.status = buff;
1503   noit_check_set_stats(check, &current);
1504   check->flags &= ~NP_RUNNING;
1505   return 0;
1506 }
1507 void
1508 noit_check_stats_clear(noit_check_t *check, stats_t *s) {
1509   memset(s, 0, sizeof(*s));
1510   s->state = NP_UNKNOWN;
1511   s->available = NP_UNKNOWN;
1512 }
1513
1514 void
1515 __stats_add_metric(stats_t *newstate, metric_t *m) {
1516   mtev_hash_replace(&newstate->metrics, m->metric_name, strlen(m->metric_name),
1517                     m, NULL, (void (*)(void *))free_metric);
1518 }
1519
1520 static size_t
1521 noit_metric_sizes(metric_type_t type, const void *value) {
1522   switch(type) {
1523     case METRIC_INT32:
1524     case METRIC_UINT32:
1525       return sizeof(int32_t);
1526     case METRIC_INT64:
1527     case METRIC_UINT64:
1528       return sizeof(int64_t);
1529     case METRIC_DOUBLE:
1530       return sizeof(double);
1531     case METRIC_STRING: {
1532       int len = strlen((char*)value) + 1;
1533       return ((len >= text_size_limit) ? text_size_limit+1 : len);
1534     }
1535     case METRIC_GUESS:
1536       break;
1537   }
1538   assert(type != type);
1539   return 0;
1540 }
1541 static metric_type_t
1542 noit_metric_guess_type(const char *s, void **replacement) {
1543   char *copy, *cp, *trailer, *rpl;
1544   int negative = 0;
1545   metric_type_t type = METRIC_STRING;
1546
1547   if(!s) return METRIC_GUESS;
1548   copy = cp = strdup(s);
1549
1550   /* TRIM the string */
1551   while(*cp && isspace(*cp)) cp++; /* ltrim */
1552   s = cp; /* found a good starting point */
1553   while(*cp) cp++; /* advance to \0 */
1554   cp--; /* back up one */
1555   while(cp > s && isspace(*cp)) *cp-- = '\0'; /* rtrim */
1556
1557   /* Find the first space */
1558   cp = (char *)s;
1559   while(*cp && !isspace(*cp)) cp++;
1560   trailer = cp;
1561   cp--; /* backup one */
1562   if(cp > s && *cp == '%') *cp-- = '\0'; /* chop a last % is there is one */
1563
1564   while(*trailer && isspace(*trailer)) *trailer++ = '\0'; /* rtrim */
1565
1566   /* string was       '  -1.23e-01%  inodes used  ' */
1567   /* copy is (~ = \0) '  -1.23e-01~  inodes used~~' */
1568   /*                     ^           ^              */
1569   /*                     s           trailer        */
1570
1571   /* So, the trailer must not contain numbers */
1572   while(*trailer) { if(isdigit(*trailer)) goto notanumber; trailer++; }
1573
1574   /* And the 's' must be of the form:
1575    *  0) may start with a sign [-+]?
1576    *  1) [1-9][0-9]*
1577    *  2) [0]?.[0-9]+
1578    *  3) 0
1579    *  4) [1-9][0-9]*.[0-9]+
1580    *  5) all of the above ending with e[+-][0-9]+
1581    */
1582    rpl = (char *)s;
1583    /* CASE 0 */
1584    if(s[0] == '-' || s[0] == '+') {
1585      if(s[0] == '-') negative = 1;
1586      s++;
1587    }
1588
1589    if(s[0] == '.') goto decimal; /* CASE 2 */
1590    if(s[0] == '0') { /* CASE 2 & 3 */
1591      s++;
1592      if(!s[0]) goto scanint; /* CASE 3 */
1593      if(s[0] == '.') goto decimal; /* CASE 2 */
1594      goto notanumber;
1595    }
1596    if(s[0] >= '1' && s[0] <= '9') { /* CASE 1 & 4 */
1597      s++;
1598      while(isdigit(s[0])) s++; /* CASE 1 & 4 */
1599      if(!s[0]) goto scanint; /* CASE 1 */
1600      if(s[0] == '.') goto decimal; /* CASE 4 */
1601      goto notanumber;
1602    }
1603    /* Not case 1,2,3,4 */
1604    goto notanumber;
1605
1606   decimal:
1607    s++;
1608    if(!isdigit(s[0])) goto notanumber;
1609    s++;
1610    while(isdigit(s[0])) s++;
1611    if(!s[0]) goto scandouble;
1612    if(s[0] == 'e' || s[0] == 'E') goto exponent; /* CASE 5 */
1613    goto notanumber;
1614
1615   exponent:
1616    s++;
1617    if(s[0] != '-' && s[0] != '+') goto notanumber;
1618    s++;
1619    if(!isdigit(s[0])) goto notanumber;
1620    s++;
1621    while(isdigit(s[0])) s++;
1622    if(!s[0]) goto scandouble;
1623    goto notanumber;
1624
1625  scanint:
1626    if(negative) {
1627      int64_t *v;
1628      v = mtev_memory_safe_malloc(sizeof(*v));
1629      *v = strtoll(rpl, NULL, 10);
1630      *replacement = v;
1631      type = METRIC_INT64;
1632      goto alldone;
1633    }
1634    else {
1635      u_int64_t *v;
1636      v = mtev_memory_safe_malloc(sizeof(*v));
1637      *v = strtoull(rpl, NULL, 10);
1638      *replacement = v;
1639      type = METRIC_UINT64;
1640      goto alldone;
1641    }
1642  scandouble:
1643    {
1644      double *v;
1645      v = mtev_memory_safe_malloc(sizeof(*v));
1646      *v = strtod(rpl, NULL);
1647      *replacement = v;
1648      type = METRIC_DOUBLE;
1649      goto alldone;
1650    }
1651
1652  alldone:
1653  notanumber:
1654   free(copy);
1655   return type;
1656 }
1657
/*
 * Sanitize a metric name in place: every non-printable byte becomes a space
 * and trailing spaces are stripped.  The first character is always kept,
 * even when it is a space.  An empty string is left untouched.
 */
static void
cleanse_metric_name(char *m) {
  char *cp;

  /* <ctype.h> functions need an unsigned char value, hence the cast. */
  for(cp = m; *cp; cp++)
    if(!isprint((unsigned char)*cp)) *cp = ' ';

  /* cp sits on the terminator.  Test the bound before looking at cp[-1] so
   * an empty string never reads the byte in front of the buffer. */
  while(cp - m > 1 && cp[-1] == ' ')
    *--cp = '\0';
}
1666
1667 int
1668 noit_stats_populate_metric(metric_t *m, const char *name, metric_type_t type,
1669                            const void *value) {
1670   void *replacement = NULL;
1671
1672   m->metric_name = mtev_memory_safe_strdup(name);
1673   cleanse_metric_name(m->metric_name);
1674
1675   if(type == METRIC_GUESS)
1676     type = noit_metric_guess_type((char *)value, &replacement);
1677   if(type == METRIC_GUESS) return -1;
1678
1679   m->metric_type = type;
1680
1681   if(replacement)
1682     m->metric_value.vp = replacement;
1683   else if(value) {
1684     size_t len;
1685     len = noit_metric_sizes(type, value);
1686     m->metric_value.vp = mtev_memory_safe_malloc(len);
1687     memcpy(m->metric_value.vp, value, len);
1688     if (type == METRIC_STRING) {
1689       m->metric_value.s[len-1] = 0;
1690     }
1691   }
1692   else m->metric_value.vp = NULL;
1693   return 0;
1694 }
1695
1696 metric_t *
1697 noit_stats_get_metric(noit_check_t *check,
1698                       stats_t *newstate, const char *name) {
1699   void *v;
1700   if(mtev_hash_retrieve(&newstate->metrics, name, strlen(name), &v))
1701     return (metric_t *)v;
1702   return NULL;
1703 }
1704
1705 void
1706 noit_stats_set_metric(noit_check_t *check,
1707                       stats_t *newstate, const char *name, metric_type_t type,
1708                       const void *value) {
1709   metric_t *m = mtev_memory_safe_calloc(1, sizeof(*m));
1710   if(noit_stats_populate_metric(m, name, type, value)) {
1711     free_metric(m);
1712     return;
1713   }
1714   noit_check_metric_count_add(1);
1715   check_stats_set_metric_hook_invoke(check, newstate, m);
1716   __stats_add_metric(newstate, m);
1717 }
1718 void
1719 noit_stats_set_metric_coerce(noit_check_t *check,
1720                              stats_t *stat, const char *name, metric_type_t t,
1721                              const char *v) {
1722   char *endptr;
1723   if(v == NULL) {
1724    bogus:
1725     check_stats_set_metric_coerce_hook_invoke(check, stat, name, t, v, mtev_false);
1726     noit_stats_set_metric(check, stat, name, t, NULL);
1727     return;
1728   }
1729   switch(t) {
1730     case METRIC_STRING:
1731       noit_stats_set_metric(check, stat, name, t, v);
1732       break;
1733     case METRIC_INT32:
1734     {
1735       int32_t val;
1736       val = strtol(v, &endptr, 10);
1737       if(endptr == v) goto bogus;
1738       noit_stats_set_metric(check, stat, name, t, &val);
1739       break;
1740     }
1741     case METRIC_UINT32:
1742     {
1743       u_int32_t val;
1744       val = strtoul(v, &endptr, 10);
1745       if(endptr == v) goto bogus;
1746       noit_stats_set_metric(check, stat, name, t, &val);
1747       break;
1748     }
1749     case METRIC_INT64:
1750     {
1751       int64_t val;
1752       val = strtoll(v, &endptr, 10);
1753       if(endptr == v) goto bogus;
1754       noit_stats_set_metric(check, stat, name, t, &val);
1755       break;
1756     }
1757     case METRIC_UINT64:
1758     {
1759       u_int64_t val;
1760       val = strtoull(v, &endptr, 10);
1761       if(endptr == v) goto bogus;
1762       noit_stats_set_metric(check, stat, name, t, &val);
1763       break;
1764     }
1765     case METRIC_DOUBLE:
1766     {
1767       double val;
1768       val = strtod(v, &endptr);
1769       if(endptr == v) goto bogus;
1770       noit_stats_set_metric(check, stat, name, t, &val);
1771       break;
1772     }
1773     case METRIC_GUESS:
1774       noit_stats_set_metric(check, stat, name, t, v);
1775       break;
1776   }
1777   check_stats_set_metric_coerce_hook_invoke(check, stat, name, t, v, mtev_true);
1778 }
1779 void
1780 noit_stats_log_immediate_metric(noit_check_t *check,
1781                                 const char *name, metric_type_t type,
1782                                 void *value) {
1783   struct timeval now;
1784   metric_t *m = mtev_memory_safe_malloc(sizeof(*m));
1785   if(noit_stats_populate_metric(m, name, type, value)) {
1786     free_metric(m);
1787     return;
1788   }
1789   gettimeofday(&now, NULL);
1790   noit_check_log_metric(check, &now, m);
1791   free_metric(m);
1792 }
1793
1794 void
1795 noit_check_passive_set_stats(noit_check_t *check, stats_t *newstate) {
1796   int i, nwatches = 0;
1797   mtev_skiplist_node *next;
1798   noit_check_t n;
1799   noit_check_t *watches[8192];
1800
1801   uuid_copy(n.checkid, check->checkid);
1802   n.period = 0;
1803
1804   noit_check_set_stats(check,newstate);
1805
1806   pthread_mutex_lock(&polls_lock);
1807   mtev_skiplist_find_neighbors(&watchlist, &n, NULL, NULL, &next);
1808   while(next && next->data && nwatches < 8192) {
1809     noit_check_t *wcheck = next->data;
1810     if(uuid_compare(n.checkid, wcheck->checkid)) break;
1811     watches[nwatches++] = wcheck;
1812     mtev_skiplist_next(&watchlist, &next);
1813   }
1814   pthread_mutex_unlock(&polls_lock);
1815
1816   for(i=0;i<nwatches;i++) {
1817     stats_t backup;
1818     noit_check_t *wcheck = watches[i];
1819     /* Swap the real check's stats into place */
1820     memcpy(&backup, &wcheck->stats.current, sizeof(stats_t));
1821     memcpy(&wcheck->stats.current, &check->stats.current, sizeof(stats_t));
1822
1823     if(check_passive_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
1824       /* Write out our status */
1825       noit_check_log_status(wcheck);
1826       /* Write out all metrics */
1827       noit_check_log_metrics(wcheck);
1828     }
1829     /* Swap them back out */
1830     memcpy(&wcheck->stats.current, &backup, sizeof(stats_t));
1831   }
1832 }
1833 void
1834 noit_check_set_stats(noit_check_t *check, stats_t *newstate) {
1835   int report_change = 0;
1836   char *cp;
1837   dep_list_t *dep;
1838   if(check->stats.previous.status)
1839     free(check->stats.previous.status);
1840   mtev_hash_destroy(&check->stats.previous.metrics, NULL,
1841                     (void (*)(void *))free_metric);
1842   memcpy(&check->stats.previous, &check->stats.current, sizeof(stats_t));
1843   if(newstate)
1844     memcpy(&check->stats.current, newstate, sizeof(stats_t));
1845   if(check->stats.current.status)
1846     check->stats.current.status = strdup(check->stats.current.status);
1847   for(cp = check->stats.current.status; cp && *cp; cp++)
1848     if(*cp == '\r' || *cp == '\n') *cp = ' ';
1849
1850   /* check for state changes */
1851   if(check->stats.current.available != NP_UNKNOWN &&
1852      check->stats.previous.available != NP_UNKNOWN &&
1853      check->stats.current.available != check->stats.previous.available)
1854     report_change = 1;
1855   if(check->stats.current.state != NP_UNKNOWN &&
1856      check->stats.previous.state != NP_UNKNOWN &&
1857      check->stats.current.state != check->stats.previous.state)
1858     report_change = 1;
1859
1860   mtevL(noit_debug, "%s`%s <- [%s]\n", check->target, check->name,
1861         check->stats.current.status);
1862   if(report_change) {
1863     mtevL(noit_debug, "%s`%s -> [%s:%s]\n",
1864           check->target, check->name,
1865           noit_check_available_string(check->stats.current.available),
1866           noit_check_state_string(check->stats.current.state));
1867   }
1868
1869   if(NOIT_CHECK_STATUS_ENABLED()) {
1870     char id[UUID_STR_LEN+1];
1871     uuid_unparse_lower(check->checkid, id);
1872     NOIT_CHECK_STATUS(id, check->module, check->name, check->target,
1873                       check->stats.current.available,
1874                       check->stats.current.state,
1875                       check->stats.current.status);
1876   }
1877
1878   if(check_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
1879     /* Write out the bundled information */
1880     noit_check_log_bundle(check);
1881   }
1882   /* count the check as complete */
1883   check_completion_count++;
1884
1885   for(dep = check->causal_checks; dep; dep = dep->next) {
1886     noit_module_t *mod;
1887     mod = noit_module_lookup(dep->check->module);
1888     if(!mod) {
1889       bad_check_initiate(mod, dep->check, 1, check);
1890     }
1891     else {
1892       mtevL(noit_debug, "Firing %s`%s in response to %s`%s\n",
1893             dep->check->target, dep->check->name,
1894             check->target, check->name);
1895       if((dep->check->flags & NP_DISABLED) == 0)
1896         if(mod->initiate_check)
1897           mod->initiate_check(mod, dep->check, 1, check);
1898     }
1899   }
1900 }
1901
1902 static int
1903 noit_console_show_watchlist(mtev_console_closure_t ncct,
1904                             int argc, char **argv,
1905                             mtev_console_state_t *dstate,
1906                             void *closure) {
1907   mtev_skiplist_node *iter, *fiter;
1908   int nwatches = 0, i;
1909   noit_check_t *watches[8192];
1910
1911   nc_printf(ncct, "%d active watches.\n", watchlist.size);
1912   pthread_mutex_lock(&polls_lock);
1913   for(iter = mtev_skiplist_getlist(&watchlist); iter && nwatches < 8192;
1914       mtev_skiplist_next(&watchlist, &iter)) {
1915     noit_check_t *check = iter->data;
1916     watches[nwatches++] = check;
1917   }
1918   pthread_mutex_unlock(&polls_lock);
1919
1920   for(i=0;i<nwatches;i++) {
1921     noit_check_t *check = watches[i];
1922     char uuid_str[UUID_STR_LEN + 1];
1923
1924     uuid_unparse_lower(check->checkid, uuid_str);
1925     nc_printf(ncct, "%s:\n\t[%s`%s`%s]\n\tPeriod: %dms\n\tFeeds[%d]:\n",
1926               uuid_str, check->target, check->module, check->name,
1927               check->period, check->feeds ? check->feeds->size : 0);
1928     if(check->feeds && check->feeds->size) {
1929       for(fiter = mtev_skiplist_getlist(check->feeds); fiter;
1930           mtev_skiplist_next(check->feeds, &fiter)) {
1931         nc_printf(ncct, "\t\t%s\n", (const char *)fiter->data);
1932       }
1933     }
1934   }
1935   return 0;
1936 }
1937
1938 static void
1939 nc_printf_check_brief(mtev_console_closure_t ncct,
1940                       noit_check_t *check) {
1941   char out[512];
1942   char uuid_str[37];
1943   snprintf(out, sizeof(out), "%s`%s (%s [%x])", check->target, check->name,
1944            check->target_ip, check->flags);
1945   uuid_unparse_lower(check->checkid, uuid_str);
1946   nc_printf(ncct, "%s %s\n", uuid_str, out);
1947   if(check->stats.current.status)
1948     nc_printf(ncct, "\t%s\n", check->stats.current.status);
1949 }
1950
1951 char *
1952 noit_console_conf_check_opts(mtev_console_closure_t ncct,
1953                              mtev_console_state_stack_t *stack,
1954                              mtev_console_state_t *dstate,
1955                              int argc, char **argv, int idx) {
1956   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1957   uuid_t key_id;
1958   int klen, i = 0;
1959   void *vcheck;
1960
1961   if(argc == 1) {
1962     if(!strncmp("new", argv[0], strlen(argv[0]))) {
1963       if(idx == i) return strdup("new");
1964       i++;
1965     }
1966     pthread_mutex_lock(&polls_lock);
1967     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
1968                          &vcheck)) {
1969       noit_check_t *check = (noit_check_t *)vcheck;
1970       char out[512];
1971       char uuid_str[37];
1972       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
1973       uuid_unparse_lower(check->checkid, uuid_str);
1974       if(!strncmp(out, argv[0], strlen(argv[0]))) {
1975         if(idx == i) {
1976           pthread_mutex_unlock(&polls_lock);
1977           return strdup(out);
1978         }
1979         i++;
1980       }
1981       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
1982         if(idx == i) {
1983           pthread_mutex_unlock(&polls_lock);
1984           return strdup(uuid_str);
1985         }
1986         i++;
1987       }
1988     }
1989     pthread_mutex_unlock(&polls_lock);
1990   }
1991   if(argc == 2) {
1992     cmd_info_t *cmd;
1993     if(!strcmp("new", argv[0])) return NULL;
1994     cmd = mtev_skiplist_find(&dstate->cmds, "attribute", NULL);
1995     if(!cmd) return NULL;
1996     return mtev_console_opt_delegate(ncct, stack, cmd->dstate, argc-1, argv+1, idx);
1997   }
1998   return NULL;
1999 }
2000
2001 char *
2002 noit_console_check_opts(mtev_console_closure_t ncct,
2003                         mtev_console_state_stack_t *stack,
2004                         mtev_console_state_t *dstate,
2005                         int argc, char **argv, int idx) {
2006   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2007   uuid_t key_id;
2008   int klen, i = 0;
2009
2010   if(argc == 1) {
2011     void *vcheck;
2012     pthread_mutex_lock(&polls_lock);
2013     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2014                          &vcheck)) {
2015       char out[512];
2016       char uuid_str[37];
2017       noit_check_t *check = (noit_check_t *)vcheck;
2018       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
2019       uuid_unparse_lower(check->checkid, uuid_str);
2020       if(!strncmp(out, argv[0], strlen(argv[0]))) {
2021         if(idx == i) {
2022           pthread_mutex_unlock(&polls_lock);
2023           return strdup(out);
2024         }
2025         i++;
2026       }
2027       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
2028         if(idx == i) {
2029           pthread_mutex_unlock(&polls_lock);
2030           return strdup(uuid_str);
2031         }
2032         i++;
2033       }
2034     }
2035     pthread_mutex_unlock(&polls_lock);
2036   }
2037   if(argc == 2) {
2038     return mtev_console_opt_delegate(ncct, stack, dstate, argc-1, argv+1, idx);
2039   }
2040   return NULL;
2041 }
2042
2043 static int
2044 noit_console_show_checks(mtev_console_closure_t ncct,
2045                          int argc, char **argv,
2046                          mtev_console_state_t *dstate,
2047                          void *closure) {
2048   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2049   uuid_t key_id;
2050   int klen, i = 0, nchecks;
2051   void *vcheck;
2052   noit_check_t **checks;
2053
2054   nchecks = mtev_hash_size(&polls);
2055   if(nchecks == 0) return 0;
2056   checks = malloc(nchecks * sizeof(*checks));
2057
2058   pthread_mutex_lock(&polls_lock);
2059   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2060                        &vcheck)) {
2061     if(i<nchecks) checks[i++] = vcheck;
2062   }
2063   pthread_mutex_unlock(&polls_lock);
2064
2065   nchecks = i;
2066   for(i=0;i<nchecks;i++)
2067     nc_printf_check_brief(ncct,checks[i]);
2068
2069   free(checks);
2070   return 0;
2071 }
2072
2073 static int
2074 noit_console_short_checks_sl(mtev_console_closure_t ncct,
2075                              mtev_skiplist *tlist) {
2076   int max_count, i = 0;
2077   noit_check_t **todo;
2078   mtev_skiplist_node *iter;
2079
2080   max_count = tlist->size;
2081   if(max_count == 0) return 0;
2082   todo = malloc(max_count * sizeof(*todo));
2083
2084   pthread_mutex_lock(&polls_lock);
2085   for(iter = mtev_skiplist_getlist(tlist); i < max_count && iter;
2086       mtev_skiplist_next(tlist, &iter)) {
2087     todo[i++] = iter->data;
2088   }
2089   pthread_mutex_unlock(&polls_lock);
2090
2091   max_count = i;
2092   for(i=0;i<max_count;i++)
2093     nc_printf_check_brief(ncct, todo[i]);
2094
2095   free(todo);
2096   return 0;
2097 }
2098 static int
2099 noit_console_show_checks_name(mtev_console_closure_t ncct,
2100                               int argc, char **argv,
2101                               mtev_console_state_t *dstate,
2102                               void *closure) {
2103   return noit_console_short_checks_sl(ncct, &polls_by_name);
2104 }
2105
2106 static int
2107 noit_console_show_checks_target(mtev_console_closure_t ncct,
2108                                    int argc, char **argv,
2109                                    mtev_console_state_t *dstate,
2110                                    void *closure) {
2111   return noit_console_short_checks_sl(ncct,
2112            mtev_skiplist_find(polls_by_name.index,
2113            __check_target_compare, NULL));
2114 }
2115
2116 static int
2117 noit_console_show_checks_target_ip(mtev_console_closure_t ncct,
2118                                    int argc, char **argv,
2119                                    mtev_console_state_t *dstate,
2120                                    void *closure) {
2121   return noit_console_short_checks_sl(ncct,
2122            mtev_skiplist_find(polls_by_name.index,
2123            __check_target_ip_compare, NULL));
2124 }
2125
2126 static void
2127 register_console_check_commands() {
2128   mtev_console_state_t *tl;
2129   cmd_info_t *showcmd;
2130
2131   tl = mtev_console_state_initial();
2132   showcmd = mtev_console_state_get_cmd(tl, "show");
2133   assert(showcmd && showcmd->dstate);
2134
2135   mtev_console_state_add_cmd(showcmd->dstate,
2136     NCSCMD("timing_slots", noit_console_show_timing_slots, NULL, NULL, NULL));
2137
2138   mtev_console_state_add_cmd(showcmd->dstate,
2139     NCSCMD("checks", noit_console_show_checks, NULL, NULL, NULL));
2140
2141   mtev_console_state_add_cmd(showcmd->dstate,
2142     NCSCMD("checks:name", noit_console_show_checks_name, NULL,
2143            NULL, NULL));
2144
2145   mtev_console_state_add_cmd(showcmd->dstate,
2146     NCSCMD("checks:target", noit_console_show_checks_target, NULL,
2147            NULL, NULL));
2148
2149   mtev_console_state_add_cmd(showcmd->dstate,
2150     NCSCMD("checks:target_ip", noit_console_show_checks_target_ip, NULL,
2151            NULL, NULL));
2152
2153   mtev_console_state_add_cmd(showcmd->dstate,
2154     NCSCMD("watches", noit_console_show_watchlist, NULL, NULL, NULL));
2155 }
2156
2157 int
2158 noit_check_register_module(const char *name) {
2159   int i;
2160   for(i=0; i<reg_module_id; i++)
2161     if(!strcmp(reg_module_names[i], name)) return i;
2162   if(reg_module_id >= MAX_MODULE_REGISTRATIONS) return -1;
2163   mtevL(noit_debug, "Registered module %s as %d\n", name, i);
2164   i = reg_module_id++;
2165   reg_module_names[i] = strdup(name);
2166   mtev_conf_set_namespace(reg_module_names[i]);
2167   return i;
2168 }
2169 int
2170 noit_check_registered_module_cnt() {
2171   return reg_module_id;
2172 }
2173 const char *
2174 noit_check_registered_module(int idx) {
2175   if(reg_module_used < 0) reg_module_used = reg_module_id;
2176   assert(reg_module_used == reg_module_id);
2177   if(idx >= reg_module_id || idx < 0) return NULL;
2178   return reg_module_names[idx];
2179 }
2180
2181 void
2182 noit_check_set_module_metadata(noit_check_t *c, int idx, void *md, void (*freefunc)(void *)) {
2183   struct vp_w_free *tuple;
2184   if(reg_module_used < 0) reg_module_used = reg_module_id;
2185   assert(reg_module_used == reg_module_id);
2186   if(idx >= reg_module_id || idx < 0) return;
2187   if(!c->module_metadata) c->module_metadata = calloc(reg_module_id, sizeof(void *));
2188   c->module_metadata[idx] = calloc(1, sizeof(struct vp_w_free));
2189   tuple = c->module_metadata[idx];
2190   tuple->ptr = md;
2191   tuple->freefunc = freefunc;
2192 }
2193 void
2194 noit_check_set_module_config(noit_check_t *c, int idx, mtev_hash_table *config) {
2195   if(reg_module_used < 0) reg_module_used = reg_module_id;
2196   assert(reg_module_used == reg_module_id);
2197   if(idx >= reg_module_id || idx < 0) return;
2198   if(!c->module_configs) c->module_configs = calloc(reg_module_id, sizeof(mtev_hash_table *));
2199   c->module_configs[idx] = config;
2200 }
2201 void *
2202 noit_check_get_module_metadata(noit_check_t *c, int idx) {
2203   struct vp_w_free *tuple;
2204   if(reg_module_used < 0) reg_module_used = reg_module_id;
2205   assert(reg_module_used == reg_module_id);
2206   if(idx >= reg_module_id || idx < 0 || !c->module_metadata) return NULL;
2207   tuple = c->module_metadata[idx];
2208   return tuple ? tuple->ptr : NULL;
2209 }
2210 mtev_hash_table *
2211 noit_check_get_module_config(noit_check_t *c, int idx) {
2212   if(reg_module_used < 0) reg_module_used = reg_module_id;
2213   assert(reg_module_used == reg_module_id);
2214   if(idx >= reg_module_id || idx < 0 || !c->module_configs) return NULL;
2215   return c->module_configs[idx];
2216 }
Note: See TracBrowser for help on using the browser.