root/src/noit_check.c

Revision 06cfac1e0cfc409baed3e1e4a2f4722e391e4180, 70.4 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 3 weeks ago)

update check should allocate stats if NULL

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  * Copyright (c) 2015, Circonus, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above
13  *       copyright notice, this list of conditions and the following
14  *       disclaimer in the documentation and/or other materials provided
15  *       with the distribution.
16  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
17  *       of its contributors may be used to endorse or promote products
18  *       derived from this software without specific prior written
19  *       permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "noit_config.h"
35 #include <mtev_defines.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <ctype.h>
41 #include <assert.h>
42 #include <errno.h>
43 #include <netinet/in.h>
44 #include <arpa/inet.h>
45 #include <time.h>
46
47 #include <eventer/eventer.h>
48 #include <mtev_memory.h>
49 #include <mtev_log.h>
50 #include <mtev_hash.h>
51 #include <mtev_skiplist.h>
52 #include <mtev_watchdog.h>
53 #include <mtev_conf.h>
54 #include <mtev_console.h>
55 #include <mtev_cluster.h>
56
57 #include "noit_mtev_bridge.h"
58 #include "noit_dtrace_probes.h"
59 #include "noit_check.h"
60 #include "noit_module.h"
61 #include "noit_check_tools.h"
62 #include "noit_check_resolver.h"
63
64 #define DEFAULT_TEXT_METRIC_SIZE_LIMIT  512
65 #define RECYCLE_INTERVAL 60
66
/* Hook implementations generated through the MTEV_HOOK_IMPL macro.  Each
 * invocation lists, in order: the hook name, the caller-facing argument
 * list, the closure type and name, the callback prototype, and the
 * arguments handed to the callback. */

/* check_config_fixup: callbacks receive the check. */
MTEV_HOOK_IMPL(check_config_fixup,
  (noit_check_t *check),
  void *, closure,
  (void *closure, noit_check_t *check),
  (closure,check))

/* check_stats_set_metric: callbacks receive the check, a stats_t and a
 * metric_t. */
MTEV_HOOK_IMPL(check_stats_set_metric,
  (noit_check_t *check, stats_t *stats, metric_t *m),
  void *, closure,
  (void *closure, noit_check_t *check, stats_t *stats, metric_t *m),
  (closure,check,stats,m))

/* check_stats_set_metric_coerce: callbacks receive the check, a stats_t,
 * a metric name, a metric type, a string value and a success flag. */
MTEV_HOOK_IMPL(check_stats_set_metric_coerce,
  (noit_check_t *check, stats_t *stats, const char *name,
   metric_type_t type, const char *v, mtev_boolean success),
  void *, closure,
  (void *closure, noit_check_t *check, stats_t *stats, const char *name,
   metric_type_t type, const char *v, mtev_boolean success),
  (closure,check,stats,name,type,v,success))

/* check_passive_log_stats: callbacks receive the check. */
MTEV_HOOK_IMPL(check_passive_log_stats,
  (noit_check_t *check),
  void *, closure,
  (void *closure, noit_check_t *check),
  (closure,check))

/* check_log_stats: callbacks receive the check. */
MTEV_HOOK_IMPL(check_log_stats,
  (noit_check_t *check),
  void *, closure,
  (void *closure, noit_check_t *check),
  (closure,check))
98
/* Slot indexes into a check's statistics array (c->statistics), as used by
 * the stats_inprogress / stats_current / stats_previous accessors. */
#define STATS_INPROGRESS 0
#define STATS_CURRENT 1
#define STATS_PREVIOUS 2
102
103 void
104 free_metric(metric_t *m) {
105   if(!m) return;
106   if(m->metric_name) mtev_memory_safe_free(m->metric_name);
107   if(m->metric_value.i) mtev_memory_safe_free(m->metric_value.i);
108   mtev_memory_safe_free(m);
109 }
110
/* Accessors for the three stats_t slots stored behind c->statistics.
 * The argument and the whole expansion are parenthesized so the macros are
 * safe with arbitrary expressions; each still expands to an lvalue. */
#define stats_inprogress(c) (((stats_t **)((c)->statistics))[STATS_INPROGRESS])
#define stats_current(c) (((stats_t **)((c)->statistics))[STATS_CURRENT])
#define stats_previous(c) (((stats_t **)((c)->statistics))[STATS_PREVIOUS])
114
115 stats_t *
116 noit_check_get_stats_inprogress(noit_check_t *c) {
117   return stats_inprogress(c);
118 }
119 stats_t *
120 noit_check_get_stats_current(noit_check_t *c) {
121   return stats_current(c);
122 }
123 stats_t *
124 noit_check_get_stats_previous(noit_check_t *c) {
125   return stats_previous(c);
126 }
127
/* One set of results for a check.  Fields are read and written through the
 * noit_check_stats_* accessors in this file. */
struct stats_t {
  struct timeval whence;     /* timestamp associated with these stats */
  int8_t available;          /* availability value (see noit_check_available_string) */
  int8_t state;              /* state value (see noit_check_state_string) */
  u_int32_t duration;        /* NOTE(review): unit not shown here -- presumably ms, confirm */
  mtev_hash_table metrics;   /* metrics table; values are released with free_metric */
  char status[256];          /* status text, truncated to fit by strlcpy */
};
136
137 struct timeval *
138 noit_check_stats_whence(stats_t *s, struct timeval *n) {
139   if(n) memcpy(&s->whence, n, sizeof(*n));
140   return &s->whence;
141 }
142 int8_t
143 noit_check_stats_available(stats_t *s, int8_t *n) {
144   if(n) s->available = *n;
145   return s->available;
146 }
147 int8_t
148 noit_check_stats_state(stats_t *s, int8_t *n) {
149   if(n) s->state = *n;
150   return s->state;
151 }
152 u_int32_t
153 noit_check_stats_duration(stats_t *s, u_int32_t *n) {
154   if(n) s->duration = *n;
155   return s->duration;
156 }
157 const char *
158 noit_check_stats_status(stats_t *s, const char *n) {
159   if(n) strlcpy(s->status, n, sizeof(s->status));
160   return s->status;
161 }
162 mtev_hash_table *
163 noit_check_stats_metrics(stats_t *s) {
164   return &s->metrics;
165 }
166 void
167 noit_stats_set_whence(noit_check_t *c, struct timeval *t) {
168   (void)noit_check_stats_whence(noit_check_get_stats_inprogress(c), t);
169 }
170 void
171 noit_stats_set_state(noit_check_t *c, int8_t t) {
172   (void)noit_check_stats_state(noit_check_get_stats_inprogress(c), &t);
173 }
174 void
175 noit_stats_set_duration(noit_check_t *c, u_int32_t t) {
176   (void)noit_check_stats_duration(noit_check_get_stats_inprogress(c), &t);
177 }
178 void
179 noit_stats_set_status(noit_check_t *c, const char *s) {
180   (void)noit_check_stats_status(noit_check_get_stats_inprogress(c), s);
181 }
182 void
183 noit_stats_set_available(noit_check_t *c, int8_t t) {
184   (void)noit_check_stats_available(noit_check_get_stats_inprogress(c), &t);
185 }
186 static void
187 noit_check_safe_free_stats(void *vs) {
188   stats_t *s = vs;
189   mtev_hash_destroy(&s->metrics, NULL, (void (*)(void *))free_metric);
190 }
191 static stats_t *
192 noit_check_stats_alloc() {
193   stats_t *n;
194   n = mtev_memory_safe_malloc_cleanup(sizeof(*n), noit_check_safe_free_stats);
195   memset(n, 0, sizeof(*n));
196   mtev_hash_init(&n->metrics);
197   return n;
198 }
199 static void *
200 noit_check_stats_set_calloc() {
201   int i;
202   stats_t **s;
203   s = calloc(sizeof(stats_t *), 3);
204   for(i=0;i<3;i++) s[i] = noit_check_stats_alloc();
205   return s;
206 }
207
/* Scheduling slots are 20 ms wide and cover one 60-second window; they are
 * used to distribute check start times. */
#define SCHEDULE_GRANULARITY 20
/* Number of scheduling slots in one second. */
#define SLOTS_PER_SECOND (1000/SCHEDULE_GRANULARITY)
/* Capacity of reg_module_names[]. */
#define MAX_MODULE_REGISTRATIONS 64

/* used to manage per-check generic module metadata */
struct vp_w_free {
  void *ptr;
  void (*freefunc)(void *);
};

static mtev_boolean system_needs_causality = mtev_false;
/* Overridden from the "noit/@text_size_limit" config attribute in
 * noit_poller_init. */
static int text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
/* Number of entries in use in reg_module_names[]. */
static int reg_module_id = 0;
static char *reg_module_names[MAX_MODULE_REGISTRATIONS] = { NULL };
static int reg_module_used = -1;
static u_int64_t check_completion_count = 0ULL;
/* Updated atomically by noit_check_metric_count_add. */
static u_int64_t check_metrics_seen = 0ULL;
/* Held around accesses to polls and polls_by_name. */
static pthread_mutex_t polls_lock = PTHREAD_MUTEX_INITIALIZER;
/* Checks keyed by their UUID (UUID_SIZE bytes). */
static mtev_hash_table polls = MTEV_HASH_EMPTY;
static mtev_hash_table dns_ignore_list = MTEV_HASH_EMPTY;
/* Transient checks, ordered by checkid then period (__watchlist_compare). */
static mtev_skiplist watchlist = { 0 };
/* Non-transient checks, ordered by target then name (__check_name_compare). */
static mtev_skiplist polls_by_name = { 0 };
/* Incremented on every noit_poller_process_checks call. */
static u_int32_t __config_load_generation = 0;
/* Per-slot and per-second counters maintained by check_slots_adjust_tv. */
static unsigned short check_slots_count[60000 / SCHEDULE_GRANULARITY] = { 0 },
                      check_slots_seconds_count[60] = { 0 };
234
235 static noit_check_t *
236 noit_poller_lookup__nolock(uuid_t in) {
237   void *vcheck;
238   if(mtev_hash_retrieve(&polls, (char *)in, UUID_SIZE, &vcheck))
239     return (noit_check_t *)vcheck;
240   return NULL;
241 }
242 static noit_check_t *
243 noit_poller_lookup_by_name__nolock(char *target, char *name) {
244   noit_check_t tmp_check;
245   memset(&tmp_check, 0, sizeof(tmp_check));
246   tmp_check.target = target;
247   tmp_check.name = name;
248   return mtev_skiplist_find(&polls_by_name, &tmp_check, NULL);
249 }
250
251 static int
252 noit_console_show_timing_slots(mtev_console_closure_t ncct,
253                                int argc, char **argv,
254                                mtev_console_state_t *dstate,
255                                void *closure) {
256   int i, j;
257   const int upl = (60000 / SCHEDULE_GRANULARITY) / 60;
258   for(i=0;i<60;i++) {
259     nc_printf(ncct, "[%02d] %04d: ", i, check_slots_seconds_count[i]);
260     for(j=i*upl;j<(i+1)*upl;j++) {
261       char cp = '!';
262       if(check_slots_count[j] < 10) cp = '0' + check_slots_count[j];
263       else if(check_slots_count[j] < 36) cp = 'a' + (check_slots_count[j] - 10);
264       nc_printf(ncct, "%c", cp);
265     }
266     nc_printf(ncct, "\n");
267   }
268   return 0;
269 }
270 static int
271 noit_check_add_to_list(noit_check_t *new_check, const char *newname) {
272   char *oldname = NULL, *newnamecopy;
273   if(newname) {
274     /* track this stuff outside the lock to avoid allocs */
275     oldname = new_check->name;
276     newnamecopy = strdup(newname);
277   }
278   pthread_mutex_lock(&polls_lock);
279   if(!(new_check->flags & NP_TRANSIENT)) {
280     assert(new_check->name || newname);
281     /* This remove could fail -- no big deal */
282     if(new_check->name != NULL)
283       mtev_skiplist_remove(&polls_by_name, new_check, NULL);
284
285     /* optional update the name (at the critical point) */
286     if(newname) new_check->name = newnamecopy;
287
288     /* This insert could fail.. which means we have a conflict on
289      * target`name.  That should result in the check being disabled. */
290     if(!mtev_skiplist_insert(&polls_by_name, new_check)) {
291       mtevL(noit_error, "Check %s`%s disabled due to naming conflict\n",
292             new_check->target, new_check->name);
293       new_check->flags |= NP_DISABLED;
294     }
295     if(oldname) free(oldname);
296   }
297   pthread_mutex_unlock(&polls_lock);
298   return 1;
299 }
300
301 u_int64_t noit_check_metric_count() {
302   return check_metrics_seen;
303 }
304 void noit_check_metric_count_add(int add) {
305   mtev_atomic64_t *n = (mtev_atomic64_t *)&check_metrics_seen;
306   mtev_atomic64_t v = (mtev_atomic64_t)add;
307   mtev_atomic_add64(n, v);
308 }
309
310 u_int64_t noit_check_completion_count() {
311   return check_completion_count;
312 }
/* Forward declarations for functions defined later in this file and
 * referenced from noit_poller_init. */
static void register_console_check_commands();
static int check_recycle_bin_processor(eventer_t, int, void *,
                                       struct timeval *);
316
317 static int
318 check_slots_find_smallest(int sec, struct timeval* period) {
319   int i, j, cyclic, random_offset, jbase = 0, mini = 0, minj = 0;
320   unsigned short min_running_i = 0xffff, min_running_j = 0xffff;
321   int period_seconds = period->tv_sec;
322
323   /* If we're greater than sixty seconds, we should do our
324    * initial scheduling as if the period was sixty seconds. */
325   if (period_seconds > 60)
326     period_seconds = 60;
327
328   for(i=0;i<period_seconds;i++) {
329     int adj_i = (i + sec) % 60;
330     if(check_slots_seconds_count[adj_i] < min_running_i) {
331       min_running_i = check_slots_seconds_count[adj_i];
332       mini = adj_i;
333     }
334   }
335   jbase = mini * (1000/SCHEDULE_GRANULARITY);
336   random_offset = drand48() * SLOTS_PER_SECOND;
337   for(cyclic=0;cyclic<SLOTS_PER_SECOND;cyclic++) {
338     j = jbase + ((random_offset + cyclic) % SLOTS_PER_SECOND);
339     if(check_slots_count[j] < min_running_j) {
340       min_running_j = check_slots_count[j];
341       minj = j;
342     }
343   }
344   return (minj * SCHEDULE_GRANULARITY) + drand48() * SCHEDULE_GRANULARITY;
345 }
346 static void
347 check_slots_adjust_tv(struct timeval *tv, short adj) {
348   int offset_ms, idx;
349   offset_ms = (tv->tv_sec % 60) * 1000 + (tv->tv_usec / 1000);
350   idx = offset_ms / SCHEDULE_GRANULARITY;
351   check_slots_count[idx] += adj;
352   check_slots_seconds_count[offset_ms / 1000] += adj;
353 }
/* Count one more check in the slot that tv falls into. */
void check_slots_inc_tv(struct timeval *tv) {
  const short one_more = 1;
  check_slots_adjust_tv(tv, one_more);
}
/* Count one fewer check in the slot that tv falls into. */
void check_slots_dec_tv(struct timeval *tv) {
  const short one_less = -1;
  check_slots_adjust_tv(tv, one_less);
}
/* Return 1 when p is non-NULL and consists solely of printable characters
 * (an empty string qualifies), 0 otherwise.
 *
 * Bytes are examined as unsigned char: passing a negative plain char to
 * isprint() is undefined behavior, which the previous version did for any
 * byte >= 0x80 on platforms where char is signed. */
static int
noit_check_generic_safe_string(const char *p) {
  const unsigned char *c;
  if(!p) return 0;
  for(c = (const unsigned char *)p; *c; c++) {
    if(!isprint(*c)) return 0;
  }
  return 1;
}
/* Return 1 when p is an acceptable check target (non-NULL, printable
 * characters only), 0 otherwise. */
int
noit_check_validate_target(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
/* Return 1 when p is an acceptable check name (non-NULL, printable
 * characters only), 0 otherwise. */
int
noit_check_validate_name(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
378 const char *
379 noit_check_available_string(int16_t available) {
380   switch(available) {
381     case NP_AVAILABLE:    return "available";
382     case NP_UNAVAILABLE:  return "unavailable";
383     case NP_UNKNOWN:      return "unknown";
384   }
385   return NULL;
386 }
387 const char *
388 noit_check_state_string(int16_t state) {
389   switch(state) {
390     case NP_GOOD:         return "good";
391     case NP_BAD:          return "bad";
392     case NP_UNKNOWN:      return "unknown";
393   }
394   return NULL;
395 }
396 static int __check_name_compare(const void *a, const void *b) {
397   const noit_check_t *ac = a;
398   const noit_check_t *bc = b;
399   int rv;
400   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
401   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
402   return 0;
403 }
404 static int __watchlist_compare(const void *a, const void *b) {
405   const noit_check_t *ac = a;
406   const noit_check_t *bc = b;
407   int rv;
408   if((rv = memcmp(ac->checkid, bc->checkid, sizeof(ac->checkid))) != 0) return rv;
409   if(ac->period < bc->period) return -1;
410   if(ac->period == bc->period) return 0;
411   return 1;
412 }
413 static int __check_target_ip_compare(const void *a, const void *b) {
414   const noit_check_t *ac = a;
415   const noit_check_t *bc = b;
416   int rv;
417   if((rv = strcmp(ac->target_ip, bc->target_ip)) != 0) return rv;
418   if (ac->name == NULL) return 1;
419   if (bc->name == NULL) return -1;
420   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
421   return 1;
422 }
423 static int __check_target_compare(const void *a, const void *b) {
424   const noit_check_t *ac = a;
425   const noit_check_t *bc = b;
426   int rv;
427   if (ac->target == NULL) return 1;
428   if (bc->target == NULL) return -1;
429   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
430   if (ac->name == NULL) return 1;
431   if (bc->name == NULL) return -1;
432   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
433   return 1;
434 }
435 int
436 noit_calc_rtype_flag(char *resolve_rtype) {
437   int flags = 0;
438   if(resolve_rtype) {
439     flags |= strcmp(resolve_rtype, PREFER_IPV6) == 0 ||
440              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_PREFER_IPV6 : 0;
441     flags |= strcmp(resolve_rtype, FORCE_IPV4) == 0 ||
442              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_SINGLE_RESOLVE : 0;
443   }
444   return flags;
445 }
446 void
447 noit_check_fake_last_check(noit_check_t *check,
448                            struct timeval *lc, struct timeval *_now) {
449   struct timeval now, period, lc_copy;
450   int balance_ms;
451
452   if(!_now) {
453     gettimeofday(&now, NULL);
454     _now = &now;
455   }
456   period.tv_sec = check->period / 1000;
457   period.tv_usec = (check->period % 1000) * 1000;
458   sub_timeval(*_now, period, lc);
459
460   /* We need to set the last check value based on the period, but
461    * we also need to store a value that is based around the one-minute
462    * time to properly increment the slots; otherwise, the slots will
463    * get all messed up */
464   if(!(check->flags & NP_TRANSIENT) && check->period) {
465     balance_ms = check_slots_find_smallest(_now->tv_sec+1, &period);
466     lc->tv_sec = (lc->tv_sec / 60) * 60 + balance_ms / 1000;
467     lc->tv_usec = (balance_ms % 1000) * 1000;
468     memcpy(&lc_copy, lc, sizeof(lc_copy));
469     if(compare_timeval(*_now, *lc) < 0) {
470       do {
471         sub_timeval(*lc, period, lc);
472       } while(compare_timeval(*_now, *lc) < 0);
473     }
474     else {
475       struct timeval test;
476       while(1) {
477         add_timeval(*lc, period, &test);
478         if(compare_timeval(*_now, test) < 0) break;
479         memcpy(lc, &test, sizeof(test));
480       }
481     }
482   }
483   else {
484     memcpy(&lc_copy, lc, sizeof(lc_copy));
485   }
486  
487   /* now, we're going to do an even distribution using the slots */
488   if(!(check->flags & NP_TRANSIENT)) check_slots_inc_tv(&lc_copy);
489 }
490 void
491 noit_poller_process_checks(const char *xpath) {
492   int i, flags, cnt = 0, found;
493   mtev_conf_section_t *sec;
494   __config_load_generation++;
495   sec = mtev_conf_get_sections(NULL, xpath, &cnt);
496   for(i=0; i<cnt; i++) {
497     void *vcheck;
498     char uuid_str[37];
499     char target[256] = "";
500     char module[256] = "";
501     char name[256] = "";
502     char filterset[256] = "";
503     char oncheck[1024] = "";
504     char resolve_rtype[16] = "";
505     int ridx;
506     int no_period = 0;
507     int no_oncheck = 0;
508     int period = 0, timeout = 0;
509     mtev_boolean disabled = mtev_false, busted = mtev_false;
510     uuid_t uuid, out_uuid;
511     int64_t config_seq = 0;
512     mtev_hash_table *options;
513     mtev_hash_table **moptions = NULL;
514     mtev_boolean moptions_used = mtev_false, backdated = mtev_false;
515
516     /* We want to heartbeat here... otherwise, if a lot of checks are
517      * configured or if we're running on a slower system, we could
518      * end up getting watchdog killed before we get a chance to run
519      * any checks */
520     mtev_watchdog_child_heartbeat();
521
522     if(reg_module_id > 0) {
523       moptions = alloca(reg_module_id * sizeof(mtev_hash_table *));
524       memset(moptions, 0, reg_module_id * sizeof(mtev_hash_table *));
525       moptions_used = mtev_true;
526     }
527
528 #define NEXT(...) mtevL(noit_stderr, __VA_ARGS__); continue
529 #define MYATTR(type,a,...) mtev_conf_get_##type(sec[i], "@" #a, __VA_ARGS__)
530 #define INHERIT(type,a,...) \
531   mtev_conf_get_##type(sec[i], "ancestor-or-self::node()/@" #a, __VA_ARGS__)
532
533     if(!MYATTR(stringbuf, uuid, uuid_str, sizeof(uuid_str))) {
534       mtevL(noit_stderr, "check %d has no uuid\n", i+1);
535       continue;
536     }
537
538     MYATTR(int64, seq, &config_seq);
539
540     if(uuid_parse(uuid_str, uuid)) {
541       mtevL(noit_stderr, "check uuid: '%s' is invalid\n", uuid_str);
542       continue;
543     }
544
545     if(!INHERIT(stringbuf, target, target, sizeof(target))) {
546       mtevL(noit_stderr, "check uuid: '%s' has no target\n", uuid_str);
547       busted = mtev_true;
548     }
549     if(!noit_check_validate_target(target)) {
550       mtevL(noit_stderr, "check uuid: '%s' has malformed target\n", uuid_str);
551       busted = mtev_true;
552     }
553     if(!INHERIT(stringbuf, module, module, sizeof(module))) {
554       mtevL(noit_stderr, "check uuid: '%s' has no module\n", uuid_str);
555       busted = mtev_true;
556     }
557
558     if(!INHERIT(stringbuf, filterset, filterset, sizeof(filterset)))
559       filterset[0] = '\0';
560    
561     if (!INHERIT(stringbuf, resolve_rtype, resolve_rtype, sizeof(resolve_rtype)))
562       strlcpy(resolve_rtype, PREFER_IPV4, sizeof(resolve_rtype));
563
564     if(!MYATTR(stringbuf, name, name, sizeof(name)))
565       strlcpy(name, module, sizeof(name));
566
567     if(!noit_check_validate_name(name)) {
568       mtevL(noit_stderr, "check uuid: '%s' has malformed name\n", uuid_str);
569       busted = mtev_true;
570     }
571
572     if(!INHERIT(int, period, &period) || period == 0)
573       no_period = 1;
574
575     if(!INHERIT(stringbuf, oncheck, oncheck, sizeof(oncheck)) || !oncheck[0])
576       no_oncheck = 1;
577
578     if(no_period && no_oncheck) {
579       mtevL(noit_stderr, "check uuid: '%s' has neither period nor oncheck\n",
580             uuid_str);
581       busted = mtev_true;
582     }
583     if(!(no_period || no_oncheck)) {
584       mtevL(noit_stderr, "check uuid: '%s' has oncheck and period.\n",
585             uuid_str);
586       busted = mtev_true;
587     }
588     if(!INHERIT(int, timeout, &timeout)) {
589       mtevL(noit_stderr, "check uuid: '%s' has no timeout\n", uuid_str);
590       busted = mtev_true;
591     }
592     if(!no_period && timeout >= period) {
593       mtevL(noit_stderr, "check uuid: '%s' timeout > period\n", uuid_str);
594       timeout = period/2;
595     }
596     options = mtev_conf_get_hash(sec[i], "config");
597     for(ridx=0; ridx<reg_module_id; ridx++) {
598       moptions[ridx] = mtev_conf_get_namespaced_hash(sec[i], "config",
599                                                      reg_module_names[ridx]);
600     }
601
602     INHERIT(boolean, disable, &disabled);
603     flags = 0;
604     if(busted) flags |= (NP_UNCONFIG|NP_DISABLED);
605     else if(disabled) flags |= NP_DISABLED;
606
607     flags |= noit_calc_rtype_flag(resolve_rtype);
608
609     pthread_mutex_lock(&polls_lock);
610     found = mtev_hash_retrieve(&polls, (char *)uuid, UUID_SIZE, &vcheck);
611     if(found) {
612       noit_check_t *check = (noit_check_t *)vcheck;
613       /* Possibly reset the seq */
614       if(config_seq < 0) check->config_seq = 0;
615
616       /* Otherwise note a non-increasing sequence */
617       if(check->config_seq > config_seq) backdated = mtev_true;
618     }
619     pthread_mutex_unlock(&polls_lock);
620     if(found)
621       noit_poller_deschedule(uuid);
622     if(backdated) {
623       mtevL(noit_error, "Check config seq backwards, ignored\n");
624     }
625     else {
626       noit_poller_schedule(target, module, name, filterset, options,
627                            moptions_used ? moptions : NULL,
628                            period, timeout, oncheck[0] ? oncheck : NULL,
629                            config_seq, flags, uuid, out_uuid);
630       mtevL(noit_debug, "loaded uuid: %s\n", uuid_str);
631     }
632     for(ridx=0; ridx<reg_module_id; ridx++) {
633       if(moptions[ridx]) {
634         mtev_hash_destroy(moptions[ridx], free, free);
635         free(moptions[ridx]);
636       }
637     }
638     mtev_hash_destroy(options, free, free);
639     free(options);
640   }
641   if(sec) free(sec);
642 }
643
644 int
645 noit_check_activate(noit_check_t *check) {
646   noit_module_t *mod;
647   if(NOIT_CHECK_LIVE(check)) return 0;
648   mod = noit_module_lookup(check->module);
649   if(mod && mod->initiate_check) {
650     if((check->flags & NP_DISABLED) == 0) {
651       mod->initiate_check(mod, check, 0, NULL);
652       return 1;
653     }
654     else
655       mtevL(noit_debug, "Skipping %s`%s, disabled.\n",
656             check->target, check->name);
657   }
658   else {
659     if(!mod) {
660       mtevL(noit_stderr, "Cannot find module '%s'\n", check->module);
661       check->flags |= NP_DISABLED;
662     }
663   }
664   return 0;
665 }
666
667 void
668 noit_poller_initiate() {
669   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
670   uuid_t key_id;
671   int klen;
672   void *vcheck;
673   /* This is only ever called in the beginning, no lock needed */
674   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
675                        &vcheck)) {
676     noit_check_activate((noit_check_t *)vcheck);
677     mtev_watchdog_child_heartbeat();
678   }
679 }
680
681 void
682 noit_poller_flush_epoch(int oldest_allowed) {
683   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
684   uuid_t key_id;
685   int klen, i;
686   void *vcheck;
687 #define TOFREE_PER_ITER 1024
688   noit_check_t *tofree[TOFREE_PER_ITER];
689
690   /* Cleanup any previous causal map */
691   while(1) {
692     i = 0;
693     pthread_mutex_lock(&polls_lock);
694     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
695                          &vcheck) && i < TOFREE_PER_ITER) {
696       noit_check_t *check = (noit_check_t *)vcheck;
697       if(check->generation < oldest_allowed) {
698         tofree[i++] = check;
699       }
700     }
701     pthread_mutex_unlock(&polls_lock);
702     if(i==0) break;
703     while(i>0) noit_poller_deschedule(tofree[--i]->checkid);
704   }
705 #undef TOFREE_PER_ITER
706 }
707
708 void
709 noit_poller_make_causal_map() {
710   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
711   uuid_t key_id;
712   int klen;
713   void *vcheck;
714
715   if(!system_needs_causality) return;
716
717   /* set it to false, we'll set it to true during the scan if we
718    * find anything causal.  */
719   system_needs_causality = mtev_false;
720
721   /* Cleanup any previous causal map */
722   pthread_mutex_lock(&polls_lock);
723   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
724                        &vcheck)) {
725     noit_check_t *check = (noit_check_t *)vcheck;
726     dep_list_t *dep;
727     while((dep = check->causal_checks) != NULL) {
728       check->causal_checks = dep->next;
729       free(dep);
730     }
731   }
732
733   memset(&iter, 0, sizeof(iter));
734   /* Walk all checks and add check dependencies to their parents */
735   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
736                        &vcheck)) {
737     noit_check_t *check = (noit_check_t *)vcheck, *parent;
738     if(check->oncheck) {
739       /* This service is causally triggered by another service */
740       uuid_t id;
741       char fullcheck[1024];
742       char *name = check->oncheck;
743       char *target = NULL;
744
745       system_needs_causality = mtev_true;
746       mtevL(noit_debug, "Searching for upstream trigger on %s\n", name);
747       parent = NULL;
748       if(uuid_parse(check->oncheck, id) == 0) {
749         target = "";
750         parent = noit_poller_lookup__nolock(id);
751       }
752       else if((target = strchr(check->oncheck, '`')) != NULL) {
753         strlcpy(fullcheck, check->oncheck, target + 1 - check->oncheck);
754         name = target + 1;
755         target = fullcheck;
756         parent = noit_poller_lookup_by_name__nolock(target, name);
757       }
758       else {
759         target = check->target;
760         parent = noit_poller_lookup_by_name__nolock(target, name);
761       }
762
763       if(!parent) {
764         check->flags |= NP_DISABLED;
765         mtevL(noit_stderr, "Disabling check %s`%s, can't find oncheck %s`%s\n",
766               check->target, check->name, target, name);
767       }
768       else {
769         dep_list_t *dep;
770         dep = malloc(sizeof(*dep));
771         dep->check = check;
772         dep->next = parent->causal_checks;
773         parent->causal_checks = dep;
774         mtevL(noit_debug, "Causal map %s`%s --> %s`%s\n",
775               parent->target, parent->name, check->target, check->name);
776       }
777     }
778   }
779   pthread_mutex_unlock(&polls_lock);
780   /* We found some causal checks, so we might need to activate stuff */
781   if(system_needs_causality) noit_poller_initiate();
782 }
783 void
784 noit_poller_reload(const char *xpath)
785 {
786   noit_poller_process_checks(xpath ? xpath : "/noit/checks//check");
787   if(!xpath) {
788     /* Full reload, we need to wipe old checks */
789     noit_poller_flush_epoch(__config_load_generation);
790   }
791   noit_poller_make_causal_map();
792 }
793 void
794 noit_check_dns_ignore_tld(const char* extension, const char* ignore) {
795   mtev_hash_replace(&dns_ignore_list, strdup(extension), strlen(extension), strdup(ignore), NULL, NULL);
796 }
797 static void
798 noit_check_dns_ignore_list_init() {
799   mtev_conf_section_t* dns;
800   int cnt;
801
802   dns = mtev_conf_get_sections(NULL, "/noit/dns/extension", &cnt);
803   if(dns) {
804     int i = 0;
805     for (i = 0; i < cnt; i++) {
806       char* extension;
807       char* ignore;
808       if(!mtev_conf_get_string(dns[i], "self::node()/@value", &extension)) {
809         continue;
810       }
811       if(!mtev_conf_get_string(dns[i], "self::node()/@ignore", &ignore)) {
812         continue;
813       }
814       noit_check_dns_ignore_tld(extension, ignore);
815     }
816   }
817 }
/* One-time initialization of the poller: seeds the random generator,
 * initializes the resolver and check tools, sets up the name and watch
 * skiplists, registers console commands and the recycle-bin timer, reads
 * global settings and finally performs a full check reload. */
void
noit_poller_init() {
  /* seed drand48(), which is used when picking scheduling slots */
  srand48((getpid() << 16) ^ time(NULL));
  noit_check_resolver_init();
  noit_check_tools_init();
  /* primary order is target`name; two additional indexes order by
   * target_ip and by target */
  mtev_skiplist_init(&polls_by_name);
  mtev_skiplist_set_compare(&polls_by_name, __check_name_compare,
                            __check_name_compare);
  mtev_skiplist_add_index(&polls_by_name, __check_target_ip_compare,
                            __check_target_ip_compare);
  mtev_skiplist_add_index(&polls_by_name, __check_target_compare,
                            __check_target_compare);
  mtev_skiplist_init(&watchlist);
  mtev_skiplist_set_compare(&watchlist, __watchlist_compare,
                            __watchlist_compare);
  register_console_check_commands();
  /* schedule the recycle bin processor RECYCLE_INTERVAL seconds from now */
  eventer_name_callback("check_recycle_bin_processor",
                        check_recycle_bin_processor);
  eventer_add_in_s_us(check_recycle_bin_processor, NULL, RECYCLE_INTERVAL, 0);
  /* a missing or non-positive text size limit falls back to the default */
  mtev_conf_get_int(NULL, "noit/@text_size_limit", &text_size_limit);
  if (text_size_limit <= 0) {
    text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
  }
  noit_check_dns_ignore_list_init();
  /* NULL xpath: full reload of all configured checks */
  noit_poller_reload(NULL);
}
844
845 int
846 noit_poller_check_count() {
847   return polls_by_name.size;
848 }
849
850 int
851 noit_poller_transient_check_count() {
852   return watchlist.size;
853 }
854
855 noit_check_t *
856 noit_check_clone(uuid_t in) {
857   int i;
858   noit_check_t *checker, *new_check;
859   void *vcheck;
860   if(mtev_hash_retrieve(&polls,
861                         (char *)in, UUID_SIZE,
862                         &vcheck) == 0) {
863     return NULL;
864   }
865   checker = (noit_check_t *)vcheck;
866   if(checker->oncheck) {
867     return NULL;
868   }
869   new_check = calloc(1, sizeof(*new_check));
870   memcpy(new_check, checker, sizeof(*new_check));
871   new_check->target = strdup(new_check->target);
872   new_check->module = strdup(new_check->module);
873   new_check->name = strdup(new_check->name);
874   new_check->filterset = strdup(new_check->filterset);
875   new_check->flags = 0;
876   new_check->fire_event = NULL;
877   memset(&new_check->last_fire_time, 0, sizeof(new_check->last_fire_time));
878   new_check->statistics = noit_check_stats_set_calloc();
879   new_check->closure = NULL;
880   new_check->config = calloc(1, sizeof(*new_check->config));
881   mtev_hash_merge_as_dict(new_check->config, checker->config);
882   new_check->module_configs = NULL;
883   new_check->module_metadata = NULL;
884
885   for(i=0; i<reg_module_id; i++) {
886     void *src_metadata;
887     mtev_hash_table *src_mconfig;
888     src_mconfig = noit_check_get_module_config(checker, i);
889     if(src_mconfig) {
890       mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
891       mtev_hash_merge_as_dict(t, src_mconfig);
892       noit_check_set_module_config(new_check, i, t);
893     }
894     if(checker->flags & NP_PASSIVE_COLLECTION)
895       if(NULL != (src_metadata = noit_check_get_module_metadata(new_check, i)))
896         noit_check_set_module_metadata(new_check, i, src_metadata, NULL);
897   }
898   return new_check;
899 }
900
901 noit_check_t *
902 noit_check_watch(uuid_t in, int period) {
903   /* First look for a copy that is being watched */
904   int minimum_pi = 1000, granularity_pi = 500;
905   mtev_conf_section_t check_node;
906   char uuid_str[UUID_STR_LEN + 1];
907   char xpath[1024];
908   noit_check_t n, *f;
909
910   uuid_unparse_lower(in, uuid_str);
911
912   mtevL(noit_debug, "noit_check_watch(%s,%d)\n", uuid_str, period);
913   if(period == 0) {
914     return noit_poller_lookup(in);
915   }
916
917   /* Find the check */
918   snprintf(xpath, sizeof(xpath), "//checks//check[@uuid=\"%s\"]", uuid_str);
919   check_node = mtev_conf_get_section(NULL, xpath);
920   mtev_conf_get_int(NULL, "//checks/@transient_min_period", &minimum_pi);
921   mtev_conf_get_int(NULL, "//checks/@transient_period_granularity", &granularity_pi);
922   if(check_node) {
923     mtev_conf_get_int(check_node,
924                       "ancestor-or-self::node()/@transient_min_period",
925                       &minimum_pi);
926     mtev_conf_get_int(check_node,
927                       "ancestor-or-self::node()/@transient_period_granularity",
928                       &granularity_pi);
929   }
930
931   /* apply the bounds */
932   period /= granularity_pi;
933   period *= granularity_pi;
934   period = MAX(period, minimum_pi);
935
936   uuid_copy(n.checkid, in);
937   n.period = period;
938
939   f = mtev_skiplist_find(&watchlist, &n, NULL);
940   if(f) return f;
941   f = noit_check_clone(in);
942   if(!f) return NULL;
943   f->period = period;
944   f->timeout = period - 10;
945   f->flags |= NP_TRANSIENT;
946   mtevL(noit_debug, "Watching %s@%d\n", uuid_str, period);
947   mtev_skiplist_insert(&watchlist, f);
948   return f;
949 }
950
951 noit_check_t *
952 noit_check_get_watch(uuid_t in, int period) {
953   noit_check_t n, *f;
954
955   uuid_copy(n.checkid, in);
956   n.period = period;
957
958   f = mtev_skiplist_find(&watchlist, &n, NULL);
959   return f;
960 }
961
962 void
963 noit_check_transient_add_feed(noit_check_t *check, const char *feed) {
964   char *feedcopy;
965   if(!check->feeds) {
966     check->feeds = calloc(1, sizeof(*check->feeds));
967     mtev_skiplist_init(check->feeds);
968     mtev_skiplist_set_compare(check->feeds,
969                               (mtev_skiplist_comparator_t)strcmp,
970                               (mtev_skiplist_comparator_t)strcmp);
971   }
972   feedcopy = strdup(feed);
973   /* No error on failure -- it's already there */
974   if(mtev_skiplist_insert(check->feeds, feedcopy) == NULL) free(feedcopy);
975   mtevL(noit_debug, "check %s`%s @ %dms has %d feed(s): %s.\n",
976         check->target, check->name, check->period, check->feeds->size, feed);
977 }
978 void
979 noit_check_transient_remove_feed(noit_check_t *check, const char *feed) {
980   if(!check->feeds) return;
981   if(feed) {
982     mtevL(noit_debug, "check %s`%s @ %dms removing 1 of %d feeds: %s.\n",
983           check->target, check->name, check->period, check->feeds->size, feed);
984     mtev_skiplist_remove(check->feeds, feed, free);
985   }
986   if(check->feeds->size == 0) {
987     char uuid_str[UUID_STR_LEN + 1];
988     uuid_unparse_lower(check->checkid, uuid_str);
989     mtevL(noit_debug, "Unwatching %s@%d\n", uuid_str, check->period);
990     mtev_skiplist_remove(&watchlist, check, NULL);
991     mtev_skiplist_destroy(check->feeds, free);
992     free(check->feeds);
993     check->feeds = NULL;
994     if(check->flags & NP_TRANSIENT) {
995       mtevL(noit_debug, "check %s`%s @ %dms has no more listeners.\n",
996             check->target, check->name, check->period);
997       check->flags |= NP_KILLED;
998     }
999     noit_poller_free_check(check);
1000   }
1001 }
1002
1003 mtev_boolean
1004 noit_check_is_valid_target(const char *target) {
1005   int8_t family;
1006   int rv;
1007   union {
1008     struct in_addr addr4;
1009     struct in6_addr addr6;
1010   } a;
1011
1012   family = AF_INET;
1013   rv = inet_pton(family, target, &a);
1014   if(rv != 1) {
1015     family = AF_INET6;
1016     rv = inet_pton(family, target, &a);
1017     if(rv != 1) {
1018       return mtev_false;
1019     }
1020   }
1021   return mtev_true;
1022 }
1023 int
1024 noit_check_set_ip(noit_check_t *new_check,
1025                   const char *ip_str, const char *newname) {
1026   int8_t family;
1027   int rv, failed = 0;
1028   char old_target_ip[INET6_ADDRSTRLEN];
1029   union {
1030     struct in_addr addr4;
1031     struct in6_addr addr6;
1032   } a;
1033
1034   memset(old_target_ip, 0, INET6_ADDRSTRLEN);
1035   strlcpy(old_target_ip, new_check->target_ip, sizeof(old_target_ip));
1036
1037   family = NOIT_CHECK_PREFER_V6(new_check) ? AF_INET6 : AF_INET;
1038   rv = inet_pton(family, ip_str, &a);
1039   if(rv != 1) {
1040     if (!NOIT_CHECK_SINGLE_RESOLVE(new_check)) {
1041       family = family == AF_INET ? AF_INET6 : AF_INET;
1042       rv = inet_pton(family, ip_str, &a);
1043       if(rv != 1) {
1044         family = AF_INET;
1045         memset(&a, 0, sizeof(a));
1046         failed = -1;
1047       }
1048     } else {
1049       failed = -1;
1050     }
1051   }
1052
1053   new_check->target_family = family;
1054   memcpy(&new_check->target_addr, &a, sizeof(a));
1055   new_check->target_ip[0] = '\0';
1056   if(failed == 0)
1057     if(inet_ntop(new_check->target_family,
1058                  &new_check->target_addr,
1059                  new_check->target_ip,
1060                  sizeof(new_check->target_ip)) == NULL) {
1061       mtevL(noit_error, "inet_ntop failed [%s] -> %d\n", ip_str, errno);
1062     }
1063   /*
1064    * new_check->name could be null if this check is being set for the
1065    * first time.  add_to_list will set it.
1066    */
1067   if (new_check->name == NULL ||
1068       strcmp(old_target_ip, new_check->target_ip) != 0) {
1069     noit_check_add_to_list(new_check, newname);
1070   }
1071
1072   if(new_check->name == NULL && newname != NULL) {
1073     assert(new_check->flags & NP_TRANSIENT);
1074     new_check->name = strdup(newname);
1075   }
1076
1077   return failed;
1078 }
1079 int
1080 noit_check_resolve(noit_check_t *check) {
1081   uint8_t family_pref = NOIT_CHECK_PREFER_V6(check) ? AF_INET6 : AF_INET;
1082   char ipaddr[INET6_ADDRSTRLEN];
1083   if(!NOIT_CHECK_SHOULD_RESOLVE(check)) return 1; /* success, not required */
1084   noit_check_resolver_remind(check->target);
1085   if(noit_check_resolver_fetch(check->target, ipaddr, sizeof(ipaddr),
1086                                family_pref) >= 0) {
1087     check->flags |= NP_RESOLVED;
1088     noit_check_set_ip(check, ipaddr, NULL);
1089     return 0;
1090   }
1091   check->flags &= ~NP_RESOLVED;
1092   return -1;
1093 }
1094 int
1095 noit_check_update(noit_check_t *new_check,
1096                   const char *target,
1097                   const char *name,
1098                   const char *filterset,
1099                   mtev_hash_table *config,
1100                   mtev_hash_table **mconfigs,
1101                   u_int32_t period,
1102                   u_int32_t timeout,
1103                   const char *oncheck,
1104           int64_t seq,
1105                   int flags) {
1106   char uuid_str[37];
1107   int mask = NP_DISABLED | NP_UNCONFIG;
1108
1109   assert(name);
1110   uuid_unparse_lower(new_check->checkid, uuid_str);
1111   if(!new_check->statistics) new_check->statistics = noit_check_stats_set_calloc();
1112   if(seq < 0) new_check->config_seq = seq = 0;
1113   if(new_check->config_seq > seq) {
1114     mtevL(mtev_error, "noit_check_update[%s] skipped: seq backwards\n", uuid_str);
1115     return -1;
1116   }
1117
1118   /* selfcheck will identify this node in a cluster */
1119   if(mtev_cluster_enabled() && !strcmp(new_check->module, "selfcheck")) {
1120     uuid_t cluster_id;
1121     mtev_cluster_get_self(cluster_id);
1122     if(uuid_compare(cluster_id, new_check->checkid)) {
1123       mtevL(mtev_error, "Setting global cluster identity to '%s'\n", uuid_str);
1124       mtev_cluster_set_self(new_check->checkid);
1125     }
1126   }
1127
1128   if(NOIT_CHECK_RUNNING(new_check)) {
1129     char module[256];
1130     uuid_t id, dummy;
1131     uuid_copy(id, new_check->checkid);
1132     strlcpy(module, new_check->module, sizeof(module));
1133     noit_poller_deschedule(id);
1134     return noit_poller_schedule(target, module, name, filterset,
1135                                 config, mconfigs, period, timeout, oncheck,
1136                                 seq, flags, id, dummy);
1137   }
1138
1139   new_check->generation = __config_load_generation;
1140   if(new_check->target) free(new_check->target);
1141   new_check->target = strdup(target);
1142
1143   // apply resolution flags to check.
1144   if (flags & NP_PREFER_IPV6)
1145     new_check->flags |= NP_PREFER_IPV6;
1146   else
1147     new_check->flags &= ~NP_PREFER_IPV6;
1148   if (flags & NP_SINGLE_RESOLVE)
1149     new_check->flags |= NP_SINGLE_RESOLVE;
1150   else
1151     new_check->flags &= ~NP_SINGLE_RESOLVE;
1152   if (flags & NP_RESOLVE)
1153     new_check->flags |= NP_RESOLVE;
1154   else
1155     new_check->flags &= ~NP_RESOLVE;
1156
1157   /* This sets both the name and the target_addr */
1158   if(noit_check_set_ip(new_check, target, name)) {
1159     mtev_boolean should_resolve;
1160     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1161     const char *key, *value;
1162     int klen;
1163     char* extension = strrchr(target, '.');
1164     new_check->flags |= NP_RESOLVE;
1165     new_check->flags &= ~NP_RESOLVED;
1166     /* If we match any of the extensions we're supposed to ignore,
1167      * don't resolve */
1168     if (extension && (strlen(extension) > 1)) {
1169       while(mtev_hash_next(&dns_ignore_list, &iter, &key, &klen, (void**)&value)) {
1170         if ((!strcmp("true", value)) && (!strcmp(extension+1, key))) {
1171             new_check->flags &= ~NP_RESOLVE;
1172             break;
1173         }
1174       }
1175     }
1176     if(noit_check_should_resolve_targets(&should_resolve) && !should_resolve)
1177       flags |= NP_DISABLED | NP_UNCONFIG;
1178     noit_check_resolve(new_check);
1179   }
1180
1181   if(new_check->filterset) free(new_check->filterset);
1182   new_check->filterset = filterset ? strdup(filterset): NULL;
1183
1184   if(config != NULL) {
1185     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1186     const char *k;
1187     int klen;
1188     void *data;
1189     if(new_check->config) mtev_hash_delete_all(new_check->config, free, free);
1190     else new_check->config = calloc(1, sizeof(*new_check->config));
1191     while(mtev_hash_next(config, &iter, &k, &klen, &data)) {
1192       mtev_hash_store(new_check->config, strdup(k), klen, strdup((char *)data));
1193     }
1194   }
1195   if(mconfigs != NULL) {
1196     int i;
1197     for(i=0; i<reg_module_id; i++) {
1198       mtev_hash_table *t;
1199       if(NULL != (t = noit_check_get_module_config(new_check, i))) {
1200         noit_check_set_module_config(new_check, i, NULL);
1201         mtev_hash_destroy(t, free, free);
1202         free(t);
1203       }
1204       if(mconfigs[i]) {
1205         mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
1206         mtev_hash_merge_as_dict(t, mconfigs[i]);
1207         noit_check_set_module_config(new_check, i, t);
1208       }
1209     }
1210   }
1211   if(new_check->oncheck) free(new_check->oncheck);
1212   new_check->oncheck = oncheck ? strdup(oncheck) : NULL;
1213   if(new_check->oncheck) system_needs_causality = mtev_true;
1214   new_check->period = period;
1215   new_check->timeout = timeout;
1216   new_check->config_seq = seq;
1217
1218   /* Unset what could be set.. then set what should be set */
1219   new_check->flags = (new_check->flags & ~mask) | flags;
1220
1221   check_config_fixup_hook_invoke(new_check);
1222
1223   if((new_check->flags & NP_TRANSIENT) == 0)
1224     noit_check_activate(new_check);
1225
1226   noit_check_add_to_list(new_check, NULL);
1227   noit_check_log_check(new_check);
1228   return 0;
1229 }
1230 int
1231 noit_poller_schedule(const char *target,
1232                      const char *module,
1233                      const char *name,
1234                      const char *filterset,
1235                      mtev_hash_table *config,
1236                      mtev_hash_table **mconfigs,
1237                      u_int32_t period,
1238                      u_int32_t timeout,
1239                      const char *oncheck,
1240                      int64_t seq,
1241                      int flags,
1242                      uuid_t in,
1243                      uuid_t out) {
1244   noit_check_t *new_check;
1245   new_check = calloc(1, sizeof(*new_check));
1246   if(!new_check) return -1;
1247
1248   /* The module and the UUID can never be changed */
1249   new_check->module = strdup(module);
1250   if(uuid_is_null(in))
1251     uuid_generate(new_check->checkid);
1252   else
1253     uuid_copy(new_check->checkid, in);
1254
1255   new_check->statistics = noit_check_stats_set_calloc();
1256   noit_check_update(new_check, target, name, filterset, config, mconfigs,
1257                     period, timeout, oncheck, seq, flags);
1258   assert(mtev_hash_store(&polls,
1259                          (char *)new_check->checkid, UUID_SIZE,
1260                          new_check));
1261   uuid_copy(out, new_check->checkid);
1262
1263   return 0;
1264 }
1265
1266 /* A quick little list of recycleable checks.  This list never really
1267  * grows large, so no sense in thinking too hard about the algorithmic
1268  * complexity.
1269  */
1270 struct _checker_rcb {
1271   noit_check_t *checker;
1272   struct _checker_rcb *next;
1273 };
1274 static struct _checker_rcb *checker_rcb = NULL;
1275 static void recycle_check(noit_check_t *checker) {
1276   struct _checker_rcb *n = malloc(sizeof(*n));
1277   n->checker = checker;
1278   n->next = checker_rcb;
1279   checker_rcb = n;
1280 }
1281 void
1282 noit_poller_free_check(noit_check_t *checker) {
1283   noit_module_t *mod;
1284
1285   if(checker->flags & NP_RUNNING) {
1286     recycle_check(checker);
1287     return;
1288   }
1289
1290   mod = noit_module_lookup(checker->module);
1291   if(mod && mod->cleanup) mod->cleanup(mod, checker);
1292   if(checker->fire_event) {
1293      eventer_remove(checker->fire_event);
1294      free(checker->fire_event->closure);
1295      eventer_free(checker->fire_event);
1296      checker->fire_event = NULL;
1297   }
1298   if(checker->closure) free(checker->closure);
1299   if(checker->target) free(checker->target);
1300   if(checker->module) free(checker->module);
1301   if(checker->name) free(checker->name);
1302   if(checker->config) {
1303     mtev_hash_destroy(checker->config, free, free);
1304     free(checker->config);
1305     checker->config = NULL;
1306   }
1307   if(checker->module_metadata) {
1308     int i;
1309     for(i=0; i<reg_module_id; i++) {
1310       struct vp_w_free *tuple;
1311       tuple = checker->module_metadata[i];
1312       if(tuple) {
1313         if(tuple->freefunc) tuple->freefunc(tuple->ptr);
1314         free(tuple);
1315       }
1316     }
1317     free(checker->module_metadata);
1318   }
1319   if(checker->module_configs) {
1320     int i;
1321     for(i=0; i<reg_module_id; i++) {
1322       if(checker->module_configs[i]) {
1323         mtev_hash_destroy(checker->module_configs[i], free, free);
1324         free(checker->module_configs[i]);
1325       }
1326     }
1327     free(checker->module_configs);
1328   }
1329   mtev_memory_safe_free(stats_inprogress(checker));
1330   mtev_memory_safe_free(stats_current(checker));
1331   mtev_memory_safe_free(stats_previous(checker));
1332   free(checker);
1333 }
1334 static int
1335 check_recycle_bin_processor(eventer_t e, int mask, void *closure,
1336                             struct timeval *now) {
1337   static struct timeval one_minute = { RECYCLE_INTERVAL, 0L };
1338   struct _checker_rcb *prev = NULL, *curr = checker_rcb;
1339   mtevL(noit_debug, "Scanning check recycle bin\n");
1340   while(curr) {
1341     if(!(curr->checker->flags & NP_RUNNING)) {
1342       mtevL(noit_debug, "Check is ready to free.\n");
1343       noit_poller_free_check(curr->checker);
1344       if(prev) prev->next = curr->next;
1345       else checker_rcb = curr->next;
1346       free(curr);
1347       curr = prev ? prev->next : checker_rcb;
1348     }
1349     else {
1350       prev = curr;
1351       curr = curr->next;
1352     }
1353   }
1354   add_timeval(*now, one_minute, &e->whence);
1355   return EVENTER_TIMER;
1356 }
1357
1358 int
1359 noit_poller_deschedule(uuid_t in) {
1360   void *vcheck;
1361   noit_check_t *checker;
1362   if(mtev_hash_retrieve(&polls,
1363                         (char *)in, UUID_SIZE,
1364                         &vcheck) == 0) {
1365     return -1;
1366   }
1367   checker = (noit_check_t *)vcheck;
1368   checker->flags |= (NP_DISABLED|NP_KILLED);
1369
1370   noit_check_log_delete(checker);
1371
1372   assert(mtev_skiplist_remove(&polls_by_name, checker, NULL));
1373   assert(mtev_hash_delete(&polls, (char *)in, UUID_SIZE, NULL, NULL));
1374
1375   noit_poller_free_check(checker);
1376   return 0;
1377 }
1378
1379 noit_check_t *
1380 noit_poller_lookup(uuid_t in) {
1381   noit_check_t *check;
1382   pthread_mutex_lock(&polls_lock);
1383   check = noit_poller_lookup__nolock(in);
1384   pthread_mutex_unlock(&polls_lock);
1385   return check;
1386 }
1387 noit_check_t *
1388 noit_poller_lookup_by_name(char *target, char *name) {
1389   noit_check_t *check;
1390   pthread_mutex_lock(&polls_lock);
1391   check = noit_poller_lookup_by_name__nolock(target,name);
1392   pthread_mutex_unlock(&polls_lock);
1393   return check;
1394 }
1395 int
1396 noit_poller_target_ip_do(const char *target_ip,
1397                          int (*f)(noit_check_t *, void *),
1398                          void *closure) {
1399   int i, count = 0, todo_count = 0;
1400   noit_check_t pivot;
1401   mtev_skiplist *tlist;
1402   mtev_skiplist_node *next;
1403   noit_check_t *todo_onstack[8192];
1404   noit_check_t **todo = todo_onstack;
1405
1406   tlist = mtev_skiplist_find(polls_by_name.index,
1407                              __check_target_ip_compare, NULL);
1408
1409   pthread_mutex_lock(&polls_lock);
1410   /* First pass to count */
1411   memset(&pivot, 0, sizeof(pivot));
1412   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1413   pivot.name = "";
1414   pivot.target = "";
1415   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1416   while(next && next->data) {
1417     noit_check_t *check = next->data;
1418     if(strcmp(check->target_ip, target_ip)) break;
1419     todo_count++;
1420     mtev_skiplist_next(tlist, &next);
1421   }
1422
1423   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1424
1425   memset(&pivot, 0, sizeof(pivot));
1426   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1427   pivot.name = "";
1428   pivot.target = "";
1429   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1430   while(next && next->data) {
1431     noit_check_t *check = next->data;
1432     if(strcmp(check->target_ip, target_ip)) break;
1433     if(count < todo_count) todo[count++] = check;
1434     mtev_skiplist_next(tlist, &next);
1435   }
1436   pthread_mutex_unlock(&polls_lock);
1437
1438   todo_count = count;
1439   count = 0;
1440   for(i=0;i<todo_count;i++)
1441     count += f(todo[i],closure);
1442
1443   if(todo != todo_onstack) free(todo);
1444   return count;
1445 }
1446 int
1447 noit_poller_target_do(const char *target, int (*f)(noit_check_t *, void *),
1448                       void *closure) {
1449   int i, todo_count = 0, count = 0;
1450   noit_check_t pivot;
1451   mtev_skiplist *tlist;
1452   mtev_skiplist_node *next;
1453   noit_check_t *todo_onstack[8192];
1454   noit_check_t **todo = todo_onstack;
1455
1456   tlist = mtev_skiplist_find(polls_by_name.index,
1457                              __check_target_compare, NULL);
1458
1459   pthread_mutex_lock(&polls_lock);
1460   memset(&pivot, 0, sizeof(pivot));
1461   pivot.name = "";
1462   pivot.target = (char *)target;
1463   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1464   while(next && next->data) {
1465     noit_check_t *check = next->data;
1466     if(strcmp(check->target, target)) break;
1467     todo_count++;
1468     mtev_skiplist_next(tlist, &next);
1469   }
1470
1471   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1472
1473   memset(&pivot, 0, sizeof(pivot));
1474   pivot.name = "";
1475   pivot.target = (char *)target;
1476   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1477   while(next && next->data) {
1478     noit_check_t *check = next->data;
1479     if(strcmp(check->target, target)) break;
1480     if(count < todo_count) todo[count++] = check;
1481     mtev_skiplist_next(tlist, &next);
1482   }
1483   pthread_mutex_unlock(&polls_lock);
1484
1485   todo_count = count;
1486   count = 0;
1487   for(i=0;i<todo_count;i++)
1488     count += f(todo[i],closure);
1489
1490   if(todo != todo_onstack) free(todo);
1491   return count;
1492 }
1493
1494 int
1495 noit_poller_do(int (*f)(noit_check_t *, void *),
1496                void *closure) {
1497   mtev_skiplist_node *iter;
1498   int i, count = 0, max_count = 0;
1499   noit_check_t **todo;
1500
1501   if(polls_by_name.size == 0) return 0;
1502
1503   max_count = polls_by_name.size;
1504   todo = malloc(max_count * sizeof(*todo));
1505
1506   pthread_mutex_lock(&polls_lock);
1507   for(iter = mtev_skiplist_getlist(&polls_by_name); iter;
1508       mtev_skiplist_next(&polls_by_name, &iter)) {
1509     if(count < max_count) todo[count++] = (noit_check_t *)iter->data;
1510   }
1511   pthread_mutex_unlock(&polls_lock);
1512
1513   max_count = count;
1514   count = 0;
1515   for(i=0;i<max_count;i++)
1516     count += f(todo[i], closure);
1517   free(todo);
1518   return count;
1519 }
1520
1521 struct ip_module_collector_crutch {
1522   noit_check_t **array;
1523   const char *module;
1524   int idx;
1525   int allocd;
1526 };
1527 static int ip_module_collector(noit_check_t *check, void *cl) {
1528   struct ip_module_collector_crutch *c = cl;
1529   if(c->idx >= c->allocd) return 0;
1530   if(strcmp(check->module, c->module)) return 0;
1531   c->array[c->idx++] = check;
1532   return 1;
1533 }
1534 int
1535 noit_poller_lookup_by_ip_module(const char *ip, const char *mod,
1536                                 noit_check_t **checks, int nchecks) {
1537   struct ip_module_collector_crutch crutch;
1538   crutch.array = checks;
1539   crutch.allocd = nchecks;
1540   crutch.idx = 0;
1541   crutch.module = mod;
1542   return noit_poller_target_ip_do(ip, ip_module_collector, &crutch);
1543 }
1544 int
1545 noit_poller_lookup_by_module(const char *ip, const char *mod,
1546                              noit_check_t **checks, int nchecks) {
1547   struct ip_module_collector_crutch crutch;
1548   crutch.array = checks;
1549   crutch.allocd = nchecks;
1550   crutch.idx = 0;
1551   crutch.module = mod;
1552   return noit_poller_target_do(ip, ip_module_collector, &crutch);
1553 }
1554
1555
1556 int
1557 noit_check_xpath(char *xpath, int len,
1558                  const char *base, const char *arg) {
1559   uuid_t checkid;
1560   int base_trailing_slash;
1561   char argcopy[1024], *target, *module, *name;
1562
1563   base_trailing_slash = (base[strlen(base)-1] == '/');
1564   xpath[0] = '\0';
1565   argcopy[0] = '\0';
1566   if(arg) strlcpy(argcopy, arg, sizeof(argcopy));
1567
1568   if(uuid_parse(argcopy, checkid) == 0) {
1569     /* If they kill by uuid, we'll seek and destroy -- find it anywhere */
1570     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1571              base, base_trailing_slash ? "" : "/", argcopy);
1572   }
1573   else if((module = strchr(argcopy, '`')) != NULL) {
1574     noit_check_t *check;
1575     char uuid_str[37];
1576     target = argcopy;
1577     *module++ = '\0';
1578     if((name = strchr(module+1, '`')) == NULL)
1579       name = module;
1580     else
1581       name++;
1582     check = noit_poller_lookup_by_name(target, name);
1583     if(!check) {
1584       return -1;
1585     }
1586     uuid_unparse_lower(check->checkid, uuid_str);
1587     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1588              base, base_trailing_slash ? "" : "/", uuid_str);
1589   }
1590   return strlen(xpath);
1591 }
1592
1593 static int
1594 bad_check_initiate(noit_module_t *self, noit_check_t *check,
1595                    int once, noit_check_t *cause) {
1596   /* self is likely null here -- why it is bad, in fact */
1597   /* this is only suitable to call in one-offs */
1598   struct timeval now;
1599   stats_t *inp;
1600   char buff[256];
1601   if(!once) return -1;
1602   if(!check) return -1;
1603   assert(!(check->flags & NP_RUNNING));
1604   check->flags |= NP_RUNNING;
1605   inp = noit_check_get_stats_inprogress(check);
1606   gettimeofday(&now, NULL);
1607   noit_check_stats_whence(inp, &now);
1608   snprintf(buff, sizeof(buff), "check[%s] implementation offline",
1609            check->module);
1610   noit_check_stats_status(inp, buff);
1611   noit_check_set_stats(check);
1612   check->flags &= ~NP_RUNNING;
1613   return 0;
1614 }
1615 void
1616 noit_check_stats_clear(noit_check_t *check, stats_t *s) {
1617   memset(s, 0, sizeof(*s));
1618   s->state = NP_UNKNOWN;
1619   s->available = NP_UNKNOWN;
1620 }
1621
1622 void
1623 __stats_add_metric(stats_t *newstate, metric_t *m) {
1624   mtev_hash_replace(&newstate->metrics, m->metric_name, strlen(m->metric_name),
1625                     m, NULL, (void (*)(void *))free_metric);
1626 }
1627
1628 static size_t
1629 noit_metric_sizes(metric_type_t type, const void *value) {
1630   switch(type) {
1631     case METRIC_INT32:
1632     case METRIC_UINT32:
1633       return sizeof(int32_t);
1634     case METRIC_INT64:
1635     case METRIC_UINT64:
1636       return sizeof(int64_t);
1637     case METRIC_DOUBLE:
1638       return sizeof(double);
1639     case METRIC_STRING: {
1640       int len = strlen((char*)value) + 1;
1641       return ((len >= text_size_limit) ? text_size_limit+1 : len);
1642     }
1643     case METRIC_GUESS:
1644       break;
1645   }
1646   assert(type != type);
1647   return 0;
1648 }
1649 static metric_type_t
1650 noit_metric_guess_type(const char *s, void **replacement) {
1651   char *copy, *cp, *trailer, *rpl;
1652   int negative = 0;
1653   metric_type_t type = METRIC_STRING;
1654
1655   if(!s) return METRIC_GUESS;
1656   copy = cp = strdup(s);
1657
1658   /* TRIM the string */
1659   while(*cp && isspace(*cp)) cp++; /* ltrim */
1660   s = cp; /* found a good starting point */
1661   while(*cp) cp++; /* advance to \0 */
1662   cp--; /* back up one */
1663   while(cp > s && isspace(*cp)) *cp-- = '\0'; /* rtrim */
1664
1665   /* Find the first space */
1666   cp = (char *)s;
1667   while(*cp && !isspace(*cp)) cp++;
1668   trailer = cp;
1669   cp--; /* backup one */
1670   if(cp > s && *cp == '%') *cp-- = '\0'; /* chop a last % is there is one */
1671
1672   while(*trailer && isspace(*trailer)) *trailer++ = '\0'; /* rtrim */
1673
1674   /* string was       '  -1.23e-01%  inodes used  ' */
1675   /* copy is (~ = \0) '  -1.23e-01~  inodes used~~' */
1676   /*                     ^           ^              */
1677   /*                     s           trailer        */
1678
1679   /* So, the trailer must not contain numbers */
1680   while(*trailer) { if(isdigit(*trailer)) goto notanumber; trailer++; }
1681
1682   /* And the 's' must be of the form:
1683    *  0) may start with a sign [-+]?
1684    *  1) [1-9][0-9]*
1685    *  2) [0]?.[0-9]+
1686    *  3) 0
1687    *  4) [1-9][0-9]*.[0-9]+
1688    *  5) all of the above ending with e[+-][0-9]+
1689    */
1690    rpl = (char *)s;
1691    /* CASE 0 */
1692    if(s[0] == '-' || s[0] == '+') {
1693      if(s[0] == '-') negative = 1;
1694      s++;
1695    }
1696
1697    if(s[0] == '.') goto decimal; /* CASE 2 */
1698    if(s[0] == '0') { /* CASE 2 & 3 */
1699      s++;
1700      if(!s[0]) goto scanint; /* CASE 3 */
1701      if(s[0] == '.') goto decimal; /* CASE 2 */
1702      goto notanumber;
1703    }
1704    if(s[0] >= '1' && s[0] <= '9') { /* CASE 1 & 4 */
1705      s++;
1706      while(isdigit(s[0])) s++; /* CASE 1 & 4 */
1707      if(!s[0]) goto scanint; /* CASE 1 */
1708      if(s[0] == '.') goto decimal; /* CASE 4 */
1709      goto notanumber;
1710    }
1711    /* Not case 1,2,3,4 */
1712    goto notanumber;
1713
1714   decimal:
1715    s++;
1716    if(!isdigit(s[0])) goto notanumber;
1717    s++;
1718    while(isdigit(s[0])) s++;
1719    if(!s[0]) goto scandouble;
1720    if(s[0] == 'e' || s[0] == 'E') goto exponent; /* CASE 5 */
1721    goto notanumber;
1722
1723   exponent:
1724    s++;
1725    if(s[0] != '-' && s[0] != '+') goto notanumber;
1726    s++;
1727    if(!isdigit(s[0])) goto notanumber;
1728    s++;
1729    while(isdigit(s[0])) s++;
1730    if(!s[0]) goto scandouble;
1731    goto notanumber;
1732
1733  scanint:
1734    if(negative) {
1735      int64_t *v;
1736      v = mtev_memory_safe_malloc(sizeof(*v));
1737      *v = strtoll(rpl, NULL, 10);
1738      *replacement = v;
1739      type = METRIC_INT64;
1740      goto alldone;
1741    }
1742    else {
1743      u_int64_t *v;
1744      v = mtev_memory_safe_malloc(sizeof(*v));
1745      *v = strtoull(rpl, NULL, 10);
1746      *replacement = v;
1747      type = METRIC_UINT64;
1748      goto alldone;
1749    }
1750  scandouble:
1751    {
1752      double *v;
1753      v = mtev_memory_safe_malloc(sizeof(*v));
1754      *v = strtod(rpl, NULL);
1755      *replacement = v;
1756      type = METRIC_DOUBLE;
1757      goto alldone;
1758    }
1759
1760  alldone:
1761  notanumber:
1762   free(copy);
1763   return type;
1764 }
1765
/*
 * Sanitize a metric name in place: every non-printable byte becomes a
 * space, then trailing spaces are stripped.  The first character is always
 * kept, and an empty name is left untouched.
 */
static void
cleanse_metric_name(char *m) {
  char *cp;
  /* isprint() requires a value representable as unsigned char */
  for(cp = m; *cp; cp++)
    if(!isprint((unsigned char)*cp)) *cp=' ';
  /* cp now sits on the terminator; walk back without ever reading or
   * pointing before m (the name may be empty) */
  while(cp - m > 1 && cp[-1] == ' ') /* always leave first char */
    *--cp = '\0';
}
1774
1775 int
1776 noit_stats_populate_metric(metric_t *m, const char *name, metric_type_t type,
1777                            const void *value) {
1778   void *replacement = NULL;
1779
1780   m->metric_name = mtev_memory_safe_strdup(name);
1781   cleanse_metric_name(m->metric_name);
1782
1783   if(type == METRIC_GUESS)
1784     type = noit_metric_guess_type((char *)value, &replacement);
1785   if(type == METRIC_GUESS) return -1;
1786
1787   m->metric_type = type;
1788
1789   if(replacement)
1790     m->metric_value.vp = replacement;
1791   else if(value) {
1792     size_t len;
1793     len = noit_metric_sizes(type, value);
1794     m->metric_value.vp = mtev_memory_safe_malloc(len);
1795     memcpy(m->metric_value.vp, value, len);
1796     if (type == METRIC_STRING) {
1797       m->metric_value.s[len-1] = 0;
1798     }
1799   }
1800   else m->metric_value.vp = NULL;
1801   return 0;
1802 }
1803
1804 metric_t *
1805 noit_stats_get_metric(noit_check_t *check,
1806                       stats_t *newstate, const char *name) {
1807   void *v;
1808   if(newstate == NULL)
1809     newstate = stats_inprogress(check);
1810   if(mtev_hash_retrieve(&newstate->metrics, name, strlen(name), &v))
1811     return (metric_t *)v;
1812   return NULL;
1813 }
1814
1815 void
1816 noit_stats_set_metric(noit_check_t *check,
1817                       const char *name, metric_type_t type,
1818                       const void *value) {
1819   stats_t *c;
1820   metric_t *m = mtev_memory_safe_calloc(1, sizeof(*m));
1821   if(noit_stats_populate_metric(m, name, type, value)) {
1822     free_metric(m);
1823     return;
1824   }
1825   noit_check_metric_count_add(1);
1826   c = noit_check_get_stats_inprogress(check);
1827   check_stats_set_metric_hook_invoke(check, c, m);
1828   __stats_add_metric(c, m);
1829 }
1830 void
1831 noit_stats_set_metric_coerce(noit_check_t *check,
1832                              const char *name, metric_type_t t,
1833                              const char *v) {
1834   char *endptr;
1835   stats_t *c;
1836   c = noit_check_get_stats_inprogress(check);
1837   if(v == NULL) {
1838    bogus:
1839     check_stats_set_metric_coerce_hook_invoke(check, c, name, t, v, mtev_false);
1840     noit_stats_set_metric(check, name, t, NULL);
1841     return;
1842   }
1843   switch(t) {
1844     case METRIC_STRING:
1845       noit_stats_set_metric(check, name, t, v);
1846       break;
1847     case METRIC_INT32:
1848     {
1849       int32_t val;
1850       val = strtol(v, &endptr, 10);
1851       if(endptr == v) goto bogus;
1852       noit_stats_set_metric(check, name, t, &val);
1853       break;
1854     }
1855     case METRIC_UINT32:
1856     {
1857       u_int32_t val;
1858       val = strtoul(v, &endptr, 10);
1859       if(endptr == v) goto bogus;
1860       noit_stats_set_metric(check, name, t, &val);
1861       break;
1862     }
1863     case METRIC_INT64:
1864     {
1865       int64_t val;
1866       val = strtoll(v, &endptr, 10);
1867       if(endptr == v) goto bogus;
1868       noit_stats_set_metric(check, name, t, &val);
1869       break;
1870     }
1871     case METRIC_UINT64:
1872     {
1873       u_int64_t val;
1874       val = strtoull(v, &endptr, 10);
1875       if(endptr == v) goto bogus;
1876       noit_stats_set_metric(check, name, t, &val);
1877       break;
1878     }
1879     case METRIC_DOUBLE:
1880     {
1881       double val;
1882       val = strtod(v, &endptr);
1883       if(endptr == v) goto bogus;
1884       noit_stats_set_metric(check, name, t, &val);
1885       break;
1886     }
1887     case METRIC_GUESS:
1888       noit_stats_set_metric(check, name, t, v);
1889       break;
1890   }
1891   check_stats_set_metric_coerce_hook_invoke(check, c, name, t, v, mtev_true);
1892 }
1893 void
1894 noit_stats_log_immediate_metric(noit_check_t *check,
1895                                 const char *name, metric_type_t type,
1896                                 void *value) {
1897   struct timeval now;
1898   metric_t *m = mtev_memory_safe_malloc(sizeof(*m));
1899   if(noit_stats_populate_metric(m, name, type, value)) {
1900     free_metric(m);
1901     return;
1902   }
1903   gettimeofday(&now, NULL);
1904   noit_check_log_metric(check, &now, m);
1905   free_metric(m);
1906 }
1907
1908 void
1909 noit_check_passive_set_stats(noit_check_t *check) {
1910   int i, nwatches = 0;
1911   mtev_skiplist_node *next;
1912   noit_check_t n;
1913   noit_check_t *watches[8192];
1914
1915   uuid_copy(n.checkid, check->checkid);
1916   n.period = 0;
1917
1918   noit_check_set_stats(check);
1919
1920   pthread_mutex_lock(&polls_lock);
1921   mtev_skiplist_find_neighbors(&watchlist, &n, NULL, NULL, &next);
1922   while(next && next->data && nwatches < 8192) {
1923     noit_check_t *wcheck = next->data;
1924     if(uuid_compare(n.checkid, wcheck->checkid)) break;
1925     watches[nwatches++] = wcheck;
1926     mtev_skiplist_next(&watchlist, &next);
1927   }
1928   pthread_mutex_unlock(&polls_lock);
1929
1930   for(i=0;i<nwatches;i++) {
1931     void *backup;
1932     noit_check_t *wcheck = watches[i];
1933     /* Swap the real check's stats into place */
1934     backup = wcheck->statistics;
1935     wcheck->statistics = check->statistics;
1936
1937     if(check_passive_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
1938       /* Write out our status */
1939       noit_check_log_status(wcheck);
1940       /* Write out all metrics */
1941       noit_check_log_metrics(wcheck);
1942     }
1943     /* Swap them back out */
1944     wcheck->statistics = backup;
1945   }
1946 }
1947 void
1948 noit_check_set_stats(noit_check_t *check) {
1949   int report_change = 0;
1950   char *cp;
1951   dep_list_t *dep;
1952   stats_t *old, *prev, *current;
1953   old = stats_previous(check);
1954   prev = stats_previous(check) = stats_current(check);
1955   current = stats_current(check) = stats_inprogress(check);
1956   stats_inprogress(check) = noit_check_stats_alloc();
1957  
1958   if(old) {
1959     noit_check_safe_free_stats(old);
1960   }
1961
1962   if(current) {
1963     for(cp = current->status; cp && *cp; cp++)
1964       if(*cp == '\r' || *cp == '\n') *cp = ' ';
1965   }
1966
1967   /* check for state changes */
1968   if((!current || (current->available != NP_UNKNOWN)) &&
1969      (!prev || (prev->available != NP_UNKNOWN)) &&
1970      (!current || !prev || (current->available != prev->available)))
1971     report_change = 1;
1972   if((!current || (current->state != NP_UNKNOWN)) &&
1973      (!prev || (prev->state != NP_UNKNOWN)) &&
1974      (!current || !prev || (current->state != prev->state)))
1975     report_change = 1;
1976
1977   mtevL(noit_debug, "%s`%s <- [%s]\n", check->target, check->name,
1978         current ? current->status : "null");
1979   if(report_change) {
1980     mtevL(noit_debug, "%s`%s -> [%s:%s]\n",
1981           check->target, check->name,
1982           noit_check_available_string(current ? current->available : NP_UNKNOWN),
1983           noit_check_state_string(current ? current->state : NP_UNKNOWN));
1984   }
1985
1986   if(NOIT_CHECK_STATUS_ENABLED()) {
1987     char id[UUID_STR_LEN+1];
1988     uuid_unparse_lower(check->checkid, id);
1989     NOIT_CHECK_STATUS(id, check->module, check->name, check->target,
1990                       current ? current->available : NP_UNKNOWN,
1991                       current ? current->state : NP_UNKNOWN,
1992                       current ? current->status : "null");
1993   }
1994
1995   if(check_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
1996     /* Write out the bundled information */
1997     noit_check_log_bundle(check);
1998   }
1999   /* count the check as complete */
2000   check_completion_count++;
2001
2002   for(dep = check->causal_checks; dep; dep = dep->next) {
2003     noit_module_t *mod;
2004     mod = noit_module_lookup(dep->check->module);
2005     if(!mod) {
2006       bad_check_initiate(mod, dep->check, 1, check);
2007     }
2008     else {
2009       mtevL(noit_debug, "Firing %s`%s in response to %s`%s\n",
2010             dep->check->target, dep->check->name,
2011             check->target, check->name);
2012       if((dep->check->flags & NP_DISABLED) == 0)
2013         if(mod->initiate_check)
2014           mod->initiate_check(mod, dep->check, 1, check);
2015     }
2016   }
2017 }
2018
2019 static int
2020 noit_console_show_watchlist(mtev_console_closure_t ncct,
2021                             int argc, char **argv,
2022                             mtev_console_state_t *dstate,
2023                             void *closure) {
2024   mtev_skiplist_node *iter, *fiter;
2025   int nwatches = 0, i;
2026   noit_check_t *watches[8192];
2027
2028   nc_printf(ncct, "%d active watches.\n", watchlist.size);
2029   pthread_mutex_lock(&polls_lock);
2030   for(iter = mtev_skiplist_getlist(&watchlist); iter && nwatches < 8192;
2031       mtev_skiplist_next(&watchlist, &iter)) {
2032     noit_check_t *check = iter->data;
2033     watches[nwatches++] = check;
2034   }
2035   pthread_mutex_unlock(&polls_lock);
2036
2037   for(i=0;i<nwatches;i++) {
2038     noit_check_t *check = watches[i];
2039     char uuid_str[UUID_STR_LEN + 1];
2040
2041     uuid_unparse_lower(check->checkid, uuid_str);
2042     nc_printf(ncct, "%s:\n\t[%s`%s`%s]\n\tPeriod: %dms\n\tFeeds[%d]:\n",
2043               uuid_str, check->target, check->module, check->name,
2044               check->period, check->feeds ? check->feeds->size : 0);
2045     if(check->feeds && check->feeds->size) {
2046       for(fiter = mtev_skiplist_getlist(check->feeds); fiter;
2047           mtev_skiplist_next(check->feeds, &fiter)) {
2048         nc_printf(ncct, "\t\t%s\n", (const char *)fiter->data);
2049       }
2050     }
2051   }
2052   return 0;
2053 }
2054
2055 static void
2056 nc_printf_check_brief(mtev_console_closure_t ncct,
2057                       noit_check_t *check) {
2058   stats_t *current;
2059   char out[512];
2060   char uuid_str[37];
2061   snprintf(out, sizeof(out), "%s`%s (%s [%x])", check->target, check->name,
2062            check->target_ip, check->flags);
2063   uuid_unparse_lower(check->checkid, uuid_str);
2064   nc_printf(ncct, "%s %s\n", uuid_str, out);
2065   current = stats_current(check);
2066   if(current && current->status)
2067     nc_printf(ncct, "\t%s\n", current->status);
2068 }
2069
2070 char *
2071 noit_console_conf_check_opts(mtev_console_closure_t ncct,
2072                              mtev_console_state_stack_t *stack,
2073                              mtev_console_state_t *dstate,
2074                              int argc, char **argv, int idx) {
2075   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2076   uuid_t key_id;
2077   int klen, i = 0;
2078   void *vcheck;
2079
2080   if(argc == 1) {
2081     if(!strncmp("new", argv[0], strlen(argv[0]))) {
2082       if(idx == i) return strdup("new");
2083       i++;
2084     }
2085     pthread_mutex_lock(&polls_lock);
2086     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2087                          &vcheck)) {
2088       noit_check_t *check = (noit_check_t *)vcheck;
2089       char out[512];
2090       char uuid_str[37];
2091       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
2092       uuid_unparse_lower(check->checkid, uuid_str);
2093       if(!strncmp(out, argv[0], strlen(argv[0]))) {
2094         if(idx == i) {
2095           pthread_mutex_unlock(&polls_lock);
2096           return strdup(out);
2097         }
2098         i++;
2099       }
2100       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
2101         if(idx == i) {
2102           pthread_mutex_unlock(&polls_lock);
2103           return strdup(uuid_str);
2104         }
2105         i++;
2106       }
2107     }
2108     pthread_mutex_unlock(&polls_lock);
2109   }
2110   if(argc == 2) {
2111     cmd_info_t *cmd;
2112     if(!strcmp("new", argv[0])) return NULL;
2113     cmd = mtev_skiplist_find(&dstate->cmds, "attribute", NULL);
2114     if(!cmd) return NULL;
2115     return mtev_console_opt_delegate(ncct, stack, cmd->dstate, argc-1, argv+1, idx);
2116   }
2117   return NULL;
2118 }
2119
2120 char *
2121 noit_console_check_opts(mtev_console_closure_t ncct,
2122                         mtev_console_state_stack_t *stack,
2123                         mtev_console_state_t *dstate,
2124                         int argc, char **argv, int idx) {
2125   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2126   uuid_t key_id;
2127   int klen, i = 0;
2128
2129   if(argc == 1) {
2130     void *vcheck;
2131     pthread_mutex_lock(&polls_lock);
2132     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2133                          &vcheck)) {
2134       char out[512];
2135       char uuid_str[37];
2136       noit_check_t *check = (noit_check_t *)vcheck;
2137       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
2138       uuid_unparse_lower(check->checkid, uuid_str);
2139       if(!strncmp(out, argv[0], strlen(argv[0]))) {
2140         if(idx == i) {
2141           pthread_mutex_unlock(&polls_lock);
2142           return strdup(out);
2143         }
2144         i++;
2145       }
2146       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
2147         if(idx == i) {
2148           pthread_mutex_unlock(&polls_lock);
2149           return strdup(uuid_str);
2150         }
2151         i++;
2152       }
2153     }
2154     pthread_mutex_unlock(&polls_lock);
2155   }
2156   if(argc == 2) {
2157     return mtev_console_opt_delegate(ncct, stack, dstate, argc-1, argv+1, idx);
2158   }
2159   return NULL;
2160 }
2161
2162 static int
2163 noit_console_show_checks(mtev_console_closure_t ncct,
2164                          int argc, char **argv,
2165                          mtev_console_state_t *dstate,
2166                          void *closure) {
2167   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2168   uuid_t key_id;
2169   int klen, i = 0, nchecks;
2170   void *vcheck;
2171   noit_check_t **checks;
2172
2173   nchecks = mtev_hash_size(&polls);
2174   if(nchecks == 0) return 0;
2175   checks = malloc(nchecks * sizeof(*checks));
2176
2177   pthread_mutex_lock(&polls_lock);
2178   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2179                        &vcheck)) {
2180     if(i<nchecks) checks[i++] = vcheck;
2181   }
2182   pthread_mutex_unlock(&polls_lock);
2183
2184   nchecks = i;
2185   for(i=0;i<nchecks;i++)
2186     nc_printf_check_brief(ncct,checks[i]);
2187
2188   free(checks);
2189   return 0;
2190 }
2191
2192 static int
2193 noit_console_short_checks_sl(mtev_console_closure_t ncct,
2194                              mtev_skiplist *tlist) {
2195   int max_count, i = 0;
2196   noit_check_t **todo;
2197   mtev_skiplist_node *iter;
2198
2199   max_count = tlist->size;
2200   if(max_count == 0) return 0;
2201   todo = malloc(max_count * sizeof(*todo));
2202
2203   pthread_mutex_lock(&polls_lock);
2204   for(iter = mtev_skiplist_getlist(tlist); i < max_count && iter;
2205       mtev_skiplist_next(tlist, &iter)) {
2206     todo[i++] = iter->data;
2207   }
2208   pthread_mutex_unlock(&polls_lock);
2209
2210   max_count = i;
2211   for(i=0;i<max_count;i++)
2212     nc_printf_check_brief(ncct, todo[i]);
2213
2214   free(todo);
2215   return 0;
2216 }
2217 static int
2218 noit_console_show_checks_name(mtev_console_closure_t ncct,
2219                               int argc, char **argv,
2220                               mtev_console_state_t *dstate,
2221                               void *closure) {
2222   return noit_console_short_checks_sl(ncct, &polls_by_name);
2223 }
2224
2225 static int
2226 noit_console_show_checks_target(mtev_console_closure_t ncct,
2227                                    int argc, char **argv,
2228                                    mtev_console_state_t *dstate,
2229                                    void *closure) {
2230   return noit_console_short_checks_sl(ncct,
2231            mtev_skiplist_find(polls_by_name.index,
2232            __check_target_compare, NULL));
2233 }
2234
2235 static int
2236 noit_console_show_checks_target_ip(mtev_console_closure_t ncct,
2237                                    int argc, char **argv,
2238                                    mtev_console_state_t *dstate,
2239                                    void *closure) {
2240   return noit_console_short_checks_sl(ncct,
2241            mtev_skiplist_find(polls_by_name.index,
2242            __check_target_ip_compare, NULL));
2243 }
2244
2245 static void
2246 register_console_check_commands() {
2247   mtev_console_state_t *tl;
2248   cmd_info_t *showcmd;
2249
2250   tl = mtev_console_state_initial();
2251   showcmd = mtev_console_state_get_cmd(tl, "show");
2252   assert(showcmd && showcmd->dstate);
2253
2254   mtev_console_state_add_cmd(showcmd->dstate,
2255     NCSCMD("timing_slots", noit_console_show_timing_slots, NULL, NULL, NULL));
2256
2257   mtev_console_state_add_cmd(showcmd->dstate,
2258     NCSCMD("checks", noit_console_show_checks, NULL, NULL, NULL));
2259
2260   mtev_console_state_add_cmd(showcmd->dstate,
2261     NCSCMD("checks:name", noit_console_show_checks_name, NULL,
2262            NULL, NULL));
2263
2264   mtev_console_state_add_cmd(showcmd->dstate,
2265     NCSCMD("checks:target", noit_console_show_checks_target, NULL,
2266            NULL, NULL));
2267
2268   mtev_console_state_add_cmd(showcmd->dstate,
2269     NCSCMD("checks:target_ip", noit_console_show_checks_target_ip, NULL,
2270            NULL, NULL));
2271
2272   mtev_console_state_add_cmd(showcmd->dstate,
2273     NCSCMD("watches", noit_console_show_watchlist, NULL, NULL, NULL));
2274 }
2275
2276 int
2277 noit_check_register_module(const char *name) {
2278   int i;
2279   for(i=0; i<reg_module_id; i++)
2280     if(!strcmp(reg_module_names[i], name)) return i;
2281   if(reg_module_id >= MAX_MODULE_REGISTRATIONS) return -1;
2282   mtevL(noit_debug, "Registered module %s as %d\n", name, i);
2283   i = reg_module_id++;
2284   reg_module_names[i] = strdup(name);
2285   mtev_conf_set_namespace(reg_module_names[i]);
2286   return i;
2287 }
2288 int
2289 noit_check_registered_module_cnt() {
2290   return reg_module_id;
2291 }
2292 const char *
2293 noit_check_registered_module(int idx) {
2294   if(reg_module_used < 0) reg_module_used = reg_module_id;
2295   assert(reg_module_used == reg_module_id);
2296   if(idx >= reg_module_id || idx < 0) return NULL;
2297   return reg_module_names[idx];
2298 }
2299
2300 void
2301 noit_check_set_module_metadata(noit_check_t *c, int idx, void *md, void (*freefunc)(void *)) {
2302   struct vp_w_free *tuple;
2303   if(reg_module_used < 0) reg_module_used = reg_module_id;
2304   assert(reg_module_used == reg_module_id);
2305   if(idx >= reg_module_id || idx < 0) return;
2306   if(!c->module_metadata) c->module_metadata = calloc(reg_module_id, sizeof(void *));
2307   c->module_metadata[idx] = calloc(1, sizeof(struct vp_w_free));
2308   tuple = c->module_metadata[idx];
2309   tuple->ptr = md;
2310   tuple->freefunc = freefunc;
2311 }
2312 void
2313 noit_check_set_module_config(noit_check_t *c, int idx, mtev_hash_table *config) {
2314   if(reg_module_used < 0) reg_module_used = reg_module_id;
2315   assert(reg_module_used == reg_module_id);
2316   if(idx >= reg_module_id || idx < 0) return;
2317   if(!c->module_configs) c->module_configs = calloc(reg_module_id, sizeof(mtev_hash_table *));
2318   c->module_configs[idx] = config;
2319 }
2320 void *
2321 noit_check_get_module_metadata(noit_check_t *c, int idx) {
2322   struct vp_w_free *tuple;
2323   if(reg_module_used < 0) reg_module_used = reg_module_id;
2324   assert(reg_module_used == reg_module_id);
2325   if(idx >= reg_module_id || idx < 0 || !c->module_metadata) return NULL;
2326   tuple = c->module_metadata[idx];
2327   return tuple ? tuple->ptr : NULL;
2328 }
2329 mtev_hash_table *
2330 noit_check_get_module_config(noit_check_t *c, int idx) {
2331   if(reg_module_used < 0) reg_module_used = reg_module_id;
2332   assert(reg_module_used == reg_module_id);
2333   if(idx >= reg_module_id || idx < 0 || !c->module_configs) return NULL;
2334   return c->module_configs[idx];
2335 }
Note: See TracBrowser for help on using the browser.