root/src/noit_check.c

Revision 531800a2f2fd2d1f87b44d1cd2623335e8badec4, 74.5 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 4 days ago)

merge conflicts

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  * Copyright (c) 2015, Circonus, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above
13  *       copyright notice, this list of conditions and the following
14  *       disclaimer in the documentation and/or other materials provided
15  *       with the distribution.
16  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
17  *       of its contributors may be used to endorse or promote products
18  *       derived from this software without specific prior written
19  *       permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "noit_config.h"
35 #include <mtev_defines.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <ctype.h>
41 #include <errno.h>
42 #include <netinet/in.h>
43 #include <arpa/inet.h>
44 #include <time.h>
45
46 #include <eventer/eventer.h>
47 #include <mtev_memory.h>
48 #include <mtev_log.h>
49 #include <mtev_hash.h>
50 #include <mtev_skiplist.h>
51 #include <mtev_watchdog.h>
52 #include <mtev_conf.h>
53 #include <mtev_console.h>
54 #include <mtev_cluster.h>
55
56 #include "noit_mtev_bridge.h"
57 #include "noit_dtrace_probes.h"
58 #include "noit_check.h"
59 #include "noit_module.h"
60 #include "noit_check_tools.h"
61 #include "noit_check_resolver.h"
62
/* Fallback for text_size_limit when the configured value is missing or <= 0. */
63 #define DEFAULT_TEXT_METRIC_SIZE_LIMIT  512
/* Seconds argument used when scheduling check_recycle_bin_processor. */
64 #define RECYCLE_INTERVAL 60
65
/*
 * Hook instantiations for this file.  Each MTEV_HOOK_IMPL invocation lists,
 * in order: the hook name, the hook's parameter list, the closure type and
 * name, the same parameter list with the closure prepended, and the matching
 * argument list.
 * NOTE(review): MTEV_HOOK_IMPL is defined outside this file; presumably it
 * generates the register/invoke functions for each named hook -- confirm
 * against the libmtev hook header.
 */
66 MTEV_HOOK_IMPL(check_config_fixup,
67   (noit_check_t *check),
68   void *, closure,
69   (void *closure, noit_check_t *check),
70   (closure,check))
71
72 MTEV_HOOK_IMPL(check_stats_set_metric,
73   (noit_check_t *check, stats_t *stats, metric_t *m),
74   void *, closure,
75   (void *closure, noit_check_t *check, stats_t *stats, metric_t *m),
76   (closure,check,stats,m))
77
78 MTEV_HOOK_IMPL(check_stats_set_metric_coerce,
79   (noit_check_t *check, stats_t *stats, const char *name,
80    metric_type_t type, const char *v, mtev_boolean success),
81   void *, closure,
82   (void *closure, noit_check_t *check, stats_t *stats, const char *name,
83    metric_type_t type, const char *v, mtev_boolean success),
84   (closure,check,stats,name,type,v,success))
85
86 MTEV_HOOK_IMPL(check_passive_log_stats,
87   (noit_check_t *check),
88   void *, closure,
89   (void *closure, noit_check_t *check),
90   (closure,check))
91
92 MTEV_HOOK_IMPL(check_set_stats,
93   (noit_check_t *check),
94   void *, closure,
95   (void *closure, noit_check_t *check),
96   (closure,check))
97
98 MTEV_HOOK_IMPL(check_log_stats,
99   (noit_check_t *check),
100   void *, closure,
101   (void *closure, noit_check_t *check),
102   (closure,check))
103
/* Indices into the three-entry stats_t pointer array stored in a check's
 * statistics member. */
104 #define STATS_INPROGRESS 0
105 #define STATS_CURRENT 1
106 #define STATS_PREVIOUS 2
107
108 void
109 free_metric(metric_t *m) {
110   if(m->metric_name) free(m->metric_name);
111   if(m->metric_value.i) free(m->metric_value.i);
112 }
113
/*
 * Select one of the three stats_t slots held in a check's statistics array.
 * Both the argument and the whole expansion are parenthesized so the macros
 * work with any pointer expression (for example &chk) and compose safely
 * inside larger expressions.  Each expansion is still an lvalue.
 */
#define stats_inprogress(c) (((stats_t **)((c)->statistics))[STATS_INPROGRESS])
#define stats_current(c) (((stats_t **)((c)->statistics))[STATS_CURRENT])
#define stats_previous(c) (((stats_t **)((c)->statistics))[STATS_PREVIOUS])
117
118 stats_t *
119 noit_check_get_stats_inprogress(noit_check_t *c) {
120   return stats_inprogress(c);
121 }
122 stats_t *
123 noit_check_get_stats_current(noit_check_t *c) {
124   return stats_current(c);
125 }
126 stats_t *
127 noit_check_get_stats_previous(noit_check_t *c) {
128   return stats_previous(c);
129 }
130
/*
 * One set of results for a check.  noit_check_stats_set_calloc() allocates
 * three of these per check (see the STATS_* indices); members are read and
 * updated through the noit_check_stats_*() accessor functions.
 */
131 struct stats_t {
  /* timestamp for this result set; copied in by noit_check_stats_whence() */
132   struct timeval whence;
  /* availability code; presumably one of the NP_* values accepted by
   * noit_check_available_string() -- confirm */
133   int8_t available;
  /* state code; presumably one of the NP_* values accepted by
   * noit_check_state_string() -- confirm */
134   int8_t state;
  /* duration of the check run; units are not established in this file */
135   u_int32_t duration;
  /* metrics table; initialized in noit_check_stats_alloc() and torn down in
   * noit_check_safe_free_stats() */
136   mtev_hash_table metrics;
  /* status text; noit_check_stats_status() truncates input to fit */
137   char status[256];
138 };
139
140 struct timeval *
141 noit_check_stats_whence(stats_t *s, struct timeval *n) {
142   if(n) memcpy(&s->whence, n, sizeof(*n));
143   return &s->whence;
144 }
145 int8_t
146 noit_check_stats_available(stats_t *s, int8_t *n) {
147   if(n) s->available = *n;
148   return s->available;
149 }
150 int8_t
151 noit_check_stats_state(stats_t *s, int8_t *n) {
152   if(n) s->state = *n;
153   return s->state;
154 }
155 u_int32_t
156 noit_check_stats_duration(stats_t *s, u_int32_t *n) {
157   if(n) s->duration = *n;
158   return s->duration;
159 }
160 const char *
161 noit_check_stats_status(stats_t *s, const char *n) {
162   if(n) strlcpy(s->status, n, sizeof(s->status));
163   return s->status;
164 }
165 mtev_hash_table *
166 noit_check_stats_metrics(stats_t *s) {
167   return &s->metrics;
168 }
169 void
170 noit_stats_set_whence(noit_check_t *c, struct timeval *t) {
171   (void)noit_check_stats_whence(noit_check_get_stats_inprogress(c), t);
172 }
173 void
174 noit_stats_set_state(noit_check_t *c, int8_t t) {
175   (void)noit_check_stats_state(noit_check_get_stats_inprogress(c), &t);
176 }
177 void
178 noit_stats_set_duration(noit_check_t *c, u_int32_t t) {
179   (void)noit_check_stats_duration(noit_check_get_stats_inprogress(c), &t);
180 }
181 void
182 noit_stats_set_status(noit_check_t *c, const char *s) {
183   (void)noit_check_stats_status(noit_check_get_stats_inprogress(c), s);
184 }
185 void
186 noit_stats_set_available(noit_check_t *c, int8_t t) {
187   (void)noit_check_stats_available(noit_check_get_stats_inprogress(c), &t);
188 }
189 static void
190 noit_check_safe_free_metric(void *vs) {
191   metric_t *m = vs;
192   if (m) {
193     free_metric(m);
194   }
195 }
196 static void
197 noit_check_safe_free_stats(void *vs) {
198   stats_t *s = vs;
199   mtev_hash_destroy(&s->metrics, NULL, (void (*)(void *))mtev_memory_safe_free);
200 }
201 static stats_t *
202 noit_check_stats_alloc() {
203   stats_t *n;
204   n = mtev_memory_safe_malloc_cleanup(sizeof(*n), noit_check_safe_free_stats);
205   memset(n, 0, sizeof(*n));
206   mtev_hash_init(&n->metrics);
207   return n;
208 }
209 static void *
210 noit_check_stats_set_calloc() {
211   int i;
212   stats_t **s;
213   s = calloc(sizeof(stats_t *), 3);
214   for(i=0;i<3;i++) s[i] = noit_check_stats_alloc();
215   return s;
216 }
217
218 /* 20 ms slots over 60 second for distribution */
219 #define SCHEDULE_GRANULARITY 20
220 #define SLOTS_PER_SECOND (1000/SCHEDULE_GRANULARITY)
/* capacity of reg_module_names[] */
221 #define MAX_MODULE_REGISTRATIONS 64
222
223 /* used to manage per-check generic module metadata */
224 struct vp_w_free {
225   void *ptr;
226   void (*freefunc)(void *);
227 };
228
/* set when any check has an oncheck; gates noit_poller_make_causal_map() */
229 static mtev_boolean system_needs_causality = mtev_false;
/* loaded from "noit/@text_size_limit" in noit_poller_init() */
230 static int text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
/* used as the loop bound over reg_module_names[] and per-module option arrays */
231 static int reg_module_id = 0;
232 static char *reg_module_names[MAX_MODULE_REGISTRATIONS] = { NULL };
233 static int reg_module_used = -1;
234 static u_int64_t check_completion_count = 0ULL;
/* incremented atomically by noit_check_metric_count_add() */
235 static u_int64_t check_metrics_seen = 0ULL;
/* taken around accesses to polls / polls_by_name in this file */
236 static pthread_mutex_t polls_lock = PTHREAD_MUTEX_INITIALIZER;
237 static pthread_mutex_t recycling_lock = PTHREAD_MUTEX_INITIALIZER;
/* checks keyed by UUID (UUID_SIZE-byte keys) */
238 static mtev_hash_table polls = MTEV_HASH_EMPTY;
/* filled by noit_check_dns_ignore_tld() */
239 static mtev_hash_table dns_ignore_list = MTEV_HASH_EMPTY;
/* ordered by __watchlist_compare (checkid, then period) */
240 static mtev_skiplist watchlist = { 0 };
/* ordered by __check_name_compare (target, then name), with extra indexes */
241 static mtev_skiplist polls_by_name = { 0 };
/* bumped on every noit_poller_process_checks() call */
242 static u_int32_t __config_load_generation = 0;
/* per-20ms-slot and per-second counters over one minute, maintained by
 * check_slots_adjust_tv() */
243 static unsigned short check_slots_count[60000 / SCHEDULE_GRANULARITY] = { 0 },
244                       check_slots_seconds_count[60] = { 0 };
/* loaded from "//checks/@priority_scheduling" */
245 static mtev_boolean priority_scheduling = mtev_false;
/* seconds at the end of each minute excluded by priority scheduling */
246 static int priority_dead_zone_seconds = 3;
247
248 static noit_check_t *
249 noit_poller_lookup__nolock(uuid_t in) {
250   void *vcheck;
251   if(mtev_hash_retrieve(&polls, (char *)in, UUID_SIZE, &vcheck))
252     return (noit_check_t *)vcheck;
253   return NULL;
254 }
255 static noit_check_t *
256 noit_poller_lookup_by_name__nolock(char *target, char *name) {
257   noit_check_t tmp_check;
258   memset(&tmp_check, 0, sizeof(tmp_check));
259   tmp_check.target = target;
260   tmp_check.name = name;
261   return mtev_skiplist_find(&polls_by_name, &tmp_check, NULL);
262 }
263
264 static int
265 noit_console_show_timing_slots(mtev_console_closure_t ncct,
266                                int argc, char **argv,
267                                mtev_console_state_t *dstate,
268                                void *closure) {
269   int i, j;
270   const int upl = (60000 / SCHEDULE_GRANULARITY) / 60;
271   for(i=0;i<60;i++) {
272     nc_printf(ncct, "[%02d] %04d: ", i, check_slots_seconds_count[i]);
273     for(j=i*upl;j<(i+1)*upl;j++) {
274       char cp = '!';
275       if(check_slots_count[j] < 10) cp = '0' + check_slots_count[j];
276       else if(check_slots_count[j] < 36) cp = 'a' + (check_slots_count[j] - 10);
277       nc_printf(ncct, "%c", cp);
278     }
279     nc_printf(ncct, "\n");
280   }
281   return 0;
282 }
283 static int
284 noit_check_add_to_list(noit_check_t *new_check, const char *newname) {
285   char *oldname = NULL, *newnamecopy;
286   if(newname) {
287     /* track this stuff outside the lock to avoid allocs */
288     oldname = new_check->name;
289     newnamecopy = strdup(newname);
290   }
291   pthread_mutex_lock(&polls_lock);
292   if(!(new_check->flags & NP_TRANSIENT)) {
293     mtevAssert(new_check->name || newname);
294     /* This remove could fail -- no big deal */
295     if(new_check->name != NULL)
296       mtev_skiplist_remove(&polls_by_name, new_check, NULL);
297
298     /* optional update the name (at the critical point) */
299     if(newname) new_check->name = newnamecopy;
300
301     /* This insert could fail.. which means we have a conflict on
302      * target`name.  That should result in the check being disabled. */
303     if(!mtev_skiplist_insert(&polls_by_name, new_check)) {
304       mtevL(noit_error, "Check %s`%s disabled due to naming conflict\n",
305             new_check->target, new_check->name);
306       new_check->flags |= NP_DISABLED;
307     }
308     if(oldname) free(oldname);
309   }
310   pthread_mutex_unlock(&polls_lock);
311   return 1;
312 }
313
314 u_int64_t noit_check_metric_count() {
315   return check_metrics_seen;
316 }
317 void noit_check_metric_count_add(int add) {
318   mtev_atomic64_t *n = (mtev_atomic64_t *)&check_metrics_seen;
319   mtev_atomic64_t v = (mtev_atomic64_t)add;
320   mtev_atomic_add64(n, v);
321 }
322
323 u_int64_t noit_check_completion_count() {
324   return check_completion_count;
325 }
326 static void register_console_check_commands();
327 static int check_recycle_bin_processor(eventer_t, int, void *,
328                                        struct timeval *);
329
330 static int
331 check_slots_find_smallest(int sec, struct timeval* period, int timeout) {
332   int i, j, cyclic, random_offset, jbase = 0, mini = 0, minj = 0;
333   unsigned short min_running_i = 0xffff, min_running_j = 0xffff;
334   int period_seconds = period->tv_sec;
335
336   /* If we're greater than sixty seconds, we should do our
337    * initial scheduling as if the period was sixty seconds. */
338   if (period_seconds > 60) {
339     period_seconds = 60;
340   }
341
342   /* If a check is configured to run at times aligned with sixty seconds
343    * and we're configured to use priority scheduling, schedule so that
344    * we're guaranteed to finish before the timeout */
345   if ((priority_scheduling == mtev_true) &&
346       (((period->tv_sec % 60) == 0) && (period->tv_usec == 0))) {
347     /* Don't allow a ton of stuff to schedule in the first second in the case
348      * of very long timeouts - use the first 10 seconds in this case */
349     int allowable_time = MAX(60 - (timeout/1000) - 1, 10);
350     int max_seconds = MIN(60-priority_dead_zone_seconds, allowable_time);
351     for(i=0;i<max_seconds;i++) {
352       int adj_i = (i + sec) % max_seconds;
353       if(check_slots_seconds_count[adj_i] < min_running_i) {
354         min_running_i = check_slots_seconds_count[adj_i];
355         mini = adj_i;
356       }
357     }
358   }
359   else {
360     /* Just schedule normally*/
361     for(i=0;i<period_seconds;i++) {
362       int adj_i = (i + sec) % 60;
363       if(check_slots_seconds_count[adj_i] < min_running_i) {
364         min_running_i = check_slots_seconds_count[adj_i];
365         mini = adj_i;
366       }
367     }
368   }
369   jbase = mini * (1000/SCHEDULE_GRANULARITY);
370   random_offset = drand48() * SLOTS_PER_SECOND;
371   for(cyclic=0;cyclic<SLOTS_PER_SECOND;cyclic++) {
372     j = jbase + ((random_offset + cyclic) % SLOTS_PER_SECOND);
373     if(check_slots_count[j] < min_running_j) {
374       min_running_j = check_slots_count[j];
375       minj = j;
376     }
377   }
378   return (minj * SCHEDULE_GRANULARITY) + drand48() * SCHEDULE_GRANULARITY;
379 }
380 static void
381 check_slots_adjust_tv(struct timeval *tv, short adj) {
382   int offset_ms, idx;
383   offset_ms = (tv->tv_sec % 60) * 1000 + (tv->tv_usec / 1000);
384   idx = offset_ms / SCHEDULE_GRANULARITY;
385   check_slots_count[idx] += adj;
386   check_slots_seconds_count[offset_ms / 1000] += adj;
387 }
/* Count one more check at tv's position within the minute. */
void check_slots_inc_tv(struct timeval *tv) {
  const short delta = 1;
  check_slots_adjust_tv(tv, delta);
}
/* Count one fewer check at tv's position within the minute. */
void check_slots_dec_tv(struct timeval *tv) {
  const short delta = -1;
  check_slots_adjust_tv(tv, delta);
}
/*
 * Report whether p is a non-NULL string made up entirely of printable
 * characters (per isprint() in the current locale).
 * Returns 1 if so (the empty string qualifies), 0 otherwise.
 */
static int
noit_check_generic_safe_string(const char *p) {
  /* <ctype.h> functions require a value representable as unsigned char (or
   * EOF); walking the bytes as unsigned avoids passing negative values when
   * plain char is signed and the string holds non-ASCII bytes. */
  const unsigned char *cursor = (const unsigned char *)p;
  if(!cursor) return 0;
  for(; *cursor; cursor++) {
    if(!isprint(*cursor)) return 0;
  }
  return 1;
}
/* A target is valid when it passes noit_check_generic_safe_string().
 * Returns 1 for valid, 0 for invalid. */
int
noit_check_validate_target(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
/* A check name is valid when it passes noit_check_generic_safe_string().
 * Returns 1 for valid, 0 for invalid. */
int
noit_check_validate_name(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
412 const char *
413 noit_check_available_string(int16_t available) {
414   switch(available) {
415     case NP_AVAILABLE:    return "available";
416     case NP_UNAVAILABLE:  return "unavailable";
417     case NP_UNKNOWN:      return "unknown";
418   }
419   return NULL;
420 }
421 const char *
422 noit_check_state_string(int16_t state) {
423   switch(state) {
424     case NP_GOOD:         return "good";
425     case NP_BAD:          return "bad";
426     case NP_UNKNOWN:      return "unknown";
427   }
428   return NULL;
429 }
430 static int __check_name_compare(const void *a, const void *b) {
431   const noit_check_t *ac = a;
432   const noit_check_t *bc = b;
433   int rv;
434   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
435   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
436   return 0;
437 }
438 static int __watchlist_compare(const void *a, const void *b) {
439   const noit_check_t *ac = a;
440   const noit_check_t *bc = b;
441   int rv;
442   if((rv = memcmp(ac->checkid, bc->checkid, sizeof(ac->checkid))) != 0) return rv;
443   if(ac->period < bc->period) return -1;
444   if(ac->period == bc->period) return 0;
445   return 1;
446 }
447 static int __check_target_ip_compare(const void *a, const void *b) {
448   const noit_check_t *ac = a;
449   const noit_check_t *bc = b;
450   int rv;
451   if((rv = strcmp(ac->target_ip, bc->target_ip)) != 0) return rv;
452   if (ac->name == NULL) return 1;
453   if (bc->name == NULL) return -1;
454   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
455   return 1;
456 }
457 static int __check_target_compare(const void *a, const void *b) {
458   const noit_check_t *ac = a;
459   const noit_check_t *bc = b;
460   int rv;
461   if (ac->target == NULL) return 1;
462   if (bc->target == NULL) return -1;
463   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
464   if (ac->name == NULL) return 1;
465   if (bc->name == NULL) return -1;
466   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
467   return 1;
468 }
469 int
470 noit_calc_rtype_flag(char *resolve_rtype) {
471   int flags = 0;
472   if(resolve_rtype) {
473     flags |= strcmp(resolve_rtype, PREFER_IPV6) == 0 ||
474              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_PREFER_IPV6 : 0;
475     flags |= strcmp(resolve_rtype, FORCE_IPV4) == 0 ||
476              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_SINGLE_RESOLVE : 0;
477   }
478   return flags;
479 }
/*
 * Compute a synthetic "last check" time for a check and store it in *lc, and
 * record the chosen position in the minute-wide slot counters.
 *
 * check: the check being scheduled (period, timeout and flags are read).
 * lc:    output; receives the computed last-check time.
 * _now:  reference time, or NULL to use gettimeofday().
 *
 * For non-transient checks with a non-zero period, a lightly loaded offset is
 * obtained from check_slots_find_smallest() and lc is then walked in whole
 * periods until it is the latest such time not after "now".
 * NOTE(review): this reading assumes compare_timeval(a, b) < 0 means a is
 * earlier than b -- confirm against its definition.
 */
480 void
481 noit_check_fake_last_check(noit_check_t *check,
482                            struct timeval *lc, struct timeval *_now) {
483   struct timeval now, period, lc_copy;
484   int balance_ms;
485
486   if(!_now) {
487     gettimeofday(&now, NULL);
488     _now = &now;
489   }
  /* check->period is in milliseconds; split it into a timeval */
490   period.tv_sec = check->period / 1000;
491   period.tv_usec = (check->period % 1000) * 1000;
  /* first approximation: one period before now */
492   sub_timeval(*_now, period, lc);
493
494   /* We need to set the last check value based on the period, but
495    * we also need to store a value that is based around the one-minute
496    * time to properly increment the slots; otherwise, the slots will
497    * get all messed up */
498   if(!(check->flags & NP_TRANSIENT) && check->period) {
499     balance_ms = check_slots_find_smallest(_now->tv_sec+1, &period, check->timeout);
    /* snap to the start of lc's minute, then add the balanced offset */
500     lc->tv_sec = (lc->tv_sec / 60) * 60 + balance_ms / 1000;
501     lc->tv_usec = (balance_ms % 1000) * 1000;
    /* lc_copy keeps the minute-relative position for the slot counters */
502     memcpy(&lc_copy, lc, sizeof(lc_copy));
503     if(compare_timeval(*_now, *lc) < 0) {
      /* step lc back one period at a time while the comparison holds */
504       do {
505         sub_timeval(*lc, period, lc);
506       } while(compare_timeval(*_now, *lc) < 0);
507     }
508     else {
      /* step lc forward one period at a time until the next step would make
       * the comparison hold */
509       struct timeval test;
510       while(1) {
511         add_timeval(*lc, period, &test);
512         if(compare_timeval(*_now, test) < 0) break;
513         memcpy(lc, &test, sizeof(test));
514       }
515     }
516   }
517   else {
518     memcpy(&lc_copy, lc, sizeof(lc_copy));
519   }
520  
521   /* now, we're going to do an even distribution using the slots */
522   if(!(check->flags & NP_TRANSIENT)) check_slots_inc_tv(&lc_copy);
523 }
524 void
525 noit_poller_process_checks(const char *xpath) {
526   int i, flags, cnt = 0, found;
527   mtev_conf_section_t *sec;
528   __config_load_generation++;
529   sec = mtev_conf_get_sections(NULL, xpath, &cnt);
530   for(i=0; i<cnt; i++) {
531     void *vcheck;
532     char uuid_str[37];
533     char target[256] = "";
534     char module[256] = "";
535     char name[256] = "";
536     char filterset[256] = "";
537     char oncheck[1024] = "";
538     char resolve_rtype[16] = "";
539     int ridx;
540     int no_period = 0;
541     int no_oncheck = 0;
542     int period = 0, timeout = 0;
543     mtev_boolean disabled = mtev_false, busted = mtev_false;
544     uuid_t uuid, out_uuid;
545     int64_t config_seq = 0;
546     mtev_hash_table *options;
547     mtev_hash_table **moptions = NULL;
548     mtev_boolean moptions_used = mtev_false, backdated = mtev_false;
549
550     /* We want to heartbeat here... otherwise, if a lot of checks are
551      * configured or if we're running on a slower system, we could
552      * end up getting watchdog killed before we get a chance to run
553      * any checks */
554     mtev_watchdog_child_heartbeat();
555
556     if(reg_module_id > 0) {
557       moptions = alloca(reg_module_id * sizeof(mtev_hash_table *));
558       memset(moptions, 0, reg_module_id * sizeof(mtev_hash_table *));
559       moptions_used = mtev_true;
560     }
561
562 #define NEXT(...) mtevL(noit_stderr, __VA_ARGS__); continue
563 #define MYATTR(type,a,...) mtev_conf_get_##type(sec[i], "@" #a, __VA_ARGS__)
564 #define INHERIT(type,a,...) \
565   mtev_conf_get_##type(sec[i], "ancestor-or-self::node()/@" #a, __VA_ARGS__)
566
567     if(!MYATTR(stringbuf, uuid, uuid_str, sizeof(uuid_str))) {
568       mtevL(noit_stderr, "check %d has no uuid\n", i+1);
569       continue;
570     }
571
572     MYATTR(int64, seq, &config_seq);
573
574     if(uuid_parse(uuid_str, uuid)) {
575       mtevL(noit_stderr, "check uuid: '%s' is invalid\n", uuid_str);
576       continue;
577     }
578
579     if(!INHERIT(stringbuf, target, target, sizeof(target))) {
580       mtevL(noit_stderr, "check uuid: '%s' has no target\n", uuid_str);
581       busted = mtev_true;
582     }
583     if(!noit_check_validate_target(target)) {
584       mtevL(noit_stderr, "check uuid: '%s' has malformed target\n", uuid_str);
585       busted = mtev_true;
586     }
587     if(!INHERIT(stringbuf, module, module, sizeof(module))) {
588       mtevL(noit_stderr, "check uuid: '%s' has no module\n", uuid_str);
589       busted = mtev_true;
590     }
591
592     if(!INHERIT(stringbuf, filterset, filterset, sizeof(filterset)))
593       filterset[0] = '\0';
594    
595     if (!INHERIT(stringbuf, resolve_rtype, resolve_rtype, sizeof(resolve_rtype)))
596       strlcpy(resolve_rtype, PREFER_IPV4, sizeof(resolve_rtype));
597
598     if(!MYATTR(stringbuf, name, name, sizeof(name)))
599       strlcpy(name, module, sizeof(name));
600
601     if(!noit_check_validate_name(name)) {
602       mtevL(noit_stderr, "check uuid: '%s' has malformed name\n", uuid_str);
603       busted = mtev_true;
604     }
605
606     if(!INHERIT(int, period, &period) || period == 0)
607       no_period = 1;
608
609     if(!INHERIT(stringbuf, oncheck, oncheck, sizeof(oncheck)) || !oncheck[0])
610       no_oncheck = 1;
611
612     if(no_period && no_oncheck) {
613       mtevL(noit_stderr, "check uuid: '%s' has neither period nor oncheck\n",
614             uuid_str);
615       busted = mtev_true;
616     }
617     if(!(no_period || no_oncheck)) {
618       mtevL(noit_stderr, "check uuid: '%s' has oncheck and period.\n",
619             uuid_str);
620       busted = mtev_true;
621     }
622     if(!INHERIT(int, timeout, &timeout)) {
623       mtevL(noit_stderr, "check uuid: '%s' has no timeout\n", uuid_str);
624       busted = mtev_true;
625     }
626     if(!no_period && timeout >= period) {
627       mtevL(noit_stderr, "check uuid: '%s' timeout > period\n", uuid_str);
628       timeout = period/2;
629     }
630     options = mtev_conf_get_hash(sec[i], "config");
631     for(ridx=0; ridx<reg_module_id; ridx++) {
632       moptions[ridx] = mtev_conf_get_namespaced_hash(sec[i], "config",
633                                                      reg_module_names[ridx]);
634     }
635
636     INHERIT(boolean, disable, &disabled);
637     flags = 0;
638     if(busted) flags |= (NP_UNCONFIG|NP_DISABLED);
639     else if(disabled) flags |= NP_DISABLED;
640
641     flags |= noit_calc_rtype_flag(resolve_rtype);
642
643     pthread_mutex_lock(&polls_lock);
644     found = mtev_hash_retrieve(&polls, (char *)uuid, UUID_SIZE, &vcheck);
645     if(found) {
646       noit_check_t *check = (noit_check_t *)vcheck;
647       /* Possibly reset the seq */
648       if(config_seq < 0) check->config_seq = 0;
649
650       /* Otherwise note a non-increasing sequence */
651       if(check->config_seq > config_seq) backdated = mtev_true;
652     }
653     pthread_mutex_unlock(&polls_lock);
654     if(found)
655       noit_poller_deschedule(uuid, mtev_false);
656     if(backdated) {
657       mtevL(noit_error, "Check config seq backwards, ignored\n");
658       if(found) noit_check_log_delete((noit_check_t *)vcheck);
659     }
660     else {
661       noit_poller_schedule(target, module, name, filterset, options,
662                            moptions_used ? moptions : NULL,
663                            period, timeout, oncheck[0] ? oncheck : NULL,
664                            config_seq, flags, uuid, out_uuid);
665       mtevL(noit_debug, "loaded uuid: %s\n", uuid_str);
666     }
667     for(ridx=0; ridx<reg_module_id; ridx++) {
668       if(moptions[ridx]) {
669         mtev_hash_destroy(moptions[ridx], free, free);
670         free(moptions[ridx]);
671       }
672     }
673     mtev_hash_destroy(options, free, free);
674     free(options);
675   }
676   if(sec) free(sec);
677 }
678
679 int
680 noit_check_activate(noit_check_t *check) {
681   noit_module_t *mod;
682   if(NOIT_CHECK_LIVE(check)) return 0;
683   mod = noit_module_lookup(check->module);
684   if(mod && mod->initiate_check) {
685     if((check->flags & NP_DISABLED) == 0) {
686       mod->initiate_check(mod, check, 0, NULL);
687       return 1;
688     }
689     else
690       mtevL(noit_debug, "Skipping %s`%s, disabled.\n",
691             check->target, check->name);
692   }
693   else {
694     if(!mod) {
695       mtevL(noit_stderr, "Cannot find module '%s'\n", check->module);
696       check->flags |= NP_DISABLED;
697     }
698   }
699   return 0;
700 }
701
702 void
703 noit_poller_initiate() {
704   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
705   uuid_t key_id;
706   int klen;
707   void *vcheck;
708   /* This is only ever called in the beginning, no lock needed */
709   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
710                        &vcheck)) {
711     noit_check_activate((noit_check_t *)vcheck);
712     mtev_watchdog_child_heartbeat();
713   }
714 }
715
716 void
717 noit_poller_flush_epoch(int oldest_allowed) {
718   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
719   uuid_t key_id;
720   int klen, i;
721   void *vcheck;
722 #define TOFREE_PER_ITER 1024
723   noit_check_t *tofree[TOFREE_PER_ITER];
724
725   /* Cleanup any previous causal map */
726   while(1) {
727     i = 0;
728     pthread_mutex_lock(&polls_lock);
729     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
730                          &vcheck) && i < TOFREE_PER_ITER) {
731       noit_check_t *check = (noit_check_t *)vcheck;
732       if(check->generation < oldest_allowed) {
733         tofree[i++] = check;
734       }
735     }
736     pthread_mutex_unlock(&polls_lock);
737     if(i==0) break;
738     while(i>0) noit_poller_deschedule(tofree[--i]->checkid, mtev_true);
739   }
740 #undef TOFREE_PER_ITER
741 }
742
743 void
744 noit_poller_make_causal_map() {
745   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
746   uuid_t key_id;
747   int klen;
748   void *vcheck;
749
750   if(!system_needs_causality) return;
751
752   /* set it to false, we'll set it to true during the scan if we
753    * find anything causal.  */
754   system_needs_causality = mtev_false;
755
756   /* Cleanup any previous causal map */
757   pthread_mutex_lock(&polls_lock);
758   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
759                        &vcheck)) {
760     noit_check_t *check = (noit_check_t *)vcheck;
761     dep_list_t *dep;
762     while((dep = check->causal_checks) != NULL) {
763       check->causal_checks = dep->next;
764       free(dep);
765     }
766   }
767
768   memset(&iter, 0, sizeof(iter));
769   /* Walk all checks and add check dependencies to their parents */
770   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
771                        &vcheck)) {
772     noit_check_t *check = (noit_check_t *)vcheck, *parent;
773     if(check->oncheck) {
774       /* This service is causally triggered by another service */
775       uuid_t id;
776       char fullcheck[1024];
777       char *name = check->oncheck;
778       char *target = NULL;
779
780       system_needs_causality = mtev_true;
781       mtevL(noit_debug, "Searching for upstream trigger on %s\n", name);
782       parent = NULL;
783       if(uuid_parse(check->oncheck, id) == 0) {
784         target = "";
785         parent = noit_poller_lookup__nolock(id);
786       }
787       else if((target = strchr(check->oncheck, '`')) != NULL) {
788         strlcpy(fullcheck, check->oncheck, target + 1 - check->oncheck);
789         name = target + 1;
790         target = fullcheck;
791         parent = noit_poller_lookup_by_name__nolock(target, name);
792       }
793       else {
794         target = check->target;
795         parent = noit_poller_lookup_by_name__nolock(target, name);
796       }
797
798       if(!parent) {
799         check->flags |= NP_DISABLED;
800         mtevL(noit_stderr, "Disabling check %s`%s, can't find oncheck %s`%s\n",
801               check->target, check->name, target, name);
802       }
803       else {
804         dep_list_t *dep;
805         dep = malloc(sizeof(*dep));
806         dep->check = check;
807         dep->next = parent->causal_checks;
808         parent->causal_checks = dep;
809         mtevL(noit_debug, "Causal map %s`%s --> %s`%s\n",
810               parent->target, parent->name, check->target, check->name);
811       }
812     }
813   }
814   pthread_mutex_unlock(&polls_lock);
815   /* We found some causal checks, so we might need to activate stuff */
816   if(system_needs_causality) noit_poller_initiate();
817 }
818 void
819 noit_poller_reload(const char *xpath)
820 {
821   noit_poller_process_checks(xpath ? xpath : "/noit/checks//check");
822   if(!xpath) {
823     /* Full reload, we need to wipe old checks */
824     noit_poller_flush_epoch(__config_load_generation);
825   }
826   noit_poller_make_causal_map();
827 }
828 void
829 noit_check_dns_ignore_tld(const char* extension, const char* ignore) {
830   mtev_hash_replace(&dns_ignore_list, strdup(extension), strlen(extension), strdup(ignore), NULL, NULL);
831 }
832 static void
833 noit_check_dns_ignore_list_init() {
834   mtev_conf_section_t* dns;
835   int cnt;
836
837   dns = mtev_conf_get_sections(NULL, "/noit/dns/extension", &cnt);
838   if(dns) {
839     int i = 0;
840     for (i = 0; i < cnt; i++) {
841       char* extension;
842       char* ignore;
843       if(!mtev_conf_get_string(dns[i], "self::node()/@value", &extension)) {
844         continue;
845       }
846       if(!mtev_conf_get_string(dns[i], "self::node()/@ignore", &ignore)) {
847         continue;
848       }
849       noit_check_dns_ignore_tld(extension, ignore);
850     }
851   }
852 }
853 static void
854 noit_check_poller_scheduling_init() {
855   mtev_conf_get_boolean(NULL, "//checks/@priority_scheduling", &priority_scheduling);
856 }
/*
 * One-time initialization of the poller: seeds the drand48() generator,
 * loads scheduling settings, initializes the resolver and check tools, sets
 * up the by-name and watchlist skiplists, registers console commands and the
 * recycle-bin timer, loads the text size limit and DNS ignore list, and
 * finally performs a full configuration load.
 */
857 void
858 noit_poller_init() {
  /* seed the generator used by check_slots_find_smallest() */
859   srand48((getpid() << 16) ^ time(NULL));
860   noit_check_poller_scheduling_init();
861   noit_check_resolver_init();
862   noit_check_tools_init();
  /* primary ordering is target`name; two additional indexes order by
   * target_ip and by target */
863   mtev_skiplist_init(&polls_by_name);
864   mtev_skiplist_set_compare(&polls_by_name, __check_name_compare,
865                             __check_name_compare);
866   mtev_skiplist_add_index(&polls_by_name, __check_target_ip_compare,
867                             __check_target_ip_compare);
868   mtev_skiplist_add_index(&polls_by_name, __check_target_compare,
869                             __check_target_compare);
870   mtev_skiplist_init(&watchlist);
871   mtev_skiplist_set_compare(&watchlist, __watchlist_compare,
872                             __watchlist_compare);
873   register_console_check_commands();
874   eventer_name_callback("check_recycle_bin_processor",
875                         check_recycle_bin_processor);
  /* first recycle-bin run is scheduled RECYCLE_INTERVAL seconds from now */
876   eventer_add_in_s_us(check_recycle_bin_processor, NULL, RECYCLE_INTERVAL, 0);
877   mtev_conf_get_int(NULL, "noit/@text_size_limit", &text_size_limit);
  /* reject non-positive limits and fall back to the default */
878   if (text_size_limit <= 0) {
879     text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
880   }
881   noit_check_dns_ignore_list_init();
  /* NULL xpath = full load of all configured checks */
882   noit_poller_reload(NULL);
883 }
884
885 int
886 noit_poller_check_count() {
887   return polls_by_name.size;
888 }
889
890 int
891 noit_poller_transient_check_count() {
892   return watchlist.size;
893 }
894
895 noit_check_t *
896 noit_check_clone(uuid_t in) {
897   int i;
898   noit_check_t *checker, *new_check;
899   void *vcheck;
900   if(mtev_hash_retrieve(&polls,
901                         (char *)in, UUID_SIZE,
902                         &vcheck) == 0) {
903     return NULL;
904   }
905   checker = (noit_check_t *)vcheck;
906   if(checker->oncheck) {
907     return NULL;
908   }
909   new_check = mtev_memory_safe_calloc(1, sizeof(*new_check));
910   mtevAssert(new_check != NULL);
911   memcpy(new_check, checker, sizeof(*new_check));
912   new_check->target = strdup(new_check->target);
913   new_check->module = strdup(new_check->module);
914   new_check->name = strdup(new_check->name);
915   new_check->filterset = strdup(new_check->filterset);
916   new_check->flags = 0;
917   new_check->fire_event = NULL;
918   memset(&new_check->last_fire_time, 0, sizeof(new_check->last_fire_time));
919   new_check->statistics = noit_check_stats_set_calloc();
920   new_check->closure = NULL;
921   new_check->config = calloc(1, sizeof(*new_check->config));
922   mtev_hash_merge_as_dict(new_check->config, checker->config);
923   new_check->module_configs = NULL;
924   new_check->module_metadata = NULL;
925
926   for(i=0; i<reg_module_id; i++) {
927     void *src_metadata;
928     mtev_hash_table *src_mconfig;
929     src_mconfig = noit_check_get_module_config(checker, i);
930     if(src_mconfig) {
931       mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
932       mtev_hash_merge_as_dict(t, src_mconfig);
933       noit_check_set_module_config(new_check, i, t);
934     }
935     if(checker->flags & NP_PASSIVE_COLLECTION)
936       if(NULL != (src_metadata = noit_check_get_module_metadata(new_check, i)))
937         noit_check_set_module_metadata(new_check, i, src_metadata, NULL);
938   }
939   return new_check;
940 }
941
942 noit_check_t *
943 noit_check_watch(uuid_t in, int period) {
944   /* First look for a copy that is being watched */
945   int minimum_pi = 1000, granularity_pi = 500;
946   mtev_conf_section_t check_node;
947   char uuid_str[UUID_STR_LEN + 1];
948   char xpath[1024];
949   noit_check_t n, *f;
950
951   uuid_unparse_lower(in, uuid_str);
952
953   mtevL(noit_debug, "noit_check_watch(%s,%d)\n", uuid_str, period);
954   if(period == 0) {
955     return noit_poller_lookup(in);
956   }
957
958   /* Find the check */
959   snprintf(xpath, sizeof(xpath), "//checks//check[@uuid=\"%s\"]", uuid_str);
960   check_node = mtev_conf_get_section(NULL, xpath);
961   mtev_conf_get_int(NULL, "//checks/@transient_min_period", &minimum_pi);
962   mtev_conf_get_int(NULL, "//checks/@transient_period_granularity", &granularity_pi);
963   if(check_node) {
964     mtev_conf_get_int(check_node,
965                       "ancestor-or-self::node()/@transient_min_period",
966                       &minimum_pi);
967     mtev_conf_get_int(check_node,
968                       "ancestor-or-self::node()/@transient_period_granularity",
969                       &granularity_pi);
970   }
971
972   /* apply the bounds */
973   period /= granularity_pi;
974   period *= granularity_pi;
975   period = MAX(period, minimum_pi);
976
977   uuid_copy(n.checkid, in);
978   n.period = period;
979
980   f = mtev_skiplist_find(&watchlist, &n, NULL);
981   if(f) return f;
982   f = noit_check_clone(in);
983   if(!f) return NULL;
984   f->period = period;
985   f->timeout = period - 10;
986   f->flags |= NP_TRANSIENT;
987   mtevL(noit_debug, "Watching %s@%d\n", uuid_str, period);
988   mtev_skiplist_insert(&watchlist, f);
989   return f;
990 }
991
992 noit_check_t *
993 noit_check_get_watch(uuid_t in, int period) {
994   noit_check_t n, *f;
995
996   uuid_copy(n.checkid, in);
997   n.period = period;
998
999   f = mtev_skiplist_find(&watchlist, &n, NULL);
1000   return f;
1001 }
1002
1003 void
1004 noit_check_transient_add_feed(noit_check_t *check, const char *feed) {
1005   char *feedcopy;
1006   if(!check->feeds) {
1007     check->feeds = calloc(1, sizeof(*check->feeds));
1008     mtev_skiplist_init(check->feeds);
1009     mtev_skiplist_set_compare(check->feeds,
1010                               (mtev_skiplist_comparator_t)strcmp,
1011                               (mtev_skiplist_comparator_t)strcmp);
1012   }
1013   feedcopy = strdup(feed);
1014   /* No error on failure -- it's already there */
1015   if(mtev_skiplist_insert(check->feeds, feedcopy) == NULL) free(feedcopy);
1016   mtevL(noit_debug, "check %s`%s @ %dms has %d feed(s): %s.\n",
1017         check->target, check->name, check->period, check->feeds->size, feed);
1018 }
1019 void
1020 noit_check_transient_remove_feed(noit_check_t *check, const char *feed) {
1021   if(!check->feeds) return;
1022   if(feed) {
1023     mtevL(noit_debug, "check %s`%s @ %dms removing 1 of %d feeds: %s.\n",
1024           check->target, check->name, check->period, check->feeds->size, feed);
1025     mtev_skiplist_remove(check->feeds, feed, free);
1026   }
1027   if(check->feeds->size == 0) {
1028     char uuid_str[UUID_STR_LEN + 1];
1029     uuid_unparse_lower(check->checkid, uuid_str);
1030     mtevL(noit_debug, "Unwatching %s@%d\n", uuid_str, check->period);
1031     mtev_skiplist_remove(&watchlist, check, NULL);
1032     mtev_skiplist_destroy(check->feeds, free);
1033     free(check->feeds);
1034     check->feeds = NULL;
1035     if(check->flags & NP_TRANSIENT) {
1036       mtevL(noit_debug, "check %s`%s @ %dms has no more listeners.\n",
1037             check->target, check->name, check->period);
1038       check->flags |= NP_KILLED;
1039     }
1040     noit_poller_free_check(check);
1041   }
1042 }
1043
1044 mtev_boolean
1045 noit_check_is_valid_target(const char *target) {
1046   int8_t family;
1047   int rv;
1048   union {
1049     struct in_addr addr4;
1050     struct in6_addr addr6;
1051   } a;
1052
1053   family = AF_INET;
1054   rv = inet_pton(family, target, &a);
1055   if(rv != 1) {
1056     family = AF_INET6;
1057     rv = inet_pton(family, target, &a);
1058     if(rv != 1) {
1059       return mtev_false;
1060     }
1061   }
1062   return mtev_true;
1063 }
1064 int
1065 noit_check_set_ip(noit_check_t *new_check,
1066                   const char *ip_str, const char *newname) {
1067   int8_t family;
1068   int rv, failed = 0;
1069   char old_target_ip[INET6_ADDRSTRLEN];
1070   union {
1071     struct in_addr addr4;
1072     struct in6_addr addr6;
1073   } a;
1074
1075   memset(old_target_ip, 0, INET6_ADDRSTRLEN);
1076   strlcpy(old_target_ip, new_check->target_ip, sizeof(old_target_ip));
1077
1078   family = NOIT_CHECK_PREFER_V6(new_check) ? AF_INET6 : AF_INET;
1079   rv = inet_pton(family, ip_str, &a);
1080   if(rv != 1) {
1081     if (!NOIT_CHECK_SINGLE_RESOLVE(new_check)) {
1082       family = family == AF_INET ? AF_INET6 : AF_INET;
1083       rv = inet_pton(family, ip_str, &a);
1084       if(rv != 1) {
1085         family = AF_INET;
1086         memset(&a, 0, sizeof(a));
1087         failed = -1;
1088       }
1089     } else {
1090       failed = -1;
1091     }
1092   }
1093
1094   new_check->target_family = family;
1095   memcpy(&new_check->target_addr, &a, sizeof(a));
1096   new_check->target_ip[0] = '\0';
1097   if(failed == 0)
1098     if(inet_ntop(new_check->target_family,
1099                  &new_check->target_addr,
1100                  new_check->target_ip,
1101                  sizeof(new_check->target_ip)) == NULL) {
1102       mtevL(noit_error, "inet_ntop failed [%s] -> %d\n", ip_str, errno);
1103     }
1104   /*
1105    * new_check->name could be null if this check is being set for the
1106    * first time.  add_to_list will set it.
1107    */
1108   if (new_check->name == NULL ||
1109       strcmp(old_target_ip, new_check->target_ip) != 0) {
1110     noit_check_add_to_list(new_check, newname);
1111   }
1112
1113   if(new_check->name == NULL && newname != NULL) {
1114     mtevAssert(new_check->flags & NP_TRANSIENT);
1115     new_check->name = strdup(newname);
1116   }
1117
1118   return failed;
1119 }
1120 int
1121 noit_check_resolve(noit_check_t *check) {
1122   uint8_t family_pref = NOIT_CHECK_PREFER_V6(check) ? AF_INET6 : AF_INET;
1123   char ipaddr[INET6_ADDRSTRLEN];
1124   if(!NOIT_CHECK_SHOULD_RESOLVE(check)) return 1; /* success, not required */
1125   noit_check_resolver_remind(check->target);
1126   if(noit_check_resolver_fetch(check->target, ipaddr, sizeof(ipaddr),
1127                                family_pref) >= 0) {
1128     check->flags |= NP_RESOLVED;
1129     noit_check_set_ip(check, ipaddr, NULL);
1130     return 0;
1131   }
1132   check->flags &= ~NP_RESOLVED;
1133   return -1;
1134 }
1135 int
1136 noit_check_update(noit_check_t *new_check,
1137                   const char *target,
1138                   const char *name,
1139                   const char *filterset,
1140                   mtev_hash_table *config,
1141                   mtev_hash_table **mconfigs,
1142                   u_int32_t period,
1143                   u_int32_t timeout,
1144                   const char *oncheck,
1145                   int64_t seq,
1146                   int flags) {
1147   char uuid_str[37];
1148   int mask = NP_DISABLED | NP_UNCONFIG;
1149
1150   mtevAssert(name);
1151   uuid_unparse_lower(new_check->checkid, uuid_str);
1152   if(!new_check->statistics) new_check->statistics = noit_check_stats_set_calloc();
1153   if(seq < 0) new_check->config_seq = seq = 0;
1154   if(new_check->config_seq > seq) {
1155     mtevL(mtev_error, "noit_check_update[%s] skipped: seq backwards\n", uuid_str);
1156     return -1;
1157   }
1158
1159   /* selfcheck will identify this node in a cluster */
1160   if(mtev_cluster_enabled() && !strcmp(new_check->module, "selfcheck")) {
1161     uuid_t cluster_id;
1162     mtev_cluster_get_self(cluster_id);
1163     if(uuid_compare(cluster_id, new_check->checkid)) {
1164       mtevL(mtev_error, "Setting global cluster identity to '%s'\n", uuid_str);
1165       mtev_cluster_set_self(new_check->checkid);
1166     }
1167   }
1168
1169   if(NOIT_CHECK_RUNNING(new_check)) {
1170     char module[256];
1171     uuid_t id, dummy;
1172     uuid_copy(id, new_check->checkid);
1173     strlcpy(module, new_check->module, sizeof(module));
1174     noit_poller_deschedule(id, mtev_false);
1175     return noit_poller_schedule(target, module, name, filterset,
1176                                 config, mconfigs, period, timeout, oncheck,
1177                                 seq, flags, id, dummy);
1178   }
1179
1180   new_check->generation = __config_load_generation;
1181   if(new_check->target) free(new_check->target);
1182   new_check->target = strdup(target);
1183
1184   // apply resolution flags to check.
1185   if (flags & NP_PREFER_IPV6)
1186     new_check->flags |= NP_PREFER_IPV6;
1187   else
1188     new_check->flags &= ~NP_PREFER_IPV6;
1189   if (flags & NP_SINGLE_RESOLVE)
1190     new_check->flags |= NP_SINGLE_RESOLVE;
1191   else
1192     new_check->flags &= ~NP_SINGLE_RESOLVE;
1193   if (flags & NP_RESOLVE)
1194     new_check->flags |= NP_RESOLVE;
1195   else
1196     new_check->flags &= ~NP_RESOLVE;
1197
1198   /* This sets both the name and the target_addr */
1199   if(noit_check_set_ip(new_check, target, name)) {
1200     mtev_boolean should_resolve;
1201     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1202     const char *key, *value;
1203     int klen;
1204     char* extension = strrchr(target, '.');
1205     new_check->flags |= NP_RESOLVE;
1206     new_check->flags &= ~NP_RESOLVED;
1207     /* If we match any of the extensions we're supposed to ignore,
1208      * don't resolve */
1209     if (extension && (strlen(extension) > 1)) {
1210       while(mtev_hash_next(&dns_ignore_list, &iter, &key, &klen, (void**)&value)) {
1211         if ((!strcmp("true", value)) && (!strcmp(extension+1, key))) {
1212             new_check->flags &= ~NP_RESOLVE;
1213             break;
1214         }
1215       }
1216     }
1217     if(noit_check_should_resolve_targets(&should_resolve) && !should_resolve)
1218       flags |= NP_DISABLED | NP_UNCONFIG;
1219     noit_check_resolve(new_check);
1220   }
1221
1222   if(new_check->filterset) free(new_check->filterset);
1223   new_check->filterset = filterset ? strdup(filterset): NULL;
1224
1225   if(config != NULL) {
1226     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1227     const char *k;
1228     int klen;
1229     void *data;
1230     if(new_check->config) mtev_hash_delete_all(new_check->config, free, free);
1231     else new_check->config = calloc(1, sizeof(*new_check->config));
1232     while(mtev_hash_next(config, &iter, &k, &klen, &data)) {
1233       mtev_hash_store(new_check->config, strdup(k), klen, strdup((char *)data));
1234     }
1235   }
1236   if(mconfigs != NULL) {
1237     int i;
1238     for(i=0; i<reg_module_id; i++) {
1239       mtev_hash_table *t;
1240       if(NULL != (t = noit_check_get_module_config(new_check, i))) {
1241         noit_check_set_module_config(new_check, i, NULL);
1242         mtev_hash_destroy(t, free, free);
1243         free(t);
1244       }
1245       if(mconfigs[i]) {
1246         mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
1247         mtev_hash_merge_as_dict(t, mconfigs[i]);
1248         noit_check_set_module_config(new_check, i, t);
1249       }
1250     }
1251   }
1252   if(new_check->oncheck) free(new_check->oncheck);
1253   new_check->oncheck = oncheck ? strdup(oncheck) : NULL;
1254   if(new_check->oncheck) system_needs_causality = mtev_true;
1255   new_check->period = period;
1256   new_check->timeout = timeout;
1257   new_check->config_seq = seq;
1258
1259   /* Unset what could be set.. then set what should be set */
1260   new_check->flags = (new_check->flags & ~mask) | flags;
1261
1262   check_config_fixup_hook_invoke(new_check);
1263
1264   if((new_check->flags & NP_TRANSIENT) == 0)
1265     noit_check_activate(new_check);
1266
1267   noit_check_add_to_list(new_check, NULL);
1268   return 0;
1269 }
1270 int
1271 noit_poller_schedule(const char *target,
1272                      const char *module,
1273                      const char *name,
1274                      const char *filterset,
1275                      mtev_hash_table *config,
1276                      mtev_hash_table **mconfigs,
1277                      u_int32_t period,
1278                      u_int32_t timeout,
1279                      const char *oncheck,
1280                      int64_t seq,
1281                      int flags,
1282                      uuid_t in,
1283                      uuid_t out) {
1284   noit_check_t *new_check;
1285   new_check = mtev_memory_safe_calloc(1, sizeof(*new_check));
1286   mtevAssert(new_check != NULL);
1287
1288   /* The module and the UUID can never be changed */
1289   new_check->module = strdup(module);
1290   if(uuid_is_null(in))
1291     uuid_generate(new_check->checkid);
1292   else
1293     uuid_copy(new_check->checkid, in);
1294
1295   new_check->statistics = noit_check_stats_set_calloc();
1296   noit_check_update(new_check, target, name, filterset, config, mconfigs,
1297                     period, timeout, oncheck, seq, flags);
1298   mtevAssert(mtev_hash_store(&polls,
1299                          (char *)new_check->checkid, UUID_SIZE,
1300                          new_check));
1301   uuid_copy(out, new_check->checkid);
1302   noit_check_log_check(new_check);
1303
1304   return 0;
1305 }
1306
1307 /* A quick little list of recycleable checks.  This list never really
1308  * grows large, so no sense in thinking too hard about the algorithmic
1309  * complexity.
1310  */
1311 struct _checker_rcb {
1312   noit_check_t *checker;
1313   struct _checker_rcb *next;
1314 };
1315 static struct _checker_rcb *checker_rcb = NULL;
1316 static void recycle_check(noit_check_t *checker, mtev_boolean has_lock) {
1317   struct _checker_rcb *n = malloc(sizeof(*n));
1318   if(!has_lock) pthread_mutex_lock(&recycling_lock);
1319   n->checker = checker;
1320   n->next = checker_rcb;
1321   checker_rcb = n;
1322   if(!has_lock) pthread_mutex_unlock(&recycling_lock);
1323 }
1324 void
1325 noit_poller_free_check_internal(noit_check_t *checker, mtev_boolean has_lock) {
1326   noit_module_t *mod;
1327
1328   if (checker->flags & NP_PASSIVE_COLLECTION) {
1329     struct timeval current_time;
1330     gettimeofday(&current_time, NULL);
1331     if (checker->last_fire_time.tv_sec == 0) {
1332       memcpy(&checker->last_fire_time, &current_time, sizeof(struct timeval));
1333     }
1334     /* If NP_RUNNING is set for some reason or we've fired recently, recycle
1335      * the check.... we don't want to free it */
1336     if ((checker->flags & NP_RUNNING) ||
1337         (sub_timeval_ms(current_time,checker->last_fire_time) < (checker->period*2))) {
1338       recycle_check(checker, has_lock);
1339       return;
1340     }
1341   }
1342   else if(checker->flags & NP_RUNNING) {
1343     /* If the check is running, don't free it - will clean it up later */
1344     recycle_check(checker, has_lock);
1345     return;
1346   }
1347
1348   mod = noit_module_lookup(checker->module);
1349   if(mod && mod->cleanup) mod->cleanup(mod, checker);
1350   if(checker->fire_event) {
1351      eventer_remove(checker->fire_event);
1352      free(checker->fire_event->closure);
1353      eventer_free(checker->fire_event);
1354      checker->fire_event = NULL;
1355   }
1356   if(checker->closure) free(checker->closure);
1357   if(checker->target) free(checker->target);
1358   if(checker->module) free(checker->module);
1359   if(checker->name) free(checker->name);
1360   if(checker->config) {
1361     mtev_hash_destroy(checker->config, free, free);
1362     free(checker->config);
1363     checker->config = NULL;
1364   }
1365   if(checker->module_metadata) {
1366     int i;
1367     for(i=0; i<reg_module_id; i++) {
1368       struct vp_w_free *tuple;
1369       tuple = checker->module_metadata[i];
1370       if(tuple) {
1371         if(tuple->freefunc) tuple->freefunc(tuple->ptr);
1372         free(tuple);
1373       }
1374     }
1375     free(checker->module_metadata);
1376   }
1377   if(checker->module_configs) {
1378     int i;
1379     for(i=0; i<reg_module_id; i++) {
1380       if(checker->module_configs[i]) {
1381         mtev_hash_destroy(checker->module_configs[i], free, free);
1382         free(checker->module_configs[i]);
1383       }
1384     }
1385     free(checker->module_configs);
1386   }
1387
1388   mtev_memory_safe_free(stats_inprogress(checker));
1389   mtev_memory_safe_free(stats_current(checker));
1390   mtev_memory_safe_free(stats_previous(checker));
1391
1392   mtev_memory_safe_free(checker);
1393 }
1394 void
1395 noit_poller_free_check(noit_check_t *checker) {
1396   noit_poller_free_check_internal(checker, mtev_false);
1397 }
1398 static int
1399 check_recycle_bin_processor(eventer_t e, int mask, void *closure,
1400                             struct timeval *now) {
1401   static struct timeval one_minute = { RECYCLE_INTERVAL, 0L };
1402   struct _checker_rcb *prev = NULL, *curr = NULL;
1403   mtevL(noit_debug, "Scanning check recycle bin\n");
1404   pthread_mutex_lock(&recycling_lock);
1405   curr = checker_rcb;
1406   while(curr) {
1407     noit_check_t *check = curr->checker;
1408     mtev_boolean free_check = mtev_false;
1409     if (check->flags & NP_PASSIVE_COLLECTION) {
1410       struct timeval current_time;
1411       gettimeofday(&current_time, NULL);
1412       if ((!(check->flags & NP_RUNNING)) &&
1413           (sub_timeval_ms(current_time,check->last_fire_time) >= (check->period*2))) {
1414         free_check = mtev_true;
1415       }
1416     }
1417     else if(!(curr->checker->flags & NP_RUNNING)) {
1418       free_check = mtev_true;
1419     }
1420
1421     if (free_check == mtev_true) {
1422       mtevL(noit_debug, "0x%p: Check is ready to free.\n", check);
1423       noit_poller_free_check_internal(curr->checker, mtev_true);
1424       if(prev) prev->next = curr->next;
1425       else checker_rcb = curr->next;
1426       free(curr);
1427       curr = prev ? prev->next : checker_rcb;
1428     }
1429     else {
1430       prev = curr;
1431       curr = curr->next;
1432     }
1433   }
1434   pthread_mutex_unlock(&recycling_lock);
1435   add_timeval(*now, one_minute, &e->whence);
1436   return EVENTER_TIMER;
1437 }
1438
1439 int
1440 noit_poller_deschedule(uuid_t in, mtev_boolean log) {
1441   void *vcheck;
1442   noit_check_t *checker;
1443   if(mtev_hash_retrieve(&polls,
1444                         (char *)in, UUID_SIZE,
1445                         &vcheck) == 0) {
1446     return -1;
1447   }
1448   checker = (noit_check_t *)vcheck;
1449   checker->flags |= (NP_DISABLED|NP_KILLED);
1450
1451   if(log) noit_check_log_delete(checker);
1452
1453   mtevAssert(mtev_skiplist_remove(&polls_by_name, checker, NULL));
1454   mtevAssert(mtev_hash_delete(&polls, (char *)in, UUID_SIZE, NULL, NULL));
1455
1456   noit_poller_free_check(checker);
1457   return 0;
1458 }
1459
1460 noit_check_t *
1461 noit_poller_lookup(uuid_t in) {
1462   noit_check_t *check;
1463   pthread_mutex_lock(&polls_lock);
1464   check = noit_poller_lookup__nolock(in);
1465   pthread_mutex_unlock(&polls_lock);
1466   return check;
1467 }
1468 noit_check_t *
1469 noit_poller_lookup_by_name(char *target, char *name) {
1470   noit_check_t *check;
1471   pthread_mutex_lock(&polls_lock);
1472   check = noit_poller_lookup_by_name__nolock(target,name);
1473   pthread_mutex_unlock(&polls_lock);
1474   return check;
1475 }
1476 int
1477 noit_poller_target_ip_do(const char *target_ip,
1478                          int (*f)(noit_check_t *, void *),
1479                          void *closure) {
1480   int i, count = 0, todo_count = 0;
1481   noit_check_t pivot;
1482   mtev_skiplist *tlist;
1483   mtev_skiplist_node *next;
1484   noit_check_t *todo_onstack[8192];
1485   noit_check_t **todo = todo_onstack;
1486
1487   tlist = mtev_skiplist_find(polls_by_name.index,
1488                              __check_target_ip_compare, NULL);
1489
1490   pthread_mutex_lock(&polls_lock);
1491   /* First pass to count */
1492   memset(&pivot, 0, sizeof(pivot));
1493   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1494   pivot.name = "";
1495   pivot.target = "";
1496   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1497   while(next && next->data) {
1498     noit_check_t *check = next->data;
1499     if(strcmp(check->target_ip, target_ip)) break;
1500     todo_count++;
1501     mtev_skiplist_next(tlist, &next);
1502   }
1503
1504   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1505
1506   memset(&pivot, 0, sizeof(pivot));
1507   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1508   pivot.name = "";
1509   pivot.target = "";
1510   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1511   while(next && next->data) {
1512     noit_check_t *check = next->data;
1513     if(strcmp(check->target_ip, target_ip)) break;
1514     if(count < todo_count) todo[count++] = check;
1515     mtev_skiplist_next(tlist, &next);
1516   }
1517   pthread_mutex_unlock(&polls_lock);
1518
1519   todo_count = count;
1520   count = 0;
1521   for(i=0;i<todo_count;i++)
1522     count += f(todo[i],closure);
1523
1524   if(todo != todo_onstack) free(todo);
1525   return count;
1526 }
1527 int
1528 noit_poller_target_do(const char *target, int (*f)(noit_check_t *, void *),
1529                       void *closure) {
1530   int i, todo_count = 0, count = 0;
1531   noit_check_t pivot;
1532   mtev_skiplist *tlist;
1533   mtev_skiplist_node *next;
1534   noit_check_t *todo_onstack[8192];
1535   noit_check_t **todo = todo_onstack;
1536
1537   tlist = mtev_skiplist_find(polls_by_name.index,
1538                              __check_target_compare, NULL);
1539
1540   pthread_mutex_lock(&polls_lock);
1541   memset(&pivot, 0, sizeof(pivot));
1542   pivot.name = "";
1543   pivot.target = (char *)target;
1544   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1545   while(next && next->data) {
1546     noit_check_t *check = next->data;
1547     if(strcmp(check->target, target)) break;
1548     todo_count++;
1549     mtev_skiplist_next(tlist, &next);
1550   }
1551
1552   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1553
1554   memset(&pivot, 0, sizeof(pivot));
1555   pivot.name = "";
1556   pivot.target = (char *)target;
1557   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1558   while(next && next->data) {
1559     noit_check_t *check = next->data;
1560     if(strcmp(check->target, target)) break;
1561     if(count < todo_count) todo[count++] = check;
1562     mtev_skiplist_next(tlist, &next);
1563   }
1564   pthread_mutex_unlock(&polls_lock);
1565
1566   todo_count = count;
1567   count = 0;
1568   for(i=0;i<todo_count;i++)
1569     count += f(todo[i],closure);
1570
1571   if(todo != todo_onstack) free(todo);
1572   return count;
1573 }
1574
1575 int
1576 noit_poller_do(int (*f)(noit_check_t *, void *),
1577                void *closure) {
1578   mtev_skiplist_node *iter;
1579   int i, count = 0, max_count = 0;
1580   noit_check_t **todo;
1581
1582   if(polls_by_name.size == 0) return 0;
1583
1584   max_count = polls_by_name.size;
1585   todo = malloc(max_count * sizeof(*todo));
1586
1587   pthread_mutex_lock(&polls_lock);
1588   for(iter = mtev_skiplist_getlist(&polls_by_name); iter;
1589       mtev_skiplist_next(&polls_by_name, &iter)) {
1590     if(count < max_count) todo[count++] = (noit_check_t *)iter->data;
1591   }
1592   pthread_mutex_unlock(&polls_lock);
1593
1594   max_count = count;
1595   count = 0;
1596   for(i=0;i<max_count;i++)
1597     count += f(todo[i], closure);
1598   free(todo);
1599   return count;
1600 }
1601
1602 struct ip_module_collector_crutch {
1603   noit_check_t **array;
1604   const char *module;
1605   int idx;
1606   int allocd;
1607 };
1608 static int ip_module_collector(noit_check_t *check, void *cl) {
1609   struct ip_module_collector_crutch *c = cl;
1610   if(c->idx >= c->allocd) return 0;
1611   if(strcmp(check->module, c->module)) return 0;
1612   c->array[c->idx++] = check;
1613   return 1;
1614 }
1615 int
1616 noit_poller_lookup_by_ip_module(const char *ip, const char *mod,
1617                                 noit_check_t **checks, int nchecks) {
1618   struct ip_module_collector_crutch crutch;
1619   crutch.array = checks;
1620   crutch.allocd = nchecks;
1621   crutch.idx = 0;
1622   crutch.module = mod;
1623   return noit_poller_target_ip_do(ip, ip_module_collector, &crutch);
1624 }
1625 int
1626 noit_poller_lookup_by_module(const char *ip, const char *mod,
1627                              noit_check_t **checks, int nchecks) {
1628   struct ip_module_collector_crutch crutch;
1629   crutch.array = checks;
1630   crutch.allocd = nchecks;
1631   crutch.idx = 0;
1632   crutch.module = mod;
1633   return noit_poller_target_do(ip, ip_module_collector, &crutch);
1634 }
1635
1636
1637 int
1638 noit_check_xpath(char *xpath, int len,
1639                  const char *base, const char *arg) {
1640   uuid_t checkid;
1641   int base_trailing_slash;
1642   char argcopy[1024], *target, *module, *name;
1643
1644   base_trailing_slash = (base[strlen(base)-1] == '/');
1645   xpath[0] = '\0';
1646   argcopy[0] = '\0';
1647   if(arg) strlcpy(argcopy, arg, sizeof(argcopy));
1648
1649   if(uuid_parse(argcopy, checkid) == 0) {
1650     /* If they kill by uuid, we'll seek and destroy -- find it anywhere */
1651     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1652              base, base_trailing_slash ? "" : "/", argcopy);
1653   }
1654   else if((module = strchr(argcopy, '`')) != NULL) {
1655     noit_check_t *check;
1656     char uuid_str[37];
1657     target = argcopy;
1658     *module++ = '\0';
1659     if((name = strchr(module+1, '`')) == NULL)
1660       name = module;
1661     else
1662       name++;
1663     check = noit_poller_lookup_by_name(target, name);
1664     if(!check) {
1665       return -1;
1666     }
1667     uuid_unparse_lower(check->checkid, uuid_str);
1668     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1669              base, base_trailing_slash ? "" : "/", uuid_str);
1670   }
1671   return strlen(xpath);
1672 }
1673
1674 static int
1675 bad_check_initiate(noit_module_t *self, noit_check_t *check,
1676                    int once, noit_check_t *cause) {
1677   /* self is likely null here -- why it is bad, in fact */
1678   /* this is only suitable to call in one-offs */
1679   struct timeval now;
1680   stats_t *inp;
1681   char buff[256];
1682   if(!once) return -1;
1683   if(!check) return -1;
1684   mtevAssert(!(check->flags & NP_RUNNING));
1685   check->flags |= NP_RUNNING;
1686   inp = noit_check_get_stats_inprogress(check);
1687   gettimeofday(&now, NULL);
1688   noit_check_stats_whence(inp, &now);
1689   snprintf(buff, sizeof(buff), "check[%s] implementation offline",
1690            check->module);
1691   noit_check_stats_status(inp, buff);
1692   noit_check_set_stats(check);
1693   check->flags &= ~NP_RUNNING;
1694   return 0;
1695 }
1696 void
1697 noit_check_stats_clear(noit_check_t *check, stats_t *s) {
1698   memset(s, 0, sizeof(*s));
1699   s->state = NP_UNKNOWN;
1700   s->available = NP_UNKNOWN;
1701 }
1702
1703 static void
1704 __stats_add_metric(stats_t *newstate, metric_t *m) {
1705   mtev_hash_replace(&newstate->metrics, m->metric_name, strlen(m->metric_name),
1706                     m, NULL, (void (*)(void *))mtev_memory_safe_free);
1707 }
1708
1709 static void
1710 __mark_metric_logged(stats_t *newstate, const char *metric_name) {
1711   void *vm;
1712   if(mtev_hash_retrieve(&newstate->metrics,
1713                         metric_name, strlen(metric_name), &vm)) {
1714     ((metric_t *)vm)->logged = mtev_true;
1715   }
1716 }
1717
1718 static size_t
1719 noit_metric_sizes(metric_type_t type, const void *value) {
1720   switch(type) {
1721     case METRIC_INT32:
1722     case METRIC_UINT32:
1723       return sizeof(int32_t);
1724     case METRIC_INT64:
1725     case METRIC_UINT64:
1726       return sizeof(int64_t);
1727     case METRIC_DOUBLE:
1728       return sizeof(double);
1729     case METRIC_STRING: {
1730       int len = strlen((char*)value) + 1;
1731       return ((len >= text_size_limit) ? text_size_limit+1 : len);
1732     }
1733     case METRIC_ABSENT:
1734     case METRIC_NULL:
1735     case METRIC_GUESS:
1736       break;
1737   }
1738   mtevAssert(type != type);
1739   return 0;
1740 }
1741 static metric_type_t
1742 noit_metric_guess_type(const char *s, void **replacement) {
1743   char *copy, *cp, *trailer, *rpl;
1744   int negative = 0;
1745   metric_type_t type = METRIC_STRING;
1746
1747   if(!s) return METRIC_GUESS;
1748   copy = cp = strdup(s);
1749
1750   /* TRIM the string */
1751   while(*cp && isspace(*cp)) cp++; /* ltrim */
1752   s = cp; /* found a good starting point */
1753   while(*cp) cp++; /* advance to \0 */
1754   cp--; /* back up one */
1755   while(cp > s && isspace(*cp)) *cp-- = '\0'; /* rtrim */
1756
1757   /* Find the first space */
1758   cp = (char *)s;
1759   while(*cp && !isspace(*cp)) cp++;
1760   trailer = cp;
1761   cp--; /* backup one */
1762   if(cp > s && *cp == '%') *cp-- = '\0'; /* chop a last % is there is one */
1763
1764   while(*trailer && isspace(*trailer)) *trailer++ = '\0'; /* rtrim */
1765
1766   /* string was       '  -1.23e-01%  inodes used  ' */
1767   /* copy is (~ = \0) '  -1.23e-01~  inodes used~~' */
1768   /*                     ^           ^              */
1769   /*                     s           trailer        */
1770
1771   /* So, the trailer must not contain numbers */
1772   while(*trailer) { if(isdigit(*trailer)) goto notanumber; trailer++; }
1773
1774   /* And the 's' must be of the form:
1775    *  0) may start with a sign [-+]?
1776    *  1) [1-9][0-9]*
1777    *  2) [0]?.[0-9]+
1778    *  3) 0
1779    *  4) [1-9][0-9]*.[0-9]+
1780    *  5) all of the above ending with e[+-][0-9]+
1781    */
1782    rpl = (char *)s;
1783    /* CASE 0 */
1784    if(s[0] == '-' || s[0] == '+') {
1785      if(s[0] == '-') negative = 1;
1786      s++;
1787    }
1788
1789    if(s[0] == '.') goto decimal; /* CASE 2 */
1790    if(s[0] == '0') { /* CASE 2 & 3 */
1791      s++;
1792      if(!s[0]) goto scanint; /* CASE 3 */
1793      if(s[0] == '.') goto decimal; /* CASE 2 */
1794      goto notanumber;
1795    }
1796    if(s[0] >= '1' && s[0] <= '9') { /* CASE 1 & 4 */
1797      s++;
1798      while(isdigit(s[0])) s++; /* CASE 1 & 4 */
1799      if(!s[0]) goto scanint; /* CASE 1 */
1800      if(s[0] == '.') goto decimal; /* CASE 4 */
1801      goto notanumber;
1802    }
1803    /* Not case 1,2,3,4 */
1804    goto notanumber;
1805
1806   decimal:
1807    s++;
1808    if(!isdigit(s[0])) goto notanumber;
1809    s++;
1810    while(isdigit(s[0])) s++;
1811    if(!s[0]) goto scandouble;
1812    if(s[0] == 'e' || s[0] == 'E') goto exponent; /* CASE 5 */
1813    goto notanumber;
1814
1815   exponent:
1816    s++;
1817    if(s[0] != '-' && s[0] != '+') goto notanumber;
1818    s++;
1819    if(!isdigit(s[0])) goto notanumber;
1820    s++;
1821    while(isdigit(s[0])) s++;
1822    if(!s[0]) goto scandouble;
1823    goto notanumber;
1824
1825  scanint:
1826    if(negative) {
1827      int64_t *v;
1828      v = malloc(sizeof(*v));
1829      *v = strtoll(rpl, NULL, 10);
1830      *replacement = v;
1831      type = METRIC_INT64;
1832      goto alldone;
1833    }
1834    else {
1835      u_int64_t *v;
1836      v = malloc(sizeof(*v));
1837      *v = strtoull(rpl, NULL, 10);
1838      *replacement = v;
1839      type = METRIC_UINT64;
1840      goto alldone;
1841    }
1842  scandouble:
1843    {
1844      double *v;
1845      v = malloc(sizeof(*v));
1846      *v = strtod(rpl, NULL);
1847      *replacement = v;
1848      type = METRIC_DOUBLE;
1849      goto alldone;
1850    }
1851
1852  alldone:
1853  notanumber:
1854   free(copy);
1855   return type;
1856 }
1857
/*
 * Sanitize a metric name in place.
 *
 * Every non-printable byte is replaced with a space, then trailing
 * spaces are stripped.  The first character is never stripped, so a
 * non-empty name stays non-empty.  An empty name is left untouched.
 */
static void
cleanse_metric_name(char *m) {
  char *cp;
  /* isprint() needs an unsigned char value; plain char may be negative */
  for(cp = m; *cp; cp++)
    if(!isprint((unsigned char)*cp)) *cp = ' ';
  /* cp sits on the terminator.  Walk back over trailing spaces without
   * ever looking before m (the old code read m[-1] for an empty name). */
  while(cp - m > 1 && cp[-1] == ' ') /* always leave first char */
    *--cp = '\0';
}
1866
1867 int
1868 noit_stats_populate_metric(metric_t *m, const char *name, metric_type_t type,
1869                            const void *value) {
1870   void *replacement = NULL;
1871
1872   /* If we are passed a null name, we want to quit populating the metric...
1873    * no reason we should ever have a null metric name */
1874   if (!name) {
1875     return -1;
1876   }
1877
1878   m->metric_name = strdup(name);
1879   cleanse_metric_name(m->metric_name);
1880
1881   if(type == METRIC_GUESS)
1882     type = noit_metric_guess_type((char *)value, &replacement);
1883   if(type == METRIC_GUESS) return -1;
1884
1885   m->metric_type = type;
1886
1887   if(replacement)
1888     m->metric_value.vp = replacement;
1889   else if(value) {
1890     size_t len;
1891     len = noit_metric_sizes(type, value);
1892     m->metric_value.vp = malloc(len);
1893     memcpy(m->metric_value.vp, value, len);
1894     if (type == METRIC_STRING) {
1895       m->metric_value.s[len-1] = 0;
1896     }
1897   }
1898   else m->metric_value.vp = NULL;
1899   return 0;
1900 }
1901
1902 metric_t *
1903 noit_stats_get_metric(noit_check_t *check,
1904                       stats_t *newstate, const char *name) {
1905   void *v;
1906   if(newstate == NULL)
1907     newstate = stats_inprogress(check);
1908   if(mtev_hash_retrieve(&newstate->metrics, name, strlen(name), &v))
1909     return (metric_t *)v;
1910   return NULL;
1911 }
1912
1913 void
1914 noit_stats_set_metric(noit_check_t *check,
1915                       const char *name, metric_type_t type,
1916                       const void *value) {
1917   stats_t *c;
1918   metric_t *m = mtev_memory_safe_malloc_cleanup(sizeof(*m), noit_check_safe_free_metric);
1919   memset(m, 0, sizeof(*m));
1920   if(noit_stats_populate_metric(m, name, type, value)) {
1921     mtev_memory_safe_free(m);
1922     return;
1923   }
1924   noit_check_metric_count_add(1);
1925   c = noit_check_get_stats_inprogress(check);
1926   check_stats_set_metric_hook_invoke(check, c, m);
1927   __stats_add_metric(c, m);
1928 }
1929 void
1930 noit_stats_set_metric_coerce(noit_check_t *check,
1931                              const char *name, metric_type_t t,
1932                              const char *v) {
1933   char *endptr;
1934   stats_t *c;
1935   c = noit_check_get_stats_inprogress(check);
1936   if(v == NULL) {
1937    bogus:
1938     check_stats_set_metric_coerce_hook_invoke(check, c, name, t, v, mtev_false);
1939     noit_stats_set_metric(check, name, t, NULL);
1940     return;
1941   }
1942   switch(t) {
1943     case METRIC_STRING:
1944       noit_stats_set_metric(check, name, t, v);
1945       break;
1946     case METRIC_INT32:
1947     {
1948       int32_t val;
1949       val = strtol(v, &endptr, 10);
1950       if(endptr == v) goto bogus;
1951       noit_stats_set_metric(check, name, t, &val);
1952       break;
1953     }
1954     case METRIC_UINT32:
1955     {
1956       u_int32_t val;
1957       val = strtoul(v, &endptr, 10);
1958       if(endptr == v) goto bogus;
1959       noit_stats_set_metric(check, name, t, &val);
1960       break;
1961     }
1962     case METRIC_INT64:
1963     {
1964       int64_t val;
1965       val = strtoll(v, &endptr, 10);
1966       if(endptr == v) goto bogus;
1967       noit_stats_set_metric(check, name, t, &val);
1968       break;
1969     }
1970     case METRIC_UINT64:
1971     {
1972       u_int64_t val;
1973       val = strtoull(v, &endptr, 10);
1974       if(endptr == v) goto bogus;
1975       noit_stats_set_metric(check, name, t, &val);
1976       break;
1977     }
1978     case METRIC_DOUBLE:
1979     {
1980       double val;
1981       val = strtod(v, &endptr);
1982       if(endptr == v) goto bogus;
1983       noit_stats_set_metric(check, name, t, &val);
1984       break;
1985     }
1986     case METRIC_GUESS:
1987       noit_stats_set_metric(check, name, t, v);
1988       break;
1989     case METRIC_ABSENT:
1990     case METRIC_NULL:
1991       mtevAssert(0 && "ABSENT and NULL metrics may not be passed to noit_stats_set_metric_coerce");
1992   }
1993   check_stats_set_metric_coerce_hook_invoke(check, c, name, t, v, mtev_true);
1994 }
1995 void
1996 noit_stats_log_immediate_metric(noit_check_t *check,
1997                                 const char *name, metric_type_t type,
1998                                 const void *value) {
1999   struct timeval now;
2000   stats_t *c;
2001   metric_t *m = mtev_memory_safe_malloc_cleanup(sizeof(*m), noit_check_safe_free_metric);
2002   memset(m, 0, sizeof(*m));
2003   if(noit_stats_populate_metric(m, name, type, value)) {
2004     mtev_memory_safe_free(m);
2005     return;
2006   }
2007   gettimeofday(&now, NULL);
2008   noit_check_log_metric(check, &now, m);
2009   mtev_memory_safe_free(m);
2010   c = noit_check_get_stats_inprogress(check);
2011   __mark_metric_logged(c, name);
2012 }
2013
2014 void
2015 noit_check_passive_set_stats(noit_check_t *check) {
2016   int i, nwatches = 0;
2017   mtev_skiplist_node *next;
2018   noit_check_t n;
2019   noit_check_t *watches[8192];
2020
2021   uuid_copy(n.checkid, check->checkid);
2022   n.period = 0;
2023
2024   noit_check_set_stats(check);
2025
2026   pthread_mutex_lock(&polls_lock);
2027   mtev_skiplist_find_neighbors(&watchlist, &n, NULL, NULL, &next);
2028   while(next && next->data && nwatches < 8192) {
2029     noit_check_t *wcheck = next->data;
2030     if(uuid_compare(n.checkid, wcheck->checkid)) break;
2031     watches[nwatches++] = wcheck;
2032     mtev_skiplist_next(&watchlist, &next);
2033   }
2034   pthread_mutex_unlock(&polls_lock);
2035
2036   for(i=0;i<nwatches;i++) {
2037     void *backup;
2038     noit_check_t *wcheck = watches[i];
2039     /* Swap the real check's stats into place */
2040     backup = wcheck->statistics;
2041     wcheck->statistics = check->statistics;
2042
2043     if(check_passive_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
2044       /* Write out our status */
2045       noit_check_log_status(wcheck);
2046       /* Write out all metrics */
2047       noit_check_log_metrics(wcheck);
2048     }
2049     /* Swap them back out */
2050     wcheck->statistics = backup;
2051   }
2052 }
2053 void
2054 noit_check_set_stats(noit_check_t *check) {
2055   int report_change = 0;
2056   char *cp;
2057   dep_list_t *dep;
2058   stats_t *old, *prev, *current;
2059
2060   if(check_set_stats_hook_invoke(check) == MTEV_HOOK_ABORT) return;
2061
2062   old = stats_previous(check);
2063   prev = stats_previous(check) = stats_current(check);
2064   current = stats_current(check) = stats_inprogress(check);
2065   stats_inprogress(check) = noit_check_stats_alloc();
2066  
2067   if(old) {
2068     mtev_memory_safe_free(old);
2069   }
2070
2071   if(current) {
2072     for(cp = current->status; cp && *cp; cp++)
2073       if(*cp == '\r' || *cp == '\n') *cp = ' ';
2074   }
2075
2076   /* check for state changes */
2077   if((!current || (current->available != NP_UNKNOWN)) &&
2078      (!prev || (prev->available != NP_UNKNOWN)) &&
2079      (!current || !prev || (current->available != prev->available)))
2080     report_change = 1;
2081   if((!current || (current->state != NP_UNKNOWN)) &&
2082      (!prev || (prev->state != NP_UNKNOWN)) &&
2083      (!current || !prev || (current->state != prev->state)))
2084     report_change = 1;
2085
2086   mtevL(noit_debug, "%s`%s <- [%s]\n", check->target, check->name,
2087         current ? current->status : "null");
2088   if(report_change) {
2089     mtevL(noit_debug, "%s`%s -> [%s:%s]\n",
2090           check->target, check->name,
2091           noit_check_available_string(current ? current->available : NP_UNKNOWN),
2092           noit_check_state_string(current ? current->state : NP_UNKNOWN));
2093   }
2094
2095   if(NOIT_CHECK_STATUS_ENABLED()) {
2096     char id[UUID_STR_LEN+1];
2097     uuid_unparse_lower(check->checkid, id);
2098     NOIT_CHECK_STATUS(id, check->module, check->name, check->target,
2099                       current ? current->available : NP_UNKNOWN,
2100                       current ? current->state : NP_UNKNOWN,
2101                       current ? current->status : "null");
2102   }
2103
2104   if(check_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
2105     /* Write out the bundled information */
2106     noit_check_log_bundle(check);
2107   }
2108   /* count the check as complete */
2109   check_completion_count++;
2110
2111   for(dep = check->causal_checks; dep; dep = dep->next) {
2112     noit_module_t *mod;
2113     mod = noit_module_lookup(dep->check->module);
2114     if(!mod) {
2115       bad_check_initiate(mod, dep->check, 1, check);
2116     }
2117     else {
2118       mtevL(noit_debug, "Firing %s`%s in response to %s`%s\n",
2119             dep->check->target, dep->check->name,
2120             check->target, check->name);
2121       if((dep->check->flags & NP_DISABLED) == 0)
2122         if(mod->initiate_check)
2123           mod->initiate_check(mod, dep->check, 1, check);
2124     }
2125   }
2126 }
2127
2128 static int
2129 noit_console_show_watchlist(mtev_console_closure_t ncct,
2130                             int argc, char **argv,
2131                             mtev_console_state_t *dstate,
2132                             void *closure) {
2133   mtev_skiplist_node *iter, *fiter;
2134   int nwatches = 0, i;
2135   noit_check_t *watches[8192];
2136
2137   nc_printf(ncct, "%d active watches.\n", watchlist.size);
2138   pthread_mutex_lock(&polls_lock);
2139   for(iter = mtev_skiplist_getlist(&watchlist); iter && nwatches < 8192;
2140       mtev_skiplist_next(&watchlist, &iter)) {
2141     noit_check_t *check = iter->data;
2142     watches[nwatches++] = check;
2143   }
2144   pthread_mutex_unlock(&polls_lock);
2145
2146   for(i=0;i<nwatches;i++) {
2147     noit_check_t *check = watches[i];
2148     char uuid_str[UUID_STR_LEN + 1];
2149
2150     uuid_unparse_lower(check->checkid, uuid_str);
2151     nc_printf(ncct, "%s:\n\t[%s`%s`%s]\n\tPeriod: %dms\n\tFeeds[%d]:\n",
2152               uuid_str, check->target, check->module, check->name,
2153               check->period, check->feeds ? check->feeds->size : 0);
2154     if(check->feeds && check->feeds->size) {
2155       for(fiter = mtev_skiplist_getlist(check->feeds); fiter;
2156           mtev_skiplist_next(check->feeds, &fiter)) {
2157         nc_printf(ncct, "\t\t%s\n", (const char *)fiter->data);
2158       }
2159     }
2160   }
2161   return 0;
2162 }
2163
2164 static void
2165 nc_printf_check_brief(mtev_console_closure_t ncct,
2166                       noit_check_t *check) {
2167   stats_t *current;
2168   char out[512];
2169   char uuid_str[37];
2170   snprintf(out, sizeof(out), "%s`%s (%s [%x])", check->target, check->name,
2171            check->target_ip, check->flags);
2172   uuid_unparse_lower(check->checkid, uuid_str);
2173   nc_printf(ncct, "%s %s\n", uuid_str, out);
2174   current = stats_current(check);
2175   if(current)
2176     nc_printf(ncct, "\t%s\n", current->status);
2177 }
2178
2179 char *
2180 noit_console_conf_check_opts(mtev_console_closure_t ncct,
2181                              mtev_console_state_stack_t *stack,
2182                              mtev_console_state_t *dstate,
2183                              int argc, char **argv, int idx) {
2184   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2185   uuid_t key_id;
2186   int klen, i = 0;
2187   void *vcheck;
2188
2189   if(argc == 1) {
2190     if(!strncmp("new", argv[0], strlen(argv[0]))) {
2191       if(idx == i) return strdup("new");
2192       i++;
2193     }
2194     pthread_mutex_lock(&polls_lock);
2195     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2196                          &vcheck)) {
2197       noit_check_t *check = (noit_check_t *)vcheck;
2198       char out[512];
2199       char uuid_str[37];
2200       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
2201       uuid_unparse_lower(check->checkid, uuid_str);
2202       if(!strncmp(out, argv[0], strlen(argv[0]))) {
2203         if(idx == i) {
2204           pthread_mutex_unlock(&polls_lock);
2205           return strdup(out);
2206         }
2207         i++;
2208       }
2209       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
2210         if(idx == i) {
2211           pthread_mutex_unlock(&polls_lock);
2212           return strdup(uuid_str);
2213         }
2214         i++;
2215       }
2216     }
2217     pthread_mutex_unlock(&polls_lock);
2218   }
2219   if(argc == 2) {
2220     cmd_info_t *cmd;
2221     if(!strcmp("new", argv[0])) return NULL;
2222     cmd = mtev_skiplist_find(&dstate->cmds, "attribute", NULL);
2223     if(!cmd) return NULL;
2224     return mtev_console_opt_delegate(ncct, stack, cmd->dstate, argc-1, argv+1, idx);
2225   }
2226   return NULL;
2227 }
2228
2229 char *
2230 noit_console_check_opts(mtev_console_closure_t ncct,
2231                         mtev_console_state_stack_t *stack,
2232                         mtev_console_state_t *dstate,
2233                         int argc, char **argv, int idx) {
2234   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2235   uuid_t key_id;
2236   int klen, i = 0;
2237
2238   if(argc == 1) {
2239     void *vcheck;
2240     pthread_mutex_lock(&polls_lock);
2241     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2242                          &vcheck)) {
2243       char out[512];
2244       char uuid_str[37];
2245       noit_check_t *check = (noit_check_t *)vcheck;
2246       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
2247       uuid_unparse_lower(check->checkid, uuid_str);
2248       if(!strncmp(out, argv[0], strlen(argv[0]))) {
2249         if(idx == i) {
2250           pthread_mutex_unlock(&polls_lock);
2251           return strdup(out);
2252         }
2253         i++;
2254       }
2255       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
2256         if(idx == i) {
2257           pthread_mutex_unlock(&polls_lock);
2258           return strdup(uuid_str);
2259         }
2260         i++;
2261       }
2262     }
2263     pthread_mutex_unlock(&polls_lock);
2264   }
2265   if(argc == 2) {
2266     return mtev_console_opt_delegate(ncct, stack, dstate, argc-1, argv+1, idx);
2267   }
2268   return NULL;
2269 }
2270
2271 static int
2272 noit_console_show_checks(mtev_console_closure_t ncct,
2273                          int argc, char **argv,
2274                          mtev_console_state_t *dstate,
2275                          void *closure) {
2276   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2277   uuid_t key_id;
2278   int klen, i = 0, nchecks;
2279   void *vcheck;
2280   noit_check_t **checks;
2281
2282   nchecks = mtev_hash_size(&polls);
2283   if(nchecks == 0) return 0;
2284   checks = malloc(nchecks * sizeof(*checks));
2285
2286   pthread_mutex_lock(&polls_lock);
2287   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2288                        &vcheck)) {
2289     if(i<nchecks) checks[i++] = vcheck;
2290   }
2291   pthread_mutex_unlock(&polls_lock);
2292
2293   nchecks = i;
2294   for(i=0;i<nchecks;i++)
2295     nc_printf_check_brief(ncct,checks[i]);
2296
2297   free(checks);
2298   return 0;
2299 }
2300
2301 static int
2302 noit_console_short_checks_sl(mtev_console_closure_t ncct,
2303                              mtev_skiplist *tlist) {
2304   int max_count, i = 0;
2305   noit_check_t **todo;
2306   mtev_skiplist_node *iter;
2307
2308   max_count = tlist->size;
2309   if(max_count == 0) return 0;
2310   todo = malloc(max_count * sizeof(*todo));
2311
2312   pthread_mutex_lock(&polls_lock);
2313   for(iter = mtev_skiplist_getlist(tlist); i < max_count && iter;
2314       mtev_skiplist_next(tlist, &iter)) {
2315     todo[i++] = iter->data;
2316   }
2317   pthread_mutex_unlock(&polls_lock);
2318
2319   max_count = i;
2320   for(i=0;i<max_count;i++)
2321     nc_printf_check_brief(ncct, todo[i]);
2322
2323   free(todo);
2324   return 0;
2325 }
2326 static int
2327 noit_console_show_checks_name(mtev_console_closure_t ncct,
2328                               int argc, char **argv,
2329                               mtev_console_state_t *dstate,
2330                               void *closure) {
2331   return noit_console_short_checks_sl(ncct, &polls_by_name);
2332 }
2333
2334 static int
2335 noit_console_show_checks_target(mtev_console_closure_t ncct,
2336                                    int argc, char **argv,
2337                                    mtev_console_state_t *dstate,
2338                                    void *closure) {
2339   return noit_console_short_checks_sl(ncct,
2340            mtev_skiplist_find(polls_by_name.index,
2341            __check_target_compare, NULL));
2342 }
2343
2344 static int
2345 noit_console_show_checks_target_ip(mtev_console_closure_t ncct,
2346                                    int argc, char **argv,
2347                                    mtev_console_state_t *dstate,
2348                                    void *closure) {
2349   return noit_console_short_checks_sl(ncct,
2350            mtev_skiplist_find(polls_by_name.index,
2351            __check_target_ip_compare, NULL));
2352 }
2353
2354 static void
2355 register_console_check_commands() {
2356   mtev_console_state_t *tl;
2357   cmd_info_t *showcmd;
2358
2359   tl = mtev_console_state_initial();
2360   showcmd = mtev_console_state_get_cmd(tl, "show");
2361   mtevAssert(showcmd && showcmd->dstate);
2362
2363   mtev_console_state_add_cmd(showcmd->dstate,
2364     NCSCMD("timing_slots", noit_console_show_timing_slots, NULL, NULL, NULL));
2365
2366   mtev_console_state_add_cmd(showcmd->dstate,
2367     NCSCMD("checks", noit_console_show_checks, NULL, NULL, NULL));
2368
2369   mtev_console_state_add_cmd(showcmd->dstate,
2370     NCSCMD("checks:name", noit_console_show_checks_name, NULL,
2371            NULL, NULL));
2372
2373   mtev_console_state_add_cmd(showcmd->dstate,
2374     NCSCMD("checks:target", noit_console_show_checks_target, NULL,
2375            NULL, NULL));
2376
2377   mtev_console_state_add_cmd(showcmd->dstate,
2378     NCSCMD("checks:target_ip", noit_console_show_checks_target_ip, NULL,
2379            NULL, NULL));
2380
2381   mtev_console_state_add_cmd(showcmd->dstate,
2382     NCSCMD("watches", noit_console_show_watchlist, NULL, NULL, NULL));
2383 }
2384
2385 int
2386 noit_check_register_module(const char *name) {
2387   int i;
2388   for(i=0; i<reg_module_id; i++)
2389     if(!strcmp(reg_module_names[i], name)) return i;
2390   if(reg_module_id >= MAX_MODULE_REGISTRATIONS) return -1;
2391   mtevL(noit_debug, "Registered module %s as %d\n", name, i);
2392   i = reg_module_id++;
2393   reg_module_names[i] = strdup(name);
2394   mtev_conf_set_namespace(reg_module_names[i]);
2395   return i;
2396 }
2397 int
2398 noit_check_registered_module_by_name(const char *name) {
2399   int i;
2400   for(i=0; i<reg_module_id; i++)
2401     if(!strcmp(reg_module_names[i], name)) return i;
2402   return -1;
2403 }
2404 int
2405 noit_check_registered_module_cnt() {
2406   return reg_module_id;
2407 }
2408 const char *
2409 noit_check_registered_module(int idx) {
2410   if(reg_module_used < 0) reg_module_used = reg_module_id;
2411   mtevAssert(reg_module_used == reg_module_id);
2412   if(idx >= reg_module_id || idx < 0) return NULL;
2413   return reg_module_names[idx];
2414 }
2415
2416 void
2417 noit_check_set_module_metadata(noit_check_t *c, int idx, void *md, void (*freefunc)(void *)) {
2418   struct vp_w_free *tuple;
2419   if(reg_module_used < 0) reg_module_used = reg_module_id;
2420   mtevAssert(reg_module_used == reg_module_id);
2421   if(idx >= reg_module_id || idx < 0) return;
2422   if(!c->module_metadata) c->module_metadata = calloc(reg_module_id, sizeof(void *));
2423   c->module_metadata[idx] = calloc(1, sizeof(struct vp_w_free));
2424   tuple = c->module_metadata[idx];
2425   tuple->ptr = md;
2426   tuple->freefunc = freefunc;
2427 }
2428 void
2429 noit_check_set_module_config(noit_check_t *c, int idx, mtev_hash_table *config) {
2430   if(reg_module_used < 0) reg_module_used = reg_module_id;
2431   mtevAssert(reg_module_used == reg_module_id);
2432   if(idx >= reg_module_id || idx < 0) return;
2433   if(!c->module_configs) c->module_configs = calloc(reg_module_id, sizeof(mtev_hash_table *));
2434   c->module_configs[idx] = config;
2435 }
2436 void *
2437 noit_check_get_module_metadata(noit_check_t *c, int idx) {
2438   struct vp_w_free *tuple;
2439   if(reg_module_used < 0) reg_module_used = reg_module_id;
2440   mtevAssert(reg_module_used == reg_module_id);
2441   if(idx >= reg_module_id || idx < 0 || !c->module_metadata) return NULL;
2442   tuple = c->module_metadata[idx];
2443   return tuple ? tuple->ptr : NULL;
2444 }
2445 mtev_hash_table *
2446 noit_check_get_module_config(noit_check_t *c, int idx) {
2447   if(reg_module_used < 0) reg_module_used = reg_module_id;
2448   mtevAssert(reg_module_used == reg_module_id);
2449   if(idx >= reg_module_id || idx < 0 || !c->module_configs) return NULL;
2450   return c->module_configs[idx];
2451 }
Note: See TracBrowser for help on using the browser.