root/src/noit_check.c

Revision 72ef432903f5bd0b2ea537ba9e67f2843600959a, 72.0 kB (checked in by Phil Maddox <philip.maddox@circonus.com>, 3 weeks ago)

Add Experimental "Priority Scheduling" Feature

Added flag, "priority_scheduling" (configurable in the "checks" stanza)
that will attempt to ensure that all checks will run within the course
of a single minute.

This is experimental and of somewhat limited utility - may be replaced
in the future, but this suits a specific need.

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  * Copyright (c) 2015, Circonus, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above
13  *       copyright notice, this list of conditions and the following
14  *       disclaimer in the documentation and/or other materials provided
15  *       with the distribution.
16  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
17  *       of its contributors may be used to endorse or promote products
18  *       derived from this software without specific prior written
19  *       permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "noit_config.h"
35 #include <mtev_defines.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <ctype.h>
41 #include <assert.h>
42 #include <errno.h>
43 #include <netinet/in.h>
44 #include <arpa/inet.h>
45 #include <time.h>
46
47 #include <eventer/eventer.h>
48 #include <mtev_memory.h>
49 #include <mtev_log.h>
50 #include <mtev_hash.h>
51 #include <mtev_skiplist.h>
52 #include <mtev_watchdog.h>
53 #include <mtev_conf.h>
54 #include <mtev_console.h>
55 #include <mtev_cluster.h>
56
57 #include "noit_mtev_bridge.h"
58 #include "noit_dtrace_probes.h"
59 #include "noit_check.h"
60 #include "noit_module.h"
61 #include "noit_check_tools.h"
62 #include "noit_check_resolver.h"
63
/* Initial value of text_size_limit; also restored by noit_poller_init()
 * when the configured limit is not positive. */
64 #define DEFAULT_TEXT_METRIC_SIZE_LIMIT  512
/* Delay handed to eventer_add_in_s_us() when scheduling
 * check_recycle_bin_processor (presumably seconds -- confirm against the
 * eventer API). */
65 #define RECYCLE_INTERVAL 60
66
/* Hook instantiations.  Each MTEV_HOOK_IMPL names a hook, the argument list
 * seen by the caller, the closure type/name, the argument list seen by the
 * registered callback, and the names forwarded to it.
 * NOTE(review): the generated symbols come from the libmtev hook macros;
 * confirm exact names against mtev_hooks.h. */

/* check_config_fixup: callbacks receive (closure, check). */
67 MTEV_HOOK_IMPL(check_config_fixup,
68   (noit_check_t *check),
69   void *, closure,
70   (void *closure, noit_check_t *check),
71   (closure,check))
72
/* check_stats_set_metric: callbacks receive (closure, check, stats, m). */
73 MTEV_HOOK_IMPL(check_stats_set_metric,
74   (noit_check_t *check, stats_t *stats, metric_t *m),
75   void *, closure,
76   (void *closure, noit_check_t *check, stats_t *stats, metric_t *m),
77   (closure,check,stats,m))
78
/* check_stats_set_metric_coerce: callbacks receive the metric name, type,
 * string value and a success flag in addition to check and stats. */
79 MTEV_HOOK_IMPL(check_stats_set_metric_coerce,
80   (noit_check_t *check, stats_t *stats, const char *name,
81    metric_type_t type, const char *v, mtev_boolean success),
82   void *, closure,
83   (void *closure, noit_check_t *check, stats_t *stats, const char *name,
84    metric_type_t type, const char *v, mtev_boolean success),
85   (closure,check,stats,name,type,v,success))
86
/* check_passive_log_stats: callbacks receive (closure, check). */
87 MTEV_HOOK_IMPL(check_passive_log_stats,
88   (noit_check_t *check),
89   void *, closure,
90   (void *closure, noit_check_t *check),
91   (closure,check))
92
/* check_set_stats: callbacks receive (closure, check). */
93 MTEV_HOOK_IMPL(check_set_stats,
94   (noit_check_t *check),
95   void *, closure,
96   (void *closure, noit_check_t *check),
97   (closure,check))
98
/* check_log_stats: callbacks receive (closure, check). */
99 MTEV_HOOK_IMPL(check_log_stats,
100   (noit_check_t *check),
101   void *, closure,
102   (void *closure, noit_check_t *check),
103   (closure,check))
104
/* Indexes into the three-element stats_t* array stored in
 * noit_check_t.statistics (allocated by noit_check_stats_set_calloc). */
105 #define STATS_INPROGRESS 0
106 #define STATS_CURRENT 1
107 #define STATS_PREVIOUS 2
108
109 void
110 free_metric(metric_t *m) {
111   if(m->metric_name) free(m->metric_name);
112   if(m->metric_value.i) free(m->metric_value.i);
113 }
114
/* Select one of the three stats_t pointers kept in c->statistics.  The
 * expansions are lvalues, so they can also be assigned to. */
115 #define stats_inprogress(c) ((stats_t **)(c->statistics))[STATS_INPROGRESS]
116 #define stats_current(c) ((stats_t **)(c->statistics))[STATS_CURRENT]
117 #define stats_previous(c) ((stats_t **)(c->statistics))[STATS_PREVIOUS]
118
119 stats_t *
120 noit_check_get_stats_inprogress(noit_check_t *c) {
121   return stats_inprogress(c);
122 }
123 stats_t *
124 noit_check_get_stats_current(noit_check_t *c) {
125   return stats_current(c);
126 }
127 stats_t *
128 noit_check_get_stats_previous(noit_check_t *c) {
129   return stats_previous(c);
130 }
131
/* One set of results for a check.  Fields are read and written through the
 * noit_check_stats_*() accessors defined below. */
132 struct stats_t {
/* timestamp associated with this set (see noit_check_stats_whence) */
133   struct timeval whence;
/* availability code; presumably one of the NP_* values -- confirm */
134   int8_t available;
/* state code; presumably one of the NP_* values -- confirm */
135   int8_t state;
/* duration value; units are not established in this file section */
136   u_int32_t duration;
/* metrics table; initialised in noit_check_stats_alloc */
137   mtev_hash_table metrics;
/* NUL-terminated status text, copied in with strlcpy */
138   char status[256];
139 };
140
141 struct timeval *
142 noit_check_stats_whence(stats_t *s, struct timeval *n) {
143   if(n) memcpy(&s->whence, n, sizeof(*n));
144   return &s->whence;
145 }
146 int8_t
147 noit_check_stats_available(stats_t *s, int8_t *n) {
148   if(n) s->available = *n;
149   return s->available;
150 }
151 int8_t
152 noit_check_stats_state(stats_t *s, int8_t *n) {
153   if(n) s->state = *n;
154   return s->state;
155 }
156 u_int32_t
157 noit_check_stats_duration(stats_t *s, u_int32_t *n) {
158   if(n) s->duration = *n;
159   return s->duration;
160 }
161 const char *
162 noit_check_stats_status(stats_t *s, const char *n) {
163   if(n) strlcpy(s->status, n, sizeof(s->status));
164   return s->status;
165 }
166 mtev_hash_table *
167 noit_check_stats_metrics(stats_t *s) {
168   return &s->metrics;
169 }
170 void
171 noit_stats_set_whence(noit_check_t *c, struct timeval *t) {
172   (void)noit_check_stats_whence(noit_check_get_stats_inprogress(c), t);
173 }
174 void
175 noit_stats_set_state(noit_check_t *c, int8_t t) {
176   (void)noit_check_stats_state(noit_check_get_stats_inprogress(c), &t);
177 }
178 void
179 noit_stats_set_duration(noit_check_t *c, u_int32_t t) {
180   (void)noit_check_stats_duration(noit_check_get_stats_inprogress(c), &t);
181 }
182 void
183 noit_stats_set_status(noit_check_t *c, const char *s) {
184   (void)noit_check_stats_status(noit_check_get_stats_inprogress(c), s);
185 }
186 void
187 noit_stats_set_available(noit_check_t *c, int8_t t) {
188   (void)noit_check_stats_available(noit_check_get_stats_inprogress(c), &t);
189 }
190 static void
191 noit_check_safe_free_metric(void *vs) {
192   metric_t *m = vs;
193   if (m) {
194     free_metric(m);
195   }
196 }
197 static void
198 noit_check_safe_free_stats(void *vs) {
199   stats_t *s = vs;
200   mtev_hash_destroy(&s->metrics, NULL, (void (*)(void *))mtev_memory_safe_free);
201 }
202 static stats_t *
203 noit_check_stats_alloc() {
204   stats_t *n;
205   n = mtev_memory_safe_malloc_cleanup(sizeof(*n), noit_check_safe_free_stats);
206   memset(n, 0, sizeof(*n));
207   mtev_hash_init(&n->metrics);
208   return n;
209 }
210 static void *
211 noit_check_stats_set_calloc() {
212   int i;
213   stats_t **s;
214   s = calloc(sizeof(stats_t *), 3);
215   for(i=0;i<3;i++) s[i] = noit_check_stats_alloc();
216   return s;
217 }
218
219 /* 20 ms slots over 60 second for distribution */
/* SCHEDULE_GRANULARITY is the slot width in milliseconds. */
220 #define SCHEDULE_GRANULARITY 20
221 #define SLOTS_PER_SECOND (1000/SCHEDULE_GRANULARITY)
/* Capacity of reg_module_names[]. */
222 #define MAX_MODULE_REGISTRATIONS 64
223
224 /* used to manage per-check generic module metadata */
225 struct vp_w_free {
/* stored value */
226   void *ptr;
/* function paired with ptr; presumably used to release it -- confirm */
227   void (*freefunc)(void *);
228 };
229
/* Cleared and recomputed by noit_poller_make_causal_map(); true when some
 * check has an oncheck trigger. */
230 static mtev_boolean system_needs_causality = mtev_false;
/* Configurable via "noit/@text_size_limit" (see noit_poller_init). */
231 static int text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
/* Number of registered generic modules; bounds loops over
 * reg_module_names[] and per-check module configs. */
232 static int reg_module_id = 0;
233 static char *reg_module_names[MAX_MODULE_REGISTRATIONS] = { NULL };
/* NOTE(review): meaning of -1 is not visible in this part of the file. */
234 static int reg_module_used = -1;
/* Counters exposed by noit_check_completion_count() and
 * noit_check_metric_count(). */
235 static u_int64_t check_completion_count = 0ULL;
236 static u_int64_t check_metrics_seen = 0ULL;
/* Held around accesses to polls and polls_by_name in the functions below. */
237 static pthread_mutex_t polls_lock = PTHREAD_MUTEX_INITIALIZER;
/* All checks, keyed by the UUID_SIZE-byte check id. */
238 static mtev_hash_table polls = MTEV_HASH_EMPTY;
/* Entries added by noit_check_dns_ignore_tld(): extension -> ignore. */
239 static mtev_hash_table dns_ignore_list = MTEV_HASH_EMPTY;
/* Ordered by __watchlist_compare (checkid, then period). */
240 static mtev_skiplist watchlist = { 0 };
/* Ordered by __check_name_compare (target, then name), with additional
 * indexes added in noit_poller_init(). */
241 static mtev_skiplist polls_by_name = { 0 };
/* Incremented on every noit_poller_process_checks() call. */
242 static u_int32_t __config_load_generation = 0;
/* Per-slot and per-second counters over one minute, maintained by
 * check_slots_adjust_tv(). */
243 static unsigned short check_slots_count[60000 / SCHEDULE_GRANULARITY] = { 0 },
244                       check_slots_seconds_count[60] = { 0 };
/* Read from "//checks/@priority_scheduling". */
245 static mtev_boolean priority_scheduling = mtev_false;
/* Seconds at the end of the minute excluded by priority scheduling. */
246 static int priority_dead_zone_seconds = 3;
247
248 static noit_check_t *
249 noit_poller_lookup__nolock(uuid_t in) {
250   void *vcheck;
251   if(mtev_hash_retrieve(&polls, (char *)in, UUID_SIZE, &vcheck))
252     return (noit_check_t *)vcheck;
253   return NULL;
254 }
255 static noit_check_t *
256 noit_poller_lookup_by_name__nolock(char *target, char *name) {
257   noit_check_t tmp_check;
258   memset(&tmp_check, 0, sizeof(tmp_check));
259   tmp_check.target = target;
260   tmp_check.name = name;
261   return mtev_skiplist_find(&polls_by_name, &tmp_check, NULL);
262 }
263
264 static int
265 noit_console_show_timing_slots(mtev_console_closure_t ncct,
266                                int argc, char **argv,
267                                mtev_console_state_t *dstate,
268                                void *closure) {
269   int i, j;
270   const int upl = (60000 / SCHEDULE_GRANULARITY) / 60;
271   for(i=0;i<60;i++) {
272     nc_printf(ncct, "[%02d] %04d: ", i, check_slots_seconds_count[i]);
273     for(j=i*upl;j<(i+1)*upl;j++) {
274       char cp = '!';
275       if(check_slots_count[j] < 10) cp = '0' + check_slots_count[j];
276       else if(check_slots_count[j] < 36) cp = 'a' + (check_slots_count[j] - 10);
277       nc_printf(ncct, "%c", cp);
278     }
279     nc_printf(ncct, "\n");
280   }
281   return 0;
282 }
283 static int
284 noit_check_add_to_list(noit_check_t *new_check, const char *newname) {
285   char *oldname = NULL, *newnamecopy;
286   if(newname) {
287     /* track this stuff outside the lock to avoid allocs */
288     oldname = new_check->name;
289     newnamecopy = strdup(newname);
290   }
291   pthread_mutex_lock(&polls_lock);
292   if(!(new_check->flags & NP_TRANSIENT)) {
293     assert(new_check->name || newname);
294     /* This remove could fail -- no big deal */
295     if(new_check->name != NULL)
296       mtev_skiplist_remove(&polls_by_name, new_check, NULL);
297
298     /* optional update the name (at the critical point) */
299     if(newname) new_check->name = newnamecopy;
300
301     /* This insert could fail.. which means we have a conflict on
302      * target`name.  That should result in the check being disabled. */
303     if(!mtev_skiplist_insert(&polls_by_name, new_check)) {
304       mtevL(noit_error, "Check %s`%s disabled due to naming conflict\n",
305             new_check->target, new_check->name);
306       new_check->flags |= NP_DISABLED;
307     }
308     if(oldname) free(oldname);
309   }
310   pthread_mutex_unlock(&polls_lock);
311   return 1;
312 }
313
314 u_int64_t noit_check_metric_count() {
315   return check_metrics_seen;
316 }
317 void noit_check_metric_count_add(int add) {
318   mtev_atomic64_t *n = (mtev_atomic64_t *)&check_metrics_seen;
319   mtev_atomic64_t v = (mtev_atomic64_t)add;
320   mtev_atomic_add64(n, v);
321 }
322
323 u_int64_t noit_check_completion_count() {
324   return check_completion_count;
325 }
/* Forward declarations for helpers defined later in the file; both are
 * referenced from noit_poller_init(). */
326 static void register_console_check_commands();
/* Eventer callback (event, mask, closure, now). */
327 static int check_recycle_bin_processor(eventer_t, int, void *,
328                                        struct timeval *);
329
330 static int
331 check_slots_find_smallest(int sec, struct timeval* period, int timeout) {
332   int i, j, cyclic, random_offset, jbase = 0, mini = 0, minj = 0;
333   unsigned short min_running_i = 0xffff, min_running_j = 0xffff;
334   int period_seconds = period->tv_sec;
335
336   /* If we're greater than sixty seconds, we should do our
337    * initial scheduling as if the period was sixty seconds. */
338   if (period_seconds > 60) {
339     period_seconds = 60;
340   }
341
342   /* If a check is configured to run at times aligned with sixty seconds
343    * and we're configured to use priority scheduling, schedule so that
344    * we're guaranteed to finish before the timeout */
345   if ((priority_scheduling == mtev_true) &&
346       (((period->tv_sec % 60) == 0) && (period->tv_usec == 0))) {
347     /* Don't allow a ton of stuff to schedule in the first second in the case
348      * of very long timeouts - use the first 10 seconds in this case */
349     int allowable_time = MAX(60 - (timeout/1000) - 1, 10);
350     int max_seconds = MIN(60-priority_dead_zone_seconds, allowable_time);
351     for(i=0;i<max_seconds;i++) {
352       int adj_i = (i + sec) % max_seconds;
353       if(check_slots_seconds_count[adj_i] < min_running_i) {
354         min_running_i = check_slots_seconds_count[adj_i];
355         mini = adj_i;
356       }
357     }
358   }
359   else {
360     /* Just schedule normally*/
361     for(i=0;i<period_seconds;i++) {
362       int adj_i = (i + sec) % 60;
363       if(check_slots_seconds_count[adj_i] < min_running_i) {
364         min_running_i = check_slots_seconds_count[adj_i];
365         mini = adj_i;
366       }
367     }
368   }
369   jbase = mini * (1000/SCHEDULE_GRANULARITY);
370   random_offset = drand48() * SLOTS_PER_SECOND;
371   for(cyclic=0;cyclic<SLOTS_PER_SECOND;cyclic++) {
372     j = jbase + ((random_offset + cyclic) % SLOTS_PER_SECOND);
373     if(check_slots_count[j] < min_running_j) {
374       min_running_j = check_slots_count[j];
375       minj = j;
376     }
377   }
378   return (minj * SCHEDULE_GRANULARITY) + drand48() * SCHEDULE_GRANULARITY;
379 }
380 static void
381 check_slots_adjust_tv(struct timeval *tv, short adj) {
382   int offset_ms, idx;
383   offset_ms = (tv->tv_sec % 60) * 1000 + (tv->tv_usec / 1000);
384   idx = offset_ms / SCHEDULE_GRANULARITY;
385   check_slots_count[idx] += adj;
386   check_slots_seconds_count[offset_ms / 1000] += adj;
387 }
/* Count one more check in the slot/second that tv falls into. */
void check_slots_inc_tv(struct timeval *tv) {
  const short one_more = 1;
  check_slots_adjust_tv(tv, one_more);
}
/* Count one fewer check in the slot/second that tv falls into. */
void check_slots_dec_tv(struct timeval *tv) {
  const short one_less = -1;
  check_slots_adjust_tv(tv, one_less);
}
/*
 * Report whether p is a non-NULL string made up solely of printable
 * characters (per isprint in the current locale).
 *
 * Returns 1 when every character is printable (an empty string qualifies),
 * 0 when p is NULL or contains a non-printable character.
 */
static int
noit_check_generic_safe_string(const char *p) {
  if(!p) return 0;
  for(;*p;p++) {
    /* isprint() requires a value representable as unsigned char (or EOF);
     * passing a negative plain char is undefined behaviour. */
    if(!isprint((unsigned char)*p)) return 0;
  }
  return 1;
}
/*
 * Validate a check target string.
 * Returns 1 when noit_check_generic_safe_string accepts p, 0 otherwise.
 */
int
noit_check_validate_target(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
/*
 * Validate a check name string.
 * Returns 1 when noit_check_generic_safe_string accepts p, 0 otherwise.
 */
int
noit_check_validate_name(const char *p) {
  return noit_check_generic_safe_string(p) ? 1 : 0;
}
412 const char *
413 noit_check_available_string(int16_t available) {
414   switch(available) {
415     case NP_AVAILABLE:    return "available";
416     case NP_UNAVAILABLE:  return "unavailable";
417     case NP_UNKNOWN:      return "unknown";
418   }
419   return NULL;
420 }
421 const char *
422 noit_check_state_string(int16_t state) {
423   switch(state) {
424     case NP_GOOD:         return "good";
425     case NP_BAD:          return "bad";
426     case NP_UNKNOWN:      return "unknown";
427   }
428   return NULL;
429 }
430 static int __check_name_compare(const void *a, const void *b) {
431   const noit_check_t *ac = a;
432   const noit_check_t *bc = b;
433   int rv;
434   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
435   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
436   return 0;
437 }
438 static int __watchlist_compare(const void *a, const void *b) {
439   const noit_check_t *ac = a;
440   const noit_check_t *bc = b;
441   int rv;
442   if((rv = memcmp(ac->checkid, bc->checkid, sizeof(ac->checkid))) != 0) return rv;
443   if(ac->period < bc->period) return -1;
444   if(ac->period == bc->period) return 0;
445   return 1;
446 }
447 static int __check_target_ip_compare(const void *a, const void *b) {
448   const noit_check_t *ac = a;
449   const noit_check_t *bc = b;
450   int rv;
451   if((rv = strcmp(ac->target_ip, bc->target_ip)) != 0) return rv;
452   if (ac->name == NULL) return 1;
453   if (bc->name == NULL) return -1;
454   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
455   return 1;
456 }
457 static int __check_target_compare(const void *a, const void *b) {
458   const noit_check_t *ac = a;
459   const noit_check_t *bc = b;
460   int rv;
461   if (ac->target == NULL) return 1;
462   if (bc->target == NULL) return -1;
463   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
464   if (ac->name == NULL) return 1;
465   if (bc->name == NULL) return -1;
466   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
467   return 1;
468 }
469 int
470 noit_calc_rtype_flag(char *resolve_rtype) {
471   int flags = 0;
472   if(resolve_rtype) {
473     flags |= strcmp(resolve_rtype, PREFER_IPV6) == 0 ||
474              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_PREFER_IPV6 : 0;
475     flags |= strcmp(resolve_rtype, FORCE_IPV4) == 0 ||
476              strcmp(resolve_rtype, FORCE_IPV6) == 0 ? NP_SINGLE_RESOLVE : 0;
477   }
478   return flags;
479 }
480 void
481 noit_check_fake_last_check(noit_check_t *check,
482                            struct timeval *lc, struct timeval *_now) {
483   struct timeval now, period, lc_copy;
484   int balance_ms;
485
486   if(!_now) {
487     gettimeofday(&now, NULL);
488     _now = &now;
489   }
490   period.tv_sec = check->period / 1000;
491   period.tv_usec = (check->period % 1000) * 1000;
492   sub_timeval(*_now, period, lc);
493
494   /* We need to set the last check value based on the period, but
495    * we also need to store a value that is based around the one-minute
496    * time to properly increment the slots; otherwise, the slots will
497    * get all messed up */
498   if(!(check->flags & NP_TRANSIENT) && check->period) {
499     balance_ms = check_slots_find_smallest(_now->tv_sec+1, &period, check->timeout);
500     lc->tv_sec = (lc->tv_sec / 60) * 60 + balance_ms / 1000;
501     lc->tv_usec = (balance_ms % 1000) * 1000;
502     memcpy(&lc_copy, lc, sizeof(lc_copy));
503     if(compare_timeval(*_now, *lc) < 0) {
504       do {
505         sub_timeval(*lc, period, lc);
506       } while(compare_timeval(*_now, *lc) < 0);
507     }
508     else {
509       struct timeval test;
510       while(1) {
511         add_timeval(*lc, period, &test);
512         if(compare_timeval(*_now, test) < 0) break;
513         memcpy(lc, &test, sizeof(test));
514       }
515     }
516   }
517   else {
518     memcpy(&lc_copy, lc, sizeof(lc_copy));
519   }
520  
521   /* now, we're going to do an even distribution using the slots */
522   if(!(check->flags & NP_TRANSIENT)) check_slots_inc_tv(&lc_copy);
523 }
524 void
525 noit_poller_process_checks(const char *xpath) {
526   int i, flags, cnt = 0, found;
527   mtev_conf_section_t *sec;
528   __config_load_generation++;
529   sec = mtev_conf_get_sections(NULL, xpath, &cnt);
530   for(i=0; i<cnt; i++) {
531     void *vcheck;
532     char uuid_str[37];
533     char target[256] = "";
534     char module[256] = "";
535     char name[256] = "";
536     char filterset[256] = "";
537     char oncheck[1024] = "";
538     char resolve_rtype[16] = "";
539     int ridx;
540     int no_period = 0;
541     int no_oncheck = 0;
542     int period = 0, timeout = 0;
543     mtev_boolean disabled = mtev_false, busted = mtev_false;
544     uuid_t uuid, out_uuid;
545     int64_t config_seq = 0;
546     mtev_hash_table *options;
547     mtev_hash_table **moptions = NULL;
548     mtev_boolean moptions_used = mtev_false, backdated = mtev_false;
549
550     /* We want to heartbeat here... otherwise, if a lot of checks are
551      * configured or if we're running on a slower system, we could
552      * end up getting watchdog killed before we get a chance to run
553      * any checks */
554     mtev_watchdog_child_heartbeat();
555
556     if(reg_module_id > 0) {
557       moptions = alloca(reg_module_id * sizeof(mtev_hash_table *));
558       memset(moptions, 0, reg_module_id * sizeof(mtev_hash_table *));
559       moptions_used = mtev_true;
560     }
561
562 #define NEXT(...) mtevL(noit_stderr, __VA_ARGS__); continue
563 #define MYATTR(type,a,...) mtev_conf_get_##type(sec[i], "@" #a, __VA_ARGS__)
564 #define INHERIT(type,a,...) \
565   mtev_conf_get_##type(sec[i], "ancestor-or-self::node()/@" #a, __VA_ARGS__)
566
567     if(!MYATTR(stringbuf, uuid, uuid_str, sizeof(uuid_str))) {
568       mtevL(noit_stderr, "check %d has no uuid\n", i+1);
569       continue;
570     }
571
572     MYATTR(int64, seq, &config_seq);
573
574     if(uuid_parse(uuid_str, uuid)) {
575       mtevL(noit_stderr, "check uuid: '%s' is invalid\n", uuid_str);
576       continue;
577     }
578
579     if(!INHERIT(stringbuf, target, target, sizeof(target))) {
580       mtevL(noit_stderr, "check uuid: '%s' has no target\n", uuid_str);
581       busted = mtev_true;
582     }
583     if(!noit_check_validate_target(target)) {
584       mtevL(noit_stderr, "check uuid: '%s' has malformed target\n", uuid_str);
585       busted = mtev_true;
586     }
587     if(!INHERIT(stringbuf, module, module, sizeof(module))) {
588       mtevL(noit_stderr, "check uuid: '%s' has no module\n", uuid_str);
589       busted = mtev_true;
590     }
591
592     if(!INHERIT(stringbuf, filterset, filterset, sizeof(filterset)))
593       filterset[0] = '\0';
594    
595     if (!INHERIT(stringbuf, resolve_rtype, resolve_rtype, sizeof(resolve_rtype)))
596       strlcpy(resolve_rtype, PREFER_IPV4, sizeof(resolve_rtype));
597
598     if(!MYATTR(stringbuf, name, name, sizeof(name)))
599       strlcpy(name, module, sizeof(name));
600
601     if(!noit_check_validate_name(name)) {
602       mtevL(noit_stderr, "check uuid: '%s' has malformed name\n", uuid_str);
603       busted = mtev_true;
604     }
605
606     if(!INHERIT(int, period, &period) || period == 0)
607       no_period = 1;
608
609     if(!INHERIT(stringbuf, oncheck, oncheck, sizeof(oncheck)) || !oncheck[0])
610       no_oncheck = 1;
611
612     if(no_period && no_oncheck) {
613       mtevL(noit_stderr, "check uuid: '%s' has neither period nor oncheck\n",
614             uuid_str);
615       busted = mtev_true;
616     }
617     if(!(no_period || no_oncheck)) {
618       mtevL(noit_stderr, "check uuid: '%s' has oncheck and period.\n",
619             uuid_str);
620       busted = mtev_true;
621     }
622     if(!INHERIT(int, timeout, &timeout)) {
623       mtevL(noit_stderr, "check uuid: '%s' has no timeout\n", uuid_str);
624       busted = mtev_true;
625     }
626     if(!no_period && timeout >= period) {
627       mtevL(noit_stderr, "check uuid: '%s' timeout > period\n", uuid_str);
628       timeout = period/2;
629     }
630     options = mtev_conf_get_hash(sec[i], "config");
631     for(ridx=0; ridx<reg_module_id; ridx++) {
632       moptions[ridx] = mtev_conf_get_namespaced_hash(sec[i], "config",
633                                                      reg_module_names[ridx]);
634     }
635
636     INHERIT(boolean, disable, &disabled);
637     flags = 0;
638     if(busted) flags |= (NP_UNCONFIG|NP_DISABLED);
639     else if(disabled) flags |= NP_DISABLED;
640
641     flags |= noit_calc_rtype_flag(resolve_rtype);
642
643     pthread_mutex_lock(&polls_lock);
644     found = mtev_hash_retrieve(&polls, (char *)uuid, UUID_SIZE, &vcheck);
645     if(found) {
646       noit_check_t *check = (noit_check_t *)vcheck;
647       /* Possibly reset the seq */
648       if(config_seq < 0) check->config_seq = 0;
649
650       /* Otherwise note a non-increasing sequence */
651       if(check->config_seq > config_seq) backdated = mtev_true;
652     }
653     pthread_mutex_unlock(&polls_lock);
654     if(found)
655       noit_poller_deschedule(uuid);
656     if(backdated) {
657       mtevL(noit_error, "Check config seq backwards, ignored\n");
658     }
659     else {
660       noit_poller_schedule(target, module, name, filterset, options,
661                            moptions_used ? moptions : NULL,
662                            period, timeout, oncheck[0] ? oncheck : NULL,
663                            config_seq, flags, uuid, out_uuid);
664       mtevL(noit_debug, "loaded uuid: %s\n", uuid_str);
665     }
666     for(ridx=0; ridx<reg_module_id; ridx++) {
667       if(moptions[ridx]) {
668         mtev_hash_destroy(moptions[ridx], free, free);
669         free(moptions[ridx]);
670       }
671     }
672     mtev_hash_destroy(options, free, free);
673     free(options);
674   }
675   if(sec) free(sec);
676 }
677
678 int
679 noit_check_activate(noit_check_t *check) {
680   noit_module_t *mod;
681   if(NOIT_CHECK_LIVE(check)) return 0;
682   mod = noit_module_lookup(check->module);
683   if(mod && mod->initiate_check) {
684     if((check->flags & NP_DISABLED) == 0) {
685       mod->initiate_check(mod, check, 0, NULL);
686       return 1;
687     }
688     else
689       mtevL(noit_debug, "Skipping %s`%s, disabled.\n",
690             check->target, check->name);
691   }
692   else {
693     if(!mod) {
694       mtevL(noit_stderr, "Cannot find module '%s'\n", check->module);
695       check->flags |= NP_DISABLED;
696     }
697   }
698   return 0;
699 }
700
701 void
702 noit_poller_initiate() {
703   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
704   uuid_t key_id;
705   int klen;
706   void *vcheck;
707   /* This is only ever called in the beginning, no lock needed */
708   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
709                        &vcheck)) {
710     noit_check_activate((noit_check_t *)vcheck);
711     mtev_watchdog_child_heartbeat();
712   }
713 }
714
715 void
716 noit_poller_flush_epoch(int oldest_allowed) {
717   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
718   uuid_t key_id;
719   int klen, i;
720   void *vcheck;
721 #define TOFREE_PER_ITER 1024
722   noit_check_t *tofree[TOFREE_PER_ITER];
723
724   /* Cleanup any previous causal map */
725   while(1) {
726     i = 0;
727     pthread_mutex_lock(&polls_lock);
728     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
729                          &vcheck) && i < TOFREE_PER_ITER) {
730       noit_check_t *check = (noit_check_t *)vcheck;
731       if(check->generation < oldest_allowed) {
732         tofree[i++] = check;
733       }
734     }
735     pthread_mutex_unlock(&polls_lock);
736     if(i==0) break;
737     while(i>0) noit_poller_deschedule(tofree[--i]->checkid);
738   }
739 #undef TOFREE_PER_ITER
740 }
741
742 void
743 noit_poller_make_causal_map() {
744   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
745   uuid_t key_id;
746   int klen;
747   void *vcheck;
748
749   if(!system_needs_causality) return;
750
751   /* set it to false, we'll set it to true during the scan if we
752    * find anything causal.  */
753   system_needs_causality = mtev_false;
754
755   /* Cleanup any previous causal map */
756   pthread_mutex_lock(&polls_lock);
757   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
758                        &vcheck)) {
759     noit_check_t *check = (noit_check_t *)vcheck;
760     dep_list_t *dep;
761     while((dep = check->causal_checks) != NULL) {
762       check->causal_checks = dep->next;
763       free(dep);
764     }
765   }
766
767   memset(&iter, 0, sizeof(iter));
768   /* Walk all checks and add check dependencies to their parents */
769   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
770                        &vcheck)) {
771     noit_check_t *check = (noit_check_t *)vcheck, *parent;
772     if(check->oncheck) {
773       /* This service is causally triggered by another service */
774       uuid_t id;
775       char fullcheck[1024];
776       char *name = check->oncheck;
777       char *target = NULL;
778
779       system_needs_causality = mtev_true;
780       mtevL(noit_debug, "Searching for upstream trigger on %s\n", name);
781       parent = NULL;
782       if(uuid_parse(check->oncheck, id) == 0) {
783         target = "";
784         parent = noit_poller_lookup__nolock(id);
785       }
786       else if((target = strchr(check->oncheck, '`')) != NULL) {
787         strlcpy(fullcheck, check->oncheck, target + 1 - check->oncheck);
788         name = target + 1;
789         target = fullcheck;
790         parent = noit_poller_lookup_by_name__nolock(target, name);
791       }
792       else {
793         target = check->target;
794         parent = noit_poller_lookup_by_name__nolock(target, name);
795       }
796
797       if(!parent) {
798         check->flags |= NP_DISABLED;
799         mtevL(noit_stderr, "Disabling check %s`%s, can't find oncheck %s`%s\n",
800               check->target, check->name, target, name);
801       }
802       else {
803         dep_list_t *dep;
804         dep = malloc(sizeof(*dep));
805         dep->check = check;
806         dep->next = parent->causal_checks;
807         parent->causal_checks = dep;
808         mtevL(noit_debug, "Causal map %s`%s --> %s`%s\n",
809               parent->target, parent->name, check->target, check->name);
810       }
811     }
812   }
813   pthread_mutex_unlock(&polls_lock);
814   /* We found some causal checks, so we might need to activate stuff */
815   if(system_needs_causality) noit_poller_initiate();
816 }
817 void
818 noit_poller_reload(const char *xpath)
819 {
820   noit_poller_process_checks(xpath ? xpath : "/noit/checks//check");
821   if(!xpath) {
822     /* Full reload, we need to wipe old checks */
823     noit_poller_flush_epoch(__config_load_generation);
824   }
825   noit_poller_make_causal_map();
826 }
827 void
828 noit_check_dns_ignore_tld(const char* extension, const char* ignore) {
829   mtev_hash_replace(&dns_ignore_list, strdup(extension), strlen(extension), strdup(ignore), NULL, NULL);
830 }
831 static void
832 noit_check_dns_ignore_list_init() {
833   mtev_conf_section_t* dns;
834   int cnt;
835
836   dns = mtev_conf_get_sections(NULL, "/noit/dns/extension", &cnt);
837   if(dns) {
838     int i = 0;
839     for (i = 0; i < cnt; i++) {
840       char* extension;
841       char* ignore;
842       if(!mtev_conf_get_string(dns[i], "self::node()/@value", &extension)) {
843         continue;
844       }
845       if(!mtev_conf_get_string(dns[i], "self::node()/@ignore", &ignore)) {
846         continue;
847       }
848       noit_check_dns_ignore_tld(extension, ignore);
849     }
850   }
851 }
852 static void
853 noit_check_poller_scheduling_init() {
854   mtev_conf_get_boolean(NULL, "//checks/@priority_scheduling", &priority_scheduling);
855 }
/*
 * One-time poller initialisation: seeds the drand48 generator, reads the
 * scheduling options, initialises the resolver and check tools, sets up the
 * polls_by_name and watchlist skiplists, registers console commands,
 * schedules the recycle-bin processor, reads the text size limit, loads the
 * DNS ignore list and finally performs a full check reload.
 */
856 void
857 noit_poller_init() {
/* seed used by check_slots_find_smallest()'s drand48() calls */
858   srand48((getpid() << 16) ^ time(NULL));
859   noit_check_poller_scheduling_init();
860   noit_check_resolver_init();
861   noit_check_tools_init();
/* primary order is target/name; two further indexes are added on top */
862   mtev_skiplist_init(&polls_by_name);
863   mtev_skiplist_set_compare(&polls_by_name, __check_name_compare,
864                             __check_name_compare);
865   mtev_skiplist_add_index(&polls_by_name, __check_target_ip_compare,
866                             __check_target_ip_compare);
867   mtev_skiplist_add_index(&polls_by_name, __check_target_compare,
868                             __check_target_compare);
869   mtev_skiplist_init(&watchlist);
870   mtev_skiplist_set_compare(&watchlist, __watchlist_compare,
871                             __watchlist_compare);
872   register_console_check_commands();
873   eventer_name_callback("check_recycle_bin_processor",
874                         check_recycle_bin_processor);
875   eventer_add_in_s_us(check_recycle_bin_processor, NULL, RECYCLE_INTERVAL, 0);
/* a non-positive configured limit falls back to the default */
876   mtev_conf_get_int(NULL, "noit/@text_size_limit", &text_size_limit);
877   if (text_size_limit <= 0) {
878     text_size_limit = DEFAULT_TEXT_METRIC_SIZE_LIMIT;
879   }
880   noit_check_dns_ignore_list_init();
/* NULL xpath requests a full reload of all configured checks */
881   noit_poller_reload(NULL);
882 }
883
884 int
885 noit_poller_check_count() {
886   return polls_by_name.size;
887 }
888
889 int
890 noit_poller_transient_check_count() {
891   return watchlist.size;
892 }
893
894 noit_check_t *
895 noit_check_clone(uuid_t in) {
896   int i;
897   noit_check_t *checker, *new_check;
898   void *vcheck;
899   if(mtev_hash_retrieve(&polls,
900                         (char *)in, UUID_SIZE,
901                         &vcheck) == 0) {
902     return NULL;
903   }
904   checker = (noit_check_t *)vcheck;
905   if(checker->oncheck) {
906     return NULL;
907   }
908   new_check = calloc(1, sizeof(*new_check));
909   memcpy(new_check, checker, sizeof(*new_check));
910   new_check->target = strdup(new_check->target);
911   new_check->module = strdup(new_check->module);
912   new_check->name = strdup(new_check->name);
913   new_check->filterset = strdup(new_check->filterset);
914   new_check->flags = 0;
915   new_check->fire_event = NULL;
916   memset(&new_check->last_fire_time, 0, sizeof(new_check->last_fire_time));
917   new_check->statistics = noit_check_stats_set_calloc();
918   new_check->closure = NULL;
919   new_check->config = calloc(1, sizeof(*new_check->config));
920   mtev_hash_merge_as_dict(new_check->config, checker->config);
921   new_check->module_configs = NULL;
922   new_check->module_metadata = NULL;
923
924   for(i=0; i<reg_module_id; i++) {
925     void *src_metadata;
926     mtev_hash_table *src_mconfig;
927     src_mconfig = noit_check_get_module_config(checker, i);
928     if(src_mconfig) {
929       mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
930       mtev_hash_merge_as_dict(t, src_mconfig);
931       noit_check_set_module_config(new_check, i, t);
932     }
933     if(checker->flags & NP_PASSIVE_COLLECTION)
934       if(NULL != (src_metadata = noit_check_get_module_metadata(new_check, i)))
935         noit_check_set_module_metadata(new_check, i, src_metadata, NULL);
936   }
937   return new_check;
938 }
939
940 noit_check_t *
941 noit_check_watch(uuid_t in, int period) {
942   /* First look for a copy that is being watched */
943   int minimum_pi = 1000, granularity_pi = 500;
944   mtev_conf_section_t check_node;
945   char uuid_str[UUID_STR_LEN + 1];
946   char xpath[1024];
947   noit_check_t n, *f;
948
949   uuid_unparse_lower(in, uuid_str);
950
951   mtevL(noit_debug, "noit_check_watch(%s,%d)\n", uuid_str, period);
952   if(period == 0) {
953     return noit_poller_lookup(in);
954   }
955
956   /* Find the check */
957   snprintf(xpath, sizeof(xpath), "//checks//check[@uuid=\"%s\"]", uuid_str);
958   check_node = mtev_conf_get_section(NULL, xpath);
959   mtev_conf_get_int(NULL, "//checks/@transient_min_period", &minimum_pi);
960   mtev_conf_get_int(NULL, "//checks/@transient_period_granularity", &granularity_pi);
961   if(check_node) {
962     mtev_conf_get_int(check_node,
963                       "ancestor-or-self::node()/@transient_min_period",
964                       &minimum_pi);
965     mtev_conf_get_int(check_node,
966                       "ancestor-or-self::node()/@transient_period_granularity",
967                       &granularity_pi);
968   }
969
970   /* apply the bounds */
971   period /= granularity_pi;
972   period *= granularity_pi;
973   period = MAX(period, minimum_pi);
974
975   uuid_copy(n.checkid, in);
976   n.period = period;
977
978   f = mtev_skiplist_find(&watchlist, &n, NULL);
979   if(f) return f;
980   f = noit_check_clone(in);
981   if(!f) return NULL;
982   f->period = period;
983   f->timeout = period - 10;
984   f->flags |= NP_TRANSIENT;
985   mtevL(noit_debug, "Watching %s@%d\n", uuid_str, period);
986   mtev_skiplist_insert(&watchlist, f);
987   return f;
988 }
989
990 noit_check_t *
991 noit_check_get_watch(uuid_t in, int period) {
992   noit_check_t n, *f;
993
994   uuid_copy(n.checkid, in);
995   n.period = period;
996
997   f = mtev_skiplist_find(&watchlist, &n, NULL);
998   return f;
999 }
1000
1001 void
1002 noit_check_transient_add_feed(noit_check_t *check, const char *feed) {
1003   char *feedcopy;
1004   if(!check->feeds) {
1005     check->feeds = calloc(1, sizeof(*check->feeds));
1006     mtev_skiplist_init(check->feeds);
1007     mtev_skiplist_set_compare(check->feeds,
1008                               (mtev_skiplist_comparator_t)strcmp,
1009                               (mtev_skiplist_comparator_t)strcmp);
1010   }
1011   feedcopy = strdup(feed);
1012   /* No error on failure -- it's already there */
1013   if(mtev_skiplist_insert(check->feeds, feedcopy) == NULL) free(feedcopy);
1014   mtevL(noit_debug, "check %s`%s @ %dms has %d feed(s): %s.\n",
1015         check->target, check->name, check->period, check->feeds->size, feed);
1016 }
1017 void
1018 noit_check_transient_remove_feed(noit_check_t *check, const char *feed) {
1019   if(!check->feeds) return;
1020   if(feed) {
1021     mtevL(noit_debug, "check %s`%s @ %dms removing 1 of %d feeds: %s.\n",
1022           check->target, check->name, check->period, check->feeds->size, feed);
1023     mtev_skiplist_remove(check->feeds, feed, free);
1024   }
1025   if(check->feeds->size == 0) {
1026     char uuid_str[UUID_STR_LEN + 1];
1027     uuid_unparse_lower(check->checkid, uuid_str);
1028     mtevL(noit_debug, "Unwatching %s@%d\n", uuid_str, check->period);
1029     mtev_skiplist_remove(&watchlist, check, NULL);
1030     mtev_skiplist_destroy(check->feeds, free);
1031     free(check->feeds);
1032     check->feeds = NULL;
1033     if(check->flags & NP_TRANSIENT) {
1034       mtevL(noit_debug, "check %s`%s @ %dms has no more listeners.\n",
1035             check->target, check->name, check->period);
1036       check->flags |= NP_KILLED;
1037     }
1038     noit_poller_free_check(check);
1039   }
1040 }
1041
1042 mtev_boolean
1043 noit_check_is_valid_target(const char *target) {
1044   int8_t family;
1045   int rv;
1046   union {
1047     struct in_addr addr4;
1048     struct in6_addr addr6;
1049   } a;
1050
1051   family = AF_INET;
1052   rv = inet_pton(family, target, &a);
1053   if(rv != 1) {
1054     family = AF_INET6;
1055     rv = inet_pton(family, target, &a);
1056     if(rv != 1) {
1057       return mtev_false;
1058     }
1059   }
1060   return mtev_true;
1061 }
1062 int
1063 noit_check_set_ip(noit_check_t *new_check,
1064                   const char *ip_str, const char *newname) {
1065   int8_t family;
1066   int rv, failed = 0;
1067   char old_target_ip[INET6_ADDRSTRLEN];
1068   union {
1069     struct in_addr addr4;
1070     struct in6_addr addr6;
1071   } a;
1072
1073   memset(old_target_ip, 0, INET6_ADDRSTRLEN);
1074   strlcpy(old_target_ip, new_check->target_ip, sizeof(old_target_ip));
1075
1076   family = NOIT_CHECK_PREFER_V6(new_check) ? AF_INET6 : AF_INET;
1077   rv = inet_pton(family, ip_str, &a);
1078   if(rv != 1) {
1079     if (!NOIT_CHECK_SINGLE_RESOLVE(new_check)) {
1080       family = family == AF_INET ? AF_INET6 : AF_INET;
1081       rv = inet_pton(family, ip_str, &a);
1082       if(rv != 1) {
1083         family = AF_INET;
1084         memset(&a, 0, sizeof(a));
1085         failed = -1;
1086       }
1087     } else {
1088       failed = -1;
1089     }
1090   }
1091
1092   new_check->target_family = family;
1093   memcpy(&new_check->target_addr, &a, sizeof(a));
1094   new_check->target_ip[0] = '\0';
1095   if(failed == 0)
1096     if(inet_ntop(new_check->target_family,
1097                  &new_check->target_addr,
1098                  new_check->target_ip,
1099                  sizeof(new_check->target_ip)) == NULL) {
1100       mtevL(noit_error, "inet_ntop failed [%s] -> %d\n", ip_str, errno);
1101     }
1102   /*
1103    * new_check->name could be null if this check is being set for the
1104    * first time.  add_to_list will set it.
1105    */
1106   if (new_check->name == NULL ||
1107       strcmp(old_target_ip, new_check->target_ip) != 0) {
1108     noit_check_add_to_list(new_check, newname);
1109   }
1110
1111   if(new_check->name == NULL && newname != NULL) {
1112     assert(new_check->flags & NP_TRANSIENT);
1113     new_check->name = strdup(newname);
1114   }
1115
1116   return failed;
1117 }
1118 int
1119 noit_check_resolve(noit_check_t *check) {
1120   uint8_t family_pref = NOIT_CHECK_PREFER_V6(check) ? AF_INET6 : AF_INET;
1121   char ipaddr[INET6_ADDRSTRLEN];
1122   if(!NOIT_CHECK_SHOULD_RESOLVE(check)) return 1; /* success, not required */
1123   noit_check_resolver_remind(check->target);
1124   if(noit_check_resolver_fetch(check->target, ipaddr, sizeof(ipaddr),
1125                                family_pref) >= 0) {
1126     check->flags |= NP_RESOLVED;
1127     noit_check_set_ip(check, ipaddr, NULL);
1128     return 0;
1129   }
1130   check->flags &= ~NP_RESOLVED;
1131   return -1;
1132 }
1133 int
1134 noit_check_update(noit_check_t *new_check,
1135                   const char *target,
1136                   const char *name,
1137                   const char *filterset,
1138                   mtev_hash_table *config,
1139                   mtev_hash_table **mconfigs,
1140                   u_int32_t period,
1141                   u_int32_t timeout,
1142                   const char *oncheck,
1143           int64_t seq,
1144                   int flags) {
1145   char uuid_str[37];
1146   int mask = NP_DISABLED | NP_UNCONFIG;
1147
1148   assert(name);
1149   uuid_unparse_lower(new_check->checkid, uuid_str);
1150   if(!new_check->statistics) new_check->statistics = noit_check_stats_set_calloc();
1151   if(seq < 0) new_check->config_seq = seq = 0;
1152   if(new_check->config_seq > seq) {
1153     mtevL(mtev_error, "noit_check_update[%s] skipped: seq backwards\n", uuid_str);
1154     return -1;
1155   }
1156
1157   /* selfcheck will identify this node in a cluster */
1158   if(mtev_cluster_enabled() && !strcmp(new_check->module, "selfcheck")) {
1159     uuid_t cluster_id;
1160     mtev_cluster_get_self(cluster_id);
1161     if(uuid_compare(cluster_id, new_check->checkid)) {
1162       mtevL(mtev_error, "Setting global cluster identity to '%s'\n", uuid_str);
1163       mtev_cluster_set_self(new_check->checkid);
1164     }
1165   }
1166
1167   if(NOIT_CHECK_RUNNING(new_check)) {
1168     char module[256];
1169     uuid_t id, dummy;
1170     uuid_copy(id, new_check->checkid);
1171     strlcpy(module, new_check->module, sizeof(module));
1172     noit_poller_deschedule(id);
1173     return noit_poller_schedule(target, module, name, filterset,
1174                                 config, mconfigs, period, timeout, oncheck,
1175                                 seq, flags, id, dummy);
1176   }
1177
1178   new_check->generation = __config_load_generation;
1179   if(new_check->target) free(new_check->target);
1180   new_check->target = strdup(target);
1181
1182   // apply resolution flags to check.
1183   if (flags & NP_PREFER_IPV6)
1184     new_check->flags |= NP_PREFER_IPV6;
1185   else
1186     new_check->flags &= ~NP_PREFER_IPV6;
1187   if (flags & NP_SINGLE_RESOLVE)
1188     new_check->flags |= NP_SINGLE_RESOLVE;
1189   else
1190     new_check->flags &= ~NP_SINGLE_RESOLVE;
1191   if (flags & NP_RESOLVE)
1192     new_check->flags |= NP_RESOLVE;
1193   else
1194     new_check->flags &= ~NP_RESOLVE;
1195
1196   /* This sets both the name and the target_addr */
1197   if(noit_check_set_ip(new_check, target, name)) {
1198     mtev_boolean should_resolve;
1199     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1200     const char *key, *value;
1201     int klen;
1202     char* extension = strrchr(target, '.');
1203     new_check->flags |= NP_RESOLVE;
1204     new_check->flags &= ~NP_RESOLVED;
1205     /* If we match any of the extensions we're supposed to ignore,
1206      * don't resolve */
1207     if (extension && (strlen(extension) > 1)) {
1208       while(mtev_hash_next(&dns_ignore_list, &iter, &key, &klen, (void**)&value)) {
1209         if ((!strcmp("true", value)) && (!strcmp(extension+1, key))) {
1210             new_check->flags &= ~NP_RESOLVE;
1211             break;
1212         }
1213       }
1214     }
1215     if(noit_check_should_resolve_targets(&should_resolve) && !should_resolve)
1216       flags |= NP_DISABLED | NP_UNCONFIG;
1217     noit_check_resolve(new_check);
1218   }
1219
1220   if(new_check->filterset) free(new_check->filterset);
1221   new_check->filterset = filterset ? strdup(filterset): NULL;
1222
1223   if(config != NULL) {
1224     mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
1225     const char *k;
1226     int klen;
1227     void *data;
1228     if(new_check->config) mtev_hash_delete_all(new_check->config, free, free);
1229     else new_check->config = calloc(1, sizeof(*new_check->config));
1230     while(mtev_hash_next(config, &iter, &k, &klen, &data)) {
1231       mtev_hash_store(new_check->config, strdup(k), klen, strdup((char *)data));
1232     }
1233   }
1234   if(mconfigs != NULL) {
1235     int i;
1236     for(i=0; i<reg_module_id; i++) {
1237       mtev_hash_table *t;
1238       if(NULL != (t = noit_check_get_module_config(new_check, i))) {
1239         noit_check_set_module_config(new_check, i, NULL);
1240         mtev_hash_destroy(t, free, free);
1241         free(t);
1242       }
1243       if(mconfigs[i]) {
1244         mtev_hash_table *t = calloc(1, sizeof(*new_check->config));
1245         mtev_hash_merge_as_dict(t, mconfigs[i]);
1246         noit_check_set_module_config(new_check, i, t);
1247       }
1248     }
1249   }
1250   if(new_check->oncheck) free(new_check->oncheck);
1251   new_check->oncheck = oncheck ? strdup(oncheck) : NULL;
1252   if(new_check->oncheck) system_needs_causality = mtev_true;
1253   new_check->period = period;
1254   new_check->timeout = timeout;
1255   new_check->config_seq = seq;
1256
1257   /* Unset what could be set.. then set what should be set */
1258   new_check->flags = (new_check->flags & ~mask) | flags;
1259
1260   check_config_fixup_hook_invoke(new_check);
1261
1262   if((new_check->flags & NP_TRANSIENT) == 0)
1263     noit_check_activate(new_check);
1264
1265   noit_check_add_to_list(new_check, NULL);
1266   noit_check_log_check(new_check);
1267   return 0;
1268 }
1269 int
1270 noit_poller_schedule(const char *target,
1271                      const char *module,
1272                      const char *name,
1273                      const char *filterset,
1274                      mtev_hash_table *config,
1275                      mtev_hash_table **mconfigs,
1276                      u_int32_t period,
1277                      u_int32_t timeout,
1278                      const char *oncheck,
1279                      int64_t seq,
1280                      int flags,
1281                      uuid_t in,
1282                      uuid_t out) {
1283   noit_check_t *new_check;
1284   new_check = calloc(1, sizeof(*new_check));
1285   if(!new_check) return -1;
1286
1287   /* The module and the UUID can never be changed */
1288   new_check->module = strdup(module);
1289   if(uuid_is_null(in))
1290     uuid_generate(new_check->checkid);
1291   else
1292     uuid_copy(new_check->checkid, in);
1293
1294   new_check->statistics = noit_check_stats_set_calloc();
1295   noit_check_update(new_check, target, name, filterset, config, mconfigs,
1296                     period, timeout, oncheck, seq, flags);
1297   assert(mtev_hash_store(&polls,
1298                          (char *)new_check->checkid, UUID_SIZE,
1299                          new_check));
1300   uuid_copy(out, new_check->checkid);
1301
1302   return 0;
1303 }
1304
1305 /* A quick little list of recycleable checks.  This list never really
1306  * grows large, so no sense in thinking too hard about the algorithmic
1307  * complexity.
1308  */
1309 struct _checker_rcb {
1310   noit_check_t *checker;
1311   struct _checker_rcb *next;
1312 };
1313 static struct _checker_rcb *checker_rcb = NULL;
1314 static void recycle_check(noit_check_t *checker) {
1315   struct _checker_rcb *n = malloc(sizeof(*n));
1316   n->checker = checker;
1317   n->next = checker_rcb;
1318   checker_rcb = n;
1319 }
1320 void
1321 noit_poller_free_check(noit_check_t *checker) {
1322   noit_module_t *mod;
1323
1324   if(checker->flags & NP_RUNNING) {
1325     recycle_check(checker);
1326     return;
1327   }
1328
1329   mod = noit_module_lookup(checker->module);
1330   if(mod && mod->cleanup) mod->cleanup(mod, checker);
1331   if(checker->fire_event) {
1332      eventer_remove(checker->fire_event);
1333      free(checker->fire_event->closure);
1334      eventer_free(checker->fire_event);
1335      checker->fire_event = NULL;
1336   }
1337   if(checker->closure) free(checker->closure);
1338   if(checker->target) free(checker->target);
1339   if(checker->module) free(checker->module);
1340   if(checker->name) free(checker->name);
1341   if(checker->config) {
1342     mtev_hash_destroy(checker->config, free, free);
1343     free(checker->config);
1344     checker->config = NULL;
1345   }
1346   if(checker->module_metadata) {
1347     int i;
1348     for(i=0; i<reg_module_id; i++) {
1349       struct vp_w_free *tuple;
1350       tuple = checker->module_metadata[i];
1351       if(tuple) {
1352         if(tuple->freefunc) tuple->freefunc(tuple->ptr);
1353         free(tuple);
1354       }
1355     }
1356     free(checker->module_metadata);
1357   }
1358   if(checker->module_configs) {
1359     int i;
1360     for(i=0; i<reg_module_id; i++) {
1361       if(checker->module_configs[i]) {
1362         mtev_hash_destroy(checker->module_configs[i], free, free);
1363         free(checker->module_configs[i]);
1364       }
1365     }
1366     free(checker->module_configs);
1367   }
1368   mtev_memory_safe_free(stats_inprogress(checker));
1369   mtev_memory_safe_free(stats_current(checker));
1370   mtev_memory_safe_free(stats_previous(checker));
1371   free(checker);
1372 }
1373 static int
1374 check_recycle_bin_processor(eventer_t e, int mask, void *closure,
1375                             struct timeval *now) {
1376   static struct timeval one_minute = { RECYCLE_INTERVAL, 0L };
1377   struct _checker_rcb *prev = NULL, *curr = checker_rcb;
1378   mtevL(noit_debug, "Scanning check recycle bin\n");
1379   while(curr) {
1380     if(!(curr->checker->flags & NP_RUNNING)) {
1381       mtevL(noit_debug, "Check is ready to free.\n");
1382       noit_poller_free_check(curr->checker);
1383       if(prev) prev->next = curr->next;
1384       else checker_rcb = curr->next;
1385       free(curr);
1386       curr = prev ? prev->next : checker_rcb;
1387     }
1388     else {
1389       prev = curr;
1390       curr = curr->next;
1391     }
1392   }
1393   add_timeval(*now, one_minute, &e->whence);
1394   return EVENTER_TIMER;
1395 }
1396
1397 int
1398 noit_poller_deschedule(uuid_t in) {
1399   void *vcheck;
1400   noit_check_t *checker;
1401   if(mtev_hash_retrieve(&polls,
1402                         (char *)in, UUID_SIZE,
1403                         &vcheck) == 0) {
1404     return -1;
1405   }
1406   checker = (noit_check_t *)vcheck;
1407   checker->flags |= (NP_DISABLED|NP_KILLED);
1408
1409   noit_check_log_delete(checker);
1410
1411   assert(mtev_skiplist_remove(&polls_by_name, checker, NULL));
1412   assert(mtev_hash_delete(&polls, (char *)in, UUID_SIZE, NULL, NULL));
1413
1414   noit_poller_free_check(checker);
1415   return 0;
1416 }
1417
1418 noit_check_t *
1419 noit_poller_lookup(uuid_t in) {
1420   noit_check_t *check;
1421   pthread_mutex_lock(&polls_lock);
1422   check = noit_poller_lookup__nolock(in);
1423   pthread_mutex_unlock(&polls_lock);
1424   return check;
1425 }
1426 noit_check_t *
1427 noit_poller_lookup_by_name(char *target, char *name) {
1428   noit_check_t *check;
1429   pthread_mutex_lock(&polls_lock);
1430   check = noit_poller_lookup_by_name__nolock(target,name);
1431   pthread_mutex_unlock(&polls_lock);
1432   return check;
1433 }
1434 int
1435 noit_poller_target_ip_do(const char *target_ip,
1436                          int (*f)(noit_check_t *, void *),
1437                          void *closure) {
1438   int i, count = 0, todo_count = 0;
1439   noit_check_t pivot;
1440   mtev_skiplist *tlist;
1441   mtev_skiplist_node *next;
1442   noit_check_t *todo_onstack[8192];
1443   noit_check_t **todo = todo_onstack;
1444
1445   tlist = mtev_skiplist_find(polls_by_name.index,
1446                              __check_target_ip_compare, NULL);
1447
1448   pthread_mutex_lock(&polls_lock);
1449   /* First pass to count */
1450   memset(&pivot, 0, sizeof(pivot));
1451   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1452   pivot.name = "";
1453   pivot.target = "";
1454   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1455   while(next && next->data) {
1456     noit_check_t *check = next->data;
1457     if(strcmp(check->target_ip, target_ip)) break;
1458     todo_count++;
1459     mtev_skiplist_next(tlist, &next);
1460   }
1461
1462   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1463
1464   memset(&pivot, 0, sizeof(pivot));
1465   strlcpy(pivot.target_ip, (char*)target_ip, sizeof(pivot.target_ip));
1466   pivot.name = "";
1467   pivot.target = "";
1468   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1469   while(next && next->data) {
1470     noit_check_t *check = next->data;
1471     if(strcmp(check->target_ip, target_ip)) break;
1472     if(count < todo_count) todo[count++] = check;
1473     mtev_skiplist_next(tlist, &next);
1474   }
1475   pthread_mutex_unlock(&polls_lock);
1476
1477   todo_count = count;
1478   count = 0;
1479   for(i=0;i<todo_count;i++)
1480     count += f(todo[i],closure);
1481
1482   if(todo != todo_onstack) free(todo);
1483   return count;
1484 }
1485 int
1486 noit_poller_target_do(const char *target, int (*f)(noit_check_t *, void *),
1487                       void *closure) {
1488   int i, todo_count = 0, count = 0;
1489   noit_check_t pivot;
1490   mtev_skiplist *tlist;
1491   mtev_skiplist_node *next;
1492   noit_check_t *todo_onstack[8192];
1493   noit_check_t **todo = todo_onstack;
1494
1495   tlist = mtev_skiplist_find(polls_by_name.index,
1496                              __check_target_compare, NULL);
1497
1498   pthread_mutex_lock(&polls_lock);
1499   memset(&pivot, 0, sizeof(pivot));
1500   pivot.name = "";
1501   pivot.target = (char *)target;
1502   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1503   while(next && next->data) {
1504     noit_check_t *check = next->data;
1505     if(strcmp(check->target, target)) break;
1506     todo_count++;
1507     mtev_skiplist_next(tlist, &next);
1508   }
1509
1510   if(todo_count > 8192) todo = malloc(todo_count * sizeof(*todo));
1511
1512   memset(&pivot, 0, sizeof(pivot));
1513   pivot.name = "";
1514   pivot.target = (char *)target;
1515   mtev_skiplist_find_neighbors(tlist, &pivot, NULL, NULL, &next);
1516   while(next && next->data) {
1517     noit_check_t *check = next->data;
1518     if(strcmp(check->target, target)) break;
1519     if(count < todo_count) todo[count++] = check;
1520     mtev_skiplist_next(tlist, &next);
1521   }
1522   pthread_mutex_unlock(&polls_lock);
1523
1524   todo_count = count;
1525   count = 0;
1526   for(i=0;i<todo_count;i++)
1527     count += f(todo[i],closure);
1528
1529   if(todo != todo_onstack) free(todo);
1530   return count;
1531 }
1532
1533 int
1534 noit_poller_do(int (*f)(noit_check_t *, void *),
1535                void *closure) {
1536   mtev_skiplist_node *iter;
1537   int i, count = 0, max_count = 0;
1538   noit_check_t **todo;
1539
1540   if(polls_by_name.size == 0) return 0;
1541
1542   max_count = polls_by_name.size;
1543   todo = malloc(max_count * sizeof(*todo));
1544
1545   pthread_mutex_lock(&polls_lock);
1546   for(iter = mtev_skiplist_getlist(&polls_by_name); iter;
1547       mtev_skiplist_next(&polls_by_name, &iter)) {
1548     if(count < max_count) todo[count++] = (noit_check_t *)iter->data;
1549   }
1550   pthread_mutex_unlock(&polls_lock);
1551
1552   max_count = count;
1553   count = 0;
1554   for(i=0;i<max_count;i++)
1555     count += f(todo[i], closure);
1556   free(todo);
1557   return count;
1558 }
1559
1560 struct ip_module_collector_crutch {
1561   noit_check_t **array;
1562   const char *module;
1563   int idx;
1564   int allocd;
1565 };
1566 static int ip_module_collector(noit_check_t *check, void *cl) {
1567   struct ip_module_collector_crutch *c = cl;
1568   if(c->idx >= c->allocd) return 0;
1569   if(strcmp(check->module, c->module)) return 0;
1570   c->array[c->idx++] = check;
1571   return 1;
1572 }
1573 int
1574 noit_poller_lookup_by_ip_module(const char *ip, const char *mod,
1575                                 noit_check_t **checks, int nchecks) {
1576   struct ip_module_collector_crutch crutch;
1577   crutch.array = checks;
1578   crutch.allocd = nchecks;
1579   crutch.idx = 0;
1580   crutch.module = mod;
1581   return noit_poller_target_ip_do(ip, ip_module_collector, &crutch);
1582 }
1583 int
1584 noit_poller_lookup_by_module(const char *ip, const char *mod,
1585                              noit_check_t **checks, int nchecks) {
1586   struct ip_module_collector_crutch crutch;
1587   crutch.array = checks;
1588   crutch.allocd = nchecks;
1589   crutch.idx = 0;
1590   crutch.module = mod;
1591   return noit_poller_target_do(ip, ip_module_collector, &crutch);
1592 }
1593
1594
1595 int
1596 noit_check_xpath(char *xpath, int len,
1597                  const char *base, const char *arg) {
1598   uuid_t checkid;
1599   int base_trailing_slash;
1600   char argcopy[1024], *target, *module, *name;
1601
1602   base_trailing_slash = (base[strlen(base)-1] == '/');
1603   xpath[0] = '\0';
1604   argcopy[0] = '\0';
1605   if(arg) strlcpy(argcopy, arg, sizeof(argcopy));
1606
1607   if(uuid_parse(argcopy, checkid) == 0) {
1608     /* If they kill by uuid, we'll seek and destroy -- find it anywhere */
1609     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1610              base, base_trailing_slash ? "" : "/", argcopy);
1611   }
1612   else if((module = strchr(argcopy, '`')) != NULL) {
1613     noit_check_t *check;
1614     char uuid_str[37];
1615     target = argcopy;
1616     *module++ = '\0';
1617     if((name = strchr(module+1, '`')) == NULL)
1618       name = module;
1619     else
1620       name++;
1621     check = noit_poller_lookup_by_name(target, name);
1622     if(!check) {
1623       return -1;
1624     }
1625     uuid_unparse_lower(check->checkid, uuid_str);
1626     snprintf(xpath, len, "/noit/checks%s%s/check[@uuid=\"%s\"]",
1627              base, base_trailing_slash ? "" : "/", uuid_str);
1628   }
1629   return strlen(xpath);
1630 }
1631
1632 static int
1633 bad_check_initiate(noit_module_t *self, noit_check_t *check,
1634                    int once, noit_check_t *cause) {
1635   /* self is likely null here -- why it is bad, in fact */
1636   /* this is only suitable to call in one-offs */
1637   struct timeval now;
1638   stats_t *inp;
1639   char buff[256];
1640   if(!once) return -1;
1641   if(!check) return -1;
1642   assert(!(check->flags & NP_RUNNING));
1643   check->flags |= NP_RUNNING;
1644   inp = noit_check_get_stats_inprogress(check);
1645   gettimeofday(&now, NULL);
1646   noit_check_stats_whence(inp, &now);
1647   snprintf(buff, sizeof(buff), "check[%s] implementation offline",
1648            check->module);
1649   noit_check_stats_status(inp, buff);
1650   noit_check_set_stats(check);
1651   check->flags &= ~NP_RUNNING;
1652   return 0;
1653 }
1654 void
1655 noit_check_stats_clear(noit_check_t *check, stats_t *s) {
1656   memset(s, 0, sizeof(*s));
1657   s->state = NP_UNKNOWN;
1658   s->available = NP_UNKNOWN;
1659 }
1660
1661 void
1662 __stats_add_metric(stats_t *newstate, metric_t *m) {
1663   mtev_hash_replace(&newstate->metrics, m->metric_name, strlen(m->metric_name),
1664                     m, NULL, (void (*)(void *))mtev_memory_safe_free);
1665 }
1666
1667 static size_t
1668 noit_metric_sizes(metric_type_t type, const void *value) {
1669   switch(type) {
1670     case METRIC_INT32:
1671     case METRIC_UINT32:
1672       return sizeof(int32_t);
1673     case METRIC_INT64:
1674     case METRIC_UINT64:
1675       return sizeof(int64_t);
1676     case METRIC_DOUBLE:
1677       return sizeof(double);
1678     case METRIC_STRING: {
1679       int len = strlen((char*)value) + 1;
1680       return ((len >= text_size_limit) ? text_size_limit+1 : len);
1681     }
1682     case METRIC_GUESS:
1683       break;
1684   }
1685   assert(type != type);
1686   return 0;
1687 }
1688 static metric_type_t
1689 noit_metric_guess_type(const char *s, void **replacement) {
1690   char *copy, *cp, *trailer, *rpl;
1691   int negative = 0;
1692   metric_type_t type = METRIC_STRING;
1693
1694   if(!s) return METRIC_GUESS;
1695   copy = cp = strdup(s);
1696
1697   /* TRIM the string */
1698   while(*cp && isspace(*cp)) cp++; /* ltrim */
1699   s = cp; /* found a good starting point */
1700   while(*cp) cp++; /* advance to \0 */
1701   cp--; /* back up one */
1702   while(cp > s && isspace(*cp)) *cp-- = '\0'; /* rtrim */
1703
1704   /* Find the first space */
1705   cp = (char *)s;
1706   while(*cp && !isspace(*cp)) cp++;
1707   trailer = cp;
1708   cp--; /* backup one */
1709   if(cp > s && *cp == '%') *cp-- = '\0'; /* chop a last % is there is one */
1710
1711   while(*trailer && isspace(*trailer)) *trailer++ = '\0'; /* rtrim */
1712
1713   /* string was       '  -1.23e-01%  inodes used  ' */
1714   /* copy is (~ = \0) '  -1.23e-01~  inodes used~~' */
1715   /*                     ^           ^              */
1716   /*                     s           trailer        */
1717
1718   /* So, the trailer must not contain numbers */
1719   while(*trailer) { if(isdigit(*trailer)) goto notanumber; trailer++; }
1720
1721   /* And the 's' must be of the form:
1722    *  0) may start with a sign [-+]?
1723    *  1) [1-9][0-9]*
1724    *  2) [0]?.[0-9]+
1725    *  3) 0
1726    *  4) [1-9][0-9]*.[0-9]+
1727    *  5) all of the above ending with e[+-][0-9]+
1728    */
1729    rpl = (char *)s;
1730    /* CASE 0 */
1731    if(s[0] == '-' || s[0] == '+') {
1732      if(s[0] == '-') negative = 1;
1733      s++;
1734    }
1735
1736    if(s[0] == '.') goto decimal; /* CASE 2 */
1737    if(s[0] == '0') { /* CASE 2 & 3 */
1738      s++;
1739      if(!s[0]) goto scanint; /* CASE 3 */
1740      if(s[0] == '.') goto decimal; /* CASE 2 */
1741      goto notanumber;
1742    }
1743    if(s[0] >= '1' && s[0] <= '9') { /* CASE 1 & 4 */
1744      s++;
1745      while(isdigit(s[0])) s++; /* CASE 1 & 4 */
1746      if(!s[0]) goto scanint; /* CASE 1 */
1747      if(s[0] == '.') goto decimal; /* CASE 4 */
1748      goto notanumber;
1749    }
1750    /* Not case 1,2,3,4 */
1751    goto notanumber;
1752
1753   decimal:
1754    s++;
1755    if(!isdigit(s[0])) goto notanumber;
1756    s++;
1757    while(isdigit(s[0])) s++;
1758    if(!s[0]) goto scandouble;
1759    if(s[0] == 'e' || s[0] == 'E') goto exponent; /* CASE 5 */
1760    goto notanumber;
1761
1762   exponent:
1763    s++;
1764    if(s[0] != '-' && s[0] != '+') goto notanumber;
1765    s++;
1766    if(!isdigit(s[0])) goto notanumber;
1767    s++;
1768    while(isdigit(s[0])) s++;
1769    if(!s[0]) goto scandouble;
1770    goto notanumber;
1771
1772  scanint:
1773    if(negative) {
1774      int64_t *v;
1775      v = malloc(sizeof(*v));
1776      *v = strtoll(rpl, NULL, 10);
1777      *replacement = v;
1778      type = METRIC_INT64;
1779      goto alldone;
1780    }
1781    else {
1782      u_int64_t *v;
1783      v = malloc(sizeof(*v));
1784      *v = strtoull(rpl, NULL, 10);
1785      *replacement = v;
1786      type = METRIC_UINT64;
1787      goto alldone;
1788    }
1789  scandouble:
1790    {
1791      double *v;
1792      v = malloc(sizeof(*v));
1793      *v = strtod(rpl, NULL);
1794      *replacement = v;
1795      type = METRIC_DOUBLE;
1796      goto alldone;
1797    }
1798
1799  alldone:
1800  notanumber:
1801   free(copy);
1802   return type;
1803 }
1804
/*
 * Sanitize a metric name in place.
 *
 * Every non-printable byte is replaced by a space, then trailing spaces
 * are stripped.  The first character is always preserved, even when it is
 * a space.  A NULL or empty name is left untouched.
 */
static void
cleanse_metric_name(char *m) {
  char *cp;

  /* Nothing to do; also keeps the backwards walk below inside the buffer. */
  if(!m || !*m) return;

  for(cp = m; *cp; cp++) {
    /* <ctype.h> functions require a value representable as unsigned char */
    if(!isprint((unsigned char)*cp)) *cp = ' ';
  }

  /* cp sits on the terminating NUL: trim backwards, never touching m[0] */
  for(cp--; cp > m && *cp == ' '; cp--)
    *cp = '\0';
}
1813
1814 int
1815 noit_stats_populate_metric(metric_t *m, const char *name, metric_type_t type,
1816                            const void *value) {
1817   void *replacement = NULL;
1818
1819   /* If we are passed a null name, we want to quit populating the metric...
1820    * no reason we should ever have a null metric name */
1821   if (!name) {
1822     return -1;
1823   }
1824
1825   m->metric_name = strdup(name);
1826   cleanse_metric_name(m->metric_name);
1827
1828   if(type == METRIC_GUESS)
1829     type = noit_metric_guess_type((char *)value, &replacement);
1830   if(type == METRIC_GUESS) return -1;
1831
1832   m->metric_type = type;
1833
1834   if(replacement)
1835     m->metric_value.vp = replacement;
1836   else if(value) {
1837     size_t len;
1838     len = noit_metric_sizes(type, value);
1839     m->metric_value.vp = malloc(len);
1840     memcpy(m->metric_value.vp, value, len);
1841     if (type == METRIC_STRING) {
1842       m->metric_value.s[len-1] = 0;
1843     }
1844   }
1845   else m->metric_value.vp = NULL;
1846   return 0;
1847 }
1848
1849 metric_t *
1850 noit_stats_get_metric(noit_check_t *check,
1851                       stats_t *newstate, const char *name) {
1852   void *v;
1853   if(newstate == NULL)
1854     newstate = stats_inprogress(check);
1855   if(mtev_hash_retrieve(&newstate->metrics, name, strlen(name), &v))
1856     return (metric_t *)v;
1857   return NULL;
1858 }
1859
1860 void
1861 noit_stats_set_metric(noit_check_t *check,
1862                       const char *name, metric_type_t type,
1863                       const void *value) {
1864   stats_t *c;
1865   metric_t *m = mtev_memory_safe_malloc_cleanup(sizeof(*m), noit_check_safe_free_metric);
1866   memset(m, 0, sizeof(*m));
1867   if(noit_stats_populate_metric(m, name, type, value)) {
1868     mtev_memory_safe_free(m);
1869     return;
1870   }
1871   noit_check_metric_count_add(1);
1872   c = noit_check_get_stats_inprogress(check);
1873   check_stats_set_metric_hook_invoke(check, c, m);
1874   __stats_add_metric(c, m);
1875 }
1876 void
1877 noit_stats_set_metric_coerce(noit_check_t *check,
1878                              const char *name, metric_type_t t,
1879                              const char *v) {
1880   char *endptr;
1881   stats_t *c;
1882   c = noit_check_get_stats_inprogress(check);
1883   if(v == NULL) {
1884    bogus:
1885     check_stats_set_metric_coerce_hook_invoke(check, c, name, t, v, mtev_false);
1886     noit_stats_set_metric(check, name, t, NULL);
1887     return;
1888   }
1889   switch(t) {
1890     case METRIC_STRING:
1891       noit_stats_set_metric(check, name, t, v);
1892       break;
1893     case METRIC_INT32:
1894     {
1895       int32_t val;
1896       val = strtol(v, &endptr, 10);
1897       if(endptr == v) goto bogus;
1898       noit_stats_set_metric(check, name, t, &val);
1899       break;
1900     }
1901     case METRIC_UINT32:
1902     {
1903       u_int32_t val;
1904       val = strtoul(v, &endptr, 10);
1905       if(endptr == v) goto bogus;
1906       noit_stats_set_metric(check, name, t, &val);
1907       break;
1908     }
1909     case METRIC_INT64:
1910     {
1911       int64_t val;
1912       val = strtoll(v, &endptr, 10);
1913       if(endptr == v) goto bogus;
1914       noit_stats_set_metric(check, name, t, &val);
1915       break;
1916     }
1917     case METRIC_UINT64:
1918     {
1919       u_int64_t val;
1920       val = strtoull(v, &endptr, 10);
1921       if(endptr == v) goto bogus;
1922       noit_stats_set_metric(check, name, t, &val);
1923       break;
1924     }
1925     case METRIC_DOUBLE:
1926     {
1927       double val;
1928       val = strtod(v, &endptr);
1929       if(endptr == v) goto bogus;
1930       noit_stats_set_metric(check, name, t, &val);
1931       break;
1932     }
1933     case METRIC_GUESS:
1934       noit_stats_set_metric(check, name, t, v);
1935       break;
1936   }
1937   check_stats_set_metric_coerce_hook_invoke(check, c, name, t, v, mtev_true);
1938 }
1939 void
1940 noit_stats_log_immediate_metric(noit_check_t *check,
1941                                 const char *name, metric_type_t type,
1942                                 void *value) {
1943   struct timeval now;
1944   metric_t *m = mtev_memory_safe_malloc_cleanup(sizeof(*m), noit_check_safe_free_metric);
1945   memset(m, 0, sizeof(*m));
1946   if(noit_stats_populate_metric(m, name, type, value)) {
1947     mtev_memory_safe_free(m);
1948     return;
1949   }
1950   gettimeofday(&now, NULL);
1951   noit_check_log_metric(check, &now, m);
1952   mtev_memory_safe_free(m);
1953 }
1954
1955 void
1956 noit_check_passive_set_stats(noit_check_t *check) {
1957   int i, nwatches = 0;
1958   mtev_skiplist_node *next;
1959   noit_check_t n;
1960   noit_check_t *watches[8192];
1961
1962   uuid_copy(n.checkid, check->checkid);
1963   n.period = 0;
1964
1965   noit_check_set_stats(check);
1966
1967   pthread_mutex_lock(&polls_lock);
1968   mtev_skiplist_find_neighbors(&watchlist, &n, NULL, NULL, &next);
1969   while(next && next->data && nwatches < 8192) {
1970     noit_check_t *wcheck = next->data;
1971     if(uuid_compare(n.checkid, wcheck->checkid)) break;
1972     watches[nwatches++] = wcheck;
1973     mtev_skiplist_next(&watchlist, &next);
1974   }
1975   pthread_mutex_unlock(&polls_lock);
1976
1977   for(i=0;i<nwatches;i++) {
1978     void *backup;
1979     noit_check_t *wcheck = watches[i];
1980     /* Swap the real check's stats into place */
1981     backup = wcheck->statistics;
1982     wcheck->statistics = check->statistics;
1983
1984     if(check_passive_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
1985       /* Write out our status */
1986       noit_check_log_status(wcheck);
1987       /* Write out all metrics */
1988       noit_check_log_metrics(wcheck);
1989     }
1990     /* Swap them back out */
1991     wcheck->statistics = backup;
1992   }
1993 }
1994 void
1995 noit_check_set_stats(noit_check_t *check) {
1996   int report_change = 0;
1997   char *cp;
1998   dep_list_t *dep;
1999   stats_t *old, *prev, *current;
2000
2001   if(check_set_stats_hook_invoke(check) == MTEV_HOOK_ABORT) return;
2002
2003   old = stats_previous(check);
2004   prev = stats_previous(check) = stats_current(check);
2005   current = stats_current(check) = stats_inprogress(check);
2006   stats_inprogress(check) = noit_check_stats_alloc();
2007  
2008   if(old) {
2009     mtev_memory_safe_free(old);
2010   }
2011
2012   if(current) {
2013     for(cp = current->status; cp && *cp; cp++)
2014       if(*cp == '\r' || *cp == '\n') *cp = ' ';
2015   }
2016
2017   /* check for state changes */
2018   if((!current || (current->available != NP_UNKNOWN)) &&
2019      (!prev || (prev->available != NP_UNKNOWN)) &&
2020      (!current || !prev || (current->available != prev->available)))
2021     report_change = 1;
2022   if((!current || (current->state != NP_UNKNOWN)) &&
2023      (!prev || (prev->state != NP_UNKNOWN)) &&
2024      (!current || !prev || (current->state != prev->state)))
2025     report_change = 1;
2026
2027   mtevL(noit_debug, "%s`%s <- [%s]\n", check->target, check->name,
2028         current ? current->status : "null");
2029   if(report_change) {
2030     mtevL(noit_debug, "%s`%s -> [%s:%s]\n",
2031           check->target, check->name,
2032           noit_check_available_string(current ? current->available : NP_UNKNOWN),
2033           noit_check_state_string(current ? current->state : NP_UNKNOWN));
2034   }
2035
2036   if(NOIT_CHECK_STATUS_ENABLED()) {
2037     char id[UUID_STR_LEN+1];
2038     uuid_unparse_lower(check->checkid, id);
2039     NOIT_CHECK_STATUS(id, check->module, check->name, check->target,
2040                       current ? current->available : NP_UNKNOWN,
2041                       current ? current->state : NP_UNKNOWN,
2042                       current ? current->status : "null");
2043   }
2044
2045   if(check_log_stats_hook_invoke(check) == MTEV_HOOK_CONTINUE) {
2046     /* Write out the bundled information */
2047     noit_check_log_bundle(check);
2048   }
2049   /* count the check as complete */
2050   check_completion_count++;
2051
2052   for(dep = check->causal_checks; dep; dep = dep->next) {
2053     noit_module_t *mod;
2054     mod = noit_module_lookup(dep->check->module);
2055     if(!mod) {
2056       bad_check_initiate(mod, dep->check, 1, check);
2057     }
2058     else {
2059       mtevL(noit_debug, "Firing %s`%s in response to %s`%s\n",
2060             dep->check->target, dep->check->name,
2061             check->target, check->name);
2062       if((dep->check->flags & NP_DISABLED) == 0)
2063         if(mod->initiate_check)
2064           mod->initiate_check(mod, dep->check, 1, check);
2065     }
2066   }
2067 }
2068
2069 static int
2070 noit_console_show_watchlist(mtev_console_closure_t ncct,
2071                             int argc, char **argv,
2072                             mtev_console_state_t *dstate,
2073                             void *closure) {
2074   mtev_skiplist_node *iter, *fiter;
2075   int nwatches = 0, i;
2076   noit_check_t *watches[8192];
2077
2078   nc_printf(ncct, "%d active watches.\n", watchlist.size);
2079   pthread_mutex_lock(&polls_lock);
2080   for(iter = mtev_skiplist_getlist(&watchlist); iter && nwatches < 8192;
2081       mtev_skiplist_next(&watchlist, &iter)) {
2082     noit_check_t *check = iter->data;
2083     watches[nwatches++] = check;
2084   }
2085   pthread_mutex_unlock(&polls_lock);
2086
2087   for(i=0;i<nwatches;i++) {
2088     noit_check_t *check = watches[i];
2089     char uuid_str[UUID_STR_LEN + 1];
2090
2091     uuid_unparse_lower(check->checkid, uuid_str);
2092     nc_printf(ncct, "%s:\n\t[%s`%s`%s]\n\tPeriod: %dms\n\tFeeds[%d]:\n",
2093               uuid_str, check->target, check->module, check->name,
2094               check->period, check->feeds ? check->feeds->size : 0);
2095     if(check->feeds && check->feeds->size) {
2096       for(fiter = mtev_skiplist_getlist(check->feeds); fiter;
2097           mtev_skiplist_next(check->feeds, &fiter)) {
2098         nc_printf(ncct, "\t\t%s\n", (const char *)fiter->data);
2099       }
2100     }
2101   }
2102   return 0;
2103 }
2104
2105 static void
2106 nc_printf_check_brief(mtev_console_closure_t ncct,
2107                       noit_check_t *check) {
2108   stats_t *current;
2109   char out[512];
2110   char uuid_str[37];
2111   snprintf(out, sizeof(out), "%s`%s (%s [%x])", check->target, check->name,
2112            check->target_ip, check->flags);
2113   uuid_unparse_lower(check->checkid, uuid_str);
2114   nc_printf(ncct, "%s %s\n", uuid_str, out);
2115   current = stats_current(check);
2116   if(current)
2117     nc_printf(ncct, "\t%s\n", current->status);
2118 }
2119
2120 char *
2121 noit_console_conf_check_opts(mtev_console_closure_t ncct,
2122                              mtev_console_state_stack_t *stack,
2123                              mtev_console_state_t *dstate,
2124                              int argc, char **argv, int idx) {
2125   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2126   uuid_t key_id;
2127   int klen, i = 0;
2128   void *vcheck;
2129
2130   if(argc == 1) {
2131     if(!strncmp("new", argv[0], strlen(argv[0]))) {
2132       if(idx == i) return strdup("new");
2133       i++;
2134     }
2135     pthread_mutex_lock(&polls_lock);
2136     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2137                          &vcheck)) {
2138       noit_check_t *check = (noit_check_t *)vcheck;
2139       char out[512];
2140       char uuid_str[37];
2141       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
2142       uuid_unparse_lower(check->checkid, uuid_str);
2143       if(!strncmp(out, argv[0], strlen(argv[0]))) {
2144         if(idx == i) {
2145           pthread_mutex_unlock(&polls_lock);
2146           return strdup(out);
2147         }
2148         i++;
2149       }
2150       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
2151         if(idx == i) {
2152           pthread_mutex_unlock(&polls_lock);
2153           return strdup(uuid_str);
2154         }
2155         i++;
2156       }
2157     }
2158     pthread_mutex_unlock(&polls_lock);
2159   }
2160   if(argc == 2) {
2161     cmd_info_t *cmd;
2162     if(!strcmp("new", argv[0])) return NULL;
2163     cmd = mtev_skiplist_find(&dstate->cmds, "attribute", NULL);
2164     if(!cmd) return NULL;
2165     return mtev_console_opt_delegate(ncct, stack, cmd->dstate, argc-1, argv+1, idx);
2166   }
2167   return NULL;
2168 }
2169
2170 char *
2171 noit_console_check_opts(mtev_console_closure_t ncct,
2172                         mtev_console_state_stack_t *stack,
2173                         mtev_console_state_t *dstate,
2174                         int argc, char **argv, int idx) {
2175   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2176   uuid_t key_id;
2177   int klen, i = 0;
2178
2179   if(argc == 1) {
2180     void *vcheck;
2181     pthread_mutex_lock(&polls_lock);
2182     while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2183                          &vcheck)) {
2184       char out[512];
2185       char uuid_str[37];
2186       noit_check_t *check = (noit_check_t *)vcheck;
2187       snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
2188       uuid_unparse_lower(check->checkid, uuid_str);
2189       if(!strncmp(out, argv[0], strlen(argv[0]))) {
2190         if(idx == i) {
2191           pthread_mutex_unlock(&polls_lock);
2192           return strdup(out);
2193         }
2194         i++;
2195       }
2196       if(!strncmp(uuid_str, argv[0], strlen(argv[0]))) {
2197         if(idx == i) {
2198           pthread_mutex_unlock(&polls_lock);
2199           return strdup(uuid_str);
2200         }
2201         i++;
2202       }
2203     }
2204     pthread_mutex_unlock(&polls_lock);
2205   }
2206   if(argc == 2) {
2207     return mtev_console_opt_delegate(ncct, stack, dstate, argc-1, argv+1, idx);
2208   }
2209   return NULL;
2210 }
2211
2212 static int
2213 noit_console_show_checks(mtev_console_closure_t ncct,
2214                          int argc, char **argv,
2215                          mtev_console_state_t *dstate,
2216                          void *closure) {
2217   mtev_hash_iter iter = MTEV_HASH_ITER_ZERO;
2218   uuid_t key_id;
2219   int klen, i = 0, nchecks;
2220   void *vcheck;
2221   noit_check_t **checks;
2222
2223   nchecks = mtev_hash_size(&polls);
2224   if(nchecks == 0) return 0;
2225   checks = malloc(nchecks * sizeof(*checks));
2226
2227   pthread_mutex_lock(&polls_lock);
2228   while(mtev_hash_next(&polls, &iter, (const char **)key_id, &klen,
2229                        &vcheck)) {
2230     if(i<nchecks) checks[i++] = vcheck;
2231   }
2232   pthread_mutex_unlock(&polls_lock);
2233
2234   nchecks = i;
2235   for(i=0;i<nchecks;i++)
2236     nc_printf_check_brief(ncct,checks[i]);
2237
2238   free(checks);
2239   return 0;
2240 }
2241
2242 static int
2243 noit_console_short_checks_sl(mtev_console_closure_t ncct,
2244                              mtev_skiplist *tlist) {
2245   int max_count, i = 0;
2246   noit_check_t **todo;
2247   mtev_skiplist_node *iter;
2248
2249   max_count = tlist->size;
2250   if(max_count == 0) return 0;
2251   todo = malloc(max_count * sizeof(*todo));
2252
2253   pthread_mutex_lock(&polls_lock);
2254   for(iter = mtev_skiplist_getlist(tlist); i < max_count && iter;
2255       mtev_skiplist_next(tlist, &iter)) {
2256     todo[i++] = iter->data;
2257   }
2258   pthread_mutex_unlock(&polls_lock);
2259
2260   max_count = i;
2261   for(i=0;i<max_count;i++)
2262     nc_printf_check_brief(ncct, todo[i]);
2263
2264   free(todo);
2265   return 0;
2266 }
2267 static int
2268 noit_console_show_checks_name(mtev_console_closure_t ncct,
2269                               int argc, char **argv,
2270                               mtev_console_state_t *dstate,
2271                               void *closure) {
2272   return noit_console_short_checks_sl(ncct, &polls_by_name);
2273 }
2274
2275 static int
2276 noit_console_show_checks_target(mtev_console_closure_t ncct,
2277                                    int argc, char **argv,
2278                                    mtev_console_state_t *dstate,
2279                                    void *closure) {
2280   return noit_console_short_checks_sl(ncct,
2281            mtev_skiplist_find(polls_by_name.index,
2282            __check_target_compare, NULL));
2283 }
2284
2285 static int
2286 noit_console_show_checks_target_ip(mtev_console_closure_t ncct,
2287                                    int argc, char **argv,
2288                                    mtev_console_state_t *dstate,
2289                                    void *closure) {
2290   return noit_console_short_checks_sl(ncct,
2291            mtev_skiplist_find(polls_by_name.index,
2292            __check_target_ip_compare, NULL));
2293 }
2294
2295 static void
2296 register_console_check_commands() {
2297   mtev_console_state_t *tl;
2298   cmd_info_t *showcmd;
2299
2300   tl = mtev_console_state_initial();
2301   showcmd = mtev_console_state_get_cmd(tl, "show");
2302   assert(showcmd && showcmd->dstate);
2303
2304   mtev_console_state_add_cmd(showcmd->dstate,
2305     NCSCMD("timing_slots", noit_console_show_timing_slots, NULL, NULL, NULL));
2306
2307   mtev_console_state_add_cmd(showcmd->dstate,
2308     NCSCMD("checks", noit_console_show_checks, NULL, NULL, NULL));
2309
2310   mtev_console_state_add_cmd(showcmd->dstate,
2311     NCSCMD("checks:name", noit_console_show_checks_name, NULL,
2312            NULL, NULL));
2313
2314   mtev_console_state_add_cmd(showcmd->dstate,
2315     NCSCMD("checks:target", noit_console_show_checks_target, NULL,
2316            NULL, NULL));
2317
2318   mtev_console_state_add_cmd(showcmd->dstate,
2319     NCSCMD("checks:target_ip", noit_console_show_checks_target_ip, NULL,
2320            NULL, NULL));
2321
2322   mtev_console_state_add_cmd(showcmd->dstate,
2323     NCSCMD("watches", noit_console_show_watchlist, NULL, NULL, NULL));
2324 }
2325
2326 int
2327 noit_check_register_module(const char *name) {
2328   int i;
2329   for(i=0; i<reg_module_id; i++)
2330     if(!strcmp(reg_module_names[i], name)) return i;
2331   if(reg_module_id >= MAX_MODULE_REGISTRATIONS) return -1;
2332   mtevL(noit_debug, "Registered module %s as %d\n", name, i);
2333   i = reg_module_id++;
2334   reg_module_names[i] = strdup(name);
2335   mtev_conf_set_namespace(reg_module_names[i]);
2336   return i;
2337 }
2338 int
2339 noit_check_registered_module_cnt() {
2340   return reg_module_id;
2341 }
2342 const char *
2343 noit_check_registered_module(int idx) {
2344   if(reg_module_used < 0) reg_module_used = reg_module_id;
2345   assert(reg_module_used == reg_module_id);
2346   if(idx >= reg_module_id || idx < 0) return NULL;
2347   return reg_module_names[idx];
2348 }
2349
2350 void
2351 noit_check_set_module_metadata(noit_check_t *c, int idx, void *md, void (*freefunc)(void *)) {
2352   struct vp_w_free *tuple;
2353   if(reg_module_used < 0) reg_module_used = reg_module_id;
2354   assert(reg_module_used == reg_module_id);
2355   if(idx >= reg_module_id || idx < 0) return;
2356   if(!c->module_metadata) c->module_metadata = calloc(reg_module_id, sizeof(void *));
2357   c->module_metadata[idx] = calloc(1, sizeof(struct vp_w_free));
2358   tuple = c->module_metadata[idx];
2359   tuple->ptr = md;
2360   tuple->freefunc = freefunc;
2361 }
2362 void
2363 noit_check_set_module_config(noit_check_t *c, int idx, mtev_hash_table *config) {
2364   if(reg_module_used < 0) reg_module_used = reg_module_id;
2365   assert(reg_module_used == reg_module_id);
2366   if(idx >= reg_module_id || idx < 0) return;
2367   if(!c->module_configs) c->module_configs = calloc(reg_module_id, sizeof(mtev_hash_table *));
2368   c->module_configs[idx] = config;
2369 }
2370 void *
2371 noit_check_get_module_metadata(noit_check_t *c, int idx) {
2372   struct vp_w_free *tuple;
2373   if(reg_module_used < 0) reg_module_used = reg_module_id;
2374   assert(reg_module_used == reg_module_id);
2375   if(idx >= reg_module_id || idx < 0 || !c->module_metadata) return NULL;
2376   tuple = c->module_metadata[idx];
2377   return tuple ? tuple->ptr : NULL;
2378 }
2379 mtev_hash_table *
2380 noit_check_get_module_config(noit_check_t *c, int idx) {
2381   if(reg_module_used < 0) reg_module_used = reg_module_id;
2382   assert(reg_module_used == reg_module_id);
2383   if(idx >= reg_module_id || idx < 0 || !c->module_configs) return NULL;
2384   return c->module_configs[idx];
2385 }
Note: See TracBrowser for help on using the browser.