root/src/noit_check.c

Revision 7a1324aa50711b4d7f5d81f7e901081a150cf9fe, 15.7 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 6 years ago)

online editing of attributes

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  */
5
6 #include "noit_defines.h"
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <unistd.h>
11 #include <assert.h>
12 #include <netinet/in.h>
13 #include <arpa/inet.h>
14
15 #include "utils/noit_log.h"
16 #include "utils/noit_hash.h"
17 #include "utils/noit_skiplist.h"
18 #include "noit_conf.h"
19 #include "noit_check.h"
20 #include "noit_module.h"
21 #include "noit_console.h"
22 #include "eventer/eventer.h"
23
24 /* 60 seconds of possible stutter */
25 #define MAX_INITIAL_STUTTER (60*1000)
26
27 static noit_hash_table polls = NOIT_HASH_EMPTY;
28 static noit_skiplist polls_by_name = { 0 };
29 static u_int32_t __config_load_generation = 0;
30 struct uuid_dummy {
31   uuid_t foo;
32 };
33
34 static void register_console_check_commands();
35
36 #define UUID_SIZE sizeof(struct uuid_dummy)
37
38 static const char *
39 __noit_check_available_string(int16_t available) {
40   switch(available) {
41     case NP_AVAILABLE:    return "available";
42     case NP_UNAVAILABLE:  return "unavailable";
43     case NP_UNKNOWN:      return "unknown";
44   }
45   return "???";
46 }
47 static const char *
48 __noit_check_state_string(int16_t state) {
49   switch(state) {
50     case NP_GOOD:         return "good";
51     case NP_BAD:          return "bad";
52     case NP_UNKNOWN:      return "unknown";
53   }
54   return "???";
55 }
56 static int __check_name_compare(void *a, void *b) {
57   noit_check_t *ac = a;
58   noit_check_t *bc = b;
59   int rv;
60   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
61   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
62   return 0;
63 }
64 int
65 noit_check_max_initial_stutter() {
66   return MAX_INITIAL_STUTTER;
67 }
68 void
69 noit_check_fake_last_check(noit_check_t *check,
70                            struct timeval *lc, struct timeval *_now) {
71   struct timeval now, period;
72   double r;
73   int offset;
74
75   r = drand48();
76   offset = r * (MIN(MAX_INITIAL_STUTTER, check->period));
77   period.tv_sec = (check->period - offset) / 1000;
78   period.tv_usec = ((check->period - offset) % 1000) * 1000;
79   if(!_now) {
80     gettimeofday(&now, NULL);
81     _now = &now;
82   }
83   sub_timeval(*_now, period, lc);
84 }
85 void
86 noit_poller_process_checks(char *xpath) {
87   int i, cnt = 0;
88   noit_conf_section_t *sec;
89   __config_load_generation++;
90   sec = noit_conf_get_sections(NULL, xpath, &cnt);
91   for(i=0; i<cnt; i++) {
92     noit_check_t *existing_check;
93     char uuid_str[37];
94     char target[256];
95     char module[256];
96     char name[256];
97     char oncheck[1024] = "";
98     int no_period = 0;
99     int no_oncheck = 0;
100     int period = 0, timeout = 0;
101     noit_conf_boolean disabled = noit_false;
102     uuid_t uuid, out_uuid;
103     noit_hash_table *options;
104
105 #define NEXT(...) noitL(noit_stderr, __VA_ARGS__); continue
106 #define MYATTR(type,a,...) noit_conf_get_##type(sec[i], "@" #a, __VA_ARGS__)
107 #define INHERIT(type,a,...) \
108   noit_conf_get_##type(sec[i], "ancestor-or-self::node()/@" #a, __VA_ARGS__)
109
110     if(!MYATTR(stringbuf, uuid, uuid_str, sizeof(uuid_str))) {
111       noitL(noit_stderr, "check %d has no uuid\n", i+1);
112       continue;
113     }
114
115     if(uuid_parse(uuid_str, uuid)) {
116       noitL(noit_stderr, "check uuid: '%s' is invalid\n", uuid_str);
117       continue;
118     }
119
120     if(!INHERIT(stringbuf, target, target, sizeof(target))) {
121       noitL(noit_stderr, "check uuid: '%s' has no target\n", uuid_str);
122       disabled = noit_true;
123     }
124     if(!INHERIT(stringbuf, module, module, sizeof(module))) {
125       noitL(noit_stderr, "check uuid: '%s' has no module\n", uuid_str);
126       disabled = noit_true;
127     }
128
129     if(!MYATTR(stringbuf, name, name, sizeof(name)))
130       strlcpy(name, module, sizeof(name));
131
132     if(!INHERIT(int, period, &period) || period == 0)
133       no_period = 1;
134
135     if(!INHERIT(stringbuf, oncheck, oncheck, sizeof(oncheck)) || !oncheck[0])
136       no_oncheck = 1;
137
138     if(no_period && no_oncheck) {
139       noitL(noit_stderr, "check uuid: '%s' has neither period nor oncheck\n",
140             uuid_str);
141       disabled = noit_true;
142     }
143     if(!(no_period || no_oncheck)) {
144       noitL(noit_stderr, "check uuid: '%s' has oncheck and period.\n",
145             uuid_str);
146       disabled = noit_true;
147     }
148     if(!INHERIT(int, timeout, &timeout)) {
149       noitL(noit_stderr, "check uuid: '%s' has no timeout\n", uuid_str);
150       disabled = noit_true;
151     }
152     if(!no_period && timeout >= period) {
153       noitL(noit_stderr, "check uuid: '%s' timeout > period\n", uuid_str);
154       timeout = period/2;
155     }
156     options = noit_conf_get_hash(sec[i], "ancestor-or-self::node()/config/*");
157
158     if(noit_hash_retrieve(&polls, (char *)uuid, UUID_SIZE,
159                           (void **)&existing_check)) {
160       noit_check_update(existing_check, target, name, options,
161                            period, timeout, oncheck[0] ? oncheck : NULL,
162                            disabled);
163       noitL(noit_debug, "reloaded uuid: %s\n", uuid_str);
164     }
165     else {
166       noit_poller_schedule(target, module, name, options,
167                            period, timeout, oncheck[0] ? oncheck : NULL,
168                            disabled, uuid, out_uuid);
169       noitL(noit_debug, "loaded uuid: %s\n", uuid_str);
170     }
171   }
172 }
173
/* Process every <check> element found anywhere beneath /noit/checks. */
void
noit_poller_load_checks() {
  char *all_checks = "/noit/checks//check";
  noit_poller_process_checks(all_checks);
}
178
179 void
180 noit_poller_initiate() {
181   noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
182   uuid_t key_id;
183   int klen;
184   noit_check_t *check;
185   while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen,
186                        (void **)&check)) {
187     noit_module_t *mod;
188     mod = noit_module_lookup(check->module);
189     if(mod) {
190       if(NOIT_CHECK_LIVE(check))
191         continue;
192       if((check->flags & NP_DISABLED) == 0)
193         mod->initiate_check(mod, check, 0, NULL);
194       else
195         noitL(noit_debug, "Skipping %s`%s, disabled.\n",
196               check->target, check->name);
197     }
198     else {
199       noitL(noit_stderr, "Cannot find module '%s'\n", check->module);
200       check->flags |= NP_DISABLED;
201     }
202   }
203 }
204
205 void
206 noit_poller_make_causal_map() {
207   noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
208   uuid_t key_id;
209   int klen;
210   noit_check_t *check, *parent;
211   while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen,
212                        (void **)&check)) {
213     if(check->oncheck) {
214       /* This service is causally triggered by another service */
215       char fullcheck[1024];
216       char *name = check->oncheck;
217       char *target = NULL;
218
219       if((target = strchr(check->oncheck, '`')) != NULL) {
220         strlcpy(fullcheck, check->oncheck, target - check->oncheck);
221         name = target + 1;
222         target = fullcheck;
223       }
224       else
225        target = check->target;
226
227       parent = noit_poller_lookup_by_name(target, name);
228       if(!parent) {
229         check->flags |= NP_DISABLED;
230         noitL(noit_stderr, "Disabling check %s`%s, can't find oncheck %s`%s\n",
231               check->target, check->name, target, name);
232       }
233       else {
234         dep_list_t *dep;
235         dep = malloc(sizeof(*dep));
236         dep->check = check;
237         dep->next = parent->causal_checks;
238         parent->causal_checks = dep;
239         noitL(noit_debug, "Causal map %s`%s --> %s`%s\n",
240               parent->target, parent->name, check->target, check->name);
241       }
242     }
243   }
244 }
245 void
246 noit_poller_init() {
247   noit_skiplist_init(&polls_by_name);
248   noit_skiplist_set_compare(&polls_by_name, __check_name_compare,
249                             __check_name_compare);
250   noit_poller_load_checks();
251   noit_poller_make_causal_map();
252   register_console_check_commands();
253   noit_poller_initiate();
254 }
255
256 int
257 noit_check_update(noit_check_t *new_check,
258                   const char *target,
259                   const char *name,
260                   noit_hash_table *config,
261                   u_int32_t period,
262                   u_int32_t timeout,
263                   const char *oncheck,
264                   noit_conf_boolean disabled) {
265   int8_t family;
266   int rv;
267   union {
268     struct in_addr addr4;
269     struct in6_addr addr6;
270   } a;
271
272
273   family = AF_INET;
274   rv = inet_pton(family, target, &a);
275   if(rv != 1) {
276     family = AF_INET6;
277     rv = inet_pton(family, target, &a);
278     if(rv != 1) {
279       noitL(noit_stderr, "Cannot translate '%s' to IP\n", target);
280       memset(&a, 0, sizeof(a));
281       disabled = noit_true;
282     }
283   }
284
285   new_check->generation = __config_load_generation;
286   new_check->target_family = family;
287   memcpy(&new_check->target_addr, &a, sizeof(a));
288   if(new_check->target) free(new_check->target);
289   new_check->target = strdup(target);
290   if(new_check->name) free(new_check->name);
291   new_check->name = name ? strdup(name): NULL;
292
293   if(config != NULL) {
294     noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
295     const char *k;
296     int klen;
297     void *data;
298     if(new_check->config) noit_hash_delete_all(new_check->config, free, free);
299     else new_check->config = calloc(1, sizeof(*new_check->config));
300     while(noit_hash_next(config, &iter, &k, &klen, &data)) {
301       noit_hash_store(new_check->config, strdup(k), klen, strdup((char *)data));
302     }
303   }
304   if(new_check->oncheck) free(new_check->oncheck);
305   new_check->oncheck = oncheck ? strdup(oncheck) : NULL;
306   new_check->period = period;
307   new_check->timeout = timeout;
308
309   if(disabled) new_check->flags |= NP_DISABLED;
310
311   /* This remove could fail -- no big deal */
312   noit_skiplist_remove(&polls_by_name, new_check, NULL);
313
314   /* This insert could fail.. which means we have a conflict on
315    * target`name.  That should result in the check being disabled. */
316   if(!noit_skiplist_insert(&polls_by_name, new_check)) {
317     noitL(noit_stderr, "Check %s`%s disabled due to naming conflict\n",
318           new_check->target, new_check->name);
319     new_check->flags |= NP_DISABLED;
320   }
321   return 0;
322 }
323 int
324 noit_poller_schedule(const char *target,
325                      const char *module,
326                      const char *name,
327                      noit_hash_table *config,
328                      u_int32_t period,
329                      u_int32_t timeout,
330                      const char *oncheck,
331                      noit_conf_boolean disabled,
332                      uuid_t in,
333                      uuid_t out) {
334   noit_check_t *new_check;
335   new_check = calloc(1, sizeof(*new_check));
336   if(!new_check) return -1;
337
338   /* The module and the UUID can never be changed */
339   new_check->module = strdup(module);
340   if(uuid_is_null(in))
341     uuid_generate(new_check->checkid);
342   else
343     uuid_copy(new_check->checkid, in);
344
345   noit_check_update(new_check, target, name, config,
346                     period, timeout, oncheck, disabled);
347   assert(noit_hash_store(&polls,
348                          (char *)new_check->checkid, UUID_SIZE,
349                          new_check));
350   uuid_copy(out, new_check->checkid);
351
352   return 0;
353 }
354
355 int
356 noit_poller_deschedule(uuid_t in) {
357   noit_check_t *checker;
358   if(noit_hash_retrieve(&polls,
359                         (char *)in, UUID_SIZE,
360                         (void **)&checker) == 0) {
361     return -1;
362   }
363   if(checker->flags & NP_RUNNING) {
364     checker->flags |= NP_KILLED;
365     return 0;
366   }
367   checker->flags |= NP_KILLED;
368   if(checker->fire_event) {
369      eventer_remove(checker->fire_event);
370      eventer_free(checker->fire_event);
371      checker->fire_event = NULL;
372   }
373   noit_hash_delete(&polls, (char *)in, UUID_SIZE, free, free);
374
375   if(checker->target) free(checker->target);
376   if(checker->module) free(checker->module);
377   if(checker->name) free(checker->name);
378   if(checker->config) {
379     noit_hash_destroy(checker->config, free, free);
380     free(checker->config);
381     checker->config = NULL;
382   }
383   free(checker);
384   return 0;
385 }
386
387 noit_check_t *
388 noit_poller_lookup(uuid_t in) {
389   noit_check_t *check;
390   if(noit_hash_retrieve(&polls,
391                         (char *)in, UUID_SIZE,
392                         (void **)&check)) {
393     return check;
394   }
395   return NULL;
396 }
397 noit_check_t *
398 noit_poller_lookup_by_name(char *target, char *name) {
399   noit_check_t *check, *tmp_check;
400   tmp_check = calloc(1, sizeof(*tmp_check));
401   tmp_check->target = target;
402   tmp_check->name = name;
403   check = noit_skiplist_find(&polls_by_name, tmp_check, NULL);
404   free(tmp_check);
405   return check;
406 }
407
408 static void
409 __free_metric(void *vm) {
410   metric_t *m = vm;
411   free(m->metric_name);
412   if(m->metric_value.i) free(m->metric_value.i);
413 }
414
415 void
416 __stats_add_metric(stats_t *newstate, metric_t *m) {
417   noit_hash_replace(&newstate->metrics, m->metric_name, strlen(m->metric_name),
418                     m, NULL, __free_metric);
419 }
420
421 void
422 noit_stats_set_metric_int(stats_t *newstate, char *name, int *value) {
423   metric_t *m = calloc(1, sizeof(*m));
424   m->metric_name = strdup(name);
425   m->metric_type = METRIC_INT;
426   if(value) {
427     m->metric_value.i = malloc(sizeof(*value));
428     *(m->metric_value.i) = *value;
429   }
430   __stats_add_metric(newstate, m);
431 }
432
433 void
434 noit_stats_set_metric_float(stats_t *newstate, char *name, float *value) {
435   metric_t *m = calloc(1, sizeof(*m));
436   m->metric_name = strdup(name);
437   m->metric_type = METRIC_FLOAT;
438   if(value) {
439     m->metric_value.f = malloc(sizeof(*value));
440     *(m->metric_value.f) = *value;
441   }
442   __stats_add_metric(newstate, m);
443 }
444
445 void
446 noit_stats_set_metric_string(stats_t *newstate, char *name, char *value) {
447   metric_t *m = calloc(1, sizeof(*m));
448   m->metric_name = strdup(name);
449   m->metric_type = METRIC_STRING;
450   m->metric_value.s = value ? strdup(value) : NULL;
451   __stats_add_metric(newstate, m);
452 }
453
454 void
455 noit_check_set_stats(struct _noit_module *module,
456                      noit_check_t *check, stats_t *newstate) {
457   int report_change = 0;
458   dep_list_t *dep;
459   if(check->stats.previous.status)
460     free(check->stats.previous.status);
461   noit_hash_destroy(&check->stats.previous.metrics, NULL, __free_metric);
462   memcpy(&check->stats.previous, &check->stats.current, sizeof(stats_t));
463   memcpy(&check->stats.current, newstate, sizeof(stats_t));
464   if(check->stats.current.status)
465     check->stats.current.status = strdup(check->stats.current.status);
466
467   /* check for state changes */
468   if(check->stats.current.available != 0 &&
469      check->stats.previous.available != 0 &&
470      check->stats.current.available != check->stats.previous.available)
471     report_change = 1;
472   if(check->stats.current.state != 0 &&
473      check->stats.previous.state != 0 &&
474      check->stats.current.state != check->stats.previous.state)
475     report_change = 1;
476
477   noitL(noit_error, "%s`%s <- [%s]\n", check->target, check->name,
478         check->stats.current.status);
479   if(report_change) {
480     noitL(noit_error, "%s`%s -> [%s:%s]\n",
481           check->target, check->name,
482           __noit_check_available_string(check->stats.current.available),
483           __noit_check_state_string(check->stats.current.state));
484   }
485   for(dep = check->causal_checks; dep; dep = dep->next) {
486     noit_module_t *mod;
487     mod = noit_module_lookup(dep->check->module);
488     assert(mod);
489     noitL(noit_debug, "Firing %s`%s in response to %s`%s\n",
490           dep->check->target, dep->check->name,
491           check->target, check->name);
492     mod->initiate_check(mod, dep->check, 1, check);
493   }
494 }
495
496 static void
497 nc_printf_check_brief(noit_console_closure_t ncct,
498                       noit_check_t *check) {
499   char out[512];
500   char uuid_str[37];
501   snprintf(out, sizeof(out), "%s`%s", check->target, check->name);
502   uuid_unparse_lower(check->checkid, uuid_str);
503   nc_printf(ncct, "%s %s\n", uuid_str, out);
504   if(check->stats.current.status)
505     nc_printf(ncct, "\t%s\n", check->stats.current.status);
506 }
507
508 static int
509 noit_console_show_checks(noit_console_closure_t ncct,
510                          int argc, char **argv,
511                          noit_console_state_t *dstate,
512                          void *closure) {
513   struct timeval _now;
514   noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
515   uuid_t key_id;
516   int klen;
517   noit_check_t *check;
518
519   gettimeofday(&_now, NULL);
520   while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen,
521                        (void **)&check)) {
522     nc_printf_check_brief(ncct, check);
523   }
524   return 0;
525 }
526
527 static void
528 register_console_check_commands() {
529   noit_console_state_t *tl;
530   cmd_info_t *showcmd;
531
532   tl = noit_console_state_initial();
533   showcmd = noit_console_state_get_cmd(tl, "show");
534   assert(showcmd && showcmd->dstate);
535
536   noit_console_state_add_cmd(showcmd->dstate,
537     NCSCMD("checks", noit_console_show_checks, NULL, NULL));
538 }
539
Note: See TracBrowser for help on using the browser.