root/src/noit_check.c

Revision a2e53dc0c9ff0d9e50eb095b926924850a6c8ff8, 12.3 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 6 years ago)

implement staggered start skilz

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  */
5
6 #include "noit_defines.h"
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <unistd.h>
11 #include <assert.h>
12 #include <netinet/in.h>
13 #include <arpa/inet.h>
14
15 #include "utils/noit_log.h"
16 #include "utils/noit_hash.h"
17 #include "utils/noit_skiplist.h"
18 #include "noit_conf.h"
19 #include "noit_check.h"
20 #include "noit_module.h"
21 #include "eventer/eventer.h"
22
23 /* 60 seconds of possible stutter */
24 #define MAX_INITIAL_STUTTER (60*1000)
25
26 static noit_hash_table polls = NOIT_HASH_EMPTY;
27 static noit_skiplist polls_by_name = { 0 };
28 static u_int32_t __config_load_generation = 0;
29 struct uuid_dummy {
30   uuid_t foo;
31 };
32
33 #define UUID_SIZE sizeof(struct uuid_dummy)
34
35 static const char *
36 __noit_check_available_string(int16_t available) {
37   switch(available) {
38     case NP_AVAILABLE:    return "available";
39     case NP_UNAVAILABLE:  return "unavailable";
40     case NP_UNKNOWN:      return "unknown";
41   }
42   return "???";
43 }
44 static const char *
45 __noit_check_state_string(int16_t state) {
46   switch(state) {
47     case NP_GOOD:         return "good";
48     case NP_BAD:          return "bad";
49     case NP_UNKNOWN:      return "unknown";
50   }
51   return "???";
52 }
53 static int __check_name_compare(void *a, void *b) {
54   noit_check_t *ac = a;
55   noit_check_t *bc = b;
56   int rv;
57   if((rv = strcmp(ac->target, bc->target)) != 0) return rv;
58   if((rv = strcmp(ac->name, bc->name)) != 0) return rv;
59   return 0;
60 }
61 int
62 noit_check_max_initial_stutter() {
63   return MAX_INITIAL_STUTTER;
64 }
65 void
66 noit_check_fake_last_check(noit_check_t *check,
67                            struct timeval *lc, struct timeval *_now) {
68   struct timeval now, period;
69   double r;
70   int offset;
71
72   r = drand48();
73   offset = r * (MIN(MAX_INITIAL_STUTTER, check->period));
74   period.tv_sec = (check->period - offset) / 1000;
75   period.tv_usec = ((check->period - offset) % 1000) * 1000;
76   if(!_now) {
77     gettimeofday(&now, NULL);
78     _now = &now;
79   }
80   sub_timeval(*_now, period, lc);
81 }
82 void
83 noit_poller_load_checks() {
84   int i, cnt = 0;
85   noit_conf_section_t *sec;
86   __config_load_generation++;
87   sec = noit_conf_get_sections(NULL, "/noit/checks//check", &cnt);
88   for(i=0; i<cnt; i++) {
89     char uuid_str[37];
90     char target[256];
91     char module[256];
92     char name[256];
93     char oncheck[1024];
94     int no_period = 0;
95     int no_oncheck = 0;
96     int period = 0, timeout = 0;
97     uuid_t uuid, out_uuid;
98     noit_hash_table *options;
99
100     if(!noit_conf_get_stringbuf(sec[i], "@uuid",
101                                 uuid_str, sizeof(uuid_str))) {
102       noitL(noit_stderr, "check %d has no uuid\n", i+1);
103       continue;
104     }
105     if(uuid_parse(uuid_str, uuid)) {
106       noitL(noit_stderr, "check uuid: '%s' is invalid\n", uuid_str);
107       continue;
108     }
109     if(!noit_conf_get_stringbuf(sec[i], "ancestor-or-self::node()/target", target, sizeof(target))) {
110       noitL(noit_stderr, "check uuid: '%s' has no target\n",
111             uuid_str);
112       continue;
113     }
114     if(!noit_conf_get_stringbuf(sec[i], "ancestor-or-self::node()/module", module, sizeof(module))) {
115       noitL(noit_stderr, "check uuid: '%s' has no module\n",
116             uuid_str);
117       continue;
118     }
119     if(!noit_conf_get_stringbuf(sec[i], "name", name, sizeof(name))) {
120       strcpy(name, module);
121     }
122     if(!noit_conf_get_int(sec[i], "ancestor-or-self::node()/period", &period)) {
123       no_period = 1;
124     }
125     if(!noit_conf_get_stringbuf(sec[i], "ancestor-or-self::node()/oncheck", oncheck, sizeof(oncheck))) {
126       oncheck[0] = '\0';
127       no_oncheck = 1;
128     }
129     if(no_period && no_oncheck) {
130       noitL(noit_stderr, "check uuid: '%s' has neither period nor oncheck\n",
131             uuid_str);
132       continue;
133     }
134     if(!(no_period || no_oncheck)) {
135       noitL(noit_stderr, "check uuid: '%s' has has on check and period.\n",
136             uuid_str);
137       continue;
138     }
139     if(!noit_conf_get_int(sec[i], "ancestor-or-self::node()/timeout", &timeout)) {
140       noitL(noit_stderr, "check uuid: '%s' has no timeout\n", uuid_str);
141       continue;
142     }
143     if(!no_period && timeout >= period) {
144       noitL(noit_stderr, "check uuid: '%s' timeout > period\n", uuid_str);
145       timeout = period/2;
146     }
147     options = noit_conf_get_hash(sec[i], "ancestor-or-self::node()/config/*");
148     noit_poller_schedule(target, module, name, options,
149                          period, timeout, oncheck[0] ? oncheck : NULL,
150                          uuid, out_uuid);
151     noitL(noit_debug, "loaded uuid: %s\n", uuid_str);
152   }
153 }
154
155 void
156 noit_poller_initiate() {
157   noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
158   uuid_t key_id;
159   int klen;
160   noit_check_t *check;
161   while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen,
162                        (void **)&check)) {
163     noit_module_t *mod;
164     mod = noit_module_lookup(check->module);
165     if(mod) {
166       if((check->flags & NP_DISABLED) == 0)
167         mod->initiate_check(mod, check, 0, NULL);
168     }
169     else {
170       noitL(noit_stderr, "Cannot find module '%s'\n", check->module);
171       check->flags |= NP_DISABLED;
172     }
173   }
174 }
175
176 void
177 noit_poller_make_causal_map() {
178   noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
179   uuid_t key_id;
180   int klen;
181   noit_check_t *check, *parent;
182   while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen,
183                        (void **)&check)) {
184     if(check->oncheck) {
185       /* This service is causally triggered by another service */
186       char fullcheck[1024];
187       char *name = check->oncheck;
188       char *target = NULL;
189
190       if((target = strchr(check->oncheck, '`')) != NULL) {
191         strlcpy(fullcheck, check->oncheck, target - check->oncheck);
192         name = target + 1;
193         target = fullcheck;
194       }
195       else
196        target = check->target;
197
198       parent = noit_poller_lookup_by_name(target, name);
199       if(!parent) {
200         check->flags |= NP_DISABLED;
201         noitL(noit_stderr, "Disabling check %s/%s, can't find oncheck %s/%s\n",
202               check->target, check->name, target, name);
203       }
204       else {
205         dep_list_t *dep;
206         dep = malloc(sizeof(*dep));
207         dep->check = check;
208         dep->next = parent->causal_checks;
209         parent->causal_checks = dep;
210       }
211     }
212   }
213 }
214 void
215 noit_poller_init() {
216   noit_skiplist_init(&polls_by_name);
217   noit_skiplist_set_compare(&polls_by_name, __check_name_compare,
218                             __check_name_compare);
219   noit_poller_load_checks();
220   noit_poller_make_causal_map();
221   noit_poller_initiate();
222 }
223
224 int
225 noit_poller_schedule(const char *target,
226                      const char *module,
227                      const char *name,
228                      noit_hash_table *config,
229                      u_int32_t period,
230                      u_int32_t timeout,
231                      const char *oncheck,
232                      uuid_t in,
233                      uuid_t out) {
234   int8_t family;
235   int rv;
236   union {
237     struct in_addr addr4;
238     struct in6_addr addr6;
239   } a;
240   noit_check_t *new_check;
241
242
243   family = AF_INET;
244   rv = inet_pton(family, target, &a);
245   if(rv != 1) {
246     family = AF_INET6;
247     rv = inet_pton(family, target, &a);
248     if(rv != 1) {
249       noitL(noit_stderr, "Cannot translate '%s' to IP\n", target);
250       return -1;
251     }
252   }
253
254   new_check = calloc(1, sizeof(*new_check));
255   if(!new_check) return -1;
256   new_check->generation = __config_load_generation;
257   new_check->target_family = family;
258   memcpy(&new_check->target_addr, &a, sizeof(a));
259   new_check->target = strdup(target);
260   new_check->module = strdup(module);
261   new_check->name = name ? strdup(name): NULL;
262
263   if(config != NULL) {
264     noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
265     const char *k;
266     int klen;
267     void *data;
268     new_check->config = calloc(1, sizeof(*new_check->config));
269     while(noit_hash_next(config, &iter, &k, &klen, &data)) {
270       noit_hash_store(new_check->config, strdup(k), klen, strdup((char *)data));
271     }
272   }
273   new_check->oncheck = oncheck ? strdup(oncheck) : NULL;
274   new_check->period = period;
275   new_check->timeout = timeout;
276   new_check->flags = 0;
277   if(uuid_is_null(in))
278     uuid_generate(new_check->checkid);
279   else
280     uuid_copy(new_check->checkid, in);
281
282   assert(noit_hash_store(&polls,
283                          (char *)new_check->checkid, UUID_SIZE,
284                          new_check));
285   noit_skiplist_insert(&polls_by_name, new_check);
286   uuid_copy(out, new_check->checkid);
287   return 0;
288 }
289
290 int
291 noit_poller_deschedule(uuid_t in) {
292   noit_check_t *checker;
293   if(noit_hash_retrieve(&polls,
294                         (char *)in, UUID_SIZE,
295                         (void **)&checker) == 0) {
296     return -1;
297   }
298   if(checker->flags & NP_RUNNING) {
299     checker->flags |= NP_KILLED;
300     return 0;
301   }
302   if(checker->fire_event) {
303      eventer_remove(checker->fire_event);
304      eventer_free(checker->fire_event);
305      checker->fire_event = NULL;
306   }
307   noit_hash_delete(&polls, (char *)in, UUID_SIZE, free, free);
308
309   if(checker->target) free(checker->target);
310   if(checker->module) free(checker->module);
311   if(checker->name) free(checker->name);
312   if(checker->config) {
313     noit_hash_destroy(checker->config, free, free);
314     free(checker->config);
315     checker->config = NULL;
316   }
317   free(checker);
318   return 0;
319 }
320
321 noit_check_t *
322 noit_poller_lookup(uuid_t in) {
323   noit_check_t *check;
324   if(noit_hash_retrieve(&polls,
325                         (char *)in, UUID_SIZE,
326                         (void **)&check)) {
327     return check;
328   }
329   return NULL;
330 }
331 noit_check_t *
332 noit_poller_lookup_by_name(char *target, char *name) {
333   noit_check_t *check, *tmp_check;
334   tmp_check = calloc(1, sizeof(*tmp_check));
335   tmp_check->target = target;
336   tmp_check->name = name;
337   check = noit_skiplist_find(&polls_by_name, tmp_check, NULL);
338   free(tmp_check);
339   return check;
340 }
341
342 static void
343 __free_metric(void *vm) {
344   metric_t *m = vm;
345   free(m->metric_name);
346   if(m->metric_value.i) free(m->metric_value.i);
347 }
348
349 void
350 __stats_add_metric(stats_t *newstate, metric_t *m) {
351   noit_hash_replace(&newstate->metrics, m->metric_name, strlen(m->metric_name),
352                     m, NULL, __free_metric);
353 }
354
355 void
356 noit_stats_set_metric_int(stats_t *newstate, char *name, int *value) {
357   metric_t *m = calloc(1, sizeof(*m));
358   m->metric_name = strdup(name);
359   m->metric_type = METRIC_INT;
360   if(value) {
361     m->metric_value.i = malloc(sizeof(*value));
362     *(m->metric_value.i) = *value;
363   }
364   __stats_add_metric(newstate, m);
365 }
366
367 void
368 noit_stats_set_metric_float(stats_t *newstate, char *name, float *value) {
369   metric_t *m = calloc(1, sizeof(*m));
370   m->metric_name = strdup(name);
371   m->metric_type = METRIC_FLOAT;
372   if(value) {
373     m->metric_value.f = malloc(sizeof(*value));
374     *(m->metric_value.f) = *value;
375   }
376   __stats_add_metric(newstate, m);
377 }
378
379 void
380 noit_stats_set_metric_string(stats_t *newstate, char *name, char *value) {
381   metric_t *m = calloc(1, sizeof(*m));
382   m->metric_name = strdup(name);
383   m->metric_type = METRIC_STRING;
384   m->metric_value.s = value ? strdup(value) : NULL;
385   __stats_add_metric(newstate, m);
386 }
387
388 void
389 noit_check_set_stats(struct _noit_module *module,
390                      noit_check_t *check, stats_t *newstate) {
391   int report_change = 0;
392   dep_list_t *dep;
393   if(check->stats.previous.status)
394     free(check->stats.previous.status);
395   noit_hash_destroy(&check->stats.previous.metrics, NULL, __free_metric);
396   memcpy(&check->stats.previous, &check->stats.current, sizeof(stats_t));
397   memcpy(&check->stats.current, newstate, sizeof(stats_t));
398   if(check->stats.current.status)
399     check->stats.current.status = strdup(check->stats.current.status);
400
401   /* check for state changes */
402   if(check->stats.current.available != 0 &&
403      check->stats.previous.available != 0 &&
404      check->stats.current.available != check->stats.previous.available)
405     report_change = 1;
406   if(check->stats.current.state != 0 &&
407      check->stats.previous.state != 0 &&
408      check->stats.current.state != check->stats.previous.state)
409     report_change = 1;
410
411   noitL(noit_error, "%s/%s <- [%s]\n", check->target, check->module,
412         check->stats.current.status);
413   if(report_change) {
414     noitL(noit_error, "%s/%s -> [%s/%s]\n",
415           check->target, check->module,
416           __noit_check_available_string(check->stats.current.available),
417           __noit_check_state_string(check->stats.current.state));
418   }
419   for(dep = check->causal_checks; dep; dep = dep->next) {
420     noit_module_t *mod;
421     mod = noit_module_lookup(dep->check->module);
422     assert(mod);
423     noitL(noit_debug, "Firing %s/%s in response to %s/%s\n",
424           dep->check->target, dep->check->name,
425           check->target, check->name);
426     mod->initiate_check(mod, dep->check, 1, check);
427   }
428 }
Note: See TracBrowser for help on using the browser.