1 |
/* |
---|
2 |
* Copyright (c) 2007, OmniTI Computer Consulting, Inc. |
---|
3 |
* All rights reserved. |
---|
4 |
*/ |
---|
5 |
|
---|
6 |
#include "noit_defines.h" |
---|
7 |
|
---|
8 |
#include <stdio.h> |
---|
9 |
#include <stdlib.h> |
---|
10 |
#include <unistd.h> |
---|
11 |
#include <assert.h> |
---|
12 |
#include <netinet/in.h> |
---|
13 |
#include <arpa/inet.h> |
---|
14 |
|
---|
15 |
#include "utils/noit_log.h" |
---|
16 |
#include "utils/noit_hash.h" |
---|
17 |
#include "utils/noit_skiplist.h" |
---|
18 |
#include "noit_conf.h" |
---|
19 |
#include "noit_check.h" |
---|
20 |
#include "noit_module.h" |
---|
21 |
#include "noit_console.h" |
---|
22 |
#include "eventer/eventer.h" |
---|
23 |
|
---|
24 |
/* 60 seconds of possible stutter */ |
---|
25 |
#define MAX_INITIAL_STUTTER (60*1000) |
---|
26 |
|
---|
27 |
static noit_hash_table polls = NOIT_HASH_EMPTY; |
---|
28 |
static noit_skiplist polls_by_name = { 0 }; |
---|
29 |
static u_int32_t __config_load_generation = 0; |
---|
30 |
struct uuid_dummy { |
---|
31 |
uuid_t foo; |
---|
32 |
}; |
---|
33 |
|
---|
34 |
static void register_console_check_commands(); |
---|
35 |
|
---|
36 |
#define UUID_SIZE sizeof(struct uuid_dummy) |
---|
37 |
|
---|
38 |
static const char * |
---|
39 |
__noit_check_available_string(int16_t available) { |
---|
40 |
switch(available) { |
---|
41 |
case NP_AVAILABLE: return "available"; |
---|
42 |
case NP_UNAVAILABLE: return "unavailable"; |
---|
43 |
case NP_UNKNOWN: return "unknown"; |
---|
44 |
} |
---|
45 |
return "???"; |
---|
46 |
} |
---|
47 |
static const char * |
---|
48 |
__noit_check_state_string(int16_t state) { |
---|
49 |
switch(state) { |
---|
50 |
case NP_GOOD: return "good"; |
---|
51 |
case NP_BAD: return "bad"; |
---|
52 |
case NP_UNKNOWN: return "unknown"; |
---|
53 |
} |
---|
54 |
return "???"; |
---|
55 |
} |
---|
56 |
static int __check_name_compare(void *a, void *b) { |
---|
57 |
noit_check_t *ac = a; |
---|
58 |
noit_check_t *bc = b; |
---|
59 |
int rv; |
---|
60 |
if((rv = strcmp(ac->target, bc->target)) != 0) return rv; |
---|
61 |
if((rv = strcmp(ac->name, bc->name)) != 0) return rv; |
---|
62 |
return 0; |
---|
63 |
} |
---|
64 |
int |
---|
65 |
noit_check_max_initial_stutter() { |
---|
66 |
return MAX_INITIAL_STUTTER; |
---|
67 |
} |
---|
68 |
void |
---|
69 |
noit_check_fake_last_check(noit_check_t *check, |
---|
70 |
struct timeval *lc, struct timeval *_now) { |
---|
71 |
struct timeval now, period; |
---|
72 |
double r; |
---|
73 |
int offset; |
---|
74 |
|
---|
75 |
r = drand48(); |
---|
76 |
offset = r * (MIN(MAX_INITIAL_STUTTER, check->period)); |
---|
77 |
period.tv_sec = (check->period - offset) / 1000; |
---|
78 |
period.tv_usec = ((check->period - offset) % 1000) * 1000; |
---|
79 |
if(!_now) { |
---|
80 |
gettimeofday(&now, NULL); |
---|
81 |
_now = &now; |
---|
82 |
} |
---|
83 |
sub_timeval(*_now, period, lc); |
---|
84 |
} |
---|
85 |
void |
---|
86 |
noit_poller_load_checks() { |
---|
87 |
int i, cnt = 0; |
---|
88 |
noit_conf_section_t *sec; |
---|
89 |
__config_load_generation++; |
---|
90 |
sec = noit_conf_get_sections(NULL, "/noit/checks//check", &cnt); |
---|
91 |
for(i=0; i<cnt; i++) { |
---|
92 |
char uuid_str[37]; |
---|
93 |
char target[256]; |
---|
94 |
char module[256]; |
---|
95 |
char name[256]; |
---|
96 |
char oncheck[1024]; |
---|
97 |
int no_period = 0; |
---|
98 |
int no_oncheck = 0; |
---|
99 |
int period = 0, timeout = 0; |
---|
100 |
uuid_t uuid, out_uuid; |
---|
101 |
noit_hash_table *options; |
---|
102 |
|
---|
103 |
if(!noit_conf_get_stringbuf(sec[i], "@uuid", |
---|
104 |
uuid_str, sizeof(uuid_str))) { |
---|
105 |
noitL(noit_stderr, "check %d has no uuid\n", i+1); |
---|
106 |
continue; |
---|
107 |
} |
---|
108 |
if(uuid_parse(uuid_str, uuid)) { |
---|
109 |
noitL(noit_stderr, "check uuid: '%s' is invalid\n", uuid_str); |
---|
110 |
continue; |
---|
111 |
} |
---|
112 |
if(!noit_conf_get_stringbuf(sec[i], "ancestor-or-self::node()/target", target, sizeof(target))) { |
---|
113 |
noitL(noit_stderr, "check uuid: '%s' has no target\n", |
---|
114 |
uuid_str); |
---|
115 |
continue; |
---|
116 |
} |
---|
117 |
if(!noit_conf_get_stringbuf(sec[i], "ancestor-or-self::node()/module", module, sizeof(module))) { |
---|
118 |
noitL(noit_stderr, "check uuid: '%s' has no module\n", |
---|
119 |
uuid_str); |
---|
120 |
continue; |
---|
121 |
} |
---|
122 |
if(!noit_conf_get_stringbuf(sec[i], "name", name, sizeof(name))) { |
---|
123 |
strlcpy(name, module, sizeof(name)); |
---|
124 |
} |
---|
125 |
if(!noit_conf_get_int(sec[i], "ancestor-or-self::node()/period", &period)) { |
---|
126 |
no_period = 1; |
---|
127 |
} |
---|
128 |
if(!noit_conf_get_stringbuf(sec[i], "ancestor-or-self::node()/oncheck", oncheck, sizeof(oncheck))) { |
---|
129 |
oncheck[0] = '\0'; |
---|
130 |
no_oncheck = 1; |
---|
131 |
} |
---|
132 |
if(no_period && no_oncheck) { |
---|
133 |
noitL(noit_stderr, "check uuid: '%s' has neither period nor oncheck\n", |
---|
134 |
uuid_str); |
---|
135 |
continue; |
---|
136 |
} |
---|
137 |
if(!(no_period || no_oncheck)) { |
---|
138 |
noitL(noit_stderr, "check uuid: '%s' has has on check and period.\n", |
---|
139 |
uuid_str); |
---|
140 |
continue; |
---|
141 |
} |
---|
142 |
if(!noit_conf_get_int(sec[i], "ancestor-or-self::node()/timeout", &timeout)) { |
---|
143 |
noitL(noit_stderr, "check uuid: '%s' has no timeout\n", uuid_str); |
---|
144 |
continue; |
---|
145 |
} |
---|
146 |
if(!no_period && timeout >= period) { |
---|
147 |
noitL(noit_stderr, "check uuid: '%s' timeout > period\n", uuid_str); |
---|
148 |
timeout = period/2; |
---|
149 |
} |
---|
150 |
options = noit_conf_get_hash(sec[i], "ancestor-or-self::node()/config/*"); |
---|
151 |
noit_poller_schedule(target, module, name, options, |
---|
152 |
period, timeout, oncheck[0] ? oncheck : NULL, |
---|
153 |
uuid, out_uuid); |
---|
154 |
noitL(noit_debug, "loaded uuid: %s\n", uuid_str); |
---|
155 |
} |
---|
156 |
} |
---|
157 |
|
---|
158 |
void |
---|
159 |
noit_poller_initiate() { |
---|
160 |
noit_hash_iter iter = NOIT_HASH_ITER_ZERO; |
---|
161 |
uuid_t key_id; |
---|
162 |
int klen; |
---|
163 |
noit_check_t *check; |
---|
164 |
while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen, |
---|
165 |
(void **)&check)) { |
---|
166 |
noit_module_t *mod; |
---|
167 |
mod = noit_module_lookup(check->module); |
---|
168 |
if(mod) { |
---|
169 |
if((check->flags & NP_DISABLED) == 0) |
---|
170 |
mod->initiate_check(mod, check, 0, NULL); |
---|
171 |
} |
---|
172 |
else { |
---|
173 |
noitL(noit_stderr, "Cannot find module '%s'\n", check->module); |
---|
174 |
check->flags |= NP_DISABLED; |
---|
175 |
} |
---|
176 |
} |
---|
177 |
} |
---|
178 |
|
---|
179 |
void |
---|
180 |
noit_poller_make_causal_map() { |
---|
181 |
noit_hash_iter iter = NOIT_HASH_ITER_ZERO; |
---|
182 |
uuid_t key_id; |
---|
183 |
int klen; |
---|
184 |
noit_check_t *check, *parent; |
---|
185 |
while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen, |
---|
186 |
(void **)&check)) { |
---|
187 |
if(check->oncheck) { |
---|
188 |
/* This service is causally triggered by another service */ |
---|
189 |
char fullcheck[1024]; |
---|
190 |
char *name = check->oncheck; |
---|
191 |
char *target = NULL; |
---|
192 |
|
---|
193 |
if((target = strchr(check->oncheck, '`')) != NULL) { |
---|
194 |
strlcpy(fullcheck, check->oncheck, target - check->oncheck); |
---|
195 |
name = target + 1; |
---|
196 |
target = fullcheck; |
---|
197 |
} |
---|
198 |
else |
---|
199 |
target = check->target; |
---|
200 |
|
---|
201 |
parent = noit_poller_lookup_by_name(target, name); |
---|
202 |
if(!parent) { |
---|
203 |
check->flags |= NP_DISABLED; |
---|
204 |
noitL(noit_stderr, "Disabling check %s/%s, can't find oncheck %s/%s\n", |
---|
205 |
check->target, check->name, target, name); |
---|
206 |
} |
---|
207 |
else { |
---|
208 |
dep_list_t *dep; |
---|
209 |
dep = malloc(sizeof(*dep)); |
---|
210 |
dep->check = check; |
---|
211 |
dep->next = parent->causal_checks; |
---|
212 |
parent->causal_checks = dep; |
---|
213 |
} |
---|
214 |
} |
---|
215 |
} |
---|
216 |
} |
---|
217 |
void |
---|
218 |
noit_poller_init() { |
---|
219 |
noit_skiplist_init(&polls_by_name); |
---|
220 |
noit_skiplist_set_compare(&polls_by_name, __check_name_compare, |
---|
221 |
__check_name_compare); |
---|
222 |
noit_poller_load_checks(); |
---|
223 |
noit_poller_make_causal_map(); |
---|
224 |
register_console_check_commands(); |
---|
225 |
noit_poller_initiate(); |
---|
226 |
} |
---|
227 |
|
---|
228 |
int |
---|
229 |
noit_poller_schedule(const char *target, |
---|
230 |
const char *module, |
---|
231 |
const char *name, |
---|
232 |
noit_hash_table *config, |
---|
233 |
u_int32_t period, |
---|
234 |
u_int32_t timeout, |
---|
235 |
const char *oncheck, |
---|
236 |
uuid_t in, |
---|
237 |
uuid_t out) { |
---|
238 |
int8_t family; |
---|
239 |
int rv; |
---|
240 |
union { |
---|
241 |
struct in_addr addr4; |
---|
242 |
struct in6_addr addr6; |
---|
243 |
} a; |
---|
244 |
noit_check_t *new_check; |
---|
245 |
|
---|
246 |
|
---|
247 |
family = AF_INET; |
---|
248 |
rv = inet_pton(family, target, &a); |
---|
249 |
if(rv != 1) { |
---|
250 |
family = AF_INET6; |
---|
251 |
rv = inet_pton(family, target, &a); |
---|
252 |
if(rv != 1) { |
---|
253 |
noitL(noit_stderr, "Cannot translate '%s' to IP\n", target); |
---|
254 |
return -1; |
---|
255 |
} |
---|
256 |
} |
---|
257 |
|
---|
258 |
new_check = calloc(1, sizeof(*new_check)); |
---|
259 |
if(!new_check) return -1; |
---|
260 |
new_check->generation = __config_load_generation; |
---|
261 |
new_check->target_family = family; |
---|
262 |
memcpy(&new_check->target_addr, &a, sizeof(a)); |
---|
263 |
new_check->target = strdup(target); |
---|
264 |
new_check->module = strdup(module); |
---|
265 |
new_check->name = name ? strdup(name): NULL; |
---|
266 |
|
---|
267 |
if(config != NULL) { |
---|
268 |
noit_hash_iter iter = NOIT_HASH_ITER_ZERO; |
---|
269 |
const char *k; |
---|
270 |
int klen; |
---|
271 |
void *data; |
---|
272 |
new_check->config = calloc(1, sizeof(*new_check->config)); |
---|
273 |
while(noit_hash_next(config, &iter, &k, &klen, &data)) { |
---|
274 |
noit_hash_store(new_check->config, strdup(k), klen, strdup((char *)data)); |
---|
275 |
} |
---|
276 |
} |
---|
277 |
new_check->oncheck = oncheck ? strdup(oncheck) : NULL; |
---|
278 |
new_check->period = period; |
---|
279 |
new_check->timeout = timeout; |
---|
280 |
new_check->flags = 0; |
---|
281 |
if(uuid_is_null(in)) |
---|
282 |
uuid_generate(new_check->checkid); |
---|
283 |
else |
---|
284 |
uuid_copy(new_check->checkid, in); |
---|
285 |
|
---|
286 |
assert(noit_hash_store(&polls, |
---|
287 |
(char *)new_check->checkid, UUID_SIZE, |
---|
288 |
new_check)); |
---|
289 |
noit_skiplist_insert(&polls_by_name, new_check); |
---|
290 |
uuid_copy(out, new_check->checkid); |
---|
291 |
return 0; |
---|
292 |
} |
---|
293 |
|
---|
294 |
int |
---|
295 |
noit_poller_deschedule(uuid_t in) { |
---|
296 |
noit_check_t *checker; |
---|
297 |
if(noit_hash_retrieve(&polls, |
---|
298 |
(char *)in, UUID_SIZE, |
---|
299 |
(void **)&checker) == 0) { |
---|
300 |
return -1; |
---|
301 |
} |
---|
302 |
if(checker->flags & NP_RUNNING) { |
---|
303 |
checker->flags |= NP_KILLED; |
---|
304 |
return 0; |
---|
305 |
} |
---|
306 |
if(checker->fire_event) { |
---|
307 |
eventer_remove(checker->fire_event); |
---|
308 |
eventer_free(checker->fire_event); |
---|
309 |
checker->fire_event = NULL; |
---|
310 |
} |
---|
311 |
noit_hash_delete(&polls, (char *)in, UUID_SIZE, free, free); |
---|
312 |
|
---|
313 |
if(checker->target) free(checker->target); |
---|
314 |
if(checker->module) free(checker->module); |
---|
315 |
if(checker->name) free(checker->name); |
---|
316 |
if(checker->config) { |
---|
317 |
noit_hash_destroy(checker->config, free, free); |
---|
318 |
free(checker->config); |
---|
319 |
checker->config = NULL; |
---|
320 |
} |
---|
321 |
free(checker); |
---|
322 |
return 0; |
---|
323 |
} |
---|
324 |
|
---|
325 |
noit_check_t * |
---|
326 |
noit_poller_lookup(uuid_t in) { |
---|
327 |
noit_check_t *check; |
---|
328 |
if(noit_hash_retrieve(&polls, |
---|
329 |
(char *)in, UUID_SIZE, |
---|
330 |
(void **)&check)) { |
---|
331 |
return check; |
---|
332 |
} |
---|
333 |
return NULL; |
---|
334 |
} |
---|
335 |
noit_check_t * |
---|
336 |
noit_poller_lookup_by_name(char *target, char *name) { |
---|
337 |
noit_check_t *check, *tmp_check; |
---|
338 |
tmp_check = calloc(1, sizeof(*tmp_check)); |
---|
339 |
tmp_check->target = target; |
---|
340 |
tmp_check->name = name; |
---|
341 |
check = noit_skiplist_find(&polls_by_name, tmp_check, NULL); |
---|
342 |
free(tmp_check); |
---|
343 |
return check; |
---|
344 |
} |
---|
345 |
|
---|
346 |
static void |
---|
347 |
__free_metric(void *vm) { |
---|
348 |
metric_t *m = vm; |
---|
349 |
free(m->metric_name); |
---|
350 |
if(m->metric_value.i) free(m->metric_value.i); |
---|
351 |
} |
---|
352 |
|
---|
353 |
void |
---|
354 |
__stats_add_metric(stats_t *newstate, metric_t *m) { |
---|
355 |
noit_hash_replace(&newstate->metrics, m->metric_name, strlen(m->metric_name), |
---|
356 |
m, NULL, __free_metric); |
---|
357 |
} |
---|
358 |
|
---|
359 |
void |
---|
360 |
noit_stats_set_metric_int(stats_t *newstate, char *name, int *value) { |
---|
361 |
metric_t *m = calloc(1, sizeof(*m)); |
---|
362 |
m->metric_name = strdup(name); |
---|
363 |
m->metric_type = METRIC_INT; |
---|
364 |
if(value) { |
---|
365 |
m->metric_value.i = malloc(sizeof(*value)); |
---|
366 |
*(m->metric_value.i) = *value; |
---|
367 |
} |
---|
368 |
__stats_add_metric(newstate, m); |
---|
369 |
} |
---|
370 |
|
---|
371 |
void |
---|
372 |
noit_stats_set_metric_float(stats_t *newstate, char *name, float *value) { |
---|
373 |
metric_t *m = calloc(1, sizeof(*m)); |
---|
374 |
m->metric_name = strdup(name); |
---|
375 |
m->metric_type = METRIC_FLOAT; |
---|
376 |
if(value) { |
---|
377 |
m->metric_value.f = malloc(sizeof(*value)); |
---|
378 |
*(m->metric_value.f) = *value; |
---|
379 |
} |
---|
380 |
__stats_add_metric(newstate, m); |
---|
381 |
} |
---|
382 |
|
---|
383 |
void |
---|
384 |
noit_stats_set_metric_string(stats_t *newstate, char *name, char *value) { |
---|
385 |
metric_t *m = calloc(1, sizeof(*m)); |
---|
386 |
m->metric_name = strdup(name); |
---|
387 |
m->metric_type = METRIC_STRING; |
---|
388 |
m->metric_value.s = value ? strdup(value) : NULL; |
---|
389 |
__stats_add_metric(newstate, m); |
---|
390 |
} |
---|
391 |
|
---|
392 |
void |
---|
393 |
noit_check_set_stats(struct _noit_module *module, |
---|
394 |
noit_check_t *check, stats_t *newstate) { |
---|
395 |
int report_change = 0; |
---|
396 |
dep_list_t *dep; |
---|
397 |
if(check->stats.previous.status) |
---|
398 |
free(check->stats.previous.status); |
---|
399 |
noit_hash_destroy(&check->stats.previous.metrics, NULL, __free_metric); |
---|
400 |
memcpy(&check->stats.previous, &check->stats.current, sizeof(stats_t)); |
---|
401 |
memcpy(&check->stats.current, newstate, sizeof(stats_t)); |
---|
402 |
if(check->stats.current.status) |
---|
403 |
check->stats.current.status = strdup(check->stats.current.status); |
---|
404 |
|
---|
405 |
/* check for state changes */ |
---|
406 |
if(check->stats.current.available != 0 && |
---|
407 |
check->stats.previous.available != 0 && |
---|
408 |
check->stats.current.available != check->stats.previous.available) |
---|
409 |
report_change = 1; |
---|
410 |
if(check->stats.current.state != 0 && |
---|
411 |
check->stats.previous.state != 0 && |
---|
412 |
check->stats.current.state != check->stats.previous.state) |
---|
413 |
report_change = 1; |
---|
414 |
|
---|
415 |
noitL(noit_error, "%s/%s <- [%s]\n", check->target, check->module, |
---|
416 |
check->stats.current.status); |
---|
417 |
if(report_change) { |
---|
418 |
noitL(noit_error, "%s/%s -> [%s/%s]\n", |
---|
419 |
check->target, check->module, |
---|
420 |
__noit_check_available_string(check->stats.current.available), |
---|
421 |
__noit_check_state_string(check->stats.current.state)); |
---|
422 |
} |
---|
423 |
for(dep = check->causal_checks; dep; dep = dep->next) { |
---|
424 |
noit_module_t *mod; |
---|
425 |
mod = noit_module_lookup(dep->check->module); |
---|
426 |
assert(mod); |
---|
427 |
noitL(noit_debug, "Firing %s/%s in response to %s/%s\n", |
---|
428 |
dep->check->target, dep->check->name, |
---|
429 |
check->target, check->name); |
---|
430 |
mod->initiate_check(mod, dep->check, 1, check); |
---|
431 |
} |
---|
432 |
} |
---|
433 |
|
---|
434 |
static void |
---|
435 |
nc_printf_check_brief(noit_console_closure_t ncct, |
---|
436 |
noit_check_t *check) { |
---|
437 |
char out[512]; |
---|
438 |
char uuid_str[41]; |
---|
439 |
snprintf(out, sizeof(out), "%s/%s", check->target, check->name); |
---|
440 |
uuid_unparse_lower(check->checkid, uuid_str); |
---|
441 |
nc_printf(ncct, "%s %s\n", uuid_str, out); |
---|
442 |
if(check->stats.current.status) |
---|
443 |
nc_printf(ncct, "\t%s\n", check->stats.current.status); |
---|
444 |
} |
---|
445 |
|
---|
446 |
static int |
---|
447 |
noit_console_show_checks(noit_console_closure_t ncct, |
---|
448 |
int argc, char **argv, |
---|
449 |
noit_console_state_t *dstate, |
---|
450 |
void *closure) { |
---|
451 |
struct timeval _now; |
---|
452 |
noit_hash_iter iter = NOIT_HASH_ITER_ZERO; |
---|
453 |
uuid_t key_id; |
---|
454 |
int klen; |
---|
455 |
noit_check_t *check; |
---|
456 |
|
---|
457 |
gettimeofday(&_now, NULL); |
---|
458 |
while(noit_hash_next(&polls, &iter, (const char **)key_id, &klen, |
---|
459 |
(void **)&check)) { |
---|
460 |
nc_printf_check_brief(ncct, check); |
---|
461 |
} |
---|
462 |
return 0; |
---|
463 |
} |
---|
464 |
static void |
---|
465 |
register_console_check_commands() { |
---|
466 |
noit_console_state_t *tl; |
---|
467 |
cmd_info_t *showcmd; |
---|
468 |
|
---|
469 |
tl = noit_console_state_initial(); |
---|
470 |
showcmd = noit_console_state_get_cmd(tl, "show"); |
---|
471 |
assert(showcmd && showcmd->dstate); |
---|
472 |
|
---|
473 |
noit_console_state_add_cmd(showcmd->dstate, |
---|
474 |
NCSCMD("checks", noit_console_show_checks, NULL, NULL)); |
---|
475 |
} |
---|
476 |
|
---|