root/src/modules/snmp.c

Revision 46c8ff5682f6e2498f7d9295573d3ebcc82b9eb2, 16.1 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 7 years ago)

handle timeouts correctly

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  */
5
6 #include "noit_defines.h"
7
8 #include <stdio.h>
9 #include <unistd.h>
10 #include <errno.h>
11 #include <assert.h>
12 #include <math.h>
13
14 #include "noit_module.h"
15 #include "noit_check.h"
16 #include "noit_check_tools.h"
17 #include "utils/noit_log.h"
18 #include "utils/noit_hash.h"
19
20 #include <net-snmp/net-snmp-config.h>
21 #include <net-snmp/net-snmp-includes.h>
22
23 static noit_log_stream_t nlerr = NULL;
24 static noit_log_stream_t nldeb = NULL;
25 static noit_hash_table target_sessions = NOIT_HASH_EMPTY;
26
27 struct target_session {
28   void *sess_handle;
29   eventer_t timeoutevent;
30   int fd;
31   int in_table;
32   int refcnt;
33 };
34
35 struct snmp_check_closure {
36   noit_module_t *self;
37   noit_check_t *check;
38 };
39
40 struct check_info {
41   int reqid;
42   int timedout;
43   struct {
44      char *confname;
45      char *oidname;
46      oid oid[MAX_OID_LEN];
47      size_t oidlen;
48   } *oids;
49   int noids;
50   eventer_t timeoutevent;
51   noit_module_t *self;
52   noit_check_t *check;
53 };
54
55 /* We hold struct check_info's in there key's by their reqid.
56  *   If they timeout, we remove them.
57  *
58  *   When SNMP queries complete, we look them up, if we find them
59  *   then we know we can remove the timeout and  complete the check.
60  *   If we don't find them, the timeout fired and removed the check.
61  */
62 noit_hash_table active_checks = NOIT_HASH_EMPTY;
63 static void add_check(struct check_info *c) {
64   noit_hash_store(&active_checks, (char *)&c->reqid, sizeof(c->reqid), c);
65 }
66 static struct check_info *get_check(int reqid) {
67   struct check_info *c;
68   if(noit_hash_retrieve(&active_checks, (char *)&reqid, sizeof(reqid),
69                         (void **)&c))
70     return c;
71   return NULL;
72 }
73 static void remove_check(struct check_info *c) {
74   noit_hash_delete(&active_checks, (char *)&c->reqid, sizeof(c->reqid),
75                    NULL, NULL);
76 }
77
78 static int noit_snmp_recur_handler(eventer_t e, int mask, void *closure,
79                                    struct timeval *now);
80
81 static int noit_snmp_init(noit_module_t *self) {
82   register_mib_handlers();
83   read_premib_configs();
84   read_configs();
85   netsnmp_init_mib();
86   init_snmp("noitd");
87   return 0;
88 }
89
90 /* Handling of results */
91 static void noit_snmp_log_results(noit_module_t *self, noit_check_t *check,
92                                   struct snmp_pdu *pdu) {
93   struct check_info *info = check->closure;
94   struct variable_list *vars;
95   struct timeval duration;
96   char buff[128];
97   stats_t current;
98   int nresults = 0;
99
100   noit_check_stats_clear(&current);
101
102   if(pdu)
103     for(vars = pdu->variables; vars; vars = vars->next_variable)
104       nresults++;
105
106   gettimeofday(&current.whence, NULL);
107   sub_timeval(current.whence, check->last_fire_time, &duration);
108   current.duration = duration.tv_sec * 1000 + duration.tv_usec / 1000;
109   current.available = pdu ? NP_AVAILABLE : NP_UNAVAILABLE;
110   current.state = (nresults == info->noids) ? NP_GOOD : NP_BAD;
111   snprintf(buff, sizeof(buff), "%d/%d gets", nresults, info->noids);
112   current.status = buff;
113
114   /* We have no results over which to iterate. */
115   if(!pdu)
116     noit_check_set_stats(self, check, &current);
117
118   /* manipulate the information ourselves */
119   nresults = 0;
120   for(vars = pdu->variables; vars; vars = vars->next_variable) {
121     char *sp;
122     int oid_idx;
123     double float_conv;
124     u_int64_t u64;
125     int64_t i64;
126     char *endptr;
127     char varbuff[256];
128
129     /* find the oid to which this is the response */
130     oid_idx = nresults; /* our current idx is the most likely */
131     if(info->oids[oid_idx].oidlen != vars->name_length ||
132        memcmp(info->oids[oid_idx].oid, vars->name,
133               vars->name_length * sizeof(oid))) {
134       /* Not the most obvious guess */
135       for(oid_idx = info->noids - 1; oid_idx >= 0; oid_idx--) {
136         if(info->oids[oid_idx].oidlen == vars->name_length &&
137            memcmp(info->oids[oid_idx].oid, vars->name,
138                   vars->name_length * sizeof(oid))) break;
139       }
140     }
141     if(oid_idx < 0) {
142       snprint_variable(varbuff, sizeof(varbuff),
143                        vars->name, vars->name_length, vars);
144       noitL(nlerr, "Unexpected oid results to %s`%s`%s: %s\n",
145             check->target, check->module, check->name, varbuff);
146       nresults++;
147       continue;
148     }
149    
150 #define SETM(a,b) noit_stats_set_metric(&current, \
151                                         info->oids[oid_idx].confname, a, b)
152     switch(vars->type) {
153       case ASN_OCTET_STR:
154         sp = malloc(1 + vars->val_len);
155         memcpy(sp, vars->val.string, vars->val_len);
156         sp[vars->val_len] = '\0';
157         SETM(METRIC_STRING, sp);
158         free(sp);
159         break;
160       case ASN_INTEGER:
161       case ASN_GAUGE:
162         SETM(METRIC_INT32, vars->val.integer);
163         break;
164       case ASN_TIMETICKS:
165       case ASN_COUNTER:
166         SETM(METRIC_UINT32, vars->val.integer);
167         break;
168       case ASN_INTEGER64:
169         printI64(varbuff, vars->val.counter64);
170         i64 = strtoll(varbuff, &endptr, 10);
171         SETM(METRIC_INT64, (varbuff == endptr) ? NULL : &i64);
172         break;
173       case ASN_COUNTER64:
174         printU64(varbuff, vars->val.counter64);
175         u64 = strtoull(varbuff, &endptr, 10);
176         SETM(METRIC_UINT64, (varbuff == endptr) ? NULL : &u64);
177         break;
178       case ASN_FLOAT:
179         if(vars->val.floatVal) float_conv = *(vars->val.floatVal);
180         SETM(METRIC_DOUBLE, vars->val.floatVal ? &float_conv : NULL);
181         break;
182       case ASN_DOUBLE:
183         SETM(METRIC_DOUBLE, vars->val.doubleVal);
184         break;
185       default:
186         snprint_variable(varbuff, sizeof(varbuff), vars->name, vars->name_length, vars);
187         printf("%s!\n", varbuff);
188         /* Advance passed the first space and use that unless there
189          * is no space or we have no more string left.
190          */
191         sp = strchr(varbuff, ' ');
192         if(sp) sp++;
193         SETM(METRIC_STRING, (sp && *sp) ? sp : NULL);
194     }
195     nresults++;
196   }
197   noit_check_set_stats(self, check, &current);
198 }
199
200 struct target_session *
201 _get_target_session(char *target) {
202   struct target_session *ts;
203   if(!noit_hash_retrieve(&target_sessions,
204                          target, strlen(target), (void **)&ts)) {
205     ts = calloc(1, sizeof(*ts));
206     ts->fd = -1;
207     ts->refcnt = 0;
208     ts->in_table = 1;
209     noit_hash_store(&target_sessions,
210                     strdup(target), strlen(target), ts);
211   }
212   return ts;
213 }
214
215 static int noit_snmp_session_cleanse(struct target_session *ts) {
216   if(ts->refcnt == 0 && ts->sess_handle) {
217     eventer_remove_fd(ts->fd);
218     if(ts->timeoutevent) eventer_remove(ts->timeoutevent);
219     ts->timeoutevent = NULL;
220     snmp_sess_close(ts->sess_handle);
221     ts->sess_handle = NULL;
222     if(!ts->in_table) {
223       free(ts);
224     }
225     return 1;
226   }
227   return 0;
228 }
229
230 static int noit_snmp_session_timeout(eventer_t e, int mask, void *closure,
231                                      struct timeval *now) {
232   struct target_session *ts = closure;
233   snmp_sess_timeout(ts->sess_handle);
234   noit_snmp_session_cleanse(ts);
235   return 0;
236 }
237
238 static int noit_snmp_check_timeout(eventer_t e, int mask, void *closure,
239                                    struct timeval *now) {
240   struct check_info *info = closure;
241   info->timedout = 1;
242   remove_check(info);
243   if(info->timeoutevent) {
244     eventer_remove(info->timeoutevent);
245     eventer_free(info->timeoutevent);
246     info->timeoutevent = NULL;
247   }
248   /* Log our findings */
249   noit_snmp_log_results(info->self, info->check, NULL);
250   info->check->flags &= ~NP_RUNNING;
251   return 0;
252 }
253
254 static void _set_ts_timeout(struct target_session *ts, struct timeval *t) {
255   struct timeval now;
256   eventer_t e = NULL;
257   if(ts->timeoutevent) e = eventer_remove(ts->timeoutevent);
258   ts->timeoutevent = NULL;
259   if(!t) return;
260
261   gettimeofday(&now, NULL);
262   if(!e) e = eventer_alloc();
263   e->callback = noit_snmp_session_timeout;
264   e->closure = ts;
265   e->mask = EVENTER_TIMER;
266   add_timeval(now, *t, &e->whence);
267   ts->timeoutevent = e;
268   eventer_add(e);
269 }
270
271 static int noit_snmp_handler(eventer_t e, int mask, void *closure,
272                              struct timeval *now) {
273   fd_set fdset;
274   int fds, block = 0;
275   struct timeval timeout;
276   struct target_session *ts = closure;
277   FD_ZERO(&fdset);
278   FD_SET(e->fd, &fdset);
279   fds = e->fd + 1;
280   snmp_sess_read(ts->sess_handle, &fdset);
281   if(noit_snmp_session_cleanse(ts))
282     return 0;
283   snmp_sess_select_info(ts->sess_handle, &fds, &fdset, &timeout, &block);
284   _set_ts_timeout(ts, block ? &timeout : NULL);
285   return EVENTER_READ | EVENTER_EXCEPTION;
286 }
287 static int noit_snmp_asynch_response(int operation, struct snmp_session *sp,
288                                      int reqid, struct snmp_pdu *pdu,
289                                      void *magic) {
290   struct check_info *info;
291   struct target_session *ts = magic;
292
293   /* We don't deal with refcnt hitting zero here.  We could only be hit from
294    * the snmp read/timeout stuff.  Handle it there.
295    */
296   ts->refcnt--;
297
298   info = get_check(reqid);
299   if(!info) return 1;
300   remove_check(info);
301   if(info->timeoutevent) {
302     eventer_remove(info->timeoutevent);
303     eventer_free(info->timeoutevent);
304     info->timeoutevent = NULL;
305   }
306
307   /* Log our findings */
308   noit_snmp_log_results(info->self, info->check, pdu);
309   info->check->flags &= ~NP_RUNNING;
310   return 1;
311 }
312
313 static void noit_snmp_sess_open(struct target_session *ts,
314                                 noit_check_t *check) {
315   const char *community;
316   struct snmp_session sess;
317   snmp_sess_init(&sess);
318   sess.version = SNMP_VERSION_2c;
319   sess.peername = check->target;
320   if(!noit_hash_retrieve(check->config, "community", strlen("community"),
321                          (void **)&community)) {
322     community = "public";
323   }
324   sess.community = (unsigned char *)community;
325   sess.community_len = strlen(community);
326   sess.callback = noit_snmp_asynch_response;
327   sess.callback_magic = ts;
328   ts->sess_handle = snmp_sess_open(&sess);
329 }
330
331 static int noit_snmp_fill_req(struct snmp_pdu *req, noit_check_t *check) {
332   int i, klen;
333   noit_hash_iter iter = NOIT_HASH_ITER_ZERO;
334   const char *name, *value;
335   struct check_info *info = check->closure;
336   noit_hash_table check_attrs_hash = NOIT_HASH_EMPTY;
337
338   /* Toss the old set and bail if we have zero */
339   if(info->oids) {
340     for(i=0; i<info->noids;i++) {
341       if(info->oids[i].confname) free(info->oids[i].confname);
342       if(info->oids[i].oidname) free(info->oids[i].oidname);
343     }
344     free(info->oids);
345   }
346   info->noids = 0;
347   info->oids = NULL;
348
349   /* Figure our how many. */
350   while(noit_hash_next(check->config, &iter, &name, &klen, (void **)&value)) {
351     if(!strncasecmp(name, "oid_", 4)) {
352       info->noids++;
353     }
354   }
355
356   if(info->noids == 0) return 0;
357
358   /* Create a hash of important check attributes */
359 #define CA_STORE(a,b) noit_hash_store(&check_attrs_hash, a, strlen(a), b)
360   CA_STORE("target", check->target);
361   CA_STORE("name", check->name);
362   CA_STORE("module", check->module);
363
364   /* Fill out the new set of required oids */
365   info->oids = calloc(info->noids, sizeof(*info->oids));
366   memset(&iter, 0, sizeof(iter));
367   i = 0;
368   while(noit_hash_next(check->config, &iter, &name, &klen, (void **)&value)) {
369     if(!strncasecmp(name, "oid_", 4)) {
370       char oidbuff[128];
371       name += 4;
372       info->oids[i].confname = strdup(name);
373       noit_check_interpolate(oidbuff, sizeof(oidbuff), value,
374                              &check_attrs_hash, check->config);
375       info->oids[i].oidname = strdup(oidbuff);
376       info->oids[i].oidlen = MAX_OID_LEN;
377       get_node(oidbuff, info->oids[i].oid, &info->oids[i].oidlen);
378       read_objid(oidbuff, info->oids[i].oid, &info->oids[i].oidlen);
379       snmp_add_null_var(req, info->oids[i].oid, info->oids[i].oidlen);
380       i++;
381     }
382   }
383   assert(info->noids == i);
384   noit_hash_destroy(&check_attrs_hash, NULL, NULL);
385   return info->noids;
386 }
387 static int noit_snmp_send(noit_module_t *self, noit_check_t *check) {
388   struct snmp_pdu *req;
389   struct target_session *ts;
390   struct check_info *info = check->closure;
391
392   info->self = self;
393   info->check = check;
394   info->timedout = 0;
395
396   check->flags |= NP_RUNNING;
397   ts = _get_target_session(check->target);
398   if(!ts->refcnt) {
399     eventer_t newe;
400     int fds, block;
401     struct timeval timeout;
402     fd_set fdset;
403     noit_snmp_sess_open(ts, check);
404     block = 0;
405     fds = 0;
406     FD_ZERO(&fdset);
407     snmp_sess_select_info(ts->sess_handle, &fds, &fdset, &timeout, &block);
408     assert(fds > 0);
409     ts->fd = fds-1;
410     newe = eventer_alloc();
411     newe->fd = ts->fd;
412     newe->callback = noit_snmp_handler;
413     newe->closure = ts;
414     newe->mask = EVENTER_READ | EVENTER_EXCEPTION;
415     eventer_add(newe);
416   }
417   if(!ts->sess_handle) {
418     /* Error */
419   }
420   ts->refcnt++; /* Increment here, decrement when this check completes */
421
422   req = snmp_pdu_create(SNMP_MSG_GET);
423   noit_snmp_fill_req(req, check);
424   /* Setup out snmp requests */
425   if(ts->sess_handle &&
426      (info->reqid = snmp_sess_send(ts->sess_handle, req)) != 0) {
427     struct timeval when, to;
428     info->timeoutevent = eventer_alloc();
429     info->timeoutevent->callback = noit_snmp_check_timeout;
430     info->timeoutevent->closure = info;
431     info->timeoutevent->mask = EVENTER_TIMER;
432
433     gettimeofday(&when, NULL);
434     to.tv_sec = check->timeout / 1000;
435     to.tv_usec = (check->timeout % 1000) * 1000;
436     add_timeval(when, to, &info->timeoutevent->whence);
437     eventer_add(info->timeoutevent);
438     add_check(info);
439   }
440   else {
441     ts->refcnt--;
442      noit_snmp_session_cleanse(ts);
443     /* Error */
444     snmp_free_pdu(req);
445     /* Log our findings */
446     noit_snmp_log_results(self, check, NULL);
447     check->flags &= ~NP_RUNNING;
448   }
449   return 0;
450 }
451
452 static int noit_snmp_schedule_next(noit_module_t *self,
453                                    struct timeval *last_check,
454                                    noit_check_t *check,
455                                    struct timeval *now) {
456   eventer_t newe;
457   struct timeval period, earliest;
458   struct snmp_check_closure *scc;
459
460   if(check->period == 0) return 0;
461   if(NOIT_CHECK_DISABLED(check) || NOIT_CHECK_KILLED(check)) return 0;
462
463   /* If we have an event, we know when we intended it to fire.  This means
464    * we should schedule that point + period.
465    */
466   if(now)
467     memcpy(&earliest, now, sizeof(earliest));
468   else
469     gettimeofday(&earliest, NULL);
470   period.tv_sec = check->period / 1000;
471   period.tv_usec = (check->period % 1000) * 1000;
472
473   newe = eventer_alloc();
474   memcpy(&newe->whence, last_check, sizeof(*last_check));
475   add_timeval(newe->whence, period, &newe->whence);
476   if(compare_timeval(newe->whence, earliest) < 0)
477     memcpy(&newe->whence, &earliest, sizeof(earliest));
478   newe->mask = EVENTER_TIMER;
479   newe->callback = noit_snmp_recur_handler;
480   scc = calloc(1, sizeof(*scc));
481   scc->self = self;
482   scc->check = check;
483   newe->closure = scc;
484
485   eventer_add(newe);
486   check->fire_event = newe;
487   return 0;
488 }
489
490 static int noit_snmp_recur_handler(eventer_t e, int mask, void *closure,
491                                    struct timeval *now) {
492   struct snmp_check_closure *cl = closure;
493   cl->check->fire_event = NULL;
494   noit_snmp_schedule_next(cl->self, &e->whence, cl->check, now);
495   noit_snmp_send(cl->self, cl->check);
496   free(cl);
497   return 0;
498 }
499
500 static int noit_snmp_initiate_check(noit_module_t *self, noit_check_t *check,
501                                     int once, noit_check_t *cause) {
502   if(!check->closure) check->closure = calloc(1, sizeof(struct check_info));
503   if(once) {
504     noit_snmp_send(self, check);
505     return 0;
506   }
507   if(!check->fire_event) {
508     struct timeval epoch;
509     noit_check_fake_last_check(check, &epoch, NULL);
510     noit_snmp_schedule_next(self, &epoch, check, NULL);
511   }
512   return 0;
513 }
514
515 static int noit_snmp_config(noit_module_t *self, noit_hash_table *config) {
516   return 0;
517 }
518 static int noit_snmp_onload(noit_module_t *self) {
519   nlerr = noit_log_stream_find("error/noit_snmp");
520   nldeb = noit_log_stream_find("debug/noit_snmp");
521   if(!nlerr) nlerr = noit_stderr;
522   if(!nldeb) nldeb = noit_debug;
523   eventer_name_callback("noit_snmp/recur_handler", noit_snmp_recur_handler);
524   eventer_name_callback("noit_snmp/check_timeout", noit_snmp_check_timeout);
525   eventer_name_callback("noit_snmp/session_timeout", noit_snmp_session_timeout);
526   eventer_name_callback("noit_snmp/handler", noit_snmp_handler);
527   return 0;
528 }
529 noit_module_t snmp = {
530   NOIT_MODULE_MAGIC,
531   NOIT_MODULE_ABI_VERSION,
532   "snmp",
533   "SNMP collection",
534   noit_snmp_onload,
535   noit_snmp_config,
536   noit_snmp_init,
537   noit_snmp_initiate_check,
538   NULL /* noit_snmp_cleanup */
539 };
540
Note: See TracBrowser for help on using the browser.