root/src/modules/ping_icmp.c

Revision ec240f3789e4102e71fe1246fbff2af5f6fb3dc2, 15.6 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 5 years ago)

make ping cognizant of generation gaps on checks, fixes #107

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  */
5
6 #include "noit_defines.h"
7
8 #include <stdio.h>
9 #include <unistd.h>
10 #include <netdb.h>
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <sys/ioctl.h>
14 #ifdef HAVE_SYS_FILIO_H
15 #include <sys/filio.h>
16 #endif
17 #ifdef HAVE_NETINET_IN_SYSTM_H
18 #include <netinet/in_systm.h>
19 #endif
20 #include <netinet/in.h>
21 #include <netinet/ip.h>
22 #include <netinet/ip_icmp.h>
23 #include <math.h>
24 #ifndef MAXFLOAT
25 #include <float.h>
26 #define MAXFLOAT FLT_MAX
27 #endif
28
29 #include "noit_module.h"
30 #include "noit_check.h"
31 #include "noit_check_tools.h"
32 #include "utils/noit_log.h"
33
/* Fallbacks used by ping_icmp_send() when a check's config carries no
 * "interval" / "count" entry. */
#define PING_INTERVAL 2000 /* milliseconds between consecutive echo requests */
#define PING_COUNT    5    /* echo requests scheduled per check run */
36
37 struct check_info {
38   int check_no;
39   int check_seq_no;
40   int seq;
41   int32_t expected_count;
42   float *turnaround;
43   eventer_t timeout_event;
44 };
45 struct ping_payload {
46   uuid_t checkid;
47   u_int32_t generation;   
48   struct timeval whence;
49   int    check_no;
50   int    check_pack_no;
51   int    check_pack_cnt;
52 };
53 struct ping_closure {
54   noit_module_t *self;
55   noit_check_t *check;
56   void *payload;
57   int payload_len;
58 };
59 static noit_log_stream_t nlerr = NULL;
60 static noit_log_stream_t nldeb = NULL;
61 static int in_cksum(u_short *addr, int len);
62
/* Module-wide userdata created by ping_icmp_init(): the raw sockets used
 * for sending and receiving.  A descriptor of -1 means that socket is not
 * available. */
typedef struct {
  int ipv4_fd;  /* AF_INET raw socket */
  int ipv6_fd;  /* AF_INET6 raw socket */
} ping_icmp_data_t;
67
68 static int ping_icmp_config(noit_module_t *self, noit_hash_table *options) {
69   return 0;
70 }
71 static int ping_icmp_is_complete(noit_module_t *self, noit_check_t *check) {
72   int i;
73   struct check_info *data;
74   data = (struct check_info *)check->closure;
75   for(i=0; i<data->expected_count; i++)
76     if(data->turnaround[i] == 0.0) {
77       noitL(nldeb, "ping_icmp: %s %d is still outstanding.\n",
78             check->target, i);
79       return 0;
80     }
81   return 1;
82 }
83 static void ping_icmp_log_results(noit_module_t *self, noit_check_t *check) {
84   struct check_info *data;
85   double avail, min = MAXFLOAT, max = 0.0, avg = 0.0, cnt;
86   int i, points = 0;
87   char human_buffer[256];
88   stats_t current;
89   struct timeval duration;
90
91   noit_check_stats_clear(&current);
92
93   data = (struct check_info *)check->closure;
94   for(i=0; i<data->expected_count; i++) {
95     if(data->turnaround[i] != 0) {
96       points++;
97       avg += data->turnaround[i];
98       if(data->turnaround[i] > max) max = data->turnaround[i];
99       if(data->turnaround[i] < min) min = data->turnaround[i];
100     }
101   }
102   if(points == 0) {
103     min = 0.0 / 0.0;
104     max = 0.0 / 0.0;
105   }
106   cnt = data->expected_count;
107   avail = (float)points /cnt;
108   avg /= (float)points;
109
110   snprintf(human_buffer, sizeof(human_buffer),
111            "cnt=%d,avail=%0.0f,min=%0.4f,max=%0.4f,avg=%0.4f",
112            (int)cnt, 100.0*avail, min, max, avg);
113   noitL(nldeb, "ping_icmp(%s) [%s]\n", check->target, human_buffer);
114
115   gettimeofday(&current.whence, NULL);
116   sub_timeval(current.whence, check->last_fire_time, &duration);
117   current.duration = duration.tv_sec * 1000 + duration.tv_usec / 1000;
118   current.available = (avail > 0.0) ? NP_AVAILABLE : NP_UNAVAILABLE;
119   current.state = (avail < 1.0) ? NP_BAD : NP_GOOD;
120   current.status = human_buffer;
121   noit_stats_set_metric(&current, "count",
122                         METRIC_INT32, &data->expected_count);
123   avail *= 100.0;
124   noit_stats_set_metric(&current, "available", METRIC_DOUBLE, &avail);
125   noit_stats_set_metric(&current, "minimum",
126                         METRIC_DOUBLE, avail > 0.0 ? &min : NULL);
127   noit_stats_set_metric(&current, "maximum",
128                         METRIC_DOUBLE, avail > 0.0 ? &max : NULL);
129   noit_stats_set_metric(&current, "average",
130                         METRIC_DOUBLE, avail > 0.0 ? &avg : NULL);
131   noit_check_set_stats(self, check, &current);
132 }
133 static int ping_icmp_timeout(eventer_t e, int mask,
134                              void *closure, struct timeval *now) {
135   struct ping_closure *pcl = (struct ping_closure *)closure;
136   struct check_info *data;
137   if(!NOIT_CHECK_KILLED(pcl->check) && !NOIT_CHECK_DISABLED(pcl->check)) {
138     ping_icmp_log_results(pcl->self, pcl->check);
139     data = (struct check_info *)pcl->check->closure;
140     data->timeout_event = NULL;
141   }
142   pcl->check->flags &= ~NP_RUNNING;
143   free(pcl);
144   return 0;
145 }
146 static int ping_icmp_handler(eventer_t e, int mask,
147                              void *closure, struct timeval *now) {
148   noit_module_t *self = (noit_module_t *)closure;
149   struct check_info *data;
150   char packet[1500];
151   int packet_len = sizeof(packet);
152   union {
153    struct sockaddr_in  in4;
154    struct sockaddr_in6 in6;
155   } from;
156   unsigned int from_len;
157   struct ip *ip = (struct ip *)packet;
158   struct icmp *icp;
159   struct ping_payload *payload;
160
161   while(1) {
162     int inlen, iphlen;
163     noit_check_t *check;
164     struct timeval tt;
165
166     from_len = sizeof(from);
167
168     inlen = recvfrom(e->fd, packet, packet_len, 0,
169                      (struct sockaddr *)&from, &from_len);
170     gettimeofday(now, NULL); /* set it, as we care about accuracy */
171
172     if(inlen < 0) {
173       if(errno == EAGAIN || errno == EINTR) break;
174       noitLT(nlerr, now, "ping_icmp recvfrom: %s\n", strerror(errno));
175       break;
176     }
177     iphlen = ip->ip_hl << 2;
178     if((inlen-iphlen) != (sizeof(struct icmp)+sizeof(struct ping_payload))) {
179       noitLT(nldeb, now,
180              "ping_icmp bad size: %d+%d\n", iphlen, inlen-iphlen);
181       continue;
182     }
183     icp = (struct icmp *)(packet + iphlen);
184     payload = (struct ping_payload *)(icp + 1);
185     if(icp->icmp_type != ICMP_ECHOREPLY) {
186       continue;
187     }
188     if(icp->icmp_id != (((vpsized_uint)self) & 0xffff)) {
189       noitLT(nlerr, now,
190                "ping_icmp not sent from this instance (%d:%d) vs. %lu\n",
191                icp->icmp_id, ntohs(icp->icmp_seq),
192                (unsigned long)(((vpsized_uint)self) & 0xffff));
193       continue;
194     }
195     check = noit_poller_lookup(payload->checkid);
196     /* make sure this check is from this generation! */
197     if(!check) {
198       char uuid_str[37];
199       uuid_unparse_lower(payload->checkid, uuid_str);
200       noitLT(nlerr, now,
201              "ping_icmp response for unknown check '%s'\n", uuid_str);
202       continue;
203     }
204     if(check->generation != payload->generation) {
205       noitLT(nldeb, now,
206              "ping_icmp response in generation gap\n");
207       continue;
208     }
209     data = (struct check_info *)check->closure;
210
211     /* If there is no timeout_event, the check must have completed.
212      * We have nothing to do. */
213     if(!data->timeout_event) continue;
214
215     /* Sanity check the payload */
216     if(payload->check_no != data->check_no) continue;
217     if(payload->check_pack_cnt != data->expected_count) continue;
218     if(payload->check_pack_no < 0 ||
219        payload->check_pack_no >= data->expected_count) continue;
220
221     sub_timeval(*now, payload->whence, &tt);
222     data->turnaround[payload->check_pack_no] =
223       (float)tt.tv_sec + (float)tt.tv_usec / 1000000.0;
224     if(ping_icmp_is_complete(self, check)) {
225       ping_icmp_log_results(self, check);
226       eventer_remove(data->timeout_event);
227       free(data->timeout_event->closure);
228       eventer_free(data->timeout_event);
229       data->timeout_event = NULL;
230       check->flags &= ~NP_RUNNING;
231     }
232   }
233   return EVENTER_READ;
234 }
235
236 static int ping_icmp_init(noit_module_t *self) {
237   socklen_t on;
238   struct protoent *proto;
239   ping_icmp_data_t *data;
240
241   data = malloc(sizeof(*data));
242   data->ipv4_fd = data->ipv6_fd = -1;
243
244   if ((proto = getprotobyname("icmp")) == NULL) {
245     noitL(noit_error, "Couldn't find 'icmp' protocol\n");
246     return -1;
247   }
248
249   data->ipv4_fd = socket(AF_INET, SOCK_RAW, proto->p_proto);
250   if(data->ipv4_fd < 0) {
251     noitL(noit_error, "ping_icmp: socket failed: %s\n",
252           strerror(errno));
253   }
254   else {
255     socklen_t slen = sizeof(on);
256     if(getsockopt(data->ipv4_fd, SOL_SOCKET, SO_SNDBUF, &on, &slen) == 0) {
257       while(on < (1 << 20)) {
258         on <<= 1;
259         if(setsockopt(data->ipv4_fd, SOL_SOCKET, SO_SNDBUF,
260                       &on, sizeof(on)) != 0) {
261           on >>= 1;
262           break;
263         }
264       }
265       noitL(noit_error, "ping_icmp: send buffer set to %d\n", on);
266     }
267     else
268       noitL(noit_error, "Cannot get sndbuf size: %s\n", strerror(errno));
269
270     on = 1;
271     if(ioctl(data->ipv4_fd, FIONBIO, &on)) {
272       close(data->ipv4_fd);
273       data->ipv4_fd = -1;
274       noitL(noit_error,
275             "ping_icmp: could not set socket non-blocking: %s\n",
276             strerror(errno));
277     }
278   }
279   if(data->ipv4_fd >= 0) {
280     eventer_t newe;
281     newe = eventer_alloc();
282     newe->fd = data->ipv4_fd;
283     newe->mask = EVENTER_READ;
284     newe->callback = ping_icmp_handler;
285     newe->closure = self;
286     eventer_add(newe);
287   }
288
289   data->ipv6_fd = socket(AF_INET6, SOCK_RAW, proto->p_proto);
290   if(data->ipv6_fd < 0) {
291     noitL(noit_error, "ping_icmp: socket failed: %s\n",
292           strerror(errno));
293   }
294   else {
295     on = 1;
296     if(ioctl(data->ipv6_fd, FIONBIO, &on)) {
297       close(data->ipv6_fd);
298       data->ipv6_fd = -1;
299       noitL(noit_error,
300             "ping_icmp: could not set socket non-blocking: %s\n",
301                strerror(errno));
302     }
303   }
304   if(data->ipv6_fd >= 0) {
305     eventer_t newe;
306     newe = eventer_alloc();
307     newe->fd = data->ipv6_fd;
308     newe->mask = EVENTER_READ;
309     newe->callback = ping_icmp_handler;
310     newe->closure = self;
311     eventer_add(newe);
312   }
313
314   noit_module_set_userdata(self, data);
315   return 0;
316 }
317
318 static int ping_icmp_real_send(eventer_t e, int mask,
319                                void *closure, struct timeval *now) {
320   struct ping_closure *pcl = (struct ping_closure *)closure;
321   struct icmp *icp;
322   struct ping_payload *payload;
323   ping_icmp_data_t *data;
324   int i;
325
326   noitLT(nldeb, now, "ping_icmp_real_send(%s)\n", pcl->check->target);
327   data = noit_module_get_userdata(pcl->self);
328   icp = (struct icmp *)pcl->payload;
329   payload = (struct ping_payload *)(icp + 1);
330   gettimeofday(&payload->whence, NULL); /* now isn't accurate enough */
331   icp->icmp_cksum = in_cksum(pcl->payload, pcl->payload_len);
332   if(pcl->check->target_family == AF_INET) {
333     struct sockaddr_in sin;
334     memset(&sin, 0, sizeof(sin));
335     sin.sin_family = AF_INET;
336     memcpy(&sin.sin_addr,
337            &pcl->check->target_addr.addr, sizeof(sin.sin_addr));
338     i = sendto(data->ipv4_fd,
339                pcl->payload, pcl->payload_len, 0,
340                (struct sockaddr *)&sin, sizeof(sin));
341   }
342   else {
343     struct sockaddr_in6 sin;
344     memset(&sin, 0, sizeof(sin));
345     sin.sin6_family = AF_INET6;
346     memcpy(&sin.sin6_addr,
347            &pcl->check->target_addr.addr6, sizeof(sin.sin6_addr));
348     i = sendto(data->ipv6_fd,
349                pcl->payload, pcl->payload_len, 0,
350                (struct sockaddr *)&sin, sizeof(sin));
351   }
352   if(i != pcl->payload_len) {
353     noitLT(nlerr, now, "Error sending ICMP packet to %s: %s\n",
354              pcl->check->target, strerror(errno));
355   }
356   free(pcl->payload);
357   free(pcl);
358   return 0;
359 }
360 static void ping_check_cleanup(noit_module_t *self, noit_check_t *check) {
361   struct check_info *ci = (struct check_info *)check->closure;
362   if(ci) {
363     if(ci->timeout_event) {
364       eventer_remove(ci->timeout_event);
365       free(ci->timeout_event->closure);
366       eventer_free(ci->timeout_event);
367       ci->timeout_event = NULL;
368     }
369     if(ci->turnaround) free(ci->turnaround);
370   }
371 }
372 static int ping_icmp_send(noit_module_t *self, noit_check_t *check) {
373   struct timeval when, p_int;
374   struct icmp *icp;
375   struct ping_payload *payload;
376   struct ping_closure *pcl;
377   struct check_info *ci = (struct check_info *)check->closure;
378   int packet_len, i;
379   eventer_t newe;
380   const char *config_val;
381
382   int interval = PING_INTERVAL;
383   int count = PING_COUNT;
384   if(noit_hash_retrieve(check->config, "interval", strlen("interval"),
385                         (void **)&config_val))
386     interval = atoi(config_val);
387   if(noit_hash_retrieve(check->config, "count", strlen("count"),
388                         (void **)&config_val))
389     count = atoi(config_val);
390
391   check->flags |= NP_RUNNING;
392   noitL(nldeb, "ping_icmp_send(%p,%s,%d,%d)\n",
393         self, check->target, interval, count);
394
395   /* remove a timeout if we still have one -- we should unless someone
396    * has set a lower timeout than the period.
397    */
398   if(ci->timeout_event) {
399     eventer_remove(ci->timeout_event);
400     free(ci->timeout_event->closure);
401     eventer_free(ci->timeout_event);
402     ci->timeout_event = NULL;
403   }
404
405   gettimeofday(&when, NULL);
406   memcpy(&check->last_fire_time, &when, sizeof(when));
407
408   /* Setup some stuff used in the loop */
409   p_int.tv_sec = interval / 1000;
410   p_int.tv_usec = (interval % 1000) * 1000;
411   packet_len = sizeof(*icp) + sizeof(*payload);
412
413   /* Prep holding spots for return info */
414   ci->expected_count = count;
415   if(ci->turnaround) free(ci->turnaround);
416   ci->turnaround = calloc(count, sizeof(*ci->turnaround));
417
418   ++ci->check_no;
419   for(i=0; i<count; i++) {
420     newe = eventer_alloc();
421     newe->callback = ping_icmp_real_send;
422     newe->mask = EVENTER_TIMER;
423     memcpy(&newe->whence, &when, sizeof(when));
424     add_timeval(when, p_int, &when); /* Next one is a bit later */
425
426     icp = calloc(1,packet_len);
427     payload = (struct ping_payload *)(icp + 1);
428
429     icp->icmp_type = ICMP_ECHO;
430     icp->icmp_code = 0;
431     icp->icmp_cksum = 0;
432     icp->icmp_seq = htons(ci->seq++);
433     icp->icmp_id = (((vpsized_uint)self) & 0xffff);
434
435     uuid_copy(payload->checkid, check->checkid);
436     payload->generation = check->generation;
437     payload->check_no = ci->check_no;
438     payload->check_pack_no = i;
439     payload->check_pack_cnt = count;
440
441     pcl = calloc(1, sizeof(*pcl));
442     pcl->self = self;
443     pcl->check = check;
444     pcl->payload = icp;
445     pcl->payload_len = packet_len;
446
447     newe->closure = pcl;
448     eventer_add(newe);
449   }
450   newe = eventer_alloc();
451   newe->mask = EVENTER_TIMER;
452   gettimeofday(&when, NULL);
453   p_int.tv_sec = check->timeout / 1000;
454   p_int.tv_usec = (check->timeout % 1000) * 1000;
455   add_timeval(when, p_int, &newe->whence);
456   pcl = calloc(1, sizeof(*pcl));
457   pcl->self = self;
458   pcl->check = check;
459   newe->closure = pcl;
460   newe->callback = ping_icmp_timeout;
461   eventer_add(newe);
462   ci->timeout_event = newe;
463
464   return 0;
465 }
466 static int ping_icmp_initiate_check(noit_module_t *self, noit_check_t *check,
467                                     int once, noit_check_t *cause) {
468   if(!check->closure) check->closure = calloc(1, sizeof(struct check_info));
469   INITIATE_CHECK(ping_icmp_send, self, check);
470   return 0;
471 }
472
/*
 *      I N _ C K S U M
 *          Derived from Mike Muuss's Public Domain code.
 * Checksum routine for Internet Protocol family headers (C Version)
 *
 * addr points at len bytes of data.  The data is summed as 16-bit words in
 * host byte order using one's-complement addition, and the complement of
 * that sum is returned (0..0xffff).  Stored back into the packet as a
 * 16-bit value, it yields the correct checksum bytes on either byte order.
 * An odd trailing byte is summed as if followed by a zero byte.  A len of
 * zero or less returns 0xffff.
 *
 * Words are fetched with memcpy so the buffer need not be aligned, and
 * carries are folded while summing so the accumulator cannot overflow
 * however long the buffer is.
 */
static int in_cksum(u_short *addr, int len)
{
  const unsigned char *p = (const unsigned char *)addr;
  int nleft = len;
  uint32_t sum = 0;
  u_short word;

  while( nleft > 1 )  {
    memcpy(&word, p, sizeof(word));
    sum += word;
    /* fold early once the top bit is set; the one's-complement sum is
     * unchanged by folding */
    if( sum & 0x80000000U )
      sum = (sum >> 16) + (sum & 0xffff);
    p += sizeof(word);
    nleft -= 2;
  }

  /* mop up an odd byte, if necessary */
  if( nleft == 1 ) {
    word = 0;
    memcpy(&word, p, 1);   /* first byte in memory, second byte zero */
    sum += word;
  }

  /* add back carry outs from top 16 bits to low 16 bits */
  while( sum >> 16 )
    sum = (sum >> 16) + (sum & 0xffff);

  return (u_short)~sum;    /* truncate to 16 bits */
}
513
514 static int ping_icmp_onload(noit_image_t *self) {
515   nlerr = noit_log_stream_find("error/ping_icmp");
516   nldeb = noit_log_stream_find("debug/ping_icmp");
517   if(!nlerr) nlerr = noit_stderr;
518   if(!nldeb) nldeb = noit_debug;
519   eventer_name_callback("ping_icmp/timeout", ping_icmp_timeout);
520   eventer_name_callback("ping_icmp/handler", ping_icmp_handler);
521   return 0;
522 }
523 #include "ping_icmp.xmlh"
524 noit_module_t ping_icmp = {
525   {
526     NOIT_MODULE_MAGIC,
527     NOIT_MODULE_ABI_VERSION,
528     "ping_icmp",
529     "ICMP based host availability detection",
530     ping_icmp_xml_description,
531     ping_icmp_onload
532   },
533   ping_icmp_config,
534   ping_icmp_init,
535   ping_icmp_initiate_check,
536   ping_check_cleanup
537 };
538
Note: See TracBrowser for help on using the browser.