root/src/utils/noit_watchdog.c

Revision 4790fc84757a210ff4aed6895cac2729f4a3e497, 4.8 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 4 years ago)

Cleanup and make sure Solaris Sun Studio compilers get -mt and POSIX thread semantics or things will go very wrong, refs #34

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2007-2009, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  *     * Redistributions of source code must retain the above copyright
10  *       notice, this list of conditions and the following disclaimer.
11  *     * Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
16  *       of its contributors may be used to endorse or promote products
17  *       derived from this software without specific prior written
18  *       permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 #include "noit_defines.h"
33
34 #include <assert.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <sys/ioctl.h>
40 #include <fcntl.h>
41 #include <sys/mman.h>
42 #include <signal.h>
43 #ifdef HAVE_SYS_WAIT_H
44 #include <sys/wait.h>
45 #endif
46
47 #include "eventer/eventer.h"
48 #include "utils/noit_log.h"
49 #include "utils/noit_watchdog.h"
50
/* Fallback watchdog timeout, in seconds, applied by
 * noit_watchdog_start_child() when the caller passes 0. */
#define CHILD_WATCHDOG_TIMEOUT 5

/*
 * Heartbeat counter.  noit_watchdog_prefork_init() points this at an
 * anonymous MAP_SHARED mapping; the child bumps the counter and the
 * supervising parent watches it for changes.  NULL until that init runs.
 */
static int *lifeline = NULL;
55 static unsigned long last_tick_time() {
56   static struct timeval lastchange = { 0, 0 };
57   static int lastcheck = 0;
58   struct timeval now, diff;
59
60   gettimeofday(&now, NULL);
61   if(lastcheck != *lifeline) {
62     lastcheck = *lifeline;
63     memcpy(&lastchange, &now, sizeof(lastchange));
64   }
65   sub_timeval(now, lastchange, &diff);
66   return (unsigned long)diff.tv_sec;
67 }
68 static void it_ticks() {
69   (*lifeline)++;
70 }
/*
 * Manual heartbeat: record one tick on the shared counter so the
 * supervising parent sees the child as alive.
 *
 * Always returns 0.
 */
int noit_watchdog_child_heartbeat()
{
  it_ticks();

  return 0;
}
75 int noit_watchdog_prefork_init() {
76   lifeline = (int *)mmap(NULL, sizeof(int), PROT_READ|PROT_WRITE,
77                          MAP_SHARED|MAP_ANON, -1, 0);
78   if(lifeline == (void *)-1) {
79     noitL(noit_error, "Failed to mmap anon for watchdog\n");
80     return -1;
81   }
82   return 0;
83 }
84
85 int noit_watchdog_start_child(const char *app, int (*func)(),
86                               int child_watchdog_timeout) {
87   int child_pid;
88   if(child_watchdog_timeout == 0)
89     child_watchdog_timeout = CHILD_WATCHDOG_TIMEOUT;
90   while(1) {
91     child_pid = fork();
92     if(child_pid == -1) {
93       noitL(noit_error, "fork failed: %s\n", strerror(errno));
94       exit(-1);
95     }
96     if(child_pid == 0) {
97       /* This sets up things so we start alive */
98       it_ticks();
99       /* run the program */
100       exit(func());
101     }
102     else {
103       int sig = -1, exit_val = -1;
104       while(1) {
105         unsigned long ltt;
106         int status, rv;
107         sleep(1); /* Just check child status every second */
108         rv = waitpid(child_pid, &status, WNOHANG);
109         if(rv == 0) {
110           /* Nothing */
111         }
112         else if (rv == child_pid) {
113           /* We died!... we need to relaunch, unless the status was a requested exit (2) */
114           sig = WTERMSIG(status);
115           exit_val = WEXITSTATUS(status);
116           if(sig == SIGINT || sig == SIGQUIT ||
117              (sig == 0 && (exit_val == 2 || exit_val < 0))) {
118             noitL(noit_error, "%s shutdown acknowledged.\n", app);
119             exit(0);
120           }
121           break;
122         }
123         else {
124           noitL(noit_error, "Unexpected return from waitpid: %d\n", rv);
125           exit(-1);
126         }
127         /* Now check out timeout */
128         if((ltt = last_tick_time()) > child_watchdog_timeout) {
129           noitL(noit_error,
130                 "Watchdog timeout (%lu s)... terminating child\n",
131                 ltt);
132           kill(child_pid, SIGKILL);
133         }
134       }
135       noitL(noit_error, "%s child died [%d/%d], restarting.\n",
136             app, exit_val, sig);
137     }
138   }
139 }
140
141 static int watchdog_tick(eventer_t e, int mask, void *unused, struct timeval *now) {
142   it_ticks();
143   return 0;
144 }
145 int noit_watchdog_child_eventer_heartbeat() {
146   eventer_t e;
147
148   assert(__eventer);
149
150   /* Setup our hearbeat */
151   e = eventer_alloc();
152   e->mask = EVENTER_RECURRENT;
153   e->callback = watchdog_tick;
154   eventer_add_recurrent(e);
155
156   return 0;
157 }
158
Note: See TracBrowser for help on using the browser.