root/src/modules/selfcheck.c

Revision de520024f8801f5cc271ac9301e66f7c3690558e, 8.1 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 1 year ago)

Don't assert on double runs, just log an error

  • Property mode set to 100644
Line 
1 /*
2  * Copyright (c) 2009, OmniTI Computer Consulting, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  *     * Redistributions of source code must retain the above copyright
10  *       notice, this list of conditions and the following disclaimer.
11  *     * Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *     * Neither the name OmniTI Computer Consulting, Inc. nor the names
16  *       of its contributors may be used to endorse or promote products
17  *       derived from this software without specific prior written
18  *       permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include "noit_defines.h"
34 #include "noit_version.h"
35
36 #include <stdio.h>
37 #include <unistd.h>
38 #include <errno.h>
39 #include <assert.h>
40 #include <math.h>
41
42 #include "noit_module.h"
43 #include "noit_check.h"
44 #include "noit_check_tools.h"
45 #include "noit_jlog_listener.h"
46 #include "utils/noit_log.h"
47 #include "utils/noit_hash.h"
48
49 typedef struct {
50   noit_module_t *self;
51   noit_check_t *check;
52   noit_hash_table attrs;
53   size_t logsize;
54   int timed_out;
55 } selfcheck_info_t;
56
57 struct threadq_crutch {
58   noit_check_t *check;
59   stats_t *current;
60 };
61
62 static noit_log_stream_t nlerr = NULL;
63 static noit_log_stream_t nldeb = NULL;
64
65 static void selfcheck_cleanup(noit_module_t *self, noit_check_t *check) {
66   selfcheck_info_t *ci = check->closure;
67   if(ci) {
68     noit_check_release_attrs(&ci->attrs);
69     memset(ci, 0, sizeof(*ci));
70   }
71 }
72 static void jobq_thread_helper(eventer_jobq_t *jobq, void *closure) {
73   int s32;
74   char buffer[128];
75   struct threadq_crutch *crutch = (struct threadq_crutch *)closure;
76   s32 = jobq->concurrency;
77   if(s32 == 0) return; /* omit if no concurrency */
78   snprintf(buffer, sizeof(buffer), "%s_threads", jobq->queue_name);
79   noit_stats_set_metric(crutch->check, crutch->current, buffer, METRIC_INT32, &s32);
80 }
81 static int selfcheck_feed_details(jlog_feed_stats_t *s, void *closure) {
82   char buff[256];
83   uint64_t ms;
84   struct timeval now, diff;
85   struct threadq_crutch *crutch = (struct threadq_crutch *)closure;
86   gettimeofday(&now, NULL);
87
88   if(s->last_connection.tv_sec > 0) {
89     sub_timeval(now, s->last_connection, &diff);
90     ms = diff.tv_sec * 1000 + diff.tv_usec / 1000;
91     snprintf(buff, sizeof(buff), "feed`%s`last_connection_ms", s->feed_name);
92     noit_stats_set_metric(crutch->check, crutch->current, buff, METRIC_UINT64, &ms);
93   }
94
95   if(s->last_checkpoint.tv_sec > 0) {
96     sub_timeval(now, s->last_checkpoint, &diff);
97     ms = diff.tv_sec * 1000 + diff.tv_usec / 1000;
98     snprintf(buff, sizeof(buff), "feed`%s`last_checkpoint_ms", s->feed_name);
99     noit_stats_set_metric(crutch->check, crutch->current, buff, METRIC_UINT64, &ms);
100   }
101   return 1;
102 }
103 static void selfcheck_log_results(noit_module_t *self, noit_check_t *check) {
104   char buff[128];
105   u_int64_t u64;
106   int64_t s64;
107   int32_t s32;
108   stats_t current;
109   struct threadq_crutch crutch;
110   struct timeval duration, epoch, diff;
111   selfcheck_info_t *ci = check->closure;
112
113   crutch.check = check;
114   crutch.current = &current;
115   noit_check_stats_clear(check, &current);
116
117   gettimeofday(&current.whence, NULL);
118   sub_timeval(current.whence, check->last_fire_time, &duration);
119   current.duration = duration.tv_sec * 1000 + duration.tv_usec / 1000;
120   current.available = NP_UNAVAILABLE;
121   current.state = NP_BAD;
122   if(ci->timed_out) current.status = "timeout";
123   else {
124     current.available = NP_AVAILABLE;
125     current.state = NP_GOOD;
126     current.status = "ok";
127   }
128   /* Set all the metrics here */
129   s64 = (int64_t)ci->logsize;
130   noit_stats_set_metric(check, &current, "feed_bytes", METRIC_INT64, &s64);
131   s32 = noit_poller_check_count();
132   noit_stats_set_metric(check, &current, "check_cnt", METRIC_INT32, &s32);
133   s32 = noit_poller_transient_check_count();
134   noit_stats_set_metric(check, &current, "transient_cnt", METRIC_INT32, &s32);
135   if(eventer_get_epoch(&epoch)) s64 = 0;
136   else {
137     sub_timeval(current.whence, epoch, &diff);
138     s64 = diff.tv_sec;
139   }
140   noit_stats_set_metric(check, &current, "uptime", METRIC_INT64, &s64);
141   eventer_jobq_process_each(jobq_thread_helper, &crutch);
142   noit_build_version(buff, sizeof(buff));
143   noit_stats_set_metric(check, &current, "version", METRIC_STRING, buff);
144   u64 = noit_check_completion_count();
145   noit_stats_set_metric(check, &current, "checks_run", METRIC_UINT64, &u64);
146   /* feed pull info */
147   noit_jlog_foreach_feed_stats(selfcheck_feed_details, &crutch);
148
149   noit_check_set_stats(check, &current);
150 }
151
/*
 * FETCH_CONFIG_OR(key, str)
 *
 * Looks up the string stored in check->config under the spelling of the
 * variable `key` and stores it in `key`.  When noit_hash_retr_str returns
 * zero, `key` is set to `str` instead.
 *
 * The macro relies on a variable named `check` being in scope at the point
 * of use.
 */
#define FETCH_CONFIG_OR(key, str) do { \
  if(!noit_hash_retr_str(check->config, #key, strlen(#key), &key)) { \
    key = str; \
  } \
} while(0)
156
157 static int selfcheck_log_size(eventer_t e, int mask, void *closure,
158                               struct timeval *now) {
159   selfcheck_info_t *ci = closure;
160   noit_check_t *check = ci->check;
161   const char *feedname;
162   char feedname_buff[128];
163   noit_log_stream_t feed;
164
165   if(mask & (EVENTER_READ | EVENTER_WRITE)) {
166     /* this case is impossible from the eventer.  It is called as
167      * such on the synchronous completion of the event.
168      */
169     selfcheck_log_results(ci->self, ci->check);
170     selfcheck_cleanup(ci->self, ci->check);
171     check->flags &= ~NP_RUNNING;
172     return 0;
173   }
174   switch(mask) {
175     case EVENTER_ASYNCH_WORK:
176       /* Check the length of the log */
177       FETCH_CONFIG_OR(feedname, "feed");
178       noit_check_interpolate(feedname_buff, sizeof(feedname_buff), feedname,
179                              &ci->attrs, check->config);
180       feed = noit_log_stream_find(feedname_buff);
181       if(!feed) ci->logsize = -1;
182       else ci->logsize = noit_log_stream_size(feed);
183       ci->timed_out = 0;
184       return 0;
185       break;
186     case EVENTER_ASYNCH_CLEANUP:
187       /* This sets us up for a completion call. */
188       e->mask = EVENTER_READ | EVENTER_WRITE;
189       break;
190     default:
191       abort();
192   }
193   return 0;
194 }
195
196 static int selfcheck_initiate(noit_module_t *self, noit_check_t *check,
197                               noit_check_t *cause) {
198   selfcheck_info_t *ci = check->closure;
199   struct timeval __now;
200
201   /* We cannot be running */
202   BAIL_ON_RUNNING_CHECK(check);
203   check->flags |= NP_RUNNING;
204
205   ci->self = self;
206   ci->check = check;
207
208   ci->timed_out = 1;
209   noit_check_make_attrs(check, &ci->attrs);
210   gettimeofday(&__now, NULL);
211   memcpy(&check->last_fire_time, &__now, sizeof(__now));
212
213   /* Register a handler for the worker */
214   noit_check_run_full_asynch(check, selfcheck_log_size);
215   return 0;
216 }
217
218 static int selfcheck_initiate_check(noit_module_t *self, noit_check_t *check,
219                                    int once, noit_check_t *cause) {
220   if(!check->closure) check->closure = calloc(1, sizeof(selfcheck_info_t));
221   INITIATE_CHECK(selfcheck_initiate, self, check, cause);
222   return 0;
223 }
224
225 static int selfcheck_onload(noit_image_t *self) {
226   nlerr = noit_log_stream_find("error/selfcheck");
227   nldeb = noit_log_stream_find("debug/selfcheck");
228   if(!nlerr) nlerr = noit_stderr;
229   if(!nldeb) nldeb = noit_debug;
230
231   eventer_name_callback("selfcheck/selfcheck_log_size", selfcheck_log_size);
232   return 0;
233 }
234
235 #include "selfcheck.xmlh"
236 noit_module_t selfcheck = {
237   {
238     NOIT_MODULE_MAGIC,
239     NOIT_MODULE_ABI_VERSION,
240     "selfcheck",
241     "noitd self-checker",
242     selfcheck_xml_description,
243     selfcheck_onload
244   },
245   NULL,
246   NULL,
247   selfcheck_initiate_check,
248   selfcheck_cleanup
249 };
250
Note: See TracBrowser for help on using the browser.