[Reconnoiter-devel] [reconnoiter commit] Reconnoiter branch, master, updated. e81d0a8dd85a5eec4d37edcbaf8fcc00cf1fadf8

git at labs.omniti.com git at labs.omniti.com
Tue Apr 17 16:59:44 EDT 2012


Pushed by: jesus
The branch, master has been updated
       via  e81d0a8dd85a5eec4d37edcbaf8fcc00cf1fadf8 (commit)
       via  e0bfc4593702aba99402fba2c74753b6ca491d3f (commit)
       via  86e0c6af3530e751a8676f651a2477e300e494f1 (commit)
       via  4053452a227a6b5b378bbdbbe4ac68758d58a385 (commit)
       via  8ebbffb5373202a2665c35c8382f9e9192db3581 (commit)
      from  a3518fe2da96fee5c577d97b77bb7714465165d2 (commit)

Summary of changes:
 src/noit_main.c           |   17 +++++++++++++++++
 src/utils/noit_watchdog.c |   37 ++++++++++++++++++++++++++++++++++++-
 src/utils/noit_watchdog.h |   18 ++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletions(-)

Log:
commit e81d0a8dd85a5eec4d37edcbaf8fcc00cf1fadf8
Merge: a3518fe e0bfc45
Author: Theo Schlossnagle <jesus at lethargy.org>
Date:   Tue Apr 17 13:47:08 2012 -0700

    Merge pull request #61 from TheTeaWeevil/master
    
    Updated noit shutdown software


commit e0bfc4593702aba99402fba2c74753b6ca491d3f
Author: Philip Maddox <pmaddox at circonus.com>
Date:   Tue Apr 17 19:04:28 2012 +0000

    Fixed function header comment for noit_watchdog_start_child

diff --git a/src/utils/noit_watchdog.h b/src/utils/noit_watchdog.h
index 9e9cf5e..4472819 100644
--- a/src/utils/noit_watchdog.h
+++ b/src/utils/noit_watchdog.h
@@ -62,7 +62,7 @@ API_EXPORT(int)
 API_EXPORT(int)
   update_retries(int* offset, time_t times[]);
 
-/*! \fn int noit_watchdog_start_child(const char *app, int (*func)(), int timeout, int retries, int span)
+/*! \fn int noit_watchdog_start_child(const char *app, int (*func)(), int timeout)
     \brief Starts a function as a separate child under close watch.
     \param app The name of the application (for error output).
     \param func The function that will be the child process.

commit 86e0c6af3530e751a8676f651a2477e300e494f1
Author: Philip Maddox <pmaddox at circonus.com>
Date:   Tue Apr 17 18:59:27 2012 +0000

    Removed retries and span from noit_watchdog_start_child and added function to set them in watchdog

diff --git a/src/noit_main.c b/src/noit_main.c
index 32eee60..66fd250 100644
--- a/src/noit_main.c
+++ b/src/noit_main.c
@@ -175,6 +175,8 @@ noit_main(const char *appname,
     span_val = 60;
   }
 
+  noit_watchdog_ratelimit(retry_val, span_val);
+
   /* Lastly, run through all other system inits */
   snprintf(appscratch, sizeof(appscratch), "/%s/eventer/@implementation", appname);
   if(!noit_conf_get_stringbuf(NULL, appscratch, conf_str, sizeof(conf_str))) {
@@ -241,5 +243,5 @@ noit_main(const char *appname,
   }
 
   signal(SIGHUP, SIG_IGN);
-  return noit_watchdog_start_child("noitd", passed_child_main, watchdog_timeout, retry_val, span_val);
+  return noit_watchdog_start_child("noitd", passed_child_main, watchdog_timeout);
 }
diff --git a/src/utils/noit_watchdog.c b/src/utils/noit_watchdog.c
index 292f670..1f7b2c2 100644
--- a/src/utils/noit_watchdog.c
+++ b/src/utils/noit_watchdog.c
@@ -53,6 +53,8 @@
 const char *appname = "unknown";
 const char *glider_path = NULL;
 const char *trace_dir = "/var/tmp";
+int retries = 5;
+int span = 60;
 
 void noit_watchdog_glider(const char *path) {
   glider_path = path;
@@ -62,6 +64,10 @@ void noit_watchdog_glider(const char *path) {
 void noit_watchdog_glider_trace_dir(const char *path) {
   trace_dir = path;
 }
+void noit_watchdog_ratelimit(int retry_val, int span_val) {
+    retries = retry_val;
+    span = span_val;
+}
 
 /* Watchdog stuff */
 static int *lifeline = NULL;
@@ -120,9 +126,7 @@ void glideme(int sig) {
 }
 
 int noit_watchdog_start_child(const char *app, int (*func)(),
-                              int child_watchdog_timeout, 
-                              int retries, 
-                              int span) {
+                              int child_watchdog_timeout) {
   int child_pid;
   time_t time_data[retries];
   int offset = 0;
@@ -166,7 +170,7 @@ int noit_watchdog_start_child(const char *app, int (*func)(),
           int quit;
           sig = WTERMSIG(status);
           exit_val = WEXITSTATUS(status);
-          quit = update_retries(retries, span, &offset, time_data);
+          quit = update_retries(&offset, time_data);
           if (quit) {
             noitL(noit_error, "noit exceeded retry limit of %d retries in %d seconds... exiting...\n", retries, span);
             exit(0);
@@ -197,7 +201,7 @@ int noit_watchdog_start_child(const char *app, int (*func)(),
   }
 }
 
-int update_retries(int retries, int span, int* offset, time_t times[]) {
+int update_retries(int* offset, time_t times[]) {
   int i;
   time_t currtime = time(NULL);
   time_t cutoff = currtime - span;
diff --git a/src/utils/noit_watchdog.h b/src/utils/noit_watchdog.h
index 58250c8..9e9cf5e 100644
--- a/src/utils/noit_watchdog.h
+++ b/src/utils/noit_watchdog.h
@@ -50,8 +50,6 @@ API_EXPORT(int)
 
 /*! \fn int update_retries(int retries, int span, retry_data** data)
     \brief Updates the list of retries and signals to quit if the limit is exceeded
-    \param retries The number of times to attempt to restart the task with a certain span of time
-    \param span The amount of time in seconds to measure attempts to restart the task over
     \param offset The current location in the data array to place the new time in
     \param times An array of times used to determine if there have been too many restarts
     \return Returns 1 to signal a quit, 0 otherwise
@@ -62,22 +60,20 @@ API_EXPORT(int)
  */
 
 API_EXPORT(int)
-  update_retries(int retries, int span, int* offset, time_t times[]);
+  update_retries(int* offset, time_t times[]);
 
 /*! \fn int noit_watchdog_start_child(const char *app, int (*func)(), int timeout, int retries, int span)
     \brief Starts a function as a separate child under close watch.
     \param app The name of the application (for error output).
     \param func The function that will be the child process.
     \param timeout The number of seconds of lifelessness before the parent reaps and restarts the child.
-    \param retries The number of times to attempt to restart the task with a certain span of time
-    \param span The amount of time in seconds to measure attempts to restart the task over
     \return Returns on program termination.
 .
     
     noit_watchdog_start_child will fork and run the specified function in the child process.  The parent will watch.  The child process must initialize the eventer system and then call noit_watchdog_child_hearbeat to let the parent know it is alive.  If the eventer system is being used to drive the child process, noit_watchdog_child_eventer_heartbeat may be called once after the eventer is initalized.  This will induce a regular heartbeat.
  */
 API_EXPORT(int)
-  noit_watchdog_start_child(const char *app, int (*func)(), int timeout, int retries, int span);
+  noit_watchdog_start_child(const char *app, int (*func)(), int timeout);
 
 /*! \fn int noit_watchdog_child_heartbeat()
     \return Returns zero on success
@@ -101,4 +97,7 @@ API_EXPORT(void)
 API_EXPORT(void)
   noit_watchdog_glider_trace_dir(const char *path);
 
+API_EXPORT(void)
+  noit_watchdog_ratelimit(int retry_val, int span_val);
+
 #endif

commit 4053452a227a6b5b378bbdbbe4ac68758d58a385
Author: Philip Maddox <pmaddox at circonus.com>
Date:   Tue Apr 17 18:06:17 2012 +0000

    Updated retry limit on noit to simplify it significantly

diff --git a/src/noit_main.c b/src/noit_main.c
index d9a812b..32eee60 100644
--- a/src/noit_main.c
+++ b/src/noit_main.c
@@ -124,10 +124,9 @@ noit_main(const char *appname,
   char appscratch[1024];
   char *glider = (char *)_glider;
   char *watchdog_timeout_str;
-  char *retries = NULL;
-  char *span = NULL;
-  int retry_val = 5;
-  int span_val = 60;
+  int retry_val;
+  int span_val;
+  int ret;
   
    
   /* First initialize logging, so we can log errors */
@@ -166,14 +165,14 @@ noit_main(const char *appname,
   if(trace_dir) noit_watchdog_glider_trace_dir(trace_dir);
 
   snprintf(appscratch, sizeof(appscratch), "/%s/watchdog/@retries", appname);
-  noit_conf_get_string(NULL, appscratch, &retries);
-  if(retries) {
-    retry_val = atoi(retries);
+  ret = noit_conf_get_int(NULL, appscratch, &retry_val);
+  if((ret == 0) || (retry_val == 0)){
+    retry_val = 5;
   }
   snprintf(appscratch, sizeof(appscratch), "/%s/watchdog/@span", appname);
-  noit_conf_get_string(NULL, appscratch, &span);
-  if(span) {
-    span_val = atoi(span);
+  ret = noit_conf_get_int(NULL, appscratch, &span_val);
+  if((ret == 0) || (span_val == 0)){
+    span_val = 60;
   }
 
   /* Lastly, run through all other system inits */
diff --git a/src/utils/noit_watchdog.c b/src/utils/noit_watchdog.c
index 312feb7..292f670 100644
--- a/src/utils/noit_watchdog.c
+++ b/src/utils/noit_watchdog.c
@@ -124,7 +124,11 @@ int noit_watchdog_start_child(const char *app, int (*func)(),
                               int retries, 
                               int span) {
   int child_pid;
-  retry_data* retry_head = NULL;
+  time_t time_data[retries];
+  int offset = 0;
+
+  memset(time_data, 0, sizeof(time_data));
+
   appname = strdup(app);
   if(child_watchdog_timeout == 0)
     child_watchdog_timeout = CHILD_WATCHDOG_TIMEOUT;
@@ -162,7 +166,7 @@ int noit_watchdog_start_child(const char *app, int (*func)(),
           int quit;
           sig = WTERMSIG(status);
           exit_val = WEXITSTATUS(status);
-          quit = update_retries(retries, span, &retry_head);
+          quit = update_retries(retries, span, &offset, time_data);
           if (quit) {
             noitL(noit_error, "noit exceeded retry limit of %d retries in %d seconds... exiting...\n", retries, span);
             exit(0);
@@ -193,43 +197,21 @@ int noit_watchdog_start_child(const char *app, int (*func)(),
   }
 }
 
-int update_retries(int retries, int span, retry_data** data) {
-  int count = 0;
-  retry_data* iter;
-  retry_data* prev = NULL;
-  retry_data* new_data = NULL;
-  retry_data* temp = NULL;
-  time_t curr_time = time(NULL);
-
-  /* Allocate the new entry and set it to the head of the list */
-  new_data = (retry_data*)malloc(sizeof(retry_data));
-  new_data->event_time = curr_time;
-  new_data->next = *data;
-  *data = new_data;
-
-  /* We always want to count the first one, so start on the second element */
-  count = 1;
-  iter = (retry_data*)new_data->next;
-  prev = new_data;
-
-  while (iter != NULL) {
-    int diff = curr_time - iter->event_time;
-    if (diff <= span) { /* Count it, since it's not too old */
-      prev = iter;
-      iter = (retry_data*)iter->next;
-      count++;
-    }
-    else { /* Remove node */
-      temp = iter;
-      prev->next = iter->next;
-      iter = iter->next;
-      free(temp);
+int update_retries(int retries, int span, int* offset, time_t times[]) {
+  int i;
+  time_t currtime = time(NULL);
+  time_t cutoff = currtime - span;
+
+  times[*offset % retries] = currtime;
+  *offset = *offset + 1;
+
+  for (i=0; i < retries; i++) {
+    if (times[i] < cutoff) {
+      return 0;
     }
   }
-  if (count >= retries) {
-    return 1;
-  }
-  return 0;
+
+  return 1;
 }
 
 static int watchdog_tick(eventer_t e, int mask, void *unused, struct timeval *now) {
diff --git a/src/utils/noit_watchdog.h b/src/utils/noit_watchdog.h
index eecf48d..58250c8 100644
--- a/src/utils/noit_watchdog.h
+++ b/src/utils/noit_watchdog.h
@@ -33,14 +33,10 @@
 #ifndef _NOIT_WATCHDOG_H
 #define _NOIT_WATCHDOG_H
 
+#include <time.h>
 #include "noit_config.h"
 #include "noit_defines.h"
 
-typedef struct{
-    time_t event_time;
-    void* next;
-} __attribute__ ((packed)) retry_data;
-
 /*! \fn int noit_watchdog_prefork_init()
     \brief Prepare the program to split into a child/parent-monitor relationship.
     \return Returns zero on success.
@@ -56,7 +52,8 @@ API_EXPORT(int)
     \brief Updates the list of retries and signals to quit if the limit is exceeded
     \param retries The number of times to attempt to restart the task with a certain span of time
     \param span The amount of time in seconds to measure attempts to restart the task over
-    \param data A pointer to the list of event data
+    \param offset The current location in the data array to place the new time in
+    \param times An array of times used to determine if there have been too many restarts
     \return Returns 1 to signal a quit, 0 otherwise
 
 .
@@ -65,7 +62,7 @@ API_EXPORT(int)
  */
 
 API_EXPORT(int)
-  update_retries(int retries, int span, retry_data** data);
+  update_retries(int retries, int span, int* offset, time_t times[]);
 
 /*! \fn int noit_watchdog_start_child(const char *app, int (*func)(), int timeout, int retries, int span)
     \brief Starts a function as a separate child under close watch.

commit 8ebbffb5373202a2665c35c8382f9e9192db3581
Author: Philip Maddox <pmaddox at circonus.com>
Date:   Tue Apr 17 15:46:32 2012 +0000

    Added retry limit for noit_watchdog to prevent infinite spawning

diff --git a/src/noit_main.c b/src/noit_main.c
index aa99a3a..d9a812b 100644
--- a/src/noit_main.c
+++ b/src/noit_main.c
@@ -124,6 +124,11 @@ noit_main(const char *appname,
   char appscratch[1024];
   char *glider = (char *)_glider;
   char *watchdog_timeout_str;
+  char *retries = NULL;
+  char *span = NULL;
+  int retry_val = 5;
+  int span_val = 60;
+  
    
   /* First initialize logging, so we can log errors */
   noit_log_init();
@@ -160,6 +165,17 @@ noit_main(const char *appname,
   noit_conf_get_string(NULL, appscratch, &trace_dir);
   if(trace_dir) noit_watchdog_glider_trace_dir(trace_dir);
 
+  snprintf(appscratch, sizeof(appscratch), "/%s/watchdog/@retries", appname);
+  noit_conf_get_string(NULL, appscratch, &retries);
+  if(retries) {
+    retry_val = atoi(retries);
+  }
+  snprintf(appscratch, sizeof(appscratch), "/%s/watchdog/@span", appname);
+  noit_conf_get_string(NULL, appscratch, &span);
+  if(span) {
+    span_val = atoi(span);
+  }
+
   /* Lastly, run through all other system inits */
   snprintf(appscratch, sizeof(appscratch), "/%s/eventer/@implementation", appname);
   if(!noit_conf_get_stringbuf(NULL, appscratch, conf_str, sizeof(conf_str))) {
@@ -226,5 +242,5 @@ noit_main(const char *appname,
   }
 
   signal(SIGHUP, SIG_IGN);
-  return noit_watchdog_start_child("noitd", passed_child_main, watchdog_timeout);
+  return noit_watchdog_start_child("noitd", passed_child_main, watchdog_timeout, retry_val, span_val);
 }
diff --git a/src/utils/noit_watchdog.c b/src/utils/noit_watchdog.c
index 749041e..312feb7 100644
--- a/src/utils/noit_watchdog.c
+++ b/src/utils/noit_watchdog.c
@@ -40,6 +40,7 @@
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <signal.h>
+#include <time.h>
 #ifdef HAVE_SYS_WAIT_H
 #include <sys/wait.h>
 #endif
@@ -119,8 +120,11 @@ void glideme(int sig) {
 }
 
 int noit_watchdog_start_child(const char *app, int (*func)(),
-                              int child_watchdog_timeout) {
+                              int child_watchdog_timeout, 
+                              int retries, 
+                              int span) {
   int child_pid;
+  retry_data* retry_head = NULL;
   appname = strdup(app);
   if(child_watchdog_timeout == 0)
     child_watchdog_timeout = CHILD_WATCHDOG_TIMEOUT;
@@ -155,9 +159,15 @@ int noit_watchdog_start_child(const char *app, int (*func)(),
         }
         else if (rv == child_pid) {
           /* We died!... we need to relaunch, unless the status was a requested exit (2) */
+          int quit;
           sig = WTERMSIG(status);
           exit_val = WEXITSTATUS(status);
-          if(sig == SIGINT || sig == SIGQUIT ||
+          quit = update_retries(retries, span, &retry_head);
+          if (quit) {
+            noitL(noit_error, "noit exceeded retry limit of %d retries in %d seconds... exiting...\n", retries, span);
+            exit(0);
+          }
+          else if(sig == SIGINT || sig == SIGQUIT ||
              (sig == 0 && (exit_val == 2 || exit_val < 0))) {
             noitL(noit_error, "%s shutdown acknowledged.\n", app);
             exit(0);
@@ -183,6 +193,45 @@ int noit_watchdog_start_child(const char *app, int (*func)(),
   }
 }
 
+int update_retries(int retries, int span, retry_data** data) {
+  int count = 0;
+  retry_data* iter;
+  retry_data* prev = NULL;
+  retry_data* new_data = NULL;
+  retry_data* temp = NULL;
+  time_t curr_time = time(NULL);
+
+  /* Allocate the new entry and set it to the head of the list */
+  new_data = (retry_data*)malloc(sizeof(retry_data));
+  new_data->event_time = curr_time;
+  new_data->next = *data;
+  *data = new_data;
+
+  /* We always want to count the first one, so start on the second element */
+  count = 1;
+  iter = (retry_data*)new_data->next;
+  prev = new_data;
+
+  while (iter != NULL) {
+    int diff = curr_time - iter->event_time;
+    if (diff <= span) { /* Count it, since it's not too old */
+      prev = iter;
+      iter = (retry_data*)iter->next;
+      count++;
+    }
+    else { /* Remove node */
+      temp = iter;
+      prev->next = iter->next;
+      iter = iter->next;
+      free(temp);
+    }
+  }
+  if (count >= retries) {
+    return 1;
+  }
+  return 0;
+}
+
 static int watchdog_tick(eventer_t e, int mask, void *unused, struct timeval *now) {
   it_ticks();
   return 0;
diff --git a/src/utils/noit_watchdog.h b/src/utils/noit_watchdog.h
index 25c2898..eecf48d 100644
--- a/src/utils/noit_watchdog.h
+++ b/src/utils/noit_watchdog.h
@@ -36,6 +36,11 @@
 #include "noit_config.h"
 #include "noit_defines.h"
 
+typedef struct{
+    time_t event_time;
+    void* next;
+} __attribute__ ((packed)) retry_data;
+
 /*! \fn int noit_watchdog_prefork_init()
     \brief Prepare the program to split into a child/parent-monitor relationship.
     \return Returns zero on success.
@@ -47,18 +52,35 @@ child to instrument watchdogs.
 API_EXPORT(int)
   noit_watchdog_prefork_init();
 
-/*! \fn int noit_watchdog_start_child(const char *app, int (*func)(), int timeout)
+/*! \fn int update_retries(int retries, int span, retry_data** data)
+    \brief Updates the list of retries and signals to quit if the limit is exceeded
+    \param retries The number of times to attempt to restart the task with a certain span of time
+    \param span The amount of time in seconds to measure attempts to restart the task over
+    \param data A pointer to the list of event data
+    \return Returns 1 to signal a quit, 0 otherwise
+
+.
+
+    update_retries will iterate through a list of times the task has restarted. If it determines that the system has been restarted too many times in too short a period, it will return 1 and reconnoiter will terminate. Otherwise, it will return 0 and reconnoiter will restart.
+ */
+
+API_EXPORT(int)
+  update_retries(int retries, int span, retry_data** data);
+
+/*! \fn int noit_watchdog_start_child(const char *app, int (*func)(), int timeout, int retries, int span)
     \brief Starts a function as a separate child under close watch.
     \param app The name of the application (for error output).
     \param func The function that will be the child process.
     \param timeout The number of seconds of lifelessness before the parent reaps and restarts the child.
+    \param retries The number of times to attempt to restart the task with a certain span of time
+    \param span The amount of time in seconds to measure attempts to restart the task over
     \return Returns on program termination.
 .
     
     noit_watchdog_start_child will fork and run the specified function in the child process.  The parent will watch.  The child process must initialize the eventer system and then call noit_watchdog_child_hearbeat to let the parent know it is alive.  If the eventer system is being used to drive the child process, noit_watchdog_child_eventer_heartbeat may be called once after the eventer is initalized.  This will induce a regular heartbeat.
  */
 API_EXPORT(int)
-  noit_watchdog_start_child(const char *app, int (*func)(), int timeout);
+  noit_watchdog_start_child(const char *app, int (*func)(), int timeout, int retries, int span);
 
 /*! \fn int noit_watchdog_child_heartbeat()
     \return Returns zero on success




hooks/post-receive
-- 
Reconnoiter


More information about the Reconnoiter-devel mailing list