Changeset 200 for trunk/tools

Show
Ignore:
Timestamp:
11/26/10 13:47:56 (3 years ago)
Author:
depesz
Message:

1. add ability to ignore output of checks (to implement cleanup pseudo-checks)
2. add ability to read files directly by system_monitoring.pl, and not by using external 'cat' program

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/tools/system_monitoring.pl

    r199 r200  
    7777        close $fh; 
    7878        delete $C->{ 'input' }; 
     79        return unless 'periodic' eq $C->{ 'type' }; 
    7980        $C->{ 'next_call' } = $self->{ 'current_time' } + $C->{ 'interval' } if $self->{ 'current_time' } < $C->{ 'next_call' }; 
    8081        return; 
    8182    } 
    82     my @lines = split( /\r?\n/, $read_data ); 
     83    return if $C->{ 'ignore' }; 
     84 
    8385    my $line_prefix = strftime( '%Y-%m-%d %H:%M:%S %Z | ', localtime( $self->{ 'current_time' } ) ); 
    84     for my $line ( @lines ) { 
    85         print { $C->{ 'fh' } } "$line_prefix$line\n"; 
    86     } 
     86    $read_data =~ s/^/$line_prefix/gm; 
     87    $read_data =~ s/([^\n])\z/$1\n/; 
     88    print { $C->{ 'fh' } } $read_data; 
     89    $C->{ 'fh' }->flush(); 
     90    return; 
     91
     92 
     93sub run_check { 
     94    my $self    = shift; 
     95    my $C       = shift; 
     96    my $command = $C->{ 'exec' }; 
     97 
     98    my $mode = '-|'; 
     99    $mode = '<' if $command =~ s/\A\s*<\s*//; 
     100 
     101    open my $fh, $mode, $command or die "Cannot open [$command] in mode [$mode]: $OS_ERROR\n"; 
     102    $self->{ 'select' }->add( $fh ); 
     103    $C->{ 'input' } = $fh; 
     104 
    87105    return; 
    88106} 
     
    90108sub start_periodic_processes { 
    91109    my $self = shift; 
    92     for my $check ( keys %{ $self->{ 'checks' } } ) { 
    93         my $C = $self->{ 'checks' }->{ $check }; 
    94         next if 'persistent' eq $C->{ 'type' }; 
     110    for my $C ( values %{ $self->{ 'checks' } } ) { 
     111        next unless 'periodic' eq $C->{ 'type' }; 
    95112        next if defined $C->{ 'input' }; 
    96113        next if ( defined $C->{ 'next_call' } ) && ( $C->{ 'next_call' } > $self->{ 'current_time' } ); 
    97         open my $fh, '-|', $C->{ 'exec' } or die "Cannot run [" . $C->{ 'exec' } . "]: $OS_ERROR\n"; 
    98         $self->{ 'select' }->add( $fh ); 
    99         $C->{ 'input' }     = $fh; 
     114        $self->run_check( $C ); 
    100115        $C->{ 'next_call' } = $self->{ 'current_time' } + $C->{ 'interval' }; 
     116    } 
     117    return; 
     118} 
     119 
     120sub start_persistent_processes { 
     121    my $self = shift; 
     122    for my $C ( values %{ $self->{ 'checks' } } ) { 
     123        next unless 'persistent' eq $C->{ 'type' }; 
     124        $self->run_check( $C ); 
    101125    } 
    102126    return; 
     
    123147} 
    124148 
    125 sub start_persistent_processes { 
    126     my $self = shift; 
    127     for my $check ( keys %{ $self->{ 'checks' } } ) { 
    128         my $C = $self->{ 'checks' }->{ $check }; 
    129         next unless 'persistent' eq $C->{ 'type' }; 
    130         open my $fh, '-|', $C->{ 'exec' } or die "Cannot run [" . $C->{ 'exec' } . "]: $OS_ERROR\n"; 
    131         $self->{ 'select' }->add( $fh ); 
    132         $C->{ 'input' } = $fh; 
    133     } 
    134     return; 
    135 } 
    136  
    137149sub update_logger_filehandles { 
    138150    my $self = shift; 
     
    149161    for my $check ( keys %{ $self->{ 'checks' } } ) { 
    150162        my $C = $self->{ 'checks' }->{ $check }; 
     163        next if $C->{ 'ignore' }; 
     164 
    151165        if ( $C->{ 'fh' } ) { 
    152166            close $C->{ 'fh' }; 
     
    156170        my $full_name = File::Spec->catfile( $full_directory, $check . $file_suffix ); 
    157171        open my $fh, '>>', $full_name or die "Cannot write to $full_name: $OS_ERROR\n"; 
    158         $fh->autoflush( 1 ); 
    159172        $C->{ 'fh' } = $fh; 
    160173    } 
     
    171184    for my $check ( sort keys %{ $self->{ 'checks' } } ) { 
    172185        my $C = $self->{ 'checks' }->{ $check }; 
     186        $C->{ 'name' } = $check; 
     187 
    173188        die "Bad type " . $C->{ 'type' } . " in check $check!\n" unless $C->{ 'type' } =~ m{\A(?:persistent|periodic)\z}; 
    174189        next unless $C->{ 'type' } eq 'periodic'; 
     
    196211            next; 
    197212        } 
    198         elsif ( $line =~ m{ \A check\.([A-Za-z0-9_]+)\.(type|exec|interval) \s* = \s* (\S.*) \z }xmsi ) { 
     213        elsif ( $line =~ m{ \A check\.([A-Za-z0-9_]+)\.(type|exec|interval|ignore) \s* = \s* (\S.*) \z }xmsi ) { 
    199214            $self->{ 'checks' }->{ $1 }->{ $2 } = $3; 
    200215            next; 
     
    258273=item * check.XXX.interval - how often to run check XXX 
    259274 
     275=item * check.XXX.ignore - should output be ignored? 
     276 
    260277=back 
    261278 
     
    272289"exec" parameter is simply command line, to be run via shell, that will run the program. 
    273290 
     291If exec parameter starts with '<' character (with optional whitespace characters after), it is treated as filename to be read, and logged. 
     292 
     293Due to the way it is internally processed - using the "<" approach makes sense only for periodic checks - in the case of persistent checks it would simply copy the file at the start of system_monitoring.pl, and 
     294ignore any changes to it afterwards. If you'd like to have something like 'tail -f' - use tail -f. 
     295 
    274296interval is time (in seconds) how often given program (of periodic type) should be run. 
     297 
     298ignore is an optional parameter which is checked using Perl boolean logic (any value other than an empty string or 0 is treated as true). Since system_monitoring doesn't allow setting an empty string as the value 
     299of an option - it's best not to include the ignore option for checks you want to log, and just add '...ignore=1' for those that you want to ignore. 
     300 
     301If ignore is set, system_monitoring will not log output from such check. 
     302 
     303This is helpful for building in compression of older logs, using for example: 
     304 
     305    check.cleanup.type=periodic 
     306    check.cleanup.interval=300 
     307    check.cleanup.exec=find /var/log/monitoring -type f -name '*.log' -mmin +120 -print0 | xargs -0 gzip 
     308    check.cleanup.ignore=1 
    275309 
    276310"XXX" (name of check) can consist only of upper and lower case letters, digits, and character _. That is it has to match regular expression: