root/trunk/tools/check.zero.pages

Revision 264, 14.6 kB (checked in by depesz, 3 years ago)

initial version of script to check if there are pages willed with chr(0) in Pg

  • Property svn:executable set to *
Line 
1 #!/usr/bin/env perl
2 package main;
3 use strict;
4 use warnings;
5 my $program = Omni::Program::Pg::CheckZeroPages->new();
6 $program->run();
7 exit;
8
9 package Omni::Program::Pg::CheckZeroPages;
10 use strict;
11 use warnings;
12 use Carp qw( croak carp );
13 use English qw( -no_match_vars );
14 use Getopt::Long qw( :config no_ignore_case );
15 use Data::Dumper;
16 use File::Spec;
17 use Pod::Usage;
18 use POSIX qw( :sys_wait_h );
19 use File::Temp qw( tempfile tempdir );
20 use POSIX qw( strftime floor );
21 use Time::HiRes qw( time );
22
23 our %killed_pids = ();
24
25 sub REAPER {
26     my $child;
27     while ( ( $child = waitpid( -1, WNOHANG ) ) > 0 ) {
28         $killed_pids{ $child } = $CHILD_ERROR;
29     }
30     $SIG{ 'CHLD' } = \&REAPER;
31     return;
32 }
33
34 sub log {
35     my $self = shift;
36     my ( $format, @args ) = @_;
37     $format =~ s/\s*\z//;
38
39     my $time        = time();
40     my $date_time   = strftime( '%Y-%m-%d %H:%M:%S', localtime $time );
41     my $miliseconds = ( $time * 1000 ) % 1000;
42
43     my $time_stamp = sprintf "%s.%03u", $date_time, $miliseconds;
44     my $msg = sprintf $format, @args;
45
46     printf "%s : %s\n", $time_stamp, $msg;
47     return;
48 }
49
50 sub scan_in_parallel {
51     my $self = shift;
52
53     $SIG{ 'CHLD' } = \&REAPER;
54     $OUTPUT_AUTOFLUSH = 1;
55
56     my $total_files = scalar @{ $self->{ 'files' } };
57
58     my $alert   = 0;
59     my $kids    = {};
60     my $started = 0;
61     while ( 1 ) {
62         my @pids = keys %killed_pids;
63         for my $killed ( @pids ) {
64             my $rc  = delete $killed_pids{ $killed };
65             my $kid = delete $kids->{ $killed };
66             next unless $rc;
67             $alert = 1;
68             print "\nAborting. Got non-zero return from one of workers: " . Dumper( $kid ) . "\n";
69         }
70         while ( $self->{ 'jobs' } > scalar keys %{ $kids } ) {
71             last if $alert;
72             my $next = shift @{ $self->{ 'files' } };
73             $started++;
74             last unless defined $next;
75
76             my $pid = fork();
77             croak "cannot fork" unless defined $pid;
78             if ( $pid == 0 ) {
79
80                 # It's worker process.
81                 delete $SIG{ 'CHLD' };
82                 $PROGRAM_NAME = sprintf 'File %d of %d : %s ( %s : %s )', $started, $total_files, $next->{ 'filename' }, $next->{ 'what' }, $next->{ 'name' };
83                 $self->scan_file( $next );
84                 exit;
85             }
86
87             # It's master.
88             $kids->{ $pid } = $next;
89         }
90
91         printf "Working on %d files. %d more in queue.\r", scalar keys %{ $kids }, scalar @{ $self->{ 'files' } };
92         last if 0 == scalar keys %{ $kids };
93         sleep 60;    # sleep will get interrupted when child exits, and then the loop will repeat.
94     }
95     print "\n";
96     return;
97 }
98
99 sub scan_file {
100     my $self       = shift;
101     my $F          = shift;
102     my $bad_record = "\0" x 8192;
103     open my $fh, '<', $F->{ 'filename' } or croak( "Cannot read file " . $F->{ 'filename' } . ": $OS_ERROR\n" );
104     $INPUT_RECORD_SEPARATOR = \8192;
105     my $i = 0;
106     while ( my $l = <$fh> ) {
107         if ( $bad_record eq $l ) {
108             my $what = 'table';
109             $what = 'index' if $F->{'what'} eq 'i';
110             $what = 'toast' if $F->{'what'} eq 't';
111             printf "\nBad page #%d in %s %s ( file: %s )\n", 1 + $F->{ 'offset' } + $i, $what, $F->{ 'name' }, $F->{ 'filename' };
112         }
113         $i++;
114     }
115     close $fh;
116     return;
117 }
118
119 sub new {
120     my $class = shift;
121     my $self  = {};
122     bless $self, $class;
123     return $self;
124 }
125
126 sub run {
127     my $self = shift;
128     $self->read_options();
129     $self->get_meta_information();
130     $self->show_running_details();
131     $self->limit_relations();
132     $self->confirm_work();
133
134     $self->run_scan();
135     return;
136 }
137
138 sub run_scan {
139     my $self = shift;
140     $self->make_list_of_files();
141     $self->scan_in_parallel();
142     return;
143 }
144
145 sub make_list_of_files {
146     my $self  = shift;
147     my @files = ();
148     for my $i ( @{ $self->{ 'relations' } } ) {
149         my $size = $i->{ 'relation_size' };
150         next if 0 == $size;
151
152         my $base_filename;
153         if ( $self->{ 'database_version' } < 90000 ) {
154             my $base_path = defined $i->{ 'tablespace_path' } ? $i->{ 'tablespace_path' } : File::Spec->catdir( $self->{ 'datadir' }, 'base' );
155             $base_filename = File::Spec->catfile( $base_path, $self->{ 'database_oid' }, $i->{ 'relation_file' } );
156         }
157         else {
158             $base_filename = File::Spec->catfile( $self->{ 'datadir' }, $i->{ 'relation_file' } );
159         }
160
161         my $max               = 1024**3;
162         my $pages_per_segment = $max / ( 8 * 1024 );
163         my @sizes             = ( $max ) x ( floor( $size / $max ) );
164         push @sizes, $size % $max;
165
166         my $num = 0;
167         for my $s ( @sizes ) {
168             my $f = {};
169             my $suffix = $num ? ".$num" : "";
170
171             $f->{ 'size' }     = $s;
172             $f->{ 'offset' }   = $num * $pages_per_segment;
173             $f->{ 'filename' } = $base_filename . $suffix;
174             $f->{ 'what' }     = $i->{ 'relation_kind' };
175             $f->{ 'name' }     = $i->{ 'fullname' };
176             $f->{ 'toast' }    = $i->{ 'toast_for_schema' } . '.' . $i->{ 'toast_for_name' }
177                 if defined $i->{ 'toast_for_schema' };
178
179             push @files, $f;
180             $num++;
181
182         }
183     }
184
185     @files = sort { $b->{ 'size' } <=> $a->{ 'size' } } @files;
186     $self->{ 'files' } = \@files;
187     return;
188 }
189
190 sub limit_relations {
191     my $self    = shift;
192     my @to_work = ();
193     if ( $self->{ 'match' } ) {
194         @to_work = grep { $_->{ 'fullname' } =~ $self->{ 'match' } } @{ $self->{ 'relations' } };
195     }
196     else {
197         @to_work = grep { $_->{ 'fullname' } !~ $self->{ 'negative-match' } } @{ $self->{ 'relations' } };
198     }
199     $self->{ 'relations' } = \@to_work;
200     return;
201 }
202
203 sub get_meta_information {
204     my $self = shift;
205
206     my $server_info = $self->psql(
207         q{
208             SET search_path = pg_catalog;
209             SELECT
210                 current_user,
211                 d.datname,
212                 d.oid,
213                 current_setting('data_directory'),
214                 current_setting('server_version_num')
215             FROM
216                 pg_database d
217             WHERE
218                 d.datname = current_database()
219             ;
220         }
221     );
222     @{ $self }{ qw( database_user database_name database_oid datadir database_version ) } = @{ $server_info->[ 0 ] };
223     my $query = q{
224             SET search_path = pg_catalog;
225             SELECT
226                 n.nspname,
227                 c.relname,
228                 c.relkind,
229                 c.relfilenode,
230                 pg_relation_size(c.oid),
231                 tn.nspname,
232                 tc.relname,
233                 t.spclocation
234             FROM
235                 pg_class c
236                 join pg_namespace n on c.relnamespace = n.oid
237                 left outer join pg_tablespace t on c.reltablespace = t.oid
238                 left outer join pg_class tc on c.relname ~ '^pg_toast_[0-9]+(_index)?$' AND tc.oid::text = regexp_replace( c.relname, '[^0-9]+', '', 'g')
239                 left outer join pg_namespace tn on tc.relnamespace = tn.oid
240             WHERE
241                 c.relkind in ('r', 'i', 't')
242                 AND c.relfilenode > 0
243                 AND pg_relation_size(c.oid) > 0
244             ;
245         };
246     if ( $self->{ 'database_version' } >= 90000 ) {
247         $query = q{
248             SET search_path = pg_catalog;
249             SELECT
250                 n.nspname,
251                 c.relname,
252                 c.relkind,
253                 pg_relation_filepath( c.oid ),
254                 pg_relation_size(c.oid),
255                 tn.nspname,
256                 tc.relname,
257                 t.spclocation
258             FROM
259                 pg_class c
260                 join pg_namespace n on c.relnamespace = n.oid
261                 left outer join pg_tablespace t on c.reltablespace = t.oid
262                 left outer join pg_class tc on c.relname ~ '^pg_toast_[0-9]+(_index)?$' AND tc.oid::text = regexp_replace( c.relname, '[^0-9]+', '', 'g')
263                 left outer join pg_namespace tn on tc.relnamespace = tn.oid
264             WHERE
265                 c.relkind in ('r', 'i', 't')
266                 AND c.relfilenode > 0
267                 AND pg_relation_size(c.oid) > 0
268             ;
269         };
270
271     }
272     my $relations_info = $self->psql( $query );
273
274     my @relations = ();
275     for my $row ( @{ $relations_info } ) {
276         my $i = {};
277         @{ $i }{ qw(schema_name relation_name relation_kind relation_file relation_size toast_for_schema toast_for_name tablespace_path) } = @{ $row };
278         $i->{ 'fullname' } = join '.', $i->{ 'schema_name' }, $i->{ 'relation_name' };
279         push @relations, $i;
280     }
281     $self->{ 'relations' } = [ sort { $b->{ 'relation_size' } <=> $a->{ 'relation_size' } || $a->{ 'fullname' } cmp $b->{ 'fullname' } } @relations ];
282     return;
283 }
284
285 sub confirm_work {
286     my $self = shift;
287     while ( 1 ) {
288         printf "\n\nAre you sure you want to continue?\n";
289         printf "Enter YES to continue, or LIST to list objects that will be checked: ";
290         my $input = <STDIN>;
291         return if $input =~ m{\AYES\r?\n?\z};
292         if ( $input =~ m{\ALIST\r?\n?\z} ) {
293             print "- $_\n" for sort map { $_->{ 'fullname' } } @{ $self->{ 'relations' } };
294             next;
295         }
296         exit;
297     }
298 }
299
300 sub show_running_details {
301     my $self = shift;
302
303     printf "Config:\n";
304     for my $key ( sort qw( psql jobs negative-match match ) ) {
305         printf "%-15s : %s\n", $key, ( $self->{ $key } || '' ),;
306     }
307
308     my @tables = map { $_->{ 'fullname' } }
309         grep { 'r' eq $_->{ 'relation_kind' } } @{ $self->{ 'relations' } };
310     splice( @tables, 5 );
311
312     printf "\nDatabase details:\n";
313     printf "User           : %s\n", $self->{ 'database_user' };
314     printf "Database       : %s\n", $self->{ 'database_name' };
315     printf "Largest tables : %s\n", shift @tables;
316     printf "               : %s\n", $_ for @tables;
317     return;
318 }
319
320 sub read_options {
321     my $self = shift;
322
323     my $opts = {
324         'psql'           => 'psql',
325         'jobs'           => 1,
326         'negative-match' => undef,
327         'match'          => undef,
328     };
329
330     my $is_ok = GetOptions( $opts, qw( help|? jobs|j=i psql|p=s match|m=s negative-match|n=s) );
331
332     pod2usage( '-verbose' => 1, ) unless $is_ok;
333     pod2usage( '-verbose' => 99, '-sections' => [ qw( DESCRIPTION SYNOPSIS OPTIONS ) ] ) if $opts->{ 'help' };
334
335     pod2usage( '-message' => 'Number of jobs has to be not-empty.' ) if '' eq $opts->{ 'jobs' };
336     $opts->{ 'jobs' } = int( $opts->{ 'jobs' } );
337     pod2usage( '-message' => 'Number of jobs cannot be less than 1.' )   if 1 > $opts->{ 'jobs' };
338     pod2usage( '-message' => 'Number of jobs cannot be more than 100.' ) if 100 < $opts->{ 'jobs' };
339
340     pod2usage( '-message' => 'You cannot provide both --match and --negative-match.' )   if ( $opts->{ 'match' } )  && ( $opts->{ 'negative-match' } );
341     pod2usage( '-message' => 'You have to provide either --match or --negative-match.' ) if ( !$opts->{ 'match' } ) && ( !$opts->{ 'negative-match' } );
342
343     if ( $opts->{ 'match' } ) {
344         delete $opts->{ 'negative-match' };
345         my $t = $opts->{ 'match' };
346         $opts->{ 'match' } = qr{$t};
347     }
348     else {
349         delete $opts->{ 'match' };
350         if ( $opts->{ 'negative-match' } ) {
351             my $t = $opts->{ 'negative-match' };
352             $opts->{ 'negative-match' } = qr{$t};
353         }
354         else {
355             delete $opts->{ 'negative-match' };
356         }
357     }
358
359     @{ $self }{ keys %{ $opts } } = values %{ $opts };
360     return;
361 }
362
363 sub psql {
364     my $self       = shift;
365     my $query      = shift;
366     my $query_file = shift;
367
368     my $remove_query_file = 1;
369
370     my $query_fh;
371     if ( defined $query_file ) {
372         $remove_query_file = 0;
373         open $query_fh, '>', $query_file or croak( "Cannot write to $query_file: $OS_ERROR\n" );
374     }
375     else {
376         ( $query_fh, $query_file ) = tempfile( 'fast.dump.XXXXXXXX', 'TMPDIR' => 1, );
377     }
378
379     print $query_fh $query;
380     close $query_fh;
381     my $output = $self->run_command( qw( psql -qAtX -F ), "\t", '-f', $query_file );
382     unlink $query_file if $remove_query_file;
383
384     my @rows = grep { '' ne $_ } split /\r?\n/, $output;
385     my @data = map { [ split /\t/, $_ ] } @rows;
386
387     return \@data;
388 }
389
390 sub run_command {
391     my $self = shift;
392     my ( @cmd ) = @_;
393
394     # Use paths provided by user as command line options
395     $cmd[ 0 ] = $self->{ $cmd[ 0 ] } if $self->{ $cmd[ 0 ] };
396
397     my $real_command = join( ' ', map { quotemeta } @cmd );
398
399     my ( $stdout_fh, $stdout_filename ) = tempfile( 'fast.dump.XXXXXXXX', 'DIR' => $self->{ 'tmpdir' }, );
400     my ( $stderr_fh, $stderr_filename ) = tempfile( 'fast.dump.XXXXXXXX', 'DIR' => $self->{ 'tmpdir' }, );
401
402     $real_command .= sprintf ' 2>%s >%s', quotemeta $stderr_filename, quotemeta $stdout_filename;
403
404     system $real_command;
405     local $/ = undef;
406     my $stdout = <$stdout_fh>;
407     my $stderr = <$stderr_fh>;
408
409     close $stdout_fh;
410     close $stderr_fh;
411
412     unlink( $stdout_filename, $stderr_filename );
413
414     my $error_code;
415     if ( $CHILD_ERROR == -1 ) {
416         $error_code = $OS_ERROR;
417     }
418     elsif ( $CHILD_ERROR & 127 ) {
419         $error_code = sprintf "child died with signal %d, %s coredump\n", ( $CHILD_ERROR & 127 ), ( $CHILD_ERROR & 128 ) ? 'with' : 'without';
420     }
421     else {
422         $error_code = $CHILD_ERROR >> 8;
423     }
424
425     croak( "Couldn't run $real_command : " . $stderr ) if $error_code;
426
427     return $stdout;
428 }
429
430 =head1 NAME
431
432 check.zero.pages - Program to scan data files of PostgreSQL and report all pages that have all chr(0) content.
433
434 =head1 SYNOPSIS
435
436 check.zero.pages [--jobs=n] [--psql=/usr/bin/psql] [--help] [--match=^public] [--negative-match=^pg_]
437
438 =head1 OPTIONS
439
440 =over
441
442 =item --jobs - how many concurrent processes to run when scanning files.
443 Defaults to 1.
444
445 =item --psql - path to psql program. Defaults to "psql", which will use
446 $PATH environment variable to find it.
447
448 =item --match - perl regular expression that (if given) will be used to
449 limit objects (tables/indexes) to those matching regular expression.
450
451 =item --negative-match - perl regular expression that (if given) will be
452 used to limit objects (tables/indexes) to those B<not> matching the
453 expression.
454
455 =item --help - shows information about usage of the program.
456
457 =back
458
459 Important - you cannot provide both --match and --negative-match in the same
460 call.
461
462 All options can be given in abbreviated version, using single dash character
463 and first letter of option, like:
464
465     check.zero.pages -p /opt/pgsql/bin/psql -j 32 -m ^pg
466
467 Database connection details should be given using PG* environment variables.
468
469 =head1 DESCRIPTION
470
471 This program can be used to quickly (relatively) scan whole database to see
472 if there are any pages within tables and/or indexes that contain only chr(0)
473 bytes. Such pages will cause problems when using the table/index, and it
474 should be checked/fixed as soon as possible.
Note: See TracBrowser for help on using the browser.