root/trunk/tools/pg_bloat_report.pl

Revision 152, 17.2 kB (checked in by ssteinpreis, 4 years ago)

tid12312 removed reference to avoiding pg_temp_nnn namespaces, handled on command line

  • Property svn:executable set to *
Line 
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Carp;
5 use English qw( -no_match_vars );
6 use Getopt::Long qw(:config no_ignore_case);
7 use Data::Dumper;
8 use File::Basename;
9 use File::Path;
10 use File::Temp qw( tempdir );
11 use POSIX qw( strftime );
12
13 my $O = get_options();
14
15 validate_options();
16
17 my $sql = get_report_sql();
18
19 run_report( $sql );
20
21 send_report_by_mail();
22
23 cleanup();
24
25 exit;
26
27 sub make_path {
28
29     $File::Path::VERSION =~ m{\A(\d+)} or die "File::Path::VERSION doesn't start with digits? : $File::Path::VERSION\n";
30     my $ver = $1;
31     return $ver < 2 ? File::Path::mkpath( @_ ) : File::Path::make_path( @_ );
32 }
33
34 sub reformat_report {
35     my $txt = shift;
36     my @rows = split /\r?\n/, $txt;
37     return(0, '(0 rows)') if 0 == scalar @rows;
38
39     my $template;
40     if ($O->{'format'} eq 'simple') {
41         $template = "__[3u]i__. __[-60s]relation__ __[7u]wastedpages__ of __[7u]relpages__ pages wasted (__[.1f]percent__%), __pwastedbytes__ of __prelbytes__.\n";
42     } else {
43         $template = "-- __[3u]i__. __[-60s]relation__ __[7u]wastedpages__ of __[7u]relpages__ pages wasted (__[.1f]percent__%), __pwastedbytes__ of __prelbytes__.\n";
44         if ($O->{'mode'} eq 'tables') {
45             $template .= "CLUSTER __relation__; -- You might need to add: USING <some_index_name>\n\n";
46         } else {
47             $template .= "REINDEX INDEX __relation__;\n\n";
48         }
49     }
50
51     my $report = '';
52     my $i = 0;
53     for my $row ( @rows ) {
54         $i++;
55         my @columns = split /\|/, $row;
56         my %data;
57         @data{qw( relation reltuples relpages otta bloat wastedpages wastedbytes pwastedbytes relbytes prelbytes )} = @columns;
58         $data{ 'percent' } = 100 * $data{'wastedpages'} / $data{'relpages'};
59         $data{"i"} = $i;
60         my $string = $template;
61         $string =~ s#__(?:\[(.*?)\])?([a-z]+)__#sprintf '%'.($1||'s'), $data{$2}||''#ge;
62         $report .= $string;
63     }
64     return( scalar @rows, $report );
65 }
66
67 sub send_report_by_mail {
68     return unless $O->{ 'recipients' };
69     return if ( !-s 'report.stdout' ) && ( !-s 'report.stderr' );
70
71     my $report = slurp_file( 'report.stdout' );
72     my ( $row_count, $reformatted ) = reformat_report( $report );
73     if (   ( 0 == $row_count )
74         && ( !$O->{ 'send-zero' } ) )
75     {
76         log_info( 'Report contains 0 rows. Skipping mailing.' );
77         return;
78     }
79
80     my @recipients = map { my $q = $_; $q =~ s/\A\s*|\s*\z//; $q } split /\s*,\s*/, $O->{ 'recipients' };
81
82     my $subject = strftime( $O->{ 'subject' }, localtime time );
83     $subject =~ s/__(.*?)__/defined $O->{ $1 } ? $O->{$1} : ''/ge;
84
85     my @command = ();
86     push @command, $O->{ 'mailx' };
87     push @command, '-s', $subject;
88     push @command, @recipients;
89
90     my $command_string = join ' ', map { quotemeta $_ } @command;
91     $command_string .= ' 2>mailx.stderr >mailx.stdout';
92     my $mailx;
93     unless ( open $mailx, '|-', $command_string ) {
94         log_info( 'Cannot open mailx command (%s): %s', $command_string, $OS_ERROR, );
95         return;
96     }
97     if ( -s 'report.stderr' ) {
98         print $mailx "STDERR:\n" . slurp_file( 'report.stderr' ) . "\n";
99     }
100     print $mailx "Report:\n" . $reformatted . "\n";
101     close $mailx;
102
103     if ( -s 'mailx.stderr' ) {
104         log_info( "mailx STDERR:\n%s", slurp_file( 'mailx.stderr' ) );
105     }
106     if ( -s 'mailx.stdout' ) {
107         log_info( "mailx STDOUT:\n%s", slurp_file( 'mailx.stdout' ) );
108     }
109     else {
110         log_info( 'mailx (%s) finished', $command_string, );
111     }
112     return;
113 }
114
115 sub cleanup {
116     unlink qw( report.sql report.stdout report.stderr mailx.stderr mailx.stdout );
117     chdir '/';
118     return;
119 }
120
121 sub run_report {
122     my $sql = shift;
123
124     open my $fh, '>', 'report.sql' or die 'Cannot write to report.sql file in ' . $O->{ 'workdir' } . ' : ' . $OS_ERROR;
125     print $fh "begin; SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;\n";
126     print $fh $sql;
127     close $fh;
128
129     my @command = ();
130     push @command, $O->{ 'psql' };
131     push @command, '-d', $O->{ 'dbname' } if $O->{ 'dbname' };
132     push @command, '-h', $O->{ 'host' } if $O->{ 'host' };
133     push @command, '-p', $O->{ 'port' } if $O->{ 'port' };
134     push @command, '-U', $O->{ 'user' } if $O->{ 'user' };
135     push @command, '-f', 'report.sql';
136     push @command, '-qAt' unless $O->{ 'format' } eq 'table';
137
138     my $command_string = join ' ', map { quotemeta $_ } @command;
139     $command_string .= ' 2>report.stderr >report.stdout';
140
141     log_info( 'Calling psql: [%s]', $command_string );
142     my $status = system $command_string;
143     log_info( 'psql finished with status: %d', $status );
144
145     if ( -s 'report.stderr' ) {
146         log_info( "STDERR:\n%s", slurp_file( 'report.stderr' ) );
147     }
148     if ( -s 'report.stdout' ) {
149         log_info( "Report:\n%s", slurp_file( 'report.stdout' ) );
150     }
151     else {
152         log_info( 'There is no report!' );
153     }
154     return;
155 }
156
157 sub slurp_file {
158     my $file_name = shift;
159     open my $fh, '<', $file_name or die "Cannot open $file_name for reading: $OS_ERROR\n";
160     local $/;
161     my $content = <$fh>;
162     close $fh;
163     return $content;
164 }
165
166 sub log_info {
167     my ( $format, @args ) = @_;
168     $format =~ s/\s*\z/\n/;
169     my $timestamp = strftime( '%Y-%m-%d %H:%M:%S %z', localtime time );
170     printf { $O->{ 'logfh' } } "%s $format", $timestamp, @args;
171     return;
172 }
173
174 sub get_report_sql {
175
176     my $sql = get_base_sql();
177
178     my @where_parts = ();
179
180     push @where_parts, 'schemaname ~ ' . sqlify_regexp( $O->{ 'schema' } )                if $O->{ 'schema' };
181     push @where_parts, 'schemaname !~ ' . sqlify_regexp( $O->{ 'exclude-schema' } )       if $O->{ 'exclude-schema' };
182     push @where_parts, 'tablename ~ ' . sqlify_regexp( $O->{ 'relation-name' } )          if $O->{ 'relation-name' };
183     push @where_parts, 'tablename !~ ' . sqlify_regexp( $O->{ 'exclude-relation-name' } ) if $O->{ 'exclude-relation-name' };
184
185     if ( 0 == scalar @where_parts ) {
186         $sql =~ s/__EXTRA__WHERE__//g;
187         return $sql;
188     }
189     my $where_string = join ' AND ', @where_parts;
190     $sql =~ s/__EXTRA__WHERE__/ WHERE $where_string /g;
191
192     my @pages_where = ();
193
194     push @pages_where, 'relpages >= ' . $O->{ 'min-pages' }           if $O->{ 'min-pages' };
195     push @pages_where, 'wastedpages >= ' . $O->{ 'min-wasted-pages' } if $O->{ 'min-wasted-pages' };
196
197     return $sql if 0 == scalar @pages_where;
198
199     $sql = "SELECT * FROM ( $sql ) as subquery WHERE " . join( ' AND ', @pages_where );
200
201     return $sql;
202 }
203
204 sub sqlify_regexp {
205     my $regex_string = shift;
206     $regex_string =~ s/(['\\])/$1$1/g;
207     return "E'$regex_string'";
208 }
209
210 sub get_base_sql {
211     if ( 'tables' eq $O->{ 'mode' } ) {
212         return q{
213 SELECT
214   schemaname||'.'||tablename as relation, reltuples::bigint, relpages::bigint, otta,
215   ROUND(CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS bloat,
216   CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
217   CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
218   pg_size_pretty((CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END)::bigint) AS pwastedbytes,
219   bs*relpages::bigint as relbytes,
220   pg_size_pretty((bs*relpages::bigint)::bigint) as prelbytes
221 FROM (
222   SELECT
223     schemaname, tablename, cc.reltuples, cc.relpages, bs,
224     CEIL((cc.reltuples*((datahdr+ma-
225       (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta
226   FROM (
227     SELECT
228       ma,bs,schemaname,tablename,
229       (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
230       (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
231     FROM (
232       SELECT
233         schemaname, tablename, hdr, ma, bs,
234         SUM((1-null_frac)*avg_width) AS datawidth,
235         MAX(null_frac) AS maxfracsum,
236         hdr+(
237           SELECT 1+count(*)/8
238           FROM pg_stats s2
239           WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
240         ) AS nullhdr
241       FROM pg_stats s, (
242         SELECT
243           (SELECT current_setting('block_size')::numeric) AS bs,
244           CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
245           CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
246         FROM (SELECT version() AS v) AS foo
247       ) AS constants
248 __EXTRA__WHERE__
249       GROUP BY 1,2,3,4,5
250     ) AS foo
251   ) AS rs
252   JOIN pg_class cc ON cc.relname = rs.tablename and cc.relkind = 'r'
253   JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
254 ) AS sml
255 WHERE sml.relpages - otta > 128
256       AND ROUND(CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END,1) > 1.2
257       AND CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END > 1024 * 100
258 ORDER BY wastedbytes DESC
259         };
260     }
261     else {
262         return q{
263 SELECT
264   schemaname||'.'||iname as relation,ituples::bigint as reltuples, ipages::bigint as relpages, iotta as otta,
265   ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS bloat,
266   CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedpages,
267   CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedbytes,
268   pg_size_pretty((CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END)::bigint) AS pwastedbytes,
269   bs*ipages::bigint as relbytes,
270   pg_size_pretty((bs*ipages::bigint)::bigint) as prelbytes
271 FROM (
272   SELECT
273     schemaname, tablename, cc.reltuples, cc.relpages, bs,
274     CEIL((cc.reltuples*((datahdr+ma-
275       (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,
276     COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
277     COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
278   FROM (
279     SELECT
280       ma,bs,schemaname,tablename,
281       (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
282       (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
283     FROM (
284       SELECT
285         schemaname, tablename, hdr, ma, bs,
286         SUM((1-null_frac)*avg_width) AS datawidth,
287         MAX(null_frac) AS maxfracsum,
288         hdr+(
289           SELECT 1+count(*)/8
290           FROM pg_stats s2
291           WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
292         ) AS nullhdr
293       FROM pg_stats s, (
294         SELECT
295           (SELECT current_setting('block_size')::numeric) AS bs,
296           CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
297           CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
298         FROM (SELECT version() AS v) AS foo
299       ) AS constants
300 __EXTRA__WHERE__
301       GROUP BY 1,2,3,4,5
302     ) AS foo
303   ) AS rs
304   JOIN pg_class cc ON cc.relname = rs.tablename
305   JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
306   LEFT JOIN pg_index i ON indrelid = cc.oid
307   LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
308 ) AS sml
309 WHERE (sml.relpages - otta > 128 OR ipages - iotta > 128)
310   AND ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) > 1.2
311   AND CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END > 1024 * 100
312 ORDER BY wastedbytes DESC
313         };
314     }
315 }
316
317 sub get_options {
318     my %o = (
319         'exclude-schema' => '^(pg_.*|information_schema)$',
320         'format'         => 'table',
321         'logfile'        => '-',
322         'mailx'          => 'mailx',
323         'mode'           => 'tables',
324         'psql'           => 'psql',
325         'send-zero'      => undef,
326         'subject'        => '[%Y-%m-%d %H:%M:%S %z] Bloat report for __mode__ in __dbname__ at __host__:__port__',
327     );
328     show_help_and_die() unless GetOptions(
329         \%o,
330
331         # database connection
332         'host|h=s',
333         'port|p=i',
334         'user|U=s',
335         'dbname|d=s',
336
337         # object choosing
338         'mode|m=s',
339         'schema|n=s',
340         'exclude-schema|N=s',
341         'relation-name|t=s',
342         'exclude-relation-name|T=s',
343         'min-pages|a=i',
344         'min-wasted-pages|A=i',
345
346         # system options
347         'logfile|l=s',
348         'workdir|w=s',
349         'psql|q=s',
350         'mailx|x=s',
351
352         # mailing
353         'recipients|r=s',
354         'subject|s=s',
355         'send-zero|z',
356
357         # other
358         'format|f=s',
359         'help|?',
360     );
361     show_help_and_die() if $o{ 'help' };
362     return \%o;
363 }
364
365 sub validate_options {
366     $O->{ 'mode' } = 'tables'  if substr( 'tables',  0, length( $O->{ 'mode' } ) ) eq $O->{ 'mode' };    # make it tables for any prefix of 'tables'
367     $O->{ 'mode' } = 'indexes' if substr( 'indexes', 0, length( $O->{ 'mode' } ) ) eq $O->{ 'mode' };    # make it indexes for any prefix of 'indexes'
368     show_help_and_die( 'Given mode (%s) is invalid.', $O->{ 'mode' } ) unless $O->{ 'mode' } =~ m{\A(?:tables|indexes)\z};
369
370     for my $regexp_key ( qw( schema exclude-schema relation-name exclude-relation-name ) ) {
371         next unless defined $O->{ $regexp_key };
372         my $val = $O->{ $regexp_key };
373         eval { my $re = qr{$val}; };
374         next unless $EVAL_ERROR;
375         show_help_and_die( 'Invalid regexp: %s: "%s": %s', $regexp_key, $val, $EVAL_ERROR );
376     }
377
378     delete $O->{ 'logfile' } if defined $O->{ 'logfile' } && '-' eq $O->{ 'logfile' };
379     if ( defined $O->{ 'logfile' } ) {
380         $O->{ 'logfile' } = strftime( $O->{ 'logfile' }, localtime time );
381         my $base_dir = dirname( $O->{ 'logfile' } );
382         unless ( -d $base_dir ) {
383             eval { make_path( $base_dir ); };
384             if ( my $error = $EVAL_ERROR ) {
385                 show_help_and_die( "%s doesn't exist, and cannot be created (via --logfile): %s", $base_dir, $error );
386             }
387         }
388         open my $fh, '>>', $O->{ 'logfile' } or show_help_and_die( 'Cannot write to %s: %s', $O->{ 'logfile' }, $OS_ERROR );
389         $O->{ 'logfh' } = $fh;
390     }
391     else {
392         $O->{ 'logfh' } = \*STDOUT;
393     }
394
395     if ( defined $O->{ 'workdir' } ) {
396         $O->{ 'workdir' } = strftime( $O->{ 'workdir' }, localtime time );
397         unless ( -d $O->{ 'workdir' } ) {
398             eval { make_path( $O->{ 'workdir' } ); };
399             if ( my $error = $EVAL_ERROR ) {
400                 show_help_and_die( "%s doesn't exist, and cannot be created (via --workdir): %s", $O->{ 'workdir' }, $error );
401             }
402         }
403     }
404     else {
405         $O->{ 'workdir' } = tempdir( 'CLEANUP' => 1 );
406     }
407
408     show_help_and_die( 'psql program name missing!' ) unless $O->{ 'psql' };
409     show_help_and_die( 'mailx program name missing!' ) if ( !$O->{ 'mailx' } ) && ( $O->{ 'recipients' } );
410
411     show_help_and_dir( 'Bad format (%s) requsted!', $O->{ 'format' } ) unless $O->{ 'format' } =~ m{\A(?:table|simple|sql)\z};
412
413     chdir $O->{ 'workdir' };
414     return;
415 }
416
417 sub show_help_and_die {
418     my ( $format, @args ) = @_;
419     if ( defined $format ) {
420         $format =~ s/\s*\z/\n/;
421         printf STDERR $format, @args;
422     }
423     print STDERR <<_EOH_;
424 Syntax:
425    $PROGRAM_NAME [options]
426
427 Options:
428   [ database connection ]
429    --host                  (-h) : database server host or socket directory
430    --port                  (-p) : database server port
431    --user                  (-U) : database user name
432    --dbname                (-d) : database name to connect to
433
434   [ object choosing ]
435    --mode                  (-m) : tables/indexes - show information about which objects.
436    --schema                (-n) : regexp to choose which schemas should the report be about
437    --exclude-schema        (-N) : regexp to choose which schemas to skip from report
438    --relation-name         (-t) : regexp to choose which relations to report on
439    --exclude-relation-name (-T) : regexp to choose which relations should be excluded from report
440    --min-pages             (-a) : minimal number of pages object have to have to be on report
441    --min-wasted-pages      (-A) : minimal number of wasted pages object have to have to be on report
442
443   [ system options ]
444    --logfile               (-l) : where to log information about report execution
445    --workdir               (-w) : where to create temporary files
446    --psql                  (-q) : which psql binary to use
447    --mailx                 (-x) : which mailx binary to use
448
449   [ mailing ]
450    --recipients            (-r) : comma separated list of emails that will get the report
451    --subject               (-s) : subject of the mail to be sent with report
452    --send-zero             (-z) : if it is set $PROGRAM_NAME will send email even if there are no bloated relations.
453
454   [ other ]
455    --format                (-f) : format of report (table, simple or sql)
456    --help                  (-?) : show this help page
457
458 Defaults:
459    --exclude-schema   '^(pg_.*|information_schema)\$'
460    --format           table
461    --logfile          -
462    --mailx            mailx
463    --min-pages        0
464    --min-wasted-pages 0
465    --mode             tables
466    --psql             psql
467    --subject          [%Y-%m-%d %H:%M:%S %z] Bloat report for __mode__ in __dbname__ at __host__:__port__
468
469 Notes:
470     logfile, workdir and subject can contain strftime-styled %marks.
471     Additionally subject can will be parsed for __XXX__ strings, and there will be replaced by value of XXX option (full names of options have to be provided).
472     $PROGRAM_NAME will try to create necessary directories (logfile and workdir) if they don't exist.
473     - as logfile means logging to STDOUT.
474 _EOH_
475     exit 1;
476 }
Note: See TracBrowser for help on using the browser.