root/trunk/tools/pg_bloat_report.pl

Revision 41, 14.0 kB (checked in by depesz, 5 years ago)

new version, with --help, and more run options. migrated from bash to perl

  • Property svn:executable set to *
Line 
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Carp;
5 use English qw( -no_match_vars );
6 use Getopt::Long;
7 use Data::Dumper;
8 use File::Basename;
9 use File::Path qw( make_path );
10 use File::Temp qw( tempdir );
11 use POSIX qw( strftime );
12
13 my $O = get_options();
14
15 validate_options();
16
17 my $sql = get_report_sql();
18
19 run_report( $sql );
20
21 send_report_by_mail();
22
23 cleanup();
24
25 exit;
26
27 sub send_report_by_mail {
28     return unless $O->{ 'recipients' };
29     return if ( !-s 'report.stdout' ) && ( !-s 'report.stderr' );
30
31     my @recipients = map { my $q = $_; $q =~ s/\A\s*|\s*\z//; $q } split /\s*,\s*/, $O->{ 'recipients' };
32
33     my $subject = strftime( $O->{ 'subject' }, localtime time );
34     $subject =~ s/__(.*?)__/defined $O->{ $1 } ? $O->{$1} : ''/ge;
35
36     my @command = ();
37     push @command, $O->{ 'mailx' };
38     push @command, '-s', $subject;
39     push @command, @recipients;
40
41     my $command_string = join ' ', map { quotemeta $_ } @command;
42     $command_string .= ' 2>mailx.stderr >mailx.stdout';
43     my $mailx;
44     unless ( open $mailx, '|-', $command_string ) {
45         log_info( 'Cannot open mailx command (%s): %s', $command_string, $OS_ERROR, );
46         return;
47     }
48     if ( -s 'report.stderr' ) {
49         print $mailx "STDERR:\n" . slurp_file( 'report.stderr' ) . "\n";
50     }
51     if ( -s 'report.stdout' ) {
52         print $mailx "Report:\n" . slurp_file( 'report.stdout' ) . "\n";
53     }
54     else {
55         print $mailx "There is no report!\n";
56     }
57     close $mailx;
58
59     if ( -s 'mailx.stderr' ) {
60         log_info( "mailx STDERR:\n%s", slurp_file( 'mailx.stderr' ) );
61     }
62     if ( -s 'mailx.stdout' ) {
63         log_info( "mailx STDOUT:\n%s", slurp_file( 'mailx.stdout' ) );
64     }
65     else {
66         log_info( 'mailx (%s) finished', $command_string, );
67     }
68     return;
69 }
70
71 sub cleanup {
72     unlink qw( report.sql report.stdout report.stderr mailx.stderr mailx.stdout );
73     chdir '/';
74     return;
75 }
76
77 sub run_report {
78     my $sql = shift;
79
80     open my $fh, '>', 'report.sql' or die 'Cannot write to report.sql file in ' . $O->{ 'workdir' } . ' : ' . $OS_ERROR;
81     print $fh $sql;
82     close $fh;
83
84     my @command = ();
85     push @command, $O->{ 'psql' };
86     push @command, '-d', $O->{ 'dbname' } if $O->{ 'dbname' };
87     push @command, '-h', $O->{ 'host' } if $O->{ 'host' };
88     push @command, '-p', $O->{ 'port' } if $O->{ 'port' };
89     push @command, '-U', $O->{ 'user' } if $O->{ 'user' };
90     push @command, '-f', 'report.sql';
91
92     my $command_string = join ' ', map { quotemeta $_ } @command;
93     $command_string .= ' 2>report.stderr >report.stdout';
94
95     log_info( 'Calling psql: [%s]', $command_string );
96     my $status = system $command_string;
97     log_info( 'psql finished with status: %d', $status );
98
99     if ( -s 'report.stderr' ) {
100         log_info( "STDERR:\n%s", slurp_file( 'report.stderr' ) );
101     }
102     if ( -s 'report.stdout' ) {
103         log_info( "Report:\n%s", slurp_file( 'report.stdout' ) );
104     }
105     else {
106         log_info( 'There is no report!' );
107     }
108     return;
109 }
110
111 sub slurp_file {
112     my $file_name = shift;
113     open my $fh, '<', $file_name or die "Cannot open $file_name for reading: $OS_ERROR\n";
114     local $/;
115     my $content = <$fh>;
116     close $fh;
117     return $content;
118 }
119
120 sub log_info {
121     my ( $format, @args ) = @_;
122     $format =~ s/\s*\z/\n/;
123     my $timestamp = strftime( '%Y-%m-%d %H:%M:%S %z', localtime time );
124     printf { $O->{ 'logfh' } } "%s $format", $timestamp, @args;
125     return;
126 }
127
128 sub get_report_sql {
129
130     my $sql = get_base_sql();
131
132     my @where_parts = ();
133
134     push @where_parts, 'schemaname ~ ' . sqlify_regexp( $O->{ 'schema' } )                if $O->{ 'schema' };
135     push @where_parts, 'schemaname !~ ' . sqlify_regexp( $O->{ 'exclude-schema' } )       if $O->{ 'exclude-schema' };
136     push @where_parts, 'tablename ~ ' . sqlify_regexp( $O->{ 'relation-name' } )          if $O->{ 'relation-name' };
137     push @where_parts, 'tablename !~ ' . sqlify_regexp( $O->{ 'exclude-relation-name' } ) if $O->{ 'exclude-relation-name' };
138
139     if ( 0 == scalar @where_parts ) {
140         $sql =~ s/__EXTRA__WHERE__//g;
141         return $sql;
142     }
143     my $where_string = join ' AND ', @where_parts;
144     $sql =~ s/__EXTRA__WHERE__/ WHERE $where_string /g;
145
146     return $sql;
147 }
148
149 sub sqlify_regexp {
150     my $regex_string = shift;
151     $regex_string =~ s/(['\\])/$1$1/g;
152     return "E'$regex_string'";
153 }
154
155 sub get_base_sql {
156     if ( 'tables' eq $O->{ 'mode' } ) {
157         return q{
158 SELECT
159   schemaname||'.'||tablename as relation, reltuples::bigint, relpages::bigint, otta,
160   ROUND(CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END,1) AS tbloat,
161   CASE WHEN relpages < otta THEN 0 ELSE relpages::bigint - otta END AS wastedpages,
162   CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END AS wastedbytes,
163   pg_size_pretty((CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END)::bigint) AS pwastedbytes
164 FROM (
165   SELECT
166     schemaname, tablename, cc.reltuples, cc.relpages, bs,
167     CEIL((cc.reltuples*((datahdr+ma-
168       (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta
169   FROM (
170     SELECT
171       ma,bs,schemaname,tablename,
172       (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
173       (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
174     FROM (
175       SELECT
176         schemaname, tablename, hdr, ma, bs,
177         SUM((1-null_frac)*avg_width) AS datawidth,
178         MAX(null_frac) AS maxfracsum,
179         hdr+(
180           SELECT 1+count(*)/8
181           FROM pg_stats s2
182           WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
183         ) AS nullhdr
184       FROM pg_stats s, (
185         SELECT
186           (SELECT current_setting('block_size')::numeric) AS bs,
187           CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
188           CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
189         FROM (SELECT version() AS v) AS foo
190       ) AS constants
191 __EXTRA__WHERE__
192       GROUP BY 1,2,3,4,5
193     ) AS foo
194   ) AS rs
195   JOIN pg_class cc ON cc.relname = rs.tablename
196   JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
197 ) AS sml
198 WHERE sml.relpages - otta > 128
199       AND ROUND(CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages/otta::numeric END,1) > 1.2
200       AND CASE WHEN relpages < otta THEN 0 ELSE bs*(sml.relpages-otta)::bigint END > 1024 * 100
201 ORDER BY wastedbytes DESC
202         };
203     }
204     else {
205         return q{
206 SELECT
207   schemaname||'.'||iname as relation,ituples::bigint, ipages::bigint, iotta,
208   ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) AS ibloat,
209   CASE WHEN ipages < iotta THEN 0 ELSE ipages::bigint - iotta END AS wastedipages,
210   CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END AS wastedibytes,
211   pg_size_pretty((CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END)::bigint) AS pwastedisize
212 FROM (
213   SELECT
214     schemaname, tablename, cc.reltuples, cc.relpages, bs,
215     CEIL((cc.reltuples*((datahdr+ma-
216       (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::float)) AS otta,
217     COALESCE(c2.relname,'?') AS iname, COALESCE(c2.reltuples,0) AS ituples, COALESCE(c2.relpages,0) AS ipages,
218     COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::float)),0) AS iotta -- very rough approximation, assumes all cols
219   FROM (
220     SELECT
221       ma,bs,schemaname,tablename,
222       (datawidth+(hdr+ma-(case when hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
223       (maxfracsum*(nullhdr+ma-(case when nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
224     FROM (
225       SELECT
226         schemaname, tablename, hdr, ma, bs,
227         SUM((1-null_frac)*avg_width) AS datawidth,
228         MAX(null_frac) AS maxfracsum,
229         hdr+(
230           SELECT 1+count(*)/8
231           FROM pg_stats s2
232           WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
233         ) AS nullhdr
234       FROM pg_stats s, (
235         SELECT
236           (SELECT current_setting('block_size')::numeric) AS bs,
237           CASE WHEN substring(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
238           CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
239         FROM (SELECT version() AS v) AS foo
240       ) AS constants
241 __EXTRA__WHERE__
242       GROUP BY 1,2,3,4,5
243     ) AS foo
244   ) AS rs
245   JOIN pg_class cc ON cc.relname = rs.tablename
246   JOIN pg_namespace nn ON cc.relnamespace = nn.oid AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
247   LEFT JOIN pg_index i ON indrelid = cc.oid
248   LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
249 ) AS sml
250 WHERE (sml.relpages - otta > 128 OR ipages - iotta > 128)
251   AND ROUND(CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages/iotta::numeric END,1) > 1.2
252   AND CASE WHEN ipages < iotta THEN 0 ELSE bs*(ipages-iotta) END > 1024 * 100
253 ORDER BY wastedibytes DESC
254         };
255     }
256 }
257
258 sub get_options {
259     my %o = (
260         'exclude-schema' => '^(pg_.*|information_schema)$',
261         'logfile'        => '-',
262         'mode'           => 'tables',
263         'psql'           => 'psql',
264         'mailx'          => 'mailx',
265         'subject'        => '[%Y-%m-%d %H:%M:%S %z] Bloat report for __mode__ in __dbname__ at __host__:__port__',
266     );
267     show_help_and_die() unless GetOptions(
268         \%o,
269
270         # database connection
271         'host|h=s',
272         'port|p=i',
273         'user|U=s',
274         'dbname|d=s',
275
276         # object choosing
277         'mode|m',
278         'schema|n=s',
279         'exclude-schema|N=s',
280         'relation-name|t=s',
281         'exclude-relation-name|T=s',
282
283         # system options
284         'logfile|l=s',
285         'workdir|w=s',
286         'psql|q=s',
287         'mailx|x=s',
288
289         # mailing
290         'recipients|r=s',
291         'subject|s=s',
292
293         # other
294         'help|?',
295     );
296     show_help_and_die() if $o{ 'help' };
297     return \%o;
298 }
299
300 sub validate_options {
301     $O->{ 'mode' } = 'tables'  if substr( 'tables',  0, length( $O->{ 'mode' } ) ) eq $O->{ 'mode' };    # make it tables for any prefix of 'tables'
302     $O->{ 'mode' } = 'indexes' if substr( 'indexes', 0, length( $O->{ 'mode' } ) ) eq $O->{ 'mode' };    # make it indexes for any prefix of 'indexes'
303     show_help_and_die( 'Given mode (%) is invalid.', $O->{ 'mode' } ) unless $O->{ 'mode' } =~ m{\A(?:tables|indexes)\z};
304
305     for my $regexp_key ( qw( schema exclude-schema relation-name exclude-relation-name ) ) {
306         next unless defined $O->{ $regexp_key };
307         my $val = $O->{ $regexp_key };
308         eval { my $re = qr{$val}; };
309         next unless $EVAL_ERROR;
310         show_help_and_die( 'Invalid regexp: %s: "%s": %s', $regexp_key, $val, $EVAL_ERROR );
311     }
312
313     delete $O->{ 'logfile' } if defined $O->{ 'logfile' } && '-' eq $O->{ 'logfile' };
314     if ( defined $O->{ 'logfile' } ) {
315         $O->{ 'logfile' } = strftime( $O->{ 'logfile' }, localtime time );
316         my $base_dir = dirname( $O->{ 'logfile' } );
317         unless ( -d $base_dir ) {
318             eval { make_path( $base_dir ); };
319             if ( my $error = $EVAL_ERROR ) {
320                 show_help_and_die( "%s doesn't exist, and cannot be created (via --logfile): %s", $base_dir, $error );
321             }
322         }
323         open my $fh, '>>', $O->{ 'logfile' } or show_help_and_die( 'Cannot write to %s: %s', $O->{ 'logfile' }, $OS_ERROR );
324         $O->{ 'logfh' } = $fh;
325     }
326     else {
327         $O->{ 'logfh' } = \*STDOUT;
328     }
329
330     if ( defined $O->{ 'workdir' } ) {
331         $O->{ 'workdir' } = strftime( $O->{ 'workdir' }, localtime time );
332         unless ( -d $O->{ 'workdir' } ) {
333             eval { make_path( $O->{ 'workdir' } ); };
334             if ( my $error = $EVAL_ERROR ) {
335                 show_help_and_die( "%s doesn't exist, and cannot be created (via --workdir): %s", $O->{ 'workdir' }, $error );
336             }
337         }
338     }
339     else {
340         $O->{ 'workdir' } = tempdir( 'CLEANUP' => 1 );
341     }
342
343     show_help_and_die( 'psql program name missing!' ) unless $O->{ 'psql' };
344     show_help_and_die( 'mailx program name missing!' ) if ( !$O->{ 'mailx' } ) && ( $O->{ 'recipients' } );
345
346     chdir $O->{ 'workdir' };
347     return;
348 }
349
350 sub show_help_and_die {
351     my ( $format, @args ) = @_;
352     if ( defined $format ) {
353         $format =~ s/\s*\z/\n/;
354         printf STDERR $format, @args;
355     }
356     print STDERR <<_EOH_;
357 Syntax:
358    $PROGRAM_NAME [options]
359
360 Options:
361   [ database connection ]
362    --host                  (-h) : database server host or socket directory
363    --port                  (-p) : database server port
364    --user                  (-U) : database user name
365    --dbname                (-d) : database name to connect to
366
367   [ object choosing ]
368    --mode                  (-m) : tables/indexes - show information about which objects.
369    --schema                (-n) : regexp to choose which schemas should the report be about
370    --exclude-schema        (-N) : regexp to choose which schemas to skip from report
371    --relation-name         (-t) : regexp to choose which relations to report on
372    --exclude-relation-name (-T) : regexp to choose which relations should be excluded from report
373
374   [ system options ]
375    --logfile               (-l) : where to log information about report execution
376    --workdir               (-w) : where to create temporary files
377    --psql                  (-q) : which psql binary to use
378    --mailx                 (-x) : which mailx binary to use
379
380   [ mailing ]
381    --recipients            (-r) : comma separated list of emails that will get the report
382    --subject               (-s) : subject of the mail to be sent with report
383
384   [ other ]
385    --help                  (-?) : show this help page
386
387 Defaults:
388    --exclude-schema '^(pg_.*|information_schema)\$'
389    --logfile        -
390    --mode           tables
391    --psql           psql
392    --mailx          mailx
393    --subject        [%Y-%m-%d %H:%M:%S %z] Bloat report for __mode__ in __dbname__ at __host__:__port__
394
395 Notes:
396     logfile, workdir and subject can contain strftime-styled %marks.
397     Additionally subject can will be parsed for __XXX__ strings, and there will be replaced by value of XXX option (full names of options have to be provided).
398     $PROGRAM_NAME will try to create necessary directories (logfile and workdir) if they don't exist.
399     - as logfile means logging to STDOUT.
400 _EOH_
401     exit 1;
402 }
Note: See TracBrowser for help on using the browser.