Commit a0854f10 authored by Tom Lane's avatar Tom Lane

Avoid parsing catalog data twice during BKI file construction.

In the wake of commit 5602265f, we were doing duplicate-OID detection
quite inefficiently, by invoking duplicate_oids which does all the same
parsing of catalog headers and .dat files as genbki.pl does.  That adds
under half a second on modern machines, but quite a bit more on slow
buildfarm critters, so it seems worth avoiding.  Let's just extend
genbki.pl a little so it can also detect duplicate OIDs, and remove
the duplicate_oids call from the build process.

(This also means that duplicate OID detection will happen during
Windows builds, which AFAICS it didn't before.)

This makes the use-case for duplicate_oids a bit dubious, but it's
possible that people will still want to run that check without doing
a whole build run, so let's keep that script.

In passing, move down genbki.pl's creation of its temp output files
so that it doesn't happen until after we've done parsing and validation
of the input.  This avoids leaving a lot of clutter around after a
failure.

John Naylor and Tom Lane

Discussion: https://postgr.es/m/37D774E4-FE1F-437E-B3D2-593F314B7505@postgrespro.ru
parent dd4cc9d7
...@@ -382,8 +382,8 @@ ...@@ -382,8 +382,8 @@
through the catalog headers and <filename>.dat</filename> files through the catalog headers and <filename>.dat</filename> files
to see which ones do not appear. You can also use to see which ones do not appear. You can also use
the <filename>duplicate_oids</filename> script to check for mistakes. the <filename>duplicate_oids</filename> script to check for mistakes.
(That script is run automatically at compile time, and will stop the (<filename>genbki.pl</filename> will also detect duplicate OIDs
build if a duplicate is found.) at compile time.)
</para> </para>
<para> <para>
......
...@@ -386,6 +386,8 @@ sub FindDefinedSymbolFromData ...@@ -386,6 +386,8 @@ sub FindDefinedSymbolFromData
# Extract an array of all the OIDs assigned in the specified catalog headers # Extract an array of all the OIDs assigned in the specified catalog headers
# and their associated data files (if any). # and their associated data files (if any).
# Caution: genbki.pl contains equivalent logic; change it too if you need to
# touch this.
sub FindAllOidsFromHeaders sub FindAllOidsFromHeaders
{ {
my @input_files = @_; my @input_files = @_;
......
...@@ -84,8 +84,7 @@ generated-header-symlinks: $(top_builddir)/src/include/catalog/header-stamp ...@@ -84,8 +84,7 @@ generated-header-symlinks: $(top_builddir)/src/include/catalog/header-stamp
# configure run, even in distribution tarballs. So depending on configure.in # configure run, even in distribution tarballs. So depending on configure.in
# instead is cheating a bit, but it will achieve the goal of updating the # instead is cheating a bit, but it will achieve the goal of updating the
# version number when it changes. # version number when it changes.
bki-stamp: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure.in $(top_srcdir)/src/include/catalog/duplicate_oids bki-stamp: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure.in
cd $(top_srcdir)/src/include/catalog && $(PERL) ./duplicate_oids
$(PERL) -I $(catalogdir) $< --set-version=$(MAJORVERSION) $(POSTGRES_BKI_SRCS) $(PERL) -I $(catalogdir) $< --set-version=$(MAJORVERSION) $(POSTGRES_BKI_SRCS)
touch $@ touch $@
......
...@@ -57,27 +57,14 @@ if ($output_path ne '' && substr($output_path, -1) ne '/') ...@@ -57,27 +57,14 @@ if ($output_path ne '' && substr($output_path, -1) ne '/')
$output_path .= '/'; $output_path .= '/';
} }
# Open temp files
my $tmpext = ".tmp$$";
my $bkifile = $output_path . 'postgres.bki';
open my $bki, '>', $bkifile . $tmpext
or die "can't open $bkifile$tmpext: $!";
my $schemafile = $output_path . 'schemapg.h';
open my $schemapg, '>', $schemafile . $tmpext
or die "can't open $schemafile$tmpext: $!";
my $descrfile = $output_path . 'postgres.description';
open my $descr, '>', $descrfile . $tmpext
or die "can't open $descrfile$tmpext: $!";
my $shdescrfile = $output_path . 'postgres.shdescription';
open my $shdescr, '>', $shdescrfile . $tmpext
or die "can't open $shdescrfile$tmpext: $!";
# Read all the files into internal data structures. # Read all the files into internal data structures.
my @catnames; my @catnames;
my %catalogs; my %catalogs;
my %catalog_data; my %catalog_data;
my @toast_decls; my @toast_decls;
my @index_decls; my @index_decls;
my %oidcounts;
foreach my $header (@input_files) foreach my $header (@input_files)
{ {
$header =~ /(.+)\.h$/ $header =~ /(.+)\.h$/
...@@ -94,10 +81,30 @@ foreach my $header (@input_files) ...@@ -94,10 +81,30 @@ foreach my $header (@input_files)
$catalogs{$catname} = $catalog; $catalogs{$catname} = $catalog;
} }
# While checking for duplicated OIDs, we ignore the pg_class OID and
# rowtype OID of bootstrap catalogs, as those are expected to appear
# in the initial data for pg_class and pg_type. For regular catalogs,
# include these OIDs. (See also Catalog::FindAllOidsFromHeaders
# if you change this logic.)
if (!$catalog->{bootstrap})
{
$oidcounts{ $catalog->{relation_oid} }++
if ($catalog->{relation_oid});
$oidcounts{ $catalog->{rowtype_oid} }++
if ($catalog->{rowtype_oid});
}
# Not all catalogs have a data file. # Not all catalogs have a data file.
if (-e $datfile) if (-e $datfile)
{ {
$catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 0); my $data = Catalog::ParseData($datfile, $schema, 0);
$catalog_data{$catname} = $data;
# Check for duplicated OIDs while we're at it.
foreach my $row (@$data)
{
$oidcounts{ $row->{oid} }++ if defined $row->{oid};
}
} }
# If the header file contained toast or index info, build BKI # If the header file contained toast or index info, build BKI
...@@ -108,6 +115,8 @@ foreach my $header (@input_files) ...@@ -108,6 +115,8 @@ foreach my $header (@input_files)
sprintf "declare toast %s %s on %s\n", sprintf "declare toast %s %s on %s\n",
$toast->{toast_oid}, $toast->{toast_index_oid}, $toast->{toast_oid}, $toast->{toast_index_oid},
$toast->{parent_table}; $toast->{parent_table};
$oidcounts{ $toast->{toast_oid} }++;
$oidcounts{ $toast->{toast_index_oid} }++;
} }
foreach my $index (@{ $catalog->{indexing} }) foreach my $index (@{ $catalog->{indexing} })
{ {
...@@ -116,9 +125,24 @@ foreach my $header (@input_files) ...@@ -116,9 +125,24 @@ foreach my $header (@input_files)
$index->{is_unique} ? 'unique ' : '', $index->{is_unique} ? 'unique ' : '',
$index->{index_name}, $index->{index_oid}, $index->{index_name}, $index->{index_oid},
$index->{index_decl}; $index->{index_decl};
$oidcounts{ $index->{index_oid} }++;
} }
} }
# Complain and exit if we found any duplicate OIDs.
# While duplicate OIDs would only cause a failure if they appear in
# the same catalog, our project policy is that manually assigned OIDs
# should be globally unique, to avoid confusion.
my $found = 0;
foreach my $oid (keys %oidcounts)
{
next unless $oidcounts{$oid} > 1;
print "Duplicate oids detected:\n" if !$found;
print "$oid\n";
$found++;
}
die "found $found duplicate OID(s) in catalog data\n" if $found;
# Fetch some special data that we will substitute into the output file. # Fetch some special data that we will substitute into the output file.
# CAUTION: be wary about what symbols you substitute into the .bki file here! # CAUTION: be wary about what symbols you substitute into the .bki file here!
# It's okay to substitute things that are expected to be really constant # It's okay to substitute things that are expected to be really constant
...@@ -224,6 +248,21 @@ my %lookup_kind = ( ...@@ -224,6 +248,21 @@ my %lookup_kind = (
pg_type => \%typeoids); pg_type => \%typeoids);
# Open temp files
my $tmpext = ".tmp$$";
my $bkifile = $output_path . 'postgres.bki';
open my $bki, '>', $bkifile . $tmpext
or die "can't open $bkifile$tmpext: $!";
my $schemafile = $output_path . 'schemapg.h';
open my $schemapg, '>', $schemafile . $tmpext
or die "can't open $schemafile$tmpext: $!";
my $descrfile = $output_path . 'postgres.description';
open my $descr, '>', $descrfile . $tmpext
or die "can't open $descrfile$tmpext: $!";
my $shdescrfile = $output_path . 'postgres.shdescription';
open my $shdescr, '>', $shdescrfile . $tmpext
or die "can't open $shdescrfile$tmpext: $!";
# Generate postgres.bki, postgres.description, postgres.shdescription, # Generate postgres.bki, postgres.description, postgres.shdescription,
# and pg_*_d.h headers. # and pg_*_d.h headers.
print "Generating BKI files and symbol definition headers...\n"; print "Generating BKI files and symbol definition headers...\n";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment