Commit 5e39f06c authored by Tom Lane's avatar Tom Lane

Move bootstrap-time lookup of regproc OIDs into genbki.pl.

Formerly, the bootstrap backend looked up the OIDs corresponding to
names in regproc catalog entries using brute-force searches of pg_proc.
It was somewhat remarkable that that worked at all, since it was used
while populating other pretty-fundamental catalogs like pg_operator.
And it was also quite slow, and getting slower as pg_proc gets bigger.

This patch moves the lookup work into genbki.pl, so that the values in
postgres.bki for regproc columns are always numeric OIDs, an option
that regprocin() already supported.  Perl isn't the world's speediest
language, so this about doubles the time needed to run genbki.pl (from
0.3 to 0.6 sec on my machine).  But we only do that at most once per
build.  The time needed to run initdb drops significantly --- on my
machine, initdb --no-sync goes from 1.8 to 1.3 seconds.  So this is
a small net win even for just one initdb per build, and it becomes
quite a nice win for test sequences requiring many initdb runs.

Strip out the now-dead code for brute-force catalog searching in
regprocin.  We'd also cargo-culted similar logic into regoperin
and some (not all) of the other reg*in functions.  That is all
dead code too since we currently have no need to load such values
during bootstrap.  I removed it all, reasoning that if we ever
need such functionality it'd be much better to do it in a similar
way to this patch.

There might be some simplifications possible in the backend now that
regprocin doesn't require doing catalog reads so early in bootstrap.
I've not looked into that, though.

Andreas Karlsson, with some small adjustments by me

Discussion: https://postgr.es/m/30896.1492006367@sss.pgh.pa.us
parent a9254e67
......@@ -19,7 +19,7 @@ use warnings;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = ();
our @EXPORT_OK = qw(Catalogs RenameTempFile);
our @EXPORT_OK = qw(Catalogs SplitDataLine RenameTempFile);
# Call this function with an array of names of header files to parse.
# Returns a nested data structure describing the data in the headers.
......@@ -216,6 +216,28 @@ sub Catalogs
return \%catalogs;
}
# Split a DATA line into fields.
# Call this on the bki_values element of a DATA item returned by Catalogs();
# it returns a list of field values. We don't strip quoting from the fields.
# Note: it should be safe to assign the result to a list of length equal to
# the nominal number of catalog fields, because check_natts already checked
# the number of fields.
sub SplitDataLine
{
my $bki_values = shift;
# This handling of quoted strings might look too simplistic, but it
# matches what bootscanner.l does: that has no provision for quote marks
# inside quoted strings, either. If we don't have a quoted string, just
# snarf everything till next whitespace. That will accept some things
# that bootscanner.l will see as erroneous tokens; but it seems wiser
# to do that and let bootscanner.l complain than to silently drop
# non-whitespace characters.
my @result = $bki_values =~ /"[^"]*"|\S+/g;
return @result;
}
# Rename temporary files to final names.
# Call this function with the final file name and the .tmp extension
# Note: recommended extension is ".tmp$$", so that parallel make steps
......@@ -229,21 +251,20 @@ sub RenameTempFile
rename($temp_name, $final_name) || die "rename: $temp_name: $!";
}
# verify the number of fields in the passed-in bki structure
# verify the number of fields in the passed-in DATA line
sub check_natts
{
my ($catname, $natts, $bki_val, $file, $line) = @_;
die "Could not find definition for Natts_${catname} before start of DATA() in $file\n"
unless defined $natts;
# we're working with a copy and need to count the fields only, so collapse
$bki_val =~ s/"[^"]*?"/xxx/g;
my @atts = split /\s+/, $bki_val;
my $nfields = scalar(SplitDataLine($bki_val));
die sprintf
"Wrong number of attributes in DATA() entry at %s:%d (expected %d but got %d)\n",
$file, $line, $natts, scalar @atts
unless $natts == @atts;
$file, $line, $natts, $nfields
unless $natts == $nfields;
}
1;
......@@ -102,6 +102,7 @@ print $bki "# PostgreSQL $major_version\n";
# vars to hold data needed for schemapg.h
my %schemapg_entries;
my @tables_needing_macros;
my %regprocoids;
our @types;
# produce output, one catalog at a time
......@@ -160,22 +161,55 @@ foreach my $catname (@{ $catalogs->{names} })
foreach my $row (@{ $catalog->{data} })
{
# substitute constant values we acquired above
$row->{bki_values} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
$row->{bki_values} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
# Split line into tokens without interpreting their meaning.
my %bki_values;
@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
# Perform required substitutions on fields
foreach my $att (keys %bki_values)
{
# Substitute constant values we acquired above.
# (It's intentional that this can apply to parts of a field).
$bki_values{$att} =~ s/\bPGUID\b/$BOOTSTRAP_SUPERUSERID/g;
$bki_values{$att} =~ s/\bPGNSP\b/$PG_CATALOG_NAMESPACE/g;
# Replace regproc columns' values with OIDs.
# If we don't have a unique value to substitute,
# just do nothing (regprocin will complain).
if ($bki_attr{$att}->{type} eq 'regproc')
{
my $procoid = $regprocoids{$bki_values{$att}};
$bki_values{$att} = $procoid
if defined($procoid) && $procoid ne 'MULTIPLE';
}
}
# Save pg_proc oids for use in later regproc substitutions.
# This relies on the order we process the files in!
if ($catname eq 'pg_proc')
{
if (defined($regprocoids{$bki_values{proname}}))
{
$regprocoids{$bki_values{proname}} = 'MULTIPLE';
}
else
{
$regprocoids{$bki_values{proname}} = $row->{oid};
}
}
# Save pg_type info for pg_attribute processing below
if ($catname eq 'pg_type')
{
my %type;
my %type = %bki_values;
$type{oid} = $row->{oid};
@type{@attnames} = split /\s+/, $row->{bki_values};
push @types, \%type;
}
# Write to postgres.bki
my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
printf $bki "insert %s( %s)\n", $oid, $row->{bki_values};
printf $bki "insert %s( %s )\n", $oid,
join(' ', @bki_values{@attnames});
# Write comments to postgres.description and postgres.shdescription
if (defined $row->{descr})
......@@ -426,7 +460,7 @@ sub bki_insert
my @attnames = @_;
my $oid = $row->{oid} ? "OID = $row->{oid} " : '';
my $bki_values = join ' ', map { $_ eq '' ? '""' : $_ } map $row->{$_}, @attnames;
printf $bki "insert %s( %s)\n", $oid, $bki_values;
printf $bki "insert %s( %s )\n", $oid, $bki_values;
}
# The field values of a Schema_pg_xxx declaration are similar, but not
......
......@@ -58,30 +58,20 @@ foreach my $column (@{ $catalogs->{pg_proc}->{columns} })
my $data = $catalogs->{pg_proc}->{data};
foreach my $row (@$data)
{
# To construct fmgroids.h and fmgrtab.c, we need to inspect some
# of the individual data fields. Just splitting on whitespace
# won't work, because some quoted fields might contain internal
# whitespace. We handle this by folding them all to a simple
# "xxx". Fortunately, this script doesn't need to look at any
# fields that might need quoting, so this simple hack is
# sufficient.
$row->{bki_values} =~ s/"[^"]*"/"xxx"/g;
@{$row}{@attnames} = split /\s+/, $row->{bki_values};
# Split line into tokens without interpreting their meaning.
my %bki_values;
@bki_values{@attnames} = Catalog::SplitDataLine($row->{bki_values});
# Select out just the rows for internal-language procedures.
# Note assumption here that INTERNALlanguageId is 12.
next if $row->{prolang} ne '12';
next if $bki_values{prolang} ne '12';
push @fmgr,
{ oid => $row->{oid},
strict => $row->{proisstrict},
retset => $row->{proretset},
nargs => $row->{pronargs},
prosrc => $row->{prosrc}, };
# Hack to work around memory leak in some versions of Perl
$row = undef;
strict => $bki_values{proisstrict},
retset => $bki_values{proretset},
nargs => $bki_values{pronargs},
prosrc => $bki_values{prosrc}, };
}
# Emit headers for both files
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment