Commit a386942b authored by Fujii Masao

Add "G" (server-side data generation) as an initialization step in pgbench.

This commit allows the --init-steps option in pgbench to accept the "G"
character, meaning server-side data generation, as an initialization step.
With "G", only a limited number of queries are sent from the pgbench client,
and the data is actually generated on the server. This might make
the initialization phase faster if the bandwidth between the pgbench client
and the server is low.

Author: Fabien Coelho
Reviewed-by: Anna Endo, Ibrar Ahmed, Fujii Masao
Discussion: https://postgr.es/m/alpine.DEB.2.21.1904061826420.3678@lancre
parent 4b5e58b8
...@@ -193,12 +193,34 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d ...@@ -193,12 +193,34 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<term><literal>g</literal> (Generate data)</term> <term><literal>g</literal> or <literal>G</literal> (Generate data, client-side or server-side)</term>
<listitem> <listitem>
<para> <para>
Generate data and load it into the standard tables, Generate data and load it into the standard tables,
replacing any data already present. replacing any data already present.
</para> </para>
<para>
With <literal>g</literal> (client-side data generation),
data is generated in the <command>pgbench</command> client and then
sent to the server. This uses the client/server bandwidth
extensively through a <command>COPY</command>.
Using <literal>g</literal> causes logging to print one message
every 100,000 rows when generating data into the
<structname>pgbench_accounts</structname> table.
</para>
<para>
With <literal>G</literal> (server-side data generation),
only a limited number of queries are sent from the
<command>pgbench</command> client, and the data is actually
generated on the server.
No significant bandwidth is required for this variant, but
the server will do more work.
Using <literal>G</literal> causes logging not to print any progress
messages when generating data into the
<structname>pgbench_accounts</structname> table.
</para>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
...@@ -262,9 +284,13 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d ...@@ -262,9 +284,13 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
<listitem> <listitem>
<para> <para>
Switch logging to quiet mode, producing only one progress message per 5 Switch logging to quiet mode, producing only one progress message per 5
seconds. The default logging prints one message each 100000 rows, which seconds. The default logging prints one message each 100,000 rows, which
often outputs many lines per second (especially on good hardware). often outputs many lines per second (especially on good hardware).
</para> </para>
<para>
This setting has no effect if <literal>G</literal> is specified
in <option>-I</option>.
</para>
</listitem> </listitem>
</varlistentry> </varlistentry>
......
...@@ -132,6 +132,7 @@ static int pthread_join(pthread_t th, void **thread_return); ...@@ -132,6 +132,7 @@ static int pthread_join(pthread_t th, void **thread_return);
* some configurable parameters */ * some configurable parameters */
#define DEFAULT_INIT_STEPS "dtgvp" /* default -I setting */ #define DEFAULT_INIT_STEPS "dtgvp" /* default -I setting */
#define ALL_INIT_STEPS "dtgGvpf" /* all possible steps */
#define LOG_STEP_SECONDS 5 /* seconds between log messages */ #define LOG_STEP_SECONDS 5 /* seconds between log messages */
#define DEFAULT_NXACTS 10 /* default nxacts */ #define DEFAULT_NXACTS 10 /* default nxacts */
...@@ -627,7 +628,7 @@ usage(void) ...@@ -627,7 +628,7 @@ usage(void)
" %s [OPTION]... [DBNAME]\n" " %s [OPTION]... [DBNAME]\n"
"\nInitialization options:\n" "\nInitialization options:\n"
" -i, --initialize invokes initialization mode\n" " -i, --initialize invokes initialization mode\n"
" -I, --init-steps=[dtgvpf]+ (default \"dtgvp\")\n" " -I, --init-steps=[" ALL_INIT_STEPS "]+ (default \"" DEFAULT_INIT_STEPS "\")\n"
" run selected initialization steps\n" " run selected initialization steps\n"
" -F, --fillfactor=NUM set fill factor\n" " -F, --fillfactor=NUM set fill factor\n"
" -n, --no-vacuum do not run VACUUM during initialization\n" " -n, --no-vacuum do not run VACUUM during initialization\n"
...@@ -3803,10 +3804,23 @@ append_fillfactor(char *opts, int len) ...@@ -3803,10 +3804,23 @@ append_fillfactor(char *opts, int len)
} }
/* /*
* Fill the standard tables with some data * Truncate away any old data, in one command in case there are foreign keys
*/ */
static void static void
initGenerateData(PGconn *con) initTruncateTables(PGconn *con)
{
executeStatement(con, "truncate table "
"pgbench_accounts, "
"pgbench_branches, "
"pgbench_history, "
"pgbench_tellers");
}
/*
* Fill the standard tables with some data generated and sent from the client
*/
static void
initGenerateDataClientSide(PGconn *con)
{ {
char sql[256]; char sql[256];
PGresult *res; PGresult *res;
...@@ -3820,7 +3834,7 @@ initGenerateData(PGconn *con) ...@@ -3820,7 +3834,7 @@ initGenerateData(PGconn *con)
remaining_sec; remaining_sec;
int log_interval = 1; int log_interval = 1;
fprintf(stderr, "generating data...\n"); fprintf(stderr, "generating data (client-side)...\n");
/* /*
* we do all of this in one transaction to enable the backend's * we do all of this in one transaction to enable the backend's
...@@ -3828,15 +3842,8 @@ initGenerateData(PGconn *con) ...@@ -3828,15 +3842,8 @@ initGenerateData(PGconn *con)
*/ */
executeStatement(con, "begin"); executeStatement(con, "begin");
/* /* truncate away any old data */
* truncate away any old data, in one command in case there are foreign initTruncateTables(con);
* keys
*/
executeStatement(con, "truncate table "
"pgbench_accounts, "
"pgbench_branches, "
"pgbench_history, "
"pgbench_tellers");
/* /*
* fill branches, tellers, accounts in that order in case foreign keys * fill branches, tellers, accounts in that order in case foreign keys
...@@ -3940,6 +3947,51 @@ initGenerateData(PGconn *con) ...@@ -3940,6 +3947,51 @@ initGenerateData(PGconn *con)
executeStatement(con, "commit"); executeStatement(con, "commit");
} }
/*
 * Populate the standard tables by having the server generate the rows.
 *
 * Only a few INSERT ... SELECT statements over generate_series() are sent;
 * no row data travels from the client.
 *
 * The filler column is not listed for pgbench_branches and pgbench_tellers,
 * so those rows get the column's default, while pgbench_accounts is given
 * an explicit empty string.  This is meant to match what the client-side
 * data generation produces.
 */
static void
initGenerateDataServerSide(PGconn *con)
{
	char		stmt[256];

	fprintf(stderr, "generating data (server-side)...\n");

	/*
	 * Run everything inside a single transaction so that the backend can
	 * apply its data-loading optimizations.
	 */
	executeStatement(con, "begin");

	/* discard whatever the tables currently hold */
	initTruncateTables(con);

	/* branches: one row per bid, zero balance */
	snprintf(stmt, sizeof stmt,
			 "insert into pgbench_branches(bid,bbalance) "
			 "select bid, 0 from generate_series(1, %d) as bid",
			 nbranches * scale);
	executeStatement(con, stmt);

	/* tellers: consecutive tids are grouped onto branches */
	snprintf(stmt, sizeof stmt,
			 "insert into pgbench_tellers(tid,bid,tbalance) "
			 "select tid, (tid - 1) / %d + 1, 0 from generate_series(1, %d) as tid",
			 ntellers,
			 ntellers * scale);
	executeStatement(con, stmt);

	/*
	 * accounts: the upper bound is computed in 64 bits and printed with
	 * INT64_FORMAT
	 */
	snprintf(stmt, sizeof stmt,
			 "insert into pgbench_accounts(aid,bid,abalance,filler) "
			 "select aid, (aid - 1) / %d + 1, 0, '' from generate_series(1, " INT64_FORMAT ") as aid",
			 naccounts,
			 (int64) naccounts * scale);
	executeStatement(con, stmt);

	executeStatement(con, "commit");
}
/* /*
* Invoke vacuum on the standard tables * Invoke vacuum on the standard tables
*/ */
...@@ -4020,21 +4072,21 @@ initCreateFKeys(PGconn *con) ...@@ -4020,21 +4072,21 @@ initCreateFKeys(PGconn *con)
static void static void
checkInitSteps(const char *initialize_steps) checkInitSteps(const char *initialize_steps)
{ {
const char *step;
if (initialize_steps[0] == '\0') if (initialize_steps[0] == '\0')
{ {
fprintf(stderr, "no initialization steps specified\n"); fprintf(stderr, "no initialization steps specified\n");
exit(1); exit(1);
} }
for (step = initialize_steps; *step != '\0'; step++) for (const char *step = initialize_steps; *step != '\0'; step++)
{ {
if (strchr("dtgvpf ", *step) == NULL) if (strchr(ALL_INIT_STEPS " ", *step) == NULL)
{ {
fprintf(stderr, "unrecognized initialization step \"%c\"\n", fprintf(stderr,
"unrecognized initialization step \"%c\"\n",
*step); *step);
fprintf(stderr, "allowed steps are: \"d\", \"t\", \"g\", \"v\", \"p\", \"f\"\n"); fprintf(stderr,
"Allowed step characters are: \"" ALL_INIT_STEPS "\".\n");
exit(1); exit(1);
} }
} }
...@@ -4075,8 +4127,12 @@ runInitSteps(const char *initialize_steps) ...@@ -4075,8 +4127,12 @@ runInitSteps(const char *initialize_steps)
initCreateTables(con); initCreateTables(con);
break; break;
case 'g': case 'g':
op = "generate"; op = "client-side generate";
initGenerateData(con); initGenerateDataClientSide(con);
break;
case 'G':
op = "server-side generate";
initGenerateDataServerSide(con);
break; break;
case 'v': case 'v':
op = "vacuum"; op = "vacuum";
......
...@@ -130,7 +130,7 @@ pgbench( ...@@ -130,7 +130,7 @@ pgbench(
# Test interaction of --init-steps with legacy step-selection options # Test interaction of --init-steps with legacy step-selection options
pgbench( pgbench(
'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3', '--initialize --init-steps=dtpvGvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
0, 0,
[qr{^$}], [qr{^$}],
[ [
...@@ -138,7 +138,7 @@ pgbench( ...@@ -138,7 +138,7 @@ pgbench(
qr{creating tables}, qr{creating tables},
qr{creating 3 partitions}, qr{creating 3 partitions},
qr{creating primary keys}, qr{creating primary keys},
qr{.* of .* tuples \(.*\) done}, qr{generating data \(server-side\)},
qr{creating foreign keys}, qr{creating foreign keys},
qr{(?!vacuuming)}, # no vacuum qr{(?!vacuuming)}, # no vacuum
qr{done in \d+\.\d\d s } qr{done in \d+\.\d\d s }
......
...@@ -147,7 +147,7 @@ my @options = ( ...@@ -147,7 +147,7 @@ my @options = (
[ [
'invalid init step', 'invalid init step',
'-i -I dta', '-i -I dta',
[ qr{unrecognized initialization step}, qr{allowed steps are} ] [ qr{unrecognized initialization step}, qr{Allowed step characters are} ]
], ],
[ [
'bad random seed', 'bad random seed',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment