Commit b1c1aa53 authored by Amit Kapila's avatar Amit Kapila

pgbench: add --partitions and --partition-method options.

These new options allow users to partition the pgbench_accounts table by
specifying the number of partitions and partitioning method.  The values
allowed for partitioning method are range and hash.

This feature allows users to measure the overhead of partitioning if any.

Author: Fabien COELHO
Reviewed-by: Amit Kapila, Amit Langote, Dilip Kumar, Asif Rehman, and
Alvaro Herrera
Discussion: https://postgr.es/m/alpine.DEB.2.21.1907230826190.7008@lancre
parent df86e52c
......@@ -306,6 +306,31 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
</listitem>
</varlistentry>
<varlistentry>
<term><option>--partitions=<replaceable>NUM</replaceable></option></term>
<listitem>
<para>
Create a partitioned <literal>pgbench_accounts</literal> table with
<replaceable>NUM</replaceable> partitions of nearly equal size for
the scaled number of accounts.
Default is <literal>0</literal>, meaning no partitioning.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--partition-method=<replaceable>NAME</replaceable></option></term>
<listitem>
<para>
Create a partitioned <literal>pgbench_accounts</literal> table with
<replaceable>NAME</replaceable> method.
Expected values are <literal>range</literal> or <literal>hash</literal>.
This option requires that <option>--partitions</option> is set to non-zero.
If unspecified, default is <literal>range</literal>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--tablespace=<replaceable>tablespace</replaceable></option></term>
<listitem>
......
......@@ -186,6 +186,23 @@ int64 latency_limit = 0;
char *tablespace = NULL;
char *index_tablespace = NULL;
/*
* Number of "pgbench_accounts" partitions. 0 is the default and means no
* partitioning.
*/
static int partitions = 0;
/* partitioning strategy for "pgbench_accounts" */
typedef enum
{
PART_NONE, /* no partitioning */
PART_RANGE, /* range partitioning */
PART_HASH /* hash partitioning */
} partition_method_t;
static partition_method_t partition_method = PART_NONE;
static const char *PARTITION_METHOD[] = {"none", "range", "hash"};
/* random seed used to initialize base_random_sequence */
int64 random_seed = -1;
......@@ -582,6 +599,7 @@ static void doLog(TState *thread, CState *st,
StatsData *agg, bool skipped, double latency, double lag);
static void processXactStats(TState *thread, CState *st, instr_time *now,
bool skipped, StatsData *agg);
static void append_fillfactor(char *opts, int len);
static void addScript(ParsedScript script);
static void *threadRun(void *arg);
static void finishCon(CState *st);
......@@ -617,6 +635,9 @@ usage(void)
" --foreign-keys create foreign key constraints between tables\n"
" --index-tablespace=TABLESPACE\n"
" create indexes in the specified tablespace\n"
" --partitions=NUM partition pgbench_accounts in NUM parts (default: 0)\n"
" --partition-method=(range|hash)\n"
" partition pgbench_accounts with this method (default: range)\n"
" --tablespace=TABLESPACE create tables in the specified tablespace\n"
" --unlogged-tables create tables as unlogged tables\n"
"\nOptions to select what to run:\n"
......@@ -3601,6 +3622,77 @@ initDropTables(PGconn *con)
"pgbench_tellers");
}
/*
* Create "pgbench_accounts" partitions if needed.
*
* This is the larger table of pgbench default tpc-b like schema
* with a known size, so we choose to partition it.
*/
static void
createPartitions(PGconn *con)
{
char ff[64];
ff[0] = '\0';
/*
* Per ddlinfo in initCreateTables, fillfactor is needed on table
* pgbench_accounts.
*/
append_fillfactor(ff, sizeof(ff));
/* we must have to create some partitions */
Assert(partitions > 0);
fprintf(stderr, "creating %d partitions...\n", partitions);
for (int p = 1; p <= partitions; p++)
{
char query[256];
if (partition_method == PART_RANGE)
{
int64 part_size = (naccounts * (int64) scale + partitions - 1) / partitions;
char minvalue[32],
maxvalue[32];
/*
* For RANGE, we use open-ended partitions at the beginning and
* end to allow any valid value for the primary key. Although the
* actual minimum and maximum values can be derived from the
* scale, it is more generic and the performance is better.
*/
if (p == 1)
sprintf(minvalue, "minvalue");
else
sprintf(minvalue, INT64_FORMAT, (p - 1) * part_size + 1);
if (p < partitions)
sprintf(maxvalue, INT64_FORMAT, p * part_size + 1);
else
sprintf(maxvalue, "maxvalue");
snprintf(query, sizeof(query),
"create%s table pgbench_accounts_%d\n"
" partition of pgbench_accounts\n"
" for values from (%s) to (%s)%s\n",
unlogged_tables ? " unlogged" : "", p,
minvalue, maxvalue, ff);
}
else if (partition_method == PART_HASH)
snprintf(query, sizeof(query),
"create%s table pgbench_accounts_%d\n"
" partition of pgbench_accounts\n"
" for values with (modulus %d, remainder %d)%s\n",
unlogged_tables ? " unlogged" : "", p,
partitions, p - 1, ff);
else /* cannot get there */
Assert(0);
executeStatement(con, query);
}
}
/*
* Create pgbench's standard tables
*/
......@@ -3664,9 +3756,15 @@ initCreateTables(PGconn *con)
/* Construct new create table statement. */
opts[0] = '\0';
if (ddl->declare_fillfactor)
/* Partition pgbench_accounts table */
if (partition_method != PART_NONE && strcmp(ddl->table, "pgbench_accounts") == 0)
snprintf(opts + strlen(opts), sizeof(opts) - strlen(opts),
" with (fillfactor=%d)", fillfactor);
" partition by %s (aid)", PARTITION_METHOD[partition_method]);
else if (ddl->declare_fillfactor)
/* fillfactor is only expected on actual tables */
append_fillfactor(opts, sizeof(opts));
if (tablespace != NULL)
{
char *escape_tablespace;
......@@ -3686,6 +3784,21 @@ initCreateTables(PGconn *con)
executeStatement(con, buffer);
}
if (partition_method != PART_NONE)
createPartitions(con);
}
/*
* add fillfactor percent option.
*
* XXX - As default is 100, it could be removed in this case.
*/
static void
append_fillfactor(char *opts, int len)
{
snprintf(opts + strlen(opts), len - strlen(opts),
" with (fillfactor=%d)", fillfactor);
}
/*
......@@ -4010,6 +4123,121 @@ runInitSteps(const char *initialize_steps)
termPQExpBuffer(&stats);
}
/*
* Extract pgbench table informations into global variables scale,
* partition_method and partitions.
*/
static void
GetTableInfo(PGconn *con, bool scale_given)
{
PGresult *res;
/*
* get the scaling factor that should be same as count(*) from
* pgbench_branches if this is not a custom query
*/
res = PQexec(con, "select count(*) from pgbench_branches");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
fprintf(stderr, "%s", PQerrorMessage(con));
if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
{
fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
}
exit(1);
}
scale = atoi(PQgetvalue(res, 0, 0));
if (scale < 0)
{
fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
PQgetvalue(res, 0, 0));
exit(1);
}
PQclear(res);
/* warn if we override user-given -s switch */
if (scale_given)
fprintf(stderr,
"scale option ignored, using count from pgbench_branches table (%d)\n",
scale);
/*
* Get the partition information for the first "pgbench_accounts" table
* found in search_path.
*
* The result is empty if no "pgbench_accounts" is found.
*
* Otherwise, it always returns one row even if the table is not
* partitioned (in which case the partition strategy is NULL).
*
* The number of partitions can be 0 even for partitioned tables, if no
* partition is attached.
*
* We assume no partitioning on any failure, so as to avoid failing on an
* old version without "pg_partitioned_table".
*/
res = PQexec(con,
"select o.n, p.partstrat, pg_catalog.count(i.inhparent) "
"from pg_catalog.pg_class as c "
"join pg_catalog.pg_namespace as n on (n.oid = c.relnamespace) "
"cross join lateral (select pg_catalog.array_position(pg_catalog.current_schemas(true), n.nspname)) as o(n) "
"left join pg_catalog.pg_partitioned_table as p on (p.partrelid = c.oid) "
"left join pg_catalog.pg_inherits as i on (c.oid = i.inhparent) "
"where c.relname = 'pgbench_accounts' and o.n is not null "
"group by 1, 2 "
"order by 1 asc "
"limit 1");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
/* probably an older version, coldly assume no partitioning */
partition_method = PART_NONE;
partitions = 0;
}
else if (PQntuples(res) == 0)
{
/*
* This case is unlikely as pgbench already found "pgbench_branches"
* above to compute the scale.
*/
fprintf(stderr,
"no pgbench_accounts table found in search_path\n"
"Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\".\n", PQdb(con));
exit(1);
}
else /* PQntupes(res) == 1 */
{
/* normal case, extract partition information */
if (PQgetisnull(res, 0, 1))
partition_method = PART_NONE;
else
{
char *ps = PQgetvalue(res, 0, 1);
/* column must be there */
Assert(ps != NULL);
if (strcmp(ps, "r") == 0)
partition_method = PART_RANGE;
else if (strcmp(ps, "h") == 0)
partition_method = PART_HASH;
else
{
/* possibly a newer version with new partition method */
fprintf(stderr, "unexpected partition method: \"%s\"\n", ps);
exit(1);
}
}
partitions = atoi(PQgetvalue(res, 0, 2));
}
PQclear(res);
}
/*
* Replace :param with $n throughout the command's SQL text, which
* is a modifiable string in cmd->lines.
......@@ -4919,6 +5147,10 @@ printResults(StatsData *total, instr_time total_time,
printf("transaction type: %s\n",
num_scripts == 1 ? sql_script[0].desc : "multiple scripts");
printf("scaling factor: %d\n", scale);
/* only print partitioning information if some partitioning was detected */
if (partition_method != PART_NONE)
printf("partition method: %s\npartitions: %d\n",
PARTITION_METHOD[partition_method], partitions);
printf("query mode: %s\n", QUERYMODE[querymode]);
printf("number of clients: %d\n", nclients);
printf("number of threads: %d\n", nthreads);
......@@ -5126,6 +5358,8 @@ main(int argc, char **argv)
{"foreign-keys", no_argument, NULL, 8},
{"random-seed", required_argument, NULL, 9},
{"show-script", required_argument, NULL, 10},
{"partitions", required_argument, NULL, 11},
{"partition-method", required_argument, NULL, 12},
{NULL, 0, NULL, 0}
};
......@@ -5160,7 +5394,6 @@ main(int argc, char **argv)
#endif
PGconn *con;
PGresult *res;
char *env;
int exit_code = 0;
......@@ -5486,6 +5719,29 @@ main(int argc, char **argv)
exit(0);
}
break;
case 11: /* partitions */
initialization_option_set = true;
partitions = atoi(optarg);
if (partitions < 0)
{
fprintf(stderr, "invalid number of partitions: \"%s\"\n",
optarg);
exit(1);
}
break;
case 12: /* partition-method */
initialization_option_set = true;
if (pg_strcasecmp(optarg, "range") == 0)
partition_method = PART_RANGE;
else if (pg_strcasecmp(optarg, "hash") == 0)
partition_method = PART_HASH;
else
{
fprintf(stderr, "invalid partition method, expecting \"range\" or \"hash\","
" got: \"%s\"\n", optarg);
exit(1);
}
break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
......@@ -5567,6 +5823,16 @@ main(int argc, char **argv)
exit(1);
}
if (partitions == 0 && partition_method != PART_NONE)
{
fprintf(stderr, "--partition-method requires greater than zero --partitions\n");
exit(1);
}
/* set default method */
if (partitions > 0 && partition_method == PART_NONE)
partition_method = PART_RANGE;
if (initialize_steps == NULL)
initialize_steps = pg_strdup(DEFAULT_INIT_STEPS);
......@@ -5724,39 +5990,7 @@ main(int argc, char **argv)
}
if (internal_script_used)
{
/*
* get the scaling factor that should be same as count(*) from
* pgbench_branches if this is not a custom query
*/
res = PQexec(con, "select count(*) from pgbench_branches");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
char *sqlState = PQresultErrorField(res, PG_DIAG_SQLSTATE);
fprintf(stderr, "%s", PQerrorMessage(con));
if (sqlState && strcmp(sqlState, ERRCODE_UNDEFINED_TABLE) == 0)
{
fprintf(stderr, "Perhaps you need to do initialization (\"pgbench -i\") in database \"%s\"\n", PQdb(con));
}
exit(1);
}
scale = atoi(PQgetvalue(res, 0, 0));
if (scale < 0)
{
fprintf(stderr, "invalid count(*) from pgbench_branches: \"%s\"\n",
PQgetvalue(res, 0, 0));
exit(1);
}
PQclear(res);
/* warn if we override user-given -s switch */
if (scale_given)
fprintf(stderr,
"scale option ignored, using count from pgbench_branches table (%d)\n",
scale);
}
GetTableInfo(con, scale_given);
/*
* :scale variables normally get -s or database scale, but don't override
......
......@@ -58,6 +58,19 @@ sub pgbench
return;
}
# tablespace for testing, because partitioned tables cannot use pg_default
# explicitly and we want to test that table creation with tablespace works
# for partitioned tables.
my $ts = $node->basedir . '/regress_pgbench_tap_1_ts_dir';
mkdir $ts or die "cannot create directory $ts";
# this takes care of WIN-specific path issues
my $ets = TestLib::perl2host($ts);
# the next commands will issue a syntax error if the path contains a "'"
$node->safe_psql('postgres',
"CREATE TABLESPACE regress_pgbench_tap_1_ts LOCATION '$ets';"
);
# Test concurrent OID generation via pg_enum_oid_index. This indirectly
# exercises LWLock and spinlock concurrency.
my $labels = join ',', map { "'l$_'" } 1 .. 1000;
......@@ -100,12 +113,13 @@ pgbench(
# Again, with all possible options
pgbench(
'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=pg_default --index-tablespace=pg_default',
'--initialize --init-steps=dtpvg --scale=1 --unlogged-tables --fillfactor=98 --foreign-keys --quiet --tablespace=regress_pgbench_tap_1_ts --index-tablespace=regress_pgbench_tap_1_ts --partitions=2 --partition-method=hash',
0,
[qr{^$}i],
[
qr{dropping old tables},
qr{creating tables},
qr{creating 2 partitions},
qr{vacuuming},
qr{creating primary keys},
qr{creating foreign keys},
......@@ -116,12 +130,13 @@ pgbench(
# Test interaction of --init-steps with legacy step-selection options
pgbench(
'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables',
'--initialize --init-steps=dtpvgvv --no-vacuum --foreign-keys --unlogged-tables --partitions=3',
0,
[qr{^$}],
[
qr{dropping old tables},
qr{creating tables},
qr{creating 3 partitions},
qr{creating primary keys},
qr{.* of .* tuples \(.*\) done},
qr{creating foreign keys},
......@@ -910,5 +925,6 @@ check_pgbench_logs($bdir, '001_pgbench_log_3', 1, 10, 10,
qr{^\d \d{1,2} \d+ \d \d+ \d+$});
# done
$node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
$node->stop;
done_testing();
......@@ -157,6 +157,13 @@ my @options = (
qr{error while setting random seed from --random-seed option}
]
],
[ 'bad partition type', '-i --partition-method=BAD', [qr{"range"}, qr{"hash"}, qr{"BAD"}] ],
[ 'bad partition number', '-i --partitions -1', [ qr{invalid number of partitions: "-1"} ] ],
[
'partition method without partitioning',
'-i --partition-method=hash',
[ qr{partition-method requires greater than zero --partitions} ]
],
# logging sub-options
[
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment