Commit 5fd9dfa5 authored by Bruce Momjian's avatar Bruce Momjian

Move pg_stat_statements query jumbling to core.

Add compute_query_id GUC to control whether a query identifier should be
computed by the core (off by default).  It's therefore now possible to
disable core queryid computation and use pg_stat_statements with a
different algorithm to compute the query identifier by using a
third-party module.

To ensure that a single source of query identifier can be used and is
well defined, modules that calculate a query identifier should throw an
error if compute_query_id is set to compute a query id and if a query
identifier was already calculated.

Discussion: https://postgr.es/m/20210407125726.tkvjdbw76hxnpwfi@nol

Author: Julien Rouhaud

Reviewed-by: Alvaro Herrera, Nitin Jadhav, Zhihong Yu
parent a282ee68
shared_preload_libraries = 'pg_stat_statements'
compute_query_id = on
......@@ -7622,6 +7622,31 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
<title>Statistics Monitoring</title>
<variablelist>
<varlistentry id="guc-compute-query-id" xreflabel="compute_query_id">
<term><varname>compute_query_id</varname> (<type>boolean</type>)
<indexterm>
<primary><varname>compute_query_id</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Enables in-core computation of a query identifier. The <xref
linkend="pgstatstatements"/> extension requires a query identifier
to be computed. Note that an external module can alternatively
be used if the in-core query identifier computation method
isn't acceptable. In this case, in-core computation should
remain disabled. The default is <literal>off</literal>.
</para>
<note>
<para>
To ensure that only one query identifier is calculated and
displayed, extensions that calculate query identifiers should
throw an error if a query identifier has already been computed.
</para>
</note>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>log_statement_stats</varname> (<type>boolean</type>)
<indexterm>
......
......@@ -20,6 +20,14 @@
This means that a server restart is needed to add or remove the module.
</para>
<para>
The module will not track statistics unless query
identifiers are calculated. This can be done by enabling <xref
linkend="guc-compute-query-id"/> or using a third-party module that
computes its own query identifiers. Note that all statistics tracked
by this module must be reset if the query identifier method is changed.
</para>
<para>
When <filename>pg_stat_statements</filename> is loaded, it tracks
statistics across all databases of the server. To access and manipulate
......@@ -84,7 +92,7 @@
<structfield>queryid</structfield> <type>bigint</type>
</para>
<para>
Internal hash code, computed from the statement's parse tree
Hash code to identify identical normalized queries.
</para></entry>
</row>
......@@ -386,6 +394,16 @@
are compared strictly on the basis of their textual query strings, however.
</para>
<note>
<para>
The following details about constant replacement and
<structfield>queryid</structfield> only apply when <xref
linkend="guc-compute-query-id"/> is enabled. If you use an external
module instead to compute <structfield>queryid</structfield>, you
should refer to its documentation for details.
</para>
</note>
<para>
When a constant's value has been ignored for purposes of matching the query
to other queries, the constant is replaced by a parameter symbol, such
......
......@@ -46,6 +46,8 @@
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/queryjumble.h"
#include "utils/rel.h"
......@@ -107,6 +109,7 @@ parse_analyze(RawStmt *parseTree, const char *sourceText,
{
ParseState *pstate = make_parsestate(NULL);
Query *query;
JumbleState *jstate = NULL;
Assert(sourceText != NULL); /* required as of 8.4 */
......@@ -119,8 +122,11 @@ parse_analyze(RawStmt *parseTree, const char *sourceText,
query = transformTopLevelStmt(pstate, parseTree);
if (compute_query_id)
jstate = JumbleQuery(query, sourceText);
if (post_parse_analyze_hook)
(*post_parse_analyze_hook) (pstate, query);
(*post_parse_analyze_hook) (pstate, query, jstate);
free_parsestate(pstate);
......@@ -140,6 +146,7 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
{
ParseState *pstate = make_parsestate(NULL);
Query *query;
JumbleState *jstate = NULL;
Assert(sourceText != NULL); /* required as of 8.4 */
......@@ -152,8 +159,11 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
/* make sure all is well with parameter types */
check_variable_parameters(pstate, query);
if (compute_query_id)
jstate = JumbleQuery(query, sourceText);
if (post_parse_analyze_hook)
(*post_parse_analyze_hook) (pstate, query);
(*post_parse_analyze_hook) (pstate, query, jstate);
free_parsestate(pstate);
......
......@@ -668,6 +668,7 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree,
ParseState *pstate;
Query *query;
List *querytree_list;
JumbleState *jstate = NULL;
Assert(query_string != NULL); /* required as of 8.4 */
......@@ -686,8 +687,11 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree,
query = transformTopLevelStmt(pstate, parsetree);
if (compute_query_id)
jstate = JumbleQuery(query, query_string);
if (post_parse_analyze_hook)
(*post_parse_analyze_hook) (pstate, query);
(*post_parse_analyze_hook) (pstate, query, jstate);
free_parsestate(pstate);
......
......@@ -22,6 +22,7 @@ OBJS = \
pg_rusage.o \
ps_status.o \
queryenvironment.o \
queryjumble.o \
rls.o \
sampling.o \
superuser.o \
......
......@@ -534,6 +534,7 @@ extern const struct config_enum_entry dynamic_shared_memory_options[];
/*
* GUC option variables that are exported from this module
*/
bool compute_query_id = false;
bool log_duration = false;
bool Debug_print_plan = false;
bool Debug_print_parse = false;
......@@ -1458,6 +1459,15 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
{
{"compute_query_id", PGC_SUSET, STATS_MONITORING,
gettext_noop("Compute query identifiers."),
NULL
},
&compute_query_id,
false,
NULL, NULL, NULL
},
{
{"log_parser_stats", PGC_SUSET, STATS_MONITORING,
gettext_noop("Writes parser performance statistics to the server log."),
......
......@@ -596,6 +596,7 @@
# - Monitoring -
#compute_query_id = off
#log_parser_stats = off
#log_planner_stats = off
#log_executor_stats = off
......
This diff is collapsed.
......@@ -15,10 +15,12 @@
#define ANALYZE_H
#include "parser/parse_node.h"
#include "utils/queryjumble.h"
/* Hook for plugins to get control at end of parse analysis */
typedef void (*post_parse_analyze_hook_type) (ParseState *pstate,
Query *query);
Query *query,
JumbleState *jstate);
extern PGDLLIMPORT post_parse_analyze_hook_type post_parse_analyze_hook;
......
......@@ -248,6 +248,7 @@ extern bool log_btree_build_stats;
extern PGDLLIMPORT bool check_function_bodies;
extern bool session_auth_is_superuser;
extern bool compute_query_id;
extern bool log_duration;
extern int log_parameter_max_length;
extern int log_parameter_max_length_on_error;
......
/*-------------------------------------------------------------------------
*
* queryjumble.h
* Query normalization and fingerprinting.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/include/utils/queryjumble.h
*
*-------------------------------------------------------------------------
*/
#ifndef QUERYJUMBLE_H
#define QUERYJUMBLE_H

#include "nodes/parsenodes.h"

#define JUMBLE_SIZE				1024	/* query serialization buffer size */

/*
 * Struct for tracking locations/lengths of constants during normalization
 *
 * One entry describes one constant in the query text.  A length of -1 marks
 * an entry that is to be ignored.
 */
typedef struct LocationLen
{
	int			location;		/* start offset in query text */
	int			length;			/* length in bytes, or -1 to ignore */
} LocationLen;

/*
 * Working state for computing a query jumble and producing a normalized
 * query string
 *
 * A pointer to this struct is what JumbleQuery() returns and what is handed
 * to post_parse_analyze_hook as its third argument.
 */
typedef struct JumbleState
{
	/*
	 * Jumble of current query tree.
	 * NOTE(review): presumably a buffer of JUMBLE_SIZE bytes -- confirm
	 * against the allocation in queryjumble.c.
	 */
	unsigned char *jumble;

	/* Number of bytes used in jumble[] */
	Size		jumble_len;

	/* Array of locations of constants that should be removed */
	LocationLen *clocations;

	/* Allocated length of clocations array */
	int			clocations_buf_size;

	/* Current number of valid entries in clocations array */
	int			clocations_count;

	/* highest Param id we've seen, in order to start normalization correctly */
	int			highest_extern_param_id;
} JumbleState;

/*
 * CleanQuerytext
 *
 * Takes the query text plus pointers to a location and a length, and returns
 * a pointer to query text.
 * NOTE(review): the implementation is not visible from this header;
 * presumably *location and *len are in/out values describing the statement's
 * span within "query" -- confirm against queryjumble.c.
 */
extern const char *CleanQuerytext(const char *query, int *location, int *len);

/*
 * JumbleQuery
 *
 * Computes the jumble for an analyzed Query, given the source text it was
 * parsed from, and returns the resulting working state.
 * NOTE(review): ownership/lifetime of the returned JumbleState is not
 * visible from this header -- confirm against queryjumble.c.
 */
extern JumbleState *JumbleQuery(Query *query, const char *querytext);

#endif							/* QUERYJUMBLE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment