Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
cee52386
Commit
cee52386
authored
Mar 27, 2012
by
Robert Haas
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
pg_test_timing utility, to measure clock monotonicity and timing cost.
Ants Aasma, Greg Smith
parent
5b4f3466
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
451 additions
and
2 deletions
+451
-2
contrib/Makefile
contrib/Makefile
+1
-0
contrib/pg_test_timing/.gitignore
contrib/pg_test_timing/.gitignore
+1
-0
contrib/pg_test_timing/Makefile
contrib/pg_test_timing/Makefile
+18
-0
contrib/pg_test_timing/pg_test_timing.c
contrib/pg_test_timing/pg_test_timing.c
+162
-0
doc/src/sgml/config.sgml
doc/src/sgml/config.sgml
+3
-1
doc/src/sgml/contrib.sgml
doc/src/sgml/contrib.sgml
+1
-0
doc/src/sgml/filelist.sgml
doc/src/sgml/filelist.sgml
+1
-0
doc/src/sgml/perform.sgml
doc/src/sgml/perform.sgml
+3
-1
doc/src/sgml/pgtesttiming.sgml
doc/src/sgml/pgtesttiming.sgml
+261
-0
No files found.
contrib/Makefile
View file @
cee52386
...
@@ -35,6 +35,7 @@ SUBDIRS = \
...
@@ -35,6 +35,7 @@ SUBDIRS = \
pg_standby
\
pg_standby
\
pg_stat_statements
\
pg_stat_statements
\
pg_test_fsync
\
pg_test_fsync
\
pg_test_timing
\
pg_trgm
\
pg_trgm
\
pg_upgrade
\
pg_upgrade
\
pg_upgrade_support
\
pg_upgrade_support
\
...
...
contrib/pg_test_timing/.gitignore
0 → 100644
View file @
cee52386
/pg_test_timing
contrib/pg_test_timing/Makefile
0 → 100644
View file @
cee52386
# contrib/pg_test_timing/Makefile
PGFILEDESC
=
"pg_test_timing - test timing overhead"
PGAPPICON
=
win32
PROGRAM
=
pg_test_timing
OBJS
=
pg_test_timing.o
ifdef
USE_PGXS
PG_CONFIG
=
pg_config
PGXS
:=
$(
shell
$(PG_CONFIG)
--pgxs
)
include
$(PGXS)
else
subdir
=
contrib/pg_test_timing
top_builddir
=
../..
include
$(top_builddir)/src/Makefile.global
include
$(top_srcdir)/contrib/contrib-global.mk
endif
contrib/pg_test_timing/pg_test_timing.c
0 → 100644
View file @
cee52386
/*
* pg_test_timing.c
* tests overhead of timing calls and their monotonicity: that
* they always move forward
*/
#include "postgres_fe.h"
#include "getopt_long.h"
#include "portability/instr_time.h"
static
const
char
*
progname
;
static
int32
test_duration
=
3
;
static
void
handle_args
(
int
argc
,
char
*
argv
[]);
static
void
test_timing
(
int32
);
int
main
(
int
argc
,
char
*
argv
[])
{
progname
=
get_progname
(
argv
[
0
]);
handle_args
(
argc
,
argv
);
test_timing
(
test_duration
);
return
0
;
}
static
void
handle_args
(
int
argc
,
char
*
argv
[])
{
static
struct
option
long_options
[]
=
{
{
"duration"
,
required_argument
,
NULL
,
'd'
},
{
NULL
,
0
,
NULL
,
0
}
};
int
option
;
/* Command line option */
int
optindex
=
0
;
/* used by getopt_long */
if
(
argc
>
1
)
{
if
(
strcmp
(
argv
[
1
],
"--help"
)
==
0
||
strcmp
(
argv
[
1
],
"-h"
)
==
0
||
strcmp
(
argv
[
1
],
"-?"
)
==
0
)
{
printf
(
"Usage: %s [-d DURATION]
\n
"
,
progname
);
exit
(
0
);
}
if
(
strcmp
(
argv
[
1
],
"--version"
)
==
0
||
strcmp
(
argv
[
1
],
"-V"
)
==
0
)
{
puts
(
"pg_test_timing (PostgreSQL) "
PG_VERSION
);
exit
(
0
);
}
}
while
((
option
=
getopt_long
(
argc
,
argv
,
"d:"
,
long_options
,
&
optindex
))
!=
-
1
)
{
switch
(
option
)
{
case
'd'
:
test_duration
=
atoi
(
optarg
);
break
;
default:
fprintf
(
stderr
,
"Try
\"
%s --help
\"
for more information.
\n
"
,
progname
);
exit
(
1
);
break
;
}
}
if
(
argc
>
optind
)
{
fprintf
(
stderr
,
"%s: too many command-line arguments (first is
\"
%s
\"
)
\n
"
,
progname
,
argv
[
optind
]);
fprintf
(
stderr
,
"Try
\"
%s --help
\"
for more information.
\n
"
,
progname
);
exit
(
1
);
}
if
(
test_duration
>
0
)
{
printf
(
"Testing timing overhead for %d seconds.
\n
"
,
test_duration
);
}
else
{
fprintf
(
stderr
,
"%s: duration must be a positive integer (duration is
\"
%d
\"
)
\n
"
,
progname
,
test_duration
);
fprintf
(
stderr
,
"Try
\"
%s --help
\"
for more information.
\n
"
,
progname
);
exit
(
1
);
}
}
static
void
test_timing
(
int32
duration
)
{
uint64
total_time
;
int64
time_elapsed
=
0
;
uint64
loop_count
=
0
;
uint64
prev
,
cur
;
int32
diff
,
i
,
bits
,
found
;
instr_time
start_time
,
end_time
,
temp
;
static
int64
histogram
[
32
];
total_time
=
duration
>
0
?
duration
*
1000000
:
0
;
INSTR_TIME_SET_CURRENT
(
start_time
);
cur
=
INSTR_TIME_GET_MICROSEC
(
start_time
);
while
(
time_elapsed
<
total_time
)
{
prev
=
cur
;
INSTR_TIME_SET_CURRENT
(
temp
);
cur
=
INSTR_TIME_GET_MICROSEC
(
temp
);
diff
=
cur
-
prev
;
if
(
diff
<
0
)
{
printf
(
"Detected clock going backwards in time.
\n
"
);
printf
(
"Time warp: %d microseconds
\n
"
,
diff
);
exit
(
1
);
}
bits
=
0
;
while
(
diff
)
{
diff
>>=
1
;
bits
++
;
}
histogram
[
bits
]
++
;
loop_count
++
;
INSTR_TIME_SUBTRACT
(
temp
,
start_time
);
time_elapsed
=
INSTR_TIME_GET_MICROSEC
(
temp
);
}
INSTR_TIME_SET_CURRENT
(
end_time
);
INSTR_TIME_SUBTRACT
(
end_time
,
start_time
);
printf
(
"Per loop time including overhead: %0.2f nsec
\n
"
,
INSTR_TIME_GET_DOUBLE
(
end_time
)
*
1e9
/
loop_count
);
printf
(
"Histogram of timing durations:
\n
"
);
printf
(
"%9s: %10s %9s
\n
"
,
"< usec"
,
"count"
,
"percent"
);
found
=
0
;
for
(
i
=
31
;
i
>=
0
;
i
--
)
{
if
(
found
||
histogram
[
i
])
{
found
=
1
;
printf
(
"%9ld: %10ld %8.5f%%
\n
"
,
1l
<<
i
,
histogram
[
i
],
(
double
)
histogram
[
i
]
*
100
/
loop_count
);
}
}
}
doc/src/sgml/config.sgml
View file @
cee52386
...
@@ -4294,7 +4294,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
...
@@ -4294,7 +4294,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
Enables timing of database I/O calls. This parameter is off by
Enables timing of database I/O calls. This parameter is off by
default, because it will repeatedly query the operating system for
default, because it will repeatedly query the operating system for
the current time, which may cause significant overhead on some
the current time, which may cause significant overhead on some
platforms. Only superusers can change this setting.
platforms. You can use the <xref linkend="pgtesttiming"> tool to
measure the overhead of timing on your system. Only superusers can
change this setting.
</para>
</para>
</listitem>
</listitem>
</varlistentry>
</varlistentry>
...
...
doc/src/sgml/contrib.sgml
View file @
cee52386
...
@@ -121,6 +121,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
...
@@ -121,6 +121,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
&pgstatstatements;
&pgstatstatements;
&pgstattuple;
&pgstattuple;
&pgtestfsync;
&pgtestfsync;
&pgtesttiming;
&pgtrgm;
&pgtrgm;
&pgupgrade;
&pgupgrade;
&seg;
&seg;
...
...
doc/src/sgml/filelist.sgml
View file @
cee52386
...
@@ -129,6 +129,7 @@
...
@@ -129,6 +129,7 @@
<!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
<!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
<!ENTITY pgstattuple SYSTEM "pgstattuple.sgml">
<!ENTITY pgstattuple SYSTEM "pgstattuple.sgml">
<!ENTITY pgtestfsync SYSTEM "pgtestfsync.sgml">
<!ENTITY pgtestfsync SYSTEM "pgtestfsync.sgml">
<!ENTITY pgtesttiming SYSTEM "pgtesttiming.sgml">
<!ENTITY pgtrgm SYSTEM "pgtrgm.sgml">
<!ENTITY pgtrgm SYSTEM "pgtrgm.sgml">
<!ENTITY pgupgrade SYSTEM "pgupgrade.sgml">
<!ENTITY pgupgrade SYSTEM "pgupgrade.sgml">
<!ENTITY seg SYSTEM "seg.sgml">
<!ENTITY seg SYSTEM "seg.sgml">
...
...
doc/src/sgml/perform.sgml
View file @
cee52386
...
@@ -770,7 +770,9 @@ ROLLBACK;
...
@@ -770,7 +770,9 @@ ROLLBACK;
network transmission costs and I/O conversion costs are not included.
network transmission costs and I/O conversion costs are not included.
Second, the measurement overhead added by <command>EXPLAIN
Second, the measurement overhead added by <command>EXPLAIN
ANALYZE</command> can be significant, especially on machines with slow
ANALYZE</command> can be significant, especially on machines with slow
<function>gettimeofday()</> operating-system calls.
<function>gettimeofday()</> operating-system calls. You can use the
<xref linkend="pgtesttiming"> tool to measure the overhead of timing
on your system.
</para>
</para>
<para>
<para>
...
...
doc/src/sgml/pgtesttiming.sgml
0 → 100644
View file @
cee52386
<!-- doc/src/sgml/pgtesttiming.sgml -->
<sect1 id="pgtesttiming" xreflabel="pg_test_timing">
<title>pg_test_timing</title>
<indexterm zone="pgtesttiming">
<primary>pg_test_timing</primary>
</indexterm>
<para>
<application>pg_test_timing</> is a tool to measure the timing overhead
on your system and confirm that the system time never moves backwards.
Systems that are slow to collect timing data can give less accurate
<command>EXPLAIN ANALYZE</command> results.
</para>
<sect2>
<title>Usage</title>
<synopsis>
pg_test_timing [options]
</synopsis>
<para>
<application>pg_test_timing</application> accepts the following
command-line options:
<variablelist>
<varlistentry>
<term><option>-d</option></term>
<term><option>--duration</option></term>
<listitem>
<para>
Specifies the test duration, in seconds. Longer durations
give slightly better accuracy, and are more likely to discover
problems with the system clock moving backwards. The default
test duration is 3 seconds.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</sect2>
<sect2>
<title>Interpreting results</title>
<para>
Good results will show most (>90%) individual timing calls take less
than one microsecond. Average per loop overhead will be even lower,
below 100 nanoseconds. This example from an Intel i7-860 system using
a TSC clock source shows excellent performance:
</para>
<screen>
Testing timing overhead for 3 seconds.
Per loop time including overhead: 35.96 nsec
Histogram of timing durations:
< usec: count percent
16: 2 0.00000%
8: 13 0.00002%
4: 126 0.00015%
2: 2999652 3.59518%
1: 80435604 96.40465%
</screen>
<para>
Note that different units are used for the per loop time than the
histogram. The loop can have resolution within a few nanoseconds
(nsec), while the individual timing calls can only resolve down to
one microsecond (usec).
</para>
</sect2>
<sect2>
<title>Measuring executor timing overhead</title>
<para>
When the query executor is running a statement using
<command>EXPLAIN ANALYZE</command>, individual operations are
timed as well as showing a summary. The overhead of your system
can be checked by counting rows with the psql program:
</para>
<screen>
CREATE TABLE t AS SELECT * FROM generate_series(1,100000);
\timing
SELECT COUNT(*) FROM t;
EXPLAIN ANALYZE SELECT COUNT(*) FROM t;
</screen>
<para>
The i7-860 system measured runs the count query in 9.8 ms while
the <command>EXPLAIN ANALYZE</command> version takes 16.6 ms,
each processing just over 100,000 rows. That 6.8 ms difference
means the timing overhead per row is 68 ns, about twice what
pg_test_timing estimated it would be. Even that relatively
small amount of overhead is making the fully timed count statement
take almost 70% longer. On more substantial queries, the
timing overhead would be less problematic.
</para>
</sect2>
<sect2>
<title>Changing time sources</title>
<para>
On some newer Linux systems, it's possible to change the clock
source used to collect timing data at any time. A second example
shows the slowdown possible from switching to the slower acpi_pm
time source, on the same system used for the fast results above:
</para>
<screen>
# cat /sys/devices/system/clocksource/clocksource0/available_clocksource
tsc hpet acpi_pm
# echo acpi_pm > /sys/devices/system/clocksource/clocksource0/current_clocksource
# pg_test_timing
Per loop time including overhead: 722.92 nsec
Histogram of timing durations:
< usec: count percent
16: 3 0.00007%
8: 563 0.01357%
4: 3241 0.07810%
2: 2990371 72.05956%
1: 1155682 27.84870%
</screen>
<para>
In this configuration, the sample <command>EXPLAIN ANALYZE</command>
above takes 115.9 ms. That's 1061 nsec of timing overhead, again
a small multiple of what's measured directly by this utility.
That much timing overhead means the actual query itself is only
taking a tiny fraction of the accounted for time, most of it
is being consumed in overhead instead. In this configuration,
any <command>EXPLAIN ANALYZE</command> totals involving many
timed operations would be inflated significantly by timing overhead.
</para>
<para>
FreeBSD also allows changing the time source on the fly, and
it logs information about the timer selected during boot:
</para>
<screen>
dmesg | grep "Timecounter"
sysctl kern.timecounter.hardware=TSC
</screen>
<para>
Other systems may only allow setting the time source on boot.
On older Linux systems the "clock" kernel setting is the only way
to make this sort of change. And even on some more recent ones,
the only option you'll see for a clock source is "jiffies". Jiffies
are the older Linux software clock implementation, which can have
good resolution when it's backed by fast enough timing hardware,
as in this example:
</para>
<screen>
$ cat /sys/devices/system/clocksource/clocksource0/available_clocksource
jiffies
$ dmesg | grep time.c
time.c: Using 3.579545 MHz WALL PM GTOD PIT/TSC timer.
time.c: Detected 2400.153 MHz processor.
$ ./pg_test_timing
Testing timing overhead for 3 seconds.
Per timing duration including loop overhead: 97.75 ns
Histogram of timing durations:
< usec: count percent
32: 1 0.00000%
16: 1 0.00000%
8: 22 0.00007%
4: 3010 0.00981%
2: 2993204 9.75277%
1: 27694571 90.23734%
</screen>
</sect2>
<sect2>
<title>Clock hardware and timing accuracy</title>
<para>
Collecting accurate timing information is normally done on computers
using hardware clocks with various levels of accuracy. With some
hardware the operating systems can pass the system clock time almost
directly to programs. A system clock can also be derived from a chip
that simply provides timing interrupts, periodic ticks at some known
time interval. In either case, operating system kernels provide
a clock source that hides these details. But the accuracy of that
clock source and how quickly it can return results varies based
on the underlying hardware.
</para>
<para>
Inaccurate time keeping can result in system instability. Test
any change to the clock source very carefully. Operating system
defaults are sometimes made to favor reliability over best
accuracy. And if you are using a virtual machine, look into the
recommended time sources compatible with it. Virtual hardware
faces additional difficulties when emulating timers, and there
are often per operating system settings suggested by vendors.
</para>
<para>
The Time Stamp Counter (TSC) clock source is the most accurate one
available on current generation CPUs. It's the preferred way to track
the system time when it's supported by the operating system and the
TSC clock is reliable. There are several ways that TSC can fail
to provide an accurate timing source, making it unreliable. Older
systems can have a TSC clock that varies based on the CPU
temperature, making it unusable for timing. Trying to use TSC on some
older multi-core CPUs can give a reported time that's inconsistent
among multiple cores. This can result in the time going backwards, a
problem this program checks for. And even the newest systems can
fail to provide accurate TSC timing with very aggressive power saving
configurations.
</para>
<para>
Newer operating systems may check for the known TSC problems and
switch to a slower, more stable clock source when they are seen.
If your system supports TSC time but doesn't default to that, it
may be disabled for a good reason. And some operating systems may
not detect all the possible problems correctly, or will allow using
TSC even in situations where it's known to be inaccurate.
</para>
<para>
The High Precision Event Timer (HPET) is the preferred timer on
systems where it's available and TSC is not accurate. The timer
chip itself is programmable to allow up to 100 nanosecond resolution,
but you may not see that much accuracy in your system clock.
</para>
<para>
Advanced Configuration and Power Interface (ACPI) provides a
Power Management (PM) Timer, which Linux refers to as the acpi_pm.
The clock derived from acpi_pm will at best provide 300 nanosecond
resolution.
</para>
<para>
Timers used on older PC hardware including the 8254 Programmable
Interval Timer (PIT), the real-time clock (RTC), the Advanced
Programmable Interrupt Controller (APIC) timer, and the Cyclone
timer. These timers aim for millisecond resolution.
</para>
</sect2>
<sect2>
<title>Author</title>
<para>
Ants Aasma <email>ants.aasma@eesti.ee</email>
</para>
</sect2>
</sect1>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment