Commit 3c6248a8 authored by Tom Lane

Remove the recently added USE_SEGMENTED_FILES option, and indeed remove all
support for a nonsegmented mode from md.c.  Per recent discussions, there
doesn't seem to be much value in a "never segment" option as opposed to
segmenting with a suitably large segment size.  So instead provide a
configure-time switch to set the desired segment size in units of gigabytes.
While at it, expose a configure switch for BLCKSZ as well.

Zdenek Kotala
parent 94b0b545
...@@ -1357,7 +1357,6 @@ Optional Features: ...@@ -1357,7 +1357,6 @@ Optional Features:
--enable-debug build with debugging symbols (-g) --enable-debug build with debugging symbols (-g)
--enable-profiling build with profiling enabled --enable-profiling build with profiling enabled
--enable-dtrace build with DTrace support --enable-dtrace build with DTrace support
--disable-segmented-files disable data file segmentation (requires largefile support)
--enable-depend turn on automatic dependency tracking --enable-depend turn on automatic dependency tracking
--enable-cassert enable assertion checks (for debugging) --enable-cassert enable assertion checks (for debugging)
--enable-thread-safety make client libraries thread-safe --enable-thread-safety make client libraries thread-safe
...@@ -1373,7 +1372,9 @@ Optional Packages: ...@@ -1373,7 +1372,9 @@ Optional Packages:
--with-includes=DIRS look for additional header files in DIRS --with-includes=DIRS look for additional header files in DIRS
--with-libraries=DIRS look for additional libraries in DIRS --with-libraries=DIRS look for additional libraries in DIRS
--with-libs=DIRS alternative spelling of --with-libraries --with-libs=DIRS alternative spelling of --with-libraries
--with-pgport=PORTNUM change default port number [5432] --with-pgport=PORTNUM set default port number [5432]
--with-blocksize=BLOCKSIZE set block size in kB [8]
--with-segsize=SEGSIZE set segment size in GB [1]
--with-tcl build Tcl modules (PL/Tcl) --with-tcl build Tcl modules (PL/Tcl)
--with-tclconfig=DIR tclConfig.sh is in DIR --with-tclconfig=DIR tclConfig.sh is in DIR
--with-perl build Perl modules (PL/Perl) --with-perl build Perl modules (PL/Perl)
...@@ -2549,34 +2550,102 @@ fi ...@@ -2549,34 +2550,102 @@ fi
# #
# Data file segmentation # Block size
# #
{ echo "$as_me:$LINENO: checking for block size" >&5
echo $ECHO_N "checking for block size... $ECHO_C" >&6; }
pgac_args="$pgac_args enable_segmented_files" pgac_args="$pgac_args with_blocksize"
# Check whether --enable-segmented-files was given.
if test "${enable_segmented_files+set}" = set; then # Check whether --with-blocksize was given.
enableval=$enable_segmented_files; if test "${with_blocksize+set}" = set; then
case $enableval in withval=$with_blocksize;
case $withval in
yes) yes)
: { { echo "$as_me:$LINENO: error: argument required for --with-blocksize option" >&5
echo "$as_me: error: argument required for --with-blocksize option" >&2;}
{ (exit 1); exit 1; }; }
;; ;;
no) no)
: { { echo "$as_me:$LINENO: error: argument required for --with-blocksize option" >&5
echo "$as_me: error: argument required for --with-blocksize option" >&2;}
{ (exit 1); exit 1; }; }
;; ;;
*) *)
{ { echo "$as_me:$LINENO: error: no argument expected for --enable-segmented-files option" >&5 blocksize=$withval
echo "$as_me: error: no argument expected for --enable-segmented-files option" >&2;}
{ (exit 1); exit 1; }; }
;; ;;
esac esac
else else
enable_segmented_files=yes blocksize=8
fi
case ${blocksize} in
1) BLCKSZ=1024;;
2) BLCKSZ=2048;;
4) BLCKSZ=4096;;
8) BLCKSZ=8192;;
16) BLCKSZ=16384;;
32) BLCKSZ=32768;;
*) { { echo "$as_me:$LINENO: error: Invalid block size. Allowed values are 1,2,4,8,16,32." >&5
echo "$as_me: error: Invalid block size. Allowed values are 1,2,4,8,16,32." >&2;}
{ (exit 1); exit 1; }; }
esac
{ echo "$as_me:$LINENO: result: ${blocksize}kB" >&5
echo "${ECHO_T}${blocksize}kB" >&6; }
cat >>confdefs.h <<_ACEOF
#define BLCKSZ ${BLCKSZ}
_ACEOF
#
# File segment size
#
{ echo "$as_me:$LINENO: checking for segment size" >&5
echo $ECHO_N "checking for segment size... $ECHO_C" >&6; }
pgac_args="$pgac_args with_segsize"
# Check whether --with-segsize was given.
if test "${with_segsize+set}" = set; then
withval=$with_segsize;
case $withval in
yes)
{ { echo "$as_me:$LINENO: error: argument required for --with-segsize option" >&5
echo "$as_me: error: argument required for --with-segsize option" >&2;}
{ (exit 1); exit 1; }; }
;;
no)
{ { echo "$as_me:$LINENO: error: argument required for --with-segsize option" >&5
echo "$as_me: error: argument required for --with-segsize option" >&2;}
{ (exit 1); exit 1; }; }
;;
*)
segsize=$withval
;;
esac
else
segsize=1
fi fi
# this expression is set up to avoid unnecessary integer overflow
RELSEG_SIZE=`expr '(' 1024 '*' ${segsize} / ${blocksize} ')' '*' 1024`
test $? -eq 0 || exit 1
{ echo "$as_me:$LINENO: result: ${segsize}GB" >&5
echo "${ECHO_T}${segsize}GB" >&6; }
cat >>confdefs.h <<_ACEOF
#define RELSEG_SIZE ${RELSEG_SIZE}
_ACEOF
# #
# C compiler # C compiler
...@@ -24287,12 +24356,11 @@ _ACEOF ...@@ -24287,12 +24356,11 @@ _ACEOF
if test "$ac_cv_sizeof_off_t" -lt 8 -o "$enable_segmented_files" = "yes"; then # If we don't have largefile support, can't handle segsize >= 2GB.
if test "$ac_cv_sizeof_off_t" -lt 8 -a "$segsize" != "1"; then
cat >>confdefs.h <<\_ACEOF { { echo "$as_me:$LINENO: error: Large file support is not enabled. Segment size cannot be larger than 1GB." >&5
#define USE_SEGMENTED_FILES 1 echo "$as_me: error: Large file support is not enabled. Segment size cannot be larger than 1GB." >&2;}
_ACEOF { (exit 1); exit 1; }; }
fi fi
# SunOS doesn't handle negative byte comparisons properly with +/- return # SunOS doesn't handle negative byte comparisons properly with +/- return
......
dnl Process this file with autoconf to produce a configure script. dnl Process this file with autoconf to produce a configure script.
dnl $PostgreSQL: pgsql/configure.in,v 1.557 2008/04/28 22:47:03 tgl Exp $ dnl $PostgreSQL: pgsql/configure.in,v 1.558 2008/05/02 01:08:26 tgl Exp $
dnl dnl
dnl Developers, please strive to achieve this order: dnl Developers, please strive to achieve this order:
dnl dnl
...@@ -155,7 +155,7 @@ AC_SUBST(WANTED_LANGUAGES) ...@@ -155,7 +155,7 @@ AC_SUBST(WANTED_LANGUAGES)
# Default port number (--with-pgport), default 5432 # Default port number (--with-pgport), default 5432
# #
AC_MSG_CHECKING([for default port number]) AC_MSG_CHECKING([for default port number])
PGAC_ARG_REQ(with, pgport, [ --with-pgport=PORTNUM change default port number [[5432]]], PGAC_ARG_REQ(with, pgport, [ --with-pgport=PORTNUM set default port number [[5432]]],
[default_port=$withval], [default_port=$withval],
[default_port=5432]) [default_port=5432])
AC_MSG_RESULT([$default_port]) AC_MSG_RESULT([$default_port])
...@@ -218,10 +218,67 @@ AC_SUBST(DTRACEFLAGS)]) ...@@ -218,10 +218,67 @@ AC_SUBST(DTRACEFLAGS)])
AC_SUBST(enable_dtrace) AC_SUBST(enable_dtrace)
# #
# Data file segmentation # Block size
# #
PGAC_ARG_BOOL(enable, segmented-files, yes, AC_MSG_CHECKING([for block size])
[ --disable-segmented-files disable data file segmentation (requires largefile support)]) PGAC_ARG_REQ(with, blocksize, [ --with-blocksize=BLOCKSIZE set block size in kB [[8]]],
[blocksize=$withval],
[blocksize=8])
case ${blocksize} in
1) BLCKSZ=1024;;
2) BLCKSZ=2048;;
4) BLCKSZ=4096;;
8) BLCKSZ=8192;;
16) BLCKSZ=16384;;
32) BLCKSZ=32768;;
*) AC_MSG_ERROR([Invalid block size. Allowed values are 1,2,4,8,16,32.])
esac
AC_MSG_RESULT([${blocksize}kB])
AC_DEFINE_UNQUOTED([BLCKSZ], ${BLCKSZ}, [
Size of a disk block --- this also limits the size of a tuple. You
can set it bigger if you need bigger tuples (although TOAST should
reduce the need to have large tuples, since fields can be spread
across multiple tuples).
BLCKSZ must be a power of 2. The maximum possible value of BLCKSZ
is currently 2^15 (32768). This is determined by the 15-bit widths
of the lp_off and lp_len fields in ItemIdData (see
include/storage/itemid.h).
Changing BLCKSZ requires an initdb.
])
#
# File segment size
#
AC_MSG_CHECKING([for segment size])
PGAC_ARG_REQ(with, segsize, [ --with-segsize=SEGSIZE set segment size in GB [[1]]],
[segsize=$withval],
[segsize=1])
# this expression is set up to avoid unnecessary integer overflow
RELSEG_SIZE=`expr '(' 1024 '*' ${segsize} / ${blocksize} ')' '*' 1024`
test $? -eq 0 || exit 1
AC_MSG_RESULT([${segsize}GB])
AC_DEFINE_UNQUOTED([RELSEG_SIZE], ${RELSEG_SIZE}, [
RELSEG_SIZE is the maximum number of blocks allowed in one disk file.
Thus, the maximum size of a single file is RELSEG_SIZE * BLCKSZ;
relations bigger than that are divided into multiple files.
RELSEG_SIZE * BLCKSZ must be less than your OS' limit on file size.
This is often 2 GB or 4GB in a 32-bit operating system, unless you
have large file support enabled. By default, we make the limit 1 GB
to avoid any possible integer-overflow problems within the OS.
A limit smaller than necessary only means we divide a large
relation into more chunks than necessary, so it seems best to err
in the direction of a small limit.
A power-of-2 value is recommended to save a few cycles in md.c,
but is not absolutely required.
Changing RELSEG_SIZE requires an initdb.
])
# #
# C compiler # C compiler
...@@ -1469,8 +1526,9 @@ fi ...@@ -1469,8 +1526,9 @@ fi
# Check for largefile support (must be after AC_SYS_LARGEFILE) # Check for largefile support (must be after AC_SYS_LARGEFILE)
AC_CHECK_SIZEOF([off_t]) AC_CHECK_SIZEOF([off_t])
if test "$ac_cv_sizeof_off_t" -lt 8 -o "$enable_segmented_files" = "yes"; then # If we don't have largefile support, can't handle segsize >= 2GB.
AC_DEFINE([USE_SEGMENTED_FILES], 1, [Define to split data files into 1GB segments.]) if test "$ac_cv_sizeof_off_t" -lt 8 -a "$segsize" != "1"; then
AC_MSG_ERROR([Large file support is not enabled. Segment size cannot be larger than 1GB.])
fi fi
# SunOS doesn't handle negative byte comparisons properly with +/- return # SunOS doesn't handle negative byte comparisons properly with +/- return
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/installation.sgml,v 1.307 2008/04/21 00:26:44 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/installation.sgml,v 1.308 2008/05/02 01:08:26 tgl Exp $ -->
<chapter id="installation"> <chapter id="installation">
<title><![%standalone-include[<productname>PostgreSQL</>]]> <title><![%standalone-include[<productname>PostgreSQL</>]]>
...@@ -1067,15 +1067,38 @@ su - postgres ...@@ -1067,15 +1067,38 @@ su - postgres
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<term><option>--disable-segmented-files</option></term> <term><option>--with-segsize=<replaceable>SEGSIZE</replaceable></option></term>
<listitem> <listitem>
<para> <para>
Store large tables as single operating-system files, rather than Set the <firstterm>segment size</>, in gigabytes. Large tables are
dividing them into 1GB segments as is the default. This option divided into multiple operating-system files, each of size equal
is ignored unless the operating system has <quote>largefile</> to the segment size. This avoids problems with file size limits
support (which most do, nowadays). It can be helpful to reduce that exist on many platforms. The default segment size, 1 gigabyte,
the number of file descriptors consumed when working with very is safe on all supported platforms. If your operating system has
large tables. <quote>largefile</> support (which most do, nowadays), you can use
a larger segment size. This can be helpful to reduce the number of
file descriptors consumed when working with very large tables.
But be careful not to select a value larger than is supported
by your platform and the filesystem(s) you intend to use. Other
tools you might wish to use, such as <application>tar</>, could
also set limits on the usable file size.
It is recommended, though not absolutely required, that this value
be a power of 2.
Note that changing this value requires an initdb.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--with-blocksize=<replaceable>BLOCKSIZE</replaceable></option></term>
<listitem>
<para>
Set the <firstterm>block size</>, in kilobytes. This is the unit
of storage and I/O within tables. The default, 8 kilobytes,
is suitable for most situations; but other values may be useful
in special cases.
The value must be a power of 2 between 1 and 32 (kilobytes).
Note that changing this value requires an initdb.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.22 2008/03/10 20:06:27 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/storage.sgml,v 1.23 2008/05/02 01:08:26 tgl Exp $ -->
<chapter id="storage"> <chapter id="storage">
...@@ -138,14 +138,13 @@ Avoid assuming that filenode and table OID are the same. ...@@ -138,14 +138,13 @@ Avoid assuming that filenode and table OID are the same.
</caution> </caution>
<para> <para>
When a table or index exceeds 1 GB, it is normally divided into gigabyte-sized When a table or index exceeds 1 GB, it is divided into gigabyte-sized
<firstterm>segments</>. The first segment's file name is the same as the <firstterm>segments</>. The first segment's file name is the same as the
filenode; subsequent segments are named filenode.1, filenode.2, etc. filenode; subsequent segments are named filenode.1, filenode.2, etc.
This arrangement avoids problems on platforms that have file size limitations. This arrangement avoids problems on platforms that have file size limitations.
(But if the platform does not have such a limitation, and (Actually, 1 GB is just the default segment size. The segment size can be
<option>--disable-segmented-files</option> was specified when adjusted using the configuration option <option>--with-segsize</option>
<productname>PostgreSQL</> was built, then each table or index is stored when building <productname>PostgreSQL</>.)
as a single file, without segmentation.)
The contents of tables and indexes are discussed further in The contents of tables and indexes are discussed further in
<xref linkend="storage-page-layout">. <xref linkend="storage-page-layout">.
</para> </para>
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.30 2008/03/10 20:06:27 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/file/buffile.c,v 1.31 2008/05/02 01:08:27 tgl Exp $
* *
* NOTES: * NOTES:
* *
...@@ -38,9 +38,9 @@ ...@@ -38,9 +38,9 @@
#include "storage/buffile.h" #include "storage/buffile.h"
/* /*
* We break BufFiles into gigabyte-sized segments, whether or not * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
* USE_SEGMENTED_FILES is defined. The reason is that we'd like large * The reason is that we'd like large temporary BufFiles to be spread across
* temporary BufFiles to be spread across multiple tablespaces when available. * multiple tablespaces when available.
*/ */
#define MAX_PHYSICAL_FILESIZE 0x40000000 #define MAX_PHYSICAL_FILESIZE 0x40000000
#define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ) #define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.137 2008/04/18 06:48:38 heikki Exp $ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.138 2008/05/02 01:08:27 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
* system's file size limit (often 2GBytes). In order to do that, * system's file size limit (often 2GBytes). In order to do that,
* we break relations up into "segment" files that are each shorter than * we break relations up into "segment" files that are each shorter than
* the OS file size limit. The segment size is set by the RELSEG_SIZE * the OS file size limit. The segment size is set by the RELSEG_SIZE
* configuration constant in pg_config_manual.h. * configuration constant in pg_config.h.
* *
* On disk, a relation must consist of consecutively numbered segment * On disk, a relation must consist of consecutively numbered segment
* files in the pattern * files in the pattern
...@@ -88,19 +88,13 @@ ...@@ -88,19 +88,13 @@
* segment, we assume that any subsequent segments are inactive. * segment, we assume that any subsequent segments are inactive.
* *
* All MdfdVec objects are palloc'd in the MdCxt memory context. * All MdfdVec objects are palloc'd in the MdCxt memory context.
*
* On platforms that support large files, USE_SEGMENTED_FILES can be
* #undef'd to disable the segmentation logic. In that case each
* relation is a single operating-system file.
*/ */
typedef struct _MdfdVec typedef struct _MdfdVec
{ {
File mdfd_vfd; /* fd number in fd.c's pool */ File mdfd_vfd; /* fd number in fd.c's pool */
BlockNumber mdfd_segno; /* segment number, from 0 */ BlockNumber mdfd_segno; /* segment number, from 0 */
#ifdef USE_SEGMENTED_FILES
struct _MdfdVec *mdfd_chain; /* next segment, or NULL */ struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
#endif
} MdfdVec; } MdfdVec;
static MemoryContext MdCxt; /* context for all md.c allocations */ static MemoryContext MdCxt; /* context for all md.c allocations */
...@@ -161,11 +155,8 @@ static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior); ...@@ -161,11 +155,8 @@ static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg); static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
static void register_unlink(RelFileNode rnode); static void register_unlink(RelFileNode rnode);
static MdfdVec *_fdvec_alloc(void); static MdfdVec *_fdvec_alloc(void);
#ifdef USE_SEGMENTED_FILES
static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
int oflags); int oflags);
#endif
static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
bool isTemp, ExtensionBehavior behavior); bool isTemp, ExtensionBehavior behavior);
static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg); static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);
...@@ -258,9 +249,7 @@ mdcreate(SMgrRelation reln, bool isRedo) ...@@ -258,9 +249,7 @@ mdcreate(SMgrRelation reln, bool isRedo)
reln->md_fd->mdfd_vfd = fd; reln->md_fd->mdfd_vfd = fd;
reln->md_fd->mdfd_segno = 0; reln->md_fd->mdfd_segno = 0;
#ifdef USE_SEGMENTED_FILES
reln->md_fd->mdfd_chain = NULL; reln->md_fd->mdfd_chain = NULL;
#endif
} }
/* /*
...@@ -310,8 +299,7 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -310,8 +299,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
path = relpath(rnode); path = relpath(rnode);
/* /*
* Delete or truncate the first segment, or only segment if not doing * Delete or truncate the first segment.
* segmenting
*/ */
if (isRedo) if (isRedo)
ret = unlink(path); ret = unlink(path);
...@@ -344,8 +332,9 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -344,8 +332,9 @@ mdunlink(RelFileNode rnode, bool isRedo)
rnode.relNode))); rnode.relNode)));
} }
#ifdef USE_SEGMENTED_FILES /*
/* Delete the additional segments, if any */ * Delete any additional segments.
*/
else else
{ {
char *segpath = (char *) palloc(strlen(path) + 12); char *segpath = (char *) palloc(strlen(path) + 12);
...@@ -374,7 +363,6 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -374,7 +363,6 @@ mdunlink(RelFileNode rnode, bool isRedo)
} }
pfree(segpath); pfree(segpath);
} }
#endif
pfree(path); pfree(path);
...@@ -420,12 +408,8 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -420,12 +408,8 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE); v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (off_t) BLCKSZ * blocknum;
#endif
/* /*
* Note: because caller usually obtained blocknum by calling mdnblocks, * Note: because caller usually obtained blocknum by calling mdnblocks,
...@@ -469,9 +453,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -469,9 +453,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (!isTemp) if (!isTemp)
register_dirty_segment(reln, v); register_dirty_segment(reln, v);
#ifdef USE_SEGMENTED_FILES
Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
#endif
} }
/* /*
...@@ -530,10 +512,8 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior) ...@@ -530,10 +512,8 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
mdfd->mdfd_vfd = fd; mdfd->mdfd_vfd = fd;
mdfd->mdfd_segno = 0; mdfd->mdfd_segno = 0;
#ifdef USE_SEGMENTED_FILES
mdfd->mdfd_chain = NULL; mdfd->mdfd_chain = NULL;
Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE)); Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
#endif
return mdfd; return mdfd;
} }
...@@ -552,7 +532,6 @@ mdclose(SMgrRelation reln) ...@@ -552,7 +532,6 @@ mdclose(SMgrRelation reln)
reln->md_fd = NULL; /* prevent dangling pointer after error */ reln->md_fd = NULL; /* prevent dangling pointer after error */
#ifdef USE_SEGMENTED_FILES
while (v != NULL) while (v != NULL)
{ {
MdfdVec *ov = v; MdfdVec *ov = v;
...@@ -564,11 +543,6 @@ mdclose(SMgrRelation reln) ...@@ -564,11 +543,6 @@ mdclose(SMgrRelation reln)
v = v->mdfd_chain; v = v->mdfd_chain;
pfree(ov); pfree(ov);
} }
#else
if (v->mdfd_vfd >= 0)
FileClose(v->mdfd_vfd);
pfree(v);
#endif
} }
/* /*
...@@ -583,12 +557,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) ...@@ -583,12 +557,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL); v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (off_t) BLCKSZ * blocknum;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR, ereport(ERROR,
...@@ -653,12 +623,8 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -653,12 +623,8 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL); v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
#else
seekpos = (off_t) BLCKSZ * blocknum;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR, ereport(ERROR,
...@@ -707,8 +673,6 @@ BlockNumber ...@@ -707,8 +673,6 @@ BlockNumber
mdnblocks(SMgrRelation reln) mdnblocks(SMgrRelation reln)
{ {
MdfdVec *v = mdopen(reln, EXTENSION_FAIL); MdfdVec *v = mdopen(reln, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
BlockNumber nblocks; BlockNumber nblocks;
BlockNumber segno = 0; BlockNumber segno = 0;
...@@ -764,9 +728,6 @@ mdnblocks(SMgrRelation reln) ...@@ -764,9 +728,6 @@ mdnblocks(SMgrRelation reln)
v = v->mdfd_chain; v = v->mdfd_chain;
} }
#else
return _mdnblocks(reln, v);
#endif
} }
/* /*
...@@ -777,10 +738,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -777,10 +738,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
{ {
MdfdVec *v; MdfdVec *v;
BlockNumber curnblk; BlockNumber curnblk;
#ifdef USE_SEGMENTED_FILES
BlockNumber priorblocks; BlockNumber priorblocks;
#endif
/* /*
* NOTE: mdnblocks makes sure we have opened all active segments, so that * NOTE: mdnblocks makes sure we have opened all active segments, so that
...@@ -804,7 +762,6 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -804,7 +762,6 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
v = mdopen(reln, EXTENSION_FAIL); v = mdopen(reln, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
priorblocks = 0; priorblocks = 0;
while (v != NULL) while (v != NULL)
{ {
...@@ -866,19 +823,6 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -866,19 +823,6 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
} }
priorblocks += RELSEG_SIZE; priorblocks += RELSEG_SIZE;
} }
#else
/* For unsegmented files, it's a lot easier */
if (FileTruncate(v->mdfd_vfd, (off_t) nblocks * BLCKSZ) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode,
nblocks)));
if (!isTemp)
register_dirty_segment(reln, v);
#endif
} }
/* /*
...@@ -901,7 +845,6 @@ mdimmedsync(SMgrRelation reln) ...@@ -901,7 +845,6 @@ mdimmedsync(SMgrRelation reln)
v = mdopen(reln, EXTENSION_FAIL); v = mdopen(reln, EXTENSION_FAIL);
#ifdef USE_SEGMENTED_FILES
while (v != NULL) while (v != NULL)
{ {
if (FileSync(v->mdfd_vfd) < 0) if (FileSync(v->mdfd_vfd) < 0)
...@@ -914,15 +857,6 @@ mdimmedsync(SMgrRelation reln) ...@@ -914,15 +857,6 @@ mdimmedsync(SMgrRelation reln)
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode)));
v = v->mdfd_chain; v = v->mdfd_chain;
} }
#else
if (FileSync(v->mdfd_vfd) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync relation %u/%u/%u: %m",
reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode)));
#endif
} }
/* /*
...@@ -1476,8 +1410,6 @@ _fdvec_alloc(void) ...@@ -1476,8 +1410,6 @@ _fdvec_alloc(void)
return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec)); return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
} }
#ifdef USE_SEGMENTED_FILES
/* /*
* Open the specified segment of the relation, * Open the specified segment of the relation,
* and make a MdfdVec object for it. Returns NULL on failure. * and make a MdfdVec object for it. Returns NULL on failure.
...@@ -1522,7 +1454,6 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) ...@@ -1522,7 +1454,6 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
/* all done */ /* all done */
return v; return v;
} }
#endif /* USE_SEGMENTED_FILES */
/* /*
* _mdfd_getseg() -- Find the segment of the relation holding the * _mdfd_getseg() -- Find the segment of the relation holding the
...@@ -1537,8 +1468,6 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, ...@@ -1537,8 +1468,6 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
ExtensionBehavior behavior) ExtensionBehavior behavior)
{ {
MdfdVec *v = mdopen(reln, behavior); MdfdVec *v = mdopen(reln, behavior);
#ifdef USE_SEGMENTED_FILES
BlockNumber targetseg; BlockNumber targetseg;
BlockNumber nextsegno; BlockNumber nextsegno;
...@@ -1600,8 +1529,6 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, ...@@ -1600,8 +1529,6 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
} }
v = v->mdfd_chain; v = v->mdfd_chain;
} }
#endif
return v; return v;
} }
......
...@@ -27,6 +27,15 @@ ...@@ -27,6 +27,15 @@
/* The normal alignment of `short', in bytes. */ /* The normal alignment of `short', in bytes. */
#undef ALIGNOF_SHORT #undef ALIGNOF_SHORT
/* Size of a disk block --- this also limits the size of a tuple. You can set
it bigger if you need bigger tuples (although TOAST should reduce the need
to have large tuples, since fields can be spread across multiple tuples).
BLCKSZ must be a power of 2. The maximum possible value of BLCKSZ is
currently 2^15 (32768). This is determined by the 15-bit widths of the
lp_off and lp_len fields in ItemIdData (see include/storage/itemid.h).
Changing BLCKSZ requires an initdb. */
#undef BLCKSZ
/* Define to the default TCP port number on which the server listens and to /* Define to the default TCP port number on which the server listens and to
which clients will try to connect. This can be overridden at run-time, but which clients will try to connect. This can be overridden at run-time, but
it's convenient if your clients have the right default compiled in. it's convenient if your clients have the right default compiled in.
...@@ -644,6 +653,19 @@ ...@@ -644,6 +653,19 @@
your system. */ your system. */
#undef PTHREAD_CREATE_JOINABLE #undef PTHREAD_CREATE_JOINABLE
/* RELSEG_SIZE is the maximum number of blocks allowed in one disk file. Thus,
the maximum size of a single file is RELSEG_SIZE * BLCKSZ; relations bigger
than that are divided into multiple files. RELSEG_SIZE * BLCKSZ must be
less than your OS' limit on file size. This is often 2 GB or 4GB in a
32-bit operating system, unless you have large file support enabled. By
default, we make the limit 1 GB to avoid any possible integer-overflow
problems within the OS. A limit smaller than necessary only means we divide
a large relation into more chunks than necessary, so it seems best to err
in the direction of a small limit. A power-of-2 value is recommended to
save a few cycles in md.c, but is not absolutely required. Changing
RELSEG_SIZE requires an initdb. */
#undef RELSEG_SIZE
/* The size of `off_t', as computed by sizeof. */ /* The size of `off_t', as computed by sizeof. */
#undef SIZEOF_OFF_T #undef SIZEOF_OFF_T
...@@ -703,9 +725,6 @@ ...@@ -703,9 +725,6 @@
/* Use replacement snprintf() functions. */ /* Use replacement snprintf() functions. */
#undef USE_REPL_SNPRINTF #undef USE_REPL_SNPRINTF
/* Define to split data files into 1GB segments. */
#undef USE_SEGMENTED_FILES
/* Define to build with (Open)SSL support. (--with-openssl) */ /* Define to build with (Open)SSL support. (--with-openssl) */
#undef USE_SSL #undef USE_SSL
......
...@@ -6,51 +6,10 @@ ...@@ -6,51 +6,10 @@
* for developers. If you edit any of these, be sure to do a *full* * for developers. If you edit any of these, be sure to do a *full*
* rebuild (and an initdb if noted). * rebuild (and an initdb if noted).
* *
* $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.31 2008/04/11 22:54:23 tgl Exp $ * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.32 2008/05/02 01:08:27 tgl Exp $
*------------------------------------------------------------------------ *------------------------------------------------------------------------
*/ */
/*
* Size of a disk block --- this also limits the size of a tuple. You
* can set it bigger if you need bigger tuples (although TOAST should
* reduce the need to have large tuples, since fields can be spread
* across multiple tuples).
*
* BLCKSZ must be a power of 2. The maximum possible value of BLCKSZ
* is currently 2^15 (32768). This is determined by the 15-bit widths
* of the lp_off and lp_len fields in ItemIdData (see
* include/storage/itemid.h).
*
* Changing BLCKSZ requires an initdb.
*/
#define BLCKSZ 8192
/*
* RELSEG_SIZE is the maximum number of blocks allowed in one disk
* file when USE_SEGMENTED_FILES is defined. Thus, the maximum size
* of a single file is RELSEG_SIZE * BLCKSZ; relations bigger than that
* are divided into multiple files.
*
* RELSEG_SIZE * BLCKSZ must be less than your OS' limit on file size.
* This is often 2 GB or 4GB in a 32-bit operating system, unless you
* have large file support enabled. By default, we make the limit 1
* GB to avoid any possible integer-overflow problems within the OS.
* A limit smaller than necessary only means we divide a large
* relation into more chunks than necessary, so it seems best to err
* in the direction of a small limit. (Besides, a power-of-2 value
* saves a few cycles in md.c.)
*
* When not using segmented files, RELSEG_SIZE is set to zero so that
* this behavior can be distinguished in pg_control.
*
* Changing RELSEG_SIZE requires an initdb.
*/
#ifdef USE_SEGMENTED_FILES
#define RELSEG_SIZE (0x40000000 / BLCKSZ)
#else
#define RELSEG_SIZE 0
#endif
/* /*
* Size of a WAL file block. This need have no particular relation to BLCKSZ. * Size of a WAL file block. This need have no particular relation to BLCKSZ.
* XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O, * XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment