Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
db306521
Commit
db306521
authored
Apr 17, 2005
by
Tom Lane
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial implementation of lossy-tuple-bitmap data structures.
Not connected to anything useful yet ...
parent
18b98505
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
840 additions
and
6 deletions
+840
-6
src/backend/nodes/Makefile
src/backend/nodes/Makefile
+2
-2
src/backend/nodes/tidbitmap.c
src/backend/nodes/tidbitmap.c
+774
-0
src/include/nodes/nodes.h
src/include/nodes/nodes.h
+9
-4
src/include/nodes/tidbitmap.h
src/include/nodes/tidbitmap.h
+55
-0
No files found.
src/backend/nodes/Makefile
View file @
db306521
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
# Makefile for backend/nodes
# Makefile for backend/nodes
#
#
# IDENTIFICATION
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/nodes/Makefile,v 1.1
7 2004/08/02 01:30:4
2 tgl Exp $
# $PostgreSQL: pgsql/src/backend/nodes/Makefile,v 1.1
8 2005/04/17 22:24:0
2 tgl Exp $
#
#
#-------------------------------------------------------------------------
#-------------------------------------------------------------------------
...
@@ -12,7 +12,7 @@ subdir = src/backend/nodes
...
@@ -12,7 +12,7 @@ subdir = src/backend/nodes
top_builddir
=
../../..
top_builddir
=
../../..
include
$(top_builddir)/src/Makefile.global
include
$(top_builddir)/src/Makefile.global
OBJS
=
nodeFuncs.o nodes.o list.o bitmapset.o
\
OBJS
=
nodeFuncs.o nodes.o list.o bitmapset.o
tidbitmap.o
\
copyfuncs.o equalfuncs.o makefuncs.o
\
copyfuncs.o equalfuncs.o makefuncs.o
\
outfuncs.o readfuncs.o print.o read.o params.o value.o
outfuncs.o readfuncs.o print.o read.o params.o value.o
...
...
src/backend/nodes/tidbitmap.c
0 → 100644
View file @
db306521
/*-------------------------------------------------------------------------
*
* tidbitmap.c
* PostgreSQL tuple-id (TID) bitmap package
*
* This module provides bitmap data structures that are spiritually
* similar to Bitmapsets, but are specially adapted to store sets of
* tuple identifiers (TIDs), or ItemPointers. In particular, the division
* of an ItemPointer into BlockNumber and OffsetNumber is catered for.
* Also, since we wish to be able to store very large tuple sets in
* memory with this data structure, we support "lossy" storage, in which
* we no longer remember individual tuple offsets on a page but only the
* fact that a particular page needs to be visited.
*
* The "lossy" storage uses one bit per disk page, so at the standard 8K
* BLCKSZ, we can represent all pages in 64GB of disk space in about 1MB
* of memory. People pushing around tables of that size should have a
* couple of MB to spare, so we don't worry about providing a second level
* of lossiness. In theory we could fall back to page ranges at some
* point, but for now that seems useless complexity.
*
*
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.1 2005/04/17 22:24:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <limits.h>
#include "access/htup.h"
#include "nodes/tidbitmap.h"
#include "utils/hsearch.h"
/*
 * The maximum number of tuples per page is not large (typically 256 with
 * 8K pages, or 1024 with 32K pages). So there's not much point in making
 * the per-page bitmaps variable size. We just legislate that the size
 * is this:
 *
 * The expression is a ceiling division of BLCKSZ by the MAXALIGN'd size of
 * the fixed tuple header (everything before t_bits) plus one ItemIdData.
 */
#define MAX_TUPLES_PER_PAGE ((BLCKSZ - 1) / MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) + sizeof(ItemIdData)) + 1)
/*
 * When we have to switch over to lossy storage, we use a data structure
 * with one bit per page, where all pages having the same number DIV
 * PAGES_PER_CHUNK are aggregated into one chunk. When a chunk is present
 * and has the bit set for a given page, there must not be a per-page entry
 * for that page in the page table.
 *
 * We actually store both exact pages and lossy chunks in the same hash
 * table, using identical data structures. (This is because dynahash.c's
 * memory management doesn't allow space to be transferred easily from one
 * hashtable to another.) Therefore it's best if PAGES_PER_CHUNK is the
 * same as MAX_TUPLES_PER_PAGE, or at least not too different. But we
 * also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer
 * remainder operations. So, define it like this:
 *
 * NOTE(review): this is a power of 2 only if BLCKSZ itself is one --
 * confirm against the BLCKSZ configuration rules.
 */
#define PAGES_PER_CHUNK (BLCKSZ / 32)
/* The bitmap unit size can be adjusted by changing these declarations: */
#define BITS_PER_BITMAPWORD 32
typedef
uint32
bitmapword
;
/* must be an unsigned type */
/*
 * WORDNUM(x) is the index of the bitmapword that holds bit number x;
 * BITNUM(x) is the position of bit x within that word.
 */
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
#define BITNUM(x) ((x) % BITS_PER_BITMAPWORD)
/* number of active words for an exact page (rounded up to whole words): */
#define WORDS_PER_PAGE ((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
/* number of active words for a lossy chunk (rounded up to whole words): */
#define WORDS_PER_CHUNK ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1)
/*
 * One hashtable entry.  The same layout serves two roles:
 *
 *	exact page:	 blockno is the heap page number, and bit k of words[]
 *				 stands for tuple offset k+1 on that page.
 *	lossy chunk: blockno is the first page of the chunk (always a multiple
 *				 of PAGES_PER_CHUNK), and bit k of words[] stands for page
 *				 blockno+k.
 *
 * Because a chunk header and the exact entry for the chunk's first page
 * would share the same hash key, the first page of a chunk cannot be
 * stored exactly once any page of that chunk is stored lossily.
 *
 * words[] is sized for whichever role needs more bitmapwords.
 */
typedef struct PagetableEntry
{
	BlockNumber	blockno;	/* hash key: page number, or first page of chunk */
	bool		ischunk;	/* true = lossy chunk, false = exact page */
	bitmapword	words[Max(WORDS_PER_PAGE, WORDS_PER_CHUNK)];
} PagetableEntry;
/*
 * Representation of a whole TIDBitmap.
 *
 * The first group of fields describes the hash table of pages/chunks and
 * its bookkeeping counters; the second group is scratch state that exists
 * only once iteration has been started.
 */
struct TIDBitmap
{
	NodeTag		type;			/* makes this a valid Node */
	MemoryContext mcxt;			/* context the bitmap was created in */
	HTAB	   *pagetable;		/* hash table holding PagetableEntry items */
	int			nentries;		/* total entries currently in pagetable */
	int			maxentries;		/* entry budget derived from maxbytes */
	int			npages;			/* how many entries are exact pages */
	int			nchunks;		/* how many entries are lossy chunks */
	bool		iterating;		/* set once tbm_begin_iterate has run */

	/* fields below are used only while producing sorted output */
	TBMIterateResult *output;	/* result buffer; NULL until first needed */
	PagetableEntry **spages;	/* exact pages sorted by blockno, or NULL */
	PagetableEntry **schunks;	/* lossy chunks sorted by blockno, or NULL */
	int			spageptr;		/* index of next spages element to return */
	int			schunkptr;		/* index of current schunks element */
	int			schunkbit;		/* next bit to examine in current chunk */
};
/* Forward declarations of file-local helpers */

/* page-table lookup and creation */
static PagetableEntry *tbm_find_pageentry(const TIDBitmap *tbm,
										  BlockNumber pageno);
static PagetableEntry *tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno);

/* lossy-storage management */
static bool tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno);
static void tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno);
static void tbm_lossify(TIDBitmap *tbm);

/* qsort support for iteration */
static int	tbm_comparator(const void *left, const void *right);
/*
 * tbm_create - create an initially-empty bitmap
 *
 * maxbytes is the approximate memory budget for the bitmap's hash table.
 * The bitmap, and its hash table, live in the memory context that is
 * CurrentMemoryContext at the time of this call.
 *
 * Returns the new TIDBitmap; release it with tbm_free().
 */
TIDBitmap *
tbm_create(long maxbytes)
{
	TIDBitmap  *tbm;
	HASHCTL		hash_ctl;
	long		nbuckets;

	/* we rely on makeNode to have zeroed all the fields */
	tbm = makeNode(TIDBitmap);
	tbm->mcxt = CurrentMemoryContext;

	/*
	 * Estimate number of hashtable entries we can have within maxbytes.
	 * This estimates the hash overhead at MAXALIGN(sizeof(HASHELEMENT))
	 * plus a pointer per hash entry, which is crude but good enough for
	 * our purpose.  (NOTE: this does not count the space for data
	 * structures created during iteration readout.)
	 */
	nbuckets = maxbytes /
		(MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(sizeof(PagetableEntry))
		 + sizeof(Pointer));
	nbuckets = Min(nbuckets, INT_MAX - 1);	/* safety limit */

	/*
	 * Sanity limit: a maxbytes smaller than one entry makes the division
	 * above yield zero.  With maxentries == 0 the "nentries > maxentries"
	 * checks in the callers would fire after every single insertion, so
	 * always allow a handful of entries.
	 */
	nbuckets = Max(nbuckets, 16);
	tbm->maxentries = (int) nbuckets;

	/* Hash table keyed by BlockNumber, allocated in the current context */
	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
	hash_ctl.keysize = sizeof(BlockNumber);
	hash_ctl.entrysize = sizeof(PagetableEntry);
	hash_ctl.hash = tag_hash;
	hash_ctl.hcxt = CurrentMemoryContext;
	tbm->pagetable = hash_create("TIDBitmap",
								 nbuckets,
								 &hash_ctl,
								 HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);

	return tbm;
}
/*
 * tbm_free - release a TIDBitmap and everything it owns
 *
 * Destroys the page hash table, frees whichever iteration work areas were
 * actually allocated, and finally frees the TIDBitmap struct itself.
 */
void
tbm_free(TIDBitmap *tbm)
{
	hash_destroy(tbm->pagetable);

	/* the iteration structures are created lazily, so any may be NULL */
	if (tbm->schunks != NULL)
		pfree(tbm->schunks);
	if (tbm->spages != NULL)
		pfree(tbm->spages);
	if (tbm->output != NULL)
		pfree(tbm->output);

	pfree(tbm);
}
/*
 * tbm_add_tuples - add some tuple IDs to a TIDBitmap
 *
 * tids points to an array of ntids item pointers.  Each one is recorded
 * either as an exact tuple bit on its page, or, if the page is already
 * stored lossily, not at all (the page is covered anyway).  Must not be
 * called once iteration has begun.
 *
 * Raises an error if a tuple offset is outside 1..MAX_TUPLES_PER_PAGE.
 */
void
tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids)
{
	int			n;

	Assert(!tbm->iterating);

	for (n = 0; n < ntids; n++)
	{
		BlockNumber blk = ItemPointerGetBlockNumber(&tids[n]);
		OffsetNumber off = ItemPointerGetOffsetNumber(&tids[n]);
		PagetableEntry *entry;
		int			bitidx;

		/* guard against running off the end of the per-page bit array */
		if (!(off >= 1 && off <= MAX_TUPLES_PER_PAGE))
			elog(ERROR, "tuple offset out of range: %u", off);

		/* nothing to record if the whole page is already marked */
		if (tbm_page_is_lossy(tbm, blk))
			continue;

		entry = tbm_get_pageentry(tbm, blk);

		/*
		 * If the entry we got back is a lossy chunk header, the page is
		 * the chunk's first page: set its own page bit (bit 0).  Otherwise
		 * the entry is exact and we set the bit for this tuple offset.
		 */
		bitidx = entry->ischunk ? 0 : (off - 1);
		entry->words[WORDNUM(bitidx)] |= ((bitmapword) 1 << BITNUM(bitidx));

		/* trade precision for space if we have gone over budget */
		if (tbm->nentries > tbm->maxentries)
			tbm_lossify(tbm);
	}
}
/*
 * tbm_union - set union
 *
 * a is modified in-place, b is not changed.  a must not be in the
 * iterating state.
 *
 * Every entry of b is merged into a: pages that are lossy in b become lossy
 * in a, exact pages of b are OR'ed bitwise into a's exact page unless a
 * already holds that page lossily.  After each merged entry a is lossified
 * if it has exceeded its entry budget.
 */
void
tbm_union(TIDBitmap *a, const TIDBitmap *b)
{
	HASH_SEQ_STATUS status;
	PagetableEntry *apage;
	PagetableEntry *bpage;
	int			wordnum;

	Assert(!a->iterating);

	/* Scan through chunks and pages in b, merge into a */
	hash_seq_init(&status, b->pagetable);
	while ((bpage = (PagetableEntry *) hash_seq_search(&status)) != NULL)
	{
		if (bpage->ischunk)
		{
			/*
			 * Scan b's chunk, mark each indicated page lossy in a.  A chunk
			 * has WORDS_PER_CHUNK active words (not WORDS_PER_PAGE); using
			 * the page count here would skip chunk bits whenever a chunk
			 * spans more words than an exact page.
			 */
			for (wordnum = 0; wordnum < WORDS_PER_CHUNK; wordnum++)
			{
				bitmapword	w = bpage->words[wordnum];

				if (w != 0)
				{
					BlockNumber pg;

					/* first page represented by this word */
					pg = bpage->blockno + (wordnum * BITS_PER_BITMAPWORD);
					while (w != 0)
					{
						if (w & 1)
							tbm_mark_page_lossy(a, pg);
						pg++;
						w >>= 1;
					}
				}
			}
		}
		else if (tbm_page_is_lossy(a, bpage->blockno))
		{
			/* page is already lossy in a, nothing to do */
			continue;
		}
		else
		{
			apage = tbm_get_pageentry(a, bpage->blockno);
			if (apage->ischunk)
			{
				/* The page is a lossy chunk header, set bit for itself */
				apage->words[0] |= ((bitmapword) 1 << 0);
			}
			else
			{
				/* Both pages are exact, merge at the bit level */
				for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
					apage->words[wordnum] |= bpage->words[wordnum];
			}
		}

		/* merging may have pushed a over its entry budget */
		if (a->nentries > a->maxentries)
			tbm_lossify(a);
	}
}
/*
 * tbm_intersect - set intersection
 *
 * a is modified in-place, b is not changed.  a must not be in the
 * iterating state.
 *
 * For each lossy chunk of a, the bit of any page that does not appear in b
 * (neither lossily nor exactly) is cleared.  For each exact page of a, the
 * tuple bits are AND'ed with b's exact page, unless b holds the page
 * lossily, in which case a's bits are kept as they are.  Entries of a that
 * end up with no bits set are removed from a's hash table.
 *
 * NOTE(review): entries are removed while the sequential scan over a's
 * hash table is in progress; the removed entry is always the one most
 * recently returned by hash_seq_search.  Confirm dynahash permits this.
 */
void
tbm_intersect(TIDBitmap *a, const TIDBitmap *b)
{
	HASH_SEQ_STATUS status;
	PagetableEntry *apage;
	PagetableEntry *bpage;
	int			wordnum;

	Assert(!a->iterating);

	/* Scan through chunks and pages in a, try to match to b */
	hash_seq_init(&status, a->pagetable);
	while ((apage = (PagetableEntry *) hash_seq_search(&status)) != NULL)
	{
		if (apage->ischunk)
		{
			/* Scan each bit in chunk, try to clear */
			bool		candelete = true;

			/*
			 * A chunk has WORDS_PER_CHUNK active words (not
			 * WORDS_PER_PAGE); stopping at the page count could leave
			 * chunk bits unexamined and never cleared.
			 */
			for (wordnum = 0; wordnum < WORDS_PER_CHUNK; wordnum++)
			{
				bitmapword	w = apage->words[wordnum];

				if (w != 0)
				{
					bitmapword	neww = w;
					BlockNumber pg;
					int			bitnum;

					/* first page represented by this word */
					pg = apage->blockno + (wordnum * BITS_PER_BITMAPWORD);
					bitnum = 0;
					while (w != 0)
					{
						if (w & 1)
						{
							if (!tbm_page_is_lossy(b, pg) &&
								tbm_find_pageentry(b, pg) == NULL)
							{
								/* Page is not in b at all, lose lossy bit */
								neww &= ~((bitmapword) 1 << bitnum);
							}
						}
						pg++;
						bitnum++;
						w >>= 1;
					}
					apage->words[wordnum] = neww;
					if (neww != 0)
						candelete = false;
				}
			}

			if (candelete)
			{
				/* Chunk is now empty, remove it from a */
				if (hash_search(a->pagetable,
								(void *) &apage->blockno,
								HASH_REMOVE, NULL) == NULL)
					elog(ERROR, "hash table corrupted");
				a->nentries--;
				a->nchunks--;
			}
		}
		else if (tbm_page_is_lossy(b, apage->blockno))
		{
			/* page is lossy in b, cannot clear any bits */
			continue;
		}
		else
		{
			/* stays true if b has no entry for the page at all */
			bool		candelete = true;

			bpage = tbm_find_pageentry(b, apage->blockno);
			if (bpage != NULL)
			{
				/* Both pages are exact, merge at the bit level */
				Assert(!bpage->ischunk);
				for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
				{
					apage->words[wordnum] &= bpage->words[wordnum];
					if (apage->words[wordnum] != 0)
						candelete = false;
				}
			}

			if (candelete)
			{
				/* Page is now empty, remove it from a */
				if (hash_search(a->pagetable,
								(void *) &apage->blockno,
								HASH_REMOVE, NULL) == NULL)
					elog(ERROR, "hash table corrupted");
				a->nentries--;
				a->npages--;
			}
		}
	}
}
/*
 * tbm_begin_iterate - prepare to iterate through a TIDBitmap
 *
 * Marks the bitmap as iterating (after which its contents may no longer be
 * modified), lazily allocates the result buffer and the sorted pointer
 * arrays in the bitmap's own memory context, fills those arrays from the
 * hash table, sorts them by block number, and rewinds the iteration
 * cursors.  It may be called again later to rescan the same contents.
 */
void
tbm_begin_iterate(TIDBitmap *tbm)
{
	HASH_SEQ_STATUS scan;
	PagetableEntry *entry;
	int			exact_count = 0;
	int			lossy_count = 0;

	tbm->iterating = true;

	/*
	 * The result buffer is created on first use rather than in tbm_create,
	 * because a bitmap may never be iterated at all.  It has room for
	 * MAX_TUPLES_PER_PAGE offsets beyond the base struct.
	 */
	if (tbm->output == NULL)
	{
		tbm->output = (TBMIterateResult *)
			MemoryContextAllocZero(tbm->mcxt,
								   sizeof(TBMIterateResult) +
								   MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
	}

	/* Allocate the pointer arrays, but only if needed and not yet present */
	if (tbm->spages == NULL && tbm->npages > 0)
	{
		tbm->spages = (PagetableEntry **)
			MemoryContextAlloc(tbm->mcxt,
							   tbm->npages * sizeof(PagetableEntry *));
	}
	if (tbm->schunks == NULL && tbm->nchunks > 0)
	{
		tbm->schunks = (PagetableEntry **)
			MemoryContextAlloc(tbm->mcxt,
							   tbm->nchunks * sizeof(PagetableEntry *));
	}

	/* Distribute every hash entry into the exact or the lossy array */
	hash_seq_init(&scan, tbm->pagetable);
	for (;;)
	{
		entry = (PagetableEntry *) hash_seq_search(&scan);
		if (entry == NULL)
			break;

		if (entry->ischunk)
			tbm->schunks[lossy_count++] = entry;
		else
			tbm->spages[exact_count++] = entry;
	}
	Assert(exact_count == tbm->npages);
	Assert(lossy_count == tbm->nchunks);

	/* Sorting is pointless for zero or one element */
	if (exact_count > 1)
		qsort(tbm->spages, exact_count, sizeof(PagetableEntry *),
			  tbm_comparator);
	if (lossy_count > 1)
		qsort(tbm->schunks, lossy_count, sizeof(PagetableEntry *),
			  tbm_comparator);

	/* Start reading from the beginning of both arrays */
	tbm->schunkbit = 0;
	tbm->schunkptr = 0;
	tbm->spageptr = 0;
}
/*
 * tbm_iterate - fetch the next page of a TIDBitmap
 *
 * Returns the bitmap's result buffer filled in for one page, or NULL when
 * no pages remain.  The same buffer is reused on every call.  Pages come
 * back in ascending block-number order, merging the sorted exact-page and
 * lossy-chunk lists.  A result with ntuples < 0 means the page is stored
 * lossily: the caller must examine every tuple on it and recheck the
 * condition.  Otherwise offsets[0..ntuples-1] hold the tuple offsets.
 *
 * tbm_begin_iterate must have been called first.
 */
TBMIterateResult *
tbm_iterate(TIDBitmap *tbm)
{
	TBMIterateResult *result = tbm->output;
	PagetableEntry *exact;
	int			count;
	int			wordnum;

	Assert(tbm->iterating);

	/*
	 * Position schunkptr/schunkbit on the next set bit of the lossy
	 * chunks, stepping to following chunks as each one is exhausted.
	 */
	while (tbm->schunkptr < tbm->nchunks)
	{
		PagetableEntry *cur = tbm->schunks[tbm->schunkptr];
		int			bit = tbm->schunkbit;

		while (bit < PAGES_PER_CHUNK &&
			   (cur->words[WORDNUM(bit)] &
				((bitmapword) 1 << BITNUM(bit))) == 0)
			bit++;

		if (bit < PAGES_PER_CHUNK)
		{
			/* found a lossy page; remember where */
			tbm->schunkbit = bit;
			break;
		}

		/* this chunk has no more bits, go on to the next one */
		tbm->schunkptr++;
		tbm->schunkbit = 0;
	}

	/*
	 * A lossy page is pending.  Emit it if no exact pages remain, or if it
	 * numerically precedes the next exact page.
	 */
	if (tbm->schunkptr < tbm->nchunks)
	{
		BlockNumber lossy_blk;

		lossy_blk = tbm->schunks[tbm->schunkptr]->blockno + tbm->schunkbit;

		if (tbm->spageptr >= tbm->npages ||
			lossy_blk < tbm->spages[tbm->spageptr]->blockno)
		{
			result->blockno = lossy_blk;
			result->ntuples = -1;
			tbm->schunkbit++;
			return result;
		}
	}

	/* No exact pages left either: the bitmap is exhausted */
	if (tbm->spageptr >= tbm->npages)
		return NULL;

	/* Emit the next exact page, expanding its bitmap into offset numbers */
	exact = tbm->spages[tbm->spageptr];
	count = 0;
	for (wordnum = 0; wordnum < WORDS_PER_PAGE; wordnum++)
	{
		bitmapword	w = exact->words[wordnum];
		int			off;

		/* bit k of the page bitmap corresponds to tuple offset k+1 */
		for (off = wordnum * BITS_PER_BITMAPWORD + 1; w != 0; off++, w >>= 1)
		{
			if (w & 1)
				result->offsets[count++] = (OffsetNumber) off;
		}
	}
	result->blockno = exact->blockno;
	result->ntuples = count;
	tbm->spageptr++;
	return result;
}
/*
 * tbm_find_pageentry - look up the exact-page entry for pageno
 *
 * Returns the entry, or NULL if the hash table has no entry under that key
 * or the entry found there is a lossy chunk header rather than an exact
 * page.  Never creates an entry.
 */
static PagetableEntry *
tbm_find_pageentry(const TIDBitmap *tbm, BlockNumber pageno)
{
	PagetableEntry *entry;

	entry = (PagetableEntry *) hash_search(tbm->pagetable,
										   (void *) &pageno,
										   HASH_FIND, NULL);

	/* a chunk header sharing this key is not what the caller wants */
	if (entry != NULL && entry->ischunk)
		entry = NULL;

	return entry;
}
/*
 * tbm_get_pageentry - find or create the hashtable entry for pageno
 *
 * A newly created entry is zeroed, keyed by pageno, marked exact
 * (ischunk = false), and counted in nentries and npages.  An existing
 * entry is returned untouched, so the caller must check ischunk: the entry
 * may be a lossy chunk header.
 *
 * Creating an entry can push the table past maxentries; the caller is
 * responsible for calling tbm_lossify() at the next safe point.
 *
 * Reports an out-of-memory error if the hash table cannot supply an entry.
 */
static PagetableEntry *
tbm_get_pageentry(TIDBitmap *tbm, BlockNumber pageno)
{
	bool		found;
	PagetableEntry *entry;

	entry = (PagetableEntry *) hash_search(tbm->pagetable,
										   (void *) &pageno,
										   HASH_ENTER, &found);
	if (entry == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* pre-existing entry: hand it back as is */
	if (found)
		return entry;

	/* brand new entry: initialise it as an empty exact page and count it */
	MemSet(entry, 0, sizeof(PagetableEntry));
	entry->blockno = pageno;
	tbm->npages++;
	tbm->nentries++;

	return entry;
}
/*
 * tbm_page_is_lossy - is the page marked as lossily stored?
 *
 * True only if the hash table holds a lossy chunk entry for the chunk
 * containing pageno and that chunk has the bit for pageno set.
 */
static bool
tbm_page_is_lossy(const TIDBitmap *tbm, BlockNumber pageno)
{
	PagetableEntry *chunk;
	BlockNumber chunk_start;
	int			bitno;

	/* with no lossy chunks at all, the answer is known without a lookup */
	if (tbm->nchunks == 0)
		return false;

	/* split pageno into chunk header page and bit position inside chunk */
	bitno = pageno % PAGES_PER_CHUNK;
	chunk_start = pageno - bitno;

	chunk = (PagetableEntry *) hash_search(tbm->pagetable,
										   (void *) &chunk_start,
										   HASH_FIND, NULL);

	/* no entry there, or the entry is an exact page: not lossy */
	if (chunk == NULL || !chunk->ischunk)
		return false;

	if ((chunk->words[WORDNUM(bitno)] &
		 ((bitmapword) 1 << BITNUM(bitno))) != 0)
		return true;

	return false;
}
/*
 * tbm_mark_page_lossy - mark the page number as lossily stored
 *
 * Drops any exact entry for pageno, makes sure a lossy chunk entry exists
 * for the chunk containing it (creating one, or converting the exact entry
 * of the chunk's first page), and sets pageno's bit in that chunk.  The
 * nentries/npages/nchunks counters are kept in step.
 *
 * This can leave the table above maxentries; the caller is responsible for
 * calling tbm_lossify() at the next safe point.
 *
 * Reports an out-of-memory error if the hash table cannot supply an entry.
 */
static void
tbm_mark_page_lossy(TIDBitmap *tbm, BlockNumber pageno)
{
	PagetableEntry *chunk;
	bool		found;
	int			bitno = pageno % PAGES_PER_CHUNK;
	BlockNumber chunk_pageno = pageno - bitno;

	/*
	 * Get rid of an exact entry for the page, if there is one.  When the
	 * page is itself the chunk header (bitno == 0) its entry shares the
	 * chunk's hash key, so it is converted below instead of removed.
	 */
	if (bitno != 0 &&
		hash_search(tbm->pagetable,
					(void *) &pageno,
					HASH_REMOVE, NULL) != NULL)
	{
		/* an entry was removed; it is assumed to have been an exact one */
		tbm->nentries--;
		tbm->npages--;
	}

	/* Find or make the entry keyed by the chunk's first page */
	chunk = (PagetableEntry *) hash_search(tbm->pagetable,
										   (void *) &chunk_pageno,
										   HASH_ENTER, &found);
	if (chunk == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/*
	 * Unless the entry already is a chunk, (re)initialise it as one.  That
	 * covers both a freshly created entry and an exact entry for the
	 * chunk's first page.
	 */
	if (!found || !chunk->ischunk)
	{
		MemSet(chunk, 0, sizeof(PagetableEntry));
		chunk->blockno = chunk_pageno;
		chunk->ischunk = true;
		tbm->nchunks++;

		if (found)
		{
			/*
			 * Converted from an exact page: we assume it had some tuple
			 * bit(s) set, so the header page itself becomes lossy.  The
			 * entry count is unchanged, it merely changes category.
			 */
			chunk->words[0] = ((bitmapword) 1 << 0);
			tbm->npages--;
		}
		else
		{
			/* a genuinely new entry must be counted */
			tbm->nentries++;
		}
	}

	/* Finally record the page that was asked for */
	chunk->words[WORDNUM(bitno)] |= ((bitmapword) 1 << BITNUM(bitno));
}
/*
 * tbm_lossify - lose some information to get back under the memory limit
 *
 * Walks the hash table converting exact pages to lossy storage until the
 * entry count is no larger than maxentries, or the table has been fully
 * scanned.  Must not be called once iteration has begun.
 *
 * XXX This is a very naive strategy: pages are lossified in whatever order
 * the hash scan yields them, without regard to how many bits each has set,
 * and only the minimum number of pages is converted per call.
 */
static void
tbm_lossify(TIDBitmap *tbm)
{
	HASH_SEQ_STATUS scan;
	PagetableEntry *entry;

	Assert(!tbm->iterating);

	hash_seq_init(&scan, tbm->pagetable);
	for (entry = (PagetableEntry *) hash_seq_search(&scan);
		 entry != NULL;
		 entry = (PagetableEntry *) hash_seq_search(&scan))
	{
		/*
		 * Skip entries that are already chunk headers, and exact pages
		 * that would themselves become a chunk header: converting the
		 * latter would not reduce the entry count.
		 */
		if (entry->ischunk || (entry->blockno % PAGES_PER_CHUNK) == 0)
			continue;

		/* fold this exact page into its lossy chunk */
		tbm_mark_page_lossy(tbm, entry->blockno);

		/* stop as soon as we are back within budget */
		if (tbm->nentries <= tbm->maxentries)
			return;

		/*
		 * tbm_mark_page_lossy may have added a lossy chunk to the table
		 * mid-scan.  That is harmless here because it does not matter
		 * whether the scan visits lossy chunks or not.
		 */
	}
}
/*
* qsort comparator to handle PagetableEntry pointers.
*/
static
int
tbm_comparator
(
const
void
*
left
,
const
void
*
right
)
{
BlockNumber
l
=
(
*
((
const
PagetableEntry
**
)
left
))
->
blockno
;
BlockNumber
r
=
(
*
((
const
PagetableEntry
**
)
right
))
->
blockno
;
if
(
l
<
r
)
return
-
1
;
else
if
(
l
>
r
)
return
1
;
return
0
;
}
src/include/nodes/nodes.h
View file @
db306521
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.16
5 2005/04/06 16:34:07
tgl Exp $
* $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.16
6 2005/04/17 22:24:02
tgl Exp $
*
*
*-------------------------------------------------------------------------
*-------------------------------------------------------------------------
*/
*/
...
@@ -304,11 +304,16 @@ typedef enum NodeTag
...
@@ -304,11 +304,16 @@ typedef enum NodeTag
T_FunctionParameter
,
T_FunctionParameter
,
/*
/*
* TAGS FOR FUNCTION-CALL CONTEXT AND RESULTINFO NODES (see fmgr.h)
* TAGS FOR RANDOM OTHER STUFF
*
* These are objects that aren't part of parse/plan/execute node tree
* structures, but we give them NodeTags anyway for identification
* purposes (usually because they are involved in APIs where we want
* to pass multiple object types through the same pointer).
*/
*/
T_TriggerData
=
900
,
/* in commands/trigger.h */
T_TriggerData
=
900
,
/* in commands/trigger.h */
T_ReturnSetInfo
/* in nodes/execnodes.h */
T_ReturnSetInfo
,
/* in nodes/execnodes.h */
T_TIDBitmap
/* in nodes/tidbitmap.h */
}
NodeTag
;
}
NodeTag
;
/*
/*
...
...
src/include/nodes/tidbitmap.h
0 → 100644
View file @
db306521
/*-------------------------------------------------------------------------
*
* tidbitmap.h
* PostgreSQL tuple-id (TID) bitmap package
*
* This module provides bitmap data structures that are spiritually
* similar to Bitmapsets, but are specially adapted to store sets of
* tuple identifiers (TIDs), or ItemPointers. In particular, the division
* of an ItemPointer into BlockNumber and OffsetNumber is catered for.
* Also, since we wish to be able to store very large tuple sets in
* memory with this data structure, we support "lossy" storage, in which
* we no longer remember individual tuple offsets on a page but only the
* fact that a particular page needs to be visited.
*
*
* Copyright (c) 2003-2005, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.1 2005/04/17 22:24:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef TIDBITMAP_H
#define TIDBITMAP_H
#include "storage/itemptr.h"
/*
 * TIDBitmap is an opaque type: its layout is known only to tidbitmap.c.
 * Outside code may test IsA(x, TIDBitmap) but must otherwise go through
 * the functions declared in this header.
 */
typedef struct TIDBitmap TIDBitmap;

/*
 * What tbm_iterate hands back for one page.
 *
 * This is a variable-length struct: offsets[] is declared with one element
 * but is really a variable-length array.
 */
typedef struct
{
	BlockNumber	blockno;		/* number of the page these tuples are on */
	int			ntuples;		/* count of offsets; -1 means lossy page */
	OffsetNumber offsets[1];	/* VARIABLE LENGTH ARRAY of tuple offsets */
} TBMIterateResult;
/* Public interface, implemented in nodes/tidbitmap.c */

/* creation and destruction */
extern TIDBitmap *tbm_create(long maxbytes);
extern void tbm_free(TIDBitmap *tbm);

/* adding members */
extern void tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids);

/* set operations; the first argument is modified in place */
extern void tbm_union(TIDBitmap *a, const TIDBitmap *b);
extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b);

/* reading the contents back, one page at a time */
extern void tbm_begin_iterate(TIDBitmap *tbm);
extern TBMIterateResult *tbm_iterate(TIDBitmap *tbm);
#endif
/* TIDBITMAP_H */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment