Commit f81e97d0 authored by Heikki Linnakangas's avatar Heikki Linnakangas

pg_rewind: Replace the hybrid list+array data structure with simplehash.

Now that simplehash can be used in frontend code, let's make use of it.

Reviewed-by: Kyotaro Horiguchi, Soumyadeep Chakraborty
Discussion: https://www.postgresql.org/message-id/0c5b3783-af52-3ee5-f8fa-6e794061f70d%40iki.fi
parent eb00f1d4
......@@ -207,9 +207,9 @@ copy_executeFileMap(filemap_t *map)
file_entry_t *entry;
int i;
for (i = 0; i < map->narray; i++)
for (i = 0; i < map->nentries; i++)
{
entry = map->array[i];
entry = map->entries[i];
execute_pagemap(&entry->target_pages_to_overwrite, entry->path);
switch (entry->action)
......
......@@ -37,7 +37,7 @@ fetchSourceFileList(void)
* Fetch all relation data files that are marked in the given data page map.
*/
void
executeFileMap(void)
execute_file_actions(filemap_t *filemap)
{
if (datadir_source)
copy_executeFileMap(filemap);
......
......@@ -25,7 +25,7 @@
*/
extern void fetchSourceFileList(void);
extern char *fetchFile(const char *filename, size_t *filesize);
extern void executeFileMap(void);
extern void execute_file_actions(filemap_t *filemap);
/* in libpq_fetch.c */
extern void libpqProcessFileList(void);
......
......@@ -3,6 +3,19 @@
* filemap.c
* A data structure for keeping track of files that have changed.
*
* This source file contains the logic to decide what to do with different
* kinds of files, and the data structure to support it. Before modifying
* anything, pg_rewind collects information about all the files and their
* attributes in the target and source data directories. It also scans the
* WAL log in the target, and collects information about data blocks that
* were changed. All this information is stored in a hash table, using the
* file path relative to the root of the data directory as the key.
*
* After collecting all the information required, the decide_file_actions()
* function scans the hash table and decides what action needs to be taken
* for each file. Finally, it sorts the array to the final order that the
* actions should be executed in.
*
* Copyright (c) 2013-2020, PostgreSQL Global Development Group
*
*-------------------------------------------------------------------------
......@@ -14,22 +27,41 @@
#include <unistd.h>
#include "catalog/pg_tablespace_d.h"
#include "common/hashfn.h"
#include "common/string.h"
#include "datapagemap.h"
#include "filemap.h"
#include "pg_rewind.h"
#include "storage/fd.h"
filemap_t *filemap = NULL;
/*
* Define a hash table which we can use to store information about the files
* appearing in source and target systems.
*/
static uint32 hash_string_pointer(const char *s);
#define SH_PREFIX filehash
#define SH_ELEMENT_TYPE file_entry_t
#define SH_KEY_TYPE const char *
#define SH_KEY path
#define SH_HASH_KEY(tb, key) hash_string_pointer(key)
#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
#define SH_SCOPE static inline
#define SH_RAW_ALLOCATOR pg_malloc0
#define SH_DECLARE
#define SH_DEFINE
#include "lib/simplehash.h"
#define FILEHASH_INITIAL_SIZE 1000
static filehash_hash *filehash;
static bool isRelDataFile(const char *path);
static char *datasegpath(RelFileNode rnode, ForkNumber forknum,
BlockNumber segno);
static int path_cmp(const void *a, const void *b);
static file_entry_t *get_filemap_entry(const char *path, bool create);
static file_entry_t *insert_filehash_entry(const char *path);
static file_entry_t *lookup_filehash_entry(const char *path);
static int final_filemap_cmp(const void *a, const void *b);
static void filemap_list_to_array(filemap_t *map);
static bool check_file_excluded(const char *path, bool is_source);
/*
......@@ -131,54 +163,26 @@ static const struct exclude_list_item excludeFiles[] =
};
/*
* Create a new file map (stored in the global pointer "filemap").
* Initialize the hash table for the file map.
*/
void
filemap_create(void)
filehash_init(void)
{
filemap_t *map;
map = pg_malloc(sizeof(filemap_t));
map->first = map->last = NULL;
map->nlist = 0;
map->array = NULL;
map->narray = 0;
Assert(filemap == NULL);
filemap = map;
filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
}
/* Look up or create entry for 'path' */
/* Look up entry for 'path', creating a new one if it doesn't exist */
static file_entry_t *
get_filemap_entry(const char *path, bool create)
insert_filehash_entry(const char *path)
{
filemap_t *map = filemap;
file_entry_t *entry;
file_entry_t **e;
file_entry_t key;
file_entry_t *key_ptr;
if (map->array)
{
key.path = (char *) path;
key_ptr = &key;
e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *),
path_cmp);
}
else
e = NULL;
bool found;
if (e)
entry = *e;
else if (!create)
entry = NULL;
else
entry = filehash_insert(filehash, path, &found);
if (!found)
{
/* Create a new entry for this file */
entry = pg_malloc(sizeof(file_entry_t));
entry->path = pg_strdup(path);
entry->isrelfile = isRelDataFile(path);
entry->action = FILE_ACTION_UNDECIDED;
entry->target_exists = false;
entry->target_type = FILE_TYPE_UNDEFINED;
......@@ -192,21 +196,18 @@ get_filemap_entry(const char *path, bool create)
entry->source_size = 0;
entry->source_link_target = NULL;
entry->next = NULL;
if (map->last)
{
map->last->next = entry;
map->last = entry;
}
else
map->first = map->last = entry;
map->nlist++;
entry->action = FILE_ACTION_UNDECIDED;
}
return entry;
}
static file_entry_t *
lookup_filehash_entry(const char *path)
{
return filehash_lookup(filehash, path);
}
/*
* Callback for processing source file list.
*
......@@ -220,8 +221,6 @@ process_source_file(const char *path, file_type_t type, size_t size,
{
file_entry_t *entry;
Assert(filemap->array == NULL);
/*
* Pretend that pg_wal is a directory, even if it's really a symlink. We
* don't want to mess with the symlink itself, nor complain if it's a
......@@ -238,7 +237,9 @@ process_source_file(const char *path, file_type_t type, size_t size,
pg_fatal("data file \"%s\" in source is not a regular file", path);
/* Remember this source file */
entry = get_filemap_entry(path, true);
entry = insert_filehash_entry(path);
if (entry->source_exists)
pg_fatal("duplicate source file \"%s\"", path);
entry->source_exists = true;
entry->source_type = type;
entry->source_size = size;
......@@ -248,15 +249,12 @@ process_source_file(const char *path, file_type_t type, size_t size,
/*
* Callback for processing target file list.
*
* All source files must be already processed before calling this. We record
* the type and size of file, so that decide_file_action() can later decide
* what to do with it.
* Record the type and size of the file, like process_source_file() does.
*/
void
process_target_file(const char *path, file_type_t type, size_t size,
const char *link_target)
{
filemap_t *map = filemap;
file_entry_t *entry;
/*
......@@ -264,21 +262,6 @@ process_target_file(const char *path, file_type_t type, size_t size,
* from the target data folder all paths which have been filtered out from
* the source data folder when processing the source files.
*/
if (map->array == NULL)
{
/* on first call, initialize lookup array */
if (map->nlist == 0)
{
/* should not happen */
pg_fatal("source file list is empty");
}
filemap_list_to_array(map);
Assert(map->array != NULL);
qsort(map->array, map->narray, sizeof(file_entry_t *), path_cmp);
}
/*
* Like in process_source_file, pretend that pg_wal is always a directory.
......@@ -287,7 +270,9 @@ process_target_file(const char *path, file_type_t type, size_t size,
type = FILE_TYPE_DIRECTORY;
/* Remember this target file */
entry = get_filemap_entry(path, true);
entry = insert_filehash_entry(path);
if (entry->target_exists)
pg_fatal("duplicate source file \"%s\"", path);
entry->target_exists = true;
entry->target_type = type;
entry->target_size = size;
......@@ -301,7 +286,7 @@ process_target_file(const char *path, file_type_t type, size_t size,
* if so, records it in 'target_pages_to_overwrite' bitmap.
*
* NOTE: All the files on both systems must have already been added to the
* file map!
* hash table!
*/
void
process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
......@@ -312,47 +297,45 @@ process_target_wal_block_change(ForkNumber forknum, RelFileNode rnode,
BlockNumber blkno_inseg;
int segno;
Assert(filemap->array);
segno = blkno / RELSEG_SIZE;
blkno_inseg = blkno % RELSEG_SIZE;
path = datasegpath(rnode, forknum, segno);
entry = get_filemap_entry(path, false);
entry = lookup_filehash_entry(path);
pfree(path);
/*
* If the block still exists in both systems, remember it. Otherwise we
* can safely ignore it.
*
* If the block is beyond the EOF in the source system, or the file
* doesn't exist in the source at all, we're going to truncate/remove it
* away from the target anyway. Likewise, if it doesn't exist in the
* target anymore, we will copy it over with the "tail" from the source
* system, anyway.
*
* It is possible to find WAL for a file that doesn't exist on either
* system anymore. It means that the relation was dropped later in the
* target system, and independently on the source system too, or that it
* was created and dropped in the target system and it never existed in
* the source. Either way, we can safely ignore it.
*/
if (entry)
{
int64 end_offset;
Assert(entry->isrelfile);
if (entry->target_type != FILE_TYPE_REGULAR)
pg_fatal("unexpected page modification for non-regular file \"%s\"",
entry->path);
/*
* If the block beyond the EOF in the source system, no need to
* remember it now, because we're going to truncate it away from the
* target anyway. Also no need to remember the block if it's beyond
* the current EOF in the target system; we will copy it over with the
* "tail" from the source system, anyway.
*/
end_offset = (blkno_inseg + 1) * BLCKSZ;
if (end_offset <= entry->source_size &&
end_offset <= entry->target_size)
datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
}
else
{
/*
* If we don't have any record of this file in the file map, it means
* that it's a relation that doesn't exist in the source system. It
* could exist in the target system; we haven't moved the target-only
* entries from the linked list to the array yet! But in any case, if
* it doesn't exist in the source it will be removed from the target
* too, and we can safely ignore it.
*/
if (entry->target_exists && entry->source_exists)
{
off_t end_offset;
end_offset = (blkno_inseg + 1) * BLCKSZ;
if (end_offset <= entry->source_size && end_offset <= entry->target_size)
datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
}
}
}
......@@ -423,34 +406,6 @@ check_file_excluded(const char *path, bool is_source)
return false;
}
/*
* Convert the linked list of entries in map->first/last to the array,
* map->array.
*/
static void
filemap_list_to_array(filemap_t *map)
{
int narray;
file_entry_t *entry,
*next;
map->array = (file_entry_t **)
pg_realloc(map->array,
(map->nlist + map->narray) * sizeof(file_entry_t *));
narray = map->narray;
for (entry = map->first; entry != NULL; entry = next)
{
map->array[narray++] = entry;
next = entry->next;
entry->next = NULL;
}
Assert(narray == map->nlist + map->narray);
map->narray = narray;
map->nlist = 0;
map->first = map->last = NULL;
}
static const char *
action_to_str(file_action_t action)
{
......@@ -478,32 +433,31 @@ action_to_str(file_action_t action)
* Calculate the totals needed for progress reports.
*/
void
calculate_totals(void)
calculate_totals(filemap_t *filemap)
{
file_entry_t *entry;
int i;
filemap_t *map = filemap;
map->total_size = 0;
map->fetch_size = 0;
filemap->total_size = 0;
filemap->fetch_size = 0;
for (i = 0; i < map->narray; i++)
for (i = 0; i < filemap->nentries; i++)
{
entry = map->array[i];
entry = filemap->entries[i];
if (entry->source_type != FILE_TYPE_REGULAR)
continue;
map->total_size += entry->source_size;
filemap->total_size += entry->source_size;
if (entry->action == FILE_ACTION_COPY)
{
map->fetch_size += entry->source_size;
filemap->fetch_size += entry->source_size;
continue;
}
if (entry->action == FILE_ACTION_COPY_TAIL)
map->fetch_size += (entry->source_size - entry->target_size);
filemap->fetch_size += (entry->source_size - entry->target_size);
if (entry->target_pages_to_overwrite.bitmapsize > 0)
{
......@@ -512,7 +466,7 @@ calculate_totals(void)
iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
while (datapagemap_next(iter, &blk))
map->fetch_size += BLCKSZ;
filemap->fetch_size += BLCKSZ;
pg_free(iter);
}
......@@ -520,15 +474,14 @@ calculate_totals(void)
}
void
print_filemap(void)
print_filemap(filemap_t *filemap)
{
filemap_t *map = filemap;
file_entry_t *entry;
int i;
for (i = 0; i < map->narray; i++)
for (i = 0; i < filemap->nentries; i++)
{
entry = map->array[i];
entry = filemap->entries[i];
if (entry->action != FILE_ACTION_NONE ||
entry->target_pages_to_overwrite.bitmapsize > 0)
{
......@@ -650,15 +603,6 @@ datasegpath(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
return path;
}
static int
path_cmp(const void *a, const void *b)
{
file_entry_t *fa = *((file_entry_t **) a);
file_entry_t *fb = *((file_entry_t **) b);
return strcmp(fa->path, fb->path);
}
/*
* In the final stage, the filemap is sorted so that removals come last.
* From disk space usage point of view, it would be better to do removals
......@@ -834,22 +778,52 @@ decide_file_action(file_entry_t *entry)
/*
* Decide what to do with each file.
*
* Returns a 'filemap' with the entries in the order that their actions
* should be executed.
*/
void
filemap_t *
decide_file_actions(void)
{
int i;
filehash_iterator it;
file_entry_t *entry;
filemap_t *filemap;
filemap_list_to_array(filemap);
for (i = 0; i < filemap->narray; i++)
filehash_start_iterate(filehash, &it);
while ((entry = filehash_iterate(filehash, &it)) != NULL)
{
file_entry_t *entry = filemap->array[i];
entry->action = decide_file_action(entry);
}
/* Sort the actions to the order that they should be performed */
qsort(filemap->array, filemap->narray, sizeof(file_entry_t *),
/*
* Turn the hash table into an array, and sort in the order that the
* actions should be performed.
*/
filemap = pg_malloc(offsetof(filemap_t, entries) +
filehash->members * sizeof(file_entry_t *));
filemap->nentries = filehash->members;
filehash_start_iterate(filehash, &it);
i = 0;
while ((entry = filehash_iterate(filehash, &it)) != NULL)
{
filemap->entries[i++] = entry;
}
qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
final_filemap_cmp);
return filemap;
}
/*
* Helper function for filemap hash table.
*/
static uint32
hash_string_pointer(const char *s)
{
unsigned char *ss = (unsigned char *) s;
return hash_bytes(ss, strlen(s));
}
......@@ -12,15 +12,6 @@
#include "storage/block.h"
#include "storage/relfilenode.h"
/*
* For every file found in the local or remote system, we have a file entry
* that contains information about the file on both systems. For relation
* files, there is also a page map that marks pages in the file that were
* changed in the target after the last common checkpoint. Each entry also
* contains an 'action' field, which says what we are going to do with the
* file.
*/
/* these enum values are sorted in the order we want actions to be processed */
typedef enum
{
......@@ -45,9 +36,21 @@ typedef enum
FILE_TYPE_SYMLINK
} file_type_t;
/*
* For every file found in the local or remote system, we have a file entry
* that contains information about the file on both systems. For relation
* files, there is also a page map that marks pages in the file that were
* changed in the target after the last common checkpoint.
*
* When gathering information, these are kept in a hash table, private to
* filemap.c. decide_file_actions() fills in the 'action' field, sorts all
* the entries, and returns them in an array, ready for executing the actions.
*/
typedef struct file_entry_t
{
char *path;
uint32 status; /* hash status */
const char *path;
bool isrelfile; /* is it a relation data file? */
/*
......@@ -76,44 +79,25 @@ typedef struct file_entry_t
* What will we do to the file?
*/
file_action_t action;
struct file_entry_t *next;
} file_entry_t;
/*
* This contains the final decisions on what to do with each file.
* 'entries' array contains an entry for each file, sorted in the order
* that their actions should executed.
*/
typedef struct filemap_t
{
/*
* New entries are accumulated to a linked list, in process_source_file
* and process_target_file.
*/
file_entry_t *first;
file_entry_t *last;
int nlist; /* number of entries currently in list */
/*
* After processing all the remote files, the entries in the linked list
* are moved to this array. After processing local files, too, all the
* local entries are added to the array by decide_file_actions(), and
* sorted in the final order. After decide_file_actions(), all the entries
* are in the array, and the linked list is empty.
*/
file_entry_t **array;
int narray; /* current length of array */
/*
* Summary information.
*/
/* Summary information, filled by calculate_totals() */
uint64 total_size; /* total size of the source cluster */
uint64 fetch_size; /* number of bytes that needs to be copied */
} filemap_t;
extern filemap_t *filemap;
extern void filemap_create(void);
extern void calculate_totals(void);
extern void print_filemap(void);
int nentries; /* size of 'entries' array */
file_entry_t *entries[FLEXIBLE_ARRAY_MEMBER];
} filemap_t;
/* Functions for populating the filemap */
extern void filehash_init(void);
extern void process_source_file(const char *path, file_type_t type,
size_t size, const char *link_target);
extern void process_target_file(const char *path, file_type_t type,
......@@ -121,6 +105,9 @@ extern void process_target_file(const char *path, file_type_t type,
extern void process_target_wal_block_change(ForkNumber forknum,
RelFileNode rnode,
BlockNumber blkno);
extern void decide_file_actions(void);
extern filemap_t *decide_file_actions(void);
extern void calculate_totals(filemap_t *filemap);
extern void print_filemap(filemap_t *filemap);
#endif /* FILEMAP_H */
......@@ -460,9 +460,9 @@ libpq_executeFileMap(filemap_t *map)
PQresultErrorMessage(res));
PQclear(res);
for (i = 0; i < map->narray; i++)
for (i = 0; i < map->nentries; i++)
{
entry = map->array[i];
entry = map->entries[i];
/* If this is a relation file, copy the modified blocks */
execute_pagemap(&entry->target_pages_to_overwrite, entry->path);
......
......@@ -129,6 +129,7 @@ main(int argc, char **argv)
TimeLineID endtli;
ControlFileData ControlFile_new;
bool writerecoveryconf = false;
filemap_t *filemap;
pg_logging_init(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
......@@ -368,13 +369,16 @@ main(int argc, char **argv)
(uint32) (chkptrec >> 32), (uint32) chkptrec,
chkpttli);
/* Initialize the hash table to track the status of each file */
filehash_init();
/*
* Collect information about all files in the target and source systems.
*/
filemap_create();
if (showprogress)
pg_log_info("reading source file list");
fetchSourceFileList();
if (showprogress)
pg_log_info("reading target file list");
traverse_datadir(datadir_target, &process_target_file);
......@@ -395,13 +399,13 @@ main(int argc, char **argv)
* We have collected all information we need from both systems. Decide
* what to do with each file.
*/
decide_file_actions();
filemap = decide_file_actions();
if (showprogress)
calculate_totals();
calculate_totals(filemap);
/* this is too verbose even for verbose mode */
if (debug)
print_filemap();
print_filemap(filemap);
/*
* Ok, we're ready to start copying things over.
......@@ -421,7 +425,7 @@ main(int argc, char **argv)
* modified the target directory and there is no turning back!
*/
executeFileMap();
execute_file_actions(filemap);
progress_report(true);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment