/*-------------------------------------------------------------------------
 *
 * tuptoaster.c
 *	  Support routines for external and compressed storage of
 *	  variable size attributes.
 *
 * Copyright (c) 2000, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.23 2001/06/22 19:16:20 wieck Exp $
 *
 *
 * INTERFACE ROUTINES
 *		heap_tuple_toast_attrs -
 *			Try to make a given tuple fit into one page by compressing
 *			or moving off attributes
 *
 *		heap_tuple_untoast_attr -
 *			Fetch back a given value from the "secondary" relation
 *
 *-------------------------------------------------------------------------
 */

25 26
#include "postgres.h"

Jan Wieck's avatar
TOAST  
Jan Wieck committed
27 28
#include <unistd.h>
#include <fcntl.h>
29

Jan Wieck's avatar
TOAST  
Jan Wieck committed
30 31 32 33 34 35 36 37
#include "access/heapam.h"
#include "access/genam.h"
#include "access/tuptoaster.h"
#include "catalog/catalog.h"
#include "utils/rel.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/pg_lzcompress.h"
38 39 40


#ifdef TUPLE_TOASTER_ACTIVE
41

Jan Wieck's avatar
TOAST  
Jan Wieck committed
42 43
#undef TOAST_DEBUG

44 45 46 47 48 49
/*
 * Forward declarations of the file-local TOAST routines: cascaded delete,
 * insert/update processing, and the save/fetch primitives that talk to the
 * toast relation.
 */
static void toast_delete(Relation rel, HeapTuple oldtup);
static void toast_delete_datum(Relation rel, Datum value);
static void toast_insert_or_update(Relation rel, HeapTuple newtup,
					   HeapTuple oldtup);
static Datum toast_save_datum(Relation rel, Oid mainoid, int16 attno, Datum value);
static varattrib *toast_fetch_datum(varattrib *attr);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
50

51

Jan Wieck's avatar
TOAST  
Jan Wieck committed
52 53 54 55 56 57 58 59
/* ----------
 * heap_tuple_toast_attrs -
 *
 *	This is the central public entry point for toasting from heapam.
 *
 *	Calls the appropriate event specific action.
 * ----------
 */
60
void
61
heap_tuple_toast_attrs(Relation rel, HeapTuple newtup, HeapTuple oldtup)
62
{
Jan Wieck's avatar
TOAST  
Jan Wieck committed
63 64 65 66 67
	if (newtup == NULL)
		toast_delete(rel, oldtup);
	else
		toast_insert_or_update(rel, newtup, oldtup);
}
68 69 70 71 72


/* ----------
 * heap_tuple_fetch_attr -
 *
73
 *	Public entry point to get back a toasted value
74 75 76
 *	external storage (possibly still in compressed format).
 * ----------
 */
77
varattrib  *
78
heap_tuple_fetch_attr(varattrib *attr)
79
{
80
	varattrib  *result;
81 82 83

	if (VARATT_IS_EXTERNAL(attr))
	{
84 85

		/*
86 87 88 89
		 * This is an external stored plain value
		 */
		result = toast_fetch_datum(attr);
	}
90
	else
91
	{
92 93 94 95

		/*
		 * This is a plain value inside of the main tuple - why am I
		 * called?
96 97
		 */
		result = attr;
98
	}
99 100

	return result;
101
}
Jan Wieck's avatar
TOAST  
Jan Wieck committed
102 103 104 105 106 107 108 109 110


/* ----------
 * heap_tuple_untoast_attr -
 *
 *	Public entry point to get back a toasted value from compression
 *	or external storage.
 * ----------
 */
111
varattrib  *
Jan Wieck's avatar
TOAST  
Jan Wieck committed
112 113
heap_tuple_untoast_attr(varattrib *attr)
{
114
	varattrib  *result;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
115 116 117 118 119 120 121 122 123 124

	if (VARATT_IS_EXTERNAL(attr))
	{
		if (VARATT_IS_COMPRESSED(attr))
		{
			/* ----------
			 * This is an external stored compressed value
			 * Fetch it from the toast heap and decompress.
			 * ----------
			 */
125
			varattrib  *tmp;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
126 127

			tmp = toast_fetch_datum(attr);
128 129
			result = (varattrib *) palloc(attr->va_content.va_external.va_rawsize
										  + VARHDRSZ);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
130
			VARATT_SIZEP(result) = attr->va_content.va_external.va_rawsize
131 132
				+ VARHDRSZ;
			pglz_decompress((PGLZ_Header *) tmp, VARATT_DATA(result));
Jan Wieck's avatar
TOAST  
Jan Wieck committed
133 134 135 136 137

			pfree(tmp);
		}
		else
		{
138 139

			/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
140 141 142 143 144 145 146
			 * This is an external stored plain value
			 */
			result = toast_fetch_datum(attr);
		}
	}
	else if (VARATT_IS_COMPRESSED(attr))
	{
147 148

		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
149 150
		 * This is a compressed value inside of the main tuple
		 */
151 152
		result = (varattrib *) palloc(attr->va_content.va_compressed.va_rawsize
									  + VARHDRSZ);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
153
		VARATT_SIZEP(result) = attr->va_content.va_compressed.va_rawsize
154 155
			+ VARHDRSZ;
		pglz_decompress((PGLZ_Header *) attr, VARATT_DATA(result));
Jan Wieck's avatar
TOAST  
Jan Wieck committed
156 157
	}
	else
158 159 160 161

		/*
		 * This is a plain value inside of the main tuple - why am I
		 * called?
Jan Wieck's avatar
TOAST  
Jan Wieck committed
162 163 164 165 166 167 168
		 */
		return attr;

	return result;
}


169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
/* ----------
 * toast_raw_datum_size -
 *
 *	Return the raw (detoasted) size of a varlena datum, header included.
 *
 *	The compressed test comes first on purpose: for a compressed datum
 *	va_rawsize is the original data size whether or not the datum is
 *	also stored external.
 * ----------
 */
Size
toast_raw_datum_size(Datum value)
{
	varattrib  *attr = (varattrib *) DatumGetPointer(value);

	/* compressed (inline or external): rawsize excludes the header */
	if (VARATT_IS_COMPRESSED(attr))
		return attr->va_content.va_compressed.va_rawsize + VARHDRSZ;

	/*
	 * uncompressed external: rawsize already includes the header (not too
	 * consistent!)
	 */
	if (VARATT_IS_EXTERNAL(attr))
		return attr->va_content.va_external.va_rawsize;

	/* plain untoasted datum */
	return VARSIZE(attr);
}


Jan Wieck's avatar
TOAST  
Jan Wieck committed
206 207 208 209 210 211 212 213 214
/* ----------
 * toast_delete -
 *
 *	Cascaded delete of toast entries on DELETE.
 *
 *	Walks all attributes of oldtup and removes the toast relation
 *	entries behind every non-null, externally stored varlena value.
 * ----------
 */
static void
toast_delete(Relation rel, HeapTuple oldtup)
{
	TupleDesc	tupleDesc = rel->rd_att;
	Form_pg_attribute *att = tupleDesc->attrs;
	int			numAttrs = tupleDesc->natts;
	int			attidx;

	for (attidx = 0; attidx < numAttrs; attidx++)
	{
		Datum		value;
		bool		isnull;

		/* only variable size attributes (attlen -1) are examined */
		if (att[attidx]->attlen != -1)
			continue;

		value = heap_getattr(oldtup, attidx + 1, tupleDesc, &isnull);
		if (isnull || !VARATT_IS_EXTERNAL(value))
			continue;

		/* external stored value: delete it from the secondary relation */
		toast_delete_datum(rel, value);
	}
}


/* ----------
 * toast_insert_or_update -
 *
248
 *	Delete no-longer-used toast-entries and create new ones to
Jan Wieck's avatar
TOAST  
Jan Wieck committed
249 250 251 252 253 254
 *	make the new tuple fit on INSERT or UPDATE
 * ----------
 */
static void
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup)
{
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
	TupleDesc	tupleDesc;
	Form_pg_attribute *att;
	int			numAttrs;
	int			i;
	bool		old_isnull;
	bool		new_isnull;

	bool		need_change = false;
	bool		need_free = false;
	bool		need_delold = false;
	bool		has_nulls = false;

	Size		maxDataLen;

	char		toast_action[MaxHeapAttributeNumber];
	char		toast_nulls[MaxHeapAttributeNumber];
	Datum		toast_values[MaxHeapAttributeNumber];
	int32		toast_sizes[MaxHeapAttributeNumber];
	bool		toast_free[MaxHeapAttributeNumber];
	bool		toast_delold[MaxHeapAttributeNumber];
Jan Wieck's avatar
TOAST  
Jan Wieck committed
275

276 277 278
	/*
	 * Get the tuple descriptor, the number of and attribute descriptors
	 * and the location of the tuple values.
Jan Wieck's avatar
TOAST  
Jan Wieck committed
279
	 */
280 281 282
	tupleDesc = rel->rd_att;
	numAttrs = tupleDesc->natts;
	att = tupleDesc->attrs;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
283 284 285

	/* ----------
	 * Then collect information about the values given
286 287 288 289 290
	 *
	 * NOTE: toast_action[i] can have these values:
	 *		' '		default handling
	 *		'p'		already processed --- don't touch it
	 *		'x'		incompressible, but OK to move off
Jan Wieck's avatar
TOAST  
Jan Wieck committed
291 292
	 * ----------
	 */
293 294 295 296
	memset(toast_action, ' ', numAttrs * sizeof(char));
	memset(toast_nulls, ' ', numAttrs * sizeof(char));
	memset(toast_free, 0, numAttrs * sizeof(bool));
	memset(toast_delold, 0, numAttrs * sizeof(bool));
Jan Wieck's avatar
TOAST  
Jan Wieck committed
297 298
	for (i = 0; i < numAttrs; i++)
	{
299 300
		varattrib  *old_value;
		varattrib  *new_value;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
301 302 303

		if (oldtup != NULL)
		{
304 305

			/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
306 307
			 * For UPDATE get the old and new values of this attribute
			 */
308 309 310 311 312
			old_value = (varattrib *) DatumGetPointer(
					heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
			toast_values[i] =
				heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
			new_value = (varattrib *) DatumGetPointer(toast_values[i]);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
313

314 315 316
			/*
			 * If the old value is an external stored one, check if it has
			 * changed so we have to delete it later.
Jan Wieck's avatar
TOAST  
Jan Wieck committed
317
			 */
318 319
			if (!old_isnull && att[i]->attlen == -1 &&
				VARATT_IS_EXTERNAL(old_value))
Jan Wieck's avatar
TOAST  
Jan Wieck committed
320 321
			{
				if (new_isnull || !VARATT_IS_EXTERNAL(new_value) ||
322 323 324 325
					old_value->va_content.va_external.va_rowid !=
					new_value->va_content.va_external.va_rowid ||
					old_value->va_content.va_external.va_attno !=
					new_value->va_content.va_external.va_attno)
Jan Wieck's avatar
TOAST  
Jan Wieck committed
326
				{
327 328 329 330

					/*
					 * The old external store value isn't needed any more
					 * after the update
Jan Wieck's avatar
TOAST  
Jan Wieck committed
331 332 333 334 335 336
					 */
					toast_delold[i] = true;
					need_delold = true;
				}
				else
				{
337 338 339 340 341

					/*
					 * This attribute isn't changed by this update so we
					 * reuse the original reference to the old value in
					 * the new tuple.
Jan Wieck's avatar
TOAST  
Jan Wieck committed
342 343 344 345 346 347 348 349 350
					 */
					toast_action[i] = 'p';
					toast_sizes[i] = VARATT_SIZE(toast_values[i]);
					continue;
				}
			}
		}
		else
		{
351 352

			/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
353 354
			 * For INSERT simply get the new value
			 */
355 356
			toast_values[i] =
				heap_getattr(newtup, i + 1, tupleDesc, &new_isnull);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
357 358
		}

359
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
360 361 362 363 364 365 366 367 368 369
		 * Handle NULL attributes
		 */
		if (new_isnull)
		{
			toast_action[i] = 'p';
			toast_nulls[i] = 'n';
			has_nulls = true;
			continue;
		}

370
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
371 372 373 374
		 * Now look at varsize attributes
		 */
		if (att[i]->attlen == -1)
		{
375 376

			/*
377
			 * If the table's attribute says PLAIN always, force it so.
Jan Wieck's avatar
TOAST  
Jan Wieck committed
378 379 380 381
			 */
			if (att[i]->attstorage == 'p')
				toast_action[i] = 'p';

382
			/*
383
			 * We took care of UPDATE above, so any external value we find
384 385
			 * still in the tuple must be someone else's we cannot reuse.
			 * Expand it to plain (and, probably, toast it again below).
Jan Wieck's avatar
TOAST  
Jan Wieck committed
386
			 */
387
			if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
Jan Wieck's avatar
TOAST  
Jan Wieck committed
388 389
			{
				toast_values[i] = PointerGetDatum(heap_tuple_untoast_attr(
390
						(varattrib *) DatumGetPointer(toast_values[i])));
Jan Wieck's avatar
TOAST  
Jan Wieck committed
391 392 393 394 395
				toast_free[i] = true;
				need_change = true;
				need_free = true;
			}

396
			/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
397 398
			 * Remember the size of this attribute
			 */
399
			toast_sizes[i] = VARATT_SIZE(DatumGetPointer(toast_values[i]));
Jan Wieck's avatar
TOAST  
Jan Wieck committed
400 401 402
		}
		else
		{
403 404

			/*
405
			 * Not a variable size attribute, plain storage always
Jan Wieck's avatar
TOAST  
Jan Wieck committed
406 407
			 */
			toast_action[i] = 'p';
408
			toast_sizes[i] = att[i]->attlen;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
409 410 411 412
		}
	}

	/* ----------
413
	 * Compress and/or save external until data fits into target length
Jan Wieck's avatar
TOAST  
Jan Wieck committed
414 415 416
	 *
	 *	1: Inline compress attributes with attstorage 'x'
	 *	2: Store attributes with attstorage 'x' or 'e' external
417
	 *	3: Inline compress attributes with attstorage 'm'
Jan Wieck's avatar
TOAST  
Jan Wieck committed
418 419 420 421 422 423
	 *	4: Store attributes with attstorage 'm' external
	 * ----------
	 */
	maxDataLen = offsetof(HeapTupleHeaderData, t_bits);
	if (has_nulls)
		maxDataLen += BITMAPLEN(numAttrs);
424
	maxDataLen = TOAST_TUPLE_TARGET - MAXALIGN(maxDataLen);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
425

426
	/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
427 428 429
	 * Look for attributes with attstorage 'x' to compress
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
430
		   maxDataLen)
Jan Wieck's avatar
TOAST  
Jan Wieck committed
431
	{
432 433 434 435
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
		Datum		new_value;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
436

437
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
438 439 440 441
		 * Search for the biggest yet uncompressed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
442
			if (toast_action[i] != ' ')
Jan Wieck's avatar
TOAST  
Jan Wieck committed
443 444 445 446 447 448 449 450
				continue;
			if (VARATT_IS_EXTENDED(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'x')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
451
				biggest_size = toast_sizes[i];
Jan Wieck's avatar
TOAST  
Jan Wieck committed
452 453 454 455 456 457
			}
		}

		if (biggest_attno < 0)
			break;

458
		/*
459
		 * Attempt to compress it inline
Jan Wieck's avatar
TOAST  
Jan Wieck committed
460
		 */
461 462 463
		i = biggest_attno;
		old_value = toast_values[i];
		new_value = toast_compress_datum(old_value);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
464

465 466 467 468 469
		if (DatumGetPointer(new_value) != NULL)
		{
			/* successful compression */
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
470 471 472 473 474
			toast_values[i] = new_value;
			toast_free[i] = true;
			toast_sizes[i] = VARATT_SIZE(toast_values[i]);
			need_change = true;
			need_free = true;
475 476 477
		}
		else
		{
478 479 480 481 482

			/*
			 * incompressible data, ignore on subsequent compression
			 * passes
			 */
483 484
			toast_action[i] = 'x';
		}
Jan Wieck's avatar
TOAST  
Jan Wieck committed
485 486
	}

487 488 489
	/*
	 * Second we look for attributes of attstorage 'x' or 'e' that are
	 * still inline.
Jan Wieck's avatar
TOAST  
Jan Wieck committed
490 491
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
492
		   maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
Jan Wieck's avatar
TOAST  
Jan Wieck committed
493
	{
494 495 496
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
497

498 499 500 501
		/*------
		 * Search for the biggest yet inlined attribute with
		 * attstorage equals 'x' or 'e'
		 *------
Jan Wieck's avatar
TOAST  
Jan Wieck committed
502 503 504 505 506 507 508 509 510 511 512 513
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'x' && att[i]->attstorage != 'e')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
514
				biggest_size = toast_sizes[i];
Jan Wieck's avatar
TOAST  
Jan Wieck committed
515 516 517 518 519 520
			}
		}

		if (biggest_attno < 0)
			break;

521
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
522 523
		 * Store this external
		 */
524 525 526 527 528 529 530
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel,
										   newtup->t_data->t_oid,
										   i + 1,
										   toast_values[i]);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
531 532 533
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));

534 535
		toast_free[i] = true;
		toast_sizes[i] = VARATT_SIZE(toast_values[i]);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
536 537

		need_change = true;
538
		need_free = true;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
539 540
	}

541 542 543
	/*
	 * Round 3 - this time we take attributes with storage 'm' into
	 * compression
Jan Wieck's avatar
TOAST  
Jan Wieck committed
544 545
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
546
		   maxDataLen)
Jan Wieck's avatar
TOAST  
Jan Wieck committed
547
	{
548 549 550 551
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
		Datum		new_value;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
552

553
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
554 555 556 557
		 * Search for the biggest yet uncompressed internal attribute
		 */
		for (i = 0; i < numAttrs; i++)
		{
558
			if (toast_action[i] != ' ')
Jan Wieck's avatar
TOAST  
Jan Wieck committed
559 560 561 562 563 564 565 566
				continue;
			if (VARATT_IS_EXTENDED(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
567
				biggest_size = toast_sizes[i];
Jan Wieck's avatar
TOAST  
Jan Wieck committed
568 569 570 571 572 573
			}
		}

		if (biggest_attno < 0)
			break;

574
		/*
575
		 * Attempt to compress it inline
Jan Wieck's avatar
TOAST  
Jan Wieck committed
576
		 */
577 578 579
		i = biggest_attno;
		old_value = toast_values[i];
		new_value = toast_compress_datum(old_value);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
580

581 582 583 584 585
		if (DatumGetPointer(new_value) != NULL)
		{
			/* successful compression */
			if (toast_free[i])
				pfree(DatumGetPointer(old_value));
586 587 588 589 590
			toast_values[i] = new_value;
			toast_free[i] = true;
			toast_sizes[i] = VARATT_SIZE(toast_values[i]);
			need_change = true;
			need_free = true;
591 592 593
		}
		else
		{
594 595 596 597 598

			/*
			 * incompressible data, ignore on subsequent compression
			 * passes
			 */
599 600
			toast_action[i] = 'x';
		}
Jan Wieck's avatar
TOAST  
Jan Wieck committed
601 602
	}

603
	/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
604 605 606
	 * Finally we store attributes of type 'm' external
	 */
	while (MAXALIGN(ComputeDataSize(tupleDesc, toast_values, toast_nulls)) >
607
		   maxDataLen && rel->rd_rel->reltoastrelid != InvalidOid)
Jan Wieck's avatar
TOAST  
Jan Wieck committed
608
	{
609 610 611
		int			biggest_attno = -1;
		int32		biggest_size = MAXALIGN(sizeof(varattrib));
		Datum		old_value;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
612

613 614 615 616
		/*--------
		 * Search for the biggest yet inlined attribute with
		 * attstorage = 'm'
		 *--------
Jan Wieck's avatar
TOAST  
Jan Wieck committed
617 618 619 620 621 622 623 624 625 626 627 628
		 */
		for (i = 0; i < numAttrs; i++)
		{
			if (toast_action[i] == 'p')
				continue;
			if (VARATT_IS_EXTERNAL(toast_values[i]))
				continue;
			if (att[i]->attstorage != 'm')
				continue;
			if (toast_sizes[i] > biggest_size)
			{
				biggest_attno = i;
629
				biggest_size = toast_sizes[i];
Jan Wieck's avatar
TOAST  
Jan Wieck committed
630 631 632 633 634 635
			}
		}

		if (biggest_attno < 0)
			break;

636
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
637 638
		 * Store this external
		 */
639 640 641 642 643 644 645
		i = biggest_attno;
		old_value = toast_values[i];
		toast_action[i] = 'p';
		toast_values[i] = toast_save_datum(rel,
										   newtup->t_data->t_oid,
										   i + 1,
										   toast_values[i]);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
646 647 648
		if (toast_free[i])
			pfree(DatumGetPointer(old_value));

649 650
		toast_free[i] = true;
		toast_sizes[i] = VARATT_SIZE(toast_values[i]);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
651 652

		need_change = true;
653
		need_free = true;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
654 655
	}

656 657 658
	/*
	 * In the case we toasted any values, we need to build a new heap
	 * tuple with the changed values.
Jan Wieck's avatar
TOAST  
Jan Wieck committed
659 660 661
	 */
	if (need_change)
	{
662 663 664 665
		char	   *new_data;
		int32		new_len;
		MemoryContext oldcxt;
		HeapTupleHeader olddata;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
666

667
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
668 669 670 671 672 673 674 675
		 * Calculate the new size of the tuple
		 */
		new_len = offsetof(HeapTupleHeaderData, t_bits);
		if (has_nulls)
			new_len += BITMAPLEN(numAttrs);
		new_len = MAXALIGN(new_len);
		new_len += ComputeDataSize(tupleDesc, toast_values, toast_nulls);

676
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
677
		 * Remember the old memory location of the tuple (for below),
678 679
		 * switch to the memory context of the HeapTuple structure and
		 * allocate the new tuple.
Jan Wieck's avatar
TOAST  
Jan Wieck committed
680 681 682 683 684
		 */
		olddata = newtup->t_data;
		oldcxt = MemoryContextSwitchTo(newtup->t_datamcxt);
		new_data = palloc(new_len);

685
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
686 687 688
		 * Put the tuple header and the changed values into place
		 */
		memcpy(new_data, newtup->t_data, newtup->t_data->t_hoff);
689
		newtup->t_data = (HeapTupleHeader) new_data;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
690 691
		newtup->t_len = new_len;

692 693 694 695 696 697 698 699
		DataFill((char *) (MAXALIGN((long) new_data +
								  offsetof(HeapTupleHeaderData, t_bits) +
							   ((has_nulls) ? BITMAPLEN(numAttrs) : 0))),
				 tupleDesc,
				 toast_values,
				 toast_nulls,
				 &(newtup->t_data->t_infomask),
				 has_nulls ? newtup->t_data->t_bits : NULL);
Jan Wieck's avatar
TOAST  
Jan Wieck committed
700

701 702 703
		/*
		 * In the case we modified a previously modified tuple again, free
		 * the memory from the previous run
Jan Wieck's avatar
TOAST  
Jan Wieck committed
704
		 */
705
		if ((char *) olddata != ((char *) newtup + HEAPTUPLESIZE))
Jan Wieck's avatar
TOAST  
Jan Wieck committed
706 707
			pfree(olddata);

708
		/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
709 710 711 712 713
		 * Switch back to the old memory context
		 */
		MemoryContextSwitchTo(oldcxt);
	}

714
	/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
715 716 717 718 719 720 721
	 * Free allocated temp values
	 */
	if (need_free)
		for (i = 0; i < numAttrs; i++)
			if (toast_free[i])
				pfree(DatumGetPointer(toast_values[i]));

722
	/*
Jan Wieck's avatar
TOAST  
Jan Wieck committed
723 724 725 726 727 728 729 730 731 732 733 734 735
	 * Delete external values from the old tuple
	 */
	if (need_delold)
		for (i = 0; i < numAttrs; i++)
			if (toast_delold[i])
				toast_delete_datum(rel,
					heap_getattr(oldtup, i + 1, tupleDesc, &old_isnull));
}


/* ----------
 * toast_compress_datum -
 *
736
 *	Create a compressed version of a varlena datum
737 738 739 740
 *
 *	If we fail (ie, compressed result is actually bigger than original)
 *	then return NULL.  We must not use compressed data if it'd expand
 *	the tuple!
Jan Wieck's avatar
TOAST  
Jan Wieck committed
741 742
 * ----------
 */
743
Datum
Jan Wieck's avatar
TOAST  
Jan Wieck committed
744 745
toast_compress_datum(Datum value)
{
746
	varattrib  *tmp;
Jan Wieck's avatar
TOAST  
Jan Wieck committed
747

748
	tmp = (varattrib *) palloc(sizeof(PGLZ_Header) + VARATT_SIZE(value));
Jan Wieck's avatar
TOAST  
Jan Wieck committed
749
	pglz_compress(VARATT_DATA(value), VARATT_SIZE(value) - VARHDRSZ,
750 751 752 753 754 755 756 757 758 759 760 761 762 763
				  (PGLZ_Header *) tmp,
				  PGLZ_strategy_default);
	if (VARATT_SIZE(tmp) < VARATT_SIZE(value))
	{
		/* successful compression */
		VARATT_SIZEP(tmp) |= VARATT_FLAG_COMPRESSED;
		return PointerGetDatum(tmp);
	}
	else
	{
		/* incompressible data */
		pfree(tmp);
		return PointerGetDatum(NULL);
	}
Jan Wieck's avatar
TOAST  
Jan Wieck committed
764 765 766 767 768 769 770 771 772 773 774 775 776
}


/* ----------
 * toast_save_datum -
 *
 *	Save one single datum into the secondary relation and return
 *	a varattrib reference for it.
 *
 *	rel		main relation; supplies the toast relation and index OIDs
 *	mainoid	OID of the owning main tuple, recorded as va_rowid
 *	attno	attribute number within the main tuple, recorded as va_attno
 *	value	the (plain or inline compressed) varlena datum to store
 *
 *	The returned reference is palloc'd.  The data is cut into pieces of
 *	at most TOAST_MAX_CHUNK_SIZE bytes; each piece becomes one tuple
 *	(value id, chunk sequence number, chunk data) in the toast relation
 *	plus one entry in its index.
 * ----------
 */
static Datum
toast_save_datum(Relation rel, Oid mainoid, int16 attno, Datum value)
{
	Relation	toastrel;
	Relation	toastidx;
	TupleDesc	toasttupDesc;
	Datum		t_values[3];
	char		t_nulls[3];
	varattrib  *result;
	struct {
		struct varlena	hdr;
		char			data[TOAST_MAX_CHUNK_SIZE];
	}			chunk_data;
	int32		chunk_seq = 0;
	char	   *data_p;
	int32		data_todo;

	/*
	 * Create the varattrib reference.  A compressed input keeps its
	 * compressed flag and its recorded raw size; for a plain input the
	 * raw size is the full datum size.
	 */
	result = (varattrib *) palloc(sizeof(varattrib));

	result->va_header = sizeof(varattrib) | VARATT_FLAG_EXTERNAL;
	if (VARATT_IS_COMPRESSED(value))
	{
		result->va_header |= VARATT_FLAG_COMPRESSED;
		result->va_content.va_external.va_rawsize =
			((varattrib *) value)->va_content.va_compressed.va_rawsize;
	}
	else
		result->va_content.va_external.va_rawsize = VARATT_SIZE(value);

	result->va_content.va_external.va_extsize =
		VARATT_SIZE(value) - VARHDRSZ;
	result->va_content.va_external.va_valueid = newoid();
	result->va_content.va_external.va_toastrelid =
		rel->rd_rel->reltoastrelid;
	result->va_content.va_external.va_toastidxid =
		rel->rd_rel->reltoastidxid;
	result->va_content.va_external.va_rowid = mainoid;
	result->va_content.va_external.va_attno = attno;

	/*
	 * Initialize constant parts of the tuple data: the value id and the
	 * pointer to the reusable chunk buffer never change, and no column
	 * is ever NULL.
	 */
	t_values[0] = ObjectIdGetDatum(result->va_content.va_external.va_valueid);
	t_values[2] = PointerGetDatum(&chunk_data);
	memset(t_nulls, ' ', sizeof(t_nulls));

	/*
	 * Get the data to process
	 */
	data_p = VARATT_DATA(value);
	data_todo = VARATT_SIZE(value) - VARHDRSZ;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
	toasttupDesc = toastrel->rd_att;
	toastidx = index_open(rel->rd_rel->reltoastidxid);

	/*
	 * Split up the item into chunks
	 */
	while (data_todo > 0)
	{
		int32		chunk_size;
		HeapTuple	toasttup;
		InsertIndexResult idxres;

		/*
		 * Calculate the size of this chunk
		 */
		chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);

		/*
		 * Build a tuple
		 */
		t_values[1] = Int32GetDatum(chunk_seq++);
		VARATT_SIZEP(&chunk_data) = chunk_size + VARHDRSZ;
		memcpy(VARATT_DATA(&chunk_data), data_p, chunk_size);
		toasttup = heap_formtuple(toasttupDesc, t_values, t_nulls);
		if (!HeapTupleIsValid(toasttup))
			elog(ERROR, "Failed to build TOAST tuple");

		/*
		 * Store it and create the index entry
		 */
		heap_insert(toastrel, toasttup);
		idxres = index_insert(toastidx, t_values, t_nulls,
							  &(toasttup->t_self),
							  toastrel);
		if (idxres == NULL)
			elog(ERROR, "Failed to insert index entry for TOAST tuple");

		/*
		 * Free memory
		 */
		heap_freetuple(toasttup);
		pfree(idxres);

		/*
		 * Move on to next chunk
		 */
		data_p += chunk_size;
		data_todo -= chunk_size;
	}

	/*
	 * Done - close toast relation and return the reference
	 */
	index_close(toastidx);
	heap_close(toastrel, RowExclusiveLock);

	return PointerGetDatum(result);
}


/* ----------
 * toast_delete_datum -
 *
 *	Delete a single external stored value.
 *
 *	Does nothing if value is not an external reference.  The toast
 *	relation and index to use are taken from the reference itself; the
 *	rel argument is not consulted here.
 * ----------
 */
static void
toast_delete_datum(Relation rel, Datum value)
{
	varattrib  *attr = (varattrib *) DatumGetPointer(value);
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData toastkey;
	IndexScanDesc toastscan;
	HeapTupleData toasttup;
	RetrieveIndexResult indexRes;
	Buffer		buffer;

	if (!VARATT_IS_EXTERNAL(attr))
		return;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
						 RowExclusiveLock);
	toastidx = index_open(attr->va_content.va_external.va_toastidxid);

	/*
	 * Setup a scan key to fetch from the index by va_valueid
	 */
	ScanKeyEntryInitialize(&toastkey,
						   (bits16) 0,
						   (AttrNumber) 1,
						   (RegProcedure) F_OIDEQ,
			  ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

	/*
	 * Walk the index entries of this value and delete every chunk that
	 * can still be fetched from the heap.
	 */
	toastscan = index_beginscan(toastidx, false, 1, &toastkey);
	for (;;)
	{
		indexRes = index_getnext(toastscan, ForwardScanDirection);
		if (indexRes == NULL)
			break;

		toasttup.t_self = indexRes->heap_iptr;
		heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
		pfree(indexRes);

		/* t_data is checked to see whether the fetch produced a tuple */
		if (toasttup.t_data)
		{
			/*
			 * Have a chunk, delete it
			 */
			simple_heap_delete(toastrel, &toasttup.t_self);

			ReleaseBuffer(buffer);
		}
	}

	/*
	 * End scan and close relations
	 */
	index_endscan(toastscan);
	index_close(toastidx);
	heap_close(toastrel, RowExclusiveLock);
}


Jan Wieck's avatar
TOAST  
Jan Wieck committed
962 963 964 965 966 967 968 969 970
/* ----------
 * toast_fetch_datum -
 *
 *	Reconstruct an in memory varattrib from the chunks saved
 *	in the toast relation
 *
 *	attr is the external reference; its va_external fields supply the
 *	toast relation id, the toast index id, the value id and the stored
 *	size (va_extsize).
 *
 *	Returns a freshly palloc'd varattrib holding va_extsize data bytes
 *	plus the header.  The compressed flag is copied from attr; the data
 *	is not decompressed here.
 *
 *	Raises elog(ERROR) if a chunk carries an out-of-range sequence
 *	number or an unexpected size, appears more than once, or if any
 *	chunk is missing.
 * ----------
 */
static varattrib *
toast_fetch_datum(varattrib *attr)
{
	Relation	toastrel;
	Relation	toastidx;
	ScanKeyData toastkey;
	IndexScanDesc toastscan;
	HeapTupleData toasttup;
	TupleDesc	toasttupDesc;
	RetrieveIndexResult indexRes;
	Buffer		buffer;

	varattrib  *result;
	int32		ressize;
	int32		residx;
	int			numchunks;
	int			nfound = 0;
	Pointer		chunk;
	bool		isnull;
	int32		chunksize;

	char	   *chunks_found;

	ressize = attr->va_content.va_external.va_extsize;
	numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;

	/*
	 * One flag byte per expected chunk, used to detect duplicates.
	 */
	chunks_found = palloc(numchunks);
	memset(chunks_found, 0, numchunks);

	result = (varattrib *) palloc(ressize + VARHDRSZ);
	VARATT_SIZEP(result) = ressize + VARHDRSZ;
	if (VARATT_IS_COMPRESSED(attr))
		VARATT_SIZEP(result) |= VARATT_FLAG_COMPRESSED;

	/*
	 * Open the toast relation and its index
	 */
	toastrel = heap_open(attr->va_content.va_external.va_toastrelid,
						 AccessShareLock);
	toasttupDesc = toastrel->rd_att;
	toastidx = index_open(attr->va_content.va_external.va_toastidxid);

	/*
	 * Setup a scan key to fetch from the index by va_valueid
	 */
	ScanKeyEntryInitialize(&toastkey,
						   (bits16) 0,
						   (AttrNumber) 1,
						   (RegProcedure) F_OIDEQ,
			  ObjectIdGetDatum(attr->va_content.va_external.va_valueid));

	/*
	 * Read the chunks by index
	 *
	 * Note we will not necessarily see the chunks in sequence-number order.
	 */
	toastscan = index_beginscan(toastidx, false, 1, &toastkey);
	while ((indexRes = index_getnext(toastscan, ForwardScanDirection)) != NULL)
	{
		toasttup.t_self = indexRes->heap_iptr;
		heap_fetch(toastrel, SnapshotAny, &toasttup, &buffer, toastscan);
		pfree(indexRes);

		if (toasttup.t_data == NULL)
			continue;

		/*
		 * Have a chunk, extract the sequence number and the data
		 */
		residx = DatumGetInt32(heap_getattr(&toasttup, 2, toasttupDesc, &isnull));
		Assert(!isnull);
		chunk = DatumGetPointer(heap_getattr(&toasttup, 3, toasttupDesc, &isnull));
		Assert(!isnull);
		chunksize = VARATT_SIZE(chunk) - VARHDRSZ;

		/*
		 * Some checks on the data we've found.  The value id is an Oid,
		 * so it is reported with %u.
		 */
		if (residx < 0 || residx >= numchunks)
			elog(ERROR, "unexpected chunk number %d for toast value %u",
				 residx,
				 attr->va_content.va_external.va_valueid);
		if (residx < numchunks - 1)
		{
			/* every chunk but the last must be completely filled */
			if (chunksize != TOAST_MAX_CHUNK_SIZE)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}
		else
		{
			/* the last chunk must end exactly at ressize */
			if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
				elog(ERROR, "unexpected chunk size %d in chunk %d for toast value %u",
					 chunksize, residx,
					 attr->va_content.va_external.va_valueid);
		}
		if (chunks_found[residx])
			elog(ERROR, "chunk %d for toast value %u appears multiple times",
				 residx,
				 attr->va_content.va_external.va_valueid);
		chunks_found[residx] = 1;
		nfound++;

		/*
		 * Copy the data into proper place in our result
		 */
		memcpy(((char *) VARATT_DATA(result)) + residx * TOAST_MAX_CHUNK_SIZE,
			   VARATT_DATA(chunk),
			   chunksize);

		ReleaseBuffer(buffer);
	}

	/*
	 * Final checks that we successfully fetched the datum.  Every chunk
	 * counted above had a distinct, in-range sequence number, so seeing
	 * numchunks of them means none is missing.
	 */
	if (nfound != numchunks)
		elog(ERROR, "not all toast chunks found for value %u",
			 attr->va_content.va_external.va_valueid);
	pfree(chunks_found);

	/*
	 * End scan and close relations
	 */
	index_endscan(toastscan);
	index_close(toastidx);
	heap_close(toastrel, AccessShareLock);

	return result;
}


1104
#endif	 /* TUPLE_TOASTER_ACTIVE */