Commit 94aceed3 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Support multi-dimensional arrays in PL/python.

Multi-dimensional arrays can now be used as arguments to a PL/python function
(used to throw an error), and they can be returned as nested Python lists.

This makes a backwards-incompatible change to the handling of composite
types in arrays. Previously, you could return an array of composite types
as "[[col1, col2], [col1, col2]]", but now that is interpreted as a two-
dimensional array. Composite types in arrays must now be returned as
Python tuples, not lists, to resolve the ambiguity. I.e. "[(col1, col2),
(col1, col2)]".

To avoid breaking backwards compatibility unnecessarily, () is still
accepted for arrays at the top level, but it is always treated as a
single-dimensional array. Likewise, [] is still accepted for composite types
when they are not in an array. Update the documentation to recommend using []
for arrays and () for composite types, with a mention that the other forms
are also accepted in some contexts.

This needs to be mentioned in the release notes.

Alexey Grishchenko, Dave Cramer and me. Reviewed by Pavel Stehule.

Discussion: <CAH38_tmbqwaUyKs9yagyRra=SMaT45FPBxk1pmTYcM0TyXGG7Q@mail.gmail.com>
parent 8c035e55
......@@ -451,13 +451,13 @@ $$ LANGUAGE plpythonu;
<para>
SQL array values are passed into PL/Python as a Python list. To
return an SQL array value out of a PL/Python function, return a
Python sequence, for example a list or tuple:
Python list:
<programlisting>
CREATE FUNCTION return_arr()
RETURNS int[]
AS $$
return (1, 2, 3, 4, 5)
return [1, 2, 3, 4, 5]
$$ LANGUAGE plpythonu;
SELECT return_arr();
......@@ -467,6 +467,34 @@ SELECT return_arr();
(1 row)
</programlisting>
Multidimensional arrays are passed into PL/Python as nested Python lists.
A 2-dimensional array is a list of lists, for example. When returning
a multi-dimensional SQL array out of a PL/Python function, the inner
lists at each level must all be of the same size. For example:
<programlisting>
CREATE FUNCTION test_type_conversion_array_int4(x int4[]) RETURNS int4[] AS $$
plpy.info(x, type(x))
return x
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_int4(ARRAY[[1,2,3],[4,5,6]]);
INFO: ([[1, 2, 3], [4, 5, 6]], &lt;type 'list'&gt;)
test_type_conversion_array_int4
---------------------------------
{{1,2,3},{4,5,6}}
(1 row)
</programlisting>
Other Python sequences, like tuples, are also accepted for
backwards-compatibility with PostgreSQL versions 9.6 and below, when
multi-dimensional arrays were not supported. However, they are always
treated as one-dimensional arrays, because they are ambiguous with
composite types. For the same reason, when a composite type is used in a
multi-dimensional array, it must be represented by a tuple, rather than a
list.
</para>
<para>
Note that in Python, strings are sequences, which can have
undesirable effects that might be familiar to Python programmers:
......@@ -541,14 +569,19 @@ CREATE TYPE named_value AS (
CREATE FUNCTION make_pair (name text, value integer)
RETURNS named_value
AS $$
return [ name, value ]
# or alternatively, as tuple: return ( name, value )
return ( name, value )
# or alternatively, as list: return [ name, value ]
$$ LANGUAGE plpythonu;
</programlisting>
To return a SQL null for any column, insert <symbol>None</symbol> at
the corresponding position.
</para>
<para>
When an array of composite types is returned, each composite value in it
cannot be given as a Python list (use a tuple instead), because it is
ambiguous whether a Python list represents a composite type or another
array dimension.
</para>
</listitem>
</varlistentry>
......
......@@ -465,13 +465,13 @@ SELECT * FROM changing_test();
-- tables of composite types
CREATE FUNCTION composite_types_table(OUT tab table_record[], OUT typ type_record[] ) RETURNS SETOF record AS $$
yield {'tab': [['first', 1], ['second', 2]],
yield {'tab': [('first', 1), ('second', 2)],
'typ': [{'first': 'third', 'second': 3},
{'first': 'fourth', 'second': 4}]}
yield {'tab': [['first', 1], ['second', 2]],
yield {'tab': [('first', 1), ('second', 2)],
'typ': [{'first': 'third', 'second': 3},
{'first': 'fourth', 'second': 4}]}
yield {'tab': [['first', 1], ['second', 2]],
yield {'tab': [('first', 1), ('second', 2)],
'typ': [{'first': 'third', 'second': 3},
{'first': 'fourth', 'second': 4}]}
$$ LANGUAGE plpythonu;
......@@ -569,3 +569,13 @@ SELECT * FROM return_record_2('v3') AS (v1 int, v2 int, v3 int);
1 | 2 | 3
(1 row)
-- multi-dimensional array of composite types.
CREATE FUNCTION composite_type_as_list() RETURNS type_record[] AS $$
return [[('first', 1), ('second', 1)], [('first', 2), ('second', 2)], [('first', 3), ('second', 3)]];
$$ LANGUAGE plpythonu;
SELECT * FROM composite_type_as_list();
composite_type_as_list
------------------------------------------------------------------------------------
{{"(first,1)","(second,1)"},{"(first,2)","(second,2)"},{"(first,3)","(second,3)"}}
(1 row)
This diff is collapsed.
......@@ -45,6 +45,8 @@ static PyObject *PLyBytes_FromBytea(PLyDatumToOb *arg, Datum d);
static PyObject *PLyString_FromDatum(PLyDatumToOb *arg, Datum d);
static PyObject *PLyObject_FromTransform(PLyDatumToOb *arg, Datum d);
static PyObject *PLyList_FromArray(PLyDatumToOb *arg, Datum d);
static PyObject *PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndim, int dim,
char **dataptr_p, bits8 **bitmap_p, int *bitmask_p);
/* conversion from Python objects to Datums */
static Datum PLyObject_ToBool(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
......@@ -53,6 +55,9 @@ static Datum PLyObject_ToComposite(PLyObToDatum *arg, int32 typmod, PyObject *pl
static Datum PLyObject_ToDatum(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
static Datum PLyObject_ToTransform(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
static Datum PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv);
static void PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
int *dims, int ndim, int dim,
Datum *elems, bool *nulls, int *currelem);
/* conversion from Python objects to composite Datums (used by triggers and SRFs) */
static Datum PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string);
......@@ -631,43 +636,104 @@ PLyList_FromArray(PLyDatumToOb *arg, Datum d)
{
	/*
	 * Convert an SQL array datum into a (possibly nested) Python list.
	 *
	 * arg - conversion info for the array type; arg->elm describes the
	 *       element type and is handed on to the recursive helper.
	 * d   - the array datum to convert.
	 *
	 * Returns an empty list for a zero-dimensional array; otherwise returns
	 * whatever PLyList_FromArray_recurse() builds for the outermost
	 * dimension.
	 */
	ArrayType  *array = DatumGetArrayTypeP(d);
	PLyDatumToOb *elm = arg->elm;
	int			ndim;
	int		   *dims;
	char	   *dataptr;
	bits8	   *bitmap;
	int			bitmask;

	if (ARR_NDIM(array) == 0)
		return PyList_New(0);

	/*
	 * Array dimensions.  The array's lower bounds are not consulted here, so
	 * they are not carried over into the Python list.
	 */
	ndim = ARR_NDIM(array);
	dims = ARR_DIMS(array);
	/* An array may have exactly MAXDIM dimensions, so the bound is inclusive. */
	Assert(ndim <= MAXDIM);

	/*
	 * We iterate the SQL array in the physical order it's stored in the
	 * datum. For example, for a 3-dimensional array the order of iteration
	 * would be the following: [0,0,0] elements through [0,0,k], then [0,1,0]
	 * through [0,1,k] till [0,m,k], then [1,0,0] through [1,0,k] till
	 * [1,m,k], and so on.
	 *
	 * In Python, there are no multi-dimensional lists as such, but they are
	 * represented as a list of lists. So a 3-d array of [n,m,k] elements is a
	 * list of n m-element arrays, each element of which is k-element array.
	 * PLyList_FromArray_recurse() builds the Python list for a single
	 * dimension, and recurses for the next inner dimension.
	 */
	dataptr = ARR_DATA_PTR(array);
	bitmap = ARR_NULLBITMAP(array);
	bitmask = 1;

	return PLyList_FromArray_recurse(elm, dims, ndim, 0,
									 &dataptr, &bitmap, &bitmask);
}
static PyObject *
PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndim, int dim,
char **dataptr_p, bits8 **bitmap_p, int *bitmask_p)
{
int i;
PyObject *list;
length = ARR_DIMS(array)[0];
lbound = ARR_LBOUND(array)[0];
list = PyList_New(length);
if (list == NULL)
PLy_elog(ERROR, "could not create new Python list");
list = PyList_New(dims[dim]);
for (i = 0; i < length; i++)
if (dim < ndim - 1)
{
Datum elem;
bool isnull;
int offset;
offset = lbound + i;
elem = array_ref(array, 1, &offset, arg->typlen,
elm->typlen, elm->typbyval, elm->typalign,
&isnull);
if (isnull)
/* Outer dimension. Recurse for each inner slice. */
for (i = 0; i < dims[dim]; i++)
{
Py_INCREF(Py_None);
PyList_SET_ITEM(list, i, Py_None);
PyObject *sublist;
sublist = PLyList_FromArray_recurse(elm, dims, ndim, dim + 1,
dataptr_p, bitmap_p, bitmask_p);
PyList_SET_ITEM(list, i, sublist);
}
else
PyList_SET_ITEM(list, i, elm->func(elm, elem));
}
else
{
/*
* Innermost dimension. Fill the list with the values from the array
* for this slice.
*/
char *dataptr = *dataptr_p;
bits8 *bitmap = *bitmap_p;
int bitmask = *bitmask_p;
for (i = 0; i < dims[dim]; i++)
{
/* checking for NULL */
if (bitmap && (*bitmap & bitmask) == 0)
{
Py_INCREF(Py_None);
PyList_SET_ITEM(list, i, Py_None);
}
else
{
Datum itemvalue;
itemvalue = fetch_att(dataptr, elm->typbyval, elm->typlen);
PyList_SET_ITEM(list, i, elm->func(elm, itemvalue));
dataptr = att_addlength_pointer(dataptr, elm->typlen, dataptr);
dataptr = (char *) att_align_nominal(dataptr, elm->typalign);
}
/* advance bitmap pointer if any */
if (bitmap)
{
bitmask <<= 1;
if (bitmask == 0x100 /* (1<<8) */ )
{
bitmap++;
bitmask = 1;
}
}
}
*dataptr_p = dataptr;
*bitmap_p = bitmap;
*bitmask_p = bitmask;
}
return list;
......@@ -864,39 +930,108 @@ static Datum
PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
{
ArrayType *array;
Datum rv;
int i;
Datum *elems;
bool *nulls;
int len;
int lbs;
int64 len;
int ndim;
int dims[MAXDIM];
int lbs[MAXDIM];
int currelem;
Datum rv;
PyObject *pyptr = plrv;
PyObject *next;
Assert(plrv != Py_None);
if (!PySequence_Check(plrv))
PLy_elog(ERROR, "return value of function with array return type is not a Python sequence");
/*
* Determine the number of dimensions, and their sizes.
*/
ndim = 0;
len = 1;
len = PySequence_Length(plrv);
elems = palloc(sizeof(*elems) * len);
nulls = palloc(sizeof(*nulls) * len);
Py_INCREF(plrv);
for (i = 0; i < len; i++)
for (;;)
{
PyObject *obj = PySequence_GetItem(plrv, i);
if (!PySequence_Check(pyptr))
break;
if (obj == Py_None)
nulls[i] = true;
else
/* composite type */
if (PyTuple_Check(pyptr))
break;
/* string */
if (PyString_Check(pyptr) || PyBytes_Check(pyptr) || PyUnicode_Check(pyptr))
break;
if (ndim == MAXDIM)
PLy_elog(ERROR, "number of array dimensions exceeds the maximum allowed (%d)", MAXDIM);
dims[ndim] = PySequence_Length(pyptr);
if (dims[ndim] < 0)
PLy_elog(ERROR, "cannot determine sequence length for function return value");
if (dims[ndim] > MaxAllocSize)
PLy_elog(ERROR, "array size exceeds the maximum allowed");
len *= dims[ndim];
if (len > MaxAllocSize)
PLy_elog(ERROR, "array size exceeds the maximum allowed");
if (dims[ndim] == 0)
{
nulls[i] = false;
elems[i] = arg->elm->func(arg->elm, -1, obj);
/* empty sequence */
break;
}
Py_XDECREF(obj);
ndim++;
next = PySequence_GetItem(pyptr, 0);
Py_XDECREF(pyptr);
pyptr = next;
}
Py_XDECREF(pyptr);
/*
* Check for zero dimensions. This happens if the object is a tuple or a
* string, rather than a list, or is not a sequence at all. We don't map
* tuples or strings to arrays in general, but in the first level, be
* lenient, for historical reasons. So if the object is a sequence of any
* kind, treat it as a one-dimensional array.
*/
if (ndim == 0)
{
if (!PySequence_Check(plrv))
PLy_elog(ERROR, "return value of function with array return type is not a Python sequence");
ndim = 1;
len = dims[0] = PySequence_Length(plrv);
}
lbs = 1;
array = construct_md_array(elems, nulls, 1, &len, &lbs,
get_base_element_type(arg->typoid), arg->elm->typlen, arg->elm->typbyval, arg->elm->typalign);
/*
* Traverse the Python lists, in depth-first order, and collect all the
* elements at the bottom level into 'elems'/'nulls' arrays.
*/
elems = palloc(sizeof(Datum) * len);
nulls = palloc(sizeof(bool) * len);
currelem = 0;
PLySequence_ToArray_recurse(arg->elm, plrv,
dims, ndim, 0,
elems, nulls, &currelem);
for (i = 0; i < ndim; i++)
lbs[i] = 1;
array = construct_md_array(elems,
nulls,
ndim,
dims,
lbs,
get_base_element_type(arg->typoid),
arg->elm->typlen,
arg->elm->typbyval,
arg->elm->typalign);
/*
* If the result type is a domain of array, the resulting array must be
......@@ -908,6 +1043,56 @@ PLySequence_ToArray(PLyObToDatum *arg, int32 typmod, PyObject *plrv)
return rv;
}
/*
 * Helper function for PLySequence_ToArray. Traverse a Python list of lists in
 * depth-first order, storing the bottom-level elements in 'elems'/'nulls'.
 *
 * elm      - conversion info for the array's element type
 * list     - the Python sequence to process at this nesting level
 * dims     - expected length of the sequence at each nesting level
 * ndim     - number of nesting levels (array dimensions)
 * dim      - nesting level that 'list' belongs to (0 is the outermost)
 * elems    - output: converted element datums, filled in traversal order
 * nulls    - output: parallel null flags for 'elems'
 * currelem - in/out: index of the next free slot in 'elems'/'nulls'
 *
 * Raises an error if the length of 'list' does not equal dims[dim], or if an
 * item cannot be fetched from the sequence.
 */
static void
PLySequence_ToArray_recurse(PLyObToDatum *elm, PyObject *list,
							int *dims, int ndim, int dim,
							Datum *elems, bool *nulls, int *currelem)
{
	int			i;
	Py_ssize_t	seqlen;

	/* Fetch the length once; it is used for both the check and the message. */
	seqlen = PySequence_Length(list);
	if (seqlen != dims[dim])
		PLy_elog(ERROR,
				 "multidimensional arrays must have array expressions with matching dimensions. "
				 "PL/Python function return value has sequence length %d while expected %d",
				 (int) seqlen, dims[dim]);

	if (dim < ndim - 1)
	{
		/* Outer dimension: descend into each sub-sequence. */
		for (i = 0; i < dims[dim]; i++)
		{
			PyObject   *sublist = PySequence_GetItem(list, i);

			/* PySequence_GetItem() returns NULL on failure; don't recurse on it. */
			if (sublist == NULL)
				PLy_elog(ERROR, "could not get item %d from Python sequence", i);

			PLySequence_ToArray_recurse(elm, sublist, dims, ndim, dim + 1,
										elems, nulls, currelem);
			Py_XDECREF(sublist);
		}
	}
	else
	{
		/* Innermost dimension: convert each item to a Datum. */
		for (i = 0; i < dims[dim]; i++)
		{
			PyObject   *obj = PySequence_GetItem(list, i);

			/* Never hand a NULL object to the element conversion function. */
			if (obj == NULL)
				PLy_elog(ERROR, "could not get item %d from Python sequence", i);

			if (obj == Py_None)
			{
				nulls[*currelem] = true;
				elems[*currelem] = (Datum) 0;
			}
			else
			{
				nulls[*currelem] = false;
				elems[*currelem] = elm->func(elm, -1, obj);
			}
			Py_XDECREF(obj);
			(*currelem)++;
		}
	}
}
static Datum
PLyString_ToComposite(PLyTypeInfo *info, TupleDesc desc, PyObject *string)
......
......@@ -169,13 +169,13 @@ SELECT * FROM changing_test();
-- tables of composite types
CREATE FUNCTION composite_types_table(OUT tab table_record[], OUT typ type_record[] ) RETURNS SETOF record AS $$
yield {'tab': [['first', 1], ['second', 2]],
yield {'tab': [('first', 1), ('second', 2)],
'typ': [{'first': 'third', 'second': 3},
{'first': 'fourth', 'second': 4}]}
yield {'tab': [['first', 1], ['second', 2]],
yield {'tab': [('first', 1), ('second', 2)],
'typ': [{'first': 'third', 'second': 3},
{'first': 'fourth', 'second': 4}]}
yield {'tab': [['first', 1], ['second', 2]],
yield {'tab': [('first', 1), ('second', 2)],
'typ': [{'first': 'third', 'second': 3},
{'first': 'fourth', 'second': 4}]}
$$ LANGUAGE plpythonu;
......@@ -207,3 +207,9 @@ SELECT * FROM return_record_2('v4') AS (v1 int, v3 int, v2 int);
-- works
SELECT * FROM return_record_2('v3') AS (v1 int, v3 int, v2 int);
SELECT * FROM return_record_2('v3') AS (v1 int, v2 int, v3 int);
-- multi-dimensional array of composite types.
CREATE FUNCTION composite_type_as_list() RETURNS type_record[] AS $$
return [[('first', 1), ('second', 1)], [('first', 2), ('second', 2)], [('first', 3), ('second', 3)]];
$$ LANGUAGE plpythonu;
SELECT * FROM composite_type_as_list();
......@@ -237,7 +237,80 @@ SELECT * FROM test_type_conversion_array_int4(ARRAY[NULL,1]);
SELECT * FROM test_type_conversion_array_int4(ARRAY[]::integer[]);
SELECT * FROM test_type_conversion_array_int4(NULL);
SELECT * FROM test_type_conversion_array_int4(ARRAY[[1,2,3],[4,5,6]]);
SELECT * FROM test_type_conversion_array_int4(ARRAY[[[1,2,NULL],[NULL,5,6]],[[NULL,8,9],[10,11,12]]]);
SELECT * FROM test_type_conversion_array_int4('[2:4]={1,2,3}');
CREATE FUNCTION test_type_conversion_array_int8(x int8[]) RETURNS int8[] AS $$
plpy.info(x, type(x))
return x
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_int8(ARRAY[[[1,2,NULL],[NULL,5,6]],[[NULL,8,9],[10,11,12]]]::int8[]);
CREATE FUNCTION test_type_conversion_array_float4(x float4[]) RETURNS float4[] AS $$
plpy.info(x, type(x))
return x
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_float4(ARRAY[[[1.2,2.3,NULL],[NULL,5.7,6.8]],[[NULL,8.9,9.345],[10.123,11.456,12.6768]]]::float4[]);
CREATE FUNCTION test_type_conversion_array_float8(x float8[]) RETURNS float8[] AS $$
plpy.info(x, type(x))
return x
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_float8(ARRAY[[[1.2,2.3,NULL],[NULL,5.7,6.8]],[[NULL,8.9,9.345],[10.123,11.456,12.6768]]]::float8[]);
CREATE FUNCTION test_type_conversion_array_date(x date[]) RETURNS date[] AS $$
plpy.info(x, type(x))
return x
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_date(ARRAY[[['2016-09-21','2016-09-22',NULL],[NULL,'2016-10-21','2016-10-22']],
[[NULL,'2016-11-21','2016-10-21'],['2015-09-21','2015-09-22','2014-09-21']]]::date[]);
CREATE FUNCTION test_type_conversion_array_timestamp(x timestamp[]) RETURNS timestamp[] AS $$
plpy.info(x, type(x))
return x
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_timestamp(ARRAY[[['2016-09-21 15:34:24.078792-04','2016-10-22 11:34:24.078795-04',NULL],
[NULL,'2016-10-21 11:34:25.078792-04','2016-10-21 11:34:24.098792-04']],
[[NULL,'2016-01-21 11:34:24.078792-04','2016-11-21 11:34:24.108792-04'],
['2015-09-21 11:34:24.079792-04','2014-09-21 11:34:24.078792-04','2013-09-21 11:34:24.078792-04']]]::timestamp[]);
CREATE OR REPLACE FUNCTION pyreturnmultidemint4(h int4, i int4, j int4, k int4 ) RETURNS int4[] AS $BODY$
m = [[[[x for x in range(h)] for y in range(i)] for z in range(j)] for w in range(k)]
plpy.info(m, type(m))
return m
$BODY$ LANGUAGE plpythonu;
select pyreturnmultidemint4(8,5,3,2);
CREATE OR REPLACE FUNCTION pyreturnmultidemint8(h int4, i int4, j int4, k int4 ) RETURNS int8[] AS $BODY$
m = [[[[x for x in range(h)] for y in range(i)] for z in range(j)] for w in range(k)]
plpy.info(m, type(m))
return m
$BODY$ LANGUAGE plpythonu;
select pyreturnmultidemint8(5,5,3,2);
CREATE OR REPLACE FUNCTION pyreturnmultidemfloat4(h int4, i int4, j int4, k int4 ) RETURNS float4[] AS $BODY$
m = [[[[x for x in range(h)] for y in range(i)] for z in range(j)] for w in range(k)]
plpy.info(m, type(m))
return m
$BODY$ LANGUAGE plpythonu;
select pyreturnmultidemfloat4(6,5,3,2);
CREATE OR REPLACE FUNCTION pyreturnmultidemfloat8(h int4, i int4, j int4, k int4 ) RETURNS float8[] AS $BODY$
m = [[[[x for x in range(h)] for y in range(i)] for z in range(j)] for w in range(k)]
plpy.info(m, type(m))
return m
$BODY$ LANGUAGE plpythonu;
select pyreturnmultidemfloat8(7,5,3,2);
CREATE FUNCTION test_type_conversion_array_text(x text[]) RETURNS text[] AS $$
plpy.info(x, type(x))
......@@ -245,6 +318,7 @@ return x
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_text(ARRAY['foo', 'bar']);
SELECT * FROM test_type_conversion_array_text(ARRAY[['foo', 'bar'],['foo2', 'bar2']]);
CREATE FUNCTION test_type_conversion_array_bytea(x bytea[]) RETURNS bytea[] AS $$
......@@ -268,6 +342,18 @@ $$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_array_mixed2();
CREATE FUNCTION test_type_conversion_mdarray_malformed() RETURNS int[] AS $$
return [[1,2,3],[4,5]]
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_mdarray_malformed();
CREATE FUNCTION test_type_conversion_mdarray_toodeep() RETURNS int[] AS $$
return [[[[[[[1]]]]]]]
$$ LANGUAGE plpythonu;
SELECT * FROM test_type_conversion_mdarray_toodeep();
CREATE FUNCTION test_type_conversion_array_record() RETURNS type_record[] AS $$
return [{'first': 'one', 'second': 42}, {'first': 'two', 'second': 11}]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment