Commit 0ea9efbe authored by Tom Lane's avatar Tom Lane

Split psql's lexer into two separate .l files for SQL and backslash cases.

This gets us to a point where psqlscan.l can be used by other frontend
programs for the same purpose psql uses it for, ie to detect when it's
collected a complete SQL command from input that is divided across
line boundaries.  Moreover, other programs can supply their own lexers
for backslash commands of their own choosing.  A follow-on patch will
use this in pgbench.

The end result here is roughly the same as in Kyotaro Horiguchi's
0001-Make-SQL-parser-part-of-psqlscan-independent-from-ps.patch, although
the details of the method for switching between lexers are quite different.
Basically, in this patch we share the entire PsqlScanState, YY_BUFFER_STATE
stack, *and* yyscan_t between different lexers.  The only thing we need
to do to switch to a different lexer is to make sure the start_state is
valid for the new lexer.  This works because flex doesn't keep any other
persistent state that depends on the specific lexing tables generated for
a particular .l file.  (We are assuming that both lexers are built with
the same flex version, or at least versions that are compatible with
respect to the contents of yyscan_t; but that doesn't seem likely to
be a big problem in practice, considering how slowly flex changes.)

Aside from being more efficient than Horiguchi-san's original solution,
this avoids possible corner-case changes in semantics: the original code
was capable of popping the input buffer stack while still staying in
backslash-related parsing states.  I'm not sure that that equates to any
useful user-visible behaviors, but I'm not sure it doesn't either, so
I'm loath to assume that we only need to consider the topmost buffer when
parsing a backslash command.

I've attempted to update the MSVC build scripts for the added .l file,
but will rely on the buildfarm to see if I missed anything.

Kyotaro Horiguchi and Tom Lane
parent 27199058
/psqlscan.c
/psqlscanslash.c
/sql_help.h
/sql_help.c
/dumputils.c
......
......@@ -23,7 +23,7 @@ override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) -I$(top_srcdir)/src/bin/p
OBJS= command.o common.o help.o input.o stringutils.o mainloop.o copy.o \
startup.o prompt.o variables.o large_obj.o print.o describe.o \
tab-complete.o mbprint.o dumputils.o keywords.o kwlookup.o \
sql_help.o psqlscan.o \
sql_help.o psqlscan.o psqlscanslash.o \
$(WIN32RES)
......@@ -47,12 +47,16 @@ sql_help.h: create_help.pl $(wildcard $(REFDOCDIR)/*.sgml)
psqlscan.c: FLEXFLAGS = -Cfe -p -p
psqlscan.c: FLEX_NO_BACKUP=yes
# Latest flex causes warnings in this file.
psqlscanslash.c: FLEXFLAGS = -Cfe -p -p
psqlscanslash.c: FLEX_NO_BACKUP=yes
# Latest flex causes warnings in these files.
ifeq ($(GCC),yes)
psqlscan.o: CFLAGS += -Wno-error
psqlscanslash.o: CFLAGS += -Wno-error
endif
distprep: sql_help.h psqlscan.c
distprep: sql_help.h psqlscan.c psqlscanslash.c
install: all installdirs
$(INSTALL_PROGRAM) psql$(X) '$(DESTDIR)$(bindir)/psql$(X)'
......@@ -64,9 +68,10 @@ installdirs:
uninstall:
rm -f '$(DESTDIR)$(bindir)/psql$(X)' '$(DESTDIR)$(datadir)/psqlrc.sample'
# psqlscan.c is in the distribution tarball, so is not cleaned here
clean distclean:
rm -f psql$(X) $(OBJS) dumputils.c keywords.c kwlookup.c lex.backup
# files removed here are supposed to be in the distribution tarball,
# so do not clean them in the clean/distclean rules
maintainer-clean: distclean
rm -f sql_help.h sql_help.c psqlscan.c
rm -f sql_help.h sql_help.c psqlscan.c psqlscanslash.c
......@@ -45,7 +45,7 @@
#include "large_obj.h"
#include "mainloop.h"
#include "print.h"
#include "psqlscan.h"
#include "psqlscanslash.h"
#include "settings.h"
#include "variables.h"
......
......@@ -2,7 +2,8 @@
CATALOG_NAME = psql
AVAIL_LANGUAGES = cs de es fr it ja pl pt_BR ru zh_CN zh_TW
GETTEXT_FILES = command.c common.c copy.c help.c input.c large_obj.c \
mainloop.c print.c psqlscan.c startup.c describe.c sql_help.h sql_help.c \
mainloop.c print.c psqlscan.c psqlscanslash.c startup.c \
describe.c sql_help.h sql_help.c \
tab-complete.c variables.c \
../../common/exec.c ../../common/fe_memutils.c ../../common/username.c \
../../common/wait_error.c
......
......@@ -25,17 +25,6 @@ typedef enum
PSCAN_EOL /* end of line, SQL possibly complete */
} PsqlScanResult;
/* Different ways for scan_slash_option to handle parameter words */
enum slash_option_type
{
OT_NORMAL, /* normal case */
OT_SQLID, /* treat as SQL identifier */
OT_SQLIDHACK, /* SQL identifier, but don't downcase */
OT_FILEPIPE, /* it's a filename or pipe */
OT_WHOLE_LINE, /* just snarf the rest of the line */
OT_NO_EVAL /* no expansion of backticks or variables */
};
/* Callback functions to be used by the lexer */
typedef struct PsqlScanCallbacks
{
......@@ -61,15 +50,8 @@ extern PsqlScanResult psql_scan(PsqlScanState state,
extern void psql_scan_reset(PsqlScanState state);
extern bool psql_scan_in_quote(PsqlScanState state);
extern char *psql_scan_slash_command(PsqlScanState state);
extern char *psql_scan_slash_option(PsqlScanState state,
enum slash_option_type type,
char *quote,
bool semicolon);
extern void psql_scan_reselect_sql_lexer(PsqlScanState state);
extern void psql_scan_slash_command_end(PsqlScanState state);
extern bool psql_scan_in_quote(PsqlScanState state);
#endif /* PSQLSCAN_H */
This diff is collapsed.
/*
* psqlscan_int.h
* lexical scanner internal declarations
*
* This file declares the PsqlScanStateData structure used by psqlscan.l
* and shared by other lexers compatible with it, such as psqlscanslash.l.
*
* One difficult aspect of this code is that we need to work in multibyte
* encodings that are not ASCII-safe. A "safe" encoding is one in which each
* byte of a multibyte character has the high bit set (it's >= 0x80). Since
* all our lexing rules treat all high-bit-set characters alike, we don't
* really need to care whether such a byte is part of a sequence or not.
* In an "unsafe" encoding, we still expect the first byte of a multibyte
* sequence to be >= 0x80, but later bytes might not be. If we scan such
* a sequence as-is, the lexing rules could easily be fooled into matching
* such bytes to ordinary ASCII characters. Our solution for this is to
* substitute 0xFF for each non-first byte within the data presented to flex.
* The flex rules will then pass the FF's through unmolested. The
* psqlscan_emit() subroutine is responsible for looking back to the original
* string and replacing FF's with the corresponding original bytes.
*
* Another interesting thing we do here is scan different parts of the same
* input with physically separate flex lexers (ie, lexers written in separate
* .l files). We can get away with this because the only part of the
* persistent state of a flex lexer that depends on its parsing rule tables
* is the start state number, which is easy enough to manage --- usually,
* in fact, we just need to set it to INITIAL when changing lexers. But to
* make that work at all, we must use re-entrant lexers, so that all the
* relevant state is in the yyscan_t attached to the PsqlScanState;
* if we were using lexers with separate static state we would soon end up
* with dangling buffer pointers in one or the other. Also note that this
* is unlikely to work very nicely if the lexers aren't all built with the
* same flex version.
*
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
*
* src/bin/psql/psqlscan_int.h
*/
#ifndef PSQLSCAN_INT_H
#define PSQLSCAN_INT_H
#include "psqlscan.h"
/*
 * These declarations are just to allow this file to be compilable
 * standalone, i.e. without first including flex-generated scanner code.
 * Both flex types used below (YY_BUFFER_STATE and yyscan_t) need to be
 * declared for that to work.  The guard macros are the ones flex's own
 * generated code tests, so each typedef is skipped when flex has already
 * supplied it, and vice versa.
 */
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
#ifndef YY_TYPEDEF_YY_SCANNER_T
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
#endif
/*
* We use a stack of flex buffers to handle substitution of psql variables.
* Each stacked buffer contains the as-yet-unread text from one psql variable.
* When we pop the stack all the way, we resume reading from the outer buffer
* identified by scanbufhandle.
*/
/* One entry in the stack of variable-expansion buffers. */
typedef struct StackElem
{
YY_BUFFER_STATE buf; /* flex input control structure */
char *bufstring; /* data actually being scanned by flex */
char *origstring; /* copy of original data, if needed */
char *varname; /* name of variable providing data, or NULL */
struct StackElem *next; /* link to the next entry in the stack */
} StackElem;
/*
* All working state of the lexer must be stored in PsqlScanStateData
* between calls. This allows us to have multiple open lexer operations,
* which is needed for nested include files. The lexer itself is not
* recursive, but it must be re-entrant.
*/
typedef struct PsqlScanStateData
{
yyscan_t scanner; /* Flex's state for this PsqlScanState */
PQExpBuffer output_buf; /* current output buffer */
StackElem *buffer_stack; /* stack of variable expansion buffers */
/*
* These variables always refer to the outer buffer, never to any stacked
* variable-expansion buffer.
*/
YY_BUFFER_STATE scanbufhandle; /* flex handle identifying the outer buffer */
char *scanbuf; /* start of outer-level input buffer */
const char *scanline; /* current input line at outer level */
/*
* safe_encoding, curline, refline are used by psqlscan_emit() to replace
* the 0xFF placeholder bytes with the corresponding original bytes.
*/
int encoding; /* encoding being used now */
bool safe_encoding; /* is current encoding "safe"? */
bool std_strings; /* are string literals standard? */
const char *curline; /* actual flex input string for cur buf */
const char *refline; /* original data for cur buffer */
/*
* All this state lives across successive input lines, until explicitly
* reset by psql_scan_reset. start_state is adopted by yylex() on entry,
* and updated with its finishing state on exit.
*/
int start_state; /* yylex's starting/finishing state */
int paren_depth; /* depth of nesting in parentheses */
int xcdepth; /* depth of nesting in slash-star comments */
char *dolqstart; /* current $foo$ quote start string */
/*
* Callback functions provided by the program making use of the lexer.
*/
const PsqlScanCallbacks *callbacks;
} PsqlScanStateData;
/*
* Functions exported by psqlscan.l, but only meant for use within
* compatible lexers.
*/
extern void psqlscan_push_new_buffer(PsqlScanState state,
const char *newstr, const char *varname);
extern void psqlscan_pop_buffer_stack(PsqlScanState state);
extern void psqlscan_select_top_buffer(PsqlScanState state);
extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
const char *txt, int len,
char **txtcopy);
extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
extern char *psqlscan_extract_substring(PsqlScanState state,
const char *txt, int len);
extern void psqlscan_escape_variable(PsqlScanState state,
const char *txt, int len,
bool as_ident);
#endif /* PSQLSCAN_INT_H */
/*
* psql - the PostgreSQL interactive terminal
*
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
*
* src/bin/psql/psqlscanslash.h
*/
#ifndef PSQLSCANSLASH_H
#define PSQLSCANSLASH_H
#include "psqlscan.h"
/* Different ways for psql_scan_slash_option to handle parameter words */
enum slash_option_type
{
OT_NORMAL, /* normal case */
OT_SQLID, /* treat as SQL identifier */
OT_SQLIDHACK, /* SQL identifier, but don't downcase */
OT_FILEPIPE, /* it's a filename or pipe */
OT_WHOLE_LINE, /* just snarf the rest of the line */
OT_NO_EVAL /* no expansion of backticks or variables */
};
extern char *psql_scan_slash_command(PsqlScanState state);
extern char *psql_scan_slash_option(PsqlScanState state,
enum slash_option_type type,
char *quote,
bool semicolon);
extern void psql_scan_slash_command_end(PsqlScanState state);
#endif /* PSQLSCANSLASH_H */
This diff is collapsed.
......@@ -16,7 +16,7 @@
*
* We allow any non-ASCII character, as well as ASCII letters, digits, and
* underscore. Keep this in sync with the definition of variable_char in
* psqlscan.l.
* psqlscan.l and psqlscanslash.l.
*/
static bool
valid_variable_name(const char *name)
......
......@@ -64,7 +64,7 @@ my $frontend_extraincludes = {
'initdb' => ['src/timezone'],
'psql' => [ 'src/bin/pg_dump', 'src/backend' ] };
my $frontend_extrasource = {
'psql' => ['src/bin/psql/psqlscan.l'],
'psql' => ['src/bin/psql/psqlscan.l', 'src/bin/psql/psqlscanslash.l'],
'pgbench' =>
[ 'src/bin/pgbench/exprscan.l', 'src/bin/pgbench/exprparse.y' ], };
my @frontend_excludes = (
......
......@@ -76,6 +76,7 @@ if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.c del /q src\pl\plpgsql\src\pl_
if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.h del /q src\pl\plpgsql\src\pl_gram.h
if %DIST%==1 if exist src\bin\psql\psqlscan.c del /q src\bin\psql\psqlscan.c
if %DIST%==1 if exist src\bin\psql\psqlscanslash.c del /q src\bin\psql\psqlscanslash.c
if %DIST%==1 if exist contrib\cube\cubescan.c del /q contrib\cube\cubescan.c
if %DIST%==1 if exist contrib\cube\cubeparse.c del /q contrib\cube\cubeparse.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment