Commit 0ea9efbe authored by Tom Lane

Split psql's lexer into two separate .l files for SQL and backslash cases.

This gets us to a point where psqlscan.l can be used by other frontend
programs for the same purpose psql uses it for, i.e., to detect when it's
collected a complete SQL command from input that is divided across
line boundaries.  Moreover, other programs can supply their own lexers
for backslash commands of their own choosing.  A follow-on patch will
use this in pgbench.

The end result here is roughly the same as in Kyotaro Horiguchi's
0001-Make-SQL-parser-part-of-psqlscan-independent-from-ps.patch, although
the details of the method for switching between lexers are quite different.
Basically, in this patch we share the entire PsqlScanState, YY_BUFFER_STATE
stack, *and* yyscan_t between different lexers.  The only thing we need
to do to switch to a different lexer is to make sure the start_state is
valid for the new lexer.  This works because flex doesn't keep any other
persistent state that depends on the specific lexing tables generated for
a particular .l file.  (We are assuming that both lexers are built with
the same flex version, or at least versions that are compatible with
respect to the contents of yyscan_t; but that doesn't seem likely to
be a big problem in practice, considering how slowly flex changes.)

Aside from being more efficient than Horiguchi-san's original solution,
this avoids possible corner-case changes in semantics: the original code
was capable of popping the input buffer stack while still staying in
backslash-related parsing states.  I'm not sure that that equates to any
useful user-visible behaviors, but I'm not sure it doesn't either, so
I'm loath to assume that we only need to consider the topmost buffer when
parsing a backslash command.

I've attempted to update the MSVC build scripts for the added .l file,
but will rely on the buildfarm to see if I missed anything.

Kyotaro Horiguchi and Tom Lane
parent 27199058
/psqlscan.c /psqlscan.c
/psqlscanslash.c
/sql_help.h /sql_help.h
/sql_help.c /sql_help.c
/dumputils.c /dumputils.c
......
...@@ -23,7 +23,7 @@ override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) -I$(top_srcdir)/src/bin/p ...@@ -23,7 +23,7 @@ override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) -I$(top_srcdir)/src/bin/p
OBJS= command.o common.o help.o input.o stringutils.o mainloop.o copy.o \ OBJS= command.o common.o help.o input.o stringutils.o mainloop.o copy.o \
startup.o prompt.o variables.o large_obj.o print.o describe.o \ startup.o prompt.o variables.o large_obj.o print.o describe.o \
tab-complete.o mbprint.o dumputils.o keywords.o kwlookup.o \ tab-complete.o mbprint.o dumputils.o keywords.o kwlookup.o \
sql_help.o psqlscan.o \ sql_help.o psqlscan.o psqlscanslash.o \
$(WIN32RES) $(WIN32RES)
...@@ -47,12 +47,16 @@ sql_help.h: create_help.pl $(wildcard $(REFDOCDIR)/*.sgml) ...@@ -47,12 +47,16 @@ sql_help.h: create_help.pl $(wildcard $(REFDOCDIR)/*.sgml)
psqlscan.c: FLEXFLAGS = -Cfe -p -p psqlscan.c: FLEXFLAGS = -Cfe -p -p
psqlscan.c: FLEX_NO_BACKUP=yes psqlscan.c: FLEX_NO_BACKUP=yes
# Latest flex causes warnings in this file. psqlscanslash.c: FLEXFLAGS = -Cfe -p -p
psqlscanslash.c: FLEX_NO_BACKUP=yes
# Latest flex causes warnings in these files.
ifeq ($(GCC),yes) ifeq ($(GCC),yes)
psqlscan.o: CFLAGS += -Wno-error psqlscan.o: CFLAGS += -Wno-error
psqlscanslash.o: CFLAGS += -Wno-error
endif endif
distprep: sql_help.h psqlscan.c distprep: sql_help.h psqlscan.c psqlscanslash.c
install: all installdirs install: all installdirs
$(INSTALL_PROGRAM) psql$(X) '$(DESTDIR)$(bindir)/psql$(X)' $(INSTALL_PROGRAM) psql$(X) '$(DESTDIR)$(bindir)/psql$(X)'
...@@ -64,9 +68,10 @@ installdirs: ...@@ -64,9 +68,10 @@ installdirs:
uninstall: uninstall:
rm -f '$(DESTDIR)$(bindir)/psql$(X)' '$(DESTDIR)$(datadir)/psqlrc.sample' rm -f '$(DESTDIR)$(bindir)/psql$(X)' '$(DESTDIR)$(datadir)/psqlrc.sample'
# psqlscan.c is in the distribution tarball, so is not cleaned here
clean distclean: clean distclean:
rm -f psql$(X) $(OBJS) dumputils.c keywords.c kwlookup.c lex.backup rm -f psql$(X) $(OBJS) dumputils.c keywords.c kwlookup.c lex.backup
# files removed here are supposed to be in the distribution tarball,
# so do not clean them in the clean/distclean rules
maintainer-clean: distclean maintainer-clean: distclean
rm -f sql_help.h sql_help.c psqlscan.c rm -f sql_help.h sql_help.c psqlscan.c psqlscanslash.c
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
#include "large_obj.h" #include "large_obj.h"
#include "mainloop.h" #include "mainloop.h"
#include "print.h" #include "print.h"
#include "psqlscan.h" #include "psqlscanslash.h"
#include "settings.h" #include "settings.h"
#include "variables.h" #include "variables.h"
......
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
CATALOG_NAME = psql CATALOG_NAME = psql
AVAIL_LANGUAGES = cs de es fr it ja pl pt_BR ru zh_CN zh_TW AVAIL_LANGUAGES = cs de es fr it ja pl pt_BR ru zh_CN zh_TW
GETTEXT_FILES = command.c common.c copy.c help.c input.c large_obj.c \ GETTEXT_FILES = command.c common.c copy.c help.c input.c large_obj.c \
mainloop.c print.c psqlscan.c startup.c describe.c sql_help.h sql_help.c \ mainloop.c print.c psqlscan.c psqlscanslash.c startup.c \
describe.c sql_help.h sql_help.c \
tab-complete.c variables.c \ tab-complete.c variables.c \
../../common/exec.c ../../common/fe_memutils.c ../../common/username.c \ ../../common/exec.c ../../common/fe_memutils.c ../../common/username.c \
../../common/wait_error.c ../../common/wait_error.c
......
...@@ -25,17 +25,6 @@ typedef enum ...@@ -25,17 +25,6 @@ typedef enum
PSCAN_EOL /* end of line, SQL possibly complete */ PSCAN_EOL /* end of line, SQL possibly complete */
} PsqlScanResult; } PsqlScanResult;
/* Different ways for scan_slash_option to handle parameter words */
enum slash_option_type
{
OT_NORMAL, /* normal case */
OT_SQLID, /* treat as SQL identifier */
OT_SQLIDHACK, /* SQL identifier, but don't downcase */
OT_FILEPIPE, /* it's a filename or pipe */
OT_WHOLE_LINE, /* just snarf the rest of the line */
OT_NO_EVAL /* no expansion of backticks or variables */
};
/* Callback functions to be used by the lexer */ /* Callback functions to be used by the lexer */
typedef struct PsqlScanCallbacks typedef struct PsqlScanCallbacks
{ {
...@@ -61,15 +50,8 @@ extern PsqlScanResult psql_scan(PsqlScanState state, ...@@ -61,15 +50,8 @@ extern PsqlScanResult psql_scan(PsqlScanState state,
extern void psql_scan_reset(PsqlScanState state); extern void psql_scan_reset(PsqlScanState state);
extern bool psql_scan_in_quote(PsqlScanState state); extern void psql_scan_reselect_sql_lexer(PsqlScanState state);
extern char *psql_scan_slash_command(PsqlScanState state);
extern char *psql_scan_slash_option(PsqlScanState state,
enum slash_option_type type,
char *quote,
bool semicolon);
extern void psql_scan_slash_command_end(PsqlScanState state); extern bool psql_scan_in_quote(PsqlScanState state);
#endif /* PSQLSCAN_H */ #endif /* PSQLSCAN_H */
...@@ -15,19 +15,7 @@ ...@@ -15,19 +15,7 @@
* *
* XXX Avoid creating backtracking cases --- see the backend lexer for info. * XXX Avoid creating backtracking cases --- see the backend lexer for info.
* *
* The most difficult aspect of this code is that we need to work in multibyte * See psqlscan_int.h for additional commentary.
* encodings that are not ASCII-safe. A "safe" encoding is one in which each
* byte of a multibyte character has the high bit set (it's >= 0x80). Since
* all our lexing rules treat all high-bit-set characters alike, we don't
* really need to care whether such a byte is part of a sequence or not.
* In an "unsafe" encoding, we still expect the first byte of a multibyte
* sequence to be >= 0x80, but later bytes might not be. If we scan such
* a sequence as-is, the lexing rules could easily be fooled into matching
* such bytes to ordinary ASCII characters. Our solution for this is to
* substitute 0xFF for each non-first byte within the data presented to flex.
* The flex rules will then pass the FF's through unmolested. The emit()
* subroutine is responsible for looking back to the original string and
* replacing FF's with the corresponding original bytes.
* *
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
...@@ -45,64 +33,7 @@ ...@@ -45,64 +33,7 @@
} }
%{ %{
/* #include "psqlscan_int.h"
* We use a stack of flex buffers to handle substitution of psql variables.
* Each stacked buffer contains the as-yet-unread text from one psql variable.
* When we pop the stack all the way, we resume reading from the outer buffer
* identified by scanbufhandle.
*/
typedef struct StackElem
{
YY_BUFFER_STATE buf; /* flex input control structure */
char *bufstring; /* data actually being scanned by flex */
char *origstring; /* copy of original data, if needed */
char *varname; /* name of variable providing data, or NULL */
struct StackElem *next;
} StackElem;
/*
* All working state of the lexer must be stored in PsqlScanStateData
* between calls. This allows us to have multiple open lexer operations,
* which is needed for nested include files. The lexer itself is not
* recursive, but it must be re-entrant.
*/
typedef struct PsqlScanStateData
{
yyscan_t scanner; /* Flex's state for this PsqlScanState */
PQExpBuffer output_buf; /* current output buffer */
StackElem *buffer_stack; /* stack of variable expansion buffers */
/*
* These variables always refer to the outer buffer, never to any
* stacked variable-expansion buffer.
*/
YY_BUFFER_STATE scanbufhandle;
char *scanbuf; /* start of outer-level input buffer */
const char *scanline; /* current input line at outer level */
/* safe_encoding, curline, refline are used by emit() to replace FFs */
int encoding; /* encoding being used now */
bool safe_encoding; /* is current encoding "safe"? */
bool std_strings; /* are string literals standard? */
const char *curline; /* actual flex input string for cur buf */
const char *refline; /* original data for cur buffer */
/*
* All this state lives across successive input lines, until explicitly
* reset by psql_scan_reset. start_state is adopted by yylex() on
* entry, and updated with its finishing state on exit.
*/
int start_state; /* yylex's starting/finishing state */
int paren_depth; /* depth of nesting in parentheses */
int xcdepth; /* depth of nesting in slash-star comments */
char *dolqstart; /* current $foo$ quote start string */
/*
* Callback functions provided by the program making use of the lexer.
*/
const PsqlScanCallbacks *callbacks;
} PsqlScanStateData;
/* /*
* Set the type of yyextra; we use it as a pointer back to the containing * Set the type of yyextra; we use it as a pointer back to the containing
...@@ -110,37 +41,16 @@ typedef struct PsqlScanStateData ...@@ -110,37 +41,16 @@ typedef struct PsqlScanStateData
*/ */
#define YY_EXTRA_TYPE PsqlScanState #define YY_EXTRA_TYPE PsqlScanState
/*
* These variables do not need to be saved across calls. Yeah, it's a bit
* of a hack, but putting them into PsqlScanStateData would be klugy too.
*/
static enum slash_option_type option_type;
static char *option_quote;
static int unquoted_option_chars;
static int backtick_start_offset;
/* Return values from yylex() */ /* Return values from yylex() */
#define LEXRES_EOL 0 /* end of input */ #define LEXRES_EOL 0 /* end of input */
#define LEXRES_SEMI 1 /* command-terminating semicolon found */ #define LEXRES_SEMI 1 /* command-terminating semicolon found */
#define LEXRES_BACKSLASH 2 /* backslash command start */ #define LEXRES_BACKSLASH 2 /* backslash command start */
#define LEXRES_OK 3 /* OK completion of backslash argument */
static void evaluate_backtick(PsqlScanState state);
static void push_new_buffer(PsqlScanState state,
const char *newstr, const char *varname);
static void pop_buffer_stack(PsqlScanState state);
static bool var_is_current_source(PsqlScanState state, const char *varname); static bool var_is_current_source(PsqlScanState state, const char *varname);
static YY_BUFFER_STATE prepare_buffer(PsqlScanState state,
const char *txt, int len,
char **txtcopy);
static void emit(PsqlScanState state, const char *txt, int len);
static char *extract_substring(PsqlScanState state, const char *txt, int len);
static void escape_variable(PsqlScanState state, const char *txt, int len,
bool as_ident);
#define ECHO emit(cur_state, yytext, yyleng) #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
/* /*
* Work around a bug in flex 2.5.35: it emits a couple of functions that * Work around a bug in flex 2.5.35: it emits a couple of functions that
...@@ -212,15 +122,6 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); ...@@ -212,15 +122,6 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
%x xuiend %x xuiend
%x xus %x xus
%x xusend %x xusend
/* Additional exclusive states for psql only: lex backslash commands */
%x xslashcmd
%x xslashargstart
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdquote
%x xslashwholeline
%x xslashend
/* /*
* In order to make the world safe for Windows and Mac clients as well as * In order to make the world safe for Windows and Mac clients as well as
...@@ -770,7 +671,7 @@ other . ...@@ -770,7 +671,7 @@ other .
"\\"[;:] { "\\"[;:] {
/* Force a semicolon or colon into the query buffer */ /* Force a semicolon or colon into the query buffer */
emit(cur_state, yytext + 1, 1); psqlscan_emit(cur_state, yytext + 1, 1);
} }
"\\" { "\\" {
...@@ -784,7 +685,7 @@ other . ...@@ -784,7 +685,7 @@ other .
char *varname; char *varname;
char *value; char *value;
varname = extract_substring(cur_state, varname = psqlscan_extract_substring(cur_state,
yytext + 1, yytext + 1,
yyleng - 1); yyleng - 1);
if (cur_state->callbacks->get_variable) if (cur_state->callbacks->get_variable)
...@@ -808,7 +709,7 @@ other . ...@@ -808,7 +709,7 @@ other .
else else
{ {
/* OK, perform substitution */ /* OK, perform substitution */
push_new_buffer(cur_state, value, varname); psqlscan_push_new_buffer(cur_state, value, varname);
/* yy_scan_string already made buffer active */ /* yy_scan_string already made buffer active */
} }
free(value); free(value);
...@@ -826,11 +727,11 @@ other . ...@@ -826,11 +727,11 @@ other .
} }
:'{variable_char}+' { :'{variable_char}+' {
escape_variable(cur_state, yytext, yyleng, false); psqlscan_escape_variable(cur_state, yytext, yyleng, false);
} }
:\"{variable_char}+\" { :\"{variable_char}+\" {
escape_variable(cur_state, yytext, yyleng, true); psqlscan_escape_variable(cur_state, yytext, yyleng, true);
} }
/* /*
...@@ -955,15 +856,12 @@ other . ...@@ -955,15 +856,12 @@ other .
ECHO; ECHO;
} }
/* /*
* Everything from here down is psql-specific. * psql uses a single <<EOF>> rule, unlike the backend.
*/ */
<<EOF>> { <<EOF>> {
StackElem *stackelem = cur_state->buffer_stack; if (cur_state->buffer_stack == NULL)
if (stackelem == NULL)
{ {
cur_state->start_state = YY_START; cur_state->start_state = YY_START;
return LEXRES_EOL; /* end of input reached */ return LEXRES_EOL; /* end of input reached */
...@@ -973,290 +871,10 @@ other . ...@@ -973,290 +871,10 @@ other .
* We were expanding a variable, so pop the inclusion * We were expanding a variable, so pop the inclusion
* stack and keep lexing * stack and keep lexing
*/ */
pop_buffer_stack(cur_state); psqlscan_pop_buffer_stack(cur_state);
psqlscan_select_top_buffer(cur_state);
stackelem = cur_state->buffer_stack;
if (stackelem != NULL)
{
yy_switch_to_buffer(stackelem->buf, cur_state->scanner);
cur_state->curline = stackelem->bufstring;
cur_state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
}
else
{
yy_switch_to_buffer(cur_state->scanbufhandle, cur_state->scanner);
cur_state->curline = cur_state->scanbuf;
cur_state->refline = cur_state->scanline;
}
}
/*
* Exclusive lexer states to handle backslash command lexing
*/
<xslashcmd>{
/* command name ends at whitespace or backslash; eat all else */
{space}|"\\" {
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
{other} { ECHO; }
}
<xslashargstart>{
/*
* Discard any whitespace before argument, then go to xslasharg state.
* An exception is that "|" is only special at start of argument, so we
* check for it here.
*/
{space}+ { }
"|" {
if (option_type == OT_FILEPIPE)
{
/* treat like whole-string case */
ECHO;
BEGIN(xslashwholeline);
}
else
{
/* vertical bar is not special otherwise */
yyless(0);
BEGIN(xslasharg);
}
}
{other} {
yyless(0);
BEGIN(xslasharg);
}
}
<xslasharg>{
/*
* Default processing of text in a slash command's argument.
*
* Note: unquoted_option_chars counts the number of characters at the
* end of the argument that were not subject to any form of quoting.
* psql_scan_slash_option needs this to strip trailing semicolons safely.
*/
{space}|"\\" {
/*
* Unquoted space is end of arg; do not eat. Likewise
* backslash is end of command or next command, do not eat
*
* XXX this means we can't conveniently accept options
* that include unquoted backslashes; therefore, option
* processing that encourages use of backslashes is rather
* broken.
*/
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
{quote} {
*option_quote = '\'';
unquoted_option_chars = 0;
BEGIN(xslashquote);
}
"`" {
backtick_start_offset = output_buf->len;
*option_quote = '`';
unquoted_option_chars = 0;
BEGIN(xslashbackquote);
}
{dquote} {
ECHO;
*option_quote = '"';
unquoted_option_chars = 0;
BEGIN(xslashdquote);
}
:{variable_char}+ {
/* Possible psql variable substitution */
if (option_type == OT_NO_EVAL ||
cur_state->callbacks->get_variable == NULL)
ECHO;
else
{
char *varname;
char *value;
varname = extract_substring(cur_state,
yytext + 1,
yyleng - 1);
value = cur_state->callbacks->get_variable(varname,
false,
false);
free(varname);
/*
* The variable value is just emitted without any
* further examination. This is consistent with the
* pre-8.0 code behavior, if not with the way that
* variables are handled outside backslash commands.
* Note that we needn't guard against recursion here.
*/
if (value)
{
appendPQExpBufferStr(output_buf, value);
free(value);
}
else
ECHO;
*option_quote = ':';
}
unquoted_option_chars = 0;
}
:'{variable_char}+' {
if (option_type == OT_NO_EVAL)
ECHO;
else
{
escape_variable(cur_state, yytext, yyleng, false);
*option_quote = ':';
}
unquoted_option_chars = 0;
}
:\"{variable_char}+\" {
if (option_type == OT_NO_EVAL)
ECHO;
else
{
escape_variable(cur_state, yytext, yyleng, true);
*option_quote = ':';
}
unquoted_option_chars = 0;
}
:'{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
:\"{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
{other} {
unquoted_option_chars++;
ECHO;
}
}
<xslashquote>{
/*
* single-quoted text: copy literally except for '' and backslash
* sequences
*/
{quote} { BEGIN(xslasharg); }
{xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
"\\n" { appendPQExpBufferChar(output_buf, '\n'); }
"\\t" { appendPQExpBufferChar(output_buf, '\t'); }
"\\b" { appendPQExpBufferChar(output_buf, '\b'); }
"\\r" { appendPQExpBufferChar(output_buf, '\r'); }
"\\f" { appendPQExpBufferChar(output_buf, '\f'); }
{xeoctesc} {
/* octal case */
appendPQExpBufferChar(output_buf,
(char) strtol(yytext + 1, NULL, 8));
}
{xehexesc} {
/* hex case */
appendPQExpBufferChar(output_buf,
(char) strtol(yytext + 2, NULL, 16));
}
"\\". { emit(cur_state, yytext + 1, 1); }
{other}|\n { ECHO; }
}
<xslashbackquote>{
/*
* backticked text: copy everything until next backquote, then evaluate.
*
* XXX Possible future behavioral change: substitute for :VARIABLE?
*/
"`" {
/* In NO_EVAL mode, don't evaluate the command */
if (option_type != OT_NO_EVAL)
evaluate_backtick(cur_state);
BEGIN(xslasharg);
} }
{other}|\n { ECHO; }
}
<xslashdquote>{
/* double-quoted text: copy verbatim, including the double quotes */
{dquote} {
ECHO;
BEGIN(xslasharg);
}
{other}|\n { ECHO; }
}
<xslashwholeline>{
/* copy everything until end of input line */
/* but suppress leading whitespace */
{space}+ {
if (output_buf->len > 0)
ECHO;
}
{other} { ECHO; }
}
<xslashend>{
/* at end of command, eat a double backslash, but not anything else */
"\\\\" {
cur_state->start_state = YY_START;
return LEXRES_OK;
}
{other}|\n {
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
}
%% %%
/* /*
...@@ -1326,7 +944,7 @@ psql_scan_setup(PsqlScanState state, ...@@ -1326,7 +944,7 @@ psql_scan_setup(PsqlScanState state,
state->std_strings = std_strings; state->std_strings = std_strings;
/* Set up flex input buffer with appropriate translation and padding */ /* Set up flex input buffer with appropriate translation and padding */
state->scanbufhandle = prepare_buffer(state, line, line_len, state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
&state->scanbuf); &state->scanbuf);
state->scanline = line; state->scanline = line;
...@@ -1348,10 +966,10 @@ psql_scan_setup(PsqlScanState state, ...@@ -1348,10 +966,10 @@ psql_scan_setup(PsqlScanState state,
* be executed, then clear query_buf and call again to scan the remainder * be executed, then clear query_buf and call again to scan the remainder
* of the line. * of the line.
* *
* PSCAN_BACKSLASH: found a backslash that starts a psql special command. * PSCAN_BACKSLASH: found a backslash that starts a special command.
* Any previous data on the line has been transferred to query_buf. * Any previous data on the line has been transferred to query_buf.
* The caller will typically next call psql_scan_slash_command(), * The caller will typically next apply a separate flex lexer to scan
* perhaps psql_scan_slash_option(), and psql_scan_slash_command_end(). * the special command.
* *
* PSCAN_INCOMPLETE: the end of the line was reached, but we have an * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
* incomplete SQL command. *prompt is set to the appropriate prompt type. * incomplete SQL command. *prompt is set to the appropriate prompt type.
...@@ -1398,7 +1016,6 @@ psql_scan(PsqlScanState state, ...@@ -1398,7 +1016,6 @@ psql_scan(PsqlScanState state,
case LEXRES_EOL: /* end of input */ case LEXRES_EOL: /* end of input */
switch (state->start_state) switch (state->start_state)
{ {
/* This switch must cover all non-slash-command states. */
case INITIAL: case INITIAL:
case xuiend: /* we treat these like INITIAL */ case xuiend: /* we treat these like INITIAL */
case xusend: case xusend:
...@@ -1492,7 +1109,7 @@ psql_scan_finish(PsqlScanState state) ...@@ -1492,7 +1109,7 @@ psql_scan_finish(PsqlScanState state)
{ {
/* Drop any incomplete variable expansions. */ /* Drop any incomplete variable expansions. */
while (state->buffer_stack != NULL) while (state->buffer_stack != NULL)
pop_buffer_stack(state); psqlscan_pop_buffer_stack(state);
/* Done with the outer scan buffer, too */ /* Done with the outer scan buffer, too */
if (state->scanbufhandle) if (state->scanbufhandle)
...@@ -1526,319 +1143,37 @@ psql_scan_reset(PsqlScanState state) ...@@ -1526,319 +1143,37 @@ psql_scan_reset(PsqlScanState state)
} }
/* /*
* Return true if lexer is currently in an "inside quotes" state. * Reselect this lexer (psqlscan.l) after using another one.
*
* This is pretty grotty but is needed to preserve the old behavior
* that mainloop.c drops blank lines not inside quotes without even
* echoing them.
*/
bool
psql_scan_in_quote(PsqlScanState state)
{
return state->start_state != INITIAL;
}
/*
* Scan the command name of a psql backslash command. This should be called
* after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
* has been consumed through the leading backslash.
*
* The return value is a malloc'd copy of the command name, as parsed off
* from the input.
*/
char *
psql_scan_slash_command(PsqlScanState state)
{
PQExpBufferData mybuf;
/* Must be scanning already */
Assert(state->scanbufhandle != NULL);
/* Build a local buffer that we'll return the data of */
initPQExpBuffer(&mybuf);
/* Set current output target */
state->output_buf = &mybuf;
/* Set input source */
if (state->buffer_stack != NULL)
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
else
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
/* Set lexer start state */
state->start_state = xslashcmd;
/* And lex. */
yylex(state->scanner);
/* There are no possible errors in this lex state... */
/* Reset lexer state in case it's time to return to regular parsing */
state->start_state = INITIAL;
return mybuf.data;
}
/*
* Parse off the next argument for a backslash command, and return it as a
* malloc'd string. If there are no more arguments, returns NULL.
*
* type tells what processing, if any, to perform on the option string;
* for example, if it's a SQL identifier, we want to downcase any unquoted
* letters.
* *
* if quote is not NULL, *quote is set to 0 if no quoting was found, else * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
* the last quote symbol used in the argument. * state, because we'd never switch to another lexer in a different state.
* However, we don't want to reset e.g. paren_depth, so this can't be
* the same as psql_scan_reset().
* *
* if semicolon is true, unquoted trailing semicolon(s) that would otherwise * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
* be taken as part of the option string will be stripped. * must be a superset of this.
* *
* NOTE: the only possible syntax errors for backslash options are unmatched * Note: it seems likely that other lexers could just assign INITIAL for
* quotes, which are detected when we run out of input. Therefore, on a * themselves, since that probably has the value zero in every flex-generated
* syntax error we just throw away the string and return NULL; there is no * lexer. But let's not assume that.
* need to worry about flushing remaining input.
*/
char *
psql_scan_slash_option(PsqlScanState state,
enum slash_option_type type,
char *quote,
bool semicolon)
{
PQExpBufferData mybuf;
int lexresult PG_USED_FOR_ASSERTS_ONLY;
int final_state;
char local_quote;
/* Must be scanning already */
Assert(state->scanbufhandle != NULL);
if (quote == NULL)
quote = &local_quote;
*quote = 0;
/* Build a local buffer that we'll return the data of */
initPQExpBuffer(&mybuf);
/* Set up static variables that will be used by yylex */
option_type = type;
option_quote = quote;
unquoted_option_chars = 0;
/* Set current output target */
state->output_buf = &mybuf;
/* Set input source */
if (state->buffer_stack != NULL)
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
else
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
/* Set lexer start state */
if (type == OT_WHOLE_LINE)
state->start_state = xslashwholeline;
else
state->start_state = xslashargstart;
/* And lex. */
lexresult = yylex(state->scanner);
/* Reset lexer state in case it's time to return to regular parsing */
final_state = state->start_state;
state->start_state = INITIAL;
/*
* Check the lex result: we should have gotten back either LEXRES_OK
* or LEXRES_EOL (the latter indicating end of string). If we were inside
* a quoted string, as indicated by final_state, EOL is an error.
*/
Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
switch (final_state)
{
case xslashargstart:
/* empty arg */
break;
case xslasharg:
/* Strip any unquoted trailing semi-colons if requested */
if (semicolon)
{
while (unquoted_option_chars-- > 0 &&
mybuf.len > 0 &&
mybuf.data[mybuf.len - 1] == ';')
{
mybuf.data[--mybuf.len] = '\0';
}
}
/*
* If SQL identifier processing was requested, then we strip out
* excess double quotes and downcase unquoted letters.
* Doubled double-quotes become output double-quotes, per spec.
*
* Note that a string like FOO"BAR"BAZ will be converted to
* fooBARbaz; this is somewhat inconsistent with the SQL spec,
* which would have us parse it as several identifiers. But
* for psql's purposes, we want a string like "foo"."bar" to
* be treated as one option, so there's little choice.
*/
if (type == OT_SQLID || type == OT_SQLIDHACK)
{
bool inquotes = false;
char *cp = mybuf.data;
while (*cp)
{
if (*cp == '"')
{
if (inquotes && cp[1] == '"')
{
/* Keep the first quote, remove the second */
cp++;
}
inquotes = !inquotes;
/* Collapse out quote at *cp */
memmove(cp, cp + 1, strlen(cp));
mybuf.len--;
/* do not advance cp */
}
else
{
if (!inquotes && type == OT_SQLID)
*cp = pg_tolower((unsigned char) *cp);
cp += PQmblen(cp, state->encoding);
}
}
}
break;
case xslashquote:
case xslashbackquote:
case xslashdquote:
/* must have hit EOL inside quotes */
state->callbacks->write_error("unterminated quoted string\n");
termPQExpBuffer(&mybuf);
return NULL;
case xslashwholeline:
/* always okay */
break;
default:
/* can't get here */
fprintf(stderr, "invalid YY_START\n");
exit(1);
}
/*
* An unquoted empty argument isn't possible unless we are at end of
* command. Return NULL instead.
*/
if (mybuf.len == 0 && *quote == 0)
{
termPQExpBuffer(&mybuf);
return NULL;
}
/* Else return the completed string. */
return mybuf.data;
}
/*
* Eat up any unused \\ to complete a backslash command.
*/ */
void void
psql_scan_slash_command_end(PsqlScanState state) psql_scan_reselect_sql_lexer(PsqlScanState state)
{ {
/* Must be scanning already */
Assert(state->scanbufhandle != NULL);
/* Set current output target */
state->output_buf = NULL; /* we won't output anything */
/* Set input source */
if (state->buffer_stack != NULL)
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
else
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
/* Set lexer start state */
state->start_state = xslashend;
/* And lex. */
yylex(state->scanner);
/* There are no possible errors in this lex state... */
/* Reset lexer state in case it's time to return to regular parsing */
state->start_state = INITIAL; state->start_state = INITIAL;
} }
/* /*
* Evaluate a backticked substring of a slash command's argument. * Return true if lexer is currently in an "inside quotes" state.
* *
* The portion of output_buf starting at backtick_start_offset is evaluated * This is pretty grotty but is needed to preserve the old behavior
* as a shell command and then replaced by the command's output. * that mainloop.c drops blank lines not inside quotes without even
* echoing them.
*/ */
static void bool
evaluate_backtick(PsqlScanState state) psql_scan_in_quote(PsqlScanState state)
{ {
PQExpBuffer output_buf = state->output_buf; return state->start_state != INITIAL;
char *cmd = output_buf->data + backtick_start_offset;
PQExpBufferData cmd_output;
FILE *fd;
bool error = false;
char buf[512];
size_t result;
initPQExpBuffer(&cmd_output);
fd = popen(cmd, PG_BINARY_R);
if (!fd)
{
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
error = true;
}
if (!error)
{
do
{
result = fread(buf, 1, sizeof(buf), fd);
if (ferror(fd))
{
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
error = true;
break;
}
appendBinaryPQExpBuffer(&cmd_output, buf, result);
} while (!feof(fd));
}
if (fd && pclose(fd) == -1)
{
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
error = true;
}
if (PQExpBufferDataBroken(cmd_output))
{
state->callbacks->write_error("%s: out of memory\n", cmd);
error = true;
}
/* Now done with cmd, delete it from output_buf */
output_buf->len = backtick_start_offset;
output_buf->data[output_buf->len] = '\0';
/* If no error, transfer result to output_buf */
if (!error)
{
/* strip any trailing newline */
if (cmd_output.len > 0 &&
cmd_output.data[cmd_output.len - 1] == '\n')
cmd_output.len--;
appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
}
termPQExpBuffer(&cmd_output);
} }
/* /*
...@@ -1846,8 +1181,9 @@ evaluate_backtick(PsqlScanState state) ...@@ -1846,8 +1181,9 @@ evaluate_backtick(PsqlScanState state)
* *
* NOTE SIDE EFFECT: the new buffer is made the active flex input buffer. * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
*/ */
static void void
push_new_buffer(PsqlScanState state, const char *newstr, const char *varname) psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
const char *varname)
{ {
StackElem *stackelem; StackElem *stackelem;
...@@ -1855,12 +1191,12 @@ push_new_buffer(PsqlScanState state, const char *newstr, const char *varname) ...@@ -1855,12 +1191,12 @@ push_new_buffer(PsqlScanState state, const char *newstr, const char *varname)
/* /*
* In current usage, the passed varname points at the current flex * In current usage, the passed varname points at the current flex
* input buffer; we must copy it before calling prepare_buffer() * input buffer; we must copy it before calling psqlscan_prepare_buffer()
* because that will change the buffer state. * because that will change the buffer state.
*/ */
stackelem->varname = varname ? pg_strdup(varname) : NULL; stackelem->varname = varname ? pg_strdup(varname) : NULL;
stackelem->buf = prepare_buffer(state, newstr, strlen(newstr), stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
&stackelem->bufstring); &stackelem->bufstring);
state->curline = stackelem->bufstring; state->curline = stackelem->bufstring;
if (state->safe_encoding) if (state->safe_encoding)
...@@ -1882,9 +1218,10 @@ push_new_buffer(PsqlScanState state, const char *newstr, const char *varname) ...@@ -1882,9 +1218,10 @@ push_new_buffer(PsqlScanState state, const char *newstr, const char *varname)
* *
* NB: after this, the flex input state is unspecified; caller must * NB: after this, the flex input state is unspecified; caller must
* switch to an appropriate buffer to continue lexing. * switch to an appropriate buffer to continue lexing.
* See psqlscan_select_top_buffer().
*/ */
static void void
pop_buffer_stack(PsqlScanState state) psqlscan_pop_buffer_stack(PsqlScanState state)
{ {
StackElem *stackelem = state->buffer_stack; StackElem *stackelem = state->buffer_stack;
...@@ -1898,6 +1235,28 @@ pop_buffer_stack(PsqlScanState state) ...@@ -1898,6 +1235,28 @@ pop_buffer_stack(PsqlScanState state)
free(stackelem); free(stackelem);
} }
/*
 * Make flex read from whatever buffer is now on top.
 *
 * If the variable-expansion stack still has an entry, its buffer becomes the
 * active flex input; otherwise we go back to the outer buffer identified by
 * scanbufhandle.  curline/refline are updated to match the chosen buffer.
 */
void
psqlscan_select_top_buffer(PsqlScanState state)
{
	StackElem  *top = state->buffer_stack;

	if (top == NULL)
	{
		/* Stack is empty: resume reading the outer-level buffer */
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);
		state->curline = state->scanbuf;
		state->refline = state->scanline;
		return;
	}

	/* Resume reading the topmost stacked buffer */
	yy_switch_to_buffer(top->buf, state->scanner);
	state->curline = top->bufstring;

	/* Prefer the saved original text, if one was kept, as the reference */
	if (top->origstring != NULL)
		state->refline = top->origstring;
	else
		state->refline = top->bufstring;
}
/* /*
* Check if specified variable name is the source for any string * Check if specified variable name is the source for any string
* currently being scanned * currently being scanned
...@@ -1924,8 +1283,9 @@ var_is_current_source(PsqlScanState state, const char *varname) ...@@ -1924,8 +1283,9 @@ var_is_current_source(PsqlScanState state, const char *varname)
* *
* NOTE SIDE EFFECT: the new buffer is made the active flex input buffer. * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
*/ */
static YY_BUFFER_STATE YY_BUFFER_STATE
prepare_buffer(PsqlScanState state, const char *txt, int len, char **txtcopy) psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
char **txtcopy)
{ {
char *newtxt; char *newtxt;
...@@ -1957,15 +1317,15 @@ prepare_buffer(PsqlScanState state, const char *txt, int len, char **txtcopy) ...@@ -1957,15 +1317,15 @@ prepare_buffer(PsqlScanState state, const char *txt, int len, char **txtcopy)
} }
/* /*
* emit() --- body for ECHO macro * psqlscan_emit() --- body for ECHO macro
* *
* NB: this must be used for ALL and ONLY the text copied from the flex * NB: this must be used for ALL and ONLY the text copied from the flex
* input data. If you pass it something that is not part of the yytext * input data. If you pass it something that is not part of the yytext
* string, you are making a mistake. Internally generated text can be * string, you are making a mistake. Internally generated text can be
* appended directly to output_buf. * appended directly to state->output_buf.
*/ */
static void void
emit(PsqlScanState state, const char *txt, int len) psqlscan_emit(PsqlScanState state, const char *txt, int len)
{ {
PQExpBuffer output_buf = state->output_buf; PQExpBuffer output_buf = state->output_buf;
...@@ -1991,13 +1351,13 @@ emit(PsqlScanState state, const char *txt, int len) ...@@ -1991,13 +1351,13 @@ emit(PsqlScanState state, const char *txt, int len)
} }
/* /*
* extract_substring --- fetch the true value of (part of) the current token * psqlscan_extract_substring --- fetch value of (part of) the current token
* *
* This is like emit(), except that the data is returned as a malloc'd string * This is like psqlscan_emit(), except that the data is returned as a
* rather than being pushed directly to output_buf. * malloc'd string rather than being pushed directly to state->output_buf.
*/ */
static char * char *
extract_substring(PsqlScanState state, const char *txt, int len) psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
{ {
char *result = (char *) pg_malloc(len + 1); char *result = (char *) pg_malloc(len + 1);
...@@ -2025,21 +1385,22 @@ extract_substring(PsqlScanState state, const char *txt, int len) ...@@ -2025,21 +1385,22 @@ extract_substring(PsqlScanState state, const char *txt, int len)
} }
/* /*
* escape_variable --- process :'VARIABLE' or :"VARIABLE" * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
* *
* If the variable name is found, escape its value using the appropriate * If the variable name is found, escape its value using the appropriate
* quoting method and emit the value to output_buf. (Since the result is * quoting method and emit the value to output_buf. (Since the result is
* surely quoted, there is never any reason to rescan it.) If we don't * surely quoted, there is never any reason to rescan it.) If we don't
* find the variable or escaping fails, emit the token as-is. * find the variable or escaping fails, emit the token as-is.
*/ */
static void void
escape_variable(PsqlScanState state, const char *txt, int len, bool as_ident) psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
bool as_ident)
{ {
char *varname; char *varname;
char *value; char *value;
/* Variable lookup. */ /* Variable lookup. */
varname = extract_substring(state, txt + 2, len - 3); varname = psqlscan_extract_substring(state, txt + 2, len - 3);
if (state->callbacks->get_variable) if (state->callbacks->get_variable)
value = state->callbacks->get_variable(varname, true, as_ident); value = state->callbacks->get_variable(varname, true, as_ident);
else else
...@@ -2055,6 +1416,6 @@ escape_variable(PsqlScanState state, const char *txt, int len, bool as_ident) ...@@ -2055,6 +1416,6 @@ escape_variable(PsqlScanState state, const char *txt, int len, bool as_ident)
else else
{ {
/* Emit original token as-is */ /* Emit original token as-is */
emit(state, txt, len); psqlscan_emit(state, txt, len);
} }
} }
/*
* psqlscan_int.h
* lexical scanner internal declarations
*
* This file declares the PsqlScanStateData structure used by psqlscan.l
* and shared by other lexers compatible with it, such as psqlscanslash.l.
*
* One difficult aspect of this code is that we need to work in multibyte
* encodings that are not ASCII-safe. A "safe" encoding is one in which each
* byte of a multibyte character has the high bit set (it's >= 0x80). Since
* all our lexing rules treat all high-bit-set characters alike, we don't
* really need to care whether such a byte is part of a sequence or not.
* In an "unsafe" encoding, we still expect the first byte of a multibyte
* sequence to be >= 0x80, but later bytes might not be. If we scan such
* a sequence as-is, the lexing rules could easily be fooled into matching
* such bytes to ordinary ASCII characters. Our solution for this is to
* substitute 0xFF for each non-first byte within the data presented to flex.
* The flex rules will then pass the FF's through unmolested. The
* psqlscan_emit() subroutine is responsible for looking back to the original
* string and replacing FF's with the corresponding original bytes.
*
* Another interesting thing we do here is scan different parts of the same
* input with physically separate flex lexers (ie, lexers written in separate
* .l files). We can get away with this because the only part of the
* persistent state of a flex lexer that depends on its parsing rule tables
* is the start state number, which is easy enough to manage --- usually,
* in fact, we just need to set it to INITIAL when changing lexers. But to
* make that work at all, we must use re-entrant lexers, so that all the
* relevant state is in the yyscan_t attached to the PsqlScanState;
* if we were using lexers with separate static state we would soon end up
* with dangling buffer pointers in one or the other. Also note that this
* is unlikely to work very nicely if the lexers aren't all built with the
* same flex version.
*
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
*
* src/bin/psql/psqlscan_int.h
*/
#ifndef PSQLSCAN_INT_H
#define PSQLSCAN_INT_H
#include "psqlscan.h"
/* This is just to allow this file to be compilable standalone */
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
/*
* We use a stack of flex buffers to handle substitution of psql variables.
* Each stacked buffer contains the as-yet-unread text from one psql variable.
* When we pop the stack all the way, we resume reading from the outer buffer
* identified by scanbufhandle.
*/
typedef struct StackElem
{
YY_BUFFER_STATE buf; /* flex input control structure */
char *bufstring; /* data actually being scanned by flex */
/*
* When origstring is not NULL, psqlscan_select_top_buffer() uses it as the
* reference line (refline) in preference to bufstring.
*/
char *origstring; /* copy of original data, if needed */
char *varname; /* name of variable providing data, or NULL */
/* NOTE(review): presumably the entry beneath this one on the stack -- confirm */
struct StackElem *next;
} StackElem;
/*
* All working state of the lexer must be stored in PsqlScanStateData
* between calls. This allows us to have multiple open lexer operations,
* which is needed for nested include files. The lexer itself is not
* recursive, but it must be re-entrant.
*/
typedef struct PsqlScanStateData
{
yyscan_t scanner; /* Flex's state for this PsqlScanState */
PQExpBuffer output_buf; /* current output buffer */
StackElem *buffer_stack; /* stack of variable expansion buffers */
/*
* These variables always refer to the outer buffer, never to any stacked
* variable-expansion buffer.
*/
YY_BUFFER_STATE scanbufhandle; /* flex buffer handle for the outer buffer */
char *scanbuf; /* start of outer-level input buffer */
const char *scanline; /* current input line at outer level */
/*
* safe_encoding, curline, refline are used by psqlscan_emit() to replace
* FFs
*/
int encoding; /* encoding being used now */
bool safe_encoding; /* is current encoding "safe"? */
bool std_strings; /* are string literals standard? */
const char *curline; /* actual flex input string for cur buf */
const char *refline; /* original data for cur buffer */
/*
* All this state lives across successive input lines, until explicitly
* reset by psql_scan_reset. start_state is adopted by yylex() on entry,
* and updated with its finishing state on exit.
*/
int start_state; /* yylex's starting/finishing state */
int paren_depth; /* depth of nesting in parentheses */
int xcdepth; /* depth of nesting in slash-star comments */
char *dolqstart; /* current $foo$ quote start string */
/*
* Callback functions provided by the program making use of the lexer.
* The lexers call callbacks->get_variable (which may be NULL) to look up
* variable values and callbacks->write_error to report problems.
*/
const PsqlScanCallbacks *callbacks;
} PsqlScanStateData;
/*
* Functions exported by psqlscan.l, but only meant for use within
* compatible lexers.
*/
extern void psqlscan_push_new_buffer(PsqlScanState state,
const char *newstr, const char *varname);
extern void psqlscan_pop_buffer_stack(PsqlScanState state);
extern void psqlscan_select_top_buffer(PsqlScanState state);
extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
const char *txt, int len,
char **txtcopy);
extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
extern char *psqlscan_extract_substring(PsqlScanState state,
const char *txt, int len);
extern void psqlscan_escape_variable(PsqlScanState state,
const char *txt, int len,
bool as_ident);
#endif /* PSQLSCAN_INT_H */
/*
* psql - the PostgreSQL interactive terminal
*
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
*
* src/bin/psql/psqlscanslash.h
*/
#ifndef PSQLSCANSLASH_H
#define PSQLSCANSLASH_H
#include "psqlscan.h"
/*
* Different ways for psql_scan_slash_option to handle parameter words.
*
* OT_SQLID and OT_SQLIDHACK both strip excess double quotes from the finished
* argument; only OT_SQLID also downcases the unquoted letters.
* OT_FILEPIPE makes a leading "|" switch the lexer to whole-line mode.
* OT_WHOLE_LINE starts the lexer directly in whole-line mode.
* OT_NO_EVAL causes variable references to be copied as-is and backticked
* commands not to be executed.
*/
enum slash_option_type
{
OT_NORMAL, /* normal case */
OT_SQLID, /* treat as SQL identifier */
OT_SQLIDHACK, /* SQL identifier, but don't downcase */
OT_FILEPIPE, /* it's a filename or pipe */
OT_WHOLE_LINE, /* just snarf the rest of the line */
OT_NO_EVAL /* no expansion of backticks or variables */
};
extern char *psql_scan_slash_command(PsqlScanState state);
extern char *psql_scan_slash_option(PsqlScanState state,
enum slash_option_type type,
char *quote,
bool semicolon);
extern void psql_scan_slash_command_end(PsqlScanState state);
#endif /* PSQLSCANSLASH_H */
%top{
/*-------------------------------------------------------------------------
*
* psqlscanslash.l
* lexical scanner for psql backslash commands
*
* XXX Avoid creating backtracking cases --- see the backend lexer for info.
*
* See psqlscan_int.h for additional commentary.
*
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/psql/psqlscanslash.l
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include "psqlscanslash.h"
#include "libpq-fe.h"
}
%{
#include "psqlscan_int.h"
/*
* Set the type of yyextra; we use it as a pointer back to the containing
* PsqlScanState.
*/
#define YY_EXTRA_TYPE PsqlScanState
/*
* These variables do not need to be saved across calls. Yeah, it's a bit
* of a hack, but putting them into PsqlScanStateData would be klugy too.
*/
static enum slash_option_type option_type;
static char *option_quote;
static int unquoted_option_chars;
static int backtick_start_offset;
/* Return values from yylex() */
#define LEXRES_EOL 0 /* end of input */
#define LEXRES_OK 1 /* OK completion of backslash argument */
static void evaluate_backtick(PsqlScanState state);
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)
/*
* Work around a bug in flex 2.5.35: it emits a couple of functions that
* it forgets to emit declarations for. Since we use -Wmissing-prototypes,
* this would cause warnings. Providing our own declarations should be
* harmless even when the bug gets fixed.
*/
extern int slash_yyget_column(yyscan_t yyscanner);
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
%}
%option reentrant
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="slash_yy"
/*
* OK, here is a short description of lex/flex rules behavior.
* The longest pattern which matches an input string is always chosen.
* For equal-length patterns, the first occurring in the rules list is chosen.
* INITIAL is the starting state, to which all non-conditional rules apply.
* Exclusive states change parsing rules while the state is active. When in
* an exclusive state, only those rules defined for that state apply.
*/
/* Exclusive states for lexing backslash commands */
%x xslashcmd
%x xslashargstart
%x xslasharg
%x xslashquote
%x xslashbackquote
%x xslashdquote
%x xslashwholeline
%x xslashend
/*
* Assorted character class definitions that should match psqlscan.l.
*/
space [ \t\n\r\f]
quote '
xeoctesc [\\][0-7]{1,3}
xehexesc [\\]x[0-9A-Fa-f]{1,2}
xqdouble {quote}{quote}
dquote \"
variable_char [A-Za-z\200-\377_0-9]
other .
%%
%{
/* Declare some local variables inside yylex(), for convenience */
PsqlScanState cur_state = yyextra;
PQExpBuffer output_buf = cur_state->output_buf;
/*
* Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a
* new starting state without ugly direct access to flex variables,
* and it allows us to transition from one flex lexer to another
* so that we can lex different parts of the source string using
* separate lexers.
*/
BEGIN(cur_state->start_state);
%}
/*
* We don't really expect to be invoked in the INITIAL state in this
* lexer; but if we are, just spit data to the output_buf until EOF.
* (The explicit \n alternative is needed because {other} is ".", which
* does not match a newline.)
*/
{other}|\n { ECHO; }
/*
* Exclusive lexer states to handle backslash command lexing
*/
<xslashcmd>{
/* command name ends at whitespace or backslash; eat all else */
{space}|"\\" {
/* push the terminator back so that it is not consumed here */
yyless(0);
/* remember the finishing state for the caller */
cur_state->start_state = YY_START;
return LEXRES_OK;
}
/* any other character is part of the command name */
{other} { ECHO; }
}
<xslashargstart>{
/*
* Discard any whitespace before argument, then go to xslasharg state.
* An exception is that "|" is only special at start of argument, so we
* check for it here.
*/
{space}+ { }
"|" {
if (option_type == OT_FILEPIPE)
{
/* treat like whole-string case */
ECHO;
BEGIN(xslashwholeline);
}
else
{
/* vertical bar is not special otherwise */
/* push it back so that xslasharg processes it as ordinary text */
yyless(0);
BEGIN(xslasharg);
}
}
{other} {
/* first character of the argument: push it back and rescan in xslasharg */
yyless(0);
BEGIN(xslasharg);
}
}
<xslasharg>{
/*
* Default processing of text in a slash command's argument.
*
* Note: unquoted_option_chars counts the number of characters at the
* end of the argument that were not subject to any form of quoting.
* psql_scan_slash_option needs this to strip trailing semicolons safely.
* Every quoting or substitution rule below therefore resets it to zero,
* and every rule that copies a plain character increments it.
*/
{space}|"\\" {
/*
* Unquoted space is end of arg; do not eat. Likewise
* backslash is end of command or next command, do not eat
*
* XXX this means we can't conveniently accept options
* that include unquoted backslashes; therefore, option
* processing that encourages use of backslashes is rather
* broken.
*/
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
/* Opening single quote: the quote itself is not copied to the output */
{quote} {
*option_quote = '\'';
unquoted_option_chars = 0;
BEGIN(xslashquote);
}
/* Opening backtick: remember where the command text starts in output_buf */
"`" {
backtick_start_offset = output_buf->len;
*option_quote = '`';
unquoted_option_chars = 0;
BEGIN(xslashbackquote);
}
/* Opening double quote: unlike the cases above, it is copied to the output */
{dquote} {
ECHO;
*option_quote = '"';
unquoted_option_chars = 0;
BEGIN(xslashdquote);
}
:{variable_char}+ {
/* Possible psql variable substitution */
if (option_type == OT_NO_EVAL ||
cur_state->callbacks->get_variable == NULL)
ECHO;
else
{
char *varname;
char *value;
/* skip the leading colon to get the variable's name */
varname = psqlscan_extract_substring(cur_state,
yytext + 1,
yyleng - 1);
value = cur_state->callbacks->get_variable(varname,
false,
false);
free(varname);
/*
* The variable value is just emitted without any
* further examination. This is consistent with the
* pre-8.0 code behavior, if not with the way that
* variables are handled outside backslash commands.
* Note that we needn't guard against recursion here.
*/
if (value)
{
appendPQExpBufferStr(output_buf, value);
free(value);
}
else
ECHO;
*option_quote = ':';
}
unquoted_option_chars = 0;
}
/* :'name' --- let psqlscan_escape_variable substitute it (as_ident = false) */
:'{variable_char}+' {
if (option_type == OT_NO_EVAL)
ECHO;
else
{
psqlscan_escape_variable(cur_state, yytext, yyleng, false);
*option_quote = ':';
}
unquoted_option_chars = 0;
}
/* :"name" --- let psqlscan_escape_variable substitute it (as_ident = true) */
:\"{variable_char}+\" {
if (option_type == OT_NO_EVAL)
ECHO;
else
{
psqlscan_escape_variable(cur_state, yytext, yyleng, true);
*option_quote = ':';
}
unquoted_option_chars = 0;
}
/*
* The next two rules match a colon-quote sequence that lacks its closing
* quote: only the colon is consumed, as an ordinary unquoted character.
*/
:'{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
:\"{variable_char}* {
/* Throw back everything but the colon */
yyless(1);
unquoted_option_chars++;
ECHO;
}
{other} {
unquoted_option_chars++;
ECHO;
}
}
<xslashquote>{
/*
* single-quoted text: copy literally except for '' and backslash
* sequences
*/
/* closing quote: dropped from the output, back to ordinary argument text */
{quote} { BEGIN(xslasharg); }
/* doubled quote stands for one literal quote */
{xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
"\\n" { appendPQExpBufferChar(output_buf, '\n'); }
"\\t" { appendPQExpBufferChar(output_buf, '\t'); }
"\\b" { appendPQExpBufferChar(output_buf, '\b'); }
"\\r" { appendPQExpBufferChar(output_buf, '\r'); }
"\\f" { appendPQExpBufferChar(output_buf, '\f'); }
{xeoctesc} {
/* octal case */
/* yytext + 1 skips the backslash */
appendPQExpBufferChar(output_buf,
(char) strtol(yytext + 1, NULL, 8));
}
{xehexesc} {
/* hex case */
/* yytext + 2 skips the backslash and the 'x' */
appendPQExpBufferChar(output_buf,
(char) strtol(yytext + 2, NULL, 16));
}
/* any other backslash sequence: emit just the character after the backslash */
"\\". { psqlscan_emit(cur_state, yytext + 1, 1); }
{other}|\n { ECHO; }
}
<xslashbackquote>{
/*
* backticked text: copy everything until next backquote, then evaluate.
*
* The command text accumulates in output_buf starting at
* backtick_start_offset, which was recorded when the opening backtick
* was seen.
*
* XXX Possible future behavioral change: substitute for :VARIABLE?
*/
"`" {
/* In NO_EVAL mode, don't evaluate the command */
/* (the command text then simply stays in output_buf) */
if (option_type != OT_NO_EVAL)
evaluate_backtick(cur_state);
BEGIN(xslasharg);
}
{other}|\n { ECHO; }
}
<xslashdquote>{
/* double-quoted text: copy verbatim, including the double quotes */
/* closing quote: copy it too, then return to ordinary argument text */
{dquote} {
ECHO;
BEGIN(xslasharg);
}
{other}|\n { ECHO; }
}
<xslashwholeline>{
/* copy everything until end of input line */
/* but suppress leading whitespace */
{space}+ {
/* whitespace is "leading" as long as nothing has been output yet */
if (output_buf->len > 0)
ECHO;
}
{other} { ECHO; }
}
<xslashend>{
/* at end of command, eat a double backslash, but not anything else */
"\\\\" {
/* the two backslashes are consumed and not echoed */
cur_state->start_state = YY_START;
return LEXRES_OK;
}
{other}|\n {
/* push back whatever we saw, leaving it for the next lexer call */
yyless(0);
cur_state->start_state = YY_START;
return LEXRES_OK;
}
}
/*
* psql uses a single <<EOF>> rule, unlike the backend.
*
* End of the outer buffer ends the lexer call; end of a stacked
* variable-expansion buffer just resumes the buffer beneath it.
*/
<<EOF>> {
/* an empty buffer_stack means we were reading the outer buffer */
if (cur_state->buffer_stack == NULL)
{
cur_state->start_state = YY_START;
return LEXRES_EOL; /* end of input reached */
}
/*
* We were expanding a variable, so pop the inclusion
* stack and keep lexing
*/
psqlscan_pop_buffer_stack(cur_state);
/* popping leaves the flex input unspecified, so reselect explicitly */
psqlscan_select_top_buffer(cur_state);
}
%%
/*
 * Lex the name of a psql backslash command.
 *
 * Call this after psql_scan() has returned PSCAN_BACKSLASH; the input is
 * assumed to have been consumed through the leading backslash.
 *
 * Returns a malloc'd copy of the command name as parsed off from the input.
 */
char *
psql_scan_slash_command(PsqlScanState state)
{
	PQExpBufferData cmdname;

	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* Collect the lexer's output in a local buffer whose data we return */
	initPQExpBuffer(&cmdname);
	state->output_buf = &cmdname;

	/* Read from the topmost stacked buffer if any, else the outer buffer */
	yy_switch_to_buffer(state->buffer_stack != NULL ?
						state->buffer_stack->buf :
						state->scanbufhandle,
						state->scanner);

	/*
	 * Choosing one of this file's start states is all it takes to make
	 * state->scanner use the tables of this lexer.
	 */
	state->start_state = xslashcmd;

	/* Run the lexer; this start state cannot produce any errors */
	yylex(state->scanner);

	/*
	 * The caller may go back to the regular SQL lexer next, so put the
	 * appropriate initial state back in place.
	 */
	psql_scan_reselect_sql_lexer(state);

	return cmdname.data;
}
/*
 * Parse off the next argument for a backslash command, and return it as a
 * malloc'd string.  If there are no more arguments, returns NULL.
 *
 * type tells what processing, if any, to perform on the option string;
 * for example, if it's a SQL identifier, we want to downcase any unquoted
 * letters.
 *
 * if quote is not NULL, *quote is set to 0 if no quoting was found, else
 * the last quote symbol used in the argument.
 *
 * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
 * be taken as part of the option string will be stripped.
 *
 * NOTE: the only possible syntax errors for backslash options are unmatched
 * quotes, which are detected when we run out of input.  Therefore, on a
 * syntax error we just throw away the string and return NULL; there is no
 * need to worry about flushing remaining input.  The error is reported
 * through state->callbacks->write_error.
 */
char *
psql_scan_slash_option(PsqlScanState state,
					   enum slash_option_type type,
					   char *quote,
					   bool semicolon)
{
	PQExpBufferData mybuf;
	int			lexresult PG_USED_FOR_ASSERTS_ONLY;
	int			final_state;
	char		local_quote;

	/* Must be scanning already */
	Assert(state->scanbufhandle != NULL);

	/* Let the lexer rules store the quote type unconditionally */
	if (quote == NULL)
		quote = &local_quote;
	*quote = 0;

	/* Build a local buffer that we'll return the data of */
	initPQExpBuffer(&mybuf);

	/* Set up static variables that will be used by yylex */
	option_type = type;
	option_quote = quote;
	unquoted_option_chars = 0;

	/* Set current output target */
	state->output_buf = &mybuf;

	/* Set input source */
	if (state->buffer_stack != NULL)
		yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
	else
		yy_switch_to_buffer(state->scanbufhandle, state->scanner);

	/* Set lexer start state */
	if (type == OT_WHOLE_LINE)
		state->start_state = xslashwholeline;
	else
		state->start_state = xslashargstart;

	/* And lex. */
	lexresult = yylex(state->scanner);

	/* Save final state for a moment... */
	final_state = state->start_state;

	/*
	 * In case the caller returns to using the regular SQL lexer, reselect the
	 * appropriate initial state.
	 */
	psql_scan_reselect_sql_lexer(state);

	/*
	 * Check the lex result: we should have gotten back either LEXRES_OK
	 * or LEXRES_EOL (the latter indicating end of string).  If we were inside
	 * a quoted string, as indicated by final_state, EOL is an error.
	 */
	Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);

	switch (final_state)
	{
		case xslashargstart:
			/* empty arg */
			break;
		case xslasharg:
			/* Strip any unquoted trailing semi-colons if requested */
			if (semicolon)
			{
				while (unquoted_option_chars-- > 0 &&
					   mybuf.len > 0 &&
					   mybuf.data[mybuf.len - 1] == ';')
				{
					mybuf.data[--mybuf.len] = '\0';
				}
			}

			/*
			 * If SQL identifier processing was requested, then we strip out
			 * excess double quotes and downcase unquoted letters.
			 * Doubled double-quotes become output double-quotes, per spec.
			 *
			 * Note that a string like FOO"BAR"BAZ will be converted to
			 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
			 * which would have us parse it as several identifiers.  But
			 * for psql's purposes, we want a string like "foo"."bar" to
			 * be treated as one option, so there's little choice.
			 */
			if (type == OT_SQLID || type == OT_SQLIDHACK)
			{
				bool		inquotes = false;
				char	   *cp = mybuf.data;

				while (*cp)
				{
					if (*cp == '"')
					{
						if (inquotes && cp[1] == '"')
						{
							/*
							 * Doubled quote inside a quoted section: keep
							 * the first quote, remove the second.  We remain
							 * inside the quoted section, so inquotes must
							 * not be toggled here; otherwise the text that
							 * follows would wrongly be downcased.
							 */
							cp++;
						}
						else
							inquotes = !inquotes;
						/* Collapse out quote at *cp (moves the NUL too) */
						memmove(cp, cp + 1, strlen(cp));
						mybuf.len--;
						/* do not advance cp */
					}
					else
					{
						if (!inquotes && type == OT_SQLID)
							*cp = pg_tolower((unsigned char) *cp);
						/* step over a whole (possibly multibyte) character */
						cp += PQmblen(cp, state->encoding);
					}
				}
			}
			break;
		case xslashquote:
		case xslashbackquote:
		case xslashdquote:
			/* must have hit EOL inside quotes */
			state->callbacks->write_error("unterminated quoted string\n");
			termPQExpBuffer(&mybuf);
			return NULL;
		case xslashwholeline:
			/* always okay */
			break;
		default:
			/* can't get here */
			fprintf(stderr, "invalid YY_START\n");
			exit(1);
	}

	/*
	 * An unquoted empty argument isn't possible unless we are at end of
	 * command.  Return NULL instead.
	 */
	if (mybuf.len == 0 && *quote == 0)
	{
		termPQExpBuffer(&mybuf);
		return NULL;
	}

	/* Else return the completed string. */
	return mybuf.data;
}
/*
 * Finish a backslash command: consume an unused "\\" terminator if one comes
 * next in the input, and nothing else.
 */
void
psql_scan_slash_command_end(PsqlScanState state)
{
	YY_BUFFER_STATE input;

	/* A scan must already be in progress */
	Assert(state->scanbufhandle != NULL);

	/* This lexer state produces no output, so no output buffer is needed */
	state->output_buf = NULL;

	/* Read from the topmost stacked buffer if any, else the outer buffer */
	if (state->buffer_stack == NULL)
		input = state->scanbufhandle;
	else
		input = state->buffer_stack->buf;
	yy_switch_to_buffer(input, state->scanner);

	/* Run the lexer in xslashend; this state cannot produce any errors */
	state->start_state = xslashend;
	yylex(state->scanner);

	/*
	 * The caller is expected to go back to the regular SQL lexer, so put the
	 * appropriate initial state back in place.
	 */
	psql_scan_reselect_sql_lexer(state);
}
/*
 * Run the backticked part of a slash command's argument through the shell.
 *
 * The text in state->output_buf from backtick_start_offset onward is taken
 * as a shell command.  That text is always removed from output_buf; if the
 * command ran without error, its output (less one trailing newline) is
 * appended in its place.  Failures are reported via callbacks->write_error.
 */
static void
evaluate_backtick(PsqlScanState state)
{
	PQExpBuffer target = state->output_buf;
	char	   *shellcmd = target->data + backtick_start_offset;
	PQExpBufferData captured;
	FILE	   *pipe;
	bool		failed = false;
	char		chunk[512];

	initPQExpBuffer(&captured);

	pipe = popen(shellcmd, PG_BINARY_R);
	if (pipe == NULL)
	{
		state->callbacks->write_error("%s: %s\n", shellcmd, strerror(errno));
		failed = true;
	}
	else
	{
		/* Collect everything the command writes, one chunk at a time */
		for (;;)
		{
			size_t		nread = fread(chunk, 1, sizeof(chunk), pipe);

			if (ferror(pipe))
			{
				state->callbacks->write_error("%s: %s\n", shellcmd, strerror(errno));
				failed = true;
				break;
			}
			appendBinaryPQExpBuffer(&captured, chunk, nread);
			if (feof(pipe))
				break;
		}

		/* The pipe gets closed whether or not reading succeeded */
		if (pclose(pipe) == -1)
		{
			state->callbacks->write_error("%s: %s\n", shellcmd, strerror(errno));
			failed = true;
		}
	}

	if (PQExpBufferDataBroken(captured))
	{
		state->callbacks->write_error("%s: out of memory\n", shellcmd);
		failed = true;
	}

	/*
	 * shellcmd points into target's data, so the command text can only be
	 * chopped off now that all messages mentioning it have been issued.
	 */
	target->len = backtick_start_offset;
	target->data[target->len] = '\0';

	/* On success, put the command's output where the command text was */
	if (!failed)
	{
		/* strip any trailing newline */
		if (captured.len > 0 &&
			captured.data[captured.len - 1] == '\n')
			captured.len--;
		appendBinaryPQExpBuffer(target, captured.data, captured.len);
	}

	termPQExpBuffer(&captured);
}
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
* *
* We allow any non-ASCII character, as well as ASCII letters, digits, and * We allow any non-ASCII character, as well as ASCII letters, digits, and
* underscore. Keep this in sync with the definition of variable_char in * underscore. Keep this in sync with the definition of variable_char in
* psqlscan.l. * psqlscan.l and psqlscanslash.l.
*/ */
static bool static bool
valid_variable_name(const char *name) valid_variable_name(const char *name)
......
...@@ -64,7 +64,7 @@ my $frontend_extraincludes = { ...@@ -64,7 +64,7 @@ my $frontend_extraincludes = {
'initdb' => ['src/timezone'], 'initdb' => ['src/timezone'],
'psql' => [ 'src/bin/pg_dump', 'src/backend' ] }; 'psql' => [ 'src/bin/pg_dump', 'src/backend' ] };
my $frontend_extrasource = { my $frontend_extrasource = {
'psql' => ['src/bin/psql/psqlscan.l'], 'psql' => ['src/bin/psql/psqlscan.l', 'src/bin/psql/psqlscanslash.l'],
'pgbench' => 'pgbench' =>
[ 'src/bin/pgbench/exprscan.l', 'src/bin/pgbench/exprparse.y' ], }; [ 'src/bin/pgbench/exprscan.l', 'src/bin/pgbench/exprparse.y' ], };
my @frontend_excludes = ( my @frontend_excludes = (
......
...@@ -76,6 +76,7 @@ if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.c del /q src\pl\plpgsql\src\pl_ ...@@ -76,6 +76,7 @@ if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.c del /q src\pl\plpgsql\src\pl_
if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.h del /q src\pl\plpgsql\src\pl_gram.h if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.h del /q src\pl\plpgsql\src\pl_gram.h
if %DIST%==1 if exist src\bin\psql\psqlscan.c del /q src\bin\psql\psqlscan.c if %DIST%==1 if exist src\bin\psql\psqlscan.c del /q src\bin\psql\psqlscan.c
if %DIST%==1 if exist src\bin\psql\psqlscanslash.c del /q src\bin\psql\psqlscanslash.c
if %DIST%==1 if exist contrib\cube\cubescan.c del /q contrib\cube\cubescan.c if %DIST%==1 if exist contrib\cube\cubescan.c del /q contrib\cube\cubescan.c
if %DIST%==1 if exist contrib\cube\cubeparse.c del /q contrib\cube\cubeparse.c if %DIST%==1 if exist contrib\cube\cubeparse.c del /q contrib\cube\cubeparse.c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment