Commit f945f461 authored by Tom Lane

Modify lexing of multi-char operators per pghackers discussion around
16-Mar-00: trailing + or - is not part of the operator unless the operator
also contains characters not present in SQL92-defined operators.  This
solves the 'X=-Y' problem without unduly constraining users' choice of
operator names --- in particular, no existing Postgres operator names
become invalid.

Also, remove processing of // comments, as agreed in the same thread.
parent 2b23e864
<!-- <!--
$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.11 1999/07/22 15:09:08 thomas Exp $ $Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.12 2000/03/18 18:03:12 tgl Exp $
Postgres documentation Postgres documentation
--> -->
...@@ -60,8 +60,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class ...@@ -60,8 +60,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
<term><replaceable class="parameter">type1</replaceable></term> <term><replaceable class="parameter">type1</replaceable></term>
<listitem> <listitem>
<para> <para>
The type for the left-hand side of the operator, if any. This option would be The type of the left-hand argument of the operator, if any.
omitted for a right-unary operator. This option would be omitted for a left-unary operator.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -69,8 +69,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class ...@@ -69,8 +69,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
<term><replaceable class="parameter">type2</replaceable></term> <term><replaceable class="parameter">type2</replaceable></term>
<listitem> <listitem>
<para> <para>
The type for the right-hand side of the operator, if any. This option would be The type of the right-hand argument of the operator, if any.
omitted for a left-unary operator. This option would be omitted for a right-unary operator.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -78,7 +78,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class ...@@ -78,7 +78,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
<term><replaceable class="parameter">com_op</replaceable></term> <term><replaceable class="parameter">com_op</replaceable></term>
<listitem> <listitem>
<para> <para>
The commutator for this operator. The commutator of this operator.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -110,7 +110,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class ...@@ -110,7 +110,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
<term>HASHES</term> <term>HASHES</term>
<listitem> <listitem>
<para> <para>
Indicates this operator can support a hash-join algorithm. Indicates this operator can support a hash join.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -118,7 +118,8 @@ Indicates this operator can support a hash-join algorithm. ...@@ -118,7 +118,8 @@ Indicates this operator can support a hash-join algorithm.
<term><replaceable class="parameter">left_sort_op</replaceable></term> <term><replaceable class="parameter">left_sort_op</replaceable></term>
<listitem> <listitem>
<para> <para>
Operator that sorts the left-hand data type of this operator. If this operator can support a merge join, the
operator that sorts the left-hand data type of this operator.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -126,7 +127,8 @@ Indicates this operator can support a hash-join algorithm. ...@@ -126,7 +127,8 @@ Indicates this operator can support a hash-join algorithm.
<term><replaceable class="parameter">right_sort_op</replaceable></term> <term><replaceable class="parameter">right_sort_op</replaceable></term>
<listitem> <listitem>
<para> <para>
Operator that sorts the right-hand data type of this operator. If this operator can support a merge join, the
operator that sorts the right-hand data type of this operator.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -172,22 +174,56 @@ CREATE ...@@ -172,22 +174,56 @@ CREATE
</para> </para>
<para> <para>
The operator <replaceable class="parameter">name</replaceable> The operator <replaceable class="parameter">name</replaceable>
is a sequence of up to thirty two (32) characters in any combination is a sequence of up to NAMEDATALEN-1 (31 by default) characters
from the following: from the following list:
<literallayout> <literallayout>
+ - * / &lt; &gt; = ~ ! @ # % ^ & | ` ? $ : + - * / &lt; &gt; = ~ ! @ # % ^ &amp; | ` ? $ :
</literallayout> </literallayout>
There are a few restrictions on your choice of name:
<itemizedlist>
<listitem>
<para>
"$" and ":" cannot be defined as single-character operators,
although they can be part of a multi-character operator name.
</para>
</listitem>
<listitem>
<para>
"--" and "/*" cannot appear anywhere in an operator name,
since they will be taken as the start of a comment.
</para>
</listitem>
<listitem>
<para>
A multi-character operator name cannot end in "+" or "-",
unless the name also contains at least one of these characters:
<literallayout>
~ ! @ # % ^ &amp; | ` ? $ :
</literallayout>
For example, <literal>@-</literal> is an allowed operator name,
but <literal>*-</literal> is not.
This restriction allows <productname>Postgres</productname> to
parse SQL-compliant queries without requiring spaces between tokens.
</para>
</listitem>
</itemizedlist>
<note> <note>
<para> <para>
No alphabetic characters are allowed in an operator name. When working with non-SQL-standard operator names, you will usually
This enables <productname>Postgres</productname> to parse SQL input need to separate adjacent operators with spaces to avoid ambiguity.
into tokens without requiring spaces between each token. For example, if you have defined a left-unary operator named "@",
you cannot write <literal>X*@Y</literal>; you must write
<literal>X* @Y</literal> to ensure that
<productname>Postgres</productname> reads it as two operator names
not one.
</para> </para>
</note> </note>
</para> </para>
<para> <para>
The operator "!=" is mapped to "&lt;&gt;" on input, so they are The operator "!=" is mapped to "&lt;&gt;" on input, so these two names
therefore equivalent. are always equivalent.
</para> </para>
<para> <para>
At least one of LEFTARG and RIGHTARG must be defined. For At least one of LEFTARG and RIGHTARG must be defined. For
...@@ -196,11 +232,11 @@ CREATE ...@@ -196,11 +232,11 @@ CREATE
unary operators only RIGHTARG should be defined. unary operators only RIGHTARG should be defined.
</para> </para>
<para> <para>
Also, the The
<replaceable class="parameter">func_name</replaceable> procedure must have <replaceable class="parameter">func_name</replaceable> procedure must have
been previously defined using <command>CREATE FUNCTION</command> and must been previously defined using <command>CREATE FUNCTION</command> and must
be defined to accept the correct number of arguments be defined to accept the correct number of arguments
(either one or two). (either one or two) of the indicated types.
</para> </para>
<para> <para>
The commutator operator should be identified if one exists, The commutator operator should be identified if one exists,
...@@ -247,8 +283,6 @@ MYBOXES.description !== "0,0,1,1"::box ...@@ -247,8 +283,6 @@ MYBOXES.description !== "0,0,1,1"::box
does not yet have a commutator itself, then the commutator's does not yet have a commutator itself, then the commutator's
entry is updated to have the newly created operator as its entry is updated to have the newly created operator as its
commutator. This applies to the negator, as well. commutator. This applies to the negator, as well.
</para>
<para>
This is to allow the definition of two operators that are This is to allow the definition of two operators that are
the commutators or the negators of each other. The first the commutators or the negators of each other. The first
operator should be defined without a commutator or negator operator should be defined without a commutator or negator
...@@ -258,7 +292,7 @@ MYBOXES.description !== "0,0,1,1"::box ...@@ -258,7 +292,7 @@ MYBOXES.description !== "0,0,1,1"::box
it also works to just have both operators refer to each other.) it also works to just have both operators refer to each other.)
</para> </para>
<para> <para>
The next three specifications are present to support the The HASHES, SORT1, and SORT2 options are present to support the
query optimizer in performing joins. query optimizer in performing joins.
<productname>Postgres</productname> can always <productname>Postgres</productname> can always
evaluate a join (i.e., processing a clause with two tuple evaluate a join (i.e., processing a clause with two tuple
...@@ -294,9 +328,8 @@ MYBOXES.description !== "0,0,1,1"::box ...@@ -294,9 +328,8 @@ MYBOXES.description !== "0,0,1,1"::box
be worth the complexity involved. be worth the complexity involved.
</para> </para>
<para> <para>
The last two pieces of the specification are present so The RESTRICT and JOIN options assist the query optimizer in estimating
the query optimizer can estimate result sizes. If a result sizes. If a clause of the form:
clause of the form:
<programlisting> <programlisting>
MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box
</programlisting> </programlisting>
...@@ -310,7 +343,7 @@ MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box ...@@ -310,7 +343,7 @@ MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box
data types and returns a floating point number. The data types and returns a floating point number. The
query optimizer simply calls this function, passing the query optimizer simply calls this function, passing the
parameter "0,0,1,1" and multiplies the result by the relation parameter "0,0,1,1" and multiplies the result by the relation
size to get the desired expected number of instances. size to get the expected number of instances.
</para> </para>
<para> <para>
Similarly, when the operands of the operator both contain Similarly, when the operands of the operator both contain
...@@ -318,7 +351,7 @@ MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box ...@@ -318,7 +351,7 @@ MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box
size of the resulting join. The function join_proc will size of the resulting join. The function join_proc will
return another floating point number which will be multiplied return another floating point number which will be multiplied
by the cardinalities of the two classes involved to by the cardinalities of the two classes involved to
compute the desired expected result size. compute the expected result size.
</para> </para>
<para> <para>
The difference between the function The difference between the function
......
...@@ -315,12 +315,11 @@ UNCOMMITTED UNNAMED ...@@ -315,12 +315,11 @@ UNCOMMITTED UNNAMED
<para> <para>
A <firstterm>comment</firstterm> A <firstterm>comment</firstterm>
is an arbitrary sequence of characters following double dashes up to the end is an arbitrary sequence of characters beginning with double dashes
of the line. We also support double-slashes as comments, e.g.: and extending to the end of the line, e.g.:
<programlisting> <programlisting>
-- This is a standard SQL comment -- This is a standard SQL comment
// And this is another supported comment style, like C++
</programlisting> </programlisting>
We also support C-style block comments, e.g.: We also support C-style block comments, e.g.:
...@@ -331,6 +330,9 @@ We also support C-style block comments, e.g.: ...@@ -331,6 +330,9 @@ We also support C-style block comments, e.g.:
comment comment
*/ */
</programlisting> </programlisting>
A comment beginning with "/*" extends to the first occurrence of "*/".
</para> </para>
</sect1> </sect1>
...@@ -340,17 +342,22 @@ We also support C-style block comments, e.g.: ...@@ -340,17 +342,22 @@ We also support C-style block comments, e.g.:
<para> <para>
Names in SQL are sequences of less than NAMEDATALEN alphanumeric characters, Names in SQL are sequences of less than NAMEDATALEN alphanumeric characters,
starting with an alphabetic character. By default, NAMEDATALEN is set starting with an alphabetic character. By default, NAMEDATALEN is set
to 32, but at the time the system is built, NAMEDATALEN can be changed to 32 (but at the time the system is built, NAMEDATALEN can be changed
by changing the <literal>#define</literal> in by changing the <literal>#define</literal> in
src/backend/include/postgres.h. src/backend/include/postgres.h).
Underscore ("_") is considered an alphabetic character. Underscore ("_") is considered an alphabetic character.
</para> </para>
<para> <para>
In some contexts, names may contain other characters if surrounded Names containing other characters may be formed by surrounding them
by double quotes. For example, table or column names may contain otherwise with double quotes. For example, table or column names may contain
disallowed characters such as spaces, ampersands, etc. using this otherwise disallowed characters such as spaces, ampersands, etc. if
technique. quoted. Quoting a name also makes it case-sensitive,
whereas unquoted names are always folded to lower case. For example,
the names <literal>FOO</literal>, <literal>foo</literal>
and <literal>"foo"</literal> are
considered the same by <productname>Postgres</productname>, but
<literal>"Foo"</literal> is a different name.
</para> </para>
</sect1> </sect1>
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.67 2000/03/13 01:52:06 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.68 2000/03/18 18:03:09 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -87,10 +87,10 @@ static void addlit(char *ytext, int yleng); ...@@ -87,10 +87,10 @@ static void addlit(char *ytext, int yleng);
* and to eliminate parsing troubles for numeric strings. * and to eliminate parsing troubles for numeric strings.
* Exclusive states: * Exclusive states:
* <xb> binary numeric string - thomas 1997-11-16 * <xb> binary numeric string - thomas 1997-11-16
* <xc> extended C-style comments - tgl 1997-07-12 * <xc> extended C-style comments - thomas 1997-07-12
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27 * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
* <xh> hexadecimal numeric string - thomas 1997-11-16 * <xh> hexadecimal numeric string - thomas 1997-11-16
* <xq> quoted strings - tgl 1997-07-30 * <xq> quoted strings - thomas 1997-07-30
*/ */
%x xb %x xb
...@@ -144,7 +144,7 @@ xdinside [^"]+ ...@@ -144,7 +144,7 @@ xdinside [^"]+
* have something like plus-slash-star, lex will think this is a 3-character * have something like plus-slash-star, lex will think this is a 3-character
* operator whereas we want to see it as a + operator and a comment start. * operator whereas we want to see it as a + operator and a comment start.
* The solution is two-fold: * The solution is two-fold:
* 1. append {op_and_self}* to xcstart so that it matches as much text as * 1. append {op_chars}* to xcstart so that it matches as much text as
* {operator} would. Then the tie-breaker (first matching rule of same * {operator} would. Then the tie-breaker (first matching rule of same
* length) ensures xcstart wins. We put back the extra stuff with yyless() * length) ensures xcstart wins. We put back the extra stuff with yyless()
* in case it contains a star-slash that should terminate the comment. * in case it contains a star-slash that should terminate the comment.
...@@ -154,7 +154,7 @@ xdinside [^"]+ ...@@ -154,7 +154,7 @@ xdinside [^"]+
* SQL92-style comments, which start with dash-dash, have similar interactions * SQL92-style comments, which start with dash-dash, have similar interactions
* with the operator rule. * with the operator rule.
*/ */
xcstart \/\*{op_and_self}* xcstart \/\*{op_chars}*
xcstop \*+\/ xcstop \*+\/
xcinside ([^*]+)|(\*+[^/]) xcinside ([^*]+)|(\*+[^/])
...@@ -166,10 +166,19 @@ identifier {letter}{letter_or_digit}* ...@@ -166,10 +166,19 @@ identifier {letter}{letter_or_digit}*
typecast "::" typecast "::"
/* NB: if you change "self", fix the copy in the operator rule too! */ /*
* "self" is the set of chars that should be returned as single-character
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
* which can be one or more characters long (but if a single-char token
* appears in the "self" set, it is not to be returned as an Op). Note
* that the sets overlap, but each has some chars that are not in the other.
*
* If you change either set, adjust the character lists appearing in the
* rule for "operator"!
*/
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|] self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=] op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
operator {op_and_self}+ operator {op_chars}+
/* we no longer allow unary minus in numbers. /* we no longer allow unary minus in numbers.
* instead we pass it separately to parser. there it gets * instead we pass it separately to parser. there it gets
...@@ -202,7 +211,7 @@ horiz_space [ \t\f] ...@@ -202,7 +211,7 @@ horiz_space [ \t\f]
newline [\n\r] newline [\n\r]
non_newline [^\n\r] non_newline [^\n\r]
comment (("--"|"//"){non_newline}*) comment ("--"{non_newline}*)
whitespace ({space}|{comment}) whitespace ({space}|{comment})
...@@ -220,7 +229,7 @@ other . ...@@ -220,7 +229,7 @@ other .
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION. /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
* AT&T lex does not properly handle C-style comments in this second lex block. * AT&T lex does not properly handle C-style comments in this second lex block.
* So, put comments here. tgl - 1997-09-08 * So, put comments here. thomas - 1997-09-08
* *
* Quoted strings must allow some special characters such as single-quote * Quoted strings must allow some special characters such as single-quote
* and newline. * and newline.
...@@ -329,23 +338,57 @@ other . ...@@ -329,23 +338,57 @@ other .
{self} { return yytext[0]; } {self} { return yytext[0]; }
{operator} { {operator} {
/* Check for embedded slash-star or dash-dash */ /*
char *slashstar = strstr((char*)yytext, "/*"); * Check for embedded slash-star or dash-dash; those
char *dashdash = strstr((char*)yytext, "--"); * are comment starts, so operator must stop there.
* Note that slash-star or dash-dash at the first
* character will match a prior rule, not this one.
*/
int nchars = yyleng;
char *slashstar = strstr((char*)yytext, "/*");
char *dashdash = strstr((char*)yytext, "--");
if (slashstar && dashdash) if (slashstar && dashdash)
{ {
/* if both appear, take the first one */
if (slashstar > dashdash) if (slashstar > dashdash)
slashstar = dashdash; slashstar = dashdash;
} }
else if (!slashstar) else if (!slashstar)
slashstar = dashdash; slashstar = dashdash;
if (slashstar) if (slashstar)
nchars = slashstar - ((char*)yytext);
/*
* For SQL92 compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
* contains chars that are not in SQL92 operators.
* The idea is to lex '=-' as two operators, but not
* to forbid operator names like '?-' that could not be
* sequences of SQL92 operators.
*/
while (nchars > 1 &&
(yytext[nchars-1] == '+' ||
yytext[nchars-1] == '-'))
{
int ic;
for (ic = nchars-2; ic >= 0; ic--)
{
if (strchr("~!@#&`?$:%^|", yytext[ic]))
break;
}
if (ic >= 0)
break; /* found a char that makes it OK */
nchars--; /* else remove the +/-, and check again */
}
if (nchars < yyleng)
{ {
int nchars = slashstar - ((char*)yytext); /* Strip the unwanted chars from the token */
yyless(nchars); yyless(nchars);
/* If what we have left is only one char, and it's /*
* If what we have left is only one char, and it's
* one of the characters matching "self", then * one of the characters matching "self", then
* return it as a character token the same way * return it as a character token the same way
* that the "self" rule would have. * that the "self" rule would have.
...@@ -355,8 +398,9 @@ other . ...@@ -355,8 +398,9 @@ other .
return yytext[0]; return yytext[0];
} }
/* Convert "!=" operator to "<>" for compatibility */
if (strcmp((char*)yytext, "!=") == 0) if (strcmp((char*)yytext, "!=") == 0)
yylval.str = pstrdup("<>"); /* compatibility */ yylval.str = pstrdup("<>");
else else
yylval.str = pstrdup((char*)yytext); yylval.str = pstrdup((char*)yytext);
return Op; return Op;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Copyright 2000 by PostgreSQL Global Development Group * Copyright 2000 by PostgreSQL Global Development Group
* *
* $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.25 2000/03/13 13:46:32 petere Exp $ * $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.26 2000/03/18 18:03:11 tgl Exp $
*/ */
#include "postgres.h" #include "postgres.h"
#include "mainloop.h" #include "mainloop.h"
...@@ -318,8 +318,7 @@ MainLoop(FILE *source) ...@@ -318,8 +318,7 @@ MainLoop(FILE *source)
} }
/* single-line comment? truncate line */ /* single-line comment? truncate line */
else if ((line[i] == '-' && line[i + thislen] == '-') || else if (line[i] == '-' && line[i + thislen] == '-')
(line[i] == '/' && line[i + thislen] == '/'))
{ {
line[i] = '\0'; /* remove comment */ line[i] = '\0'; /* remove comment */
break; break;
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.55 2000/03/18 05:44:21 tgl Exp $ * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.56 2000/03/18 18:03:10 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -86,10 +86,10 @@ static struct _if_value { ...@@ -86,10 +86,10 @@ static struct _if_value {
* and to eliminate parsing troubles for numeric strings. * and to eliminate parsing troubles for numeric strings.
* Exclusive states: * Exclusive states:
* <xb> binary numeric string - thomas 1997-11-16 * <xb> binary numeric string - thomas 1997-11-16
* <xc> extended C-style comments - tgl 1997-07-12 * <xc> extended C-style comments - thomas 1997-07-12
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27 * <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
* <xh> hexadecimal numeric string - thomas 1997-11-16 * <xh> hexadecimal numeric string - thomas 1997-11-16
* <xq> quoted strings - tgl 1997-07-30 * <xq> quoted strings - thomas 1997-07-30
*/ */
%x xb %x xb
...@@ -146,14 +146,16 @@ xdcqdq \\\" ...@@ -146,14 +146,16 @@ xdcqdq \\\"
xdcother [^"] xdcother [^"]
xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
/* C-Style Comments /* C-style comments
*
* The "extended comment" syntax closely resembles allowable operator syntax. * The "extended comment" syntax closely resembles allowable operator syntax.
* The tricky part here is to get lex to recognize a string starting with * The tricky part here is to get lex to recognize a string starting with
* slash-star as a comment, when interpreting it as an operator would produce * slash-star as a comment, when interpreting it as an operator would produce
* a longer match --- remember lex will prefer a longer match! Also, if we * a longer match --- remember lex will prefer a longer match! Also, if we
* have tor whereas we want to see it as a + operator and a comment start. * have something like plus-slash-star, lex will think this is a 3-character
* operator whereas we want to see it as a + operator and a comment start.
* The solution is two-fold: * The solution is two-fold:
* 1. append {op_and_self}* to xcstart so that it matches as much text as * 1. append {op_chars}* to xcstart so that it matches as much text as
* {operator} would. Then the tie-breaker (first matching rule of same * {operator} would. Then the tie-breaker (first matching rule of same
* length) ensures xcstart wins. We put back the extra stuff with yyless() * length) ensures xcstart wins. We put back the extra stuff with yyless()
* in case it contains a star-slash that should terminate the comment. * in case it contains a star-slash that should terminate the comment.
...@@ -163,22 +165,31 @@ xdcinside ({xdcqq}|{xdcqdq}|{xdcother}) ...@@ -163,22 +165,31 @@ xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
* SQL92-style comments, which start with dash-dash, have similar interactions * SQL92-style comments, which start with dash-dash, have similar interactions
* with the operator rule. * with the operator rule.
*/ */
xcstart \/\*{op_and_self}* xcstart \/\*{op_chars}*
xcstop \*+\/ xcstop \*+\/
xcinside ([^*]+)|(\*+[^/]) xcinside ([^*]+)|(\*+[^/])
digit [0-9] digit [0-9]
letter [\200-\377_A-Za-z] letter [\200-\377_A-Za-z]
letter_or_digit [\200-\377_A-Za-z0-9] letter_or_digit [\200-\377_A-Za-z0-9]
identifier {letter}{letter_or_digit}* identifier {letter}{letter_or_digit}*
typecast "::" typecast "::"
/* NB: if you change "self", fix the copy in the operator rule too! */ /*
* "self" is the set of chars that should be returned as single-character
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
* which can be one or more characters long (but if a single-char token
* appears in the "self" set, it is not to be returned as an Op). Note
* that the sets overlap, but each has some chars that are not in the other.
*
* If you change either set, adjust the character lists appearing in the
* rule for "operator"!
*/
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|] self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=] op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
operator {op_and_self}+ operator {op_chars}+
/* we no longer allow unary minus in numbers. /* we no longer allow unary minus in numbers.
* instead we pass it separately to parser. there it gets * instead we pass it separately to parser. there it gets
...@@ -215,7 +226,7 @@ horiz_space [ \t\f] ...@@ -215,7 +226,7 @@ horiz_space [ \t\f]
newline [\n\r] newline [\n\r]
non_newline [^\n\r] non_newline [^\n\r]
comment (("--"|"//"){non_newline}*) comment ("--"{non_newline}*)
whitespace ({space}|{comment}) whitespace ({space}|{comment})
...@@ -250,7 +261,7 @@ cppline {space}*#(.*\\{line_end})*.* ...@@ -250,7 +261,7 @@ cppline {space}*#(.*\\{line_end})*.*
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION. /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
* AT&T lex does not properly handle C-style comments in this second lex block. * AT&T lex does not properly handle C-style comments in this second lex block.
* So, put comments here. tgl - 1997-09-08 * So, put comments here. thomas - 1997-09-08
* *
* Quoted strings must allow some special characters such as single-quote * Quoted strings must allow some special characters such as single-quote
* and newline. * and newline.
...@@ -294,15 +305,16 @@ cppline {space}*#(.*\\{line_end})*.* ...@@ -294,15 +305,16 @@ cppline {space}*#(.*\\{line_end})*.*
mmerror(ET_ERROR, "Bad binary integer input!"); mmerror(ET_ERROR, "Bad binary integer input!");
return ICONST; return ICONST;
} }
<xb><<EOF>> { mmerror(ET_ERROR, "Unterminated binary integer"); }
<xh>{xhinside} | <xh>{xhinside} |
<xb>{xbinside} { <xb>{xbinside} {
addlit(yytext, yyleng); addlit(yytext, yyleng);
} }
<xh>{xhcat} | <xh>{xhcat} |
<xb>{xbcat} { /* ignore */ <xb>{xbcat} {
/* ignore */
} }
<xb><<EOF>> { mmerror(ET_ERROR, "Unterminated binary integer"); }
<SQL>{xhstart} { <SQL>{xhstart} {
BEGIN(xh); BEGIN(xh);
...@@ -367,23 +379,57 @@ cppline {space}*#(.*\\{line_end})*.* ...@@ -367,23 +379,57 @@ cppline {space}*#(.*\\{line_end})*.*
return yytext[0]; return yytext[0];
} }
<SQL>{operator} { <SQL>{operator} {
/* Check for embedded slash-star or dash-dash */ /*
char *slashstar = strstr((char*)yytext, "/*"); * Check for embedded slash-star or dash-dash; those
char *dashdash = strstr((char*)yytext, "--"); * are comment starts, so operator must stop there.
* Note that slash-star or dash-dash at the first
* character will match a prior rule, not this one.
*/
int nchars = yyleng;
char *slashstar = strstr((char*)yytext, "/*");
char *dashdash = strstr((char*)yytext, "--");
if (slashstar && dashdash) if (slashstar && dashdash)
{ {
/* if both appear, take the first one */
if (slashstar > dashdash) if (slashstar > dashdash)
slashstar = dashdash; slashstar = dashdash;
} }
else if (!slashstar) else if (!slashstar)
slashstar = dashdash; slashstar = dashdash;
if (slashstar) if (slashstar)
nchars = slashstar - ((char*)yytext);
/*
* For SQL92 compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
* contains chars that are not in SQL92 operators.
* The idea is to lex '=-' as two operators, but not
* to forbid operator names like '?-' that could not be
* sequences of SQL92 operators.
*/
while (nchars > 1 &&
(yytext[nchars-1] == '+' ||
yytext[nchars-1] == '-'))
{
int ic;
for (ic = nchars-2; ic >= 0; ic--)
{
if (strchr("~!@#&`?$:%^|", yytext[ic]))
break;
}
if (ic >= 0)
break; /* found a char that makes it OK */
nchars--; /* else remove the +/-, and check again */
}
if (nchars < yyleng)
{ {
int nchars = slashstar - ((char*)yytext); /* Strip the unwanted chars from the token */
yyless(nchars); yyless(nchars);
/* If what we have left is only one char, and it's /*
* If what we have left is only one char, and it's
* one of the characters matching "self", then * one of the characters matching "self", then
* return it as a character token the same way * return it as a character token the same way
* that the "self" rule would have. * that the "self" rule would have.
...@@ -393,8 +439,9 @@ cppline {space}*#(.*\\{line_end})*.* ...@@ -393,8 +439,9 @@ cppline {space}*#(.*\\{line_end})*.*
return yytext[0]; return yytext[0];
} }
/* Convert "!=" operator to "<>" for compatibility */
if (strcmp((char*)yytext, "!=") == 0) if (strcmp((char*)yytext, "!=") == 0)
yylval.str = mm_strdup("<>"); /* compatability */ yylval.str = mm_strdup("<>");
else else
yylval.str = mm_strdup((char*)yytext); yylval.str = mm_strdup((char*)yytext);
return Op; return Op;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment