scan.l 11.2 KB
Newer Older
1 2 3 4
%{
/*-------------------------------------------------------------------------
 *
 * scan.l--
5
 *	  lexical scanner for POSTGRES
6 7 8 9 10
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
11
 *	  $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.36 1998/02/18 07:22:40 thomas Exp $
12 13 14 15 16 17 18 19 20 21 22
 *
 *-------------------------------------------------------------------------
 */
#include <ctype.h>
#include <unistd.h>
#ifndef __linux__
#include <math.h>
#else
#include <stdlib.h>
#endif /* __linux__ */
#include <string.h>
23
#include <errno.h>
24 25 26 27 28

#include "postgres.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
#include "nodes/parsenodes.h"
Bruce Momjian's avatar
Bruce Momjian committed
29
#include "parser/gramparse.h"
30 31 32
#include "parser/keywords.h"
#include "parser/scansup.h"
#include "parse.h"
33
#include "utils/builtins.h"
34 35

extern char *parseString;
36
static char *parseCh;
37 38 39 40 41 42 43 44 45

/* some versions of lex define this as a macro */
#if defined(yywrap)
#undef yywrap
#endif /* yywrap */

#if defined(FLEX_SCANNER)
/* MAX_PARSE_BUFFER is defined in miscadmin.h */
#define YYLMAX MAX_PARSE_BUFFER
46
#define YY_NO_UNPUT
47 48 49 50 51 52 53 54 55 56 57
extern int myinput(char* buf, int max);
#undef YY_INPUT
#define YY_INPUT(buf,result,max) {result = myinput(buf,max);}
#else
#undef input
int input();
#undef unput
void unput(char);
#endif /* FLEX_SCANNER */

extern YYSTYPE yylval;
58 59 60 61

int llen;
char literal[MAX_PARSE_BUFFER];

62
%}
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
/* OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting condition, to which all non-conditional rules apply.
 * When in an exclusive condition, only those rules defined for that condition apply.
 *
 * Exclusive states change parsing rules while the state is active.
 * There are exclusive states for quoted strings, extended comments,
 *  and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> binary numeric string - thomas 1997-11-16
 *  <xc> extended C-style comments - tgl 1997-07-12
 *  <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
 *  <xh> hexadecimal numeric string - thomas 1997-11-16
 *  <xm> numeric strings with embedded minus sign - tgl 1997-09-05
 *  <xq> quoted strings - tgl 1997-07-30
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * So, when in condition <xc>, only strings which would terminate the
 *  "extended comment" trigger any action other than "ignore".
 * Be sure to match _any_ candidate comment, including those with appended
 *	operator-like symbols. - thomas 1997-07-14
 */
86

87
%x xb
88
%x xc
89
%x xd
90
%x xh
91
%x xm
92
%x xq
93

94 95
/* Binary number
 */
96 97 98 99 100
xbstart			[bB]{quote}
xbstop			{quote}
xbinside		[^']*
xbcat			{quote}{space}*\n{space}*{quote}

101 102
/* Hexadecimal number
 */
103 104 105 106 107
xhstart			[xX]{quote}
xhstop			{quote}
xhinside		[^']*
xhcat			{quote}{space}*\n{space}*{quote}

108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
/* Extended quote
 * xqdouble implements SQL92 embedded quote
 * xqcat allows strings to cross input lines
 */
quote			'
xqstart			{quote}
xqstop			{quote}
xqdouble		{quote}{quote}
xqinside		[^\\']*
xqembedded		"\\'"
xqliteral		[\\](.|\n)
xqcat			{quote}{space}*\n{space}*{quote}

/* Delimited quote
 * Allows embedded spaces and other special characters into identifiers.
 */
124 125 126 127 128
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
xdinside		[^"]*

129 130 131
/* Comments
 * Ignored by the scanner and parser.
 * xcline matches a comment that is opened and closed on the same line.
 *  It must end at the first terminator: a greedy match up to the last
 *  terminator would also discard any code lying between two comments
 *  on one line.
 */
xcline			[\/][\*]([^*\n]|[\*]+[^*\/\n])*[\*]+[\/]{space}*\n*
xcstart			[\/][\*]{op_and_self}*
xcstop			{op_and_self}*[\*][\/]({space}*|\n)
xcinside		[^*]*
xcstar			[^/]
137

138
digit			[0-9]
139
number			[-+.0-9Ee]
140 141
letter			[\200-\377_A-Za-z]
letter_or_digit	[\200-\377_A-Za-z0-9]
142

143
identifier		{letter}{letter_or_digit}*
144

145
typecast		"::"
146

147 148 149
self			[,()\[\].;$\:\+\-\*\/\<\>\=\|]
op_and_self		[\~\!\@\#\%\^\&\|\`\?\$\:\+\-\*\/\<\>\=]
operator		{op_and_self}+
150 151 152 153

xminteger		{integer}/-
xmreal			{real}/{space}*-{digit}
xmstop			-
154

155 156
integer			-?{digit}+
real			-?{digit}+\.{digit}+([Ee][-+]?{digit}+)?
157

158
param			\${integer}
159

160
comment			("--"|"//").*\n
161

162 163
space			[ \t\n\f]
other			.
164

165 166 167
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
 * AT&T lex does not properly handle C-style comments in this second lex block.
 * So, put comments here. tgl - 1997-09-08
168 169 170 171 172 173 174 175
 *
 * Quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL/92-standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string. - thomas 1997-09-24
176
 */
177

178 179
%%
{comment}		{ /* line comment, nothing to return */ }

{xcline}		{ /* comment opened and closed on one line, nothing to return */ }

<xc>{xcstar}	|
{xcstart}		{ BEGIN(xc); }

<xc>{xcstop}	{ BEGIN(INITIAL); }

<xc>{xcinside}	{ /* body of an extended comment, nothing to return */ }

190 191 192 193 194 195 196 197 198 199 200 201
{xbstart}		{
					/* start of a binary literal: empty the shared buffer */
					BEGIN(xb);
					llen = 0;
					*literal = '\0';
				}
<xb>{xbstop}	{
					char	   *endptr;
					long		val;

					BEGIN(INITIAL);
					errno = 0;
					val = strtol((char *) literal, &endptr, 2);
					/*
					 * strtol returns long.  Where long is wider than int a
					 * value can pass the ERANGE test and still be truncated
					 * when stored, so also require that it round-trips
					 * through int.  NOTE(review): assumes yylval.ival is an
					 * int; confirm against the parser union.
					 */
					if (*endptr != '\0' || errno == ERANGE ||
						val != (long) ((int) val))
						elog(ERROR,"Bad binary integer input '%s'",literal);
					yylval.ival = (int) val;
					return (ICONST);
				}
<xh>{xhinside}	|
<xb>{xbinside}	{
					/* append the digits, terminator included, to the buffer */
					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
						elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
					memcpy(literal+llen, yytext, yyleng+1);
					llen += yyleng;
				}
<xh>{xhcat}		|
<xb>{xbcat}		{
					/* literal continued on the next line: nothing to store */
				}

{xhstart}		{
					/* start of a hexadecimal literal: empty the shared buffer */
					BEGIN(xh);
					llen = 0;
					*literal = '\0';
				}
<xh>{xhstop}	{
					char	   *endptr;
					long		val;

					BEGIN(INITIAL);
					errno = 0;
					val = strtol((char *) literal, &endptr, 16);
					/* same int round-trip test as for binary literals */
					if (*endptr != '\0' || errno == ERANGE ||
						val != (long) ((int) val))
						elog(ERROR,"Bad hexadecimal integer input '%s'",literal);
					yylval.ival = (int) val;
					return (ICONST);
				}

232
{xqstart}		{
					/* opening quote: empty the shared buffer */
					BEGIN(xq);
					llen = 0;
					*literal = '\0';
				}
<xq>{xqstop}	{
					/* closing quote: hand the collected text to the parser */
					BEGIN(INITIAL);
					yylval.str = pstrdup(scanstr(literal));
					return (SCONST);
				}
<xq>{xqdouble}	|
<xq>{xqinside}	{
					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
						elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
					memcpy(literal+llen, yytext, yyleng+1);
					llen += yyleng;
				}
<xq>{xqembedded} {
					/*
					 * All yyleng characters plus the terminator are copied,
					 * so the bound must use yyleng and not yyleng-1;
					 * otherwise the terminator can land one byte past the
					 * end of the buffer.
					 */
					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
						elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
					memcpy(literal+llen, yytext, yyleng+1);
					/* replace the backslash, leaving two adjacent quotes */
					*(literal+llen) = '\'';
					llen += yyleng;
				}

<xq>{xqliteral} {
					/* backslash and escaped character are both stored */
					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
						elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
					memcpy(literal+llen, yytext, yyleng+1);
					llen += yyleng;
				}
<xq>{xqcat}		{
					/* string continued on the next line: nothing to store */
				}
265

266 267 268 269 270 271 272 273 274 275 276 277 278

{xdstart}		{
					/* opening double quote: start with an empty buffer */
					literal[0] = '\0';
					llen = 0;
					BEGIN(xd);
				}
<xd>{xdstop}	{
					/* closing double quote: the buffer is the identifier */
					BEGIN(INITIAL);
					yylval.str = pstrdup(literal);
					return (IDENT);
				}
<xd>{xdinside}	{
					/* room is needed for the text and its terminator */
					if ((llen + yyleng) >= (MAX_PARSE_BUFFER))
						elog(ERROR,"quoted string parse buffer of %d chars exceeded",MAX_PARSE_BUFFER);
					memcpy(&literal[llen], yytext, yyleng + 1);
					llen = llen + yyleng;
				}


285 286 287 288 289 290
<xm>{space}*	{ /* whitespace while in xm, nothing to return */ }
<xm>{xmstop}	{
					int			minus = yytext[0];

					BEGIN(INITIAL);
					return (minus);
				}

{typecast}		{ return (TYPECAST); }

{self}/-[\.0-9]	{ return (yytext[0]); }
{self}			{ return (yytext[0]); }
298
{operator}/-[\.0-9]	{
					/*
					 * Operator directly followed by a negative number.
					 * Apply the same != translation as the plain operator
					 * rule so both spellings yield the same token text.
					 */
					if (strcmp((char*)yytext,"!=") == 0)
						yylval.str = pstrdup("<>");
					else
						yylval.str = pstrdup((char*)yytext);
					return (Op);
				}
{operator}		{
					if (strcmp((char*)yytext,"!=") == 0)
						yylval.str = pstrdup("<>"); /* compatibility */
					else
						yylval.str = pstrdup((char*)yytext);
					return (Op);
				}
{param}			{
					/* skip the leading dollar sign and convert the digits */
					yylval.ival = atoi((char*)&yytext[1]);
					return (PARAM);
				}
313

314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
{identifier}/{space}*-{number}	{
					int i;
					ScanKeyword		*keyword;

					/* a minus sign follows: let the xm rules scan it */
					BEGIN(xm);
					/*
					 * Fold to lower case in place.  Identifiers may hold
					 * bytes above 127, which are negative where char is
					 * signed; the ctype functions require an unsigned char
					 * value, hence the casts.
					 */
					for(i = 0; yytext[i]; i++)
						if (isupper((unsigned char) yytext[i]))
							yytext[i] = tolower((unsigned char) yytext[i]);

					keyword = ScanKeywordLookup((char*)yytext);
					if (keyword != NULL) {
						return (keyword->value);
					}
					else
					{
						yylval.str = pstrdup((char*)yytext);
						return (IDENT);
					}
				}
333
{integer}/{space}*-{number}	{
					char	   *endptr;
					long		val;

					/* a minus sign follows: let the xm rules scan it */
					BEGIN(xm);
					errno = 0;
					val = strtol((char *) yytext, &endptr, 10);
					/*
					 * strtol returns long.  Where long is wider than int a
					 * value can pass the ERANGE test and still be truncated
					 * when stored, so treat anything that does not
					 * round-trip through int as out of range too.
					 * NOTE(review): assumes yylval.ival is an int; confirm
					 * against the parser union.
					 */
					if (*endptr != '\0' || errno == ERANGE ||
						val != (long) ((int) val))
					{
						errno = 0;
						yylval.dval = strtod(((char *)yytext),&endptr);
						if (*endptr != '\0' || errno == ERANGE)
							elog(ERROR,"Bad integer input '%s'",yytext);
						CheckFloat8Val(yylval.dval);
						elog(NOTICE,"Integer input '%s' is out of range; promoted to float", yytext);
						return (FCONST);
					}
					yylval.ival = (int) val;
					return (ICONST);
				}
{real}/{space}*-{number} {
					char	   *endptr;

					/* a minus sign follows: let the xm rules scan it */
					BEGIN(xm);
					errno = 0;
					yylval.dval = strtod(((char *)yytext),&endptr);
					if (*endptr != '\0' || errno == ERANGE)
						elog(ERROR,"Bad float8 input '%s'",yytext);
					CheckFloat8Val(yylval.dval);
					return (FCONST);
				}
{integer}		{
					char	   *endptr;
					long		val;

					errno = 0;
					val = strtol((char *) yytext, &endptr, 10);
					/* same int round-trip test as in the lookahead rule */
					if (*endptr != '\0' || errno == ERANGE ||
						val != (long) ((int) val))
					{
						errno = 0;
						yylval.dval = strtod(((char *)yytext),&endptr);
						if (*endptr != '\0' || errno == ERANGE)
							elog(ERROR,"Bad integer input '%s'",yytext);
						CheckFloat8Val(yylval.dval);
						elog(NOTICE,"Integer input '%s' is out of range; promoted to float", yytext);
						return (FCONST);
					}
					yylval.ival = (int) val;
					return (ICONST);
				}
{real}			{
					char	   *endptr;

					errno = 0;
					yylval.dval = strtod((char *)yytext,&endptr);
					if (*endptr != '\0' || errno == ERANGE)
						elog(ERROR,"Bad float input '%s'",yytext);
					CheckFloat8Val(yylval.dval);
					return (FCONST);
				}
389
{identifier}	{
					int i;
					ScanKeyword		*keyword;

					/*
					 * Fold to lower case in place.  Identifiers may hold
					 * bytes above 127, which are negative where char is
					 * signed; the ctype functions require an unsigned char
					 * value, hence the casts.
					 */
					for(i = 0; yytext[i]; i++)
						if (isupper((unsigned char) yytext[i]))
							yytext[i] = tolower((unsigned char) yytext[i]);

					/* keywords return their own token, the rest are IDENT */
					keyword = ScanKeywordLookup((char*)yytext);
					if (keyword != NULL) {
						return (keyword->value);
					}
					else
					{
						yylval.str = pstrdup((char*)yytext);
						return (IDENT);
					}
				}
{space}			{ /* ignore */ }

{other}			{ return (yytext[0]); }
410 411 412 413 414

%%

void yyerror(char message[])
{
415
	elog(ERROR, "parser: %s at or near \"%s\"", message, yytext);
416 417 418 419
}

/*
 yywrap:
	always returns 1, which by lex convention means that no further
	input follows once the current input is exhausted
*/
int
yywrap()
{
	return 1;
}

/*
 init_io:
425
	called by postgres before any actual parsing is done
426 427 428 429
*/
void
init_io()
{
430 431 432 433
	/* it's important to set this to NULL
	   because input()/myinput() checks the non-nullness of parseCh
	   to know when to pass the string to lex/flex */
	parseCh = NULL;
434
#if defined(FLEX_SCANNER)
435 436
	if (YY_CURRENT_BUFFER)
		yy_flush_buffer(YY_CURRENT_BUFFER);
437
#endif /* FLEX_SCANNER */
438
	BEGIN INITIAL;
439 440 441 442 443 444 445
}

#if !defined(FLEX_SCANNER)
/* get lex input from a string instead of from stdin */
int
input()
{
446 447 448 449 450 451 452 453 454
	if (parseCh == NULL)
	{
		parseCh = parseString;
		return(*parseCh++);
	}
	else if (*parseCh == '\0')
		return(0);
	else
		return(*parseCh++);
455 456 457 458 459 460
}

/* undo lex input from a string instead of from stdin */
void
unput(char c)
{
461 462 463 464
	if (parseCh == NULL)
		elog(FATAL, "Unput() failed.\n");
	else if (c != 0)
		*--parseCh = c;
465 466 467 468 469
}
#endif /* !defined(FLEX_SCANNER) */

#ifdef FLEX_SCANNER
/* input routine for flex to read input from a string instead of a file */
470
int
471 472
myinput(char* buf, int max)
{
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487
	int len, copylen;

	if (parseCh == NULL)
	{
		len = strlen(parseString);
		if (len >= max)
			copylen = max - 1;
		else
			copylen = len;
		if (copylen > 0)
			memcpy(buf, parseString, copylen);
		buf[copylen] = '\0';
		parseCh = parseString;
		return copylen;
	}
488
	else
489
		return 0; /* end of string */
490 491 492
}
#endif /* FLEX_SCANNER */