本节继续介绍PostgreSQL的语法分析定义文件gram.y的第三部分Productions(产生式).
Bison输入文件的组成:
%{
Declarations
%}
Definitions
%%
Productions
%%
User subroutines
一、Productions
Productions即产生式,这是用户编写的语法产生式,产生式的书写格式如下:
创新互联坚持“要么做到,要么别承诺”的工作理念,服务领域包括:做网站、成都网站制作、企业官网、英文网站、手机端网站、网站推广等服务,满足客户于互联网时代的远安网站设计、移动媒体设计的需求,帮助企业找到有效的互联网解决方案。努力成为您成熟可靠的网络建设合作伙伴!
S -> X \n
X -> X + X | X - X | T_NUMBER
S -> X \n成为产生式,第一条产生式的最左边的符号成为起始符号,在这里是符号S.
为了避免出现递归解析,Bison因此会在最前面多添加一条产生式S’ -> S,S’为起始符号.
在Bison中,符号”:”表示一条”->”,同一个非终结符的不同产生式用”|”隔开,用”;”结束.每条产生式的后面花括号内是一段C代码,这些代码在该产生式被应用时执行,成为Action(动作),产生式的右边是ε(空集合)时,用注释/* empty */代替.
产生式中的非终结符不需要预先定义,Bison会自动根据所有产生式的左边符号来确定哪些符号是非终结符;终结符中,单字符token(token type值和字符的ASCII码相同)也不需要预先定义,在产生式内部直接用单引号括起来即可,其他类型的token则需要预先在 Definitions段中定义好,如%token
[root@localhost src]# vim ./include/parser/gram.h
...
/* Token type. */
44 #ifndef YYTOKENTYPE
45 # define YYTOKENTYPE
46 enum yytokentype
47 {
48 IDENT = 258,
49 FCONST = 259,
50 SCONST = 260,
51 BCONST = 261,
52 XCONST = 262,
53 Op = 263,
54 ICONST = 264,
55 PARAM = 265,
....
编号从258开始,根据gram.y中的顺序逐个定义.
...
%token IDENT FCONST SCONST BCONST XCONST Op
%token ICONST PARAM
%token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
%token LESS_EQUALS GREATER_EQUALS NOT_EQUALS
%token ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER
AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC
ASSERTION ASSIGNMENT ASYMMETRIC AT ATTACH ATTRIBUTE AUTHORIZATION
...
这些token定义在scan.l中可直接使用.
#include "parser/gramparse.h" --> #include "parser/gram.h"
Bison会根据产生式以及符号优先级转化为LALR(1)动作表输出到gram.c文件中去.在gram.c文件中,PG根据自定义语法文件生成一个函数int base_yyparse (core_yyscan_t yyscanner);该函数按LR(1)解析流程对词法分析得到的token流进行解析,每当它需要读入下一个符号时,它就执行一次s = yylex() ,每当它要执行一个折叠(reduce)动作时,这个reduce所应用的产生式后面C代码将被执行,执行完后才将相应的状态出栈。
下面是gram.c中yyparse的部分代码:
/*----------.
| yyparse. |
`----------*/
int
yyparse (core_yyscan_t yyscanner)
{
/* The lookahead symbol. */
int yychar;
/* The semantic value of the lookahead symbol. */
/* Default value used for initialization, for pacifying older GCCs
or non-GCC compilers. */
YY_INITIAL_VALUE (static YYSTYPE yyval_default;)
YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default);
/* Location data for the lookahead symbol. */
static YYLTYPE yyloc_default
# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
= { 1, 1, 1, 1 }
# endif
;
YYLTYPE yylloc = yyloc_default;
/* Number of syntax errors so far. */
int yynerrs;
int yystate;
/* Number of tokens to shift before error messages enabled. */
int yyerrstatus;
/* The stacks and their tools:
'yyss': related to states.
'yyvs': related to semantic values.
'yyls': related to locations.
Refer to the stacks through separate pointers, to allow yyoverflow
to reallocate them elsewhere. */
/* The state stack. */
yytype_int16 yyssa[YYINITDEPTH];
yytype_int16 *yyss;
yytype_int16 *yyssp;
/* The semantic value stack. */
YYSTYPE yyvsa[YYINITDEPTH];
YYSTYPE *yyvs;
YYSTYPE *yyvsp;
/* The location stack. */
YYLTYPE yylsa[YYINITDEPTH];
YYLTYPE *yyls;
YYLTYPE *yylsp;
/* The locations where the error started and ended. */
YYLTYPE yyerror_range[3];
YYSIZE_T yystacksize;
int yyn;
int yyresult;
/* Lookahead token as an internal (translated) token number. */
int yytoken = 0;
/* The variables used to return semantic value and location from the
action routines. */
YYSTYPE yyval;
YYLTYPE yyloc;
#if YYERROR_VERBOSE
/* Buffer for error messages, and its allocated size. */
char yymsgbuf[128];
char *yymsg = yymsgbuf;
YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
#endif
#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N), yylsp -= (N))
/* The number of symbols on the RHS of the reduced rule.
Keep to zero when no symbol should be popped. */
int yylen = 0;
yyssp = yyss = yyssa;
yyvsp = yyvs = yyvsa;
yylsp = yyls = yylsa;
yystacksize = YYINITDEPTH;
...
二、源码
下面是gram.y产生式定义的部分源码
/*
* The target production for the whole parse.
*/
stmtblock: stmtmulti
{
pg_yyget_extra(yyscanner)->parsetree = $1;
}
;
/*
* At top level, we wrap each stmt with a RawStmt node carrying start location
* and length of the stmt's text. Notice that the start loc/len are driven
* entirely from semicolon locations (@2). It would seem natural to use
* @1 or @3 to get the true start location of a stmt, but that doesn't work
* for statements that can start with empty nonterminals (opt_with_clause is
* the main offender here); as noted in the comments for YYLLOC_DEFAULT,
* we'd get -1 for the location in such cases.
* We also take care to discard empty statements entirely.
*/
stmtmulti: stmtmulti ';' stmt
{
if ($1 != NIL)
{
/* update length of previous stmt */
updateRawStmtEnd(llast_node(RawStmt, $1), @2);
}
if ($3 != NULL)
$$ = lappend($1, makeRawStmt($3, @2 + 1));
else
$$ = $1;
}
| stmt
{
if ($1 != NULL)
$$ = list_make1(makeRawStmt($1, 0));
else
$$ = NIL;
}
;
stmt :
AlterEventTrigStmt
| AlterCollationStmt
| AlterDatabaseStmt
| AlterDatabaseSetStmt
| AlterDefaultPrivilegesStmt
| AlterDomainStmt
| AlterEnumStmt
| AlterExtensionStmt
| AlterExtensionContentsStmt
| AlterFdwStmt
| AlterForeignServerStmt
| AlterForeignTableStmt
| AlterFunctionStmt
| AlterGroupStmt
| AlterObjectDependsStmt
| AlterObjectSchemaStmt
| AlterOwnerStmt
| AlterOperatorStmt
| AlterPolicyStmt
| AlterSeqStmt
| AlterSystemStmt
| AlterTableStmt
| AlterTblSpcStmt
| AlterCompositeTypeStmt
| AlterPublicationStmt
| AlterRoleSetStmt
| AlterRoleStmt
| AlterSubscriptionStmt
| AlterTSConfigurationStmt
| AlterTSDictionaryStmt
| AlterUserMappingStmt
| AnalyzeStmt
| CallStmt
| CheckPointStmt
| ClosePortalStmt
| ClusterStmt
| CommentStmt
| ConstraintsSetStmt
| CopyStmt
| CreateAmStmt
| CreateAsStmt
| CreateAssertStmt
| CreateCastStmt
| CreateConversionStmt
| CreateDomainStmt
| CreateExtensionStmt
| CreateFdwStmt
| CreateForeignServerStmt
| CreateForeignTableStmt
| CreateFunctionStmt
| CreateGroupStmt
| CreateMatViewStmt
| CreateOpClassStmt
| CreateOpFamilyStmt
| CreatePublicationStmt
| AlterOpFamilyStmt
| CreatePolicyStmt
| CreatePLangStmt
| CreateSchemaStmt
| CreateSeqStmt
| CreateStmt
| CreateSubscriptionStmt
| CreateStatsStmt
| CreateTableSpaceStmt
| CreateTransformStmt
| CreateTrigStmt
| CreateEventTrigStmt
| CreateRoleStmt
| CreateUserStmt
| CreateUserMappingStmt
| CreatedbStmt
| DeallocateStmt
| DeclareCursorStmt
| DefineStmt
| DeleteStmt
| DiscardStmt
| DoStmt
| DropAssertStmt
| DropCastStmt
| DropOpClassStmt
| DropOpFamilyStmt
| DropOwnedStmt
| DropPLangStmt
| DropStmt
| DropSubscriptionStmt
| DropTableSpaceStmt
| DropTransformStmt
| DropRoleStmt
| DropUserMappingStmt
| DropdbStmt
| ExecuteStmt
| ExplainStmt
| FetchStmt
| GrantStmt
| GrantRoleStmt
| ImportForeignSchemaStmt
| IndexStmt
| InsertStmt
| ListenStmt
| RefreshMatViewStmt
| LoadStmt
| LockStmt
| NotifyStmt
| PrepareStmt
| ReassignOwnedStmt
| ReindexStmt
| RemoveAggrStmt
| RemoveFuncStmt
| RemoveOperStmt
| RenameStmt
| RevokeStmt
| RevokeRoleStmt
| RuleStmt
| SecLabelStmt
| SelectStmt
| TransactionStmt
| TruncateStmt
| UnlistenStmt
| UpdateStmt
| VacuumStmt
| VariableResetStmt
| VariableSetStmt
| VariableShowStmt
| ViewStmt
| /*EMPTY*/
{ $$ = NULL; }
;
/*****************************************************************************
*
* CALL statement
*
*****************************************************************************/
CallStmt: CALL func_application
{
CallStmt *n = makeNode(CallStmt);
n->funccall = castNode(FuncCall, $2);
$$ = (Node *)n;
}
;
...
简单解析如下:
1.stmtblock
stmtblock: stmtmulti
stmtblock为起始符号,最终应折叠(reduce)为该符号,否则会有语法错误.
执行的逻辑是:pg_yyget_extra(yyscanner)->parsetree = $1;
亦即完成语法解析,生成语法解析树parsetree.
2.stmtmulti
tmtmulti: stmtmulti ‘;’ stmt
左递归产生式,PG可接受多个以分号”;”分隔的语句,每个语句的定义为stmt
3.stmt
stmt :
AlterEventTrigStmt
| AlterCollationStmt
...
| SelectStmt
...
stmt包括N多种语句,我们看最常见的SelectStmt语句
4.SelectStmt
SelectStmt: select_no_parens %prec UMINUS
| select_with_parens %prec UMINUS
;
...
select_no_parens:
simple_select { $$ = $1; }
| select_clause sort_clause
{
insertSelectOptions((SelectStmt \*) $1, $2, NIL,
NULL, NULL, NULL,
yyscanner);
$$ = $1;
}
...
simple_select:
SELECT opt_all_clause opt_target_list
into_clause from_clause where_clause
group_clause having_clause window_clause
{
SelectStmt \*n = makeNode(SelectStmt);
n->targetList = $3;
n->intoClause = $4;
n->fromClause = $5;
n->whereClause = $6;
n->groupClause = $7;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node \*)n;
}
| SELECT distinct_clause target_list
...
三、参考资料
Flex&Bison
分享文章:PostgreSQL源码解读(176)-查询#94(语法分析:gram.y)#3
新闻来源:http://scgulin.cn/article/gsiisi.html