ANSI C语言(98版)草案的 词法(lex)与语法(yacc)描述
ANSI C语言某草案的词法(lex)和语法(yacc)描述,对学习编译原理、flex(lex)、bison(yacc)来说是一个很不错的选择!
以下代码的记法规则采用的是flex的正则表达式和BNF范式,很好理解。真遗憾上大学的时候编译原理没太认真,结果草草过了。
来源网站:ANSI C grammar (Lex),ANSI C grammar (Yacc)
Lex
D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]
E ([Ee][+-]?{D}+)
P ([Pp][+-]?{D}+)
FS (f|F|l|L)
IS ((u|U)|(u|U)?(l|L|ll|LL)|(l|L|ll|LL)(u|U))
%{
#include <stdio.h>
#include "y.tab.h"
void count(void);
%}
%%
"/*" { comment(); }
"//"[^\n]* { /* consume //-comment */ }
"auto" { count(); return(AUTO); }
"_Bool" { count(); return(BOOL); }
"break" { count(); return(BREAK); }
"case" { count(); return(CASE); }
"char" { count(); return(CHAR); }
"_Complex" { count(); return(COMPLEX); }
"const" { count(); return(CONST); }
"continue" { count(); return(CONTINUE); }
"default" { count(); return(DEFAULT); }
"do" { count(); return(DO); }
"double" { count(); return(DOUBLE); }
"else" { count(); return(ELSE); }
"enum" { count(); return(ENUM); }
"extern" { count(); return(EXTERN); }
"float" { count(); return(FLOAT); }
"for" { count(); return(FOR); }
"goto" { count(); return(GOTO); }
"if" { count(); return(IF); }
"_Imaginary" { count(); return(IMAGINARY); }
"inline" { count(); return(INLINE); }
"int" { count(); return(INT); }
"long" { count(); return(LONG); }
"register" { count(); return(REGISTER); }
"restrict" { count(); return(RESTRICT); }
"return" { count(); return(RETURN); }
"short" { count(); return(SHORT); }
"signed" { count(); return(SIGNED); }
"sizeof" { count(); return(SIZEOF); }
"static" { count(); return(STATIC); }
"struct" { count(); return(STRUCT); }
"switch" { count(); return(SWITCH); }
"typedef" { count(); return(TYPEDEF); }
"union" { count(); return(UNION); }
"unsigned" { count(); return(UNSIGNED); }
"void" { count(); return(VOID); }
"volatile" { count(); return(VOLATILE); }
"while" { count(); return(WHILE); }
{L}({L}|{D})* { count(); return(check_type()); }
0[xX]{H}+{IS}? { count(); return(CONSTANT); }
0[0-7]*{IS}? { count(); return(CONSTANT); }
[1-9]{D}*{IS}? { count(); return(CONSTANT); }
L?'(\\.|[^\\'\n])+' { count(); return(CONSTANT); }
{D}+{E}{FS}? { count(); return(CONSTANT); }
{D}*"."{D}+{E}?{FS}? { count(); return(CONSTANT); }
{D}+"."{D}*{E}?{FS}? { count(); return(CONSTANT); }
0[xX]{H}+{P}{FS}? { count(); return(CONSTANT); }
0[xX]{H}*"."{H}+{P}?{FS}? { count(); return(CONSTANT); }
0[xX]{H}+"."{H}*{P}?{FS}? { count(); return(CONSTANT); }
L?\"(\\.|[^\\"\n])*\" { count(); return(STRING_LITERAL); }
"..." { count(); return(ELLIPSIS); }
">>=" { count(); return(RIGHT_ASSIGN); }
"<<=" { count(); return(LEFT_ASSIGN); }
"+=" { count(); return(ADD_ASSIGN); }
"-=" { count(); return(SUB_ASSIGN); }
"*=" { count(); return(MUL_ASSIGN); }
"/=" { count(); return(DIV_ASSIGN); }
"%=" { count(); return(MOD_ASSIGN); }
"&=" { count(); return(AND_ASSIGN); }
"^=" { count(); return(XOR_ASSIGN); }
"|=" { count(); return(OR_ASSIGN); }
">>" { count(); return(RIGHT_OP); }
"<<" { count(); return(LEFT_OP); }
"++" { count(); return(INC_OP); }
"--" { count(); return(DEC_OP); }
"->" { count(); return(PTR_OP); }
"&&" { count(); return(AND_OP); }
"||" { count(); return(OR_OP); }
"<=" { count(); return(LE_OP); }
">=" { count(); return(GE_OP); }
"==" { count(); return(EQ_OP); }
"!=" { count(); return(NE_OP); }
";" { count(); return(';'); }
("{"|"<%") { count(); return('{'); }
("}"|"%>") { count(); return('}'); }
"," { count(); return(','); }
":" { count(); return(':'); }
"=" { count(); return('='); }
"(" { count(); return('('); }
")" { count(); return(')'); }
("["|"<:") { count(); return('['); }
("]"|":>") { count(); return(']'); }
"." { count(); return('.'); }
"&" { count(); return('&'); }
"!" { count(); return('!'); }
"~" { count(); return('~'); }
"-" { count(); return('-'); }
"+" { count(); return('+'); }
"*" { count(); return('*'); }
"/" { count(); return('/'); }
"%" { count(); return('%'); }
"<" { count(); return('<'); }
">" { count(); return('>'); }
"^" { count(); return('^'); }
"|" { count(); return('|'); }
"?" { count(); return('?'); }
[ \t\v\n\f] { count(); }
. { /* Add code to complain about unmatched characters */ }
%%
int yywrap(void)
{
return 1;
}
void comment(void)
{
char c, prev = 0;
while ((c = input()) != 0) /* (EOF maps to 0) */
{
if (c == '/' && prev == '*')
return;
prev = c;
}
error("unterminated comment");
}
int column = 0;
void count(void)
{
int i;
for (i = 0; yytext[i] != '\0'; i++)
if (yytext[i] == '\n')
column = 0;
else if (yytext[i] == '\t')
column += 8 - (column % 8);
else
column++;
ECHO;
}
int check_type(void)
{
/*
* pseudo code --- this is what it should check
*
* if (yytext == type_name)
* return TYPE_NAME;
*
* return IDENTIFIER;
*/
/*
* it actually will only return IDENTIFIER
*/
return IDENTIFIER;
}
Yacc
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF %token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN %token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN %token XOR_ASSIGN OR_ASSIGN TYPE_NAME %token TYPEDEF EXTERN STATIC AUTO REGISTER INLINE RESTRICT %token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID %token BOOL COMPLEX IMAGINARY %token STRUCT UNION ENUM ELLIPSIS %token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN %start translation_unit %% primary_expression : IDENTIFIER | CONSTANT | STRING_LITERAL | '(' expression ')' ; postfix_expression : primary_expression | postfix_expression '[' expression ']' | postfix_expression '(' ')' | postfix_expression '(' argument_expression_list ')' | postfix_expression '.' IDENTIFIER | postfix_expression PTR_OP IDENTIFIER | postfix_expression INC_OP | postfix_expression DEC_OP | '(' type_name ')' '{' initializer_list '}' | '(' type_name ')' '{' initializer_list ',' '}' ; argument_expression_list : assignment_expression | argument_expression_list ',' assignment_expression ; unary_expression : postfix_expression | INC_OP unary_expression | DEC_OP unary_expression | unary_operator cast_expression | SIZEOF unary_expression | SIZEOF '(' type_name ')' ; unary_operator : '&' | '*' | '+' | '-' | '~' | '!' ; cast_expression : unary_expression | '(' type_name ')' cast_expression ; multiplicative_expression : cast_expression | multiplicative_expression '*' cast_expression | multiplicative_expression '/' cast_expression | multiplicative_expression '%' cast_expression ; additive_expression : multiplicative_expression | additive_expression '+' multiplicative_expression | additive_expression '-' multiplicative_expression ; shift_expression : additive_expression | shift_expression LEFT_OP additive_expression | shift_expression RIGHT_OP additive_expression ; relational_expression : shift_expression | relational_expression '<' shift_expression | relational_expression '>' shift_expression | relational_expression LE_OP shift_expression | relational_expression GE_OP shift_expression ; equality_expression : relational_expression | equality_expression EQ_OP relational_expression | equality_expression NE_OP relational_expression ; and_expression : equality_expression | and_expression '&' equality_expression ; exclusive_or_expression : and_expression | exclusive_or_expression '^' and_expression ; inclusive_or_expression : exclusive_or_expression | inclusive_or_expression '|' exclusive_or_expression ; logical_and_expression : inclusive_or_expression | logical_and_expression AND_OP inclusive_or_expression ; logical_or_expression : logical_and_expression | logical_or_expression OR_OP logical_and_expression ; conditional_expression : logical_or_expression | logical_or_expression '?' expression ':' conditional_expression ; assignment_expression : conditional_expression | unary_expression assignment_operator assignment_expression ; assignment_operator : '=' | MUL_ASSIGN | DIV_ASSIGN | MOD_ASSIGN | ADD_ASSIGN | SUB_ASSIGN | LEFT_ASSIGN | RIGHT_ASSIGN | AND_ASSIGN | XOR_ASSIGN | OR_ASSIGN ; expression : assignment_expression | expression ',' assignment_expression ; constant_expression : conditional_expression ; declaration : declaration_specifiers ';' | declaration_specifiers init_declarator_list ';' ; declaration_specifiers : storage_class_specifier | storage_class_specifier declaration_specifiers | type_specifier | type_specifier declaration_specifiers | type_qualifier | type_qualifier declaration_specifiers | function_specifier | function_specifier declaration_specifiers ; init_declarator_list : init_declarator | init_declarator_list ',' init_declarator ; init_declarator : declarator | declarator '=' initializer ; storage_class_specifier : TYPEDEF | EXTERN | STATIC | AUTO | REGISTER ; type_specifier : VOID | CHAR | SHORT | INT | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED | BOOL | COMPLEX | IMAGINARY | struct_or_union_specifier | enum_specifier | TYPE_NAME ; struct_or_union_specifier : struct_or_union IDENTIFIER '{' struct_declaration_list '}' | struct_or_union '{' struct_declaration_list '}' | struct_or_union IDENTIFIER ; struct_or_union : STRUCT | UNION ; struct_declaration_list : struct_declaration | struct_declaration_list struct_declaration ; struct_declaration : specifier_qualifier_list struct_declarator_list ';' ; specifier_qualifier_list : type_specifier specifier_qualifier_list | type_specifier | type_qualifier specifier_qualifier_list | type_qualifier ; struct_declarator_list : struct_declarator | struct_declarator_list ',' struct_declarator ; struct_declarator : declarator | ':' constant_expression | declarator ':' constant_expression ; enum_specifier : ENUM '{' enumerator_list '}' | ENUM IDENTIFIER '{' enumerator_list '}' | ENUM '{' enumerator_list ',' '}' | ENUM IDENTIFIER '{' enumerator_list ',' '}' | ENUM IDENTIFIER ; enumerator_list : enumerator | enumerator_list ',' enumerator ; enumerator : IDENTIFIER | IDENTIFIER '=' constant_expression ; type_qualifier : CONST | RESTRICT | VOLATILE ; function_specifier : INLINE ; declarator : pointer direct_declarator | direct_declarator ; direct_declarator : IDENTIFIER | '(' declarator ')' | direct_declarator '[' type_qualifier_list assignment_expression ']' | direct_declarator '[' type_qualifier_list ']' | direct_declarator '[' assignment_expression ']' | direct_declarator '[' STATIC type_qualifier_list assignment_expression ']' | direct_declarator '[' type_qualifier_list STATIC assignment_expression ']' | direct_declarator '[' type_qualifier_list '*' ']' | direct_declarator '[' '*' ']' | direct_declarator '[' ']' | direct_declarator '(' parameter_type_list ')' | direct_declarator '(' identifier_list ')' | direct_declarator '(' ')' ; pointer : '*' | '*' type_qualifier_list | '*' pointer | '*' type_qualifier_list pointer ; type_qualifier_list : type_qualifier | type_qualifier_list type_qualifier ; parameter_type_list : parameter_list | parameter_list ',' ELLIPSIS ; parameter_list : parameter_declaration | parameter_list ',' parameter_declaration ; parameter_declaration : declaration_specifiers declarator | declaration_specifiers abstract_declarator | declaration_specifiers ; identifier_list : IDENTIFIER | identifier_list ',' IDENTIFIER ; type_name : specifier_qualifier_list | specifier_qualifier_list abstract_declarator ; abstract_declarator : pointer | direct_abstract_declarator | pointer direct_abstract_declarator ; direct_abstract_declarator : '(' abstract_declarator ')' | '[' ']' | '[' assignment_expression ']' | direct_abstract_declarator '[' ']' | direct_abstract_declarator '[' assignment_expression ']' | '[' '*' ']' | direct_abstract_declarator '[' '*' ']' | '(' ')' | '(' parameter_type_list ')' | direct_abstract_declarator '(' ')' | direct_abstract_declarator '(' parameter_type_list ')' ; initializer : assignment_expression | '{' initializer_list '}' | '{' initializer_list ',' '}' ; initializer_list : initializer | designation initializer | initializer_list ',' initializer | initializer_list ',' designation initializer ; designation : designator_list '=' ; designator_list : designator | designator_list designator ; designator : '[' constant_expression ']' | '.' IDENTIFIER ; statement : labeled_statement | compound_statement | expression_statement | selection_statement | iteration_statement | jump_statement ; labeled_statement : IDENTIFIER ':' statement | CASE constant_expression ':' statement | DEFAULT ':' statement ; compound_statement : '{' '}' | '{' block_item_list '}' ; block_item_list : block_item | block_item_list block_item ; block_item : declaration | statement ; expression_statement : ';' | expression ';' ; selection_statement : IF '(' expression ')' statement | IF '(' expression ')' statement ELSE statement | SWITCH '(' expression ')' statement ; iteration_statement : WHILE '(' expression ')' statement | DO statement WHILE '(' expression ')' ';' | FOR '(' expression_statement expression_statement ')' statement | FOR '(' expression_statement expression_statement expression ')' statement | FOR '(' declaration expression_statement ')' statement | FOR '(' declaration expression_statement expression ')' statement ; jump_statement : GOTO IDENTIFIER ';' | CONTINUE ';' | BREAK ';' | RETURN ';' | RETURN expression ';' ; translation_unit : external_declaration | translation_unit external_declaration ; external_declaration : function_definition | declaration ; function_definition : declaration_specifiers declarator declaration_list compound_statement | declaration_specifiers declarator compound_statement ; declaration_list : declaration | declaration_list declaration ; %% #include <stdio.h> extern char yytext[]; extern int column; void yyerror(char const *s) { fflush(stdout); printf("\n%*s\n%*s\n", column, "^", column, s); }