/* This is the parser for the dlg * This is a part of the Purdue Compiler Construction Tool Set * * SOFTWARE RIGHTS * * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * completed. * * DLG 1.33 * Will Cohen * With mods by Terence Parr; AHPCRC, University of Minnesota * 1989-1995 */ #header << #include #include "dlg.h" >> << /* MR20 G. Hobbelt Fix for Borland C++ 4.x & 5.x compiling with ALL warnings enabled */ #ifdef __TURBOC__ #pragma warn -aus /* unused assignment of 'xxx' */ #endif #pragma clang diagnostic ignored "-Wparentheses-equality" int action_no = 0; /* keep track of actions outputted */ int nfa_allocated = 0; /* keeps track of number of nfa nodes */ nfa_node **nfa_array = NULL;/* root of binary tree that stores nfa array */ nfa_node nfa_model_node; /* model to initialize new nodes */ set used_chars; /* used to label trans. arcs */ set used_classes; /* classes or chars used to label trans. arcs */ set normal_chars; /* mask to get rid elements that aren't used in set */ int flag_paren = FALSE; int flag_brace = FALSE; int mode_counter = 0; /* keep track of number of %%names */ >> #lexaction << int func_action; /* should actions be turned into functions?*/ int lex_mode_counter = 0; /* keeps track of the number of %%names */ /* MR1 */ /* MR1 11-Apr-97 Provide mechanism for inserting code into DLG class */ /* MR1 via <<%%lexmember...>> */ /* MR1 */ int lexMember = 0; /* <<%%lexmemeber ...>> MR1 */ int lexAction = 0; /* <<%%lexaction ...>> MR1 */ int parserClass = 0; /* <<%%parserclass ...>> MR1 */ int lexPrefix = 0; /* <<%%lexprefix ...>> MR1 */ char theClassName[100]; /* MR11 */ char *pClassName=theClassName; /* MR11 */ int firstLexMember=1; /* MR1 */ #ifdef __USE_PROTOS void xxputc(int c) { /* MR1 */ #else void xxputc(c) /* MR1 */ int c; /* MR1 */ { /* MR1 */ #endif if (parserClass) { /* MR1 */ *pClassName++=c; /* MR1 */ *pClassName=0; /* MR1 */ } else if (lexMember || lexPrefix) { /* MR1 */ if (class_stream != NULL) fputc(c,class_stream); /* MR1 */ } else { /* MR1 */ fputc(c,OUT); /* MR1 */ }; /* MR1 */ } /* MR1 */ #ifdef __USE_PROTOS void xxprintf(char *format,char *string) { /* MR1 */ #else void xxprintf(format,string) /* MR1 */ char *format; /* MR1 */ char *string; /* MR1 */ { /* MR1 */ #endif if (lexMember || lexPrefix || parserClass) { /* MR1 */ if (class_stream != NULL) /* MR1 */ fprintf(class_stream,format,string); /* MR1 */ } else { /* MR1 */ fprintf(OUT,format,string); /* MR1 */ }; /* MR1 */ } /* MR1 */ >> #token "[\r\t\ ]+" << zzskip(); >> /* Ignore white */ #token "\n" << zzline++; zzskip(); DAWDLE; >> /* Track Line # */ #token L_EOF "\@" #token PER_PER "\%\%" #token NAME_PER_PER "\%\%[a-zA-Z_][a-zA-Z0-9_]*" << p_mode_def(&zzlextext[2],lex_mode_counter++); >> #token LEXMEMBER "\<\<\%\%lexmember" /* MR1 */ <> /* MR1 */ #token LEXACTION "\<\<\%\%lexaction" /* MR1 */ <> /* MR1 */ #token PARSERCLASS "\<\<\%\%parserclass" /* MR1 */ <> /* MR1 */ #token LEXPREFIX "\<\<\%\%lexprefix" /* MR1 */ <> /* MR1 */ #token ACTION "\<\<" << if (func_action) fprintf(OUT,"\n%s %sact%d()\n{ ", gen_cpp?"ANTLRTokenType":"static void", gen_cpp?ClassName("::"):"", ++action_no); zzmode(ACT); zzskip(); >> #token GREAT_GREAT "\>\>" #token L_BRACE "\{" #token R_BRACE "\}" #token L_PAR "\(" #token R_PAR "\)" #token L_BRACK "\[" #token R_BRACK "\]" #token ZERO_MORE "\*" #token ONE_MORE "\+" #token OR "\|" #token RANGE "\-" #token NOT "\~" #token OCTAL_VALUE "\\0[0-7]*" << {int t; sscanf(&zzlextext[1],"%o",&t); zzlextext[0] = t;}>> #token HEX_VALUE "\\0[Xx][0-9a-fA-F]+" << {int t; sscanf(&zzlextext[3],"%x",&t); zzlextext[0] = t;}>> #token DEC_VALUE "\\[1-9][0-9]*" << {int t; sscanf(&zzlextext[1],"%d",&t); zzlextext[0] = t;}>> #token TAB "\\t" << zzlextext[0] = '\t';>> #token NL "\\n" << zzlextext[0] = '\n';>> #token CR "\\r" << zzlextext[0] = '\r';>> #token BS "\\b" << zzlextext[0] = '\b';>> /* MR1 */ /* MR1 10-Apr-97 MR1 Allow #token regular expressions to cross lines */ /* MR1 */ #token CONTINUATION "\\ \n" << zzline++; zzskip();>> /* MR1 */ /* NOTE: this takes ANYTHING after the \ */ #token LIT "\\~[tnrb]" << zzlextext[0] = zzlextext[1];>> /* NOTE: this takes ANYTHING that doesn't match the other tokens */ #token REGCHAR "~[\\]" grammar : << p_head(); p_class_hdr(); func_action = FALSE;>> ( {LEXACTION | LEXMEMBER | LEXPREFIX | PARSERCLASS } ACTION)* /* MR1 */ <> start_states << func_action = FALSE; p_tables(); p_tail(); >> (ACTION)* "@" << if (firstLexMember != 0) p_class_def1(); >> /* MR1 */ ; start_states : ( PER_PER do_conversion | NAME_PER_PER do_conversion (NAME_PER_PER do_conversion)*) PER_PER ; do_conversion : <> rule_list << dfa_class_nop[mode_counter] = relabel($1.l,comp_level); if (comp_level) p_shift_table(mode_counter); dfa_basep[mode_counter] = dfa_allocated+1; make_dfa_model_node(dfa_class_nop[mode_counter]); nfa_to_dfa($1.l); ++mode_counter; func_action = FALSE; #ifdef HASH_STAT fprint_hash_stats(stderr); #endif >> ; rule_list : rule <<$$.l=$1.l; $$.r=$1.r;>> (rule <<{nfa_node *t1; t1 = new_nfa_node(); (t1)->trans[0]=$$.l; (t1)->trans[1]=$1.l; /* all accept nodes "dead ends" */ $$.l=t1; $$.r=NULL; } >> )* | /* empty */ <<$$.l = new_nfa_node(); $$.r = NULL; warning("no regular expressions", zzline); >> ; rule : reg_expr ACTION /* MR23 */ << if ($1.r != NULL) { $$.l=$1.l; $$.r=$1.r; ($1.r)->accept=action_no; } >> | ACTION <<$$.l = NULL; $$.r = NULL; error("no expression for action ", zzline); >> ; reg_expr : and_expr <<$$.l=$1.l; $$.r=$1.r;>> (OR and_expr <<{nfa_node *t1, *t2; t1 = new_nfa_node(); t2 = new_nfa_node(); (t1)->trans[0]=$$.l; (t1)->trans[1]=$2.l; /* MR23 */ if ($$.r != NULL) ($$.r)->trans[1]=t2; if ($2.r) { ($2.r)->trans[1]=t2; /* MR20 */ } $$.l=t1; $$.r=t2; } >> )* ; and_expr : repeat_expr << $$.l=$1.l; $$.r=$1.r; >> (repeat_expr /* MR23 */ << if ($$.r != NULL) { ($$.r)->trans[1]=$1.l; $$.r=$1.r; } >> )* ; repeat_expr : expr <<$$.l=$1.l; $$.r=$1.r;>> { ZERO_MORE <<{ nfa_node *t1,*t2; /* MR23 */ if ($$.r != NULL) ($$.r)->trans[0] = $$.l; t1 = new_nfa_node(); t2 = new_nfa_node(); t1->trans[0]=$$.l; t1->trans[1]=t2; /* MR23 */ if ($$.r != NULL) ($$.r)->trans[1]=t2; $$.l=t1;$$.r=t2; } >> | ONE_MORE /* MR23 */ <trans[0] = $$.l;>> } | ZERO_MORE << error("no expression for *", zzline);>> | ONE_MORE << error("no expression for +", zzline);>> ; expr : << $$.l = new_nfa_node(); $$.r = new_nfa_node(); >> L_BRACK atom_list R_BRACK << /* MR23 */ if ($$.l != NULL) { ($$.l)->trans[0] = $$.r; ($$.l)->label = set_dup($2.label); set_orin(&used_chars,($$.l)->label); } >> | NOT L_BRACK atom_list R_BRACK << /* MR23 */ if ($$.l != NULL) { ($$.l)->trans[0] = $$.r; ($$.l)->label = set_dif(normal_chars,$3.label); set_orin(&used_chars,($$.l)->label); } >> | L_PAR reg_expr R_PAR << /* MR23 */ if ($$.l != NULL) { ($$.l)->trans[0] = $2.l; if ($2.r) { ($2.r)->trans[1] = $$.r; /* MR20 */ } } >> | L_BRACE reg_expr R_BRACE << /* MR23 */ if ($$.l != NULL) { ($$.l)->trans[0] = $2.l; ($$.l)->trans[1] = $$.r; if ($2.r) { ($2.r)->trans[1] = $$.r; /* MR20 */ } } >> | atom << /* MR23 */ if ($$.l != NULL) { ($$.l)->trans[0] = $$.r; ($$.l)->label = set_dup($1.label); set_orin(&used_chars,($$.l)->label); } >> ; atom_list : << set_free($$.label); >> (near_atom <>)* ; near_atom : << register int i; register int i_prime; >> anychar <<$$.letter=$1.letter; $$.label=set_of($1.letter); i_prime = $1.letter + MIN_CHAR; if (case_insensitive && islower(i_prime)) set_orel(toupper(i_prime)-MIN_CHAR, &($$.label)); if (case_insensitive && isupper(i_prime)) set_orel(tolower(i_prime)-MIN_CHAR, &($$.label)); >> { RANGE anychar << if (case_insensitive){ i_prime = $$.letter+MIN_CHAR; $$.letter = (islower(i_prime) ? toupper(i_prime) : i_prime)-MIN_CHAR; i_prime = $2.letter+MIN_CHAR; $2.letter = (islower(i_prime) ? toupper(i_prime) : i_prime)-MIN_CHAR; } /* check to see if range okay */ { int debugLetter1 = $$.letter; int debugLetter2 = $2.letter; } if ($$.letter > $2.letter && $2.letter != 0xff){ /* MR16 */ error("invalid range ", zzline); } for (i=$$.letter; i<= (int)$2.letter; ++i){ set_orel(i,&($$.label)); i_prime = i+MIN_CHAR; if (case_insensitive && islower(i_prime)) set_orel(toupper(i_prime)-MIN_CHAR, &($$.label)); if (case_insensitive && isupper(i_prime)) set_orel(tolower(i_prime)-MIN_CHAR, &($$.label)); } >> } ; atom : << register int i_prime;>> anychar <<$$.label = set_of($1.letter); i_prime = $1.letter + MIN_CHAR; if (case_insensitive && islower(i_prime)) set_orel(toupper(i_prime)-MIN_CHAR, &($$.label)); if (case_insensitive && isupper(i_prime)) set_orel(tolower(i_prime)-MIN_CHAR, &($$.label)); >> ; anychar : REGCHAR <<$$.letter = $1.letter - MIN_CHAR;>> | OCTAL_VALUE <<$$.letter = $1.letter - MIN_CHAR;>> | HEX_VALUE <<$$.letter = $1.letter - MIN_CHAR;>> | DEC_VALUE <<$$.letter = $1.letter - MIN_CHAR;>> | TAB <<$$.letter = $1.letter - MIN_CHAR;>> | NL <<$$.letter = $1.letter - MIN_CHAR;>> | CR <<$$.letter = $1.letter - MIN_CHAR;>> | BS <<$$.letter = $1.letter - MIN_CHAR;>> | LIT <<$$.letter = $1.letter - MIN_CHAR;>> /* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR*/ | L_EOF <<$$.letter = 0;>> ; <> #lexclass ACT #token "@" << error("unterminated action", zzline); zzmode(START); >> #token ACTION "\>\>" << if (func_action) fprintf(OUT,"}\n\n"); zzmode(START); /* MR1 */ /* MR1 11-Apr-97 Provide mechanism for inserting code into DLG class */ /* MR1 via <<%%lexmember ...>> */ /* MR1 This is a consequence of not saving actions */ /* MR1 */ /* MR1 */ parserClass=0; /* MR1 */ lexPrefix=0; /* MR1 */ lexAction=0; /* MR1 */ lexMember=0; >> #token "\>" << xxputc(zzlextext[0]); zzskip(); >> /* MR1 */ #token "\\\>" << xxputc('>'); zzskip(); >> /* MR1 */ #token "\\" << xxputc('\\'); zzskip(); >> /* MR1 */ #token "\n" << xxputc(zzlextext[0]); ++zzline; zzskip(); >> /* MR1 */ #token "/\*" << zzmode(ACTION_COMMENTS); /* MR1 */ xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */ >> /* MR1 */ #token "//" << zzmode(ACTION_CPP_COMMENTS); /* MR1 */ xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */ >> /* MR1 */ #token "~[]" << xxputc(zzlextext[0]); zzskip(); >> /* MR1 */ /* MR1 */ #lexclass ACTION_COMMENTS /* MR1 */ #token "\*/" << zzmode(ACT); /* MR1 */ xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */ >> /* MR1 */ #token "[\n\r]" << zzline++; xxputc(zzlextext[0]); zzskip();>> /* MR1 */ #token "~[]" << xxputc(zzlextext[0]); zzskip();>> /* MR1 */ /* MR1 */ #lexclass ACTION_CPP_COMMENTS /* MR1 */ #token "[\n\r]" << zzmode(ACT); zzline++; /* MR1 */ xxprintf("%s", &(zzlextext[0])); zzskip(); /* MR1 */ >> /* MR1 */ #token "~[]" << xxputc(zzlextext[0]); zzskip();>> /* MR1 */ << /* adds a new nfa to the binary tree and returns a pointer to it */ nfa_node * #ifdef __USE_PROTOS new_nfa_node(void) #else new_nfa_node() #endif { register nfa_node *t; static int nfa_size=0; /* elements nfa_array[] can hold */ ++nfa_allocated; if (nfa_size<=nfa_allocated){ /* need to redo array */ if (!nfa_array){ /* need some to do inital allocation */ nfa_size=nfa_allocated+NFA_MIN; nfa_array=(nfa_node **) malloc(sizeof(nfa_node*)* nfa_size); }else{ /* need more space */ nfa_size=2*(nfa_allocated+1); nfa_array=(nfa_node **) realloc(nfa_array, sizeof(nfa_node*)*nfa_size); } } /* fill out entry in array */ t = (nfa_node*) malloc(sizeof(nfa_node)); nfa_array[nfa_allocated] = t; *t = nfa_model_node; t->node_no = nfa_allocated; return t; } /* initialize the model node used to fill in newly made nfa_nodes */ void #ifdef __USE_PROTOS make_nfa_model_node(void) #else make_nfa_model_node() #endif { nfa_model_node.node_no = -1; /* impossible value for real nfa node */ nfa_model_node.nfa_set = 0; nfa_model_node.accept = 0; /* error state default*/ nfa_model_node.trans[0] = NULL; nfa_model_node.trans[1] = NULL; nfa_model_node.label = empty; } >> << #if defined(DEBUG) || defined(_DEBUG) /* print out the pointer value and the node_number */ void #ifdef __USE_PROTOS fprint_dfa_pair(FILE *f, nfa_node *p) #else fprint_dfa_pair(f, p) FILE *f; nfa_node *p; #endif { if (p){ fprintf(f, "%x (%d)", p, p->node_no); }else{ fprintf(f, "(nil)"); } } /* print out interest information on a set */ void #ifdef __USE_PROTOS fprint_set(FILE *f, set s) #else fprint_set(f,s) FILE *f; set s; #endif { unsigned int *x; fprintf(f, "n = %d,", s.n); if (s.setword){ fprintf(f, "setword = %x, ", s.setword); /* print out all the elements in the set */ x = set_pdq(s); while (*x!=nil){ fprintf(f, "%d ", *x); ++x; } }else{ fprintf(f, "setword = (nil)"); } } /* code to be able to dump out the nfas return 0 if okay dump return 1 if screwed up */ int #ifdef __USE_PROTOS dump_nfas(int first_node, int last_node) #else dump_nfas(first_node, last_node) int first_node; int last_node; #endif { register int i; nfa_node *t; for (i=first_node; i<=last_node; ++i){ t = NFA(i); if (!t) break; fprintf(stderr, "nfa_node %d {\n", t->node_no); fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set); fprintf(stderr, "\taccept\t=\t%d\n", t->accept); fprintf(stderr, "\ttrans\t=\t("); fprint_dfa_pair(stderr, t->trans[0]); fprintf(stderr, ","); fprint_dfa_pair(stderr, t->trans[1]); fprintf(stderr, ")\n"); fprintf(stderr, "\tlabel\t=\t{ "); fprint_set(stderr, t->label); fprintf(stderr, "\t}\n"); fprintf(stderr, "}\n\n"); } return 0; } #endif >> << /* DLG-specific syntax error message generator * (define USER_ZZSYN when compiling so don't get 2 definitions) */ void #ifdef __USE_PROTOS zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text) #else zzsyn(text, tok, egroup, eset, etok, k, bad_text) char *text, *egroup, *bad_text; int tok; int etok; int k; SetWordType *eset; #endif { fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline); fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text); if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} if ( k==1 ) fprintf(stderr, " missing"); else { fprintf(stderr, "; \"%s\" not", bad_text); if ( zzset_deg(eset)>1 ) fprintf(stderr, " in"); } if ( zzset_deg(eset)>0 ) zzedecode(eset); else fprintf(stderr, " %s", zztokens[etok]); if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup); fprintf(stderr, "\n"); } >>