mirror of https://github.com/acidanthera/audk.git
1865 lines
48 KiB
C
1865 lines
48 KiB
C
/*
|
|
* misc.c
|
|
*
|
|
* Manage tokens, regular expressions.
|
|
* Print methods for debugging
|
|
* Compute follow lists onto tail ends of rules.
|
|
*
|
|
* The following functions are visible:
|
|
*
|
|
* int addTname(char *); Add token name
|
|
* int addTexpr(char *); Add token expression
|
|
* int Tnum(char *); Get number of expr/token
|
|
* void Tklink(char *, char *); Link a name with an expression
|
|
* int hasAction(expr); Does expr already have action assigned?
|
|
* void setHasAction(expr); Indicate that expr now has an action
|
|
* Entry *newEntry(char *,int); Create new table entry with certain size
|
|
* void list_add(ListNode **list, char *e)
|
|
* void list_free(ListNode **list, int freeData); *** MR10 ***
|
|
* void list_apply(ListNode *list, void (*f)())
|
|
* void lexclass(char *m); switch to new/old lexical class
|
|
* void lexmode(int i); switch to old lexical class i
|
|
*
|
|
* SOFTWARE RIGHTS
|
|
*
|
|
* We reserve no LEGAL rights to the Purdue Compiler Construction Tool
|
|
* Set (PCCTS) -- PCCTS is in the public domain. An individual or
|
|
* company may do whatever they wish with source code distributed with
|
|
* PCCTS or the code generated by PCCTS, including the incorporation of
|
|
* PCCTS, or its output, into commerical software.
|
|
*
|
|
* We encourage users to develop software with PCCTS. However, we do ask
|
|
* that credit is given to us for developing PCCTS. By "credit",
|
|
* we mean that if you incorporate our source code into one of your
|
|
* programs (commercial product, research project, or otherwise) that you
|
|
* acknowledge this fact somewhere in the documentation, research report,
|
|
* etc... If you like PCCTS and have developed a nice tool with the
|
|
* output, please mention that you developed it using PCCTS. In
|
|
* addition, we ask that this header remain intact in our source code.
|
|
* As long as these guidelines are kept, we expect to continue enhancing
|
|
* this system and expect to make other tools available as they are
|
|
* completed.
|
|
*
|
|
* ANTLR 1.33
|
|
* Terence Parr
|
|
* Parr Research Corporation
|
|
* with Purdue University and AHPCRC, University of Minnesota
|
|
* 1989-2001
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include "pcctscfg.h"
|
|
#include "set.h"
|
|
#include "syn.h"
|
|
#include "hash.h"
|
|
#include "generic.h"
|
|
#include "dlgdef.h"
|
|
#include <ctype.h>
|
|
|
|
static int tsize=TSChunk; /* size of token str arrays */
|
|
|
|
static void
|
|
#ifdef __USE_PROTOS
|
|
RemapForcedTokensInSyntaxDiagram(Node *);
|
|
#else
|
|
RemapForcedTokensInSyntaxDiagram();
|
|
#endif
|
|
|
|
/* T o k e n M a n i p u l a t i o n */
|
|
|
|
/*
|
|
* add token 't' to the TokenStr/Expr array. Make more room if necessary.
|
|
* 't' is either an expression or a token name.
|
|
*
|
|
* There is only one TokenStr array, but multiple ExprStr's. Therefore,
|
|
* for each lex class (element of lclass) we must extend the ExprStr array.
|
|
* ExprStr's and TokenStr are always all the same size.
|
|
*
|
|
* Also, there is a Texpr hash table for each automaton.
|
|
*/
|
|
static void
|
|
#ifdef __USE_PROTOS
|
|
Ttrack( char *t )
|
|
#else
|
|
Ttrack( t )
|
|
char *t;
|
|
#endif
|
|
{
|
|
if ( TokenNum >= tsize ) /* terminal table overflow? */
|
|
{
|
|
char **p;
|
|
int i, more, j;
|
|
|
|
more = TSChunk * (1 + ((TokenNum-tsize) / TSChunk));
|
|
tsize += more;
|
|
TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *));
|
|
require(TokenStr != NULL, "Ttrack: can't extend TokenStr");
|
|
for (i=0; i<NumLexClasses; i++)
|
|
{
|
|
lclass[i].exprs = (char **)
|
|
realloc((char *)lclass[i].exprs, tsize*sizeof(char *));
|
|
require(lclass[i].exprs != NULL, "Ttrack: can't extend ExprStr");
|
|
for (p= &lclass[i].exprs[tsize-more],j=1; j<=more; j++) *p++ = NULL;
|
|
}
|
|
for (p= &TokenStr[tsize-more],i=1; i<=more; i++) *p++ = NULL;
|
|
lexmode( CurrentLexClass ); /* reset ExprStr in case table moved */
|
|
}
|
|
/* note: we use the actual ExprStr/TokenStr array
|
|
* here as TokenInd doesn't exist yet
|
|
*/
|
|
if ( *t == '"' ) ExprStr[TokenNum] = t;
|
|
else TokenStr[TokenNum] = t;
|
|
}
|
|
|
|
static Expr *
|
|
#ifdef __USE_PROTOS
|
|
newExpr( char *e )
|
|
#else
|
|
newExpr( e )
|
|
char *e;
|
|
#endif
|
|
{
|
|
Expr *p = (Expr *) calloc(1, sizeof(Expr));
|
|
require(p!=NULL, "newExpr: cannot alloc Expr node");
|
|
|
|
p->expr = e;
|
|
p->lclass = CurrentLexClass;
|
|
return p;
|
|
}
|
|
|
|
/* switch to lexical class/mode m. This amounts to creating a new
|
|
* lex mode if one does not already exist and making ExprStr point
|
|
* to the correct char string array. We must also switch Texpr tables.
|
|
*
|
|
* BTW, we need multiple ExprStr arrays because more than one automaton
|
|
* may have the same label for a token, but with different expressions.
|
|
* We need to track an expr for each automaton. If we disallowed this
|
|
* feature, only one ExprStr would be required.
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
lexclass( char *m )
|
|
#else
|
|
lexclass( m )
|
|
char *m;
|
|
#endif
|
|
{
|
|
int i;
|
|
TermEntry *p;
|
|
static char EOFSTR[] = "\"@\"";
|
|
|
|
if ( hash_get(Tname, m) != NULL )
|
|
{
|
|
warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m));
|
|
}
|
|
/* does m already exist? */
|
|
i = LexClassIndex(m);
|
|
if ( i != -1 ) {lexmode(i); return;}
|
|
/* must make new one */
|
|
NumLexClasses++;
|
|
CurrentLexClass = NumLexClasses-1;
|
|
require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files");
|
|
lclass[CurrentLexClass].classnum = m;
|
|
lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *));
|
|
require(lclass[CurrentLexClass].exprs!=NULL,
|
|
"lexclass: cannot allocate ExprStr");
|
|
lclass[CurrentLexClass].htable = newHashTable();
|
|
ExprStr = lclass[CurrentLexClass].exprs;
|
|
Texpr = lclass[CurrentLexClass].htable;
|
|
/* define EOF for each automaton */
|
|
p = newTermEntry( EOFSTR );
|
|
p->token = EofToken; /* couldn't have remapped tokens yet, use EofToken */
|
|
hash_add(Texpr, EOFSTR, (Entry *)p);
|
|
list_add(&ExprOrder, (void *)newExpr(EOFSTR));
|
|
/* note: we use the actual ExprStr array
|
|
* here as TokenInd doesn't exist yet
|
|
*/
|
|
ExprStr[EofToken] = EOFSTR;
|
|
}
|
|
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
lexmode( int i )
|
|
#else
|
|
lexmode( i )
|
|
int i;
|
|
#endif
|
|
{
|
|
require(i<NumLexClasses, "lexmode: invalid mode");
|
|
ExprStr = lclass[i].exprs;
|
|
Texpr = lclass[i].htable;
|
|
CurrentLexClass = i;
|
|
}
|
|
|
|
/* return index into lclass array of lexical class. return -1 if nonexistent */
|
|
int
|
|
#ifdef __USE_PROTOS
|
|
LexClassIndex( char *cl )
|
|
#else
|
|
LexClassIndex( cl )
|
|
char *cl;
|
|
#endif
|
|
{
|
|
int i;
|
|
|
|
for (i=0; i<NumLexClasses; i++)
|
|
{
|
|
if ( strcmp(lclass[i].classnum, cl) == 0 ) return i;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
#ifdef __USE_PROTOS
|
|
hasAction( char *expr )
|
|
#else
|
|
hasAction( expr )
|
|
char *expr;
|
|
#endif
|
|
{
|
|
TermEntry *p;
|
|
require(expr!=NULL, "hasAction: invalid expr");
|
|
|
|
p = (TermEntry *) hash_get(Texpr, expr);
|
|
require(p!=NULL, eMsg1("hasAction: expr '%s' doesn't exist",expr));
|
|
return (p->action!=NULL);
|
|
}
|
|
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
setHasAction( char *expr, char *action )
|
|
#else
|
|
setHasAction( expr, action )
|
|
char *expr;
|
|
char *action;
|
|
#endif
|
|
{
|
|
TermEntry *p;
|
|
require(expr!=NULL, "setHasAction: invalid expr");
|
|
|
|
p = (TermEntry *) hash_get(Texpr, expr);
|
|
require(p!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr));
|
|
p->action = action;
|
|
}
|
|
|
|
ForcedToken *
|
|
#ifdef __USE_PROTOS
|
|
newForcedToken(char *token, int tnum)
|
|
#else
|
|
newForcedToken(token, tnum)
|
|
char *token;
|
|
int tnum;
|
|
#endif
|
|
{
|
|
ForcedToken *ft = (ForcedToken *) calloc(1, sizeof(ForcedToken));
|
|
require(ft!=NULL, "out of memory");
|
|
ft->token = token;
|
|
ft->tnum = tnum;
|
|
return ft;
|
|
}
|
|
|
|
/*
|
|
* Make a token indirection array that remaps token numbers and then walk
|
|
* the appropriate symbol tables and SynDiag to change token numbers
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
RemapForcedTokens(void)
|
|
#else
|
|
RemapForcedTokens()
|
|
#endif
|
|
{
|
|
ListNode *p;
|
|
ForcedToken *q;
|
|
int max_token_number=0; /* MR9 23-Sep-97 Removed "unsigned" */
|
|
int i;
|
|
|
|
if ( ForcedTokens == NULL ) return;
|
|
|
|
/* find max token num */
|
|
for (p = ForcedTokens->next; p!=NULL; p=p->next)
|
|
{
|
|
q = (ForcedToken *) p->elem;
|
|
if ( q->tnum > max_token_number ) max_token_number = q->tnum;
|
|
}
|
|
fprintf(stderr, "max token number is %d\n", max_token_number);
|
|
|
|
/* make token indirection array */
|
|
TokenInd = (int *) calloc(max_token_number+1, sizeof(int));
|
|
LastTokenCounted = TokenNum;
|
|
TokenNum = max_token_number+1;
|
|
require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd");
|
|
|
|
/* fill token indirection array and change token id htable ; swap token indices */
|
|
for (i=1; i<TokenNum; i++) TokenInd[i] = i;
|
|
for (p = ForcedTokens->next; p!=NULL; p=p->next)
|
|
{
|
|
TermEntry *te;
|
|
int old_pos, t;
|
|
|
|
q = (ForcedToken *) p->elem;
|
|
fprintf(stderr, "%s forced to %d\n", q->token, q->tnum);
|
|
te = (TermEntry *) hash_get(Tname, q->token);
|
|
require(te!=NULL, "RemapForcedTokens: token not in hash table");
|
|
old_pos = te->token;
|
|
fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
|
|
fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
|
|
q = (ForcedToken *) p->elem;
|
|
t = TokenInd[old_pos];
|
|
TokenInd[old_pos] = q->tnum;
|
|
TokenInd[q->tnum] = t;
|
|
te->token = q->tnum; /* update token type id symbol table */
|
|
fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]);
|
|
fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]);
|
|
|
|
/* Change the token number in the sym tab entry for the exprs
|
|
* at the old position of the token id and the target position
|
|
*/
|
|
/* update expr at target (if any) of forced token id */
|
|
if ( q->tnum < TokenNum ) /* is it a valid position? */
|
|
{
|
|
for (i=0; i<NumLexClasses; i++)
|
|
{
|
|
if ( lclass[i].exprs[q->tnum]!=NULL )
|
|
{
|
|
/* update the symbol table for this expr */
|
|
TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]);
|
|
require(e!=NULL, "RemapForcedTokens: expr not in hash table");
|
|
e->token = old_pos;
|
|
fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n",
|
|
lclass[i].exprs[q->tnum], q->tnum, i, old_pos);
|
|
}
|
|
}
|
|
}
|
|
/* update expr at old position (if any) of forced token id */
|
|
for (i=0; i<NumLexClasses; i++)
|
|
{
|
|
if ( lclass[i].exprs[old_pos]!=NULL )
|
|
{
|
|
/* update the symbol table for this expr */
|
|
TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[old_pos]);
|
|
require(e!=NULL, "RemapForcedTokens: expr not in hash table");
|
|
e->token = q->tnum;
|
|
fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n",
|
|
lclass[i].exprs[old_pos], q->token, i, q->tnum);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Update SynDiag */
|
|
RemapForcedTokensInSyntaxDiagram((Node *)SynDiag);
|
|
}
|
|
|
|
static void
|
|
#ifdef __USE_PROTOS
|
|
RemapForcedTokensInSyntaxDiagram(Node *p)
|
|
#else
|
|
RemapForcedTokensInSyntaxDiagram(p)
|
|
Node *p;
|
|
#endif
|
|
{
|
|
Junction *j = (Junction *) p;
|
|
RuleRefNode *r = (RuleRefNode *) p;
|
|
TokNode *t = (TokNode *)p;
|
|
|
|
if ( p==NULL ) return;
|
|
require(p->ntype>=1 && p->ntype<=NumNodeTypes, "Remap...: invalid diagram node");
|
|
switch ( p->ntype )
|
|
{
|
|
case nJunction :
|
|
if ( j->visited ) return;
|
|
if ( j->jtype == EndRule ) return;
|
|
j->visited = TRUE;
|
|
RemapForcedTokensInSyntaxDiagram( j->p1 );
|
|
RemapForcedTokensInSyntaxDiagram( j->p2 );
|
|
j->visited = FALSE;
|
|
return;
|
|
case nRuleRef :
|
|
RemapForcedTokensInSyntaxDiagram( r->next );
|
|
return;
|
|
case nToken :
|
|
if ( t->remapped ) return; /* we've been here before */
|
|
t->remapped = 1;
|
|
fprintf(stderr, "remapping %d to %d\n", t->token, TokenInd[t->token]);
|
|
t->token = TokenInd[t->token];
|
|
RemapForcedTokensInSyntaxDiagram( t->next );
|
|
return;
|
|
case nAction :
|
|
RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next );
|
|
return;
|
|
default :
|
|
fatal_internal("invalid node type");
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Add a token name. Return the token number associated with it. If it already
|
|
* exists, then return the token number assigned to it.
|
|
*
|
|
* Track the order in which tokens are found so that the DLG output maintains
|
|
* that order. It also lets us map token numbers to strings.
|
|
*/
|
|
int
|
|
#ifdef __USE_PROTOS
|
|
addTname( char *token )
|
|
#else
|
|
addTname( token )
|
|
char *token;
|
|
#endif
|
|
{
|
|
TermEntry *p;
|
|
require(token!=NULL, "addTname: invalid token name");
|
|
|
|
if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
|
|
p = newTermEntry( token );
|
|
Ttrack( p->str );
|
|
p->token = TokenNum++;
|
|
hash_add(Tname, token, (Entry *)p);
|
|
return p->token;
|
|
}
|
|
|
|
/* This is the same as addTname except we force the TokenNum to be tnum.
|
|
* We don't have to use the Forced token stuff as no tokens will have
|
|
* been defined with #tokens when this is called. This is only called
|
|
* when a #tokdefs meta-op is used.
|
|
*/
|
|
int
|
|
#ifdef __USE_PROTOS
|
|
addForcedTname( char *token, int tnum )
|
|
#else
|
|
addForcedTname( token, tnum )
|
|
char *token;
|
|
int tnum;
|
|
#endif
|
|
{
|
|
TermEntry *p;
|
|
require(token!=NULL, "addTname: invalid token name");
|
|
|
|
if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token;
|
|
p = newTermEntry( token );
|
|
Ttrack( p->str );
|
|
p->token = tnum;
|
|
hash_add(Tname, token, (Entry *)p);
|
|
return p->token;
|
|
}
|
|
|
|
/*
|
|
* Add a token expr. Return the token number associated with it. If it already
|
|
* exists, then return the token number assigned to it.
|
|
*/
|
|
int
|
|
#ifdef __USE_PROTOS
|
|
addTexpr( char *expr )
|
|
#else
|
|
addTexpr( expr )
|
|
char *expr;
|
|
#endif
|
|
{
|
|
TermEntry *p;
|
|
require(expr!=NULL, "addTexpr: invalid regular expression");
|
|
|
|
if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token;
|
|
p = newTermEntry( expr );
|
|
Ttrack( p->str );
|
|
/* track the order in which they occur */
|
|
list_add(&ExprOrder, (void *)newExpr(p->str));
|
|
p->token = TokenNum++;
|
|
hash_add(Texpr, expr, (Entry *)p);
|
|
return p->token;
|
|
}
|
|
|
|
/* return the token number of 'term'. Return 0 if no 'term' exists */
|
|
int
|
|
#ifdef __USE_PROTOS
|
|
Tnum( char *term )
|
|
#else
|
|
Tnum( term )
|
|
char *term;
|
|
#endif
|
|
{
|
|
TermEntry *p;
|
|
require(term!=NULL, "Tnum: invalid terminal");
|
|
|
|
if ( *term=='"' ) p = (TermEntry *) hash_get(Texpr, term);
|
|
else p = (TermEntry *) hash_get(Tname, term);
|
|
if ( p == NULL ) return 0;
|
|
else return p->token;
|
|
}
|
|
|
|
/* associate a Name with an expr. If both have been already assigned
|
|
* token numbers, then an error is reported. Add the token or expr
|
|
* that has not been added if no error. This 'represents' the #token
|
|
* ANTLR pseudo-op. If both have not been defined, define them both
|
|
* linked to same token number.
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
Tklink( char *token, char *expr )
|
|
#else
|
|
Tklink( token, expr )
|
|
char *token;
|
|
char *expr;
|
|
#endif
|
|
{
|
|
TermEntry *p, *q;
|
|
require(token!=NULL && expr!=NULL, "Tklink: invalid token name and/or expr");
|
|
|
|
p = (TermEntry *) hash_get(Tname, token);
|
|
q = (TermEntry *) hash_get(Texpr, expr);
|
|
if ( p != NULL && q != NULL ) /* both defined */
|
|
{
|
|
warn( eMsg2("token name %s and rexpr %s already defined; ignored",
|
|
token, expr) );
|
|
return;
|
|
}
|
|
if ( p==NULL && q==NULL ) /* both not defined */
|
|
{
|
|
int t = addTname( token );
|
|
q = newTermEntry( expr );
|
|
hash_add(Texpr, expr, (Entry *)q);
|
|
q->token = t;
|
|
/* note: we use the actual ExprStr array
|
|
* here as TokenInd doesn't exist yet
|
|
*/
|
|
ExprStr[t] = q->str;
|
|
/* track the order in which they occur */
|
|
list_add(&ExprOrder, (void *)newExpr(q->str));
|
|
return;
|
|
}
|
|
if ( p != NULL ) /* one is defined, one is not */
|
|
{
|
|
q = newTermEntry( expr );
|
|
hash_add(Texpr, expr, (Entry *)q);
|
|
q->token = p->token;
|
|
ExprStr[p->token] = q->str; /* both expr and token str defined now */
|
|
list_add(&ExprOrder, (void *)newExpr(q->str));
|
|
}
|
|
else /* trying to associate name with expr here*/
|
|
{
|
|
p = newTermEntry( token );
|
|
hash_add(Tname, token, (Entry *)p);
|
|
p->token = q->token;
|
|
TokenStr[p->token] = p->str;/* both expr and token str defined now */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Given a string, this function allocates and returns a pointer to a
|
|
* hash table record of size 'sz' whose "str" pointer is reset to a position
|
|
* in the string table.
|
|
*/
|
|
Entry *
|
|
#ifdef __USE_PROTOS
|
|
newEntry( char *text, int sz )
|
|
#else
|
|
newEntry( text, sz )
|
|
char *text;
|
|
int sz;
|
|
#endif
|
|
{
|
|
Entry *p;
|
|
require(text!=NULL, "new: NULL terminal");
|
|
|
|
if ( (p = (Entry *) calloc(1,sz)) == 0 )
|
|
{
|
|
fatal_internal("newEntry: out of memory for terminals\n");
|
|
exit(PCCTS_EXIT_FAILURE);
|
|
}
|
|
p->str = mystrdup(text);
|
|
|
|
return(p);
|
|
}
|
|
|
|
/*
|
|
* add an element to a list.
|
|
*
|
|
* Any non-empty list has a sentinel node whose 'elem' pointer is really
|
|
* a pointer to the last element. (i.e. length(list) = #elemIn(list)+1).
|
|
* Elements are appended to the list.
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
list_add( ListNode **list, void *e )
|
|
#else
|
|
list_add( list, e )
|
|
ListNode **list;
|
|
void *e;
|
|
#endif
|
|
{
|
|
ListNode *p, *tail;
|
|
require(e!=NULL, "list_add: attempting to add NULL list element");
|
|
|
|
p = newListNode;
|
|
require(p!=NULL, "list_add: cannot alloc new list node");
|
|
p->elem = e;
|
|
if ( *list == NULL )
|
|
{
|
|
ListNode *sentinel = newListNode;
|
|
require(sentinel!=NULL, "list_add: cannot alloc sentinel node");
|
|
*list=sentinel;
|
|
sentinel->next = p;
|
|
sentinel->elem = (char *)p; /* set tail pointer */
|
|
}
|
|
else /* find end of list */
|
|
{
|
|
tail = (ListNode *) (*list)->elem; /* get tail pointer */
|
|
tail->next = p;
|
|
(*list)->elem = (char *) p; /* reset tail */
|
|
}
|
|
}
|
|
|
|
/* MR10 list_free() frees the ListNode elements in the list */
|
|
/* MR10 if freeData then free the data elements of the list too */
|
|
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
list_free(ListNode **list,int freeData)
|
|
#else
|
|
list_free(list,freeData)
|
|
ListNode **list;
|
|
int freeData;
|
|
#endif
|
|
{
|
|
ListNode *p;
|
|
ListNode *next;
|
|
|
|
if (list == NULL) return;
|
|
if (*list == NULL) return;
|
|
for (p=*list; p != NULL; p=next) {
|
|
next=p->next;
|
|
if (freeData && p->elem != NULL) {
|
|
free( (char *) p->elem);
|
|
};
|
|
free( (char *) p);
|
|
};
|
|
*list=NULL;
|
|
}
|
|
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
list_apply( ListNode *list, void (*f)(void *) )
|
|
#else
|
|
list_apply( list, f )
|
|
ListNode *list;
|
|
void (*f)();
|
|
#endif
|
|
{
|
|
ListNode *p;
|
|
require(f!=NULL, "list_apply: NULL function to apply");
|
|
|
|
if ( list == NULL ) return;
|
|
for (p = list->next; p!=NULL; p=p->next) (*f)( p->elem );
|
|
}
|
|
|
|
/* MR27 */
|
|
|
|
#ifdef __USE_PROTOS
|
|
int list_search_cstring(ListNode *list, char * cstring)
|
|
#else
|
|
int list_search_cstring(list, cstring)
|
|
ListNode * list;
|
|
char * cstring;
|
|
#endif
|
|
{
|
|
ListNode *p;
|
|
if (list == NULL ) return 0;
|
|
for (p = list->next; p!=NULL; p=p->next) {
|
|
if (p->elem == NULL) continue;
|
|
if (0 == strcmp((char *) p->elem , cstring)) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* F O L L O W C y c l e S t u f f */
|
|
|
|
/* make a key based upon (rulename, computation, k value).
|
|
* Computation values are 'i'==FIRST, 'o'==FOLLOW.
|
|
*/
|
|
|
|
/* MR10 Make the key all characters so it can be read easily */
|
|
/* MR10 by a simple dump program. Also, separates */
|
|
/* MR10 'o' and 'i' from rule name */
|
|
|
|
char *
|
|
#ifdef __USE_PROTOS
|
|
Fkey( char *rule, int computation, int k )
|
|
#else
|
|
Fkey( rule, computation, k )
|
|
char *rule;
|
|
int computation;
|
|
int k;
|
|
#endif
|
|
{
|
|
static char key[MaxRuleName+2+2+1]; /* MR10 */
|
|
int i;
|
|
|
|
if ( k > 99 ) /* MR10 */
|
|
fatal("k>99 is too big for this implementation of ANTLR!\n"); /* MR10 */
|
|
if ( (i=strlen(rule)) > MaxRuleName ) /* MR10 */
|
|
fatal( eMsgd("rule name > max of %d\n", MaxRuleName) ); /* MR10 */
|
|
strcpy(key,rule);
|
|
|
|
/* MR10 */ key[i]='*';
|
|
/* MR10 */ key[i+1] = (char) computation; /* MR20 G. Hobbelt */
|
|
/* MR10 */ if (k < 10) {
|
|
/* MR10 */ key[i+2] = (char) ( '0' + k);
|
|
/* MR10 */ key[i+3] = '\0';
|
|
/* MR10 */ } else {
|
|
/* MR10 */ key[i+2] = (char) ( '0' + k/10);
|
|
/* MR10 */ key[i+3] = (char) ( '0' + k % 10);
|
|
/* MR10 */ key[i+4] = '\0';
|
|
/* MR10 */ };
|
|
|
|
return key;
|
|
}
|
|
|
|
/* Push a rule onto the kth FOLLOW stack */
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
FoPush( char *rule, int k )
|
|
#else
|
|
FoPush( rule, k )
|
|
char *rule;
|
|
int k;
|
|
#endif
|
|
{
|
|
RuleEntry *r;
|
|
require(rule!=NULL, "FoPush: tried to push NULL rule");
|
|
require(k<=CLL_k, "FoPush: tried to access non-existent stack");
|
|
|
|
/*fprintf(stderr, "FoPush(%s)\n", rule);*/
|
|
r = (RuleEntry *) hash_get(Rname, rule);
|
|
if ( r == NULL ) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule) );}
|
|
if ( FoStack[k] == NULL ) /* Does the kth stack exist yet? */
|
|
{
|
|
/*fprintf(stderr, "allocating FoStack\n");*/
|
|
FoStack[k] = (int *) calloc(FoStackSize, sizeof(int));
|
|
require(FoStack[k]!=NULL, "FoPush: cannot allocate FOLLOW stack\n");
|
|
}
|
|
if ( FoTOS[k] == NULL )
|
|
{
|
|
FoTOS[k]=FoStack[k];
|
|
*(FoTOS[k]) = r->rulenum;
|
|
}
|
|
else
|
|
{
|
|
#ifdef MEMCHK
|
|
require(valid(FoStack[k]), "FoPush: invalid FoStack");
|
|
#endif
|
|
if ( FoTOS[k] >= &(FoStack[k][FoStackSize-1]) )
|
|
fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n",
|
|
FoStackSize) );
|
|
require(FoTOS[k]>=FoStack[k],
|
|
eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox",
|
|
rule));
|
|
++(FoTOS[k]);
|
|
*(FoTOS[k]) = r->rulenum;
|
|
}
|
|
{
|
|
/*
|
|
**** int *p;
|
|
**** fprintf(stderr, "FoStack[k=%d]:\n", k);
|
|
**** for (p=FoStack[k]; p<=FoTOS[k]; p++)
|
|
**** {
|
|
**** fprintf(stderr, "\t%s\n", RulePtr[*p]->rname);
|
|
**** }
|
|
*/
|
|
}
|
|
}
|
|
|
|
/* Pop one rule off of the FOLLOW stack. TOS ptr is NULL if empty. */
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
FoPop( int k )
|
|
#else
|
|
FoPop( k )
|
|
int k;
|
|
#endif
|
|
{
|
|
require(k<=CLL_k, "FoPop: tried to access non-existent stack");
|
|
/*fprintf(stderr, "FoPop\n");*/
|
|
require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]),
|
|
"FoPop: FoStack stack-ptr is playing out of its sandbox");
|
|
if ( FoTOS[k] == FoStack[k] ) FoTOS[k] = NULL;
|
|
else (FoTOS[k])--;
|
|
}
|
|
|
|
/* Compute FOLLOW cycle.
|
|
* Mark all FOLLOW sets for rules in cycle as incomplete.
|
|
* Then, save cycle on the cycle list (Cycles) for later resolution.
|
|
* The Cycle is stored in the form:
|
|
* (head of cycle==croot, rest of rules in cycle==cyclicDep)
|
|
*
|
|
* e.g. (Fo means "FOLLOW of", "-->" means requires or depends on)
|
|
*
|
|
* Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x)
|
|
* ^----Infinite recursion (cycle)
|
|
*
|
|
* the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}). Fo(x) depends
|
|
* on the FOLLOW of a,b, and c. The root of a cycle is always complete after
|
|
* Fo(x) finishes. Fo(a,b,c) however are not. It turns out that all rules
|
|
* in a FOLLOW cycle have the same FOLLOW set.
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
RegisterCycle( char *rule, int k )
|
|
#else
|
|
RegisterCycle( rule, k )
|
|
char *rule;
|
|
int k;
|
|
#endif
|
|
{
|
|
CacheEntry *f;
|
|
Cycle *c;
|
|
int *p;
|
|
RuleEntry *r;
|
|
require(rule!=NULL, "RegisterCycle: tried to register NULL rule");
|
|
require(k<=CLL_k, "RegisterCycle: tried to access non-existent stack");
|
|
|
|
/*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/
|
|
/* Find cycle start */
|
|
r = (RuleEntry *) hash_get(Rname, rule);
|
|
require(r!=NULL,eMsg1("rule %s must be defined but isn't", rule));
|
|
require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]),
|
|
eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox",
|
|
rule));
|
|
/*** if ( FoTOS[k]<FoStack[k]||FoTOS[k]>&(FoStack[k][FoStackSize-1]) )
|
|
**** {
|
|
**** fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n",
|
|
**** rule);
|
|
**** fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n",
|
|
**** FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1]));
|
|
**** exit(PCCTS_EXIT_FAILURE);
|
|
**** }
|
|
****/
|
|
|
|
#ifdef MEMCHK
|
|
require(valid(FoStack[k]), "RegisterCycle: invalid FoStack");
|
|
#endif
|
|
for (p=FoTOS[k]; *p != r->rulenum && p >= FoStack[k]; --p) {;}
|
|
require(p>=FoStack[k], "RegisterCycle: FoStack is screwed up beyond belief");
|
|
if ( p == FoTOS[k] ) return; /* don't worry about cycles to oneself */
|
|
|
|
/* compute cyclic dependents (rules in cycle except head) */
|
|
c = newCycle;
|
|
require(c!=NULL, "RegisterCycle: couldn't alloc new cycle");
|
|
c->cyclicDep = empty;
|
|
c->croot = *p++; /* record root of cycle */
|
|
for (; p<=FoTOS[k]; p++)
|
|
{
|
|
/* Mark all dependent rules as incomplete */
|
|
f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[*p]->rname,'o',k));
|
|
if ( f==NULL )
|
|
{
|
|
f = newCacheEntry( Fkey(RulePtr[*p]->rname,'o',k) );
|
|
hash_add(Fcache, Fkey(RulePtr[*p]->rname,'o',k), (Entry *)f);
|
|
}
|
|
f->incomplete = TRUE;
|
|
|
|
set_orel(*p, &(c->cyclicDep)); /* mark rule as dependent of croot */
|
|
}
|
|
list_add(&(Cycles[k]), (void *)c);
|
|
}
|
|
|
|
/* make all rules in cycle complete
|
|
*
|
|
* while ( some set has changed ) do
|
|
* for each cycle do
|
|
* if degree of FOLLOW set for croot > old degree then
|
|
* update all FOLLOW sets for rules in cyclic dependency
|
|
* change = TRUE
|
|
* endif
|
|
* endfor
|
|
* endwhile
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
ResolveFoCycles( int k )
|
|
#else
|
|
ResolveFoCycles( k )
|
|
int k;
|
|
#endif
|
|
{
|
|
ListNode *p, *q;
|
|
Cycle *c;
|
|
int changed = 1;
|
|
CacheEntry *f,*g;
|
|
int r;
|
|
/* int i; */ /* MR10 not useful */
|
|
unsigned d;
|
|
|
|
unsigned *cursor; /* MR10 */
|
|
unsigned *origin; /* MR10 */
|
|
|
|
/*fprintf(stderr, "Resolving following cycles for %d\n", k);*/
|
|
while ( changed )
|
|
{
|
|
changed = 0;
|
|
/* MR10 i = 0; */
|
|
for (p = Cycles[k]->next; p!=NULL; p=p->next)
|
|
{
|
|
c = (Cycle *) p->elem;
|
|
/*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/
|
|
/*s_fprT(stderr, c->cyclicDep);*/
|
|
/*fprintf(stderr, "\n");*/
|
|
f = (CacheEntry *)
|
|
hash_get(Fcache, Fkey(RulePtr[c->croot]->rname,'o',k));
|
|
require(f!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[c->croot]->rname) );
|
|
if ( (d=set_deg(f->fset)) > c->deg )
|
|
{
|
|
/*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/
|
|
changed = 1;
|
|
c->deg = d; /* update cycle FOLLOW set degree */
|
|
|
|
/* MR10 */ origin=set_pdq(c->cyclicDep);
|
|
/* MR10 */ for (cursor=origin; *cursor != nil; cursor++) {
|
|
/* MR10 */ r=*cursor;
|
|
|
|
/******** while ( !set_nil(c->cyclicDep) ) { *****/
|
|
/******** r = set_int(c->cyclicDep); *****/
|
|
/******** set_rm(r, c->cyclicDep); *****/
|
|
|
|
/*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/
|
|
g = (CacheEntry *)
|
|
hash_get(Fcache, Fkey(RulePtr[r]->rname,'o',k));
|
|
require(g!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[r]->rname) );
|
|
set_orin(&(g->fset), f->fset);
|
|
g->incomplete = FALSE;
|
|
}
|
|
/* MR10 */ free( (char *) origin);
|
|
/* MR10 */ origin=NULL;
|
|
}
|
|
}
|
|
/* MR10 - this if statement appears to be meaningless since i is always 0 */
|
|
/* MR10 if ( i == 1 ) changed = 0; */ /* if only 1 cycle, no need to repeat */
|
|
}
|
|
/* kill Cycle list */
|
|
for (q = Cycles[k]->next; q != NULL; q=p)
|
|
{
|
|
p = q->next;
|
|
set_free( ((Cycle *)q->elem)->cyclicDep );
|
|
free((char *)q);
|
|
}
|
|
free( (char *)Cycles[k] );
|
|
Cycles[k] = NULL;
|
|
}
|
|
|
|
|
|
/* P r i n t i n g S y n t a x D i a g r a m s */
|
|
|
|
static void
|
|
#ifdef __USE_PROTOS
|
|
pBlk( Junction *q, int btype )
|
|
#else
|
|
pBlk( q, btype )
|
|
Junction *q;
|
|
int btype;
|
|
#endif
|
|
{
|
|
int k,a;
|
|
Junction *alt, *p;
|
|
|
|
q->end->pvisited = TRUE;
|
|
if ( btype == aLoopBegin )
|
|
{
|
|
require(q->p2!=NULL, "pBlk: invalid ()* block");
|
|
PRINT(q->p1);
|
|
alt = (Junction *)q->p2;
|
|
PRINT(alt->p1);
|
|
if ( PrintAnnotate )
|
|
{
|
|
printf(" /* Opt ");
|
|
k = 1;
|
|
while ( !set_nil(alt->fset[k]) )
|
|
{
|
|
s_fprT(stdout, alt->fset[k]);
|
|
if ( k++ == CLL_k ) break;
|
|
if ( !set_nil(alt->fset[k]) ) printf(", ");
|
|
}
|
|
printf(" */\n");
|
|
}
|
|
return;
|
|
}
|
|
for (a=1,alt=q; alt != NULL; alt= (Junction *) alt->p2, a++)
|
|
{
|
|
if ( alt->p1 != NULL ) PRINT(alt->p1);
|
|
if ( PrintAnnotate )
|
|
{
|
|
printf( " /* [%d] ", alt->altnum);
|
|
k = 1;
|
|
while ( !set_nil(alt->fset[k]) )
|
|
{
|
|
s_fprT(stdout, alt->fset[k]);
|
|
if ( k++ == CLL_k ) break;
|
|
if ( !set_nil(alt->fset[k]) ) printf(", ");
|
|
}
|
|
if ( alt->p2 == NULL && btype == aOptBlk )
|
|
printf( " (optional branch) */\n");
|
|
else printf( " */\n");
|
|
}
|
|
|
|
/* ignore implied empty alt of Plus blocks */
|
|
if ( alt->p2 != NULL && ((Junction *)alt->p2)->ignore ) break;
|
|
|
|
if ( alt->p2 != NULL && !(((Junction *)alt->p2)->p2==NULL && btype == aOptBlk) )
|
|
{
|
|
if ( pLevel == 1 )
|
|
{
|
|
printf("\n");
|
|
if ( a+1==pAlt1 || a+1==pAlt2 ) printf("=>");
|
|
printf("\t");
|
|
}
|
|
else printf(" ");
|
|
printf("|");
|
|
if ( pLevel == 1 )
|
|
{
|
|
p = (Junction *) ((Junction *)alt->p2)->p1;
|
|
while ( p!=NULL )
|
|
{
|
|
if ( p->ntype==nAction )
|
|
{
|
|
p=(Junction *)((ActionNode *)p)->next;
|
|
continue;
|
|
}
|
|
if ( p->ntype!=nJunction )
|
|
{
|
|
break;
|
|
}
|
|
if ( p->jtype==EndBlk || p->jtype==EndRule )
|
|
{
|
|
p = NULL;
|
|
break;
|
|
}
|
|
p = (Junction *)p->p1;
|
|
}
|
|
if ( p==NULL ) printf("\n\t"); /* Empty alt? */
|
|
}
|
|
}
|
|
}
|
|
q->end->pvisited = FALSE;
|
|
}
|
|
|
|
/* How to print out a junction */
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
pJunc( Junction *q )
|
|
#else
|
|
pJunc( q )
|
|
Junction *q;
|
|
#endif
|
|
{
|
|
int dum_k;
|
|
int doing_rule;
|
|
require(q!=NULL, "pJunc: NULL node");
|
|
require(q->ntype==nJunction, "pJunc: not junction");
|
|
|
|
if ( q->pvisited == TRUE ) return;
|
|
q->pvisited = TRUE;
|
|
switch ( q->jtype )
|
|
{
|
|
case aSubBlk :
|
|
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
|
|
if ( q->end->p1 != NULL && ((Junction *)q->end->p1)->ntype==nJunction &&
|
|
((Junction *)q->end->p1)->jtype == EndRule ) doing_rule = 1;
|
|
else doing_rule = 0;
|
|
pLevel++;
|
|
if ( pLevel==1 )
|
|
{
|
|
if ( pAlt1==1 ) printf("=>");
|
|
printf("\t");
|
|
}
|
|
else printf(" ");
|
|
if ( doing_rule )
|
|
{
|
|
if ( pLevel==1 ) printf(" ");
|
|
pBlk(q,q->jtype);
|
|
}
|
|
else {
|
|
printf("(");
|
|
if ( pLevel==1 ) printf(" ");
|
|
pBlk(q,q->jtype);
|
|
if ( pLevel>1 ) printf(" ");
|
|
printf(")");
|
|
}
|
|
if ( q->guess ) printf("?");
|
|
pLevel--;
|
|
if ( PrintAnnotate ) freeBlkFsets(q);
|
|
if ( q->end->p1 != NULL ) PRINT(q->end->p1);
|
|
break;
|
|
case aOptBlk :
|
|
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
|
|
pLevel++;
|
|
if ( pLevel==1 )
|
|
{
|
|
if ( pAlt1==1 ) printf("=>");
|
|
printf("\t");
|
|
}
|
|
else printf(" ");
|
|
printf("{");
|
|
if ( pLevel==1 ) printf(" ");
|
|
pBlk(q,q->jtype);
|
|
if ( pLevel>1 ) printf(" ");
|
|
else printf("\n\t");
|
|
printf("}");
|
|
pLevel--;
|
|
if ( PrintAnnotate ) freeBlkFsets(q);
|
|
if ( q->end->p1 != NULL ) PRINT(q->end->p1);
|
|
break;
|
|
case aLoopBegin :
|
|
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
|
|
pLevel++;
|
|
if ( pLevel==1 )
|
|
{
|
|
if ( pAlt1==1 ) printf("=>");
|
|
printf("\t");
|
|
}
|
|
else printf(" ");
|
|
printf("(");
|
|
if ( pLevel==1 ) printf(" ");
|
|
pBlk(q,q->jtype);
|
|
if ( pLevel>1 ) printf(" ");
|
|
else printf("\n\t");
|
|
printf(")*");
|
|
pLevel--;
|
|
if ( PrintAnnotate ) freeBlkFsets(q);
|
|
if ( q->end->p1 != NULL ) PRINT(q->end->p1);
|
|
break;
|
|
case aLoopBlk :
|
|
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
|
|
pBlk(q,q->jtype);
|
|
if ( PrintAnnotate ) freeBlkFsets(q);
|
|
break;
|
|
case aPlusBlk :
|
|
if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k);
|
|
pLevel++;
|
|
if ( pLevel==1 )
|
|
{
|
|
if ( pAlt1==1 ) printf("=>");
|
|
printf("\t");
|
|
}
|
|
else printf(" ");
|
|
printf("(");
|
|
if ( pLevel==1 ) printf(" ");
|
|
pBlk(q,q->jtype);
|
|
if ( pLevel>1 ) printf(" ");
|
|
printf(")+");
|
|
pLevel--;
|
|
if ( PrintAnnotate ) freeBlkFsets(q);
|
|
if ( q->end->p1 != NULL ) PRINT(q->end->p1);
|
|
break;
|
|
case EndBlk :
|
|
break;
|
|
case RuleBlk :
|
|
printf( "\n%s :\n", q->rname);
|
|
PRINT(q->p1);
|
|
if ( q->p2 != NULL ) PRINT(q->p2);
|
|
break;
|
|
case Generic :
|
|
if ( q->p1 != NULL ) PRINT(q->p1);
|
|
q->pvisited = FALSE;
|
|
if ( q->p2 != NULL ) PRINT(q->p2);
|
|
break;
|
|
case EndRule :
|
|
printf( "\n\t;\n");
|
|
break;
|
|
}
|
|
q->pvisited = FALSE;
|
|
}
|
|
|
|
/* How to print out a rule reference node */
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
pRuleRef( RuleRefNode *p )
|
|
#else
|
|
pRuleRef( p )
|
|
RuleRefNode *p;
|
|
#endif
|
|
{
|
|
require(p!=NULL, "pRuleRef: NULL node");
|
|
require(p->ntype==nRuleRef, "pRuleRef: not rule ref node");
|
|
|
|
printf( " %s", p->text);
|
|
PRINT(p->next);
|
|
}
|
|
|
|
/* How to print out a terminal node */
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
pToken( TokNode *p )
|
|
#else
|
|
pToken( p )
|
|
TokNode *p;
|
|
#endif
|
|
{
|
|
require(p!=NULL, "pToken: NULL node");
|
|
require(p->ntype==nToken, "pToken: not token node");
|
|
|
|
if ( p->wild_card ) printf(" .");
|
|
printf( " %s", TerminalString(p->token));
|
|
PRINT(p->next);
|
|
}
|
|
|
|
/* How to print out a terminal node */
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
pAction( ActionNode *p )
|
|
#else
|
|
pAction( p )
|
|
ActionNode *p;
|
|
#endif
|
|
{
|
|
require(p!=NULL, "pAction: NULL node");
|
|
require(p->ntype==nAction, "pAction: not action node");
|
|
|
|
PRINT(p->next);
|
|
}
|
|
|
|
/* F i l l F o l l o w L i s t s */
|
|
|
|
/*
|
|
* Search all rules for all rule reference nodes, q to rule, r.
|
|
* Add q->next to follow list dangling off of rule r.
|
|
* i.e.
|
|
*
|
|
* r: -o-R-o-->o--> Ptr to node following rule r in another rule
|
|
* |
|
|
* o--> Ptr to node following another reference to r.
|
|
*
|
|
* This is the data structure employed to avoid FOLLOW set computation. We
|
|
* simply compute the FIRST (reach) of the EndRule Node which follows the
|
|
* list found at the end of all rules which are referenced elsewhere. Rules
|
|
* not invoked by other rules have no follow list (r->end->p1==NULL).
|
|
* Generally, only start symbols are not invoked by another rule.
|
|
*
|
|
* Note that this mechanism also gives a free cross-reference mechanism.
|
|
*
|
|
* The entire syntax diagram is layed out like this:
|
|
*
|
|
* SynDiag
|
|
* |
|
|
* v
|
|
* o-->R1--o
|
|
* |
|
|
* o-->R2--o
|
|
* |
|
|
* ...
|
|
* |
|
|
* o-->Rn--o
|
|
*
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
FoLink( Node *p )
|
|
#else
|
|
FoLink( p )
|
|
Node *p;
|
|
#endif
|
|
{
|
|
RuleEntry *q;
|
|
Junction *j = (Junction *) p;
|
|
RuleRefNode *r = (RuleRefNode *) p;
|
|
|
|
if ( p==NULL ) return;
|
|
require(p->ntype>=1 && p->ntype<=NumNodeTypes,
|
|
eMsgd("FoLink: invalid diagram node: ntype==%d",p->ntype));
|
|
switch ( p->ntype )
|
|
{
|
|
case nJunction :
|
|
if ( j->fvisited ) return;
|
|
if ( j->jtype == EndRule ) return;
|
|
j->fvisited = TRUE;
|
|
FoLink( j->p1 );
|
|
FoLink( j->p2 );
|
|
/* MR14 */
|
|
/* MR14 */ /* Need to determine whether the guess block is an */
|
|
/* MR14 */ /* of the form (alpha)? beta before follow sets are */
|
|
/* MR14 */ /* computed. This is necessary to solve problem */
|
|
/* MR14 */ /* of doing follow on the alpha of an (alpha)? beta block. */
|
|
/* MR14 */
|
|
/* MR14 */ /* This is performed by analysis_point as a side-effect. */
|
|
/* MR14 */
|
|
/* MR14 */
|
|
/* MR14 */ if (j->jtype == aSubBlk && j->guess) {
|
|
/* MR14 */ Junction *ignore;
|
|
/* MR14 */ ignore=analysis_point(j);
|
|
/* MR14 */ }
|
|
/* MR14 */
|
|
return;
|
|
case nRuleRef :
|
|
if ( r->linked ) return;
|
|
q = (RuleEntry *) hash_get(Rname, r->text);
|
|
if ( q == NULL )
|
|
{
|
|
warnFL( eMsg1("rule %s not defined",r->text), FileStr[r->file], r->line );
|
|
}
|
|
else
|
|
{
|
|
if ( r->parms!=NULL && RulePtr[q->rulenum]->pdecl==NULL )
|
|
{
|
|
warnFL( eMsg1("rule %s accepts no parameter(s)", r->text),
|
|
FileStr[r->file], r->line );
|
|
}
|
|
if ( r->parms==NULL && RulePtr[q->rulenum]->pdecl!=NULL )
|
|
{
|
|
warnFL( eMsg1("rule %s requires parameter(s)", r->text),
|
|
FileStr[r->file], r->line );
|
|
}
|
|
if ( r->assign!=NULL && RulePtr[q->rulenum]->ret==NULL )
|
|
{
|
|
warnFL( eMsg1("rule %s yields no return value(s)", r->text),
|
|
FileStr[r->file], r->line );
|
|
}
|
|
if ( r->assign==NULL && RulePtr[q->rulenum]->ret!=NULL )
|
|
{
|
|
warnFL( eMsg1("rule %s returns a value(s)", r->text),
|
|
FileStr[r->file], r->line );
|
|
}
|
|
if ( !r->linked )
|
|
{
|
|
addFoLink( r->next, r->rname, RulePtr[q->rulenum] );
|
|
r->linked = TRUE;
|
|
}
|
|
}
|
|
FoLink( r->next );
|
|
return;
|
|
case nToken :
|
|
FoLink( ((TokNode *)p)->next );
|
|
return;
|
|
case nAction :
|
|
FoLink( ((ActionNode *)p)->next );
|
|
return;
|
|
default :
|
|
fatal_internal("invalid node type");
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Add a reference to the end of a rule.
|
|
*
|
|
* 'r' points to the RuleBlk node in a rule. r->end points to the last node
|
|
* (EndRule jtype) in a rule.
|
|
*
|
|
* Initial:
|
|
* r->end --> o
|
|
*
|
|
* After:
|
|
* r->end --> o-->o--> Ptr to node following rule r in another rule
|
|
* |
|
|
* o--> Ptr to node following another reference to r.
|
|
*
|
|
* Note that the links are added to the head of the list so that r->end->p1
|
|
* always points to the most recently added follow-link. At the end, it should
|
|
* point to the last reference found in the grammar (starting from the 1st rule).
|
|
*/
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
addFoLink( Node *p, char *rname, Junction *r )
|
|
#else
|
|
addFoLink( p, rname, r )
|
|
Node *p;
|
|
char *rname;
|
|
Junction *r;
|
|
#endif
|
|
{
|
|
Junction *j;
|
|
require(r!=NULL, "addFoLink: incorrect rule graph");
|
|
require(r->end!=NULL, "addFoLink: incorrect rule graph");
|
|
require(r->end->jtype==EndRule, "addFoLink: incorrect rule graph");
|
|
require(p!=NULL, "addFoLink: NULL FOLLOW link");
|
|
|
|
j = newJunction();
|
|
j->rname = rname; /* rname on follow links point to target rule */
|
|
j->p1 = p; /* link to other rule */
|
|
j->p2 = (Node *) r->end->p1;/* point to head of list */
|
|
r->end->p1 = (Node *) j; /* reset head to point to new node */
|
|
}
|
|
|
|
void
|
|
#ifdef __USE_PROTOS
|
|
GenCrossRef( Junction *p )
|
|
#else
|
|
GenCrossRef( p )
|
|
Junction *p;
|
|
#endif
|
|
{
|
|
set a;
|
|
Junction *j;
|
|
RuleEntry *q;
|
|
unsigned e;
|
|
require(p!=NULL, "GenCrossRef: why are you passing me a null grammar?");
|
|
|
|
printf("Cross Reference:\n\n");
|
|
a = empty;
|
|
for (; p!=NULL; p = (Junction *)p->p2)
|
|
{
|
|
printf("Rule %20s referenced by {", p->rname);
|
|
/* make a set of rules for uniqueness */
|
|
for (j = (Junction *)(p->end)->p1; j!=NULL; j = (Junction *)j->p2)
|
|
{
|
|
q = (RuleEntry *) hash_get(Rname, j->rname);
|
|
require(q!=NULL, "GenCrossRef: FoLinks are screwed up");
|
|
set_orel(q->rulenum, &a);
|
|
}
|
|
for (; !set_nil(a); set_rm(e, a))
|
|
{
|
|
e = set_int(a);
|
|
printf(" %s", RulePtr[e]->rname);
|
|
}
|
|
printf(" }\n");
|
|
}
|
|
set_free( a );
|
|
}
|
|
|
|
/*
|
|
The single argument is a pointer to the start of an element of a
|
|
C++ style function prototypet list. Given a pointer to the start of
|
|
an formal we must locate the comma (or the end of the string)
|
|
and locate the datatype, formal name, and initial value expression.
|
|
|
|
The function returns a pointer to the character following the comma
|
|
which terminates the formal declaration, or a pointer to the end of
|
|
the string if none was found.
|
|
|
|
I thought we were parsing specialists, how come I'm doing this by
|
|
hand written code ?
|
|
|
|
Examples of input:
|
|
|
|
Foo f,
|
|
Foo f = Foo(1),
|
|
Foo f = Foo(1,2),
|
|
Foo f = &farray[1,2],
|
|
Foo f = ",",
|
|
Foo f = ',',
|
|
TFoo<int,char> f = TFoo<int,char>(1,2),
|
|
|
|
A non-zero value for nesting indicates a problem matching '(' and ')',
|
|
'[' and ']', '<' and '>', '{' and '}', or improperly terminated string
|
|
or character literal.
|
|
|
|
*/
|
|
|
|
|
|
/*
|
|
* Don't care if it is a valid string literal or not, just find the end
|
|
* Start with pointer to leading "\""
|
|
*/
|
|
|
|
#ifdef __USE_PROTOS
|
|
char * skipStringLiteral(char *pCurrent)
|
|
#else
|
|
char * skipStringLiteral(pCurrent)
|
|
char *pCurrent;
|
|
#endif
|
|
{
|
|
char *p = pCurrent;
|
|
if (*p == 0) return p;
|
|
require (*p == '\"', "skipStringLiteral")
|
|
p++;
|
|
for (p = p; *p != 0; p++) {
|
|
if (*p == '\\') {
|
|
p++;
|
|
if (*p == 0) break;
|
|
p++;
|
|
}
|
|
if (*p == '\"') {
|
|
p++;
|
|
break;
|
|
}
|
|
}
|
|
return p;
|
|
}
|
|
|
|
/*
|
|
* Don't care if it is a valid character literal or not, just find the end
|
|
* Start with pointer to leading "'"
|
|
*/
|
|
|
|
#ifdef __USE_PROTOS
|
|
char * skipCharLiteral(char *pStart)
|
|
#else
|
|
char * skipCharLiteral(pStart)
|
|
char *pStart;
|
|
#endif
|
|
{
|
|
char *p = pStart;
|
|
if (*p == 0) return p;
|
|
require (*p == '\'', "skipCharLiteral")
|
|
p++;
|
|
for (p = p; *p != 0; p++) {
|
|
if (*p == '\\') {
|
|
p++;
|
|
if (*p == 0) break;
|
|
p++;
|
|
}
|
|
if (*p == '\'') {
|
|
p++;
|
|
break;
|
|
}
|
|
}
|
|
return p;
|
|
}
|
|
|
|
#ifdef __USE_PROTOS
|
|
char * skipSpaces(char *pStart)
|
|
#else
|
|
char * skipSpaces(pStart)
|
|
char * pStart;
|
|
#endif
|
|
{
|
|
char *p = pStart;
|
|
while (*p != 0 && isspace(*p)) p++;
|
|
return p;
|
|
}
|
|
|
|
#ifdef __USE_PROTOS
|
|
char * skipToSeparatorOrEqualSign(char *pStart, int *pNest)
|
|
#else
|
|
char * skipToSeparatorOrEqualSign(pStart, pNest)
|
|
char *pStart;
|
|
int *pNest;
|
|
#endif
|
|
{
|
|
char *p = pStart;
|
|
|
|
int nest = 0;
|
|
|
|
*pNest = (-1);
|
|
|
|
while (*p != 0) {
|
|
switch (*p) {
|
|
|
|
case '(' :
|
|
case '[' :
|
|
case '<' :
|
|
case '{' :
|
|
nest++;
|
|
p++;
|
|
break;
|
|
|
|
case ')' :
|
|
case ']' :
|
|
case '>' :
|
|
case '}' :
|
|
nest--;
|
|
p++;
|
|
break;
|
|
|
|
case '"' :
|
|
p = skipStringLiteral(p);
|
|
break;
|
|
|
|
case '\'' :
|
|
p = skipCharLiteral(p);
|
|
break;
|
|
|
|
case '\\':
|
|
p++;
|
|
if (*p == 0) goto EXIT;
|
|
p++;
|
|
break;
|
|
|
|
case ',':
|
|
case '=':
|
|
if (nest == 0) goto EXIT;
|
|
p++;
|
|
break;
|
|
|
|
default:
|
|
p++;
|
|
}
|
|
}
|
|
EXIT:
|
|
*pNest = nest;
|
|
return p;
|
|
}
|
|
|
|
#ifdef __USE_PROTOS
|
|
char * skipToSeparator(char *pStart, int *pNest)
|
|
#else
|
|
char * skipToSeparator(pStart, pNest)
|
|
char *pStart;
|
|
int *pNest;
|
|
#endif
|
|
{
|
|
char * p = pStart;
|
|
for ( ; ; ) {
|
|
p = skipToSeparatorOrEqualSign(p, pNest);
|
|
if (*pNest != 0) return p;
|
|
if (*p == ',') return p;
|
|
if (*p == 0) return p;
|
|
p++;
|
|
}
|
|
}
|
|
|
|
/* skip to just past the "=" separating the declaration from the initialization value */
|
|
|
|
#ifdef __USE_PROTOS
|
|
char * getInitializer(char *pStart)
|
|
#else
|
|
char * getInitializer(pStart)
|
|
char * pStart;
|
|
#endif
|
|
{
|
|
char *p;
|
|
char *pDataType;
|
|
char *pSymbol;
|
|
char *pEqualSign;
|
|
char *pValue;
|
|
char *pSeparator;
|
|
int nest = 0;
|
|
|
|
require(pStart!=NULL, "getInitializer: invalid string");
|
|
|
|
p = endFormal(pStart,
|
|
&pDataType,
|
|
&pSymbol,
|
|
&pEqualSign,
|
|
&pValue,
|
|
&pSeparator,
|
|
&nest);
|
|
if (nest != 0) return NULL;
|
|
if (pEqualSign == NULL) return NULL;
|
|
if (pValue == NULL) return NULL;
|
|
return strBetween(pValue, NULL, pSeparator);
|
|
}
|
|
|
|
/*
|
|
Examines the string from pStart to pEnd-1.
|
|
If the string has 0 length or is entirely white space
|
|
returns 1. Otherwise 0.
|
|
*/
|
|
|
|
#ifdef __USE_PROTOS
|
|
int isWhiteString(const char *pStart, const char *pEnd)
|
|
#else
|
|
int isWhiteString(pStart, pEnd)
|
|
const char *pStart;
|
|
const char *pEnd;
|
|
#endif
|
|
{
|
|
const char *p;
|
|
for (p = pStart; p < pEnd; p++) {
|
|
if (! isspace(*p)) return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
This replaces HasComma() which couldn't distinguish
|
|
|
|
foo ["a,b"]
|
|
|
|
from:
|
|
|
|
foo[a,b]
|
|
|
|
*/
|
|
|
|
#ifdef __USE_PROTOS
|
|
int hasMultipleOperands(char *pStart)
|
|
#else
|
|
int hasMultipleOperands(pStart)
|
|
char *pStart;
|
|
#endif
|
|
{
|
|
char *p = pStart;
|
|
int nest = 0;
|
|
|
|
p = skipSpaces(p);
|
|
if (*p == 0) return 0;
|
|
p = skipToSeparator(p, &nest);
|
|
if (nest == 0 && *p == ',') return 1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
#define MAX_STR_BETWEEN_WORK_AREA 1000
|
|
|
|
static char strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA];
|
|
|
|
|
|
/*
|
|
strBetween(pStart, pNext, pStop)
|
|
|
|
Creates a null terminated string by copying the text between two pointers
|
|
to a work area. The start of the string is pStart. The end of the string
|
|
is the character before pNext, or if pNext is null then the character before
|
|
pStop. Trailing spaces are not included in the copy operation.
|
|
|
|
This is used when a string contains several parts. The pNext part may be
|
|
optional. The pStop will stop the scan when the optional part is not present
|
|
(is a null pointer).
|
|
*/
|
|
|
|
#ifdef __USE_PROTOS
|
|
char *strBetween(char *pStart, char *pNext, char *pStop)
|
|
#else
|
|
char *strBetween(pStart, pNext, pStop)
|
|
char *pStart;
|
|
char *pNext;
|
|
char *pStop;
|
|
#endif
|
|
{
|
|
char *p;
|
|
char *q = strBetweenWorkArea;
|
|
const char *pEnd;
|
|
|
|
pEnd = (pNext != NULL) ? pNext : pStop;
|
|
|
|
require (pEnd != NULL, "pEnd == NULL");
|
|
require (pEnd >= pStart, "pEnd < pStart");
|
|
for (pEnd--; pEnd >= pStart; pEnd--) { /* MR31 */
|
|
if (! isspace(*pEnd)) break;
|
|
}
|
|
for (p = pStart;
|
|
p <= pEnd && q < &strBetweenWorkArea[MAX_STR_BETWEEN_WORK_AREA-2];
|
|
p++, q++) {
|
|
*q = *p;
|
|
}
|
|
*q = 0;
|
|
return strBetweenWorkArea;
|
|
}
|
|
|
|
/*
|
|
function Returns pointer to character following separator at
|
|
value which to continue search for next formal. If at the
|
|
end of the string a pointer to the null byte at the
|
|
end of the string is returned.
|
|
|
|
pStart Pointer to the starting position of the formal list
|
|
|
|
This may be the middle of a longer string, for example
|
|
when looking for the end of formal #3 starting from
|
|
the middle of the complete formal list.
|
|
|
|
ppDataType Returns a pointer to the start of the data type in the
|
|
formal. Example: pointer to "Foo".
|
|
|
|
ppSymbol Returns a pointer to the start of the formal symbol.
|
|
Example: pointer to "f".
|
|
|
|
ppEqualSign Returns a pointer to the equal sign separating the
|
|
formal symbol from the initial value. If there is
|
|
no "=" then this will be NULL.
|
|
|
|
ppValue Returns a pointer to the initial value part of the
|
|
formal declaration. Example: pointer to "&farray[1,2]"
|
|
|
|
ppSeparator Returns a pointer to the character which terminated the
|
|
scan. This should be a pointer to a comma or a null
|
|
byte which terminates the string.
|
|
|
|
pNest Returns the nesting level when a separator was found.
|
|
This is non-zero for any kind of error. This is zero
|
|
for a successful parse of this portion of the formal
|
|
list.
|
|
|
|
*/
|
|
|
|
#ifdef __USE_PROTOS
|
|
char * endFormal(char *pStart,
|
|
char **ppDataType,
|
|
char **ppSymbol,
|
|
char **ppEqualSign,
|
|
char **ppValue,
|
|
char **ppSeparator,
|
|
int *pNest)
|
|
#else
|
|
char * endFormal(pStart,
|
|
ppDataType,
|
|
ppSymbol,
|
|
ppEqualSign,
|
|
ppValue,
|
|
ppSeparator,
|
|
pNest)
|
|
char *pStart;
|
|
char **ppDataType;
|
|
char **ppSymbol;
|
|
char **ppEqualSign;
|
|
char **ppValue;
|
|
char **ppSeparator;
|
|
int *pNest;
|
|
|
|
#endif
|
|
{
|
|
char *p = pStart;
|
|
char *q;
|
|
|
|
*ppDataType = NULL;
|
|
*ppSymbol = NULL;
|
|
*ppEqualSign = NULL;
|
|
*ppValue = NULL;
|
|
*ppSeparator = NULL;
|
|
|
|
*pNest = 0;
|
|
|
|
/* The first non-blank is the start of the datatype */
|
|
|
|
p = skipSpaces(p);
|
|
if (*p == 0) goto EXIT;
|
|
*ppDataType = p;
|
|
|
|
/* We are not looking for the symbol, we are looking
|
|
for the separator that follows the symbol. Then
|
|
we'll back up.
|
|
|
|
Search for the ',' or '=" or null terminator.
|
|
*/
|
|
|
|
p = skipToSeparatorOrEqualSign(p, pNest);
|
|
|
|
if (*pNest != 0) goto EXIT;
|
|
|
|
/*
|
|
Work backwards to find start of symbol
|
|
Skip spaces between the end of symbol and separator
|
|
Assume that there are no spaces in the formal, but
|
|
there is a space preceding the formal
|
|
*/
|
|
|
|
for (q = &p[-1]; q >= *ppDataType; q--) {
|
|
if (! isspace(*q)) break;
|
|
}
|
|
if (q < *ppDataType) goto EXIT;
|
|
|
|
/*
|
|
MR26 Handle things like: IIR_Bool (IIR_Decl::*constraint)()
|
|
Backup until we hit the end of a symbol string, then find the
|
|
start of the symbol string. This wont' work for functions
|
|
with prototypes, but works for the most common cases. For
|
|
others, use typedef names.
|
|
*/
|
|
|
|
/* MR26 */ for (q = q; q >= *ppDataType; q--) {
|
|
/* MR26 */ if (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$') break;
|
|
/* MR26 */ }
|
|
/* MR26 */ if (q < *ppDataType) goto EXIT;
|
|
|
|
for (q = q; q >= *ppDataType; q--) {
|
|
if ( ! (isalpha(*q) || isdigit(*q) || *q == '_' || *q == '$')) break;
|
|
}
|
|
|
|
*ppSymbol = &q[1];
|
|
|
|
if (*p == ',' || *p == 0) {
|
|
*ppSeparator = p;
|
|
goto EXIT;
|
|
}
|
|
|
|
*ppEqualSign = p;
|
|
p = skipSpaces(++p);
|
|
*ppValue = p;
|
|
if (*p == 0) goto EXIT;
|
|
|
|
|
|
while (*p != 0 && *pNest == 0 && *p != ',') {
|
|
p = skipToSeparator(p, pNest);
|
|
}
|
|
if (*pNest == 0) *ppSeparator = p;
|
|
|
|
EXIT:
|
|
if (*p == ',') p++;
|
|
return p;
|
|
}
|