From MAILER-DAEMON Wed May 26 18:31:20 1999
Date: Wed, 26 May 1999 18:31:20 +0800 (HKT)
From: Mail System Internal Data <[email protected]>
Subject: DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA
X-IMAP: 0927714680 0000000000
Status: RO

This text is part of the internal format of your mail folder, and is not
a real message. It is created automatically by the mail system software.
If deleted, important folder data will be lost, and it will be re-created
with the data reset to initial values.

From [email protected] Tue Apr 25 05:06:15 2000 +0800
Status: R
X-Status:
X-Keywords:
Received: from smv18.iname.net (lmtp08.iname.net [165.251.8.81])
by csns02.comp.polyu.edu.hk (8.8.8/8.8.8) with SMTP id FAA24258
for <[email protected]>; Tue, 25 Apr 2000 05:06:09 +0800 (HKT)
Received: from shell7.ba.best.com (shell7.ba.best.com [206.184.139.138])
by smv18.iname.net (8.9.3/8.9.1SMV2) with ESMTP id RAA15280
for <[email protected]> sent by <[email protected]>; Mon, 24 Apr 2000 17:12:49 -0400 (EDT)
Received: (from microman@localhost)
by shell7.ba.best.com (8.9.3/8.9.2/best.sh) id OAA03253
for [email protected]; Mon, 24 Apr 2000 14:11:57 -0700 (PDT)
From: Robert Dowling <[email protected]>
Message-Id: <[email protected]>
Subject: Re: Lex & Yacc Questions
In-Reply-To: <[email protected]> from "[email protected]" at "Apr 23, 0 10:02:52 pm"
To: [email protected]
Date: Mon, 24 Apr 2000 14:11:57 -0700 (PDT)
X-Mailer: ELM [version 2.4ME+ PL38 (25)]
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Content-Length: 54093

Hi,

here is a pascal grammar,

Robert
> language=lex+yacc
> otherlanguage=Other:
> material=example
> material=tutorial
> [email protected]
> why=I want to do a programming assignment that use lex and yacc to do pascal like language compiler, I am now finding more
tutorial and example source to take reference
>
 

--
Robert Dowling -- [email protected] -- Microman Computing -- www.uman.com
Mar 3 20:18 1992 GRAM.Y Page 1
 

/*
* Pascal parser
*
* Based on:
*
* Jensen, Kathleen, and Wirth, Niklaus. _Pascal:_User_Manual_and_
* _report_, third edition. Springer Verlag, 1985
*
* with some Turbo Pascal 3.0 extensions.
*
* Written by Chris Retterath, MKS Inc.
* Modified by Scott Nicol, MKS Inc.
*/
%{
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "symbol.h"
#include "node.h"
#include "pc.h"
#include "typechek.h"
#include "xref.h"
#include "tag.h"

extern int yylex (void);
extern int yylineno;

#define Assert(e) { if (! (e)) yyerror("bad type"); }

int inWithBlock; /* track WITH nesting */

%}

%union {
long i;
Symbol * sym;
Node * n;
char * s;
double d;
}
%token <i> PROGRAM LABEL
%token <i> WRITE WRITELN READ READLN
%token <i> CONST TYPE VAR
%token <i> FUNCTION PROCEDURE FORWARD
%token <i> ABSOLUTE INLINE INTERRUPT EXTERNAL /* TPascal */
%token <i> ARRAY PACKED OF
%token <i> DOTDOT
%token <i> BEGIN_BLOCK END
%token <i> ASSIGN
%token <i> GOTO
%token <i> IF THEN ELSE
%token <i> CASE WHILE DO REPEAT UNTIL FOR TO DOWNTO
%token <i> WITH
%token <i> SET RECORD FILE_DECL
%token <i> NOT NOTEQUAL LESSEQ GREATEQ IN OR AND DIV MOD NIL
%token <i> INTEGER REAL CHAR BOOLEAN
%token <i> BYTE STRING SINGLE DOUBLE EXTENDED COMP WORD SHORTINT LONGINT
%token <i> USES SHL SHR XOR
%token <i> INT_CONST
%token <s> STRING_CONST
%token <d> FLOAT_CONST
%token <sym> CONST_IDENT LABEL_IDENT VAR_IDENT FIELD_IDENT
%token <sym> PROC_IDENT FUNC_IDENT BOUND_IDENT TYPE_IDENT
%token <sym> UNKNOWN_IDENT
%token FUNC_CALL /* psuedo-token */
%token SYMBOL /* psuedo-token */
%token LAST_TOKEN /* psuedo-token */

%type <n> expression variable setConstructor
%type <n> indexList index elementDescriptionList elementDescription
%type <n> actualParameterList actualParameters actualParameter
%type <n> writeParameterList writeParameters writeParameter opt_modifiers
%type <n> identifierList
%type <sym> identifier procedureHeading functionHeading

%type <i> relOp addOp mulOp sign
%type <i> '=' '<' '>' '+' '-' ':'
%type <i> '*' '/' '[' ']' '.' '^' ','
%type <i> typeIdentifierOrConformantArraySchemaOrPointer
%type <i> typeIdentifierOrConformantArraySchema
%type <i> typeIdentifier conformantArraySchema
%type <i> packedConformantArraySchema unPackedConformantArraySchema
%type <i> unPackedStructuredType
%type <i> variantSelector recordSection
%type <i> arrayType recordType setType fileType componentType baseType
%type <i> domainType ordinalType tagType resultType
%type <i> type typeIdentifier
%type <i> enumeratedType subRangeType intType realType
%type <i> procedureRest procedureBody functionBody

%nonassoc ASSIGN
%left '=' NOTEQUAL '<' LESSEQ '>' GREATEQ IN
%left '+' '-' OR XOR
%left '*' '/' DIV MOD AND SHL SHR
%left '[' ']'
%left '.'
%left '^'
%left NOT
%left UMINUS

%%
program: programHeading { sym_enterBlock(Syms); fnStack((Symbol *)0);}
block '.' { sym_leaveBlock(Syms); }
;

programHeading:
PROGRAM identifier ';'
| PROGRAM identifier '(' identifierList ')' ';' { cleanup($4); }
| /*nix*/
;

/*-------*/

block: compoundStatement
| declarations [LABEL CONST TYPE VAR FUNCTION PROCEDURE USES]
{ sym_globalscope(Syms); } compoundStatement ;
declarations:
declaration
| declarations declaration
;
declaration:
USES identifierList ';' { cleanup($2); }
| LABEL labelDefinitions ';'
| CONST constantDefinitions
| TYPE typeDefinitions
| VAR variableDefinitions
| procedureDeclaration
| functionDeclaration
;
/*-------*/
labelDefinitions:
labelDefinition
| labelDefinitions ',' labelDefinition
;
labelDefinition:
INT_CONST
{ if ($1 < 0 || $1 > 9999)
yyerror("Label out of range 0-9999"); }
| identifier
{ promote($1, LABEL_IDENT, 0); }
;
constantDefinitions:
constantDefinition
| constantDefinitions constantDefinition
;
constantDefinition:
identifier '=' constant ';'
{promote($1, CONST_IDENT, T_INT);sym_localscope(Syms);}
| identifier ':' type '=' constant ';'
{ promote($1, CONST_IDENT, (int)$3);
sym_localscope(Syms); }
;

typeDefinitions:
typeDefinition
| typeDefinitions typeDefinition
;

variableDefinitions:
variableDefinition
| variableDefinitions variableDefinition
;

/*-------*/

typeDefinition:
identifier '=' type ';'
{ promote($1, TYPE_IDENT, (int)$3);
sym_localscope(Syms); }
;
variableDefinition:
identifierList ':' type opt_absolute ';'
{ Node * np;
for (np = $1; np->left; np = np->left)
promote(np->right->spval, VAR_IDENT, (int)$3);
promote(np->spval, VAR_IDENT, (int)$3);
cleanup($1);
sym_localscope(Syms);
}
;
opt_absolute: /**/
| ABSOLUTE INT_CONST ':' INT_CONST
| ABSOLUTE VAR_IDENT
;
procedureDeclaration:
procedureHeading ';'
procedureBody ';'
{ if ($3 == BEGIN_BLOCK)
tagit($1);
sym_leaveBlock(Syms);
fnPop();
}
;
functionDeclaration:
functionHeading ';'
functionBody ';'
{ if ($3 == BEGIN_BLOCK)
tagit($1);
sym_leaveBlock(Syms);
fnPop();
}
;

procedureHeading:
PROCEDURE identifier
{ promote($2, PROC_IDENT, 0);
fnStack($2);
$2->tagLineNo = yylineno;
sym_enterBlock(Syms); }
optFormalParameterList
{ $$=$2; }
| PROCEDURE PROC_IDENT
{ fnStack($2);
$2->tagLineNo = yylineno;
$$=$2;
sym_enterBlock(Syms); }
;
procedureBody: INLINE '(' inlineElements ')' { $$=$1; }
| optInterrupt procedureRest { $$=$2; }
;
inlineElements: inlineElement
| inlineElements '/' inlineElement
;
inlineElement: constant
| '<' constant
| '>' constant
| VAR_IDENT
| VAR_IDENT sign constant %prec UMINUS
;
sign: '+' | '-' ;
optInterrupt: /**/
| INTERRUPT ';'
;
procedureRest:
block { $$=BEGIN_BLOCK; }
| FORWARD
| EXTERNAL
;
functionHeading:
FUNCTION identifier
{ promote($2, FUNC_IDENT, 0);
fnStack($2);
$2->tagLineNo = yylineno;
sym_enterBlock(Syms); }
optFormalParameterList ':' resultType
{ $$=$2; $2->pascalType = $6; }
| FUNCTION FUNC_IDENT
{ fnStack($2);
$2->tagLineNo = yylineno;
$$=$2;
sym_enterBlock(Syms); }
;
functionBody: INLINE '(' inlineElements ')'
| procedureRest
;
optFormalParameterList:
/*nix*/
| '(' { sym_localscope(Syms); }
formalParameter optFormalParameters ')'
;
optFormalParameters:
/*nix*/
| ';' { sym_localscope(Syms); }
formalParameter optFormalParameters
;
formalParameter:
valueParameter
| variableParameter
| procedureHeading
| functionHeading
;
/*-------*/
valueParameter: identifierList ':'
typeIdentifierOrConformantArraySchemaOrPointer
{ Node * np;
for (np = $1; np->left; np = np->left)
promote(np->right->spval, VAR_IDENT, (int)$3);
promote(np->spval, VAR_IDENT, (int)$3);
cleanup($1);
}
;
variableParameter:
VAR identifierList ':'
typeIdentifierOrConformantArraySchemaOrPointer
{ Node * np;
for (np = $2; np->left; np = np->left)
promote(np->right->spval, VAR_IDENT, (int)$4);
promote(np->spval, VAR_IDENT, (int)$4);
cleanup($2);
}
;
conformantArraySchema:
packedConformantArraySchema
| unPackedConformantArraySchema
;
packedConformantArraySchema:
PACKED ARRAY '[' indexTypeSpecification ']' OF typeIdentifier
{ $$ = $7 | T_ARRAY; }
;
unPackedConformantArraySchema:
ARRAY '[' indexTypeSpecification optIndices ']'
OF typeIdentifierOrConformantArraySchema
{ $$ = $7 | T_ARRAY; }
;
typeIdentifierOrConformantArraySchema:
typeIdentifier
| conformantArraySchema
;
typeIdentifierOrConformantArraySchemaOrPointer:
typeIdentifier
| conformantArraySchema
| '^' domainType { $$ = $2 | T_POINTER; }
;
optIndices: /*nix*/
| ';' indexTypeSpecification optIndices
;
indexTypeSpecification:
identifier DOTDOT identifier ':' type
{ if (! ordinalTypeIdentifier((Symbol *) $5))
yyerror("expect Ordinal type identifier");
}
;

/*-------*/

compoundStatement:
BEGIN_BLOCK statementSequence END
;
statementSequence:
statement
| statementSequence ';' statement [^END]
| statementSequence ';' statement ';'
;
statement: /**/ [';' END]
| INT_CONST ':'
| LABEL_IDENT ':'
| INT_CONST ':' rest
| LABEL_IDENT ':' rest
| rest
;
rest: assignmentStatement
| procedureStatement
| gotoStatement
| compoundStatement
| conditionalStatement
| repetitiveStatement
| withStatement
;
conditionalStatement:
ifStatement
| caseStatement
;
repetitiveStatement:
whileStatement
| repeatStatement
| forStatement
;

/*-------*/

assignmentStatement:
variable ASSIGN expression { cleanup($1); cleanup($3); }
| FUNC_IDENT ASSIGN expression { cleanup($3); }
;
procedureStatement:
PROC_IDENT actualParameterList { cleanup($2); }
| writeIdentifier writeParameterList { cleanup($2); }
;
writeIdentifier:
WRITE
| WRITELN
| READ
| READLN
;
gotoStatement: GOTO INT_CONST
| GOTO LABEL_IDENT
;
ifStatement: IF expression
{ Assert(isBooleanExpression($2));
cleanup($2);
}
THEN statement elseStatement
;
elseStatement: /*nix*/
| ELSE statement
;
caseStatement: CASE caseIndex OF caseList END
;
caseList: case
| case ';' [END]
| caseList case
| caseList case ';' [END]
;
whileStatement: WHILE expression
{ Assert(isBooleanExpression($2)); cleanup($2); }
DO statement
;
repeatStatement:
REPEAT statementSequence UNTIL expression
{ Assert(isBooleanExpression($4)); cleanup($4); }
;
forStatement: FOR controlVariable ASSIGN initialValue to finalValue
DO statement
;
to: TO
| DOWNTO
;
withStatement: WITH recordVariableList DO
{ inWithBlock++; }
statement
{ inWithBlock--; }
;
recordVariableList:
recordVariable
| recordVariableList ',' recordVariable
;
caseIndex: expression
{ Assert(isOrdinalExpression($1)); cleanup($1); }
;
case: constantList ':' statement
;
constantList: constant
| constantList ',' constant
;
controlVariable:
VAR_IDENT {addxref($1);}
;
initialValue: expression
{ Assert(isOrdinalExpression($1)); cleanup($1); }
;
finalValue: expression
{ Assert(isOrdinalExpression($1)); cleanup($1); }
;

/*-------*/

type: enumeratedType
| subRangeType
| intType
| realType
| STRING opt_size { $$ = T_STRING; }
| TYPE_IDENT { $$ = $1->pascalType; }
| optPacked unPackedStructuredType { $$ = $2; }
| '^' domainType { $$ = $2 | T_POINTER; }
;
opt_size: /**/
| '[' INT_CONST ']'
| '[' CONST_IDENT ']'
| '[' ordinalType ']'
;
intType: INTEGER { $$ = T_INT; }
| SHORTINT{ $$ = T_INT; }
| WORD { $$ = T_INT; }
| LONGINT { $$ = T_INT; }
| CHAR { $$ = T_CHAR; }
| BOOLEAN { $$ = T_BOOLEAN; }
| BYTE { $$ = T_INT; }
;
realType: REAL { $$ = T_REAL; }
| SINGLE { $$ = T_REAL; }
| DOUBLE { $$ = T_REAL; }
| EXTENDED{ $$ = T_REAL; }
| COMP { $$ = T_REAL; }
;
optPacked: /*nix*/
| PACKED
;
ordinalType: enumeratedType
| subRangeType
| intType
| TYPE_IDENT
{ if (! ordinalTypeIdentifier($1))
yyerror("expect Ordinal type identifier");
$$ = $1->pascalType;
}
;
unPackedStructuredType:
arrayType
| recordType
| setType
| fileType
;
domainType: typeIdentifier
;
enumeratedType: '(' identifierList ')' { $$ = T_ENUM; cleanup($2); }
;
subRangeType: constant DOTDOT constant
{ $$ = T_SUBRANGE; }
;
arrayType: ARRAY '[' indexTypeList ']' OF componentType
{ $$ = $6 | T_ARRAY; }
;
indexTypeList: indexType
| indexTypeList ',' indexType
;
recordType: RECORD fieldList END
{ $$ = T_RECORD; }
;
setType: SET OF baseType { $$ = $3; }
;
fileType: FILE_DECL OF componentType { $$ = $3 | T_FILE; }
;
indexType: ordinalType
;
componentType: type
;
baseType: ordinalType
;
resultType: TYPE_IDENT
{ if (! ordinalTypeIdentifier($1) && ! realTypeIdentifier($1)
&& ! pointerTypeIdentifier($1) && ! stringTypeIdentifier($1))
yyerror("expect Real, Ordinal, Pointer or String type");
$$ = $1->pascalType;
}
| intType
| realType
| STRING { $$ = T_STRING; }
| '^' domainType { $$ = $2 | T_POINTER; }
;
fieldList: /*nix*/
| ';' [END]
| fixedPart ';' variantPart
| variantPart
;
fixedPart: recordSection
| fixedPart ';' recordSection
;
recordSection: identifierList ':' type { cleanup($1); $$ = $3; }
;
variantPart: CASE variantSelector OF variantList ';' [END]
| CASE variantSelector OF variantList
;
variantSelector:
tagField ':' tagType { $$ = $3; }
| tagType
;
variantList: variant
| variantList ';' variant
;
variant: constantList ':' '(' fieldList ')'
;
tagType: type
{ if (! ordinalTypeIdentifier((Symbol *) $1))
yyerror("expect Ordinal type identifier");
}
;
tagField: identifier
;

/*-------*/
constant: '-' number
| number
| '-' CONST_IDENT
| CONST_IDENT
| STRING_CONST
;
/*-------*/
expression: INT_CONST { $$ = leaf(INT_CONST); $$->ival = $1; }
| FLOAT_CONST { $$ = leaf(FLOAT_CONST); $$->dval = $1; }
| STRING_CONST { $$ = leaf(STRING_CONST);
$$->sval = strdup($1); }
| CONST_IDENT { $$ = leaf(SYMBOL); $$->spval = $1; }
| NIL { $$ = leaf(NIL); }
| BOUND_IDENT { $$ = leaf(SYMBOL); $$->spval = $1; }
| variable
| setConstructor
| FUNC_IDENT { Node *np;
np = leaf(SYMBOL);
np->spval = $1;
$$ = node(FUNC_CALL, np, NNULL);
}
| FUNC_IDENT '(' actualParameters ')'
{ Node *np;
np = leaf(SYMBOL);
np->spval = $1;
$$ = node(FUNC_CALL, np, $3);
}
| NOT expression { $$ = node((int)$1,$2,NNULL); }
| '(' expression ')' { $$ = $2; }
| expression relOp expression %prec '<'
{ $$ = node((int)$2,$1,$3); }
| expression addOp expression %prec '+'
{ $$ = node((int)$2,$1,$3); }
| expression mulOp expression %prec '*'
{ $$ = node((int)$2,$1,$3); }
| sign expression %prec UMINUS
{ if ($1 == '-')
$$ = node((int)$1,$2,NNULL);
else
$$ = $2;
}
;
relOp: '=' | NOTEQUAL | '<' | LESSEQ | '>' | GREATEQ | IN ;
addOp: '+' | '-' | OR | XOR ;
mulOp: '*' | '/' | DIV | MOD | AND | SHL | SHR ;
number: INT_CONST | FLOAT_CONST ;

/*-------*/

variable: VAR_IDENT { $$=leaf(SYMBOL); $$->spval=$1; addxref($1);}
| UNKNOWN_IDENT
{ yyerror("undeclared variable '%s'",$1->name); }
| FIELD_IDENT
{ if (! inWithBlock
|| ! withRecordMember($1))
yyerror("missing with(%s)",$1->name);
$$ = leaf(SYMBOL);
$$->spval = $1;
}
| variable '.' FIELD_IDENT
{ Node *np;
if (! isRecord($1) || ! recordMember($1,$3))
yyerror("'.' applied incorrectly");
np = leaf(SYMBOL);
np->spval = $3;
$$ = node((int)$2, $1, np);
}
| variable '[' indexList ']'
{ if (! isArrayVariable($1))
yyerror("[] applied to non-array");
$$ = node((int)$2, $1, $3);
}
| variable '^'
{ if (! isPointerVariable($1)
|| ! isFileVariable($1))
yyerror("^ applied to non-file or pointer");
$$ = node((int)$2, $1, NNULL);
}
;
indexList: index
| indexList ',' index
{ $$ = node((int)$2, $1, $3); }
;
index: expression
{ Assert(isOrdinalExpression($1)); }
;
setConstructor: '[' elementDescriptionList ']'
{ $$ = node(']', $2, NNULL); }
;
elementDescriptionList:
/*nix*/
{ $$ = NNULL; }
| elementDescription
| elementDescriptionList ',' elementDescription
{ $$ = node(',', $1, $3); }
;
elementDescription:
expression
{ Assert(isOrdinalExpression($1));
}
| expression DOTDOT expression
{ Assert(isOrdinalExpression($1));
Assert(isOrdinalExpression($3));
$$ = node((int)$2, $1, $3);
}
;
actualParameterList:
/*nix*/
{ $$ = NNULL; }
| '(' actualParameters ')' { $$ = $2; }
;
actualParameters:
actualParameter
| actualParameters ',' actualParameter
{ $$ = node((int)$2, $1, $3); }
;
actualParameter:
expression
| PROC_IDENT { $$ = leaf(SYMBOL); $$->spval = $1; }
;
writeParameterList:
/*nix*/
{ $$ = NNULL; }
| '(' writeParameters ')'
{ $$ = $2; }
;
writeParameters:
writeParameter
| writeParameters ',' writeParameter
{ $$ = node((int)$2, $1, $3); }
;
writeParameter: expression opt_modifiers
{ $$ = node(':', $1, $2); }
;
opt_modifiers: /**/ { $$ = NNULL; }
| opt_modifiers ':' expression
{ $$ = node((int)$2, $1, $3); }
;
recordVariable: variable { cleanup($1); };

/*-------*/

typeIdentifier: TYPE_IDENT { $$ = $1->pascalType; }
| intType
| realType
| STRING { $$ = T_STRING; }
;
identifierList: identifier { $$ = leaf(SYMBOL); $$->spval = $1; addxref($1); }
| identifierList ',' identifier
{ Node *np;
np = leaf(SYMBOL);
np->spval = $3;
$$ = node((int)$2, $1, np);
addxref($3);
}
;

identifier: idNull UNKNOWN_IDENT { sym_globalscope(Syms); $$ = $2; }
| idNull LABEL_IDENT { yyerror("redefinition of %s\n", $2->name); }
| idNull VAR_IDENT { yyerror("redefinition of %s\n", $2->name); }
| idNull FIELD_IDENT { yyerror("redefinition of %s\n", $2->name); }
| idNull BOUND_IDENT { yyerror("redefinition of %s\n", $2->name); }
;
idNull: /*nix*/ { sym_localscope(Syms); } ;
 

Mar 3 20:18 1992 HELLO.PAS Page 1
 

{ Sample Pascal program for pc.exe. }
program main;

var
i, j : integer;
arf : real;

procedure baz (var a:integer); forward;

procedure foo (b:integer);

var
i, j, k : integer;
foo : integer;

begin
for k := 1 to 5 do
begin
b := 0;
i := k * 5;
j := k + i;
end;
end;

procedure baz;
begin
end;

function bar (a:integer): real;

var
foo : integer;
bar : real;

begin
foo := 1;
i := j + 1;
bar := 1.0e10;
end;
 

begin
i := 1; { do lots of useless stuff }
j := i;
foo(j);
writeln('Hello, World');
arf := bar(j);
baz(i);
end.
 

Mar 3 20:18 1992 MAKEFILE Page 1
 

# Sample makefile

# you have to change these for your system & compiler
ROOTDIR=c:
LEXLIB=$(ROOTDIR)/lib/lex.lib
CC=cc

OBJ=pc.obj typechek.obj xref.obj tag.obj node.obj symbol.obj gram.obj scan.obj

pc.exe: $(OBJ)
$(CC) -o pc.exe $(OBJ) $(LEXLIB)
scan.c: scan.l
lex -o scan.c scan.l
gram.c: gram.y
yacc -o gram.c -D tok.h gram.y
 

Mar 3 20:18 1992 MSC.BAT Page 1
 

echo Building PASCAL parser, cross-referencer, and tag file generator
lex -o scan.c scan.l
yacc -D tok.h -o gram.c gram.y
cl -DYYDEBUG=0 pc.c typechek.c xref.c tag.c node.c symbol.c gram.c scan.c %ROOTDIR%\lib\sliblex.lib /link /noe
 

Mar 3 20:18 1992 NODE.C Page 1
 

/*
* Program memory (tree) memory management
*/
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symbol.h"
#include "node.h"
#include "tok.h"
#include "pc.h"

/*
* Node manipulations
*/
Node *
leaf(type)
int type;
{
Node * np;

if ((np = (Node *) malloc(sizeof(*np))) == NNULL)
yyerror("Out of node space");
np->type = type;
np->left = np->right = NNULL;
return np;
}
Node *
newString(s)
char * s;
{
Node * np = leaf(STRING_CONST);
np->sval = strdup(s);
return np;
}
Node *
node(type, left, right)
int type;
Node * left, * right;
{
Node * np = leaf(type);
np->left = left;
np->right = right;
return np;
}
void
cleanup(np)
Node * np;
{
if (!np)
return;
cleanup(np->left);
cleanup(np->right);
free(np);
}
 

Mar 3 20:18 1992 NODE.H Page 1
 

extern SymTab * Syms;

typedef struct node_tag {
int type;
union {
long ivalue;
double dvalue;
char * svalue;
Symbol * sp;
} value;
struct node_tag * left, * right;
} Node;

#define ival value.ivalue
#define dval value.dvalue
#define sval value.svalue
#define spval value.sp
#define NNULL ((Node *) 0)

#define T_BOOLEAN 0x0001
#define T_SUBRANGE 0x0002
#define T_ENUM 0x0004
#define T_CHAR 0x0008
#define T_INT 0x0010
#define T_ORDINAL \
(T_BOOLEAN|T_SUBRANGE|T_ENUM|T_CHAR|T_INT)
#define T_REAL 0x0020
#define T_POINTER 0x0040
#define T_STRING 0x0080
#define T_ARRAY 0x0100
#define T_RECORD 0x0200
#define T_FILE 0x0400

extern Node * leaf (int);
extern Node * newString (char *);
extern Node * node (int, Node *, Node *);
extern void cleanup (Node *);
 

Mar 3 20:18 1992 PC.C Page 1
 

/*
* Mainline for PC
* written by Chris Retterath, 1991.
*/

#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdarg.h>
#include "symbol.h"
#include "node.h"
#include "tok.h"
#include "xref.h"
#include "tag.h"

extern FILE * yyin;
extern int yylineno;
#if YYDEBUG
extern int yydebug;
#endif
extern int yyparse(void);
SymTab * Syms, * Words;

int Tag;
char *Filename;

int Xref;

int Foldcase;

int
prSym(sp, arg)
Symbol * sp;
void * arg;
{
printf("%s: '%s' ty=%d h=%d bl=%d uv=%d\n",
(char *) arg, sp->name, sp->type, sp->hashvalue,
sp->blocklevel, sp->pascalType);
return 1; /* keep going */
}

void
prInit(sp)
Symbol * sp;
{
sp->pascalType = 0;
sp->xref = (SymXref *)0;
}

void
prNew(sp)
Symbol * sp;
{
prInit(sp);
prSym(sp,"new");
}

void
prDelete(sp)
Symbol * sp;
{
dumpxref(sp);
prSym(sp,"delete");
}

void
yyerror(char * fmt,...)
{
va_list args;
va_start(args, fmt);
if (yylineno)
(void) fprintf(stderr,"Syntax error (line %d): ",yylineno);
(void) vfprintf(stderr, fmt, args);
va_end(args);
(void) fputc('\n',stderr);
#if YYDEBUG
if (yydebug)
sym_all(Syms, prSym, "dump");
#endif
exit(1);
}

char *
strdup(s)
const char * s;
{
char * cp;

if ((cp = malloc(strlen(s) + 1)) == NULL)
yyerror("Out of string space");
return strcpy(cp, s);
}

char *reserved_words[] = {
"absolute", "and", "array",
"begin", "boolean", "byte",
"case", "char", "comp", "const",
"div", "do", "double", "downto",
"else", "end", "extended", "external",
"file", "for", "forward", "function",
"goto",
"if", "in", "inline", "integer", "interrupt",
"label", "longint",
"mod",
"nil", "not",
"of", "or",
"packed", "procedure", "program",
"read", "readln", "real", "record", "repeat",
"set", "shl", "shortint", "shr", "single", "string",
"then", "to", "type",
"until", "uses",
"var",
"while", "with", "word", "write", "writeln",
"xor",
""
};

int reserved_val[] = {
ABSOLUTE, AND, ARRAY,
BEGIN_BLOCK, BOOLEAN, BYTE,
CASE, CHAR, COMP, CONST,
DIV, DO, DOUBLE, DOWNTO,
ELSE, END, EXTENDED, EXTERNAL,
FILE_DECL, FOR, FORWARD, FUNCTION,
GOTO,
IF, IN, INLINE, INTEGER, INTERRUPT,
LABEL, LONGINT,
MOD,
NIL, NOT,
OF, OR,
PACKED, PROCEDURE, PROGRAM,
READ, READLN, REAL, RECORD, REPEAT,
SET, SHL, SHORTINT, SHR, SINGLE, STRING,
THEN, TO, TYPE,
UNTIL, USES,
VAR,
WHILE, WITH, WORD, WRITE, WRITELN,
XOR,
0
};

static
void
init_words()
{
int i;

Words = sym_init(NIL_SYM_FUNC, NIL_SYM_FUNC);
sym_noalloc(Words);
for (i=0; reserved_val[i]; ++i)
sym_create(Words, reserved_words[i], reserved_val[i]);
}

int
main(argc,argv)
int argc;
char * argv[];
{
int i;

Foldcase = Tag = Xref = 0;
#if YYDEBUG
yydebug = 0;
#endif
for (;argc > 1; --argc, ++argv) {
if (argv[1][0] != '-' || argv[1][1] == '-')
break;
for (i=1;argv[1][i];++i) {
switch (argv[1][i]) {
#if YYDEBUG
case 'v': yydebug = 1; break;
#endif
case 'i': Foldcase = 1; break;
case 'x': Xref = 1; break;
case 't': Tag = 1; break;
default : printf("Unknown option \"-%c\"\n",argv[1][i]);
#if YYDEBUG
printf("Usage: pc [-ixtv] [file]\n");
#else
printf("Usage: pc [-ixt] [file]\n");
#endif
exit(1);
}
}
}
#if YYDEBUG
if (yydebug)
Syms = sym_init(prNew, prDelete);
else
#endif
Syms = sym_init(prInit, dumpxref);
init_words();

if (argc < 2) {
yyin = stdin;
if (Tag) {
fprintf(stderr, "WARNING: reading from stdin -- %s",
"no tagfile will be generated\n");
Tag = 0;
}
} else if ((yyin = fopen(argv[1],"r")) == NULL)
yyerror("cannot open input '%s'",argv[1]);
Filename = argv[1];
i = yyparse();
dumpTags();
return i;
}
 

Mar 3 20:18 1992 PC.H Page 1
 

extern char * strdup(const char *);
extern int Tag;
extern char *Filename;
extern int Foldcase;
extern int Xref;
extern SymTab * Words;
 

Mar 3 20:18 1992 README.1ST Page 1
 

Hints and Caveats regarding the Pascal Grammar

References:

Jensen, Kathleen, and Wirth, Niklaus. _Pascal:_User_Manual_and_
_report_, third edition. Springer Verlag, 1985

This grammar does not parse Turbo Pascal .tpu units or turbo.tpl.
This is a pain since things like System.tpu contain so much that is
needed to compile anything. For example, the type 'text' is defined
in System.tpu. You can always define it yourself:

type text = file of char;

Also, you can put in forward declarations for all the functions
and procedures that you need.

If you have a "USES" statement in your code, remove that statement
and pull in the corresponding ".int" file (from the "doc" directory in
the Turbo Pascal distribution). You should remove turbo-unit related
keywords such as "interface", and append "forward;" after procedure and
function definitions.

Of course, there is no ".int" file for the standard Turbo Pascal
"library", but that can be fixed by creating your own "stdlib.int" file,
and including it at the top of the source.

The compiled program will check the syntax of a given pascal source code
file, as well as produce cross-referencing information (with the -x option),
and produce a tags file (with the -t option). If you want the parser to
be case-insensitive, use the -i option.

The symbol table code used in this example (and in the dBase, SQL,
HyperTalk and PIC examples) is documented in the file symbol.doc in
this directory.

YACC has some problems with the grammar as given. Two shift-reduce
errors remain:

1.
State 249 token "'-'"
shift expression: expression.addOp expression
shift addOp: .'-'
reduce repeatStatement: REPEAT statementSequence UNTIL expression.

For example, looking at
repeat ... until a - b
YACC has a conflict at the '-' because a - b is a valid expression (SHIFT),
but "repeat ... until a" is also valid (REDUCE). The resolution builtin
to YACC is to always shift first, so the right thing happens

2.
State 263 token "ELSE"
shift ifStatement: IF expression THEN statement.ELSE statement
reduce ifStatement: IF expression THEN statement.

This is the classic IF..THEN..ELSE shift/reduce problem, as discussed
in the MKS LEX & YACC manual. SHIFT is preferred and the right thing happens
automatically.
 

Mar 3 20:18 1992 SCAN.L Page 1
 

%{
/*
* Lexical scanner for Pascal parser example
* Note that comments started with { must terminate with } (not *))
* and that comments started with (* must terminate with *) (not })
*/
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "symbol.h"
#include "node.h"
#include "tok.h"
#include "pc.h"

#undef yygetc

static char Buffer[200];
static int gatherString(void);
%}

digit [0-9]
letter [a-zA-Z]
integer {digit}+
float ({integer}\.{integer})|({integer}(\.{integer})?[eE][+-]?{integer})
name {letter}({letter}|_|{digit})*

%%
[#'] { return gatherString(); }
"<=" { return (yylval.i = LESSEQ); }
">=" { return (yylval.i = GREATEQ); }
"<>" { return (yylval.i = NOTEQUAL); }
".." { return DOTDOT; }
":=" { return ASSIGN; }
\${integer} { yylval.i = strtol((char *)yytext+1,NULL,16);
return INT_CONST; }
{integer} { yylval.i = atol((char *)yytext);
return INT_CONST; }
{float} { yylval.d = atof((char *)yytext);
return FLOAT_CONST; }
"(*" { yycomment("*)"); }
"{" { yycomment("}"); }
[ \t\r\n] ; /* skip whitespace */
{name} { yylval.sym = sym_lookup(Words, (char *)yytext);
if (yylval.sym)
return (yylval.i = yylval.sym->type);
yylval.sym=sym_intern(Syms,(char *)yytext,UNKNOWN_IDENT);
return yylval.sym->type; }
. { yylval.i = *yytext; /* token has own value */
return *yytext; }
%%
/*
* Read Turbo Pascal string literal
* Accept string consisting of any number of #{digit}..
* interspersed with strings delineated by '..'
* Strings cannot be broken over newlines.
* Valid strings:
* '' 'a string' 'quote '' within string'
* #65 #103'again, with beep'#7
* Invalid syntax:
* 'two''strings' #63 'cannot have spaces between'
*/
static int
gatherString()
{
int ch = *yytext, val;
char * s = Buffer, * startp;

loop:
if (ch == '#') {
startp = s;
while ((ch = input()) != EOF && isdigit(ch))
*s++ = ch;
if (ch == EOF)
yyerror("Eof in string");
*s = '\0'; s = startp;
val = atoi(s);
*s++ = (char) val;
*s = '\0';
if (ch == '#' || ch == '\'')
goto loop;
if (ch != EOF)
unput(ch);
} else {
while ((ch = input()) != EOF) {
if (ch == '\'') {
if ((ch = input()) != '\'') {
unput(ch);
break;
} else
*s++ = ch;
} else if (ch == '\r' || ch == '\n')
yyerror("Newline in string");
else
*s++ = ch;
}
*s = '\0';
if (ch == EOF)
yyerror("Eof in string '%s'",Buffer);
if ((ch = input()) == '#')
goto loop;
if (ch != EOF)
unput(ch);
}
yylval.s = Buffer;
return STRING_CONST;
}

int
yygetc()
{
if (Foldcase) {
int c;
c = getc(yyin);
if (c == EOF)
return c;
else
return tolower(c);
} else
return(getc(yyin));
}
 

Mar 3 20:18 1992 SYMBOL.C Page 1
 

/*
* Symbol table management.
*
*/
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "symbol.h"

extern void yyerror();

SymTab *
sym_init(new, delete)
SymFunc new, delete;
{
SymTab * tabp;
register int i;
Symbol * ht;
if ((tabp = (SymTab *) malloc(sizeof(*tabp))) == NULL)
(void) yyerror("out of symbol table space\n");
tabp->new = new;
tabp->delete = delete;
tabp->blocklevel = 0;
tabp->syms = (Symbol *)0;
for (i=0; i < HASH_SIZE; ++i)
tabp->hash[i] = (Symbol *)0;
tabp->local=0;
tabp->str_alloc=1;
return tabp;
}

/*
* Delete a symbol table
*/
void
sym_delete(tabp)
SymTab * tabp;
{
Symbol * sp, * next;

for (sp = tabp->syms; sp; sp = next) {
next = sp->next;
if (tabp->delete)
(*tabp->delete)(sp);
free(sp->name);
free(sp);
}
free(tabp);
}

/*
* compute the hash value of an identifier
*/
static
int
sym_hash(name)
char * name;
{
int hashvalue;

for (hashvalue=0; *name != '\0'; hashvalue += *name++);
return (hashvalue & (HASH_SIZE-1));
}

/*
* setup sym_lookup() to look at all scopes for a identifier
*/
void
sym_globalscope(tabp)
SymTab * tabp;
{
tabp->local = 0;
}

/*
* setup sym_lookup() to only look at the lowest scope for a identifier
*/
void
sym_localscope(tabp)
SymTab * tabp;
{
tabp->local = 1;
}

/*
* Return pointer to existing named symbol, if found;
* otherwise return NULL
*/
static
Symbol *
_sym_lookup(tabp, s, hashvalue)
SymTab * tabp;
char * s;
int hashvalue;
{
Symbol * sp;
int bl;

if (tabp->local) {
bl = tabp->blocklevel;
for (sp = tabp->hash[hashvalue]; sp && sp->blocklevel>=bl;
sp = sp->hnext)
if (strcmp(sp->name,s) == 0)
return sp;
} else {
for (sp = tabp->hash[hashvalue]; sp; sp = sp->hnext)
if (strcmp(sp->name,s) == 0)
return sp;
}
return NULL;
}

Symbol *
sym_lookup(tabp, s)
SymTab * tabp;
char * s;
{
return _sym_lookup(tabp, s, sym_hash(s));
}

/*
* setup sym_create() to copy the given identifier for use in the symbol
*/
void
sym_alloc(tabp)
SymTab * tabp;
{
tabp->str_alloc = 1;
}

/*
* setup sym_create() to not copy (i.e. just point to the original string)
*/
void
sym_noalloc(tabp)
SymTab * tabp;
{
tabp->str_alloc = 0;
}

/*
* create a symbol table entry
*/
static
Symbol *
_sym_create(tabp, s, type, hashvalue)
SymTab * tabp;
char * s;
int type;
int hashvalue;
{
Symbol *sp;

if (tabp->str_alloc) {
if ((sp = (Symbol *) malloc(sizeof(*sp)+strlen(s)+1)) == NULL)
(void) yyerror("out of symbol table space\n");
sp->name = (char *) sp + sizeof(*sp);
(void) strcpy(sp->name,s);
} else {
if ((sp = (Symbol *) malloc(sizeof(*sp))) == NULL)
(void) yyerror("out of symbol table space\n");
sp->name = s;
}
sp->type = type;
sp->blocklevel = tabp->blocklevel;
sp->hashvalue = hashvalue;
/* confuse everybody by setting lots of pointers */
sp->next = tabp->syms;
sp->hnext = tabp->hash[hashvalue];
sp->prev = (Symbol *)0;
sp->hprev = (Symbol *)0;
if (tabp->syms)
tabp->syms->prev = sp;
if (tabp->hash[hashvalue])
tabp->hash[hashvalue]->hprev = sp;
tabp->syms = sp;
tabp->hash[hashvalue] = sp;
if (tabp->new)
(*tabp->new)(sp);
return sp;
}

Symbol *
sym_create(tabp, s, type)
SymTab * tabp;
char * s;
int type;
{
return _sym_create(tabp, s, type, sym_hash(s));
}

/*
* Like lookup(), but add symbol with given type if not found
*/
Symbol *
sym_intern(tabp, s, type)
SymTab * tabp;
char * s;
int type;
{
Symbol * sp;
int hashvalue;

hashvalue = sym_hash(s);
sp = _sym_lookup(tabp, s, hashvalue);
if (sp == NULL)
return _sym_create(tabp, s, type, hashvalue);
return sp;
}

/*
* Find an identifier with the same name, but at a lower scope.
* Remove the given identifier from the symbol table.
*/
Symbol *
sym_coalesce(tabp, sp)
SymTab * tabp;
Symbol * sp;
{
Symbol *orig;
char *s;

orig = sp;
s = sp->name;
/* find a symbol with the same name, or NULL if none found */
for (sp=sp->hnext; sp; sp=sp->hnext)
if (!strcmp (sp->name, s))
break;
/* fix up the pointers and remove the original symbol */
if (orig->prev)
orig->prev->next = orig->next;
else
tabp->syms = orig->next;
if (orig->hprev)
orig->hprev->hnext = orig->hnext;
else
tabp->hash[orig->hashvalue] = orig->hnext;
if (orig->next)
orig->next->prev = orig->prev;
if (orig->hnext)
orig->hnext->hprev = orig->hprev;
free (orig);
return sp;
}

/*
* Enter new scope declaration level
* such as Pascal function/procedure, or C block
*/
void
sym_enterBlock(tabp)
SymTab * tabp;
{
tabp->blocklevel++;
}
/*
* Leaving nested block.
* Discard all declarations made at that lower level
*/
void
sym_leaveBlock(tabp)
SymTab * tabp;
{
Symbol * sp, * next;
int elim;

elim = --tabp->blocklevel;

for (sp = tabp->syms; sp && sp->blocklevel > elim; sp = next){
next = sp->next;
if (tabp->delete)
(*tabp->delete)(sp);
if (sp->hprev)
sp->hprev->hnext = sp->hnext;
else
tabp->hash[sp->hashvalue] = sp->hnext;
if (sp->hnext)
sp->hnext->hprev = sp->hprev;
free(sp);
}
tabp->syms = sp;
if (sp)
sp->prev = (Symbol *)0;
return;
}
/*
* Invoke user function as (*fn)(Symbol *, arg)
* for every symbol in the symbol table
* Stop on zero return from (*fn)
*/
void
sym_all(tabp, fn, arg)
SymTab * tabp;
SymFunc1 fn;
void * arg;
{
Symbol * sp;

for (sp = tabp->syms; sp; sp = sp->next)
if (! (*fn)(sp, arg))
break;
return;
}
 

Mar 3 20:18 1992 SYMBOL.DOC Page 1
 

Symbol Table Management
=======================
 

Introduction:

The file symbol.c contains code for creating and managing symbol tables.
The functions are designed to be as general as possible (to support
a wide variety of applications), but at the same time fast. Multiple
scopes are supported, as well as multiple independent symbol tables.
The symbol table data structure is based on a combined linked list and
hash table representation.

The file symbol.h contains definitions for the various structures
used in the symbol.c.

The same symbol table code (in symbol.c) is used by the dBase, SQL,
HyperTalk, and PIC examples. The definitions (in symbol.h) are slightly
different for the PASCAL example, because the variable SymVal (in the
symbol tag structure) has a different definition.

Scope:

Scope is represented as a non-negative integer within each symbol tag,
with 0 being the lowest (i.e. global) scope. New symbols inherit the
current scope level, as stored in the symbol table header. When a new
scope is entered, the scope level in the symbol table header is incremented.
When a scope is abandoned, the scope level in the symbol table header is
decremented and all symbols with a scope level greater than the new scope
level are erased.

Structure:

Both the linked-list and hash-table representations have a guaranteed
ordering. In the linked-list, new symbols are pushed on to the head of
the list. This guarantees that the scope level of the symbols are sorted
in descending order. In the hash table, each bucket just contains a pointer
to a linked-list of symbols. When a new symbol is created, it is pushed
onto the front of this list. The next two diagrams represent a symbol
table containing five symbols, i, j, h, i, and k, respectively. The scope
level is represented in brackets. Assume a new scope was entered after
the symbol j (the second symbol) was created, and before h (the third
symbol) was created.

linked-list:

+======+ +======+ +======+ +======+ +======+
---->| k(1) |---->| i(1) |---->| h(1) |---->| j(0) |---->| i(0) |
+======+ +======+ +======+ +======+ +======+

hash table:

=========================================================
... | 104 | 105 | 106 | 107 | ...
=========================================================
| | | |
| | | |
V V V V
+======+ +======+ +======+ +======+
| h(1) | | i(1) | | j(0) | | k(1) |
+======+ +======+ +======+ +======+
|
|
V
+======+
| i(0) |
+======+

combined:

=========================================================
... | 104 | 105 | 106 | 107 | ...
=========================================================
| | | |
| | | |
V V V V
+======+ +======+ +======+ +======+
+-| h(1) |<-----| i(1) |<-+ +-| j(0) |<-+ +-| k(1) |<---
| +======+ +======+ | | +======+ | | +======+
| | | | | |
| | +-+-----------+-+
| | | |
+------------------+--------+-----------+
| |
V |
+======+ |
| i(0) |<---+
+======+

In all three diagrams, only the "next" pointers are shown. There
are also "prev" pointers, but drawing them in would only make the
diagram more confusing.

The reason that both data structures are used is to make the algorithm
faster. Each representation has its stengths and weaknesses. For instance,
if you wanted to lookup a symbol table entry for a given name (lets say "j"),
using the hash table you could find it immediately, whereas with the linked-
list, you would need to traverse most of the table to find the symbol.
On the other hand, if you wanted to traverse a scope, with the linked-list
you only need to traverse the list until you find a symbol with a lower
scope. With a hash-list, each bucket in the list will have to be inspected.
 

Functions:

SymTab * sym_init(SymFunc new, SymFunc delete);

- initialize a new symbol table
- returns a pointer to the symbol table header

SymFunc new => NULL, or a pointer to a function returning void
- if not NULL, (*new)(Symbol *) is called for every newly
created Symbol, with a pointer to the new Symbol
- can be used for diagnostics or to fill in the user slot
(see below)

SymFunc delete => NULL, or a pointer to a function returning void
- if not NULL, (*delete)(Symbol *) is called for every Symbol
just before it is deleted
- Symbols are deleted when a local block is being left or by
sym_delete() when a symbol table is being deleted
 

void sym_delete(SymTab * tabp);

- deletes the given table, and frees all space used by it

SymTab * tabp => symbol table pointer that was obtained from
sym_init()
 

void sym_globalscope(SymTab * tabp);

- set sym_lookup and sym_intern to work on a global scope
(i.e. look for a symbol corresponding to a given name within
all scopes)
- this is the default

SymTab * tabp => symbol table pointer that was obtained from
sym_init()
 

void sym_localscope(SymTab * tabp);

- set sym_lookup and sym_intern to work on a local scope
(i.e look for a symbol corresponding to a given name only within
the current scope).

SymTab * tabp => symbol table pointer that was obtained from
sym_init()
 

Symbol * sym_lookup(SymTab * tabp, char * s);

- find a symbol in the symbol table
- scope of lookup is determined by use of sym_localscope()
and sym_globalscope()
- NULL is returned if a symbol is not found

SymTab * tabp => symbol table pointer that was obtained from
sym_init()

char * s => the name to lookup in the symbol table
 

void sym_alloc(SymTab * tabp);

- set sym_intern and sym_create to allocate memory for the name
of a newly created symbol
- this is the default behavior

SymTab * tabp => symbol table pointer that was obtained from
sym_init()
 

void sym_noalloc(SymTab * tabp);

- set sym_intern and sym_create to not allocate memory for the name
of a newly created symbol. Use this only if you know that the
string that you passed will not be overwritten (don't do this
if the strings you are passing is yytext, since it does get
overwritten)

SymTab * tabp => symbol table pointer that was obtained from
sym_init()
 

Symbol * sym_create(SymTab * tabp, char * s, int type);

- create a new symbol with the given name and type
- the scope level of the new symbol will be the same as the current
scope level
- returns a pointer to the new symbol table entry

SymTab * tabp => symbol table pointer that was obtained from
sym_init()

char * s => the name to attach to the new symbol

int type => the type to attach to the new symbol
 

Symbol * sym_intern(SymTab * tabp, char * s, int type);

- lookup the symbol in the symbol table. If the symbol is not
found, create a new symbol with the given name and type
- the lookup follows the same scoping rules as sym_lookup()
- the create (if necessary) follows the same rules as sym_create()
- returns a pointer to either a symbol table entry with the same
name (if one is found), or a new symbol table entry

SymTab * tabp => symbol table pointer that was obtained from
sym_init()

char * s => the name look for, and (if a symbol is not found)
to attach to the new symbol

int type => the type to attach to the new symbol (if one is
created)
 

Symbol * sym_coalesce(SymTab * tabp, Symbol * sp);

- remove the given symbol from the symbol table, and search
through the table for another symbol with the same name
- return a pointer to the found symbol, or NULL if another symbol
is not found

SymTab * tabp => symbol table pointer that was obtained from
sym_init()

Symbol * sp => the symbol to delete
 

void sym_enterBlock(SymTab * tabp);

- enter a new scope (increment the scope level)

SymTab * tabp => symbol table pointer that was obtained from
sym_init()
 

void sym_leaveBlock(SymTab * tabp);

- leave the highest scope, and remove all symbols that were
created in this scope

SymTab * tabp => symbol table pointer that was obtained from
sym_init()
 

void sym_all(SymTab * tabp, SymFunc1 fn, void * arg);

- traverse the symbol table, performing the specified function
for every symbol

SymTab * tabp => symbol table pointer that was obtained from
sym_init()

SymFunc1 fn => the function to call. The first argument passed
this function will be the symbol pointer. The
remaining arguments are supplied by arg

void * arg => the second (and further) arguments that will be passed
to the given function
 
 

Symbols:

Symbols are defined as follows:

typedef struct symbol_tag {
char * name; /* copy of name */
int type; /* type of symbol */
int blocklevel; /* nesting depth */
int hashvalue; /* hash bucket # */
struct symbol_tag * next, * prev; /* next/prev symbol in scope */
struct symbol_tag * hnext, * hprev; /* next/prev symbol in bucket */
SymVal uvalue; /* user available slot */
} Symbol;

SymVal is defined as:

typedef int SymVal;

If you want to place some other value in the symbol uvalue slot,
simply redefine SymVal in symbol.h, and recompile.

For example, lets say you want a union of values, where the symbol
type will keep track of the current union value. Place this in symbol.h:

typedef union {
int ival;
double dval;
char * sval;
} SymVal;
 

Mar 3 20:18 1992 SYMBOL.H Page 1
 

/*
* Symbol table management
*/

typedef struct reference {
int lineno;
struct reference *next;
} SymXref;

typedef struct sym_user_val {
int PASCALTYPE;
SymXref * XREF;
int TAGLINENO;
} SymVal; /* Change this to store other values */

#define pascalType uvalue.PASCALTYPE
#define xref uvalue.XREF
#define tagLineNo uvalue.TAGLINENO

#define HASH_SIZE 256 /* don't change this unless you change */
/* sym_hash() while you're at it */

typedef struct symbol_tag {
char * name; /* copy of name */
int type; /* type of symbol */
int blocklevel; /* nesting depth */
int hashvalue; /* hash bucket # */
struct symbol_tag * next, * prev; /* next/prev symbol in scope */
struct symbol_tag * hnext, * hprev; /* next/prev symbol in bucket */
SymVal uvalue; /* user available slot */
} Symbol;

typedef void (*SymFunc)(Symbol *);
typedef int (*SymFunc1)(Symbol *, void *);

#define NIL_SYM_FUNC ((void (*)(Symbol *)) 0)

typedef struct symboltable_tag {
Symbol * syms;
Symbol * hash[HASH_SIZE];
int size;
int blocklevel;
int local;
int str_alloc;
SymFunc new;
SymFunc delete;
} SymTab;

extern SymTab * sym_init(SymFunc, SymFunc);
extern void sym_delete(SymTab *);
extern void sym_globalscope(SymTab *);
extern void sym_localscope(SymTab *);
extern Symbol * sym_lookup(SymTab *, char *);
extern Symbol * sym_intern(SymTab *, char *, int);
extern void sym_alloc(SymTab *);
extern void sym_noalloc(SymTab *);
extern Symbol * sym_create(SymTab *, char *, int);
extern Symbol * sym_coalesce(SymTab *, Symbol *);
extern void sym_enterBlock(SymTab *);
extern void sym_leaveBlock(SymTab *);
extern void sym_all(SymTab *, SymFunc1, void *);
 

Mar 3 20:18 1992 TAG.C Page 1
 

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "symbol.h"
#include "node.h"
#include "pc.h"
#include "tok.h"
#include "tag.h"
 

static last_tag = 0;

typedef struct tag_struct {
char *name;
int lineno;
} tagname;
static tagname * tag_array[TAGSIZE];

void
tagit(sp)
Symbol *sp;
{
tagname *t;

if (!Tag)
return;
if (last_tag == TAGSIZE ||
!(t = (tagname *) malloc (sizeof(tagname)+strlen(sp->name)+1))) {
printf("out of tag space at symbol %s\n", sp->name);
Tag = 0;
return;
}
t->lineno = sp->tagLineNo;
t->name = (char *) t + sizeof(*t);
strcpy(t->name, sp->name);
tag_array[last_tag++] = t;
}

int
tagcmp(t1, t2)
tagname **t1;
tagname **t2;
{
return strcmp((*t1)->name, (*t2)->name);
}

void
dumpTags()
{
int i;
tagname * t;
FILE * tagfile;

if (!Tag)
return;
if ((tagfile = fopen("tags", "w")) == NULL)
yyerror("cannot open tags");
qsort ((char *)tag_array, last_tag, sizeof(tag_array[0]), tagcmp);
for (i=0; i<last_tag; ++i) {
t = tag_array[i];
fprintf(tagfile, "%s\t%s\t%d\n", t->name, Filename, t->lineno);
}
fclose(tagfile);
}
 

Mar 3 20:18 1992 TAG.H Page 1
 

#define TAGSIZE 1000 /* size of the tag array */

extern void tagit(Symbol *);
extern void dumpTags(void);
 

Mar 3 20:18 1992 TURBO.BAT Page 1
 

echo Building PASCAL parser, cross-referencer, and tag file generator
lex -o scan.c scan.l
yacc -D tok.h -o gram.c gram.y
tcc -DYYDEBUG=0 pc.c typechek.c xref.c tag.c node.c symbol.c gram.c scan.c %ROOTDIR%\lib\tslex.lib
 

Mar 3 20:18 1992 TYPECHEK.C Page 1
 

#include <stdio.h>
#include "symbol.h"
#include "node.h"
#include "tok.h"
#include "pc.h"

/*
* Promote UNKNOWN_IDENT to its known type
*/
void
promote(sp, type, pascal_type)
Symbol * sp;
int type;
int pascal_type;
{
if (sp->type != UNKNOWN_IDENT)
yyerror("Redefinition of %s\n", sp->name);
sp->type = type;
sp->pascalType = pascal_type;
}

int
ordinalTypeIdentifier(sp)
Symbol * sp;
{
return sp->pascalType & T_ORDINAL;
}
int
realTypeIdentifier(sp)
Symbol * sp;
{
return sp->pascalType & T_REAL;
}
int
pointerTypeIdentifier(sp)
Symbol * sp;
{
return sp->pascalType & T_POINTER;
}
int
stringTypeIdentifier(sp)
Symbol * sp;
{
return sp->pascalType & T_STRING;
}

/*
* return true iff entire expression is boolean
*/
int
isBooleanExpression(np)
Node * np;
{
if (!np)
return 0;
if ( np->type == '='
|| np->type == NOTEQUAL
|| np->type == '<'
|| np->type == LESSEQ
|| np->type == '>'
|| np->type == GREATEQ
|| np->type == IN
|| ((np->type == SYMBOL) && (np->spval->type & T_BOOLEAN))
|| ((np->type == FUNC_CALL) && (np->left->spval->type & T_BOOLEAN)))
return 1;
if ( np->type == OR
|| np->type == XOR
|| np->type == AND)
return (isBooleanExpression(np->left) &&
isBooleanExpression(np->right));
if (np->type == NOT)
return (isBooleanExpression(np->left));
return 0;
}

int
isOrdinalExpression(np)
Node * np;
{
if (!np)
return 0;
if ( np->type == '='
|| np->type == NOTEQUAL
|| np->type == '<'
|| np->type == LESSEQ
|| np->type == '>'
|| np->type == GREATEQ
|| np->type == IN
|| np->type == ']'
|| np->type == INT_CONST
|| np->type == STRING_CONST
|| np->type == NIL
|| ((np->type == SYMBOL) && (np->spval->type & T_ORDINAL))
|| ((np->type == FUNC_CALL) && (np->left->spval->type & T_ORDINAL)))
return 1;
if ( np->type == '+'
|| (np->type == '-' && np->right)
|| np->type == OR
|| np->type == XOR
|| np->type == '*'
|| np->type == '/'
|| np->type == DIV
|| np->type == MOD
|| np->type == AND
|| np->type == SHL
|| np->type == SHR)
return (isOrdinalExpression(np->left) &&
isOrdinalExpression(np->right));
if ( np->type == NOT
|| np->type == '-')
return (isOrdinalExpression(np->left));
return 0;
}

int
withRecordMember(sp)
Symbol * sp;
{
if (sp->type == FIELD_IDENT)
return 1;
return 0;
}

/*
* Return 1 iff np is a valid record descriptor ('.' can be attached)
*/
int
isRecord(np)
Node * np;
{
if (np->spval->pascalType & T_RECORD)
return 1;
return 0;
}
/*
* Return 1 iff fp is a valid field of the record described by np
* Can assume isRecord(np)
*/
int
recordMember(np, sp)
Node * np;
Symbol * sp;
{
return 1;
}
/*
* Return 1 iff np is a valid array descriptor
*/
int
isArrayVariable(np)
Node * np;
{
if (!np)
return 0;
if (np->type != SYMBOL)
return 0;
if (np->spval->pascalType & (T_ARRAY|T_STRING))
return 1;
return 0;
}
/*
* Return 1 iff np is a valid pointer variable
*/
int
isPointerVariable(np)
Node * np;
{
if (!np)
return 0;
if (np->type != SYMBOL)
return 0;
if (np->spval->pascalType & T_POINTER)
return 1;
return 0;
}

/*
* Return 1 iff np is a valid file variable
*/
int
isFileVariable(np)
Node * np;
{
if (!np)
return 0;
if (np->type != SYMBOL)
return 0;
if (np->spval->pascalType & T_FILE)
return 1;
return 0;
}
 

Mar 3 20:18 1992 TYPECHEK.H Page 1
 

extern void promote(Symbol *, int, int);
extern int ordinalTypeIdentifier(Symbol *);
extern int realTypeIdentifier(Symbol *);
extern int pointerTypeIdentifier(Symbol *);
extern int stringTypeIdentifier(Symbol *);
extern int isBooleanExpression(Node *);
extern int isOrdinalExpression(Node *);
extern int withRecordMember(Symbol *);
extern int isRecord(Node *);
extern int recordMember(Node *, Symbol *);
extern int isArrayVariable(Node *);
extern int isPointerVariable(Node *);
extern int isFileVariable(Node *);
 

Mar 3 20:18 1992 WATCOM.BAT Page 1
 

echo Building PASCAL parser, cross-referencer, and tag file generator
lex -o scan.c scan.l
yacc -D tok.h -o gram.c gram.y
wcc -DYYDEBUG=0 pc.c
wcc -DYYDEBUG=0 typechek.c
wcc -DYYDEBUG=0 xref.c
wcc -DYYDEBUG=0 tag.c
wcc -DYYDEBUG=0 node.c
wcc -DYYDEBUG=0 symbol.c
wcc -DYYDEBUG=0 gram.c
wcc -DYYDEBUG=0 scan.c
wlink name pc file pc, typechek, xref, tag, node, symbol, gram, scan library %ROOTDIR%\lib\lexs
 

Mar 3 20:18 1992 XREF.C Page 1
 

#include <stdio.h>
#include <stdlib.h>
#include "symbol.h"
#include "pc.h"

Symbol * fnPtr;
extern int yylineno;

void
addxref(sp)
Symbol * sp;
{
SymXref * sx;

if (!Xref) return;
if (sp->xref == (SymXref *)0) {
if ((sx=sp->xref=(SymXref *) malloc (sizeof (SymXref))) == NULL)
yyerror ("out of xref space\n");
} else {
for (sx = sp->xref;sx->next;sx = sx->next);
if ((sx=sx->next=(SymXref *) malloc (sizeof (SymXref))) == NULL)
yyerror ("out of xref space\n");
}
sx->lineno = yylineno;
sx->next = (SymXref *)0;
}

void
dumpxref(sp)
Symbol * sp;
{
SymXref * sx, * sxnext;

if (sp->xref == (SymXref *)0)
return;
sx = sp->xref;
sp->xref = (SymXref *)0;
printf("%-20s\t%-20s\t%d", sp->name, fnPtr ? fnPtr->name : "--",
sx->lineno);
sxnext = sx->next;
free (sx);
sx = sxnext;
while (sx) {
printf (", %d", sx->lineno);
sxnext = sx->next;
free (sx);
sx = sxnext;
}
printf("\n");
}

typedef struct fnNameStack {
Symbol * sp;
struct fnNameStack *next, *prev;
} fnName;

fnName *fnNamePtr = (fnName *)0;

void
fnStack(sp)
Symbol * sp;
{
if (fnNamePtr == (fnName *)0) {
if ((fnNamePtr = (fnName *) malloc (sizeof (fnName))) == NULL)
yyerror ("out of name space\n");
fnNamePtr->next = fnNamePtr->prev = (fnName *)0;
fnNamePtr->sp = sp;
} else {
if ((fnNamePtr->next=(fnName *) malloc(sizeof(fnName))) == NULL)
yyerror ("out of name space\n");
fnNamePtr->next->prev = fnNamePtr;
fnNamePtr = fnNamePtr->next;
fnNamePtr->next = (fnName *)0;
fnNamePtr->sp = sp;
}
fnPtr = sp;
}

void
fnPop()
{
fnName *tmp;

tmp = fnNamePtr;
fnNamePtr = fnNamePtr->prev;
free(tmp);
if (fnNamePtr) {
fnNamePtr->next = (fnName *)0;
fnPtr = fnNamePtr->sp;
} else
fnPtr = (Symbol *)0;
}
 

Mar 3 20:18 1992 XREF.H Page 1
 

extern void addxref(Symbol *);
extern void dumpxref(Symbol *);
extern void fnStack(Symbol *);
extern void fnPop();