The final task for a compiler written in M2000 for rosetta.org
Four parts:
- Lexical_analyzer
- Syntax_analyzer
- CodeGenerator
- Virtual_Machine_Interpreter
Open M2000.exe write Edit A paste the code below, press esc, and write A and press enter.In Rosetta.org you can find samples for this tiny language.
Module lexical_analyzer (a$){
lim=Len(a$)
LineNo=1
ColumnNo=1
Document Output$
Buffer Scanner as Integer*lim
Return Scanner, 0:=a$
offset=0
buffer1$=""
flag_rem=true
Ahead=lambda Scanner (a$, offset)->{
=false
Try {
\\ second parameter is the offset in buffer units
\\ third parameter is length in bytes
=Eval$(Scanner, offset,2*len(a$))=a$
}
}
Ahead2=lambda Scanner (a$, offset)->{
=false
Try {
=Eval$(Scanner, offset,2) ~ a$
}
}
const nl$=chr$(13)+chr$(10), quo$="""", er$="@", Ansi=3
Try {
Do
If Ahead("/*", offset) Then {
offset+=2 : ColumnNo+=2
While not Ahead("*/", offset)
If Ahead(nl$, offset) Then
lineNo++: ColumnNo=1 : offset+=2
Else
offset++ : ColumnNo++
End If
if offset>lim then
Error "End-of-file in comment. Closing comment characters not found"+er$
End if
End While
offset+=2 : ColumnNo+=2
} Else.if Ahead(nl$, offset) Then{
LineNo++: ColumnNo=1
offset+=2
} Else.if Ahead(quo$, offset) Then {
Output$=format$("{0::-10}{1::-10} ", LineNo, ColumnNo)
offset++ : ColumnNo++
strin=offset
While not Ahead(quo$, offset)
If Ahead("/", offset) Then
offset+=2 : ColumnNo+=2
else
offset++ : ColumnNo++
End if
checkerror()
End While
Output$="String "+quote$(Eval$(Scanner, strin, (offset-strin)*2))+nl$
offset++ : ColumnNo++
} Else.if Ahead("'", offset) Then {
Output$=format$("{0::-10}{1::-10} ", LineNo, ColumnNo)
offset++ : ColumnNo++
strin=offset
While not Ahead("'", offset)
If Ahead("/", offset) Then
offset+=2 : ColumnNo+=2
else
offset++ : ColumnNo++
End if
checkerror()
End While
lit$=format$(Eval$(Scanner, strin, (offset-strin)*2))
select case len(lit$)
case 1
Output$="Integer "+str$(asc(lit$),0)+nl$
case >1
{Error "Multi-character constant."+er$}
case 0
{Error "Empty character constant."+er$}
end select
offset++ : ColumnNo++
} Else.if Ahead2("[a-z]", offset) Then {
strin=offset
Output$=format$("{0::-10}{1::-10} ", LineNo, ColumnNo)
offset++ : ColumnNo++
While Ahead2("[a-zA-Z0-9_]", offset)
offset++ : ColumnNo++
End While
Keywords(Eval$(Scanner, strin, (offset-strin)*2))
} Else.if Ahead2("[0-9]", offset) Then {
strin=offset
Output$=format$("{0::-10}{1::-10} Integer ", LineNo, ColumnNo)
offset++ : ColumnNo++
While Ahead2("[0-9]", offset)
offset++ : ColumnNo++
End While
if Ahead2("[a-zA-Z_]", offset) then
{Error " Invalid number. Starts like a number, but ends in non-numeric characters."+er$}
else
Output$=Eval$(Scanner, strin, (offset-strin)*2)+nl$
end if
} Else {
Symbols(Eval$(Scanner, Offset, 2))
offset++ : ColumnNo++
}
Until offset>=lim
}
er1$=leftpart$(error$,er$)
if er1$<>"" then
Print
Report "Error:"+er1$
Output$="(Error)"+nl$+"Error:"+er1$
else
Output$=format$("{0::-10}{1::-10}", LineNo, ColumnNo)+" End_of_Input"+nl$
end if
Push Output$
Exit
Clipboard Output$
Save.Doc Output$, "lex.t", Ansi
document lex$
Load.Doc lex$,"lex.t", Ansi
Report lex$
Sub Keywords(a$)
select case a$
case "if"
a$="Keyword_if"
case "else"
a$="Keyword_else"
case "while"
a$="Keyword_while"
case "print"
a$="Keyword_print"
case "putc"
a$="Keyword_putc"
else case
a$="Identifier "+a$
end select
Output$=a$+nl$
End sub
Sub Symbols(a$)
select case a$
case " ", chr$(9)
a$=""
case "("
a$="LeftParen"
case ")"
a$="RightParen"
case "{"
a$="LeftBrace"
case "}"
a$="RightBrace"
case ";"
a$="Semicolon"
case ","
a$="Comma"
case "*"
a$="Op_multiply"
case "/"
a$="Op_divide"
case "+"
a$="Op_add"
case "-"
a$="Op_subtract"
case "%"
a$="Op_mod"
case "<"
{ if Ahead("=", offset+1) Then
offset++
a$="Op_lessequal"
ColumnNo++
else
a$="Op_less"
end if
}
case ">"
{ if Ahead("=", offset+1) Then
offset++
ColumnNo++
a$="Op_greaterequal"
else
a$="Op_greater"
end if
}
case "="
{ if Ahead("=", offset+1) Then
offset++
ColumnNo++
a$="Op_equal"
else
a$="Op_assign"
end if
}
case "!"
{ if Ahead("=", offset+1) Then
offset++
ColumnNo++
a$="Op_notequal"
else
a$="Op_not"
end if
}
case "&"
{ if Ahead("&", offset+1) Then
offset++
ColumnNo++
a$="Op_and"
else
a$=""
end if
}
case "|"
{ if Ahead("|", offset+1) Then
offset++
ColumnNo++
a$="Op_or"
else
a$=""
end if
}
else case
{Error "Unrecognized character."+er$}
end select
if a$<>"" then
Output$=format$("{0::-10}{1::-10} ", LineNo, ColumnNo)+a$+nl$
end if
End Sub
Sub checkerror()
if offset>lim then {
Error "End-of-line while scanning string literal. Closing string character not found before end-of-line."+er$
} else.if Ahead(nl$,offset) then {
Error "End-of-file while scanning string literal. Closing string character not found."+er$
}
End Sub
}
Module syntax_analyzer (b$){
enum tokens {
Op_add, Op_subtract, Op_not=5, Op_multiply=10, Op_divide, Op_mod,
Op_negate, Op_less, Op_lessequal, Op_greater, Op_greaterequal,
Op_equal, Op_notequal, Op_and, Op_or, Op_assign=100, Keyword_if=110,
Keyword_else, Keyword_while, Keyword_print, Keyword_putc, LeftParen, RightParen,
LeftBrace, RightBrace, Semicolon, Comma, Identifier, Integer, String, End_of_input
}
Inventory precedence=Op_multiply:=13, Op_divide:=13, Op_mod:=13, Op_add:=12, Op_subtract:=12
Append precedence, Op_negate:=14, Op_not:=14, Op_less:=10, Op_lessequal:=10, Op_greater:=10
Append precedence, Op_greaterequal:=10, Op_equal:=9, Op_notequal:=9, Op_assign:=-1, Op_and:=5
Append precedence, Op_or:=4
Inventory symbols=Op_multiply:="Multiply", Op_divide:="Divide", Op_mod:="Mod", Op_add:="Add"
Append symbols, Op_negate:="Negate", Op_not:="Not", Op_less:="Less", Op_subtract:="Subtract"
Append symbols, Op_lessequal:="LessEqual", Op_greater:="Greater", Op_greaterequal:="GreaterEqual"
Append symbols, Op_equal:="Equal", Op_notequal:="NotEqual", Op_and:="And", Op_or:="Or"
def lineNo, ColumnNo, m, line$, a, lim, cur=-1
const nl$=chr$(13)+chr$(10), Ansi=3
Dim lex$()
lex$()=piece$(b$,chr$(13)+chr$(10))
lim=dimension(lex$(),1)-1
op=End_of_input
flush
k=0
Try {
push (,) ' Null
getone(&op)
repeat
stmt(&op)
shift 2 ' swap two top items
push ("Sequence", array, array)
k++
until op=End_of_Input
}
er$=error$
if er$<>"" then print er$ : flush: break
Print "Ast"
Document Output$
prt_ast()
Push Output$
exit
clipboard Output$
Save.Doc Output$, "parse.t", Ansi
document parse$
Load.Doc parse$,"parse.t", Ansi
Report parse$
sub prt_ast(t)
if len(t)<1 then
Output$=";"+nl$
else.if len(t)=3 then
Output$=t#val$(0) +nl$
prt_ast(t#val(1)) : prt_ast(t#val(2))
else
Output$=t#val$(0) +nl$
end if
end sub
sub expr(p) ' only a number
local x=(,), prev=op
if op>=Identifier then
x=(line$,)
getone(&op)
else.if op=LeftParen then
paren_exp()
x=array
else.if op<10 then
getone(&op)
expr(precedence(int(Op_negate)))
read local y
if prev=Op_add then
x=y
else
if prev=Op_subtract then prev=Op_negate
x=(symbols(prev), y,(,))
End if
else
{error "??? "+eval$(op)}
end if
local prec
while exist(precedence, int(op))
prev=op : prec=eval(precedence)
if prec<14 and prec>=p else exit
getone(&op)
expr(prec+1) ' all operators are left associative (use prec for right a.)
x=(symbols(int(prev)), x, array)
End While
Push x
end sub
sub paren_exp()
expected(LeftParen)
getone(&op)
expr(0)
expected(RightParen)
getone(&op)
end sub
sub stmt(&op)
local t=(,)
if op=Identifier then
t=(line$)
getone(&op)
expected(Op_assign)
getone(&op)
expr(0)
read local rightnode
Push ("Assign",t,rightnode)
expected(Semicolon)
getone(&op)
else.if op=Semicolon then
getone(&op)
Push (";",)
else.if op=Keyword_print then
getone(&op)
expected(LeftParen)
repeat
getone(&op)
if op=String then
Push ("Prts",(line$,),(,))
getone(&op)
else
expr(0)
Push ("Prti", array,(,))
end if
t=("Sequence", t, array)
until op<>Comma
expected(RightParen)
getone(&op)
expected(Semicolon)
getone(&op)
push t
else.if op=Keyword_while then
getone(&op)
paren_exp()
stmt(&op)
shift 2
Push ("While",array, array)
else.if op=Keyword_if then
getone(&op)
paren_exp()
stmt(&op)
local s2=(,)
if op=Keyword_else then
getone(&op)
stmt(&op)
read s2
end if
shift 2
Push ("If",array ,("If",array,s2))
else.if op=Keyword_putc then
getone(&op)
paren_exp()
Push ("Prtc",array,t)
expected(Semicolon)
getone(&op)
else.if op=LeftBrace then
Brace()
else
error "Unkown Op"
end if
end sub
Sub Brace()
getone(&op)
while op<>RightBrace and op<>End_of_input
stmt(&op)
t=("Sequence", t, array)
end while
expected(RightBrace)
getone(&op)
push t
End Sub
Sub expected(what)
if not op=what then {Error "Expected "+eval$(what)+str$(LineNo)+","+Str$(ColumnNo)}
End Sub
sub getone(&op)
op=End_of_input
while cur<lim
cur++
line$=trim$(lex$(cur))
if line$<>"" then exit
end while
if cur=lim then exit sub
LineNo=Val(line$,"int",m)
line$=mid$(line$, m)
ColumnNo=Val(line$,"int",m)
line$=trim$(mid$(line$, m))
Rem : Print LineNo, ColumnNo
m=instr(line$," ")
if m>0 then op=Eval("."+leftpart$(line$, " ")) else op=Eval("."+line$)
end sub
}
Module Virtual_Machine_Interpreter (a$){
\\ function to extract string, replacing escape codes.
Function GetString$(a$) {
s=instr(a$, chr$(34))
m=rinstr(a$,chr$(34))-s
if m>1 then
\\ process escape codes
=format$(mid$(a$, s+1, m-1))
else
=""
end if
}
\\ module to print a string to console using codes, 13, 10, 9
Module printsrv (a$) {
for i=1 to len(a$)
select case chrcode(Mid$(a$,i,1))
case 13
cursor 0
case 10
cursor 0 : Print
case 9
cursor ((pos+tab) div tab)*tab
else case
{
m=pos :if pos>=width then Print : m=pos
Print Mid$(a$,i,1);
if m<=width then cursor m+1
}
end select
next i
}
const nl$=chr$(13)+chr$(10)
\\ we can set starting value to any numbern where 0<=n<=232
enum op { halt_=232, add_, sub_, mul_, div_, mod_, not_, neg_, and_, or_, lt_,
gt_, le_, ge_, ne_, eq_, prts_, prti_, prtc_, store_, fetch_, push_,
jmp_, jz_
}
Rem : Form 120, 60 ' change console width X height to run Ascii Mandlebrot examlpe
Report "Virtual Assembly Code:"+{
}+a$
Print "Prepare Byte Code"
\\ get datasize
a$=rightpart$(a$, "Datasize:")
m=0
data_size=val(a$, "int", m)
a$=mid$(a$, m)
\\ make stack
if data_size>0 then Buffer Clear stack_ as long*data_size
\\ dim or redim buffer append 1000 long as is.
Buffer stack_ as long*(1000+data_size)
\\ get strings
a$=rightpart$(a$, "Strings:")
m=0
strings=val(a$, "int", m)
a$=rightpart$(a$, nl$)
if strings>0 then
Dim strings$(strings)
for i=0 to strings-1
strings$(i)=GetString$(leftpart$(a$, nl$))
a$=rightpart$(a$, nl$)
Next i
End if
buffer clear code_ as byte*1000
do
m=0
offset=val(a$,"int", m)
if m<0 then exit
a$=mid$(a$,m)
line$=trim$(leftpart$(a$,nl$))
if line$="" then line$=trim$(a$) else a$=trim$(rightpart$(a$, nl$))
op$=if$(instr(line$," ")>0->leftpart$(line$," "), line$)
if not valid(eval(op$+"_")) then exit
opc=eval(op$+"_")
Return code_, offset:=opc
if opc>=store_ then
line$=rightpart$(line$," ")
select case opc
case store_, fetch_
Return code_, offset+1:=val(rightpart$(leftpart$(line$,"]"),"[")) as long : offset+=4
case push_
Return code_, offset+1:=uint(val(line$)) as long : offset+=4
case jz_, jmp_
Return code_, offset+1:=val(rightpart$(line$,")")) as long : offset+=4
end select
end if
Always
Print "Press any key" : Push key$ : Drop
\\ Prepare VM
let pc=0, sp=len(stack_) div 4
do {
func=eval(code_, pc)
pc++
select case func
case halt_
exit
case push_
sp--:return stack_, sp:=eval(code_, pc as long):pc+=4
case jz_
sp++: if eval(stack_, sp-1)=0 then pc=eval(code_, pc as long) else pc+=4
case jmp_
pc=eval(code_, pc as long)
case fetch_
sp--:Return stack_, sp:=eval(stack_, eval(code_, pc as long)):pc+=4
case store_
Return stack_, eval(code_, pc as long):=eval(stack_, sp):sp++:pc+=4
case add_
Return stack_, sp+1:=uint(sint(eval(stack_, sp+1))+sint(eval(stack_, sp))):sp++
case sub_
Return stack_, sp+1:=uint(sint(eval(stack_, sp+1))-sint(eval(stack_, sp))):sp++
case mul_
Return stack_, sp+1:=uint(sint(eval(stack_, sp+1))*sint(eval(stack_, sp))):sp++
case div_
Return stack_, sp+1:=uint(sint(eval(stack_, sp+1)) div sint(eval(stack_, sp))):sp++
case mod_
Return stack_, sp+1:=uint(sint(eval(stack_, sp+1)) mod sint(eval(stack_, sp))) :sp++
case not_
Return stack_, sp:=if(eval(stack_, sp)=0->uint(-1),0)
case neg_ \\ we can use neg(sint(value))+1 or uint(-sint(value))
Return stack_, sp:=uint(-sint(eval(stack_, sp)))
case and_
Return stack_, sp+1:=binary.and(eval(stack_, sp+1),eval(stack_, sp)):sp++
case or_
Return stack_, sp+1:=binary.or(eval(stack_, sp+1),eval(stack_, sp)):sp++
case lt_
Return stack_, sp+1:=uint(if(sint(eval(stack_, sp+1))<sint(eval(stack_, sp))->-1, 0)):sp++
case gt_
Return stack_, sp+1:=uint(if(sint(eval(stack_, sp+1))>sint(eval(stack_, sp))->-1, 0)):sp++
case le_
Return stack_, sp+1:=uint(if(sint(eval(stack_, sp+1))<=sint(eval(stack_, sp))->-1, 0)):sp++
case ge_
Return stack_, sp+1:=uint(if(sint(eval(stack_, sp+1))>=sint(eval(stack_, sp))->-1, 0)):sp++
case ne_
Return stack_, sp+1:=uint(if(eval(stack_, sp+1)<>eval(stack_, sp)->-1, 0)):sp++
case eq_
Return stack_, sp+1:=uint(if(eval(stack_, sp+1)=eval(stack_, sp)->-1, 0)):sp++
case prts_
printsrv strings$(eval(stack_,sp)):sp++
case prti_
printsrv str$(sint(eval(stack_,sp)),0):sp++
case prtc_
printsrv chrcode$(eval(stack_,sp)):sp++
else case
Error "Unkown op "+str$(func)
end select
} always
Print "done"
}
Module CodeGenerator (s$){
Function code$(op$) {
=format$("{0::-6} {1}", pc, op$)
pc++
}
Function code2$(op$, n$) {
=format$("{0::-6} {1} {2}", pc, op$, n$)
pc+=5
}
Function code3$(op$,pc, st, ed) {
=format$("{0::-6} {1} ({2}) {3}", pc, op$, ed-st-1, ed)
}
Enum tok {
gneg, gnot, gmul, gdiv, gmod, gadd, gle, gsub, glt
gle, ggt, gge, geq, gne, gand, gor, gprtc, gprti, gprts,
gif, gwhile, gAssign, gSeq, gstring, gidentifier, gint, gnone
}
\\ Inventories are lists with keys, or keys/data (key must be unique)
\\ there is one type more the Invetory Queue which get same keys.
\\ But here not used.
Inventory symb="Multiply":=gmul, "Divide":=gdiv, "Mod":=gmod, "Add":=gadd
Append symb, "Negate":=gneg, "Not":=gnot,"Less":=glt,"Subtract":=gsub
Append symb, "LessEqual":=gle, "Greater":=ggt, "GreaterEqual":=gge, "Sequence":=gSeq
Append symb, "Equal":=geq, "NotEqual":=gne, "And":=gand, "Or":=gor, "While":=gwhile
Append symb, "Prtc":=gprtc,"Prti":=gprti,"Prts":=gprts, "Assign":=gAssign, "If":=gif
Append symb, "String":=gstring, "Identifier":=gidentifier, "Integer":=gint, ";", gnone
Inventory DataSet
\\ We set string as key. key maybe an empty string, a string or a number.
\\ so we want eash string to saved one time only.
Inventory Strings
Const nl$=chr$(13)+chr$(10), Ansi=3
Def z$, lim, line$, newvar_ok, i=0
Document message$=nl$
Global pc \\ functions have own scope, so we make it global, for this module, and childs.
Dim lines$()
s$=filter$(s$,chr$(9)) \\ exclude tabs
Lines$()=piece$(s$,nl$) \\ break to lines
lim=len(Lines$())
Flush ' empty stack (there is a current stack of values which we use here)
Load_Ast()
If not stack.size=1 Then Flush : Error "Ast not loaded"
AST=array \\ pop the array from stack
Document Assembly$, Header$
\\ all lines of assembly goes to stack. Maybe not in right order.
\\ Push statement push to top, Data statement push to bottom of stack
CodeGenerator(Ast)
Data code$("halt") ' append to end of stack
\\ So now we get all data (letters) from stack
While not empty
Assembly$=letter$+nl$
end while
\\ So now we have to place them in order
Sort Assembly$
\\ Let's make the header
Header$=format$("Datasize: {0} Strings: {1}", Len(Dataset),Len(strings))
\\ we use an iterator object, str^ is the counter, readonly, but Eval$() use it from object.
str=each(strings)
While str
Header$=nl$+Eval$(str)
End while
Assembly$=nl$
\\ insert to line 1 the Header
Insert 1 Assembly$=Header$
\\ Also we check for warnings
If len(message$)>2 then Assembly$="Warnings: "+nl$+message$
\\ So now we get a report
\\ (at each 3/4 of window's lines, the printing stop and wait for user response, any key)
Push Assembly$
Exit
Report Assembly$
Clipboard Assembly$
Save.Doc Assembly$, "code.t", Ansi
End
\\ subs have 10000 limit for recursion but can be extended to 1000000 or more.
Sub CodeGenerator(t)
If len(t)=3 then
select case t#val(0)
Case gSeq
CodeGenerator(t#val(1)) : CodeGenerator(t#val(2))
Case gwhile
{
local spc=pc
CodeGenerator(t#val(1))
local pc1=pc
pc+=5 ' room for jz
CodeGenerator(t#val(2))
data code3$("jz",pc1, pc1, pc+5)
data code3$("jmp",pc, pc, spc)
pc+=5 ' room for jmp
}
Case gif
{
CodeGenerator(t#val(1))
local pc1=pc, pc2
pc+=5
CodeGenerator(t#val(2)#val(1))
If len(t#val(2)#val(2))>0 then
pc2=pc
pc+=5
data code3$("jz",pc1, pc1, pc)
CodeGenerator(t#val(2)#val(2))
data code3$("jmp",pc2, pc2, pc)
else
data code3$("jz",pc1, pc1, pc)
end If
}
Case gAssign
{
CodeGenerator(t#val(2))
local newvar_ok=true
CodeGenerator(t#val(1))
}
case gneg to gnot, gprtc to gprts
CodeGenerator(t#val(1)) : data code$(mid$(eval$(t#val(0)),2))
case gmul to gor
{
CodeGenerator(t#val(1))
CodeGenerator(t#val(2))
data code$(mid$(eval$(t#val(0)),2))
}
End select
Else.if len(t)=2 then
select case t#val(0)
Case gString
{
local spos
If exist(strings,t#val$(1)) then
spos=eval(strings!)
else
append strings, t#val$(1)
spos=len(strings)-1
end If
Push code2$("push",str$(spos,0))
}
Case gInt
Push code2$("push",t#val$(1), pc)
Case gIdentifier
{
local ipos
If exist(dataset,t#val$(1)) then
ipos=Eval(dataset!) ' return position
else.if newvar_ok then
Append dataset, t#val$(1)
ipos=len(dataset)-1
else
message$="Variable "+t#val$(1)+" not initialized"+nl$
end If
If newvar_ok then
Push code2$("store","["+str$(ipos, 0)+"]")
else
Push code2$("fetch","["+str$(ipos, 0)+"]")
end If
}
end select
End If
End Sub
Sub Load_Ast()
If i>=lim then Push (,) : exit sub
do
line$=Trim$(lines$(i))
I++
tok$=piece$(line$," ")(0)
until line$<>"" or i>=lim
If tok$="Identifier" then
Push (gidentifier,trim$(Mid$(line$,11)))
else.if tok$="Integer" then
long n=Val(Mid$(line$,8)) ' check overflow
Push (gint, Trim$(Mid$(line$,8)))
else.if tok$="String" then
Push (gstring,Trim$(Mid$(line$,7)))
else.if tok$=";" then
Push (,)
Else
local otok=symb(tok$)
Load_Ast()
Load_Ast()
Shift 2
Push (otok,array, array)
End If
End Sub
}
Push {
{
/*
This is an integer ascii Mandelbrot generator
*/
left_edge= -420;
right_edge=300;
top_edge=300;
bottom_edge = -300;
x_step=7;
y_step=15;
max_iter=200;
y0 = top_edge;
while (y0 > bottom_edge) {
x0 = left_edge;
while (x0 < right_edge) {
y = 0;
x = 0;
the_char = ' ';
i = 0;
while (i < max_iter) {
x_x = (x * x) / 200;
y_y = (y * y) / 200;
if (x_x + y_y > 800 ) {
the_char = '0' + i;
if (i > 9) {
the_char = '@';
}
i = max_iter;
}
y = x * y / 100 + y0;
x = x_x - y_y + x0;
i = i + 1;
}
putc(the_char);
x0 = x0 + x_step;
}
putc('\n');
y0 = y0 - y_step;
}
}
}
Form 120, 60
lexical_analyzer
syntax_analyzer
CodeGenerator
Virtual_Machine_Interpreter