Just do it in asm.

This commit is contained in:
Simon Forman 2019-11-08 08:08:53 -08:00
parent 5172be7a0a
commit d67420ae68
1 changed files with 87 additions and 382 deletions

View File

@ -17,398 +17,103 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Thun. If not see <http://www.gnu.org/licenses/>.
The Joy interpreter that this implements is pretty crude. the only types
are 16-bit integers and linked lists. The lists are 32-bit words divided
into two 16-bit fields. The high half is the node value and the low half
points directly (not offset) to the next cell, zero terminates the list.
The expression is expected to be already written in RAM as a linked list at
the time the mainloop starts. As yet there is no support for actually doing
this. Both the new stack and expression cells are written to the same heap
intermixed. The stack and expression pointers never decrease, the whole
history of the computation is recorded in RAM. If the computation of the
expression overruns the end of RAM (or 16-bits whichever comes first) the
machine crashes.
At the moment, functions are recognized by setting high bit, but I don't
think I remembered to set the bits during compilation, so it won't work
at all right now. Er... Boo. Anyhow, the whole thing is very crude and
not at all what I am hoping eventually to build.
But it's a start, and I feel good about emitting machine code (even if the
program doesn't do anything useful yet.)
Mark II
*/
:- use_module(library(assoc)).
:- use_module(library(clpfd)).
do :- Program = [
,,,,,,
[,,,,,],(cons),
[,],(dup),
[,,[],[,,[,,,[,],[,],,],],,],(i),
[,],(new),
[,,,,,,,],(swap),
[new,cons],(unit),
[dup,i],(x),
[swap,cons],(swons)
],
compile_program(Program, Binary),
write_binary('joy_asm.bin', Binary).
% Just do it in assembler.
compile_program(Program, Binary) :-
phrase((init, (Program, IR)), [], [Context]),
phrase((IR), ASM),
program([ % Mainloop.
do_offset(Over), % Oberon bootloader writes MemLim to RAM[12] and
allocate(_, 16), % stackOrg to RAM[24], we don't need these
label(Over), % but they must not be allowed to corrupt our code.
mov_imm(0, 0), % zero out the root cell.
store_word(0, 0, 0),
mov_imm(SP, 0x1000),
mov_imm(EXPR_addr, 0x500),
mov_imm(TOS, 0),
mov_imm(TERM, 0),
store_word(TOS, SP, 0), % RAM[SP] := 0
label(Main),
% if_zero(EXPR_addr, HALT),
sub_imm(EXPR_addr, EXPR_addr, 0),
eq_offset(HALT),
% deref(EXPR_addr, EXPR),
load_word(EXPR, EXPR_addr, 0), % Load expr pair record into EXPR
% At this point EXPR holds the record word of the expression.
ror_imm(TermAddr, EXPR, -15), % put the offset in TermAddr
% No need to mask off high bits as the type tag for pairs is 00
add(TermAddr, TermAddr, EXPR_addr),
% TermAddr has the address of the term record.
load_word(TERM, TermAddr, 0), % Bring the record in from RAM.
% Now Term has the term's record data and TermAddr has the address of the term.
and_imm(TEMP0, EXPR, 0x7fff), % get the offset of the tail of the expr
eq_offset(Foo), % if the offset is zero don't add the adress. it's empty list.
add(TEMP0, TEMP0, EXPR_addr), % Add the address to the offset.
label(Foo),
mov(EXPR_addr, TEMP0),
% EXPR_addr now holds the address of the next cell of the expression list.
% if_literal(TERM, PUSH),
ror_imm(TEMP0, TERM, -30), % get just the two tag bits.
sub_imm(TEMP0, TEMP0, 2), % if this is a symbol result is zero.
ne_offset(PUSH),
% if it is a symbol the rest of it is the pointer to the machine code.
% lookup(TERM), % Jump to command.
mov_imm_with_shift(TEMP0, 0x3fff), % TEMP0 = 0x3fffffff
ior_imm(TEMP0, TEMP0, 0xffff),
and(TEMP0, TEMP0, TERM),
eq(TEMP0), % double check that this works with pointer in reg...
% going into push we have the term
label(PUSH),
% push2(TOS, TEMP1, SP), % stack = TERM, stack
sub_imm(SP, SP, 4), % SP -= 1 (word, not byte)
% SP points to the future home of the new stack cell.
sub(TOS, TermAddr, SP), % TOS := &temp - sp
% TOS has the offset from new stack cell to term cell.
% Combine with the offset to the previous stack cell.
lsl_imm(TOS, TOS, 15), % TOS := TOS << 15
ior(TOS, TOS, 4), % TOS := TOS | 4
% label(DONE),
store_word(TOS, SP, 0), % RAM[SP] := TOS
do_offset(Main),
label(HALT),
do_offset(HALT)
]) :- [SP, EXPR_addr, TOS, TERM, EXPR, TermAddr, TEMP0]=[0, 1, 2, 3, 4, 5, 6].
do :- program(Program),
compile_program(Program, Binary),
write_binary('joy_asmii.bin', Binary).
compile_program(ASM, Binary) :-
phrase(linker(ASM), EnumeratedASM),
% writeln(EnumeratedASM),
foo(Context),
phrase(asm(EnumeratedASM), Binary).
foo(Context) :-
get_assoc(dictionary, Context, D),
assoc_to_list(D, Dictionary),
portray_clause(Dictionary).
/*
This first stage //2 converts the Joy description into a kind of intermediate
representation that models the Joy interpreter on top of the machine but doesn't
actually use assembly instructions. It also manages the named registers and
memory locations so thet don't appear in the program.
The idea here is to extract the low-level "primitives" needed to define the Joy
interpreter to make it easier to think about (and maybe eventually retarget other
CPUs.)
*/
([], []) --> [].
([|Terms], Ts) --> % Preamble.
% Initialize context/state/symbol table.
set(dict_ptr, 11), % Reg 11 is a pointer used during func lookup.
set(dict_top, 12), % Reg 12 points to top of dictionary.
set(dict, 0), % Address of top of dict during compilation.
set(done, _DONE), % DONE label (logic variable.)
set(expr, 4), % Reg 4 points to expression.
set(halt, _HALT), % HALT label (logic variable.)
set(main, _MAIN), % MAIN label (logic variable.)
set(reset, _Reset), % Reset label (logic variable.)
set(sp, 2), % Reg 2 points to just under top of stack.
set(temp0, 6), % Reg 6 is a temp var.
set(temp1, 7), % Reg 7 is a temp var.
set(temp2, 8), % Reg 8 is a temp var.
set(temp3, 9), % Reg 9 is a temp var.
set(term, 5), % Reg 4 holds current term.
set(tos, 3), % Reg 3 holds Top of Stack.
(Terms, Ts).
([|Terms], [ % Initialization.
jump(Over), % Oberon bootloader writes MemLim to RAM[12] and
asm(allocate(_, 16)), % stackOrg to RAM[24], we don't need these
label(Over), % but they must not be allowed to corrupt our code.
set_reg_const(0, 0), % zero out the root cell.
write_ram(0, 0),
set_reg_const(SP, 0x1000),
set_reg_const(EXPR, 0x500),
set_reg_label(DICT_TOP, LastWord),
set_reg_const(TOS, 0),
set_reg_const(TERM, 0),
asm(store_word(TOS, SP, 0)) % RAM[SP] := 0
|Ts]) -->
get([dict_top, DICT_TOP, expr, EXPR, sp, SP, term, TERM, tos, TOS]),
(Terms, Ts), get(dict, LastWord).
([|Terms], [ % Mainloop.
label(MAIN),
if_zero(EXPR, HALT),
deref(EXPR, TEMP0),
% At this point EXPR holds the record word of the expression and TEMP0
% has a copy of the address of the record.
split_pair(TERM, TEMP1, EXPR, TEMP0),
% Now Term has the term's record data and temp1 has the address of the term.
% temp0 still has the address of the expression record.
if_literal(TERM, PUSH),
% if it is a symbol the rest of it is the pointer to the machine code.
lookup(TERM), % Jump to command.
% going in to push we have the term
label(PUSH), push2(TOS, TEMP1, SP), % stack = TERM, stack
label(DONE), write_ram(SP, TOS), % RAM[SP] := TOS
jump(MAIN)
|Ts]) -->
get([dict_ptr, DICT_PTR, dict_top, DICT_TOP, done, DONE, expr, EXPR,
halt, HALT, main, MAIN, sp, SP, term, TERM, tos, TOS,
temp0, TEMP0, temp1, TEMP1]),
(Terms, Ts).
([Body, (NameAtom)|Terms], [defi(Name, B, Prev, I, SP, TOS)|Ts]) -->
get(dict, Prev), set(dict, Name), get([sp, SP, tos, TOS]),
inscribe(NameAtom, Name), (Terms, Ts), lookup(i, I), lookup(Body, B).
([Body, (NameAtom)|Terms], [definition(Name, DONE, B, Prev)|Ts]) -->
get(dict, Prev), set(dict, Name), inscribe(NameAtom, Name),
get(done, DONE), ([Body, Terms], [B, Ts]).
([Body, (NameAtom)|Terms], [definition(Name, MAIN, B, Prev)|Ts]) -->
get(dict, Prev), set(dict, Name), inscribe(NameAtom, Name),
get(main, MAIN), ([Body, Terms], [B, Ts]).
([P, T, E, |Terms], [br(Predicate, Then, Else)|Ts]) -->
([P, T, E, Terms], [Predicate, Then, Else, Ts]).
([P, B, |Terms], [repeat_until(Predicate, Body)|Ts]) -->
([P, B, Terms], [Predicate, Body, Ts]).
([Term|Terms], [T|Ts]) --> (Term, T), (Terms, Ts).
(, dw(0)) --> [].
(, label(Reset)) --> get(reset, Reset).
(, halt(HALT)) --> get(halt, HALT).
(, pop(TEMP0, TOS)) --> get(temp0, TEMP0), get(tos, TOS).
(, push(TOS, TOS, SP)) --> get(tos, TOS), get(sp, SP).
(, high_half(TEMP1, TOS)) --> get(temp1, TEMP1), get(tos, TOS).
(, merge(SP, TOS)) --> get(tos, TOS), get(sp, SP).
(, swap_halves(TOS)) --> get(tos, TOS).
(, deref(TEMP0)) --> get(temp0, TEMP0).
(, if_zero(TEMP0)) --> get(temp0, TEMP0).
(, asm(mov(EXPR, TEMP3))) --> get(expr, EXPR), get(temp3, TEMP3).
(, asm(ior(TOS, TEMP1, SP))) --> get(tos, TOS), get(temp1, TEMP1), get(sp, SP).
(, add_const(TEMP2, SP, 8)) --> get(temp2, TEMP2), get(sp, SP).
(, add_const(TEMP3, SP, 4)) --> get(temp3, TEMP3), get(sp, SP).
(, add_const(SP, SP, 4)) --> get(sp, SP).
(, chop_word(TEMP1, TEMP0)) --> get(temp0, TEMP0), get(temp1, TEMP1).
(, chop_word(TEMP0, TOS)) --> get(temp0, TEMP0), get(tos, TOS).
(, chop_word(TEMP2, TOS)) --> get(temp2, TEMP2), get(tos, TOS).
(, or_inplace(TEMP1, EXPR)) --> get(expr, EXPR), get(temp1, TEMP1).
(, or_inplace(TEMP0, TEMP1)) --> get(temp0, TEMP0), get(temp1, TEMP1).
(, or_inplace(TEMP0, TEMP2)) --> get(temp0, TEMP0), get(temp2, TEMP2).
(, or_inplace(TEMP1, TEMP2)) --> get(temp1, TEMP1), get(temp2, TEMP2).
(, write_cell(TEMP0, SP)) --> get(temp0, TEMP0), get(sp, SP).
(, write_cell(TEMP1, SP)) --> get(temp1, TEMP1), get(sp, SP).
(, write_cell(TOS, SP)) --> get(tos, TOS), get(sp, SP).
(, low_half(TOS)) --> get(tos, TOS).
(, low_half(TEMP0, TOS)) --> get(temp0, TEMP0), get(tos, TOS).
(, low_half(TOS, SP)) --> get(sp, SP), get(tos, TOS).
/*
Context (state) manipulation for the //2 relation.
Association lists are used to keep a kind of symbol table as well as a dictionary
of name atoms to address logic variables for defined Joy functions.
*/
init, [Context] -->
{empty_assoc(C), empty_assoc(Dictionary),
put_assoc(dictionary, C, Dictionary, Context)}.
get([]) --> !.
get([Key, Value|Ts]) --> !, get(Key, Value), get(Ts).
get(Key, Value) --> state(Context), {get_assoc(Key, Context, Value)}.
set(Key, Value) --> state(ContextIn, ContextOut),
{put_assoc(Key, ContextIn, Value, ContextOut)}.
inscribe(NameAtom, Label) --> state(ContextIn, ContextOut),
{get_assoc(dictionary, ContextIn, Din),
put_assoc(NameAtom, Din, Label, Dout),
put_assoc(dictionary, ContextIn, Dout, ContextOut)}.
lookup([], []) --> !.
lookup([T|Ts], [V|Vs]) --> !, lookup(T, V), lookup(Ts, Vs).
lookup(NameAtom, Label) --> state(Context),
{get_assoc(dictionary, Context, D), get_assoc(NameAtom, D, Label)}.
state(S), [S] --> [S].
state(S0, S), [S] --> [S0].
/*
This second stage //1 converts the intermediate representation to assembly
language.
*/
([]) --> [].
([Term|Terms]) --> (Term), (Terms).
(if_literal(Reg, Label)) -->
[ior_imm(0, Reg, -30), % get just the two tag bits.
sub_imm(0, 0, 2), % subtract 2 to check if result is zero.
ne_offset(Label)].
% if reg = 0 jump to label.
(if_zero(Reg, Label)) --> [sub_imm(Reg, Reg, 0), eq_offset(Label)].
(set_reg_const(Reg, Immediate)) --> {Immediate >= -(2^15), Immediate < 2^16}, !,
[mov_imm(Reg, Immediate)].
(set_reg_const(Reg, Immediate)) --> {Immediate >= 0, Immediate < 2^33}, !, % FIXME: handle negative numbers.
{high_half_word(Immediate, HighHalf), low_half_word(Immediate, LowHalf)},
[ mov_imm_with_shift(Reg, HighHalf), ior_imm(Reg, Reg, LowHalf)].
(set_reg_label(Reg, Label)) --> [mov_imm(Reg, Label)].
( noop) --> [mov(0, 0)].
( halt(Halt)) --> [label(Halt), do_offset(Halt)].
( asm(ASM)) --> [ASM].
(label(Label)) --> [label(Label)].
( jump(Label)) --> [do_offset(Label)].
( dw(Int)) --> [word(Int)].
( low_half(Reg)) --> [and_imm(Reg, Reg, 0xffff)].
( low_half(To, From)) --> [and_imm(To, From, 0xffff)].
( high_half(Reg)) --> [mov_imm_with_shift(0, 0xffff), and(Reg, Reg, 0)].
(high_half(To, From)) --> [mov_imm_with_shift(0, 0xffff), and(To, From, 0)].
(swap_halves(Register)) --> [ror_imm(Register, Register, 16)].
(swap_halves(To, From)) --> [ror_imm( To, From, 16)].
(high_half_to(To, From)) --> ([swap_halves(To, From), low_half(To)]).
(split_word(To, From)) --> ([high_half_to(To, From), low_half(From)]).
(chop_word(To, From)) --> ([high_half(To, From), low_half(From)]).
(merge(SP, TOS)) -->
[lsl_imm(0, SP, 16),
ior(TOS, TOS, 0),
add_imm(SP, SP, 4)].
(push(TOS, TERM, SP)) -->
[lsl_imm(TOS, TERM, 16), % TOS := TERM << 16
ior(TOS, TOS, SP), % TOS := TOS | SP
add_imm(SP, SP, 4)]. % SP += 1 (word, not byte)
(push2(TOS, TERMADDR, SP)) -->
[sub_imm(SP, SP, 4), % SP -= 1 (word, not byte)
sub(TOS, TERMADDR, SP), % TOS := &temp - sp
lsl_imm(TOS, TOS, 15), % TOS := TOS << 15
ior(TOS, TOS, 4)]. % TOS := TOS | 4
( write_ram(To, From)) --> [store_word(From, To, 0)].
(write_cell(From, SP)) --> [add_imm(SP, SP, 4), store_word(From, SP, 0)].
(deref(Reg)) --> [load_word(Reg, Reg, 0)].
(deref(Reg, Temp)) -->
[mov(Temp, Reg), % Save the address for adding it to offsets later.
load_word(Reg, Reg, 0)].
(or_inplace(To, From)) --> [ior(To, To, From)].
(definition(Label, Exit, Body, Prev)) -->
([
dw(Prev),
label(Label),
Body,
jump(Exit)
]).
(defi(Label, Body, Prev, I, SP, TOS)) -->
([dw(Prev),
label(Label),
defi_def(BodyLabel, SP, TOS),
jump(I)]),
dexpr(Body, BodyLabel).
(defi_def(Label, SP, TOS)) -->
[mov_imm_with_shift(TOS, Label),
ior(TOS, TOS, SP)],
(write_cell(TOS, SP)).
(lookup(TERM)) -->
[mov_imm_with_shift(0, 0x3fff),
ior_imm(0, 0, 0xffff),
and(0, 0, TERM),
eq(0)]. % Jump to term's machine code.
(repeat_until(Condition, Body)) -->
{add_label(Condition, End, ConditionL)},
([
label(Loop),
Body,
ConditionL,
jump(Loop),
label(End)
]).
(br(Condition, [], Else)) --> !,
{add_label(Condition, END, ConditionL)},
([ConditionL, Else, label(END)]).
(br(Condition, Then, Else)) -->
{add_label(Condition, THEN, ConditionL)},
([
ConditionL, Else, jump(END),
label(THEN), Then, label(END)
]).
(add_const(To, From, Immediate)) --> [add_imm(To, From, Immediate)].
(pop(Reg, TOS)) --> ([split_word(Reg, TOS), deref(TOS)]).
% From is a register containing a pair record
% FromAddr is a register containing the address of the record in From
% after,
% To is a register that will contain the record from the head
% ToAddr holds the address of the record in To.
% From is a register containing a pair record
% FromAddr is a register containing the address of the record in From
(split_pair(To, ToAddr, From, FromAddr)) -->
[ior_imm(ToAddr, From, -15), % roll right 15 bits
% No need to mask off high bits as the type tag for pairs is 00
add(ToAddr, ToAddr, FromAddr),
load_word(To, ToAddr, 0), % Bring the record in from RAM.
and_imm(From, From, 0x7fff), % Mask off lower 15 bits.
add(From, From, FromAddr) % Add the address to the offset.
].
/*
Support for //1 second stage.
The dexpr//2 DCG establishes a sequence of labeled expr_cell/2 pseudo-assembly
memory locations as a linked list that encodes a Prolog list of Joy function
labels comprising e.g. the body of some Joy definition.
*/
dexpr([], 0) --> [].
dexpr([Func|Rest], ThisCell) -->
[label(ThisCell), expr_cell(Func, NextCell)],
dexpr(Rest, NextCell).
/*
The add_label/3 relation is a meta-logical construct that accepts a comparision
predicate (e.g. if_zero/2) and "patches" it by adding the Label logic variable
to the end.
*/
add_label(CmpIn, Label, CmpOut) :-
CmpIn =.. F,
append(F, [Label], G),
CmpOut =.. G.
/*
Two simple masking predicates.
*/
high_half_word(I, HighHalf) :- HighHalf is I >> 16 /\ 0xFFFF.
low_half_word( I, LowHalf) :- LowHalf is I /\ 0xFFFF.
/*