diff --git a/thun/compiler.markII.pl b/thun/compiler.markII.pl index 57a7a11..addd68d 100644 --- a/thun/compiler.markII.pl +++ b/thun/compiler.markII.pl @@ -17,398 +17,103 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Thun. If not see . - - -The Joy interpreter that this implements is pretty crude. the only types -are 16-bit integers and linked lists. The lists are 32-bit words divided -into two 16-bit fields. The high half is the node value and the low half -points directly (not offset) to the next cell, zero terminates the list. - -The expression is expected to be already written in RAM as a linked list at -the time the mainloop starts. As yet there is no support for actually doing -this. Both the new stack and expression cells are written to the same heap -intermixed. The stack and expression pointers never decrease, the whole -history of the computation is recorded in RAM. If the computation of the -expression overruns the end of RAM (or 16-bits whichever comes first) the -machine crashes. - -At the moment, functions are recognized by setting high bit, but I don't -think I remembered to set the bits during compilation, so it won't work -at all right now. Er... Boo. Anyhow, the whole thing is very crude and -not at all what I am hoping eventually to build. - -But it's a start, and I feel good about emitting machine code (even if the -program doesn't do anything useful yet.) +Mark II */ :- use_module(library(assoc)). :- use_module(library(clpfd)). - -do :- Program = [ - ヲ,∅,⟴,ヵ,メ,ョ, - [グ,ケ,ゲ,ド,ゴ,サ],ヮ(cons), - [ザ,シ],ヮ(dup), - [グ,ス,[],[ジ,ス,[ズ,セ,ス,[ゼ,ソ],[タ,ゾ],ヰ,ヂ],ヱ],ヰ,チ],ヮ(i), - [ヶ,ペ],ワ(new), - [ナ,ズ,セ,ネ,ヒ,ド,ャ,ペ],ワ(swap), - [new,cons],≡(unit), - [dup,i],≡(x), - [swap,cons],≡(swons) - ], -compile_program(Program, Binary), -write_binary('joy_asm.bin', Binary). +% Just do it in assembler. -compile_program(Program, Binary) :- - phrase((init, ⦾(Program, IR)), [], [Context]), - phrase(⟐(IR), ASM), +program([ % Mainloop. + + do_offset(Over), % Oberon bootloader writes MemLim to RAM[12] and + allocate(_, 16), % stackOrg to RAM[24], we don't need these + label(Over), % but they must not be allowed to corrupt our code. + + mov_imm(0, 0), % zero out the root cell. + store_word(0, 0, 0), + + mov_imm(SP, 0x1000), + mov_imm(EXPR_addr, 0x500), + mov_imm(TOS, 0), + mov_imm(TERM, 0), + store_word(TOS, SP, 0), % RAM[SP] := 0 + + label(Main), + + % if_zero(EXPR_addr, HALT), + sub_imm(EXPR_addr, EXPR_addr, 0), + eq_offset(HALT), + + % deref(EXPR_addr, EXPR), + load_word(EXPR, EXPR_addr, 0), % Load expr pair record into EXPR + + % At this point EXPR holds the record word of the expression. + + ror_imm(TermAddr, EXPR, -15), % put the offset in TermAddr + % No need to mask off high bits as the type tag for pairs is 00 + + add(TermAddr, TermAddr, EXPR_addr), + + % TermAddr has the address of the term record. + + load_word(TERM, TermAddr, 0), % Bring the record in from RAM. + + % Now Term has the term's record data and TermAddr has the address of the term. + + and_imm(TEMP0, EXPR, 0x7fff), % get the offset of the tail of the expr + eq_offset(Foo), % if the offset is zero don't add the adress. it's empty list. + add(TEMP0, TEMP0, EXPR_addr), % Add the address to the offset. + label(Foo), + mov(EXPR_addr, TEMP0), + + % EXPR_addr now holds the address of the next cell of the expression list. + + % if_literal(TERM, PUSH), + ror_imm(TEMP0, TERM, -30), % get just the two tag bits. + sub_imm(TEMP0, TEMP0, 2), % if this is a symbol result is zero. + ne_offset(PUSH), + + % if it is a symbol the rest of it is the pointer to the machine code. + % lookup(TERM), % Jump to command. + mov_imm_with_shift(TEMP0, 0x3fff), % TEMP0 = 0x3fffffff + ior_imm(TEMP0, TEMP0, 0xffff), + and(TEMP0, TEMP0, TERM), + eq(TEMP0), % double check that this works with pointer in reg... + + % going into push we have the term + label(PUSH), + % push2(TOS, TEMP1, SP), % stack = TERM, stack + + sub_imm(SP, SP, 4), % SP -= 1 (word, not byte) + % SP points to the future home of the new stack cell. + sub(TOS, TermAddr, SP), % TOS := &temp - sp + % TOS has the offset from new stack cell to term cell. + % Combine with the offset to the previous stack cell. + lsl_imm(TOS, TOS, 15), % TOS := TOS << 15 + ior(TOS, TOS, 4), % TOS := TOS | 4 + + % label(DONE), + store_word(TOS, SP, 0), % RAM[SP] := TOS + do_offset(Main), + label(HALT), + do_offset(HALT) + +]) :- [SP, EXPR_addr, TOS, TERM, EXPR, TermAddr, TEMP0]=[0, 1, 2, 3, 4, 5, 6]. + + + +do :- program(Program), + compile_program(Program, Binary), + write_binary('joy_asmii.bin', Binary). + + +compile_program(ASM, Binary) :- phrase(linker(ASM), EnumeratedASM), - % writeln(EnumeratedASM), - foo(Context), phrase(asm(EnumeratedASM), Binary). -foo(Context) :- - get_assoc(dictionary, Context, D), - assoc_to_list(D, Dictionary), - portray_clause(Dictionary). - - -/* - -This first stage ⦾//2 converts the Joy description into a kind of intermediate -representation that models the Joy interpreter on top of the machine but doesn't -actually use assembly instructions. It also manages the named registers and -memory locations so thet don't appear in the program. - -The idea here is to extract the low-level "primitives" needed to define the Joy -interpreter to make it easier to think about (and maybe eventually retarget other -CPUs.) - - */ - -⦾([], []) --> []. - -⦾([ヲ|Terms], Ts) --> % Preamble. - % Initialize context/state/symbol table. - set(dict_ptr, 11), % Reg 11 is a pointer used during func lookup. - set(dict_top, 12), % Reg 12 points to top of dictionary. - set(dict, 0), % Address of top of dict during compilation. - set(done, _DONE), % DONE label (logic variable.) - set(expr, 4), % Reg 4 points to expression. - set(halt, _HALT), % HALT label (logic variable.) - set(main, _MAIN), % MAIN label (logic variable.) - set(reset, _Reset), % Reset label (logic variable.) - set(sp, 2), % Reg 2 points to just under top of stack. - set(temp0, 6), % Reg 6 is a temp var. - set(temp1, 7), % Reg 7 is a temp var. - set(temp2, 8), % Reg 8 is a temp var. - set(temp3, 9), % Reg 9 is a temp var. - set(term, 5), % Reg 4 holds current term. - set(tos, 3), % Reg 3 holds Top of Stack. - ⦾(Terms, Ts). - -⦾([ヵ|Terms], [ % Initialization. - jump(Over), % Oberon bootloader writes MemLim to RAM[12] and - asm(allocate(_, 16)), % stackOrg to RAM[24], we don't need these - label(Over), % but they must not be allowed to corrupt our code. - set_reg_const(0, 0), % zero out the root cell. - write_ram(0, 0), - set_reg_const(SP, 0x1000), - set_reg_const(EXPR, 0x500), - set_reg_label(DICT_TOP, LastWord), - set_reg_const(TOS, 0), - set_reg_const(TERM, 0), - asm(store_word(TOS, SP, 0)) % RAM[SP] := 0 - |Ts]) --> - get([dict_top, DICT_TOP, expr, EXPR, sp, SP, term, TERM, tos, TOS]), - ⦾(Terms, Ts), get(dict, LastWord). - -⦾([メ|Terms], [ % Mainloop. - label(MAIN), - if_zero(EXPR, HALT), - deref(EXPR, TEMP0), - % At this point EXPR holds the record word of the expression and TEMP0 - % has a copy of the address of the record. - split_pair(TERM, TEMP1, EXPR, TEMP0), - % Now Term has the term's record data and temp1 has the address of the term. - % temp0 still has the address of the expression record. - if_literal(TERM, PUSH), - % if it is a symbol the rest of it is the pointer to the machine code. - lookup(TERM), % Jump to command. - % going in to push we have the term - label(PUSH), push2(TOS, TEMP1, SP), % stack = TERM, stack - label(DONE), write_ram(SP, TOS), % RAM[SP] := TOS - jump(MAIN) - |Ts]) --> - get([dict_ptr, DICT_PTR, dict_top, DICT_TOP, done, DONE, expr, EXPR, - halt, HALT, main, MAIN, sp, SP, term, TERM, tos, TOS, - temp0, TEMP0, temp1, TEMP1]), - ⦾(Terms, Ts). - -⦾([Body, ≡(NameAtom)|Terms], [defi(Name, B, Prev, I, SP, TOS)|Ts]) --> - get(dict, Prev), set(dict, Name), get([sp, SP, tos, TOS]), - inscribe(NameAtom, Name), ⦾(Terms, Ts), lookup(i, I), lookup(Body, B). - -⦾([Body, ヮ(NameAtom)|Terms], [definition(Name, DONE, B, Prev)|Ts]) --> - get(dict, Prev), set(dict, Name), inscribe(NameAtom, Name), - get(done, DONE), ⦾([Body, Terms], [B, Ts]). - -⦾([Body, ワ(NameAtom)|Terms], [definition(Name, MAIN, B, Prev)|Ts]) --> - get(dict, Prev), set(dict, Name), inscribe(NameAtom, Name), - get(main, MAIN), ⦾([Body, Terms], [B, Ts]). - -⦾([P, T, E, ヰ|Terms], [br(Predicate, Then, Else)|Ts]) --> - ⦾([P, T, E, Terms], [Predicate, Then, Else, Ts]). - -⦾([P, B, ヱ|Terms], [repeat_until(Predicate, Body)|Ts]) --> - ⦾([P, B, Terms], [Predicate, Body, Ts]). - -⦾([Term|Terms], [T|Ts]) --> ⦾(Term, T), ⦾(Terms, Ts). - -⦾(∅, dw(0)) --> []. -⦾(⟴, label(Reset)) --> get(reset, Reset). -⦾(ョ, halt(HALT)) --> get(halt, HALT). -⦾(グ, pop(TEMP0, TOS)) --> get(temp0, TEMP0), get(tos, TOS). -⦾(シ, push(TOS, TOS, SP)) --> get(tos, TOS), get(sp, SP). -⦾(ケ, high_half(TEMP1, TOS)) --> get(temp1, TEMP1), get(tos, TOS). -⦾(サ, merge(SP, TOS)) --> get(tos, TOS), get(sp, SP). -⦾(ザ, swap_halves(TOS)) --> get(tos, TOS). -⦾(ズ, deref(TEMP0)) --> get(temp0, TEMP0). -⦾(ス, if_zero(TEMP0)) --> get(temp0, TEMP0). -⦾(ソ, asm(mov(EXPR, TEMP3))) --> get(expr, EXPR), get(temp3, TEMP3). -⦾(ャ, asm(ior(TOS, TEMP1, SP))) --> get(tos, TOS), get(temp1, TEMP1), get(sp, SP). -⦾(タ, add_const(TEMP2, SP, 8)) --> get(temp2, TEMP2), get(sp, SP). -⦾(ジ, add_const(TEMP3, SP, 4)) --> get(temp3, TEMP3), get(sp, SP). -⦾(チ, add_const(SP, SP, 4)) --> get(sp, SP). -⦾(セ, chop_word(TEMP1, TEMP0)) --> get(temp0, TEMP0), get(temp1, TEMP1). -⦾(ト, chop_word(TEMP0, TOS)) --> get(temp0, TEMP0), get(tos, TOS). -⦾(ネ, chop_word(TEMP2, TOS)) --> get(temp2, TEMP2), get(tos, TOS). -⦾(ゼ, or_inplace(TEMP1, EXPR)) --> get(expr, EXPR), get(temp1, TEMP1). -⦾(ゲ, or_inplace(TEMP0, TEMP1)) --> get(temp0, TEMP0), get(temp1, TEMP1). -⦾(ヒ, or_inplace(TEMP0, TEMP2)) --> get(temp0, TEMP0), get(temp2, TEMP2). -⦾(ゾ, or_inplace(TEMP1, TEMP2)) --> get(temp1, TEMP1), get(temp2, TEMP2). -⦾(ド, write_cell(TEMP0, SP)) --> get(temp0, TEMP0), get(sp, SP). -⦾(ヂ, write_cell(TEMP1, SP)) --> get(temp1, TEMP1), get(sp, SP). -⦾(ペ, write_cell(TOS, SP)) --> get(tos, TOS), get(sp, SP). -⦾(ゴ, low_half(TOS)) --> get(tos, TOS). -⦾(ナ, low_half(TEMP0, TOS)) --> get(temp0, TEMP0), get(tos, TOS). -⦾(ヶ, low_half(TOS, SP)) --> get(sp, SP), get(tos, TOS). - - -/* - -Context (state) manipulation for the ⦾//2 relation. - -Association lists are used to keep a kind of symbol table as well as a dictionary -of name atoms to address logic variables for defined Joy functions. - -*/ - -init, [Context] --> - {empty_assoc(C), empty_assoc(Dictionary), - put_assoc(dictionary, C, Dictionary, Context)}. - -get([]) --> !. -get([Key, Value|Ts]) --> !, get(Key, Value), get(Ts). - -get(Key, Value) --> state(Context), {get_assoc(Key, Context, Value)}. -set(Key, Value) --> state(ContextIn, ContextOut), - {put_assoc(Key, ContextIn, Value, ContextOut)}. - -inscribe(NameAtom, Label) --> state(ContextIn, ContextOut), - {get_assoc(dictionary, ContextIn, Din), - put_assoc(NameAtom, Din, Label, Dout), - put_assoc(dictionary, ContextIn, Dout, ContextOut)}. - -lookup([], []) --> !. -lookup([T|Ts], [V|Vs]) --> !, lookup(T, V), lookup(Ts, Vs). -lookup(NameAtom, Label) --> state(Context), - {get_assoc(dictionary, Context, D), get_assoc(NameAtom, D, Label)}. - -state(S), [S] --> [S]. -state(S0, S), [S] --> [S0]. - - -/* - -This second stage ⟐//1 converts the intermediate representation to assembly -language. - -*/ - -⟐([]) --> []. -⟐([Term|Terms]) --> ⟐(Term), ⟐(Terms). - -⟐(if_literal(Reg, Label)) --> - [ior_imm(0, Reg, -30), % get just the two tag bits. - sub_imm(0, 0, 2), % subtract 2 to check if result is zero. - ne_offset(Label)]. - -% if reg = 0 jump to label. -⟐(if_zero(Reg, Label)) --> [sub_imm(Reg, Reg, 0), eq_offset(Label)]. - -⟐(set_reg_const(Reg, Immediate)) --> {Immediate >= -(2^15), Immediate < 2^16}, !, - [mov_imm(Reg, Immediate)]. - -⟐(set_reg_const(Reg, Immediate)) --> {Immediate >= 0, Immediate < 2^33}, !, % FIXME: handle negative numbers. - {high_half_word(Immediate, HighHalf), low_half_word(Immediate, LowHalf)}, - [ mov_imm_with_shift(Reg, HighHalf), ior_imm(Reg, Reg, LowHalf)]. - -⟐(set_reg_label(Reg, Label)) --> [mov_imm(Reg, Label)]. - -⟐( noop) --> [mov(0, 0)]. -⟐( halt(Halt)) --> [label(Halt), do_offset(Halt)]. -⟐( asm(ASM)) --> [ASM]. -⟐(label(Label)) --> [label(Label)]. -⟐( jump(Label)) --> [do_offset(Label)]. -⟐( dw(Int)) --> [word(Int)]. - -⟐( low_half(Reg)) --> [and_imm(Reg, Reg, 0xffff)]. -⟐( low_half(To, From)) --> [and_imm(To, From, 0xffff)]. -⟐( high_half(Reg)) --> [mov_imm_with_shift(0, 0xffff), and(Reg, Reg, 0)]. -⟐(high_half(To, From)) --> [mov_imm_with_shift(0, 0xffff), and(To, From, 0)]. - -⟐(swap_halves(Register)) --> [ror_imm(Register, Register, 16)]. -⟐(swap_halves(To, From)) --> [ror_imm( To, From, 16)]. - -⟐(high_half_to(To, From)) --> ⟐([swap_halves(To, From), low_half(To)]). - -⟐(split_word(To, From)) --> ⟐([high_half_to(To, From), low_half(From)]). - -⟐(chop_word(To, From)) --> ⟐([high_half(To, From), low_half(From)]). - -⟐(merge(SP, TOS)) --> - [lsl_imm(0, SP, 16), - ior(TOS, TOS, 0), - add_imm(SP, SP, 4)]. - -⟐(push(TOS, TERM, SP)) --> - [lsl_imm(TOS, TERM, 16), % TOS := TERM << 16 - ior(TOS, TOS, SP), % TOS := TOS | SP - add_imm(SP, SP, 4)]. % SP += 1 (word, not byte) - -⟐(push2(TOS, TERMADDR, SP)) --> - [sub_imm(SP, SP, 4), % SP -= 1 (word, not byte) - sub(TOS, TERMADDR, SP), % TOS := &temp - sp - lsl_imm(TOS, TOS, 15), % TOS := TOS << 15 - ior(TOS, TOS, 4)]. % TOS := TOS | 4 - -⟐( write_ram(To, From)) --> [store_word(From, To, 0)]. -⟐(write_cell(From, SP)) --> [add_imm(SP, SP, 4), store_word(From, SP, 0)]. - -⟐(deref(Reg)) --> [load_word(Reg, Reg, 0)]. - -⟐(deref(Reg, Temp)) --> - [mov(Temp, Reg), % Save the address for adding it to offsets later. - load_word(Reg, Reg, 0)]. - -⟐(or_inplace(To, From)) --> [ior(To, To, From)]. - -⟐(definition(Label, Exit, Body, Prev)) --> - ⟐([ - dw(Prev), - label(Label), - Body, - jump(Exit) - ]). - -⟐(defi(Label, Body, Prev, I, SP, TOS)) --> - ⟐([dw(Prev), - label(Label), - defi_def(BodyLabel, SP, TOS), - jump(I)]), - dexpr(Body, BodyLabel). - -⟐(defi_def(Label, SP, TOS)) --> - [mov_imm_with_shift(TOS, Label), - ior(TOS, TOS, SP)], - ⟐(write_cell(TOS, SP)). - -⟐(lookup(TERM)) --> - [mov_imm_with_shift(0, 0x3fff), - ior_imm(0, 0, 0xffff), - and(0, 0, TERM), - eq(0)]. % Jump to term's machine code. - -⟐(repeat_until(Condition, Body)) --> - {add_label(Condition, End, ConditionL)}, - ⟐([ - label(Loop), - Body, - ConditionL, - jump(Loop), - label(End) - ]). - -⟐(br(Condition, [], Else)) --> !, - {add_label(Condition, END, ConditionL)}, - ⟐([ConditionL, Else, label(END)]). - -⟐(br(Condition, Then, Else)) --> - {add_label(Condition, THEN, ConditionL)}, - ⟐([ - ConditionL, Else, jump(END), - label(THEN), Then, label(END) - ]). - -⟐(add_const(To, From, Immediate)) --> [add_imm(To, From, Immediate)]. - -⟐(pop(Reg, TOS)) --> ⟐([split_word(Reg, TOS), deref(TOS)]). - -% From is a register containing a pair record -% FromAddr is a register containing the address of the record in From -% after, -% To is a register that will contain the record from the head -% ToAddr holds the address of the record in To. -% From is a register containing a pair record -% FromAddr is a register containing the address of the record in From -⟐(split_pair(To, ToAddr, From, FromAddr)) --> - [ior_imm(ToAddr, From, -15), % roll right 15 bits - % No need to mask off high bits as the type tag for pairs is 00 - add(ToAddr, ToAddr, FromAddr), - load_word(To, ToAddr, 0), % Bring the record in from RAM. - and_imm(From, From, 0x7fff), % Mask off lower 15 bits. - add(From, From, FromAddr) % Add the address to the offset. - ]. - - -/* - -Support for ⟐//1 second stage. - -The dexpr//2 DCG establishes a sequence of labeled expr_cell/2 pseudo-assembly -memory locations as a linked list that encodes a Prolog list of Joy function -labels comprising e.g. the body of some Joy definition. - -*/ - -dexpr([], 0) --> []. -dexpr([Func|Rest], ThisCell) --> - [label(ThisCell), expr_cell(Func, NextCell)], - dexpr(Rest, NextCell). - -/* - -The add_label/3 relation is a meta-logical construct that accepts a comparision -predicate (e.g. if_zero/2) and "patches" it by adding the Label logic variable -to the end. - -*/ - -add_label(CmpIn, Label, CmpOut) :- - CmpIn =.. F, - append(F, [Label], G), - CmpOut =.. G. - -/* - -Two simple masking predicates. - -*/ - -high_half_word(I, HighHalf) :- HighHalf is I >> 16 /\ 0xFFFF. -low_half_word( I, LowHalf) :- LowHalf is I /\ 0xFFFF. /*