;*** pgmmngr.h
; Program manager (not a real "part", just a set of routines... )


;** startpgm
;* IN HL: Pointer to start of program.
;* OUT A: Garbage.
;* OUT BC: Garbage.
;* OUT DE: Garbage.
;* OUT HL: $FFFF on failure.
;* OUT IX: Garbage.
;* Starts a program, after which execution of the caller is immediately continued.

startpgm: ; HL is pointer to start of program.
    ; Supplies the remaining startpgmex arguments from the caller's own
    ; state and then falls through into startpgmex (there is
    ; deliberately no RET or jump at the end of this routine).

    LD DE, startpgm_empty   ; DE -> startpgm_empty, used as the program name.
    LD A, (mydir)
    LD B, A                 ; B = (mydir)
    LD A, (myscrn)
    LD C, A                 ; C = (myscrn)

;** startpgmex
;* IN B: Flash page that will be in use by the program.
;* IN C: Virtual screen to run on.
;* IN DE: Pointer to name of the program.
;* IN HL: Pointer to start of program.
;* OUT A: Garbage.
;* OUT BC: Garbage.
;* OUT DE: Garbage.
;* OUT HL: $FFFF on failure.
;* OUT IX: Garbage.
;* Extended version of startpgm.

startpgmex:
    ; Accepts HL as pointer to program.
    ; Accepts B as current directory.
    ; Accepts C as screen.
    ; Accepts DE as program name.
    ; Stores IN (6) as flash page of the new process.
    ; Returns HL = $0000 on success, HL = $FFFF on failure.

    ; First newproc, then loadpgm.

    ; This is how it works:
    ; * Allocate 19 bytes of memory with pgmalloc.
    ; * Put inside that memory the following stub (offsets 0..18):
    ;
    ;   +0   LD A, [value read from port 6]
    ;   +2   OUT (6), A
    ;   +4   LD A, [curproc of caller]
    ;   +6   LD DE, [ptr to program name]
    ;   +9   LD HL, [ptr to start of program]
    ;   +12  LD B, [directory]
    ;   +14  LD C, [screen]
    ;   +16  CALL startpgm_return
    ;
    ; * newproc to that memory.
    ; * sleep, then return $0000 to the caller.
    ;
    ; The new process then runs the stub, which ends up in startpgm_return:
    ; * free the memory previously allocated (PID of caller is needed).
    ; * jump to loadpgm.

    PUSH HL
    PUSH DE
    PUSH BC
    LD HL, 19                  ; The stub is 19 bytes long (offsets 0..18 are written below).
    CALL pgmalloc
    LD A, H
    OR L
    JP Z, startpgm_failure_pop ; HL = 0: allocation failed. HL, DE and BC are still on
                               ; the stack, so they must be dropped before the RET.
    PUSH HL
    POP IX                     ; IX = start of the stub.
    POP BC
    POP DE
    POP HL
    LD (IX+0), %00111110       ; LD A,
    IN A, (6)
    LD (IX+1), A               ;       [value currently read from port 6]
    LD (IX+2), %11010011       ; OUT (n), A
    LD (IX+3), 6               ;       n = 6
    LD (IX+4), %00111110       ; LD A,
    LD A, (curproc)
    LD (IX+5), A               ;       [curproc of the caller]
    LD (IX+6), %00010001       ; LD DE,
    LD (IX+7), E               ;       [ptr to program name, low byte]
    LD (IX+8), D               ;       [ptr to program name, high byte]
    LD (IX+9), %00100001       ; LD HL,
    LD (IX+10), L              ;       [ptr to start of program, low byte]
    LD (IX+11), H              ;       [ptr to start of program, high byte]
    LD (IX+12), %00000110      ; LD B,
    LD (IX+13), B              ;       [directory]
    LD (IX+14), %00001110      ; LD C,
    LD (IX+15), C              ;       [screen]
    LD (IX+16), %11001101      ; CALL
    LD DE, startpgm_return     ; DE is free again: the name pointer is already in the stub.
    LD (IX+17), E
    LD (IX+18), D
    PUSH IX                    ; Keep the stub address in case newproc fails.
    PUSH IX
    POP HL                     ; HL = stub address, the argument to newproc.
    CALL newproc
    LD A, H
    OR L
    JR Z, startpgm_failure_free ; HL = 0: no process was created, release the stub.
    POP HL                     ; Drop the saved stub address (startpgm_return frees the stub).
    LD B, $ff
    CALL sleep      ; Give the process 1.7 secs time to start
    LD HL, $0000
    RET

startpgm_failure_pop:          ; Failure while HL, DE and BC are still pushed.
    POP HL
    POP HL
    POP HL
    JR startpgm_failure
startpgm_failure_free:         ; Failure while only the stub address is pushed.
    POP HL
    CALL free
startpgm_failure:
    LD HL, $ffff
    RET

startpgm_return:  ; HL = start of program, A = pid of caller, B = directory, C = screen, DE = pointer to name.
    ; Reached through the CALL at the end of the stub built by startpgmex, so the
    ; word on top of the stack at entry is the address just past that CALL.
    ; This routine never returns there: it ends with a jump to loadpgm.
    PUSH AF       ; Save pid of caller.
    LD A, B
    LD (mydir), A
    LD A, C
    LD (myscrn), A
    PUSH HL       ; Save start of program.
    EX DE, HL     ; HL = pointer to name.
    LD A, (curproc)
    CALL setprocname
    POP HL        ; HL = start of program.
    POP AF        ; A = pid of caller.
    EX DE, HL     ; DE = start of program.
    POP HL        ; HL = return address pushed by the stub's CALL; this is what free receives below.
                  ; NOTE(review): that address lies past the end of the stub, not at its start - confirm free accepts it.
    PUSH DE       ; Keep start of program for the final JP loadpgm.
    PUSH AF
    LD A, (curproc)
    LD B, A       ; B = our own pid.
    POP AF        ; A = pid of caller again.
    PUSH BC       ; Save our own pid (it is in B).
    DI            ; We don't want any multitasking while we are not ourself...
    LD (curproc), A ; Temporarily take the caller's pid (the caller did the pgmalloc for the stub).
    CALL free     ; Clean up ourself
    LD A, (curproc)
    LD B, A       ; B = curproc as it is now (the caller's pid, provided free left it alone).
    POP AF        ; Pops the BC pushed above: A = our own pid.
    LD (curproc), A ; Be ourself again.
    LD A, B
    CALL wakeup   ; Called with A = pid of caller.
    EI
    POP HL        ; HL = start of program.
    JP loadpgm

; A single zero byte; startpgm passes its address in DE as the program name.
startpgm_empty:
.db 0

;** loadpgm
;* IN HL: Pointer to start of program.
;* OUT A: Depending on the program.
;* OUT BC: Depending on the program.
;* OUT DE: Depending on the program.
;* OUT HL: Depending on the program, $FFFF <i>can</i> indicate loading was unsuccessful.
;* OUT IX: Depending on the program.
;* OUT IY: Depending on the program.
;* Load and starts a program, waits until it's finished and continues executing.
;* <b>NOTE</b>: if the program loaded calls <code>endproc</code>, then the program that was calling <code>loadpgm</code> will also die.

loadpgm:	; Load a program.
    ; HL = program start address.
    ; If you meant to, you should newproc() BEFORE this function !!!!
    ;
    ; Header layout as used by this code (the program is .orged at $4000):
    ;   source+0 ("$4000"): word, .orged address where the data begins (= end of the code)
    ;   source+2 ("$4002"): word, .orged address of the end of the program
    ;   source+4 ("$4004"): first byte of the code
    ;
    ; This first part allocates memory for the code, copies the code into it and
    ; prepares the relocation of the CALL/JP addresses inside the copy.

    LD C, (HL)
    INC HL
    LD B, (HL)
    INC HL
    PUSH BC     ; BC contains the start of the data in the source (*"$4000")
    POP IX      ; BC -> IX
    ;LD E, (HL)
    INC HL      ; HL = source + 3
    ;LD D, (HL)  ; Now DE - $4000 - 4 contains the length of the program.

    PUSH HL
    LD HL, $0000 - $4004
    ADD HL, BC            ; We now want to copy the program only.
    POP DE
    INC DE                ; DE = source + 4

    ; DE = start address of program (minus header)
    ; HL = Program length.

    PUSH HL     ; Length
    PUSH DE     ; Start address (+4)
    CALL pgmalloc
    LD A, H
    OR L
    JP Z, loadpgm_failure ; HL = 0: loadpgm_failure drops the two words pushed above.

    POP DE ; Start address
    EX DE, HL   ; HL = start address in source (+4), DE = allocated memory
    POP BC ; Length

    PUSH DE ; Start address in memory (of program)
    PUSH HL ; Start address of program in source (+4)
    PUSH BC ; Length

    ; Start the transfer :p

    LDIR

    ; This is an important part, we need to be sure that all JP and CALL are redirected to
    ; their normal places, since the program doesn't know where it gets loaded at assemble time.

    POP BC  ; Length
    POP DE
    POP HL  ; Start of program (in memory)
    PUSH HL
    PUSH DE ; Restore stack.

    PUSH HL
    POP IY  ; IY = scan pointer, starts at the first byte of the copied code.

    LD DE, $0000 - $4004
    ADD HL, DE    ; Difference between .ORGed CALL and real CALL address.
    PUSH HL       ; Stack from the top: difference, source + 4, code in memory, return address.

    XOR A
    OR B
    JR NZ, loadpgm_preloop ; B <> 0: length is at least 256.
    OR C
    CP 3
    JP C, loadpgm_cont     ; Fewer than 3 bytes of code: no 3-byte pattern can be present.

; Relocation of CALL and JP targets in the copied code.
; IY = scan pointer, BC = remaining byte count, top of stack = the difference to add.
loadpgm_preloop:

    DEC BC
    DEC BC  ; Since we are looking for 3 byte patterns, dec BC by 2.

loadpgm_loop:
    LD A, (IY+0)
    CP $CD  ;%11001101    CALL nn
    JR Z, loadpgm_checkmsb
    CP $C3  ;%11000011    JP nn
    JR Z, loadpgm_checkmsb
    AND $C7 ;%11000111    Mask out the condition bits.
    CP $C4  ;%11[cc]100   CALL cc, nn
    JR Z, loadpgm_checkmsb
    CP $C2  ;11[cc]010    JP cc, nn
    JR Z, loadpgm_checkmsb
loadpgm_resume:
    CALL advance_IY       ; Step IY to the next instruction; BC is decreased accordingly.
    LD A, B
    OR C
    JR Z, loadpgm_cont    ; Nothing left to scan.
    JR loadpgm_loop
loadpgm_checkmsb:
    LD A, (IY+2)
    AND $C0       ; A bit loose checking, but what would a program call inside $40 - $7f, if it wasn't itself?
    CP $40
    JR NZ, loadpgm_resume ; Target's high byte is outside $40 - $7f: leave it alone.

    LD E, (IY+1)
    LD D, (IY+2)  ; DE = target address as assembled.
    POP HL
    PUSH HL       ; HL = the difference, which stays on the stack.
    ADD HL, DE
    LD (IY+1), L
    LD (IY+2), H  ; Write the relocated target back.

    JR loadpgm_resume

loadpgm_cont:    ; Copy the program data.
    ; On entry the stack holds, from the top: the relocation difference,
    ; the start address of the code in source ("$4004"), the start address
    ; of the code in memory, and the return address of loadpgm.

    POP HL  ; Get rid of the data used inside the above loop.

    POP HL  ; Start address of actual program ("$4004")
            ; IX still hold the place where the data begins (.orged at $4000)

    DEC HL
    LD D, (HL)
    DEC HL
    LD E, (HL)
    PUSH HL   ; Pointer to "$4002".
    PUSH IX   ; Start address of data (*"$4000"), DE = end of program (*"$4002") (both .orged at $4000)
    POP HL    ; IX -> HL
    EX DE, HL ; HL = end of program, DE = start of data.
    SCF
    CCF       ; Clear carry so that SBC acts as a plain subtract.
    SBC HL, DE  ; HL holds the length of data.
                ; Now we have that we better malloc directly.

    ; We should now check if HL == 0, in that case skip the whole data part.
    LD A, H
    OR L
    JP NZ, startpgm_handlepgmdata
    LD IY, $0000 ; No data: the program is started with IY = 0.
    POP HL       ; Drop the pointer to "$4002"; the code address in memory is now on top.
    JP loadpgm_startup


; Allocates memory for the program's data, copies the data from the source and
; sets up the registers for the data-pointer relocation loop that follows.
; On entry: HL = length of data (non-zero), IX = start of data (.orged), and the
; stack holds, from the top: pointer to "$4002", code address in memory, return address.
startpgm_handlepgmdata:
    PUSH HL     ; Length of data.
    CALL malloc
    LD A, H
    OR L
    JP Z, loadpgm_failure_free ; HL = 0: allocation failed.

    PUSH HL
    POP IY     ; put it into IY for later on, start of data (program expects it).


    POP BC    ; Amount of data that needs transferred.

    ; start of data = IX - $4004 + Start of program (in source) = IX - $4002 + (POP)
    ; DE is not in use.
    ; HL is backed up in IY

    PUSH IX   ; The start of the data in source (*"$4000")
    POP HL    ; IX -> HL
    LD DE, $0000 - $4002
    ADD HL, DE  ; HL = Offset of data from the begin of the whole program - 2
                ; (why? because POP is a pointer to "$4002")
    POP DE
    PUSH DE     ; DE = pointer to "$4002", which stays on the stack.
    ADD HL, DE  ; HL = Start of data in source (source).
    PUSH IY     ; Start of data in memory
    POP DE      ; IY -> DE (destination)

    LDIR    ; Transfer data

    ; TODO: parse data memory locations.
    ; Mind that we need to keep IY intact, and also restore the stack.

    ; Ok what we need:

    ; * The "adder", the value we need to add to every pointer in the LD statements (IY - [the .orged data-label]) -> HL.
    ; * A Byte Counter, the length of the program minus the length of the op-codes -> BC
    ; * A pointer to the start of the program -> IX

    ; * An outer loop (that loops through the program bytes).
    ; * Two inner loops (that loop through the op-codes).

    POP HL        ; HL = pointer to "$4002".
    DEC HL
    LD D, (HL)
    DEC HL
    LD E, (HL)    ; DE contains *"$4000" (start of the data, .orged at $4000)

    PUSH DE

    EX DE, HL
    LD DE, $4004  ; Start of the program.
    SCF
    CCF           ; Clear carry so that SBC acts as a plain subtract.
    SBC HL, DE    ; HL = length of the code.
    LD B, H       ; BC is now the byte counter
    LD C, L

    POP DE        ; DE contains *"$4000" (start of the data, .orged at $4000), again.

    PUSH IY
    POP HL        ; HL contain the start of the data block in the memory.

    SCF
    CCF
    SBC HL, DE    ; HL contains the "adder", the value we need to add to data pointers.

    POP IX
    PUSH IX       ; IX points to the start of the program.

    PUSH IY       ; For safe keeping ;-)


    XOR A         ; It's a bit out of the point, but still, better safe than sorry.
    OR B
    JR NZ, loadpgm_data_preloop ; B <> 0: at least 256 bytes of code.
    OR C
    CP 4
    JR NC, loadpgm_data_preloop ; 4 bytes or more.
    CP 3
    JR C, loadpgm_end           ; Fewer than 3 bytes: nothing to relocate.
                                ; Exactly 3 bytes falls through into loadpgm_data_preloop.

; Relocation of 16-bit data pointers inside the copied code.
; IX = scan pointer into the code, BC = remaining byte count, HL = the "adder".
; IY is used as a table pointer here; the caller has saved the data address on the stack.
; loadpgm_imm16_opcodes_2 is read as 2-byte entries and loadpgm_imm16_opcodes_1 as
; 1-byte entries; in both, an entry starting with a zero byte ends the table.
loadpgm_data_preloop:
    DEC BC
    DEC BC

loadpgm_data_loop:    ; Outer loop
    XOR A
    OR B
    JR NZ, loadpgm_data_preloop4
    OR C
    JR Z, loadpgm_end                 ; Counter reached zero: done.
    CP 2
    JR C, loadpgm_data_preloop3       ; Counter is 1: skip the 4 byte opcodes, only try the 3 byte ones.

loadpgm_data_preloop4:
    LD A, (IX+3)
    AND $C0
    CP $40
    JR NZ, loadpgm_data_preloop3      ; Saves time, we don't need to test all the opcodes, if we don't have a valid msb.
    LD IY, loadpgm_imm16_opcodes_2
loadpgm_data_loop4:   ; 4 byte opcode inner loop.
    LD A, (IY+0)
    CP (IX+0)
    JR NZ, loadpgm_data_postloop4
    LD A, (IY+1)
    CP (IX+1)
    JR NZ, loadpgm_data_postloop4

    ; Ok, we have a valid opcode-sequence, add the "adder":

    PUSH HL       ; Keep the adder.
    LD D, (IX+3)
    LD E, (IX+2)  ; DE = 16-bit operand at bytes 2 and 3.
    ADD HL, DE
    LD (IX+3), H
    LD (IX+2), L
    POP HL
    JR loadpgm_data_advance    ; We don't need to check for the other opcodes.

loadpgm_data_postloop4:
    INC IY
    INC IY        ; Next 2-byte table entry.
    LD A, (IY+0)
    OR A
    JR NZ, loadpgm_data_loop4  ; Zero first byte = end of table: fall through to the 3 byte opcodes.


loadpgm_data_preloop3:   ; 3 byte opcode inner loop.
    LD A, (IX+2)
    AND $C0
    CP $40
    JR NZ, loadpgm_data_advance      ; Saves time, we don't need to test all the opcodes, if we don't have a valid msb.
    LD IY, loadpgm_imm16_opcodes_1

loadpgm_data_loop3:   ; 3 byte opcode inner loop.
    LD A, (IY+0)
    CP (IX+0)
    JR NZ, loadpgm_data_postloop3

    ; Ok, we have a valid opcode-sequence, add the "adder":

    PUSH HL       ; Keep the adder.
    LD D, (IX+2)
    LD E, (IX+1)  ; DE = 16-bit operand at bytes 1 and 2.
    ADD HL, DE
    LD (IX+2), H
    LD (IX+1), L
    POP HL
    JR loadpgm_data_advance    ; We don't need to check for the other opcodes.

loadpgm_data_postloop3:
    INC IY        ; Next 1-byte table entry.
    LD A, (IY+0)
    OR A
    JR NZ, loadpgm_data_loop3  ; Zero byte = end of table.

loadpgm_data_advance:
    CALL advance_IX            ; Step IX to the next instruction; BC is decreased accordingly.
    JR loadpgm_data_loop

loadpgm_end:
    ; Stack from the top: start of data in memory, start of code in memory, return address of loadpgm.

    POP IY  ; Program expects it.

    ; We're already a new process (because otherwise we hadn't malloc'ed for the right pid), so just start up...
    ; Start up, we don't expect to return.
    ; We have still the right value being pushed in the stack, so just RET

loadpgm_startup:
    ; The next bit is for the times a process called loadpgm without startpgm (and thus waits for it to come down).
    ; We don't want a memory leak, and otherwise the location where the program code resides will never be freed.
    POP DE                    ; DE = start of the program code in memory.
    LD HL, loadpgm_freecode
    PUSH DE                   ; Picked up by loadpgm_freecode as the pointer to free.
    PUSH HL                   ; Return address for the program: loadpgm_freecode.
    PUSH DE
    RET                       ; "Return" into the program, i.e. jump to its first byte.

loadpgm_freecode:
    ; The loaded program returns here: loadpgm_startup pushed this address as
    ; the program's return address, with the address of the program code below it.
    POP HL      ; HL = start of the program code in memory.
    JP free     ; Free it; the RET of free then returns to the caller of loadpgm.

loadpgm_failure_free:
    ; malloc for the data block failed after the code block was already allocated.
    ; Stack from the top: length of data, pointer to "$4002" in the source,
    ; start of the program code in memory, return address of loadpgm.
    POP HL      ; Drop the length of data.
    POP HL      ; Drop the pointer to "$4002".
    POP HL      ; HL = program code in memory (obtained from pgmalloc).
    CALL free   ; Release the code block, so that it does not leak.
    LD HL, $FFFF
    RET         ; Report the failure to the caller of loadpgm.

loadpgm_failure:
    ; pgmalloc for the code block failed.
    ; Stack from the top: start address in source (+4), length, return address of loadpgm.
    POP HL
    POP HL
    LD HL, $FFFF
    RET

advance_IY:
    ; Same job as advance_IX, but for IY: IY is stepped over the instruction it
    ; points at and BC is decreased accordingly. IX is left unchanged.
    PUSH IY
    EX (SP), IX     ; IX = IY (the pointer to advance), stack top = the caller's IX.
    CALL advance_IX
    EX (SP), IX     ; IX = the caller's IX again, stack top = the advanced pointer.
    POP IY          ; IY = the advanced pointer.
    RET

; advance_IX
; Steps IX over the instruction it points at and decreases BC by the same
; number of bytes. The length is found by matching the bytes at IX against the
; tables opcode1 .. opcode4 (tried in that order); if nothing matches, IX is
; advanced by one byte. AF and IY are preserved.
;
; Table layout as read by this code, for the N-byte table (N = 1..4):
;   each entry is 2*N bytes: N values followed by N masks;
;   an entry matches when ((IX+i) AND mask[i]) = value[i] for every i;
;   an entry whose bytes are all zero ends the table.
advance_IX:
    PUSH AF
    PUSH IY

    ; First step: 1 byte patterns:
    LD IY, opcode1

advance_IX_loop1:
    LD A, (IY+0)
    OR (IY+1)
    JR Z, advance_IX_op2    ; All-zero entry: end of the 1 byte table.
    LD A, (IX+0)
    AND (IY+1)              ; Apply the mask...
    CP (IY+0)               ; ...and compare with the value.
    JP Z, advance_IX_1
    INC IY
    INC IY
    JR advance_IX_loop1

advance_IX_op2:

    ; Second step: 2 bytes....
    LD IY, opcode2

advance_IX_loop2:
    LD A, (IY+0)
    OR (IY+1)
    OR (IY+2)
    OR (IY+3)
    JR Z, advance_IX_op3    ; All-zero entry: end of the 2 byte table.
    LD A, (IX+0)
    AND (IY+2)
    CP (IY+0)
    JR NZ, advance_IX_op2_next
    LD A, (IX+1)
    AND (IY+3)
    CP (IY+1)
    JP Z, advance_IX_2
advance_IX_op2_next:
    INC IY
    INC IY
    INC IY
    INC IY
    JR advance_IX_loop2

advance_IX_op3:

    ; 3 bytes....
    LD IY, opcode3

advance_IX_loop3:
    LD A, (IY+0)
    OR (IY+1)
    OR (IY+2)
    OR (IY+3)
    OR (IY+4)
    OR (IY+5)
    JR Z, advance_IX_op4    ; All-zero entry: end of the 3 byte table.
    LD A, (IX+0)
    AND (IY+3)
    CP (IY+0)
    JR NZ, advance_IX_op3_next
    LD A, (IX+1)
    AND (IY+4)
    CP (IY+1)
    JR NZ, advance_IX_op3_next
    LD A, (IX+2)
    AND (IY+5)
    CP (IY+2)
    JR Z, advance_IX_3
advance_IX_op3_next:
    INC IY
    INC IY
    INC IY
    INC IY
    INC IY
    INC IY
    JR advance_IX_loop3

advance_IX_op4:
    ; Last try, 4 bytes....
    LD IY, opcode4

advance_IX_loop4:
    LD A, (IY+0)
    OR (IY+1)
    OR (IY+2)
    OR (IY+3)
    OR (IY+4)
    OR (IY+5)
    OR (IY+6)
    OR (IY+7)
    JR Z, advance_IX_1    ; Best we can do: scroll one byte.
    LD A, (IX+0)
    AND (IY+4)
    CP (IY+0)
    JR NZ, advance_IX_op4_next
    LD A, (IX+1)
    AND (IY+5)
    CP (IY+1)
    JR NZ, advance_IX_op4_next
    LD A, (IX+2)
    AND (IY+6)
    CP (IY+2)
    JR NZ, advance_IX_op4_next
    LD A, (IX+3)
    AND (IY+7)
    CP (IY+3)
    JR Z, advance_IX_4
advance_IX_op4_next:
    INC IY
    INC IY
    INC IY
    INC IY
    INC IY
    INC IY
    INC IY
    INC IY
    JR advance_IX_loop4

; The four entry points below fall through into each other, so that entering at
; advance_IX_N moves IX forward by N bytes and decreases BC by N.
advance_IX_4:
    INC IX
    DEC BC
advance_IX_3:
    INC IX
    DEC BC
advance_IX_2:
    INC IX
    DEC BC
advance_IX_1:
    INC IX
    DEC BC
    LD A, B
    CP $FF
    JR NZ, advance_IX_exit
    LD BC, $0000            ; B = $FF is taken to mean the counter went below zero: clamp it to 0.
advance_IX_exit:
    POP IY
    POP AF
    RET


