;===================================================================
; Very Fast 8-bit Multiplication Library
; written by Kirk Meyer
;
; You may use this library in your programs in unmodified form as
; long as you give me credit somewhere in your documentation. If
; you must modify the code, please ask first. The multiplication
; routine is extremely optimized and an innocent-looking change
; might cause mass havoc on the functionality of the routine. To use
; this library, simply copy it to the directory where you put your
; source code and then #include it in the source file. Also, as I
; mention later, you must give the term "MultiplyTable" a value.
;
; If you would rather, you can statically include the multiplication
; table. It would take 256 bytes, and is just a multiplication table
; 16x16 (0-15 times 0-15). (0,0,..,0,0; 0,1,..,14,15; 0,2,..,28,30; etc.)
;
; Routines included in library:
; Multiply, MultiplyInitTable
;===================================================================
;Very Fast 8-bit Multiplication Routine
;
;Inputs:
; MultiplyInitTable must have been called previous to this routine.
; D = number to multiply
; E = number to multiply
;
;Outputs:
; HL = result of unsigned multiplication, D * E
;
;Destroys:
; AF, BC, HL
;
;Impacts:
; 38 bytes code (39 with the final ret)
; 204 cycles
;
;Notes:
; The number of cycles does not take into account cycles incurred
; while calling and returning from the routine. If you intend to
; use this function as a call and not inline, add 27 to the number
; of cycles. For the best performance, cut and paste this function
; inline in your code when you call it only once or twice.
;
;How it works:
; This function uses nibbles and a lookup table to attempt to make
; multiplication very fast. The input is split up as follows:
; D = (w << 4) + x E = (y << 4) + z
; This is a simple operation for the Z80 to do since it has some
; commands designed to deal with nibbles (4-bit groups). Then, by
; definition, D * E is simply the following:
; D * E = ((w * y) << 8) + (((w * z) + (x * y)) << 4) + (x * z)
; The middle calculation (the << 4 one) is performed first because
; it was easiest that way. Then the outer calculations are performed
; and they are added to the inner calculation result.
Multiply:
; Computes HL = D * E (unsigned) from three 4-bit x 4-bit table lookups.
;
; Notation used below: D = w:x and E = y:z (high nibble : low nibble).
; MultiplyTable is the high byte of the table address, and the byte at
; table offset (i << 4) | j holds i * j, so putting a nibble pair i:j
; in L with H = MultiplyTable addresses the product i * j.
;
; NOTE: this routine is self-modifying. The operand bytes of the two
; "ld l,0" instructions (MultiplyModify1 / MultiplyModify2) are used as
; scratch storage for RRD/RLD and then executed as the table index, so
; the code must be located in writable memory. D and E are never
; written, so they are preserved.
;
; --- Middle term: (w * z) + (x * y) ---
ld hl,MultiplyModify1 ;10 HL -> operand byte of the first "ld l,0"
ld (hl),e ;7 scratch = y:z
ld a,d ;4 A = w:x
rrd ;18 nibble rotate: A = w:z, scratch = x:y
ld h,MultiplyTable ;7
ld l,a ;4 HL -> table[w:z]
ld a,(hl) ;7 A = w * z
ld l,0 ;7 operand was patched to x:y, so HL -> table[x:y]
MultiplyModify1 = $ - 1
add a,(hl) ;7 A = low 8 bits of (w*z)+(x*y), carry = bit 8 of the sum
; --- Shift the 9-bit middle term left by 4 into BC ---
; Neither "ld hl,nn" nor "ld (hl),a" touches the flags, so the carry
; from the add above is still valid when "sbc a,a" reads it.
ld hl,MultiplyModify2 ;10 HL -> operand byte of the second "ld l,0"
ld (hl),a ;7 scratch = low byte of the sum (s1:s0)
sbc a,a ;4 A = $FF if the sum carried, else $00
and $10 ;7 A = $10 if carry, else $00 (carry placed in the high nibble)
rld ;18 nibble rotate: A = carry:s1, scratch = s0:0
ld b,a ;4 B = high byte of (sum << 4)
ld c,(hl) ;7 C = low byte of (sum << 4), so BC = sum << 4
; --- Outer terms: (w * y) << 8 and (x * z) ---
ld (hl),e ;7 scratch = y:z
ld a,d ;4 A = w:x
rld ;18 nibble rotate: A = w:y, scratch = z:x
ld h,MultiplyTable ;7
ld l,a ;4 HL -> table[w:y]
ld a,(hl) ;7 A = w * y
ld l,0 ;7 operand was patched to z:x, so HL -> table[z:x]
MultiplyModify2 = $ - 1
ld l,(hl) ;7 L = z * x
ld h,a ;4 HL = ((w * y) << 8) + (x * z)
add hl,bc ;11 add the shifted middle term: HL = D * E
ret
;===================================================================
;8-bit Multiplication Table Generator
;
;Inputs:
; Make sure that (MultiplyTable << 8) points to a 256-byte block
; of memory that is okay for the routine to use. For example, if
; MultiplyTable equals $90, the routine will put its 256-byte
; table at $9000 through $90FF.
;
;Outputs:
; HL = (MultiplyTable << 8) + 256
; BC = $0010
;
;Destroys:
; AF, BC, HL
; 256 bytes at (MultiplyTable << 8)
;
;Impacts:
; 19 bytes code
; 8221 cycles
;
;Notes:
; The number of cycles does not take into account cycles incurred
; while calling and returning from the routine. If you intend to
; use this function as a call and not inline, add 27 to the number
; of cycles. This routine must be called before you can use the
; included Multiply routine.
;
;How it works:
; This is simply a brute force multiplication routine. It
; successively multiplies 0-15 by 0-15 and stores each result in
; a large table. I did not bother optimizing this since it will be
; run only once.
MultiplyInitTable:
; Fills the 256-byte page at (MultiplyTable << 8) so that the byte at
; offset (row << 4) | col holds row * col, for row and col in 0-15.
; Each row is built by repeated addition: 0, row, 2*row, ..., 15*row.
; On exit HL = (MultiplyTable << 8) + 256 and BC = $0010.
    ld c,0 ;7 C = current row number, starting with row 0
    ld l,c ;4 low byte of the write pointer = 0
    ld h,MultiplyTable ;7 HL -> first byte of the table
MultiplyInitTableRow:
    xor a ;4 A = running product, row * 0 = 0
    ld b,16 ;7 sixteen entries per row
MultiplyInitTableEntry:
    ld (hl),a ;7 store row * col
    inc hl ;6 advance to the next table entry
    add a,c ;4 A = row * (col + 1)
    djnz MultiplyInitTableEntry ;13/8
    inc c ;4 move on to the next row
    ld a,16 ;7
    cp c ;4 have all sixteen rows been written?
    jr nz,MultiplyInitTableRow ;12/7 not yet: build the next row
    ret