ВходНаше всё Теги codebook 无线电组件 Поиск Опросы Закон Пятница
3 мая
3620 Топик полностью
=mse= (08.01.2004 17:08, просмотров: 3) ответил =mse= на Ищщё раз цитирую whetstone: Used to test compiler optimization and floating point performance.
math_mega.s90 

/* Long or relative jumps and calls */
#if (A90_PROC_OPTION == 0) || (A90_PROC_OPTION == 1)
#define XCALL RCALL
#define XJMP RJMP
#else
#define XCALL CALL
#define XJMP JMP
#endif

;*** FLOAT ***

#define expbias $7F

#define _0 $01
#define _1 $02
#define _2 $04
#define _3 $08
#define _4 $10
#define _5 $20
#define _6 $40
#define _7 $80

#define T0 r0
#define T1 r1
#define T2 r2
#define T3 r3

#define a0 r4
#define a1 r5
#define a2 r6
#define a3 r7
#define a4 r8
#define a5 r9
#define a6 r10
#define a7 r11
#define a8 r12

#define P0 r16
#define P1 r17
#define P2 r18
#define P3 r19
#define Q0 r20
#define Q1 r21
#define Q2 r22
#define Q3 r23

#define X0 r26
#define X1 r27
#define X2 r25
#define Y0 r28
#define Y1 r29
#define Z0 r30
#define Z1 r31

;**************************

MODULE ?FLOATS_L04

EXTERN ?L_NEG_L03
EXTERN ?UL_SHR_L03

PUBLIC ?F2L_L04
PUBLIC ?SL2F_L04
PUBLIC ?UL2F_L04

PUBLIC ?F_ADD_L04
PUBLIC ?F_SUB_L04
PUBLIC ?F_MUL_L04
PUBLIC ?F_DIV_L04
PUBLIC ?F_CMP_L04

;Things used by ?DOUBLES_L11
PUBLIC ?F_ZERO_L04
PUBLIC ?F_NAN_L04

;***
RSEG CODE:CODE:NOROOT
?F2L_L04
rol P2
rol P3
ror P2
tst P3
brne ??01
rjmp ?F_ZERO_L04
??01
mov T0,P2
subi P3,31+expbias
brpl ?setiov32
neg P3
sbr P2,_7
mov Q0,P3
mov P3,P2
mov P2,P1
mov P1,P0
clr P0
ldi Z0,4
?int32loop
cpi Q0,8
brlo ?tshift3232
subi Q0,8
mov P0,P1
mov P1,P2
mov P2,P3
clr P3
dec Z0
brne ?int32loop
rjmp ?int3232ok
?tshift3232
tst Q0
breq ?int3232ok
?shift3232
lsr P3
ror P2
ror P1
ror P0
dec Q0
brne ?shift3232
?int3232ok
sbrs T0,7
ret
XJMP ?L_NEG_L03
?setiov32
clr Z0
sbrs T0,7
ser Z0
mov P3,Z0
mov P2,Z0
mov P1,Z0
mov P0,Z0
rol T0
ror P3
ret

;***
RSEG CODE:CODE:NOROOT
?F_FZ32
rjmp ?F_ZERO_L04
?F_FDZ32
rjmp ?F_NAN_L04
?F_DIV_L04
rol Q2
rol Q3
ror Q2
rol P2
rol P3
ror P2
tst P3
breq ?F_FZ32
mov T0,P2
tst Q3
breq ?F_FDZ32
eor T0,Q2
sbr P2,_7
sbr Q2,_7
clr Z0
ldi Z1,expbias-1
clr T1
clr T2
clr T3
?talign32
cp P0,Q0
cpc P1,Q1
cpc P2,Q2
brlo ?dalign32ok
lsr P2
ror P1
ror P0
ror T1
subi Z1,-1
?dalign32ok
sub P3,Q3
brlo ?altb32
?ageb32
add P3,Z1
brcc ?dargok32
rjmp ?F_NAN_L04
?altb32
add P3,Z1
brcs ?dargok32
rjmp ?F_ZERO_L04
?dargok32
ldi Z1,24
rjmp ?dloop32
?dnext32
sub P0,Q0
sbc P1,Q1
sbc P2,Q2
?dnext32set
inc T3
dec Z1
breq ?dok32
?dloop32
lsl T3
rol T2
rol T1
rol P0
rol P1
rol P2
brcs ?dnext32
sub P0,Q0
sbc P1,Q1
sbc P2,Q2
brcc ?dnext32set
add P0,Q0
adc P1,Q1
adc P2,Q2
dec Z1
brne ?dloop32
?dok32
lsl P0
rol P1
rol P2
brcs ?dok32rnd
sub P0,Q0
sbc P1,Q1
sbc P2,Q2
brcs ?div32ok
brne ?dok32rnd
sbrs T3,0
rjmp ?div32ok
?dok32rnd
ldi Z0,1
add T3,Z0
adc T2,Z1
adc T1,Z1
brcc ?div32ok
ror T1
inc P3
breq ?F_NAN_L04
?div32ok
mov P2,T1
mov P1,T2
mov P0,T3
rjmp ?FIXR32

;***
RSEG CODE:CODE:NOROOT
?F_ZERO_L04
CLR P3
CLR P2
CLR P1
CLR P0
RET

;***
RSEG CODE:CODE:NOROOT
?F_NAN_L04
LDI P3,$7F
OR P3,T0
LDI P2,$FF
LDI P1,$FF
LDI P0,$FF
RET

;***
RSEG CODE:CODE:NOROOT
?NRM32
clr Q3
?nrm32l
tst P2
brne ?norm32
mov P2,P1
mov P1,P0
mov P0,Z0
clr Z0
clt
subi Q3,-8
sbrs Q3,5
rjmp ?nrm32l
clr P3
ret
?norm32
sub P3,Q3
breq ?F_ZERO_L04
brlo ?F_ZERO_L04
?norm32l
sbrc P2,7 ;if MSB=1, normalisation done
rjmp ?NRMRND32
lsl Z0
rol P0
rol P1
rol P2
dec P3
brne ?norm32l
rjmp ?F_ZERO_L04

;***
RSEG CODE:CODE:NOROOT
?F_ADD_L04
rol Q2
rol Q3
ror Q2
rol P2
rol P3
ror P2
mov T0,P2
mov T1,Q2
sbr P2,_7
sbr Q2,_7
cp P0,Q0
cpc P1,Q1
cpc P2,Q2
cpc P3,Q3
brsh ?usea32
mov T3,P3
mov P3,Q3
mov Q3,T3
mov T3,P2
mov P2,Q2
mov Q2,T3
mov T3,P1
mov P1,Q1
mov Q1,T3
mov T3,P0
mov P0,Q0
mov Q0,T3
mov T3,T0
mov T0,T1
mov T1,T3
?usea32
tst P3
breq ?aret32
clt
clr Z1
sub Q3,P3
neg Q3
breq ?aligned32
ldi Z0,4
?shiftloop
cpi Q3,8
brlo ?alignb32
subi Q3,8
tst Z1
breq $+4
set
mov Z1,Q0
mov Q0,Q1
mov Q1,Q2
clr Q2
dec Z0
brne ?shiftloop
?aret32
rjmp ?FIXR32
?alignb32
tst Q3
breq ?aligned32
?aloopb32
lsr Q2
ror Q1
ror Q0
ror Z1
brcc $+4
set
dec Q3
brne ?aloopb32
?aligned32
ldi Z0,0
eor T1,T0
brmi ?sok32
?aok32
add Z0,Z1
adc P0,Q0
adc P1,Q1
adc P2,Q2
brcc ?NRMRND32
ror P2
ror P1
ror P0
ror Z0
brcc $+4
set
inc P3
brne ?NRMRND32
rjmp ?F_NAN_L04
?sok32
clc
brtc $+4
sec
sbc Z0,Z1
sbc P0,Q0
sbc P1,Q1
sbc P2,Q2
rjmp ?NRM32

;***
RSEG CODE:CODE:NOROOT
?NRMRND32
cpi Z0,$80
brne ?rnd_set
brts ?rnd_set
sbrs P0,0
sec
?rnd_set
sbci P0,$FF
sbci P1,$FF
sbci P2,$FF
sbci P3,$FF
brne ?FIXR32
rjmp ?F_NAN_L04

;***
RSEG CODE:CODE:NOROOT
?F_SUB_L04
ldi Z0,$80
eor Q3,Z0
rjmp ?F_ADD_L04

;***
RSEG CODE:CODE:NOROOT
?FIXR32
rol P2
rol T0
ror P3
ror P2
ret

;***
RSEG CODE:CODE:NOROOT
?SL2F_L04
mov T0,P3
sbrc T0,7
XCALL ?L_NEG_L03
?SL2F_L04_U
mov Z0,P0
mov P0,P1
mov P1,P2
mov P2,P3
ldi P3,31+expbias
clt
rjmp ?NRM32

;***
RSEG CODE:CODE:NOROOT
?UL2F_L04
clr T0
rjmp ?SL2F_L04_U

;***
RSEG CODE:CODE:NOROOT
?TSTW:
brmi $+4
?F_MULZ
rjmp ?F_ZERO_L04
rjmp ?F_NAN_L04
?F_MUL_L04
rol Q2
rol Q3
ror Q2
rol P2
rol P3
ror P2
tst P3
breq ?F_MULZ
tst Q3
breq ?F_MULZ
mov T3,P2
eor T3,Q2
;
; Establish exponent.
subi P3,$7F
subi Q3,$7F
add P3,Q3
brvs ?TSTW
subi P3,$80 ;Same as 'ADDI P3,$80
;'Final' exponent is now in P3
breq ?F_MULZ
sbr P2,_7
sbr Q2,_7
;;;;;*
st -Y,a0
st -Y,a1
st -Y,a2
st -Y,a3
st -Y,a4
st -Y,a5
st -Y,a6

clr a2
clr a3
clr a4
clr a5
clr a6
clr T2

fmul Q0,P0 ; 11
movw a0,T0
adc a2,T2
fmul Q0,P1 ; 12
adc a3,T2
add a1,T0
adc a2,T1
adc a3,T2
fmul Q0,P2 ; 13
adc a4,T2
add a2,T0
adc a3,T1
adc a4,T2

fmul Q1,P0 ; 21
adc a3,T2
adc a4,T2
add a1,T0
adc a2,T1
adc a3,T2
adc a4,T2
fmul Q1,P1 ; 22
adc a4,T2
add a2,T0
adc a3,T1
adc a4,T2
fmul Q1,P2 ; 23
adc a5,T2
add a3,T0
adc a4,T1
adc a5,T2

fmul Q2,P0 ; 31
adc a4,T2
adc a5,T2
add a2,T0
adc a3,T1
adc a4,T2
adc a5,T2
fmul Q2,P1 ; 32
adc a5,T2
add a3,T0
adc a4,T1
adc a5,T2
fmul Q2,P2 ; 33
adc a6,T2
add a4,T0
adc a5,T1
adc a6,T2

sbrs a6,0
rjmp ?NRMRND32T
lsr a6
ror a5
ror a4
ror a3
ror a2
ror a1
ror a0
rol T2
inc P3
?NRMRND32T
dec P3
clt
or T2,a0
or T2,a1
breq $+4
set
mov T0,T3

mov P2,a5
mov P1,a4
mov P0,a3
mov Z0,a2

ld a6,Y+
ld a5,Y+
ld a4,Y+
ld a3,Y+
ld a2,Y+
ld a1,Y+
ld a0,Y+
rjmp ?NRMRND32

;***
RSEG CODE:CODE:NOROOT
?F_CMP_L04
mov Z0,P3
eor Z0,Q3
brpl ?LCA6
cp Q3,P3
?LCA2
clz
?LCA4
ret
?LCA6
cp P0,Q0
cpc P1,Q1
cpc P2,Q2
cpc P3,Q3
breq ?LCA4
tst P3
brpl ?LCA2
brcc ?LCBA
clc
ret
?LCBA
sec
ret

ENDMOD

;********** FLOATS 64 *****

MODULE ?DOUBLES_L11

PUBLIC ?FL2D_L11
PUBLIC ?D2FL_L11
PUBLIC ?D2L_L11
PUBLIC ?D_UNPACK_1_L11
PUBLIC ?D_NORMALIZE_L11
PUBLIC ?UL2D_L11
PUBLIC ?SL2D_L11
PUBLIC ?D_ADD_L11
PUBLIC ?D_SUB_L11
PUBLIC ?D_MUL_L11
PUBLIC ?D_DIV_L11
PUBLIC ?D_CMP_EQ_L11
PUBLIC ?D_CMP_LT_L11
PUBLIC ?D_CMP_GE_L11

EXTERN ?L_NEG_L03
EXTERN ?UL_SHR_L03
EXTERN ?F_ZERO_L04
EXTERN ?F_NAN_L04

;***
;------------------------------------------------------------------------
;?FL2D_L11
;Conversion from float to double
;On call floatnumber is in P3:P0
;On return doublenumber is in P3:P0 Q3:Q0
;
;Destroys: P3 P2 P1 P0 Q3 Q2 Q1 Q0 T0 Z0 Z1

RSEG CODE:CODE:NOROOT(1)
?FL2D_L11:
MOV Q0,P0
MOV Q1,P1
MOV Q2,P2
MOV Q3,P3
XCALL ?F_ZERO_L04
BST Q3,7 ;Store sign bit in T
CBR Q3,$80 ;Remove sign bit
LDI Z1,3
?FL2D_SHIFT:
LSR Q3
ROR Q2
ROR Q1
ROR Q0
ROR P3
DEC Z1
BRNE ?FL2D_SHIFT
SUBI Q3,$C8 ;Adjust exponent because of the difference in bias
BLD Q3,7 ;Load sign bit from T
RET

;***
;------------------------------------------------------------------------
;?D2FL_L11
;Conversion from double to float
;On call doublenumber is in P3:P0 Q3:Q0
;On return floatnumber is in Q3:Q0
;
;Destroys: P3 P2 P1 P0 Q3 Q2 Q1 Q0 Z1 Z0 T0

?D2FL_L11:
RCALL ?D_UNPACK_1_L11
SUBI Q3,$80 ;Adjust for bias
SBCI Z0,$03
BRNE ?SKIPI
XCALL ?F_ZERO_L04 ;If resulting exponent is too small
RJMP ?MOVPTOQ
?SKIPI:
BRCC ?SKIPJ
XCALL ?F_ZERO_L04 ;return Zero
RJMP ?MOVPTOQ
?SKIPJ:
CLR Z1
CPI Q3,$FF
CPC Z0,Z1
BRCS ?SKIPK
XCALL ?F_NAN_L04 ;If exponent too large, return Inf
?MOVPTOQ:
MOV Q0,P0
MOV Q1,P1
MOV Q2,P2
MOV Q3,P3
OR P3,T0
OR Q3,T0
RET
?SKIPK:
LSL Q2 ;Remove 'implied' bit
LSR Q3 ;and shift exponent into right position
ROR Q2
ADD Q3,T0 ;Add sign bit
RET

;***
;------------------------------------------------------------------------
;?D2L_L11 Double to long conversion
;On call number is in P3:P0 and Q3:Q0
;On return long number is in P3:P0
;
;Destroys: P3 P2 P1 P0 Q3 Q2 Q1 Q0 Z1 Z0 T1 T0

RSEG CODE:CODE:NOROOT(1)
?D2L_L11:
RCALL ?D_UNPACK_1_L11
SUBI Q3,$FF ;Subtract bias
SBCI Z0,$03
BRLO ?D2L1 ;Branch if exponent < 0
CPI Q3,32
SBCI Z0,0
BRLO ?D2L4 ;Branch if exponent < 33
XJMP ?F_NAN_L04
?D2L1:
XJMP ?F_ZERO_L04
?D2L4:
MOV P2,Q1
MOV P1,Q0
MOV P0,P3
MOV P3,Q2

MOV Q0,Q3
SUBI Q0,31
NEG Q0
XCALL ?UL_SHR_L03
SBRS T0,7
RET
XJMP ?L_NEG_L03

;***
;------------------------------------------------------------------------
;D_UNPACK unpacks two operands
;On call: first operand, A, is in register P3:P0, Q3:Q0
; LSB (P0:P1:P2:P3:Q0:Q1)(mantissa):(Q2:Q3)(exponent) MSB
; and the X-pointer pointing to the other.
;On return: A is stored as:
; sign bit in bit 7 of T0
; exponent in Z0 and Q3
; mantissa in P3:P0 and Q2:Q0
;the other operand B is stored as
; sign bit in T3
; exponent in T2 and T1
; and the 7 bytes of the mantissa on the Y stack
;D_UNPACK_1 only unpacks the first operand A
;
;Destroys: All scratch registers.

RSEG CODE:CODE:NOROOT(1)
?D_UNPACK:
SBIW Y0,7
ST -Y,P3 ;Make room for unpacking B
ST -Y,P2
ST -Y,P1
ST -Y,P0
ST -Y,Q0

LD T0,X+ ;Load B from X
LD P3,X+
LD Z1,X+
LD Z0,X+
LD P0,X+
LD P1,X+
LD P2,X+
LD T1,X
SBIW X0,7 ;Reset X

CLR T2
CLR T3
LDI Q0,3
LSL T1 ;Shift sign bit --> C
ROR T3 ;C --> sign bit (MSB of Z0)
LSR T1

?SHIFT_LOOP: ;Shift of exponent and mantissa
LSL T0
ROL P3
ROL Z1
ROL Z0
ROL P0
ROL P1
ROL P2
ROL T1
ROL T2
DEC Q0
BRNE ?SHIFT_LOOP

LSL P2
ROL T1
ROL T2
CP T1,Q0
CPC T2,Q0
SEC
BRNE SKIP_1 ;Test for MSB of mantissa
CLC ;If exp == 0, set MSB = 0
SKIP_1:
ROR P2

STD Y+5,T0 ;Store mantissa of B on Y-stack
STD Y+6,P3
STD Y+7,Z1
STD Y+8,Z0
STD Y+9,P0
STD Y+10,P1
STD Y+11,P2

LD Q0,Y+ ;Restore A
LD P0,Y+
LD P1,Y+
LD P2,Y+
LD P3,Y+

;Destroys: P3 P2 P1 P0 Q3 Q2 Q1 Q0 Z1 Z0 T0
;LSB (P0:P1:P2:P3:Q0:Q1)(mantissa):(Q2:Q3)(exponent) MSB
;
?D_UNPACK_1_L11: ;Unpacks number in P3:P0 and Q3:Q0
CLR T0 ;Clear sign bit
CLR Z0 ;Clear lowest bit
LDI Z1,3
LSL Q3 ;Shift sign bit --> C
ROR T0 ;C --> sign bit (T0)
LSR Q3
?SHIFT_LOOP_1: ;Shift of exponent and mantissa 3 steps
LSL P0
ROL P1
ROL P2
ROL P3
ROL Q0
ROL Q1
ROL Q2
ROL Q3
ROL Z0
DEC Z1
BRNE ?SHIFT_LOOP_1

LSL Q2 ;Exponent is shifted one more step
ROL Q3
ROL Z0
CP Q3,Z1 ;Check if exponent==0
CPC Z0,Z1
SEC
BRNE ?SKIP1 ;Shift 'implied' bit to MSB of mantissa
CLC
?SKIP1:
ROR Q2
RET

;***
;------------------------------------------------------------------------
;D_NORMALIZE shifts the number until MSB is 1 or exponent is 1
;D_ROUND performs the correct rounding of the number. On call the T flag
; shall be set if the 'trailing part' of the mantissa is non-zero.
;DENORM checks if number is denormal, i.e. if exp == 1 and MSB of mantissa = 0,
; exponent is set to Zero
;INF_CHECK checks if number should be returned as an Inf
;D_PACK finally packs the number to P3:P0 Q3:Q0
;
;Destroys: P3 P2 P1 P0 Q3 Q2 Q1 Q0 Z1 Z0 T0

RSEG CODE:CODE:NOROOT(1)
?D_NORMALIZE_L11:
CLR Z1
CP Q3,Z1 ;Test if exponent is larger than 1
CPC Z0,Z1
BREQ ?OUT_ZERO
?LOOP0:
TST Q2
BRNE ?NEXT_NRM
MOV Q2,Q1
MOV Q1,Q0
MOV Q0,P3
MOV P3,P2
MOV P2,P1
MOV P1,P0
CLR P0
SUBI Z1,-8
CPI Z1,56
BRLO ?LOOP0
CLR Q3
RET
?NEXT_NRM:
SUB Q3,Z1
SBCI Z0,0
BREQ ?OUT_ZERO
BRLO ?OUT_ZERO
?LOOP1:
SBRC Q2,7 ;Test if MSB is 1
RJMP ?D_ROUND
LSL P0 ;If not shift mantissa and
ROL P1 ;decrement exponent
ROL P2
ROL P3
ROL Q0
ROL Q1
ROL Q2
SUBI Q3,1
SBCI Z0,0
BRNE ?LOOP1
?OUT_ZERO:
RJMP ?D_ZERO
?ROUND2:
BRCC ?ROUND3 ;If Z1 > $04, round
RJMP ?DENORM
?D_ROUND:
LDI Z1,$07 ;Put low part of mantissa in Z1
AND Z1,P0
CPI Z1,$04
BRNE ?ROUND2
BRTS ?ROUND3 ;If Z1 == $04, round if T is set
SBRS P0,3 ;If Z1 == $04, T is cleared:
RJMP ?DENORM ;don't round if Q0:3 is cleared
?ROUND3:
SUBI P0,$F8 ;Round, i.e. add $08
SBCI P1,$FF
SBCI P2,$FF
SBCI P3,$FF
SBCI Q0,$FF
SBCI Q1,$FF
SBCI Q2,$FF
SBCI Q3,$FF
SBCI Z0,$FF
?DENORM: ;Check if denormal number
SBRC Z0,3
RJMP ?D_INF
?D_PACK:
LDI Z1,3 ;Loop counter
LSL Q2 ;Shift out implied bit
LSR Z0
ROR Q3
ROR Q2
?SHIFTLOOP: ;Shift exponent and mantissa to
LSR Z0 ;correct position
ROR Q3
ROR Q2
ROR Q1
ROR Q0
ROR P3
ROR P2
ROR P1
ROR P0
DEC Z1
BRNE ?SHIFTLOOP
ADD Q3,T0 ;Add sign bit
RET

;***
;-----------------------------------------------
;?SL2D_L11 Signed long to double conversion
;?UL2D_L11 Unsigned long to double conversion,
; uses ?SL2D_L11
;On call number to be converted is in P3:P0
;On return number is in P3:P0 and Q3:Q0
;
;Destroys: P3 P2 P1 P0 Q3 Q2 Q1 Q0 T0 Z1 Z0

RSEG CODE:CODE:NOROOT(1)
?SL2D_L11:
CLR T0
BST P3,7
BLD T0,7
BRTC ?SL2D_1
XCALL ?L_NEG_L03
?SL2D_1:
MOV Q0,P1
MOV Q1,P2
MOV Q2,P3
MOV P3,P0
CLR P0
CLR P1
CLR P2
LDI Z0,$04 ;Initial exponent is 31 + BIAS = 31 + 1023
LDI Q3,$1E
RJMP ?D_NORMALIZE_L11

RSEG CODE:CODE:NOROOT(1)
?UL2D_L11:
CLR T0 ;Set sign to positive
RJMP ?SL2D_1 ;Use signed-long-to-double conversion

;***
RSEG CODE:CODE:NOROOT(1)
?TEST_A:
MOV Z0,Q2
OR Z0,Q3
ANDI Z0,$7F
RET

?TEST_B:
ADIW X0,6
LD T0,X+
LD Z0,X
SBIW X0,7
OR T0,Z0
RET

;***
;------------------------------------------------------------------------
;?D_ADD_L11
;Addition of the operands A and B.
;On call P3:P0 and Q3:Q0 contain A and X is pointing to B
;On return result is placed in P3:P0 and Q3:Q0
;
;Works also on denormal numbers
;
;Destroys: All scratch registers

RSEG CODE:CODE:NOROOT(1)
?ADD_A_ZERO:
LD P0,X+ ;0+B = B (also if B=Inf or NaN)
LD P1,X+
LD P2,X+
LD P3,X+
LD Q0,X+
LD Q1,X+
LD Q2,X+
LD Q3,X
SBIW X0,7
?ADD_B_ZERO:
RET

?D_ADD_L11:
RCALL ?TEST_A
BREQ ?ADD_A_ZERO
RCALL ?TEST_B
BREQ ?ADD_B_ZERO
SBIW Y0,3
CLT
RCALL ?D_UNPACK
CLR Z1
CP T1,Q3
CPC T2,Z0
BRCC ?SHIFT_MANTT
;If exp A > exp B
STD Y+7,T0 ;store sign and
STD Y+8,Z0 ;store exponent
STD Y+9,Q3
SUB Q3,T1 ;determine number of shifts
SBC Z0,T2
CPI Q3,53
SBCI Z0,0
BRCC ?A_PLUS_0 ;If Z0 is nonzero, result = A (A>>B)
PUSH X2 ;make extra room
PUSH X1
PUSH T3 ;store sign of B on top of the stack
MOV X2,Q3
LD Q3,Y+ ;Load mantissa
LD Z0,Y+
LD Z1,Y+
LD T0,Y+
LD T1,Y+
LD T2,Y+
LD T3,Y+

?SHIFTING0:
CPI X2,8
BRLO ?SHIFTINGT
SUBI X2,8
TST Q3
BREQ $+4
SET
MOV Q3,Z0
MOV Z0,Z1
MOV Z1,T0
MOV T0,T1
MOV T1,T2
MOV T2,T3
CLR T3
RJMP ?SHIFTING0
?SHIFTINGT:
TST X2
BREQ ?SHIFTINGE
?SHIFTING1:
LSR T3
ROR T2
ROR T1
ROR T0
ROR Z1
ROR Z0
ROR Q3
BRCC ?SKIP_1 ;Check if carry out is set. This is needed
SET ;to determine rounding
?SKIP_1:
DEC X2
BRNE ?SHIFTING1
?SHIFTINGE:
POP X2 ;Put sign of B in X2
LD X1,Y ;Load sign of A
EOR X2,X1
POP X1
POP X2
RJMP ?SUB_OR_ADD
?A_PLUS_0:
ADD Q3,T1
ADC Z0,T2
ADIW Y0,10 ;Reset Y
RJMP ?D_PACK ;and then pack A

?SHIFT_MANTT: ;exp. A = exp. B
SUBI Q3,LOW(-8)
SBCI Z0,HIGH(-8)
CP T1,Q3
CPC T2,Z0
BRLO ?SHIFT_REST
CPSE P0,Z1
SET
MOV P0,P1
MOV P1,P2
MOV P2,P3
MOV P3,Q0
MOV Q0,Q1
MOV Q1,Q2
CLR Q2
RJMP ?SHIFT_MANTT
?SHIFT_REST:
SUBI Q3,LOW(8)
SBCI Z0,HIGH(8)
?SHIFT_MANT1: ;exp. A <= exp. B
CP T1,Q3
CPC T2,Z0
BREQ ?ADD1
LSR Q2 ;Shift mantissa of A
ROR Q1
ROR Q0
ROR P3
ROR P2
ROR P1
ROR P0
BRCC ?SKIP_2 ;Check if carry out is set. This is needed
SET ;to determine rounding
?SKIP_2: ;and increment exponent of A
SUBI Q3,LOW(-1)
SBCI Z0,HIGH(-1)
RJMP ?SHIFT_MANT1

?ADD1:
EOR T3,T0
STD Y+7,T0 ;Store sign...
STD Y+8,Z0 ;and exponent
STD Y+9,Q3
LD Q3,Y+ ;Load mantissa
LD Z0,Y+
LD Z1,Y+
LD T0,Y+
LD T1,Y+
LD T2,Y+
LD T3,Y+
?SUB_OR_ADD:
BRMI ?SUB_MANT

?ADD_MANT:
ADD P0,Q3 ;If signs were equal, add mantissas
ADC P1,Z0
ADC P2,Z1
ADC P3,T0
ADC Q0,T1
ADC Q1,T2
ADC Q2,T3 ;Carry set?
LD T0,Y+ ;Load sign and exponent
LD Z0,Y+
LD Q3,Y+
BRCC ?ADD3
ROR Q2 ;If Carry set, mantissa has to be shifted left
ROR Q1
ROR Q0
ROR P3
ROR P2
ROR P1
ROR P0
LDI Z1,$01
ADD Q3,Z1 ;and exponent incremented
CLR Z1
ADC Z0,Z1
RJMP ?D_NORMALIZE_L11
?SUB_MANT:
CP P0,Q3 ;If signs are different, subtract the mantissa with
CPC P1,Z0 ;the smaller magnitude from the larger one.
CPC P2,Z1 ;final sign is equal to the sign of the larger number
CPC P3,T0
CPC Q0,T1
CPC Q1,T2
CPC Q2,T3
BREQ ?POS_ZERO
BRCC ?SUB2
;Initially Carry is set
BRTS ?SUB1 ;If T flag is set Carry is set
CLC ;otherwise it's cleared
?SUB1:
SBC Q3,P0 ;B's mantissa largest
SBC Z0,P1
SBC Z1,P2
SBC T0,P3
SBC T1,Q0
SBC T2,Q1
SBC T3,Q2
MOV P0,Q3 ;Move mantissa to right position
MOV P1,Z0
MOV P2,Z1
MOV P3,T0
MOV Q0,T1
MOV Q1,T2
MOV Q2,T3
LD T0,Y+ ;Load sign and exponent
LD Z0,Y+
LD Q3,Y+
LDI Z1,$80 ;If B larger, swap sign
EOR T0,Z1
RJMP ?D_NORMALIZE_L11

?POS_ZERO:
CLR T0
ADIW Y0,3
RJMP ?D_ZERO
?SUB2:
BRTC ?SUB3
SEC
?SUB3:
SBC P0,Q3 ;A's mantissa largest
SBC P1,Z0
SBC P2,Z1
SBC P3,T0
SBC Q0,T1
SBC Q1,T2
SBC Q2,T3
LD T0,Y+ ;Load sign and exponent
LD Z0,Y+
LD Q3,Y+
?ADD3:
RJMP ?D_NORMALIZE_L11

;------------------------------------------------------------------------
;D_SUB performs B - A
;On call A is placed in P3:P0 and Q3:Q0 and the adress of B in X
;On return difference is placed in P3:P0 and Q3:Q0
;
;Works also on denormal numbers
;
;Destroys: All scratch registers.

RSEG CODE:CODE:NOROOT(1)
?D_SUB_L11:
LDI Z1,$80
EOR Q3,Z1 ;Swap sign
RJMP ?D_ADD_L11 ;and perform addition

;?MULT_INF and ?MULT_ZERO are called in ?D_MUL_L11 and ?D_DIV_L11 if the
;resulting exponent becomes too large or too small. Final sign is in both cases in T0.

RSEG CODE:CODE:NOROOT(1)
?MULT_INF:
ADIW Y0,10 ;Reset Y
RSEG CODE:CODE:NOROOT(1)
?D_INF:
LDI Q3,$7F ;Put exponent to 7_FF
OR Q3,T0 ;and add correct sign
LDI Q2,$FF
LDI Q1,$FF
LDI Q0,$FF
LDI P3,$FF
LDI P2,$FF
LDI P1,$FF
LDI P0,$FF
RET

RSEG CODE:CODE:NOROOT(1)
?MULT_ZERO:
ADIW Y0,10
?D_ZERO:
CLR Q3
CLR Q2
?CLEAR:
CLR Q1 ;Clear rest of mantissa
CLR Q0
XJMP ?F_ZERO_L04

;------------------------------------------------------------------------
;D_MUL performs A * B
;On call A is placed in P3:P0 and Q3:Q0 and the adress of B in X
;On return difference is placed in P3:P0 and Q3:Q0
;
;Works also on denormal numbers
;
;Destroys: All scratch registers.

RSEG CODE:CODE:NOROOT(1)
?D_MUL_L11:
RCALL ?TEST_A
BREQ ?D_ZERO
RCALL ?TEST_B
BREQ ?D_ZERO

SBIW Y0,3 ;Make room for sign and exponent to be
;stored later
CLT ;Test flag is used to determine rounding
RCALL ?D_UNPACK
EOR T0,T3 ;Determine final sign
ADD Q3,T1 ;Add exponents
ADC Z0,T2
SUBI Q3,$FF ;and subtract one bias (=03_FF)
SBCI Z0,$03
BREQ ?MULT_ZERO ;If result was negative or Zero, return Zero
BRCS ?MULT_ZERO
LDI Z1,$07
CPI Q3,$FF
CPC Z0,Z1
BRCC ?MULT_INF ;If exp >= 07_FF, return Inf

;----Multiplication of mantissas----

STD Y+7,T0 ;Store sign
STD Y+8,Z0 ;store exponent
STD Y+9,Q3
PUSH X0 ;Need more space...
PUSH X1
PUSH X2
;;;;;;;*
push a0
push a1
push a2
push a3
push a4
push a5
push a6
push a7
push a8
clt
clr T2
ldi X2,7
mov X0,Y0
mov X1,Y1
?_mul64_loop
clr a2
clr a3
clr a4
clr a5
clr a6
clr a7
clr a8
ld Q3,X+

fmul Q3,P0 ; 11
movw a0,T0
adc a2,T2
fmul Q3,P1 ; 12
adc a3,T2
add a1,T0
adc a2,T1
adc a3,T2
fmul Q3,P2 ; 13
adc a4,T2
add a2,T0
adc a3,T1
adc a4,T2
fmul Q3,P3 ; 14
adc a5,T2
add a3,T0
adc a4,T1
adc a5,T2
fmul Q3,Q0 ; 15
adc a6,T2
add a4,T0
adc a5,T1
adc a6,T2
fmul Q3,Q1 ; 16
adc a7,T2
add a5,T0
adc a6,T1
adc a7,T2
fmul Q3,Q2 ; 17
adc a8,T2
add a6,T0
adc a7,T1
adc a8,T2
brts ?_skp64_mov
;*mov mantissa a
st -Y,a0
st -Y,a1
st -Y,a2
st -Y,a3
st -Y,a4
st -Y,a5
st -Y,a6
st -Y,a7
st -Y,a8
st -Y,T2
st -Y,T2
st -Y,T2
st -Y,T2
st -Y,T2
st -Y,T2
mov Z0,Y0
mov Z1,Y1
adiw Z0,14
set
dec X2
rjmp ?_mul64_loop
?_skp64_mov
ld T3,-Z ; 1
add T3,a0
st Z,T3
ld T3,-Z ; 2
adc T3,a1
st Z,T3
ld T3,-Z ; 3
adc T3,a2
st Z,T3
ld T3,-Z ; 4
adc T3,a3
st Z,T3
ld T3,-Z ; 5
adc T3,a4
st Z,T3
ld T3,-Z ; 6
adc T3,a5
st Z,T3
ld T3,-Z ; 7
adc T3,a6
st Z,T3
ld T3,-Z ; 8
adc T3,a7
st Z,T3
ld T3,-Z ; 9
adc T3,a8
st Z,T3
adiw Z0,8
?_next64_loop
subi X2,1
breq $+4
rjmp ?_mul64_loop

ld a8,Y+ ; load result multiple
ld Q2,Y+
ld Q1,Y+
ld Q0,Y+
ld P3,Y+
ld P2,Y+
ld P1,Y+
ld P0,Y+
ld a7,Y+
ld a6,Y+
ld a5,Y+
ld a4,Y+
ld a3,Y+
ld a2,Y+
ld a1,Y+
adiw Y0,7

LD T0,Y+ ;Load sign and exponent to right position
LD Z0,Y+
LD Q3,Y+
sbrs a8,0
rjmp ?NRMRND64T
SUBI Q3,$FF ;If MSB of mantissa set, add one to exponent
SBCI Z0,$FF
lsr a8
ror Q2
ror Q1
ror Q0
ror P3
ror P2
ror P1
ror P0
?NRMRND64T
clt
clr a0
rol a0
or a0,a7
or a0,a6
or a0,a5
or a0,a4
or a0,a3
or a0,a2
or a0,a1
breq $+4
set

pop a8
pop a7
pop a6
pop a5
pop a4
pop a3
pop a2
pop a1
pop a0

POP X2
POP X1
POP X0
RJMP ?D_NORMALIZE_L11

;------------------------------------------------------------------------
;?D_DIV_L11 performs a division between A and B i.e. A/B
;On call P3:P0 and Q3:Q0 contain A (dividend) and X contains the adress of B (divisor)
;NOTE: If A or B is denormal, they will be treated as if they were Zero
;If result is denormal, it might be set to Zero
;
;Destroys: All scratch registers

RSEG CODE:CODE:NOROOT(1)
?D_DIV_L11:
RCALL ?TEST_A
BRNE $+4
RJMP ?D_ZERO
RCALL ?TEST_B
BRNE $+4
RJMP ?D_INF
SBIW Y0,3 ;Make room for sign and exponent to be
;stored later
CLT ;Test flag is used to determine rounding
RCALL ?D_UNPACK
EOR T0,T3 ;Determine final sign
SUBI Q3,$01 ;Add Bias = 03_FF
SBCI Z0,$FC
SUB Q3,T1 ;And subtract exponent of B
SBC Z0,T2
BRNE ?SKIP10
RJMP ?MULT_ZERO ;If result was negative or Zero, return Zero
?SKIP10:
BRCC ?SKIP11
RJMP ?MULT_ZERO
?SKIP11:
LDI Z1,$07
CPI Q3,$FF
CPC Z0,Z1
BRCS ?SKIP12
RJMP ?MULT_INF ;If exp >= 07_FF, return Inf
?SKIP12:
STD Y+7,T0 ;Store sign
STD Y+8,Z0 ;and exponent
STD Y+9,Q3

;----Division of mantissas----

PUSH X2 ;17 registers are needed, i.e. 3 extra
PUSH X1
PUSH X0
LD Q3,Y+ ;Load mantissa
LD Z0,Y+ ;Initially remainder is set to A
LD Z1,Y+
LD T0,Y+
LD T1,Y+
LD T2,Y+
LD T3,Y+
LSR Q2
ROR Q1
ROR Q0
ROR P3
ROR P2
ROR P1
ROR P0
LSR T3
ROR T2
ROR T1
ROR T0
ROR Z1
ROR Z0
ROR Q3
LDI X0,7 ;quotient will be placed in X2
?O_DIV_LOOP:
CLR X2
LDI X1,8
?DIV_LOOP:
LSL X2 ;Shift quotient left
CP P0,Q3
CPC P1,Z0
CPC P2,Z1
CPC P3,T0
CPC Q0,T1
CPC Q1,T2
CPC Q2,T3 ;If Carry set, put remainder = remainder - B
BRCS ?DIV_SHIFT
SUB P0,Q3
SBC P1,Z0
SBC P2,Z1
SBC P3,T0
SBC Q0,T1
SBC Q1,T2
SBC Q2,T3
INC X2 ;and also add one to quotient
?DIV_SHIFT:
LSL P0 ;Shift remaider
ROL P1
ROL P2
ROL P3
ROL Q0
ROL Q1
ROL Q2
DEC X1
BRNE ?DIV_LOOP
ST -Y,X2 ;Store quotient
DEC X0
BRNE ?O_DIV_LOOP ;We do this 7 times to calculate 7 quotient bytes
OR P0,P1 ;Check if remainder is nonzero
OR P0,P2
OR P0,P3
OR P0,Q0
OR P0,Q1
OR P0,Q2
BREQ ?DIV_SKIP
SET ;If remainder was nonzero, set T flag
?DIV_SKIP:
POP X0
POP X1
POP X2
LD P0,Y+ ;Load mantissa (which is placed on Y-stack
LD P1,Y+ ;in reversed order)
LD P2,Y+
LD P3,Y+
LD Q0,Y+
LD Q1,Y+
LD Q2,Y+
LD T0,Y+ ;and sign...
LD Z0,Y+ ;and finally exponent
LD Q3,Y+
RJMP ?D_NORMALIZE_L11 ;and then normalize and return there

;***
;------------------------------------------------------------------------
;?D_CMP_EQ_L11
;compare between two doubles A and B
;On call P3:P0 and Q3:Q0 contains A and X the adress of B
;
;On return: If A=B then Z=1
; If A<>B then Z=0
;
;Destroys: T3 T2 T1 T0 Z1 Z0. A and B are left unchanged.

RSEG CODE:CODE:NOROOT(1)

?D_CMP_EQ_L11:
LD T3,X+
LD T2,X
SBIW X0,1
CP P0,T3
CPC P1,T2
BRNE ?CMP_NEQ
ADIW X0,2
LD T3,X+
LD T2,X+
LD T1,X+
LD T0,X+
LD Z1,X+
LD Z0,X
SBIW X0,7
CP P2,T3
CPC P3,T2
CPC Q0,T1
CPC Q1,T0
CPC Q2,Z1
CPC Q3,Z0
?CMP_NEQ:
RET

;***
;------------------------------------------------------------------------
;?D_CMP_LT_L11
;compare between two doubles A and B
;On call P3:P0 and Q3:Q0 contains A and X the adress of B
;
;On return: If A ; If A>=B then C=0
;
;Destroys: T3 T2 T1 T0 Z1 Z0. A and B are left unchanged.

RSEG CODE:CODE:NOROOT(1)

?D_CMP_LT_L11:
MOV Z0,Q3
ADIW X0,7
LD Z1,X
EOR Z0,Z1 ;Start with comparing signs
BRPL ?CMP1 ;Signs were equal, continue comparing
SBIW X0,7
CP Z1,Q3 ;A RET

?CMP1: LD T3,-X
LD T2,-X
LD T1,-X
SBIW X0,4
CP Q0,T1 ;Compare 4 highest bytes
CPC Q1,T2
CPC Q2,T3
CPC Q3,Z1
BRCS ?CMP3
BRNE ?CMP3
ADIW X0,4
LD T3,-X ;If they were equal, continue with
LD T2,-X ;4 lowest bytes
LD T1,-X
LD T0,-X
CP P0,T0
CPC P1,T1
CPC P2,T2
CPC P3,T3
?CMP3:
TST Q3 ;If signs were negative, invert Carry
BRPL ?SKIP_SWAP
BRCC ?CMP2
CLC
RET
?CMP2:
SEC
?SKIP_SWAP:
RET

;***
;------------------------------------------------------------------------
;?D_CMP_GE_L11
;compare between two doubles A and B
;On call P3:P0 and Q3:Q0 contains A and X the adress of B
;
;On return: If A>=B then C=1
; If A ;
;Destroys: T3 T2 T1 T0 Z1 Z0. A and B are left unchanged

?D_CMP_GE_L11:
RCALL ?D_CMP_LT_L11 ;Use D_CMP_LT and then just invert Carry
BRCS ?GE_SWAP
SEC
RET
?GE_SWAP:
CLC
RET

ENDMOD

END