Retired Document
Important: This sample code may not represent best practices for current development. The project may use deprecated symbols and illustrate technologies and techniques that are no longer recommended.
vfactor source/vecarith.s
#/*************************************************************************************
#*
#* vecarith.s
#*
#* Assembly routines for AltiVec-based giant arithmetic.
#*
#* Updates:
#* 31 Mar 99 (proj) Delivery to A. Sazegari, Apple/Cupertino
#* 1 Feb 99 JAK - creation.
#*
#* This package is part of ongoing research in the
#* Advanced Computation Group, Apple Computer.
#*
#* c. 1999 Apple Computer, Inc.
#* All Rights Reserved.
#*
#************************************************************************************/
#//----------------------------------------------------------
# MakeFunction: exports a routine by emitting its function descriptor
# (code address plus TOC address) in the data section, then opens the code csect.
MACRO
MakeFunction &fnName
EXPORT &fnName[DS]
EXPORT .&fnName[PR]
TC &fnName[TC], &fnName[DS]
CSECT &fnName[DS]
DC.L .&fnName[PR]
DC.L TOC[tc0]
CSECT .&fnName[PR]
;FUNCTION .&fnName[PR]
ENDM
#//----------------------------------------------------------
# /*
# addvecs
# arguments:
# r3 = vector unsigned long *pvec1
# r4 = unsigned long len1
# r5 = vector unsigned long *pvec2
# r6 = unsigned long len2
# r7 = vector unsigned long *presults
#
# adds pvec1 to pvec2 (lengths count vectors) and stores the sum,
# including a final carry vector, in presults.
# */
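# /*
# For reference, the following is a rough C sketch of what addvecs computes.
# It ignores the AltiVec layout (each 16-byte vector holds four 32-bit digits,
# least significant at byte offset 12, with successive vectors holding
# increasingly significant digits) and simply treats each operand as a flat
# array of 32-bit digits, least significant first. The names are illustrative
# only and are not part of this sample.
#
# #include <stdint.h>
# #include <stddef.h>
#
# static void add_giants(const uint32_t *a, size_t alen,
#                        const uint32_t *b, size_t blen,
#                        uint32_t *result)
# {
#     if (alen > blen) {                   /* make a the shorter operand */
#         const uint32_t *tp = a; a = b; b = tp;
#         size_t tl = alen; alen = blen; blen = tl;
#     }
#     uint64_t carry = 0;
#     size_t i;
#     for (i = 0; i < alen; i++) {         /* add paired digits with carry */
#         uint64_t sum = (uint64_t)a[i] + b[i] + carry;
#         result[i] = (uint32_t)sum;
#         carry = sum >> 32;
#     }
#     for (; i < blen; i++) {              /* ripple the carry through the longer operand */
#         uint64_t sum = (uint64_t)b[i] + carry;
#         result[i] = (uint32_t)sum;
#         carry = sum >> 32;
#     }
#     result[i] = (uint32_t)carry;         /* final carry becomes the top digit */
# }
#
# The assembly below counts lengths in vectors (four digits each) and writes the
# final carry as one extra result vector.
# */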
MakeFunction addvecs
cmpl cr0, r4, r6 # compare lengths
bng cr0,.len2shorter
mr r11, r3 # len2 is shorter: r11/r9 <- pvec1/len1 (long), r3/r4 <- pvec2/len2 (short)
mr r9, r4
mr r4, r6
mr r3, r5
b .lenCompareDone
.len2shorter:
mr r9, r6 # len1 is shorter (or equal): r11/r9 <- pvec2/len2 (long)
mr r11, r5
.lenCompareDone:
addic r8, 0, 0 # clear the carry bit (CA)
cmpli cr0, r4, 0
addi r8, 0, 0 # r8 = 0
addi r12, 0, 0 # r12 = 0 vectors processed so far
bng cr0,.addvecs_startcarry # short length is zero
addi r11, r11, -16 # bias pointers for the lwzu/stwu addressing below
addi r10, r3, -16
mtspr CTR, r4 # loop over the shorter length
addi r12, r7, -4
.addvecs_looptop:
lwz r0, 28(r10) # load long digit 1
lwz r3, 28(r11) # load long digit 2
adde r6, r0, r3 # add with carry
stwu r6, 16(r12) # store result
lwz r7, 24(r10)
lwz r0, 24(r11)
adde r0, r7, r0
stw r0, -4(r12)
lwz r0, 20(r10)
lwz r7, 20(r11)
adde r0, r0, r7
stw r0, -8(r12)
lwzu r0, 16(r10)
lwzu r7, 16(r11)
adde r0, r0, r7
stw r0, -12(r12)
bdnz .addvecs_looptop # loop through short count
addi r7, r12, 4 # r7 -> next result vector
addi r11, r11, 16 # r11 -> next vector of the longer operand
mr r12, r4 # r12 = vectors processed so far
.addvecs_startcarry:
cmpl cr0, r12, r9 # any vectors left in the longer operand?
addi r10, 0, 1
bnl cr0,.addvecs_done
addi r11, r11, -16 # bias pointer for lwzu addressing
subf r0, r12, r9 # remaining vector count
addi r12, r7, -4
mtspr CTR, r0
.addvecs_carrylooptop:
lwz r3, 28(r11)
addze r3, r3 # add the carry bit into the digit
stwu r3, 16(r12)
lwz r5, 24(r11)
addze r5, r5
stw r5, -4(r12)
lwz r6, 20(r11)
addze r6, r6
stw r6, -8(r12)
lwzu r7, 16(r11)
addze r7, r7
stw r7, -12(r12)
bdnz .addvecs_carrylooptop
addi r7, r12, 4 # r7 -> next result vector
.addvecs_done:
addze r3, r8 # add carry bit to zero for top digit
stw r8, 0(r7) # write a final vector: zeros in the upper longs ...
stw r8, 8(r7)
stw r3, 12(r7) # ... and the carry in the lowest long
stw r8, 4(r7)
blr
#//---------------------------------------------------------------------------------
# /*
# subvecs
# arguments:
# r3 = vector unsigned long *pvec1
# r4 = unsigned long len1
# r5 = vector unsigned long *pvec2
# r6 = unsigned long len2
# r7 = vector unsigned long *presults
#
# subtracts pvec1 from pvec2 and stores the difference in presults
# NOTE: assumes len1 <= len2
# */
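# /*
# As with addvecs, here is a rough C sketch of the computation, again ignoring
# the vector layout and treating each operand as a flat array of 32-bit digits,
# least significant first. Names are illustrative only.
#
# #include <stdint.h>
# #include <stddef.h>
#
# /* result = b - a, assuming a has no more digits than b */
# static void sub_giants(const uint32_t *a, size_t alen,   /* pvec1 */
#                        const uint32_t *b, size_t blen,   /* pvec2 */
#                        uint32_t *result)
# {
#     uint64_t borrow = 0;
#     size_t i;
#     for (i = 0; i < alen; i++) {
#         uint64_t diff = (uint64_t)b[i] - a[i] - borrow;
#         result[i] = (uint32_t)diff;
#         borrow = (diff >> 32) & 1;       /* 1 if the subtraction wrapped */
#     }
#     for (; i < blen; i++) {              /* ripple the borrow upward */
#         uint64_t diff = (uint64_t)b[i] - borrow;
#         result[i] = (uint32_t)diff;
#         borrow = (diff >> 32) & 1;
#     }
# }
#
# The PowerPC code gets the same effect with subfe, after priming the carry bit
# with subfic so the first digit is subtracted with no incoming borrow.
# */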
MakeFunction subvecs
cmpli cr0, r4, 0 # is len1 zero?
addi r9, 0, 0 # r9 = 0
addi r12, 0, 0 # r12 = 0 vectors processed so far
subfic r9, r9, 0 # set the carry bit (no initial borrow); r9 stays 0
bng cr0,.subvecs_carrystart
addi r10, r3, -16 # bias pointers for the lwzu/stwu addressing below
addi r11, r5, -16
mtspr CTR, r4 # loop over len1 vectors
addi r12, r7, -4
.subvecs_looptop:
lwz r0, 28(r11) # load long digit from pvec2
lwz r3, 28(r10) # load long digit from pvec1
subfe r7, r3, r0 # subtract with borrow: digit2 - digit1
stwu r7, 16(r12) # store result
lwz r8, 24(r11)
lwz r0, 24(r10)
subfe r0, r0, r8
stw r0, -4(r12)
lwz r0, 20(r11)
lwz r8, 20(r10)
subfe r0, r8, r0
stw r0, -8(r12)
lwzu r0, 16(r11)
lwzu r8, 16(r10)
subfe r0, r8, r0
stw r0, -12(r12)
bdnz .subvecs_looptop # loop through len1 vectors
addi r7, r12, 4 # r7 -> next result vector
addi r5, r11, 16 # r5 -> next vector of pvec2
mr r12, r4 # r12 = vectors processed so far
.subvecs_carrystart:
cmpl cr0, r12, r6 # any vectors left in pvec2?
bnl cr0,.subvecs_done
addi r11, r5, -16 # bias pointer for lwzu addressing
subf r0, r12, r6 # remaining vector count
addi r12, r7, -4
mtspr CTR, r0
.subvecs_carrylooptop:
lwz r3, 28(r11)
subfe r3, r9, r3 # subtract the borrow from the digit
stwu r3, 16(r12)
lwz r5, 24(r11)
subfe r5, r9, r5
stw r5, -4(r12)
lwz r6, 20(r11)
subfe r6, r9, r6
stw r6, -8(r12)
lwzu r7, 16(r11)
subfe r7, r9, r7
stw r7, -12(r12)
bdnz .subvecs_carrylooptop
addi r7, r12, 4
.subvecs_done:
blr
#//---------------------------------------------------------------------------------
#// MultVecsByULong
#// arguments : pointer to vectors (r3)
#// number of vectors (r4)
#// multiplier (r5)
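#// For reference, a rough C sketch of the same operation on a flat array of
#// 32-bit digits (least significant first); each vector holds four digits.
#// Names are illustrative only.
#//
#// #include <stdint.h>
#// #include <stddef.h>
#//
#// /* digits *= m in place; returns the final carry word */
#// static uint32_t mul_giant_by_u32(uint32_t *digits, size_t ndigits, uint32_t m)
#// {
#//     uint64_t carry = 0;
#//     for (size_t i = 0; i < ndigits; i++) {
#//         uint64_t prod = (uint64_t)digits[i] * m + carry;  /* the mullw/mulhwu pair */
#//         digits[i] = (uint32_t)prod;
#//         carry = prod >> 32;
#//     }
#//     return (uint32_t)carry;
#// }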
MakeFunction MultVecsByULong
li r6,0 # clear carry digit
mtctr r4 # r4 -> counter
.MVBL_looptop:
lwz r4,12(r3) # lowest long in vector->r4
mullw r0,r5,r4 # multiplier*long -> r0 (low result)
mulhwu r4,r5,r4 # multiplier*long -> r4 (high result)
addc r0,r0,r6 # add carry to low result, and maybe set carry bit
addze r6,r4 # add any carry bit to high result, move to carry digit
stw r0,12(r3) # store low word of product
# same as above for long #2 in vector
lwz r4,8(r3)
mullw r0,r5,r4
mulhwu r4,r5,r4
addc r0,r0,r6
addze r6,r4
stw r0,8(r3)
# same as above for long #3 in vector
lwz r4,4(r3)
mullw r0,r5,r4
mulhwu r4,r5,r4
addc r0,r0,r6
addze r6,r4
stw r0,4(r3)
# same as above for long #4 in vector
lwz r4,0(r3)
mullw r0,r5,r4
mulhwu r4,r5,r4
addc r0,r0,r6
addze r6,r4
stw r0,0(r3)
# move on to next vector
addi r3,r3,16
bdnz .MVBL_looptop
mr r3,r6 # return last carry
blr
#//---------------------------------------------------------------------------------
#// AddULongToVecs
#// arguments : pointer to vectors (r3)
#// number of vectors (r4)
#// ulong to add (r5)
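#// For reference, a rough C sketch on a flat array of 32-bit digits (least
#// significant first). The assembly below walks every digit rather than
#// stopping once the carry reaches zero. Names are illustrative only.
#//
#// #include <stdint.h>
#// #include <stddef.h>
#//
#// /* digits += v in place; returns any carry out of the top digit */
#// static uint32_t add_u32_to_giant(uint32_t *digits, size_t ndigits, uint32_t v)
#// {
#//     uint64_t carry = v;
#//     for (size_t i = 0; i < ndigits; i++) {
#//         uint64_t sum = (uint64_t)digits[i] + carry;
#//         digits[i] = (uint32_t)sum;
#//         carry = sum >> 32;
#//     }
#//     return (uint32_t)carry;
#// }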
MakeFunction AddULongToVecs
addi r6, 0, 0 # r6 = 0
lwz r0,12(r3) # r0 = least significant long (lsd)
addc r0,r0,r5 # add in ulong
stw r0,12(r3) # store back
# propagate carry through next 3 longs
lwz r5,8(r3)
addze r0,r5
stw r0,8(r3)
lwz r5,4(r3)
addze r0,r5
stw r0,4(r3)
lwz r5,0(r3)
addze r0,r5
stw r0,0(r3)
subi r0,r4,1 # remaining vector count
# propagate carry through the remaining vectors
mtctr r0
cmplwi r4,$0001
ble .AULTV_exit # only one vector, nothing left to do
.AULTV_looptop:
lwz r4,28(r3)
addze r0,r4
stw r0,28(r3)
lwz r4,24(r3)
addze r0,r4
stw r0,24(r3)
lwz r4,20(r3)
addze r0,r4
stw r0,20(r3)
lwz r4,16(r3)
addze r0,r4
stwu r0,16(r3) # store top long and update r3 ptr
bdnz .AULTV_looptop
.AULTV_exit:
addze r3, r6 # return last carry
blr
Copyright © 2003 Apple Computer, Inc. All Rights Reserved. Updated: 2003-01-14