;Last Modified: 16-APR-1992 09:06:30.46
.title fprims Fast Multiple Precision Primitives
.ident /V1.7B/
;+
; **-FPRIMS-Fast Multiple Precision Primitives
;
; Facility: PGP
;
; Language: Macro-32
;
; Functional Description:
;
; This module contains fast multiple precision routines for operating on arrays
; of long words. Error checking is minimised at the expense of speed.
;
; Restrictions:
;
; This code is shareable but NOT reentrant as written because of static data.
; A reentrant version of this module could be written but it would be slower!
;
; Version: 1
;
; Original: 00A Date: 17-Sep-1991 Author: Hugh A.J. Kennedy
;
; Based on FPRIMS.ASM written by Zhahai Stewart for the Intel 8086
; architecture.
;
; Modification: 02A Date: 27-Sep-1991 Author: Hugh A.J. Kennedy.
;
; Add fast multiply routine, P_SMUL.
; Re-organise code slightly.
; Ammend/clarify copyright and license statement.
; Add checking for maximum precision exceeded, display a warning message
; and bomb!
;
; Modification: 03A Date: 16-Mar-1992 Author: Hugh A.J. Kennedy.
;
; Sniff for MSB in P_SMUL. In this way, avoid multiplies by leading zeroes
; (not efficient).
;
; Modification: 05A Date: 17-Mar-1992 Author: Hugh A.J. Kennedy.
;
; Encode entire double precision multiply in VAX assembler.
; Correct some minor problems with handling embedded zeroes.
;
; Modification: 06A Date: 17-Mar-1992 Author: Hugh A.J. Kennedy
;
; Align everything for speed. VAXen like stuff on 64-bit, or at least 32-bit
; boundaries. Therefore, we align the add, subtract and rotate tables and then
; we align the multiply loops. The extra NOPs used to pad these loops are of
; negligable cost because they already exist in the memory buffer. When the
; following instruction is aligned, it executes MUCH faster.
;
; Modification: 07A Date: 24-Mar-1991 Author: Hugh A.J. Kennedy.
;
; Implement fast compare.
;-
.sbttl Copyright Notice And License To Use
;
; Copyright (c) 1991-1992, All Rights Reserved by
; Hugh A.J. Kennedy.
;
; A license to use and adapt this software without payment is hereby
; granted subject to the following conditions:
;
; 1) It may only be copied with the inclusion of this copyright
; notice in the program source with these associated conditions.
;
; 2) No title to or ownership of this software is hereby
; transferred.
;
; 3) The information in this software is subject to change
; without notice and should not be construed as a commitment by
; Hugh Kennedy.
;
; 4) The author assumes no liability for any damages arising from the
; use of this software, even if said damages arises from defects in
; this software.
;
; 5) No warranty as to merchantability or fitness of purpose is
; expressed or implied.
;
; 6) Any modifications to this source must be clearly identified as
; such and added to the modification history.
;
; 7) These routines may not be incorporated in a commercial cryptographic
; product.
;
; If you can not comply with these conditions, you *must* contact the author
; and obtain permission other wise you are in violation of copyright.
.sbttl Misc Macros & Definitions
;
; Assembly Parameters
;
max_unit_prec = 72 ; Maximum unit precision
supersniffer = 1 ; Enable bit msb locator.
;
; The following parameter is dependent on the kind of VAX you are running on
; and should be defined if the execution time of the SOBGTR loop control
; instruction and the appropriate operation (ADWC or SBWC) from cache is much
; less than the execution time in main memory. If you have a slow VAX you
; should comment the following line out to use a vector of instructions.
;
novector = 1 ; Use loops rather than vectors.
.macro ascid .string
;+
; *-ASCID-Build An ASCII String Referenced By Descriptor
;
; Functional Description:
;
; This macro is a little like the system supplied .ASCID directive
; but it uses a separate program section to store the ASCII data.
;
; Arguments:
;
; STRING String to create
;-
.nocross
.save_psect
.psect puret
$$$t0 = .
.ascii @.string@
$$$t1 = .-$$$t0
.restore_psect
.word $$$t1
.byte dsc$k_dtype_t
.byte dsc$k_class_s
.address -
$$$t0
.cross
.endm ascid
.sbttl Misc Data Areas
;
; Misc. Data Areas
;
.psect impurd,con,lcl,noshr,exe,rd,wrt,long
;
; This data is static and is used to hold the current precision established
; by P_SETP for other calls to this library.
;
.if not_defined novector
addoff: ; Offset into add table.
.blkl 1 ; also for sub and rot.
.endc
precis: ; Precision in longwords.
.blkl 1
.psect pure,con,rel,shr,exe,rd,nowrt,quad
.align quad
.if not_defined novector
prectoobig:
ascid <PGP (FPRIMS) - Requested precision (!ZL) exceeds capacity (!ZL)>
.endc
.sbttl Start of Code
.sbttl P_CMP Compare two very long integers
;+
; **-P_COMP-Compare two very long integers
;
; Functional Description:
;
; This procedure is invoked to compare two extended precision unsigned
; integers.
;
; Calling Sequence
;
; short P_CMP ( r1, r2)
;
; Parameters:
;
; R1 -> Extended Precision Integer 1
; R2 -> Extended Precision Integer 2
;
; Implicit Inputs:
;
; PRECIS lr*r Precision expresses in longs.
;
; Returns:
;
; -1 if r1 < r2
; 0 if r1 = r2
; +1 if r1 > r2
;
;-
.align long
.entry p_cmp,^m<r2>
movl 4(ap),r1 ; R1 -> Sum.
movl 8(ap),r2 ; R2 -> Addend.
movl precis,r0 ; R0 = Precision.
moval (r1)[r0],r1 ; Get MS longwords.
moval (r2)[r0],r2 ; Get MS longwords.
.align long 1 ; Align loop with NOPS.
10$: cmpl -(r1),-(r2) ; Compare.
bnequ 20$ ; If ne, then exit loop.
sobgtr r0,10$ ; Loop until done.
ret ; R0 = zero so R1 = R2.
20$:
bgtru 30$ ; If R1 > R2 then branch.
movw #-1,r0 ; Flag <.
ret
30$:
movw #1,r0 ; Flag >.
ret
.sbttl P_ADDC Add two very long integers with carry
;+
; **-P_ADDC-Add very long integers
;
; Functional Description:
;
; This procedure is invoked to add two very long integers with carry. Each
; integer is represented as an array of longwords, least significant first.
;
; Calling Sequence:
;
; P_ADDC sum,addend,carry
;
; Parameters:
;
; sum lm*r Sum.
; addend lr*r Addend.
; carry lr*v Carry bit.
;
; Implicit Inputs:
;
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; R0 Resulting carry bit.
;-
.align long
.entry p_addc,^m<r2,r3>
movl 4(ap),r1 ; R1 -> Sum.
movl 8(ap),r2 ; R2 -> Addend.
.if defined novector
movl precis,r3 ; R3 = Precision.
subl3 12(ap),#0,r0 ; Set carry bit.
.align quad,1 ; Align loop with NOPs
10$: adwc (r2)+,(r1)+ ; Add with carry one longword.
.align quad,1 ; Align next instruction.
sobgtr r3,10$ ; Loop until done.
.iff ; novector
moval 10$,r3
addl2 addoff,r3 ; Jump into table.
subl3 12(ap),#0,r0 ; Set carry bit.
jmp (r3)
.align quad
10$:
.rept max_unit_prec
$$$ = .
adwc (r2)+,(r1)+ ; Add with carry one longword.
nop
addsiz = .-$$$
.endr
.endc ; novector
clrl r0 ; Assume carry clear.
bcc 20$ ; Carry set?
incl r0 ; Flag carry was set.
20$: ret
.sbttl P_SUBB Subtract very long integers with borrow
;+
; **-P_SUBB-Subtract very long integers
;
; Functional Description:
;
; This procedure is invoked to add subtract very long integers with carry. Each
; integer is represented as an array of longwords, least significant first.
;
; Calling Sequence:
;
; P_SUBB diff,sub,borrow
;
; Parameters:
;
; diff lm*r Difference
; sub lr*r Subtrahend.
; borrow lr*v Borrow bit.
;
; Implicit Inputs:
;
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; R0 Resulting carry bit.
;-
.align long
.entry p_subb,^m<r2,r3>
movl 4(ap),r1 ; R1 -> Difference.
movl 8(ap),r2 ; R2 -> Minuend.
.if defined novector
movl precis,r3 ; R3 = No. of longs.
subl3 12(ap),#0,r0 ; Set borrow bit.
.align quad,1 ; Align loop with NOPs.
10$: sbwc (r2)+,(r1)+ ; Subtract with borrow one long.
.align quad,1 ; Align with NOPs.
sobgtr r3,10$ ; Loop through.
.iff ; novector
moval 10$,r3
addl2 addoff,r3 ; Jump into table.
subl3 12(ap),#0,r0 ; Set borrow bit.
jmp (r3)
.align quad
10$:
.rept max_unit_prec
sbwc (r2)+,(r1)+ ; Subtract w/carry one longword.
nop
.endr
.endc ; novector
clrl r0 ; Assume carry clear.
bcc 20$ ; Carry set?
incl r0 ; Flag carry was set.
20$: ret
.sbttl P_ROTL Rotate left a very long integer with carry.
;+
; **-P_ROTL-Rotate left one bit very long integers
;
; Functional Description:
;
; This procedure is invoked to rotate left one bit (e.g. divide by 2) very
; long integers with carry. Each integer is represented as an array of
; longwords, least significant first. Note that we use the add with carry
; instruction here because the VAX (unlike the dear old PDP-11) lacks a
; rotate instruction that includes the carry bit.
;
; Calling Sequence:
;
; P_ROTL num,carry
;
; Parameters:
;
; num lm*r Number to be shifted
; carry lr*v Carry bit.
;
; Implicit Inputs:
;
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; R0 Resulting carry bit.
;-
.align long
.entry p_rotl,^m<r3>
movl 4(ap),r1 ; R1 -> Sum.
.if defined novector
movl precis,r3 ; R3 = No. of longwords.
subl3 8(ap),#0,r0 ; Set carry bit.
.align quad,1 ; Align loop with NOPs
10$: adwc (r1),(r1)+ ; Add to itself with carry.
.align quad,1 ; Align with NOPs.
sobgtr r3,10$ ; Loop until done.
.iff ; novector
moval 10$,r3
addl2 addoff,r3 ; Jump into table.
subl3 8(ap),#0,r0 ; Set carry bit.
jmp (r3)
.align quad
10$:
.rept max_unit_prec
adwc (r1),(r1)+ ; *2+carry.
nop
.endr
.endc ; novector
clrl r0 ; Assume carry clear.
bcc 20$ ; Carry set?
incl r0 ; Flag carry was set.
20$: ret
.sbttl P_DMUL Extended Multiple Precision Multiply
;*
; **-P_DMUL-Extended Multiple Precision Multiply
;
; Functional Description:
;
; This procedure multiplies an unsigned single precision multiplier by a
; single precision multiplicand. The product register is double precision.
; It is expected that the length of the single precision multiplier and
; multiplicand has been previously set by a call to P_SETP. Note that the
; entire length of the product register is zeroed - so it must be a full
; double precision size.
;
; Calling Sequence:
;
; P_DMUL prod, multiplicand, multiplier
;
; Parameters:
;
; prod lw*r Product.
; multuplicand lr*r Multiplicand
; multiplier lr*r Multiplier
;
; Implicit Inputs:
;
; PRECIS lr*r Precision expresses in longs.
;
; Status Returns:
;
; None.
;-
.align long
.entry p_dmul,^m<r2,r3,r4,r5,r6,r7,r8,r9,r10,r11>
movl 4(ap),r8 ; R8 -> Product.
beql 49$ ; If eq, not specified.
movl precis,r10 ; R10 = Precision (longs)
ashl #3,r10,r2 ; R0 = No. of bytes to zero.
movc5 #0,#0,#0,r2,(r8) ; Zero product buffer.
movl 8(ap),r3 ; R3 -> Multiplicand.
beql 49$ ; If eq, not specified.
pushl r3 ; Save for posterity.
movl 12(ap),r11 ; R11 -> Multiplier.
beql 49$ ; If eq, not specified.
movl r10,r12 ; R12 = Multiplicand prec.
.if defined SUPERSNIFFER
;
; Here we calculate the effective maximum precision for the multiply by
; locating the long containing the most significant bit of the multiplier
; and the multiplicand.
;
moval (r11)[r10],r0 ; Supersniffer...
.align quad,1 ; Align with nops
45$: tstl -(r0) ; Examine next long.
bneq 50$ ; If ne, then we found msb.
sobgtr r10,45$ ; Loop until done.
49$: ret ; Multiplier = 0!
50$:
moval (r3)[r12],r0 ; Supersniffer...
.align quad,1 ; Align with nops
55$: tstl -(r0) ; Examine next long.
bneq 200$ ; If ne, then we found msb.
sobgtr r12,55$ ; Loop until done.
ret ; Multiplicand = 0!
.iff
brb 200$
49$:
ret
.endc ; SUPERSNIFFER
;
; Multiplier Loop
;
; R12 = Count of multiplicand longs to process.
; R11 -> Next long of multiplier.
; R10 = Count of multiplier longs to process.
; R8 -> Next long of product.
;
.align quad,1 ; Align with nops
200$: movl r12,r5 ; Multiplicand precision.
moval (r8)+,r4 ; R4 -> Next long of product.
movl (sp),r3 ; R3 -> 1st multiplier long.
movl (r4),r0 ; R0,R1 = Partial Sum.
movl 4(r4),r1
clrl r7 ; Zero look-ahead carry.
;
; Perform an extended multiply of two unsigned numbers. This means that
; we have to compensate the hi-order product because either the multiplier
; or the multiplicand may be apparently a negative number. EMUL is a signed
; multiply - so we must be careful. Also, the EMUL longword addend is sign
; extended before adding into the product so we have to add the hard way.
;
; R6 = Current Multiplicand
; R2 = Multiplier
; R4 -> Current quadword of partial product.
; R0,R1 = Partial sum to which product is added
; R7 = Lookahead carry. This gets set if we try to carry after adding
; the partial product to the partial sum. This gets a little more
; complicated because here we are setting the high-order long of
; the next quadword to be operated on.
;
; Essentially the algorithm is as follows:
;
; 0) R0,R1 = (R4) ; Save current partial sum.
; 1) R6 = Next longword of multiplicand.
; 2) (R4) = R6 * R2 ; quad result compensating for negative numbers)
; 3) (R4) = (R4) + R0,R1 ; add back partial sum.
; 4) R7 = Carry bit.
; 5) R4 = R4 + 4 ; Point to next long.
; 6) R1 = 4(R4) + R7 ; Propagate carry to high order of next partial
; ; sum.
; 7) Loop back to step 1 until multiplicand completely processed.
;
movl (r11)+,r2 ; R2 = Multiplier.
beql 999$ ; If eq, not specified.
blss 1500$ ; This unfolds the compensation
; test out of the loop.
;
; This version of the multiply loop is entered when the multiplier is positive
; saving three instructions per unit of precision.
;
.align quad,1 ; Align with NOPs.
500$: movl (r3)+,r6 ; R6 = Current multplicand.
emul r2,r6,#0,(r4) ; Multiply (64-bit result).
;
; Because we have removed leading zeroes, multiplication by zero is very
; unlikely, 1 in 2^32 or so. It is therefore easier to perform the test after
; the EMUL (looking at the zero product) that the multiplicand was zero so we
; don't need any special case logic later to adjust the product pointer.
;
beql 550$ ; If result eq, skip.
tstl r6 ; Was multiplicand negative?
bgeq 550$ ; No, skip.
addl2 r2,4(r4) ; Yes, compensate.
550$: addl2 r0,(r4)+ ; Accumulate.
adwc r1,(r4)+
movl (r4),r1 ; R1 = Next hi-end partial sum.
adwc r7,r1 ; Add carry if needed.
clrl r7 ; Reset lookahead register.
adwc #0,r7 ; Save lookahead carry.
movl -(r4),r0 ; R0 = Next lo-end partial.
sobgtr r5,500$ ; More units?
999$: sobgtr r10,200$ ; Nope, go get next multiplier
ret
;
; This version of the above multiply loop is entered when the multiplier is
; negative - and we must compensate by adding the multiplicand to the hi-order
; product. This saves a test and a conditional branch per unit of precision.
;
.align quad,1 ; Align with NOPs.
1500$:
movl (r3)+,r6 ; R6 = Current multplicand.
emul r2,r6,#0,(r4) ; Multiply (64-bit result).
;
; Because we have removed leading zeroes, multiplication by zero is very
; unlikely, 1 in 2^32 or so. It is therefore easier to perform the test after
; the EMUL (looking at the zero product) that the multiplicand was zero so we
; don't need any special case logic later to adjust the product pointer.
;
beql 1560$ ; If result eq, skip.
tstl r6 ; Was multiplicand negative?
bgeq 1550$ ; No, skip.
addl2 r2,4(r4) ; Yes, compensate.
1550$:
; As documented above, we unfolded the following to save instructions
; tstl r2 ; Multiplier negative?
; bgeq 1560$ ; No, skip.
addl2 r6,4(r4) ; Yes, compensate.
1560$: addl2 r0,(r4)+ ; Accumulate.
adwc r1,(r4)+ ; R1 = High-end partial sum.
movl (r4),r1 ; R1 = Next hi-end partial sum.
adwc r7,r1 ; Add carry if needed.
clrl r7 ; Reset lookahead register.
adwc #0,r7 ; Save lookahead carry.
movl -(r4),r0 ; R0 = Next lo-end partial.
sobgtr r5,1500$ ; More units?
sobgtr r10,200$ ; Nope, go get next multiplier
ret
.sbttl P_SETP Set Precison.
;+
; **-P_SETP-Set Precision
;
; Functional Description:
;
; This procedure is invoked to set the operating precision of the package.
;
; Calling Sequence:
;
; P_SETP nbits
;
; Parameters:
;
; nbits rw*v Number of bits in number.
;
; Implicit Outputs:
;
; Precis Set to the number of longwords required to implement
; the requested precision.
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; None.
;
; Side Effects:
;
; If the maximum precision set in 32-bit units by the assembly
; parameter "max_unit_prec" is exceeded, a message to that effect will
; be displayed and the program will terminate with a fatal error.
;-
.entry p_setp,^m<>
movzwl 4(ap),r1 ; R1 = No. of bits.
addl2 #31,r1 ; Round up to next long word.
ashl #-5,r1,r1 ; R1 = No. of 32 bit words.
movl r1,precis ; Save precision.
.if not_defined novector
subl3 r1,#max_unit_prec,r0 ; R0 = Number of steps reqd.
blss 10$ ; If > 0 then exit.
mull3 #addsiz,r0,addoff ; Get add table offset.
.iftf ; novector
ret
.ift ; novector
10$: ; Table size exceeded!
movab -80(sp),sp ; Output buffer.
pushab (sp) ; Build descriptor
movzwl #80,-(sp)
clrl -(sp) ; Receive return length.
pushl #max_unit_prec ; Compiled max table size.
pushl r1 ; Requested table size.
pushaq 8+4(sp) ; -> Output buffer descriptor.
pushaw 12(sp) ; -> Returned length.
pushaq prectoobig ; -> FAO control string.
calls #5,g^sys$fao ; Format output string.
movl (sp)+,(sp) ; Set actual buffer size.
pushaq (sp) ; -> Output buffer descr.
calls #1,g^lib$put_output ; Output message.
$exit_s - ; Exit with severe error.
code=#4
.endc ; novector
.end
|