From 93fe6e13a379c5902dddb49f3d331e795d049a43 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sat, 16 Sep 2000 23:31:03 +0000 Subject: [PATCH 01/15] Add BIO_seek() and BIO_tell() to the BIO control functions manual. --- doc/crypto/BIO_ctrl.pod | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/crypto/BIO_ctrl.pod b/doc/crypto/BIO_ctrl.pod index 421a3ac1cb..acc46db8ce 100644 --- a/doc/crypto/BIO_ctrl.pod +++ b/doc/crypto/BIO_ctrl.pod @@ -3,9 +3,9 @@ =head1 NAME BIO_ctrl, BIO_callback_ctrl, BIO_ptr_ctrl, BIO_int_ctrl, BIO_reset, -BIO_flush, BIO_eof, BIO_set_close, BIO_get_close, BIO_pending, -BIO_wpending, BIO_ctrl_pending, BIO_ctrl_wpending, BIO_get_info_callback, -BIO_set_info_callback - BIO control operations +BIO_seek, BIO_tell, BIO_flush, BIO_eof, BIO_set_close, BIO_get_close, +BIO_pending, BIO_wpending, BIO_ctrl_pending, BIO_ctrl_wpending, +BIO_get_info_callback, BIO_set_info_callback - BIO control operations =head1 SYNOPSIS @@ -17,6 +17,8 @@ BIO_set_info_callback - BIO control operations long BIO_int_ctrl(BIO *bp,int cmd,long larg,int iarg); int BIO_reset(BIO *b); + int BIO_seek(BIO *b, int ofs); + int BIO_tell(BIO *b); int BIO_flush(BIO *b); int BIO_eof(BIO *b); int BIO_set_close(BIO *b,long flag); @@ -41,8 +43,14 @@ specific to a particular type of BIO are described in the specific BIOs manual page as well as any special features of the standard calls. -BIO_reset() typically reset a BIO to some initial state, in the case -of file related BIOs for example it rewinds the file pointer. +BIO_reset() typically resets a BIO to some initial state, in the case +of file related BIOs for example it rewinds the file pointer to the +start of the file. + +BIO_seek() resets a file related BIO's file position pointer to B +bytes from start of file. + +BIO_tell() returns the current file position of a file related BIO. BIO_flush() normally writes out any internally buffered data, in some cases it is used to signal EOF and that no more data will be written. @@ -67,6 +75,9 @@ macros which call BIO_ctrl(). BIO_reset() returns 1 for success and 0 for failure. +BIO_seek() and BIO_tell() both return the current file position on success +and -1 for failure. + BIO_flush() returns 1 for success and 0 or -1 for failure. BIO_eof() returns 1 if EOF has been reached 0 otherwise. From da542e1bf762507bc6630847e4c3dd18de81359d Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sat, 16 Sep 2000 23:32:33 +0000 Subject: [PATCH 02/15] Move text that isn't really descriptions of the functions in the page to the NOTES section, and add references to the functions mentioned (and perhaps a few more). --- doc/crypto/BIO_s_fd.pod | 29 ++++++++++++++--------------- doc/crypto/BIO_s_file.pod | 35 ++++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/doc/crypto/BIO_s_fd.pod b/doc/crypto/BIO_s_fd.pod index 4681e5e13b..61789fd2c1 100644 --- a/doc/crypto/BIO_s_fd.pod +++ b/doc/crypto/BIO_s_fd.pod @@ -20,15 +20,6 @@ BIO_s_fd, BIO_set_fd, BIO_get_fd, BIO_new_fd - file descriptor BIO BIO_s_fd() returns the file descriptor BIO method. This is a wrapper round the platforms file descriptor routines such as read() and write(). -BIO_read() and BIO_write() read or write the underlying descriptor. -BIO_puts() is supported but BIO_gets() is not. - -If the close flag is set then then close() is called on the underlying -file descriptor when the BIO is freed. - -BIO_reset() attempts to change the file pointer to the start of file -using lseek(fd, 0, 0). - BIO_set_fd() sets the file descriptor of BIO B to B and the close flag to B. @@ -40,6 +31,15 @@ BIO_new_fd() returns a file descriptor BIO using B and B. =head1 NOTES +If the close flag is set then then close() is called on the underlying +file descriptor when the BIO is freed. + +BIO_reset() attempts to change the file pointer to the start of file +using lseek(fd, 0, 0). + +BIO_read() and BIO_write() read or write the underlying descriptor. +BIO_puts() is supported but BIO_gets() is not. + The behaviour of BIO_read() and BIO_write() depends on the behavior of the platforms read() and write() calls on the descriptor. If the underlying file descriptor is in a non blocking mode then the BIO will behave in the @@ -53,11 +53,6 @@ instead. BIO_s_fd() returns the file descriptor BIO method. -BIO_reset() returns zero for success and -1 if an error occurred. -BIO_seek() and BIO_tell() return the current file position or -1 -is an error occurred. These values reflect the underlying lseek() -behaviour. - BIO_set_fd() always returns 1. BIO_get_fd() returns the file descriptor or -1 if the BIO has not @@ -77,4 +72,8 @@ This is a file descriptor BIO version of "Hello World": =head1 SEE ALSO -L, L, TBA +L, L, +L, L, +L, L, +L, L, +L, L diff --git a/doc/crypto/BIO_s_file.pod b/doc/crypto/BIO_s_file.pod index 10fe4933c7..166734b0bc 100644 --- a/doc/crypto/BIO_s_file.pod +++ b/doc/crypto/BIO_s_file.pod @@ -28,20 +28,6 @@ BIO_s_file() returns the BIO file method. As its name implies it is a wrapper round the stdio FILE structure and it is a source/sink BIO. -Calls to BIO_read() and BIO_write() read and write data to the -underlying stream. BIO_gets() and BIO_puts() are supported on file BIOs. - -BIO_flush() on a file BIO calls the fflush() function on the wrapped -stream. - -BIO_reset() on a file BIO calls fseek() to reset the position indicator -to the start of the file. - -BIO_eof() calls feof(). - -Setting the BIO_CLOSE flag calls fclose() on the stream when the BIO -is freed. - BIO_new_file() creates a new file BIO with mode B the meaning of B is the same as the stdio function fopen(). The BIO_CLOSE flag is set on the returned BIO. @@ -62,6 +48,20 @@ reading, writing, append or read write respectively. =head1 NOTES +Calls to BIO_read() and BIO_write() read and write data to the +underlying stream. BIO_gets() and BIO_puts() are supported on file BIOs. + +BIO_flush() on a file BIO calls the fflush() function on the wrapped +stream. + +BIO_reset() on a file BIO calls fseek() to reset the position indicator +to the start of the file. + +BIO_eof() calls feof(). + +Setting the BIO_CLOSE flag calls fclose() on the stream when the BIO +is freed. + When wrapping stdout, stdin or stderr the underlying stream should not normally be closed so the BIO_NOCLOSE flag should be set. @@ -116,4 +116,9 @@ BIO_rw_filename() return 1 for success or 0 for failure. =head1 SEE ALSO -L, L, TBA +L, L, +L, L, +L, +L, L, +L, L, +L, L From 07fcf422a1cb184ceda785e9aaed33c9d354bdcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bodo=20M=C3=B6ller?= Date: Sun, 17 Sep 2000 01:23:53 +0000 Subject: [PATCH 03/15] Rename new BIO_set_shutdown_wr macro to just BIO_shutdown_wr (it's similar to the shutdown(..., SHUT_WR) system call for sockets). --- CHANGES | 3 +++ crypto/bio/bio.h | 2 +- doc/crypto/BIO_s_bio.pod | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGES b/CHANGES index d47658a203..67b0f565d1 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,9 @@ Changes between 0.9.5a and 0.9.6 [xx XXX 2000] + *) New BIO_shutdown_wr macro, which invokes the BIO_C_SHUTDOWN_WR + BIO_ctrl (for BIO pairs). + *) Add DSO method for VMS. [Richard Levitte] diff --git a/crypto/bio/bio.h b/crypto/bio/bio.h index 137fd0de48..b7ab206777 100644 --- a/crypto/bio/bio.h +++ b/crypto/bio/bio.h @@ -474,7 +474,7 @@ size_t BIO_ctrl_wpending(BIO *b); #define BIO_get_write_buf_size(b,size) (size_t)BIO_ctrl(b,BIO_C_GET_WRITE_BUF_SIZE,size,NULL) #define BIO_make_bio_pair(b1,b2) (int)BIO_ctrl(b1,BIO_C_MAKE_BIO_PAIR,0,b2) #define BIO_destroy_bio_pair(b) (int)BIO_ctrl(b,BIO_C_DESTROY_BIO_PAIR,0,NULL) -#define BIO_set_shutdown_wr(b) (int)BIO_ctrl(b, BIO_C_SHUTDOWN_WR, 0, NULL) +#define BIO_shutdown_wr(b) (int)BIO_ctrl(b, BIO_C_SHUTDOWN_WR, 0, NULL) /* macros with inappropriate type -- but ...pending macros use int too: */ #define BIO_get_write_guarantee(b) (int)BIO_ctrl(b,BIO_C_GET_WRITE_GUARANTEE,0,NULL) #define BIO_get_read_request(b) (int)BIO_ctrl(b,BIO_C_GET_READ_REQUEST,0,NULL) diff --git a/doc/crypto/BIO_s_bio.pod b/doc/crypto/BIO_s_bio.pod index cf357c669d..95ae802e47 100644 --- a/doc/crypto/BIO_s_bio.pod +++ b/doc/crypto/BIO_s_bio.pod @@ -2,7 +2,7 @@ =head1 NAME -BIO_s_bio, BIO_make_bio_pair, BIO_destroy_bio_pair, BIO_set_shutdown_wr, +BIO_s_bio, BIO_make_bio_pair, BIO_destroy_bio_pair, BIO_shutdown_wr, BIO_set_write_buf_size, BIO_get_write_buf_size, BIO_new_bio_pair, BIO_get_write_guarantee, BIO_ctrl_get_write_guarantee, BIO_get_read_request, BIO_ctrl_get_read_request, BIO_ctrl_reset_read_request - BIO pair BIO @@ -16,7 +16,7 @@ BIO_ctrl_get_read_request, BIO_ctrl_reset_read_request - BIO pair BIO #define BIO_make_bio_pair(b1,b2) (int)BIO_ctrl(b1,BIO_C_MAKE_BIO_PAIR,0,b2) #define BIO_destroy_bio_pair(b) (int)BIO_ctrl(b,BIO_C_DESTROY_BIO_PAIR,0,NULL) - #define BIO_set_shutdown_wr(b) (int)BIO_ctrl(b, BIO_C_SHUTDOWN_WR, 0, NULL) + #define BIO_shutdown_wr(b) (int)BIO_ctrl(b, BIO_C_SHUTDOWN_WR, 0, NULL) #define BIO_set_write_buf_size(b,size) (int)BIO_ctrl(b,BIO_C_SET_WRITE_BUF_SIZE,size,NULL) #define BIO_get_write_buf_size(b,size) (size_t)BIO_ctrl(b,BIO_C_GET_WRITE_BUF_SIZE,size,NULL) @@ -62,7 +62,7 @@ BIO_make_bio_pair() joins two separate BIOs into a connected pair. BIO_destroy_pair() destroys the association between two connected BIOs. Freeing up any half of the pair will automatically destroy the association. -BIO_set_shutdown_wr() is used to close down a BIO B. After this call no further +BIO_shutdown_wr() is used to close down a BIO B. After this call no further writes on BIO B are allowed (they will return an error). Reads on the other half of the pair will return any pending data or EOF when all pending data has been read. From 8e913bff55adfb2b2f0ee9e3065b1432bb5d59bc Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 11:56:53 +0000 Subject: [PATCH 04/15] VMS didn't work out too well... --- STATUS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/STATUS b/STATUS index af8f921890..77d0ff1b5c 100644 --- a/STATUS +++ b/STATUS @@ -1,6 +1,6 @@ OpenSSL STATUS Last modified at - ______________ $Date: 2000/09/13 12:14:39 $ + ______________ $Date: 2000/09/17 11:56:53 $ DEVELOPMENT STATE @@ -35,6 +35,8 @@ MingW32 - failed thelp32.h aix-gcc (AIX 4.3.2) - passed + VMS/Alpha - failed + Some things were missing [FIXED] o OpenSSL 0.9.5a: Released on April 1st, 2000 o OpenSSL 0.9.5: Released on February 28th, 2000 o OpenSSL 0.9.4: Released on August 09th, 1999 From 282f92f7dd52deeb853c0627e5ece940ce3f5f79 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 11:57:33 +0000 Subject: [PATCH 05/15] Inform the VMS people that RSAref is no longer needed --- INSTALL.VMS | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/INSTALL.VMS b/INSTALL.VMS index 0a25324033..1fe78a41bb 100644 --- a/INSTALL.VMS +++ b/INSTALL.VMS @@ -82,12 +82,17 @@ directory. The syntax is trhe following: RSAREF compile using the RSAREF Library NORSAREF compile without using RSAREF -Note 1: The RSAREF libraries are NOT INCLUDED and you have to - download it from "ftp://ftp.rsa.com/rsaref". You have to - get the ".tar-Z" file as the ".zip" file doesn't have the - directory structure stored. You have to extract the file - into the [.RSAREF] directory as that is where the scripts - will look for the files. +Note 0: The RASREF library IS NO LONGER NEEDED. The RSA patent + expires September 20, 2000, and RSA Security chose to make + the algorithm public domain two weeks before that. + +Note 1: If you still want to use RSAREF, the library is NOT INCLUDED + and you have to download it. RSA Security doesn't carry it + any more, but there are a number of places where you can find + it. You have to get the ".tar-Z" file as the ".zip" file + doesn't have the directory structure stored. You have to + extract the file into the [.RSAREF] directory as that is where + the scripts will look for the files. Note 2: I have never done this, so I've no idea if it works or not. @@ -129,7 +134,7 @@ Currently, the logical names supported are: used. This is good to try if something doesn't work. OPENSSL_NO_'alg' with value YES, the corresponding crypto algorithm will not be implemented. Supported algorithms to - do this with are: RSA, DSA, DH, MD2, MD5, RIPEMD, + do this with are: RSA, DSA, DH, MD2, MD4, MD5, RIPEMD, SHA, DES, MDC2, CR2, RC4, RC5, IDEA, BF, CAST, HMAC, SSL2. So, for example, having the logical name OPENSSL_NO_RSA with the value YES means that the From 3aa477f6ec8f184ea72eca19826c92abc488ad12 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 14:42:46 +0000 Subject: [PATCH 06/15] Make sure Compaq C doesn'r complain about dollars, and go around the incompatibility between function and data pointers. --- crypto/dso/dso_vms.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/crypto/dso/dso_vms.c b/crypto/dso/dso_vms.c index 58b9027397..d2c06ddafa 100644 --- a/crypto/dso/dso_vms.c +++ b/crypto/dso/dso_vms.c @@ -60,6 +60,7 @@ #include #include #ifdef VMS +#pragma message disable DOLLARID #include #include #include @@ -255,13 +256,13 @@ static int do_find_symbol(DSO_VMS_INTERNAL *ptr, 0, flags); } -static void *vms_bind_sym(DSO *dso, const char *symname) +void vms_bind_sym(DSO *dso, const char *symname, void **sym) { DSO_VMS_INTERNAL *ptr; - void *sym = 0; int status; int flags = LIB$M_FIS_MIXEDCASE; struct dsc$descriptor_s symname_dsc; + *sym = NULL; symname_dsc.dsc$w_length = strlen(symname); symname_dsc.dsc$b_dtype = DSC$K_DTYPE_T; @@ -271,24 +272,24 @@ static void *vms_bind_sym(DSO *dso, const char *symname) if((dso == NULL) || (symname == NULL)) { DSOerr(DSO_F_VMS_BIND_VAR,ERR_R_PASSED_NULL_PARAMETER); - return(NULL); + return; } if(sk_num(dso->meth_data) < 1) { DSOerr(DSO_F_VMS_BIND_VAR,DSO_R_STACK_ERROR); - return(NULL); + return; } ptr = (DSO_VMS_INTERNAL *)sk_value(dso->meth_data, sk_num(dso->meth_data) - 1); if(ptr == NULL) { DSOerr(DSO_F_VMS_BIND_VAR,DSO_R_NULL_HANDLE); - return(NULL); + return; } if(dso->flags & DSO_FLAG_UPCASE_SYMBOL) flags = 0; - status = do_find_symbol(ptr, &symname_dsc, &sym, flags); + status = do_find_symbol(ptr, &symname_dsc, sym, flags); if(!$VMS_STATUS_SUCCESS(status)) { @@ -301,6 +302,8 @@ static void *vms_bind_sym(DSO *dso, const char *symname) errstring_dsc.dsc$b_class = DSC$K_CLASS_S; errstring_dsc.dsc$a_pointer = errstring; + *sym = NULL; + status = sys$getmsg(status, &length, &errstring_dsc, 1, 0); if (!$VMS_STATUS_SUCCESS(status)) @@ -322,19 +325,23 @@ static void *vms_bind_sym(DSO *dso, const char *symname) " in ", ptr->filename, ": ", errstring); } - return(NULL); + return; } - return(sym); + return; } static void *vms_bind_var(DSO *dso, const char *symname) { - return vms_bind_sym(dso, symname); + void *sym = 0; + vms_bind_sym(dso, symname, &sym); + return sym; } static DSO_FUNC_TYPE vms_bind_func(DSO *dso, const char *symname) { - return (DSO_FUNC_TYPE)vms_bind_sym(dso, symname); + DSO_FUNC_TYPE sym = 0; + vms_bind_sym(dso, symname, &sym); + return sym; } static long vms_ctrl(DSO *dso, int cmd, long larg, void *parg) From df320c4473c7addf2b928849ecb8a116cf95af2f Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 14:44:35 +0000 Subject: [PATCH 07/15] A couple of more names need to be shortened for VMS on VAX. --- crypto/symhacks.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/symhacks.h b/crypto/symhacks.h index 5b66d0f1ba..6b472b2f4c 100644 --- a/crypto/symhacks.h +++ b/crypto/symhacks.h @@ -137,6 +137,10 @@ #define SSL_CTX_set_cert_verify_callback SSL_CTX_set_cert_verify_cb #define SSL_CTX_set_default_passwd_cb_userdata SSL_CTX_set_def_passwd_cb_ud +/* Hack some long ENGINE names */ +#define ENGINE_get_default_BN_mod_exp_crt ENGINE_get_def_BN_mod_exp_crt +#define ENGINE_set_default_BN_mod_exp_crt ENGINE_set_def_BN_mod_exp_crt + #endif /* defined VMS */ From 60dae9985d0120c62552776919ca474266649908 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 14:46:09 +0000 Subject: [PATCH 08/15] Some platforms define NULL as ((void *)0). Unfortunately, a void* can't be used as a function pointer according the the standards. Use a 0 instead and there will be no trouble. --- crypto/stack/stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/stack/stack.c b/crypto/stack/stack.c index 2eb531fd0d..02857f0446 100644 --- a/crypto/stack/stack.c +++ b/crypto/stack/stack.c @@ -111,7 +111,7 @@ err: STACK *sk_new_null(void) { - return sk_new((int (*)(const char * const *, const char * const *))NULL); + return sk_new((int (*)(const char * const *, const char * const *))0); } STACK *sk_new(int (*c)(const char * const *, const char * const *)) From 6d50071e84b3ce6e7ddfe11af0b9fab50f31f3ea Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 15:41:24 +0000 Subject: [PATCH 09/15] Tell users that a rewrite might be a good idea. --- util/mkdef.pl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/util/mkdef.pl b/util/mkdef.pl index 5bc02355ff..318d3a8aed 100755 --- a/util/mkdef.pl +++ b/util/mkdef.pl @@ -179,6 +179,7 @@ $crypto.=" crypto/dsa/dsa.h" unless $no_dsa; $crypto.=" crypto/dh/dh.h" unless $no_dh; $crypto.=" crypto/hmac/hmac.h" unless $no_hmac; +$crypto.=" crypto/engine/engine.h"; $crypto.=" crypto/stack/stack.h"; $crypto.=" crypto/buffer/buffer.h"; $crypto.=" crypto/bio/bio.h"; @@ -622,6 +623,9 @@ sub maybe_add_info { } if ($new_info) { print STDERR "$new_info old symbols got an info update\n"; + if (!$do_rewrite) { + print STDERR "You should do a rewrite to fix this.\n"; + } } else { print STDERR "No old symbols needed info update\n"; } From 5ef8093732ab22266a69043e1209be80eb8b83c2 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 15:45:43 +0000 Subject: [PATCH 10/15] Oops, no engine in the main trunk. --- util/mkdef.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/util/mkdef.pl b/util/mkdef.pl index 318d3a8aed..b673e05a7b 100755 --- a/util/mkdef.pl +++ b/util/mkdef.pl @@ -179,7 +179,6 @@ $crypto.=" crypto/dsa/dsa.h" unless $no_dsa; $crypto.=" crypto/dh/dh.h" unless $no_dh; $crypto.=" crypto/hmac/hmac.h" unless $no_hmac; -$crypto.=" crypto/engine/engine.h"; $crypto.=" crypto/stack/stack.h"; $crypto.=" crypto/buffer/buffer.h"; $crypto.=" crypto/bio/bio.h"; From 623eea376a7bff6d50c88407c10f279e6f838de4 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 18:08:38 +0000 Subject: [PATCH 11/15] siglen is unsigned, so comparing it to less than 0 is silly, and generates a compiler warning with Compaq C. --- apps/dgst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/dgst.c b/apps/dgst.c index a151fb8457..5d5ab94aea 100644 --- a/apps/dgst.c +++ b/apps/dgst.c @@ -280,7 +280,7 @@ int MAIN(int argc, char **argv) } siglen = BIO_read(sigbio, sigbuf, siglen); BIO_free(sigbio); - if(siglen <= 0) { + if(siglen == 0) { BIO_printf(bio_err, "Error reading signature file %s\n", sigfile); ERR_print_errors(bio_err); From 62324627aa3309e4f72e3ff0241801f4286fa242 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 18:21:27 +0000 Subject: [PATCH 12/15] Use sk_*_new_null() instead of sk_*_new(NULL), since that takes care of complaints from the compiler about data pointers and function pointers not being compatible with each other. --- apps/apps.c | 2 +- apps/crl2p7.c | 6 +++--- apps/nseq.c | 2 +- apps/pkcs12.c | 10 +++++----- crypto/asn1/a_set.c | 2 +- crypto/dso/dso_lib.c | 4 ++-- crypto/pkcs12/p12_attr.c | 12 ++++++------ crypto/pkcs12/p12_crt.c | 6 +++--- crypto/pkcs12/p12_kiss.c | 2 +- crypto/pkcs12/p12_npas.c | 2 +- crypto/pkcs7/pk7_doit.c | 2 +- crypto/pkcs7/pk7_mime.c | 2 +- crypto/pkcs7/pk7_smime.c | 4 ++-- crypto/x509v3/v3_akey.c | 2 +- crypto/x509v3/v3_alt.c | 6 +++--- crypto/x509v3/v3_crld.c | 2 +- crypto/x509v3/v3_extku.c | 2 +- crypto/x509v3/v3_genn.c | 2 +- crypto/x509v3/v3_info.c | 4 ++-- crypto/x509v3/v3_utl.c | 2 +- ssl/ssl_cert.c | 2 +- ssl/ssl_ciph.c | 2 +- ssl/ssl_lib.c | 2 +- 23 files changed, 41 insertions(+), 41 deletions(-) diff --git a/apps/apps.c b/apps/apps.c index e5a42d4d97..c22550b294 100644 --- a/apps/apps.c +++ b/apps/apps.c @@ -660,7 +660,7 @@ STACK_OF(X509) *load_certs(BIO *err, char *file, int format) if (format == FORMAT_PEM) { - othercerts = sk_X509_new(NULL); + othercerts = sk_X509_new_null(); if(!othercerts) { sk_X509_free(othercerts); diff --git a/apps/crl2p7.c b/apps/crl2p7.c index ebf5fe90ec..d02862710d 100644 --- a/apps/crl2p7.c +++ b/apps/crl2p7.c @@ -141,7 +141,7 @@ int MAIN(int argc, char **argv) else if (strcmp(*argv,"-certfile") == 0) { if (--argc < 1) goto bad; - if(!certflst) certflst = sk_new(NULL); + if(!certflst) certflst = sk_new_null(); sk_push(certflst,*(++argv)); } else @@ -215,7 +215,7 @@ bad: p7s->contents->type=OBJ_nid2obj(NID_pkcs7_data); if (!ASN1_INTEGER_set(p7s->version,1)) goto end; - if ((crl_stack=sk_X509_CRL_new(NULL)) == NULL) goto end; + if ((crl_stack=sk_X509_CRL_new_null()) == NULL) goto end; p7s->crl=crl_stack; if (crl != NULL) { @@ -223,7 +223,7 @@ bad: crl=NULL; /* now part of p7 for OPENSSL_freeing */ } - if ((cert_stack=sk_X509_new(NULL)) == NULL) goto end; + if ((cert_stack=sk_X509_new_null()) == NULL) goto end; p7s->cert=cert_stack; if(certflst) for(i = 0; i < sk_num(certflst); i++) { diff --git a/apps/nseq.c b/apps/nseq.c index cc88d50ceb..7210fbdb5e 100644 --- a/apps/nseq.c +++ b/apps/nseq.c @@ -123,7 +123,7 @@ int MAIN(int argc, char **argv) if (toseq) { seq = NETSCAPE_CERT_SEQUENCE_new(); - seq->certs = sk_X509_new(NULL); + seq->certs = sk_X509_new_null(); while((x509 = PEM_read_bio_X509(in, NULL, NULL, NULL))) sk_X509_push(seq->certs,x509); diff --git a/apps/pkcs12.c b/apps/pkcs12.c index 0f3ac4977a..261139565d 100644 --- a/apps/pkcs12.c +++ b/apps/pkcs12.c @@ -197,7 +197,7 @@ int MAIN(int argc, char **argv) } else if (!strcmp (*args, "-caname")) { if (args[1]) { args++; - if (!canames) canames = sk_new(NULL); + if (!canames) canames = sk_new_null(); sk_push(canames, *args); } else badarg = 1; } else if (!strcmp (*args, "-in")) { @@ -404,7 +404,7 @@ int MAIN(int argc, char **argv) CRYPTO_push_info("reading certs from input"); #endif - certs = sk_X509_new(NULL); + certs = sk_X509_new_null(); /* Load in all certs in input file */ if(!cert_load(in, certs)) { @@ -436,7 +436,7 @@ int MAIN(int argc, char **argv) CRYPTO_push_info("reading certs from certfile"); #endif - bags = sk_PKCS12_SAFEBAG_new (NULL); + bags = sk_PKCS12_SAFEBAG_new_null (); /* Add any more certificates asked for */ if (certsin) { @@ -527,7 +527,7 @@ int MAIN(int argc, char **argv) goto export_end; } - safes = sk_PKCS7_new (NULL); + safes = sk_PKCS7_new_null (); sk_PKCS7_push (safes, authsafe); #ifdef CRYPTO_MDEBUG @@ -543,7 +543,7 @@ int MAIN(int argc, char **argv) p8 = NULL; if (name) PKCS12_add_friendlyname (bag, name, -1); PKCS12_add_localkeyid (bag, keyid, keyidlen); - bags = sk_PKCS12_SAFEBAG_new(NULL); + bags = sk_PKCS12_SAFEBAG_new_null(); sk_PKCS12_SAFEBAG_push (bags, bag); #ifdef CRYPTO_MDEBUG diff --git a/crypto/asn1/a_set.c b/crypto/asn1/a_set.c index 1921f5eaa1..caf5a1419c 100644 --- a/crypto/asn1/a_set.c +++ b/crypto/asn1/a_set.c @@ -158,7 +158,7 @@ STACK *d2i_ASN1_SET(STACK **a, unsigned char **pp, long length, STACK *ret=NULL; if ((a == NULL) || ((*a) == NULL)) - { if ((ret=sk_new(NULL)) == NULL) goto err; } + { if ((ret=sk_new_null()) == NULL) goto err; } else ret=(*a); diff --git a/crypto/dso/dso_lib.c b/crypto/dso/dso_lib.c index d1d50cc5e6..acd166697e 100644 --- a/crypto/dso/dso_lib.c +++ b/crypto/dso/dso_lib.c @@ -107,8 +107,8 @@ DSO *DSO_new_method(DSO_METHOD *meth) return(NULL); } memset(ret, 0, sizeof(DSO)); - ret->meth_data = sk_new(NULL); - if((ret->meth_data = sk_new(NULL)) == NULL) + ret->meth_data = sk_new_null(); + if((ret->meth_data = sk_new_null()) == NULL) { /* sk_new doesn't generate any errors so we do */ DSOerr(DSO_F_DSO_NEW_METHOD,ERR_R_MALLOC_FAILURE); diff --git a/crypto/pkcs12/p12_attr.c b/crypto/pkcs12/p12_attr.c index b370c9cf3f..f1a210b5d2 100644 --- a/crypto/pkcs12/p12_attr.c +++ b/crypto/pkcs12/p12_attr.c @@ -87,13 +87,13 @@ int PKCS12_add_localkeyid (PKCS12_SAFEBAG *bag, unsigned char *name, return 0; } attrib->object = OBJ_nid2obj(NID_localKeyID); - if (!(attrib->value.set = sk_ASN1_TYPE_new(NULL))) { + if (!(attrib->value.set = sk_ASN1_TYPE_new_null())) { PKCS12err(PKCS12_F_PKCS12_ADD_LOCALKEYID, ERR_R_MALLOC_FAILURE); return 0; } sk_ASN1_TYPE_push (attrib->value.set,keyid); attrib->set = 1; - if (!bag->attrib && !(bag->attrib = sk_X509_ATTRIBUTE_new (NULL))) { + if (!bag->attrib && !(bag->attrib = sk_X509_ATTRIBUTE_new_null ())) { PKCS12err(PKCS12_F_PKCS12_ADD_LOCALKEYID, ERR_R_MALLOC_FAILURE); return 0; } @@ -129,14 +129,14 @@ int PKCS8_add_keyusage (PKCS8_PRIV_KEY_INFO *p8, int usage) return 0; } attrib->object = OBJ_nid2obj(NID_key_usage); - if (!(attrib->value.set = sk_ASN1_TYPE_new(NULL))) { + if (!(attrib->value.set = sk_ASN1_TYPE_new_null())) { PKCS12err(PKCS12_F_PKCS8_ADD_KEYUSAGE, ERR_R_MALLOC_FAILURE); return 0; } sk_ASN1_TYPE_push (attrib->value.set,keyid); attrib->set = 1; if (!p8->attributes - && !(p8->attributes = sk_X509_ATTRIBUTE_new (NULL))) { + && !(p8->attributes = sk_X509_ATTRIBUTE_new_null ())) { PKCS12err(PKCS12_F_PKCS8_ADD_KEYUSAGE, ERR_R_MALLOC_FAILURE); return 0; } @@ -195,14 +195,14 @@ int PKCS12_add_friendlyname_uni (PKCS12_SAFEBAG *bag, return 0; } attrib->object = OBJ_nid2obj(NID_friendlyName); - if (!(attrib->value.set = sk_ASN1_TYPE_new(NULL))) { + if (!(attrib->value.set = sk_ASN1_TYPE_new_null())) { PKCS12err(PKCS12_F_PKCS12_ADD_FRIENDLYNAME, ERR_R_MALLOC_FAILURE); return 0; } sk_ASN1_TYPE_push (attrib->value.set,fname); attrib->set = 1; - if (!bag->attrib && !(bag->attrib = sk_X509_ATTRIBUTE_new (NULL))) { + if (!bag->attrib && !(bag->attrib = sk_X509_ATTRIBUTE_new_null ())) { PKCS12err(PKCS12_F_PKCS12_ADD_FRIENDLYNAME_UNI, ERR_R_MALLOC_FAILURE); return 0; diff --git a/crypto/pkcs12/p12_crt.c b/crypto/pkcs12/p12_crt.c index 5641a00898..a8f7b48882 100644 --- a/crypto/pkcs12/p12_crt.c +++ b/crypto/pkcs12/p12_crt.c @@ -88,7 +88,7 @@ PKCS12 *PKCS12_create(char *pass, char *name, EVP_PKEY *pkey, X509 *cert, if(!X509_check_private_key(cert, pkey)) return NULL; - if(!(bags = sk_PKCS12_SAFEBAG_new (NULL))) { + if(!(bags = sk_PKCS12_SAFEBAG_new_null ())) { PKCS12err(PKCS12_F_PKCS12_CREATE,ERR_R_MALLOC_FAILURE); return NULL; } @@ -123,7 +123,7 @@ PKCS12 *PKCS12_create(char *pass, char *name, EVP_PKEY *pkey, X509 *cert, if (!authsafe) return NULL; - if(!(safes = sk_PKCS7_new (NULL)) + if(!(safes = sk_PKCS7_new_null ()) || !sk_PKCS7_push(safes, authsafe)) { PKCS12err(PKCS12_F_PKCS12_CREATE,ERR_R_MALLOC_FAILURE); return NULL; @@ -137,7 +137,7 @@ PKCS12 *PKCS12_create(char *pass, char *name, EVP_PKEY *pkey, X509 *cert, PKCS8_PRIV_KEY_INFO_free(p8); if (name && !PKCS12_add_friendlyname (bag, name, -1)) return NULL; if(!PKCS12_add_localkeyid (bag, keyid, keyidlen)) return NULL; - if(!(bags = sk_PKCS12_SAFEBAG_new(NULL)) + if(!(bags = sk_PKCS12_SAFEBAG_new_null()) || !sk_PKCS12_SAFEBAG_push (bags, bag)) { PKCS12err(PKCS12_F_PKCS12_CREATE,ERR_R_MALLOC_FAILURE); return NULL; diff --git a/crypto/pkcs12/p12_kiss.c b/crypto/pkcs12/p12_kiss.c index 368c98765c..1fbbd6c99f 100644 --- a/crypto/pkcs12/p12_kiss.c +++ b/crypto/pkcs12/p12_kiss.c @@ -93,7 +93,7 @@ int PKCS12_parse (PKCS12 *p12, const char *pass, EVP_PKEY **pkey, X509 **cert, /* Allocate stack for ca certificates if needed */ if ((ca != NULL) && (*ca == NULL)) { - if (!(*ca = sk_X509_new(NULL))) { + if (!(*ca = sk_X509_new_null())) { PKCS12err(PKCS12_F_PKCS12_PARSE,ERR_R_MALLOC_FAILURE); return 0; } diff --git a/crypto/pkcs12/p12_npas.c b/crypto/pkcs12/p12_npas.c index eed494a3f3..84e31a7f21 100644 --- a/crypto/pkcs12/p12_npas.c +++ b/crypto/pkcs12/p12_npas.c @@ -114,7 +114,7 @@ static int newpass_p12(PKCS12 *p12, char *oldpass, char *newpass) unsigned int maclen; if (!(asafes = M_PKCS12_unpack_authsafes(p12))) return 0; - if(!(newsafes = sk_PKCS7_new(NULL))) return 0; + if(!(newsafes = sk_PKCS7_new_null())) return 0; for (i = 0; i < sk_PKCS7_num (asafes); i++) { p7 = sk_PKCS7_value(asafes, i); bagnid = OBJ_obj2nid(p7->type); diff --git a/crypto/pkcs7/pk7_doit.c b/crypto/pkcs7/pk7_doit.c index 2768247818..099e9651c1 100644 --- a/crypto/pkcs7/pk7_doit.c +++ b/crypto/pkcs7/pk7_doit.c @@ -909,7 +909,7 @@ static int add_attribute(STACK_OF(X509_ATTRIBUTE) **sk, int nid, int atrtype, if (*sk == NULL) { - *sk = sk_X509_ATTRIBUTE_new(NULL); + *sk = sk_X509_ATTRIBUTE_new_null(); new_attrib: attr=X509_ATTRIBUTE_create(nid,atrtype,value); sk_X509_ATTRIBUTE_push(*sk,attr); diff --git a/crypto/pkcs7/pk7_mime.c b/crypto/pkcs7/pk7_mime.c index a7b6929436..994473c0bd 100644 --- a/crypto/pkcs7/pk7_mime.c +++ b/crypto/pkcs7/pk7_mime.c @@ -372,7 +372,7 @@ static int multi_split(BIO *bio, char *bound, STACK_OF(BIO) **ret) part = 0; state = 0; first = 1; - parts = sk_BIO_new(NULL); + parts = sk_BIO_new_null(); *ret = parts; while ((len = BIO_gets(bio, linebuf, MAX_SMLEN)) > 0) { state = mime_bound_check(linebuf, len, bound, blen); diff --git a/crypto/pkcs7/pk7_smime.c b/crypto/pkcs7/pk7_smime.c index c8cd5a7f73..2ececcd07e 100644 --- a/crypto/pkcs7/pk7_smime.c +++ b/crypto/pkcs7/pk7_smime.c @@ -111,7 +111,7 @@ PKCS7 *PKCS7_sign(X509 *signcert, EVP_PKEY *pkey, STACK_OF(X509) *certs, /* Add SMIMECapabilities */ if(!(flags & PKCS7_NOSMIMECAP)) { - if(!(smcap = sk_X509_ALGOR_new(NULL))) { + if(!(smcap = sk_X509_ALGOR_new_null())) { PKCS7err(PKCS7_F_PKCS7_SIGN,ERR_R_MALLOC_FAILURE); return NULL; } @@ -285,7 +285,7 @@ STACK_OF(X509) *PKCS7_get0_signers(PKCS7 *p7, STACK_OF(X509) *certs, int flags) PKCS7err(PKCS7_F_PKCS7_GET0_SIGNERS,PKCS7_R_WRONG_CONTENT_TYPE); return NULL; } - if(!(signers = sk_X509_new(NULL))) { + if(!(signers = sk_X509_new_null())) { PKCS7err(PKCS7_F_PKCS7_GET0_SIGNERS,ERR_R_MALLOC_FAILURE); return NULL; } diff --git a/crypto/x509v3/v3_akey.c b/crypto/x509v3/v3_akey.c index 877ae7da7b..0889a18993 100644 --- a/crypto/x509v3/v3_akey.c +++ b/crypto/x509v3/v3_akey.c @@ -224,7 +224,7 @@ if((issuer && !ikeyid) || (issuer == 2)) { if(!(akeyid = AUTHORITY_KEYID_new())) goto err; if(isname) { - if(!(gens = sk_GENERAL_NAME_new(NULL)) || !(gen = GENERAL_NAME_new()) + if(!(gens = sk_GENERAL_NAME_new_null()) || !(gen = GENERAL_NAME_new()) || !sk_GENERAL_NAME_push(gens, gen)) { X509V3err(X509V3_F_V2I_AUTHORITY_KEYID,ERR_R_MALLOC_FAILURE); goto err; diff --git a/crypto/x509v3/v3_alt.c b/crypto/x509v3/v3_alt.c index 5ccd1e0e3d..733919f250 100644 --- a/crypto/x509v3/v3_alt.c +++ b/crypto/x509v3/v3_alt.c @@ -160,7 +160,7 @@ static STACK_OF(GENERAL_NAME) *v2i_issuer_alt(X509V3_EXT_METHOD *method, STACK_OF(GENERAL_NAME) *gens = NULL; CONF_VALUE *cnf; int i; - if(!(gens = sk_GENERAL_NAME_new(NULL))) { + if(!(gens = sk_GENERAL_NAME_new_null())) { X509V3err(X509V3_F_V2I_GENERAL_NAMES,ERR_R_MALLOC_FAILURE); return NULL; } @@ -225,7 +225,7 @@ static STACK_OF(GENERAL_NAME) *v2i_subject_alt(X509V3_EXT_METHOD *method, STACK_OF(GENERAL_NAME) *gens = NULL; CONF_VALUE *cnf; int i; - if(!(gens = sk_GENERAL_NAME_new(NULL))) { + if(!(gens = sk_GENERAL_NAME_new_null())) { X509V3err(X509V3_F_V2I_GENERAL_NAMES,ERR_R_MALLOC_FAILURE); return NULL; } @@ -304,7 +304,7 @@ STACK_OF(GENERAL_NAME) *v2i_GENERAL_NAMES(X509V3_EXT_METHOD *method, STACK_OF(GENERAL_NAME) *gens = NULL; CONF_VALUE *cnf; int i; - if(!(gens = sk_GENERAL_NAME_new(NULL))) { + if(!(gens = sk_GENERAL_NAME_new_null())) { X509V3err(X509V3_F_V2I_GENERAL_NAMES,ERR_R_MALLOC_FAILURE); return NULL; } diff --git a/crypto/x509v3/v3_crld.c b/crypto/x509v3/v3_crld.c index 5ca5373f39..67feea4017 100644 --- a/crypto/x509v3/v3_crld.c +++ b/crypto/x509v3/v3_crld.c @@ -109,7 +109,7 @@ static STACK_OF(DIST_POINT) *v2i_crld(X509V3_EXT_METHOD *method, GENERAL_NAME *gen = NULL; CONF_VALUE *cnf; int i; - if(!(crld = sk_DIST_POINT_new(NULL))) goto merr; + if(!(crld = sk_DIST_POINT_new_null())) goto merr; for(i = 0; i < sk_CONF_VALUE_num(nval); i++) { DIST_POINT *point; cnf = sk_CONF_VALUE_value(nval, i); diff --git a/crypto/x509v3/v3_extku.c b/crypto/x509v3/v3_extku.c index e039d21cbf..53ec40a027 100644 --- a/crypto/x509v3/v3_extku.c +++ b/crypto/x509v3/v3_extku.c @@ -129,7 +129,7 @@ ASN1_OBJECT *objtmp; CONF_VALUE *val; int i; -if(!(extku = sk_ASN1_OBJECT_new(NULL))) { +if(!(extku = sk_ASN1_OBJECT_new_null())) { X509V3err(X509V3_F_V2I_EXT_KU,ERR_R_MALLOC_FAILURE); return NULL; } diff --git a/crypto/x509v3/v3_genn.c b/crypto/x509v3/v3_genn.c index 2de1c4b428..d44751458e 100644 --- a/crypto/x509v3/v3_genn.c +++ b/crypto/x509v3/v3_genn.c @@ -220,7 +220,7 @@ void GENERAL_NAME_free(GENERAL_NAME *a) STACK_OF(GENERAL_NAME) *GENERAL_NAMES_new() { - return sk_GENERAL_NAME_new(NULL); + return sk_GENERAL_NAME_new_null(); } void GENERAL_NAMES_free(STACK_OF(GENERAL_NAME) *a) diff --git a/crypto/x509v3/v3_info.c b/crypto/x509v3/v3_info.c index 9ada0b310b..a045a629ee 100644 --- a/crypto/x509v3/v3_info.c +++ b/crypto/x509v3/v3_info.c @@ -119,7 +119,7 @@ static STACK_OF(ACCESS_DESCRIPTION) *v2i_AUTHORITY_INFO_ACCESS(X509V3_EXT_METHOD ACCESS_DESCRIPTION *acc; int i, objlen; char *objtmp, *ptmp; - if(!(ainfo = sk_ACCESS_DESCRIPTION_new(NULL))) { + if(!(ainfo = sk_ACCESS_DESCRIPTION_new_null())) { X509V3err(X509V3_F_V2I_ACCESS_DESCRIPTION,ERR_R_MALLOC_FAILURE); return NULL; } @@ -209,7 +209,7 @@ void ACCESS_DESCRIPTION_free(ACCESS_DESCRIPTION *a) STACK_OF(ACCESS_DESCRIPTION) *AUTHORITY_INFO_ACCESS_new(void) { - return sk_ACCESS_DESCRIPTION_new(NULL); + return sk_ACCESS_DESCRIPTION_new_null(); } void AUTHORITY_INFO_ACCESS_free(STACK_OF(ACCESS_DESCRIPTION) *a) diff --git a/crypto/x509v3/v3_utl.c b/crypto/x509v3/v3_utl.c index e6e99b2d5c..619f161b58 100644 --- a/crypto/x509v3/v3_utl.c +++ b/crypto/x509v3/v3_utl.c @@ -80,7 +80,7 @@ int X509V3_add_value(const char *name, const char *value, if(name && !(tname = BUF_strdup(name))) goto err; if(value && !(tvalue = BUF_strdup(value))) goto err;; if(!(vtmp = (CONF_VALUE *)OPENSSL_malloc(sizeof(CONF_VALUE)))) goto err; - if(!*extlist && !(*extlist = sk_CONF_VALUE_new(NULL))) goto err; + if(!*extlist && !(*extlist = sk_CONF_VALUE_new_null())) goto err; vtmp->section = NULL; vtmp->name = tname; vtmp->value = tvalue; diff --git a/ssl/ssl_cert.c b/ssl/ssl_cert.c index 7579a3432d..c26df62c20 100644 --- a/ssl/ssl_cert.c +++ b/ssl/ssl_cert.c @@ -589,7 +589,7 @@ STACK_OF(X509_NAME) *SSL_load_client_CA_file(const char *file) X509_NAME *xn=NULL; STACK_OF(X509_NAME) *ret,*sk; - ret=sk_X509_NAME_new(NULL); + ret=sk_X509_NAME_new_null(); sk=sk_X509_NAME_new(xname_cmp); in=BIO_new(BIO_s_file_internal()); diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c index ce73213b12..f63163f26c 100644 --- a/ssl/ssl_ciph.c +++ b/ssl/ssl_ciph.c @@ -799,7 +799,7 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method, * Allocate new "cipherstack" for the result, return with error * if we cannot get one. */ - if ((cipherstack = sk_SSL_CIPHER_new(NULL)) == NULL) + if ((cipherstack = sk_SSL_CIPHER_new_null()) == NULL) { OPENSSL_free(list); return(NULL); diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c index 1483fad19a..635b25062e 100644 --- a/ssl/ssl_lib.c +++ b/ssl/ssl_lib.c @@ -1036,7 +1036,7 @@ STACK_OF(SSL_CIPHER) *ssl_bytes_to_cipher_list(SSL *s,unsigned char *p,int num, return(NULL); } if ((skp == NULL) || (*skp == NULL)) - sk=sk_SSL_CIPHER_new(NULL); /* change perhaps later */ + sk=sk_SSL_CIPHER_new_null(); /* change perhaps later */ else { sk= *skp; From 1d95fb659da89615152aeb881058793d45024dcd Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 18:42:13 +0000 Subject: [PATCH 13/15] Jeffrey Altman sent me a patch that fixes the problems with GetCursorInfo, but also adds network statistics and performance statistics where available. --- crypto/rand/rand_win.c | 92 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 9 deletions(-) diff --git a/crypto/rand/rand_win.c b/crypto/rand/rand_win.c index e9fc37c567..5ef197341f 100644 --- a/crypto/rand/rand_win.c +++ b/crypto/rand/rand_win.c @@ -169,6 +169,12 @@ typedef BOOL (WINAPI *PROCESS32)(HANDLE, LPPROCESSENTRY32); typedef BOOL (WINAPI *THREAD32)(HANDLE, LPTHREADENTRY32); typedef BOOL (WINAPI *MODULE32)(HANDLE, LPMODULEENTRY32); +#include +#include +typedef NET_API_STATUS (NET_API_FUNCTION * NETSTATGET) + (LMSTR, LMSTR, DWORD, DWORD, LPBYTE*); +typedef NET_API_STATUS (NET_API_FUNCTION * NETFREE)(LPBYTE); + int RAND_poll(void) { MEMORYSTATUS m; @@ -177,15 +183,71 @@ int RAND_poll(void) DWORD w; HWND h; - HMODULE advapi, kernel, user; + HMODULE advapi, kernel, user, netapi; CRYPTACQUIRECONTEXT acquire; CRYPTGENRANDOM gen; CRYPTRELEASECONTEXT release; + NETSTATGET netstatget; + NETFREE netfree; /* load functions dynamically - not available on all systems */ advapi = GetModuleHandle("ADVAPI32.DLL"); kernel = GetModuleHandle("KERNEL32.DLL"); user = GetModuleHandle("USER32.DLL"); + netapi = GetModuleHandle("NETAPI32.DLL"); + + if (netapi) + { + netstatget = (NETSTATGET) GetProcAddress(netapi,"NetStatisticsGet"); + netfree = (NETFREE) GetProcAddress(netapi,"NetApiBufferFree"); + } + + if (netstatget && netfree) + { + LPBYTE outbuf; + /* NetStatisticsGet() is a Unicode only function */ + if (netstatget(NULL, L"LanmanWorkstation", 0, 0, &outbuf) == 0) + { + RAND_add(outbuf, sizeof(STAT_WORKSTATION_0), 0); + netfree(outbuf); + } + if (netstatget(NULL, L"LanmanServer", 0, 0, &outbuf) == 0) + { + RAND_add(outbuf, sizeof(STAT_SERVER_0), 0); + netfree(outbuf); + } + } + + /* Read Performance Statistics from NT/2000 registry */ + /* The size of the performance data can vary from call to call */ + /* so we must guess the size of the buffer to use and increase */ + /* its size if we get an ERROR_MORE_DATA return instead of */ + /* ERROR_SUCCESS. */ + { + LONG rc=ERROR_MORE_DATA; + char * buf=NULL; + DWORD bufsz=0; + DWORD length; + + while (rc == ERROR_MORE_DATA) + { + buf = realloc(buf,bufsz+8192); + if (!buf) + break; + bufsz += 8192; + + length = bufsz; + rc = RegQueryValueEx(HKEY_PERFORMANCE_DATA, "Global", + NULL, NULL, buf, &length); + } + if (rc == ERROR_SUCCESS) + { + RAND_add(&length, sizeof(length), 0); + RAND_add(buf, length, 0); + } + if ( buf ) + free(buf); + } if (advapi) { @@ -255,17 +317,29 @@ int RAND_poll(void) RAND_add(&h, sizeof(h), 0); } -#if 0 + if (cursor) + { + /* unfortunately, its not safe to call GetCursorInfo() + * on NT4 even though it exists in SP3 (or SP6) and + * higher. + */ + OSVERSIONINFO osverinfo ; + osverinfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO) ; + GetVersionEx( &osverinfo ) ; + + if ( osverinfo.dwPlatformId == VER_PLATFORM_WIN32_NT && + osverinfo.dwMajorVersion < 5) + cursor = 0; + } + if (cursor) { /* cursor position */ - PCURSORINFO p = (PCURSORINFO) buf; - p->cbSize = sizeof(CURSORINFO); - if (cursor(p)) - RAND_add(p+sizeof(p->cbSize), - p->cbSize-sizeof(p->cbSize), 0); + CURSORINFO ci; + ci.cbSize = sizeof(CURSORINFO); + if (cursor(&ci)) + RAND_add(&ci, ci.cbSize, 0); } -#endif if (queue) { @@ -277,7 +351,7 @@ int RAND_poll(void) /* Toolhelp32 snapshot: enumerate processes, threads, modules and heap * http://msdn.microsoft.com/library/psdk/winbase/toolhelp_5pfd.htm - * (Win 9x only, not available on NT) + * (Win 9x and 2000 only, not available on NT) * * This seeding method was proposed in Peter Gutmann, Software * Generation of Practically Strong Random Numbers, From e17b7128943f01299481c4f4f091e93017f87598 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 19:20:17 +0000 Subject: [PATCH 14/15] Restore the descriptions to conform with the rest of the documentation. We'll work on better documents after the release of 0.9.6. --- doc/crypto/BIO_s_fd.pod | 28 +++++++++++++++++--------- doc/crypto/BIO_s_file.pod | 41 ++++++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/doc/crypto/BIO_s_fd.pod b/doc/crypto/BIO_s_fd.pod index 61789fd2c1..b1de1d1015 100644 --- a/doc/crypto/BIO_s_fd.pod +++ b/doc/crypto/BIO_s_fd.pod @@ -20,6 +20,20 @@ BIO_s_fd, BIO_set_fd, BIO_get_fd, BIO_new_fd - file descriptor BIO BIO_s_fd() returns the file descriptor BIO method. This is a wrapper round the platforms file descriptor routines such as read() and write(). +BIO_read() and BIO_write() read or write the underlying descriptor. +BIO_puts() is supported but BIO_gets() is not. + +If the close flag is set then then close() is called on the underlying +file descriptor when the BIO is freed. + +BIO_reset() attempts to change the file pointer to the start of file +using lseek(fd, 0, 0). + +BIO_seek() sets the file pointer to position B from start of file +using lseek(fd, ofs, 0). + +BIO_tell() returns the current file position by calling lseek(fd, 0, 1). + BIO_set_fd() sets the file descriptor of BIO B to B and the close flag to B. @@ -31,15 +45,6 @@ BIO_new_fd() returns a file descriptor BIO using B and B. =head1 NOTES -If the close flag is set then then close() is called on the underlying -file descriptor when the BIO is freed. - -BIO_reset() attempts to change the file pointer to the start of file -using lseek(fd, 0, 0). - -BIO_read() and BIO_write() read or write the underlying descriptor. -BIO_puts() is supported but BIO_gets() is not. - The behaviour of BIO_read() and BIO_write() depends on the behavior of the platforms read() and write() calls on the descriptor. If the underlying file descriptor is in a non blocking mode then the BIO will behave in the @@ -53,6 +58,11 @@ instead. BIO_s_fd() returns the file descriptor BIO method. +BIO_reset() returns zero for success and -1 if an error occurred. +BIO_seek() and BIO_tell() return the current file position or -1 +is an error occurred. These values reflect the underlying lseek() +behaviour. + BIO_set_fd() always returns 1. BIO_get_fd() returns the file descriptor or -1 if the BIO has not diff --git a/doc/crypto/BIO_s_file.pod b/doc/crypto/BIO_s_file.pod index 166734b0bc..d449702bff 100644 --- a/doc/crypto/BIO_s_file.pod +++ b/doc/crypto/BIO_s_file.pod @@ -28,6 +28,23 @@ BIO_s_file() returns the BIO file method. As its name implies it is a wrapper round the stdio FILE structure and it is a source/sink BIO. +Calls to BIO_read() and BIO_write() read and write data to the +underlying stream. BIO_gets() and BIO_puts() are supported on file BIOs. + +BIO_flush() on a file BIO calls the fflush() function on the wrapped +stream. + +BIO_reset() attempts to change the file pointer to the start of file +using fseek(stream, 0, 0). + +BIO_seek() sets the file pointer to position B from start of file +using lseek(stream, ofs, 0). + +BIO_eof() calls feof(). + +Setting the BIO_CLOSE flag calls fclose() on the stream when the BIO +is freed. + BIO_new_file() creates a new file BIO with mode B the meaning of B is the same as the stdio function fopen(). The BIO_CLOSE flag is set on the returned BIO. @@ -42,26 +59,17 @@ meaning as in BIO_new_fp(), it is a macro. BIO_get_fp() retrieves the fp of a file BIO, it is a macro. +BIO_seek() is a macro that sets the position pointer to B bytes +from the start of file. + +BIO_tell() returns the value of the position pointer. + BIO_read_filename(), BIO_write_filename(), BIO_append_filename() and BIO_rw_filename() set the file BIO B to use file B for reading, writing, append or read write respectively. =head1 NOTES -Calls to BIO_read() and BIO_write() read and write data to the -underlying stream. BIO_gets() and BIO_puts() are supported on file BIOs. - -BIO_flush() on a file BIO calls the fflush() function on the wrapped -stream. - -BIO_reset() on a file BIO calls fseek() to reset the position indicator -to the start of the file. - -BIO_eof() calls feof(). - -Setting the BIO_CLOSE flag calls fclose() on the stream when the BIO -is freed. - When wrapping stdout, stdin or stderr the underlying stream should not normally be closed so the BIO_NOCLOSE flag should be set. @@ -111,6 +119,11 @@ occurred. BIO_set_fp() and BIO_get_fp() return 1 for success or 0 for failure (although the current implementation never return 0). +BIO_seek() returns the same value as the underlying fseek() function: +0 for success or -1 for failure. + +BIO_tell() returns the current file position. + BIO_read_filename(), BIO_write_filename(), BIO_append_filename() and BIO_rw_filename() return 1 for success or 0 for failure. From d0c2ebf414bf2ebf135bcb36d76087de663832fa Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 17 Sep 2000 20:04:42 +0000 Subject: [PATCH 15/15] A patch from HP for better performance. Submitted by Kevin Steves 3 months ago... --- Configure | 11 +- crypto/bn/asm/README | 12 +- crypto/bn/asm/pa-risc2.s | 2020 +++++++++++++++++++++++++++------- crypto/bn/asm/pa-risc2.s.old | 416 +++++++ crypto/bn/asm/pa-risc2W.s | 1605 +++++++++++++++++++++++++++ 5 files changed, 3648 insertions(+), 416 deletions(-) create mode 100644 crypto/bn/asm/pa-risc2.s.old create mode 100644 crypto/bn/asm/pa-risc2W.s diff --git a/Configure b/Configure index 6195c41b8b..7c9e97bce4 100755 --- a/Configure +++ b/Configure @@ -212,12 +212,21 @@ my %table=( # crypto/sha/sha_lcl.h. # # -"hpux-parisc-cc","cc:-Ae +O3 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl", +#!#"hpux-parisc-cc","cc:-Ae +O3 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl", # Since there is mention of this in shlib/hpux10-cc.sh "hpux-parisc-cc-o4","cc:-Ae +O4 +ESlit -z -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl", "hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W:::-ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1::::::::::dl", "hpux64-parisc-cc","cc:-Ae +DD64 +O3 +ESlit -z -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldld:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::::::::::dl", +# More attempts at unified 10.X and 11.X targets for HP C compiler. +# +# Chris Ruemmler +# Kevin Steves +"hpux-parisc-cc","cc:+O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DBN_DIV2W -DMD32_XARRAY::-D_REENTRANT:-ldl:MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::::::::::dl", +"hpux-parisc2-cc","cc:+DA2.0 +DS2.0 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldl:SIXTY_FOUR_BIT MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:asm/pa-risc2.o:::::::::dl", +"hpux64-parisc2-cc","cc:+DD64 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT:asm/pa-risc2W.o:::::::::dl", +"hpux-parisc1_1-cc","cc:+DA1.1 +DS1.1 +O3 +Optrs_strongly_typed +Olibcalls -Ae +ESlit -DB_ENDIAN -DMD32_XARRAY::-D_REENTRANT:-ldl:MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::::::::::dl", + # HPUX 9.X config. # Don't use the bundled cc. It is broken. Use HP ANSI C if possible, or # egcs. gcc 2.8.1 is also broken. diff --git a/crypto/bn/asm/README b/crypto/bn/asm/README index 86bf64cfc2..a0fe58a677 100644 --- a/crypto/bn/asm/README +++ b/crypto/bn/asm/README @@ -15,9 +15,9 @@ On the 2 alpha C compilers I had access to, it was not possible to do were 64 bits). So the hand assember gives access to the 128 bit result and a 2 times speedup :-). -There are 2 versions of assember for the HP PA-RISC. -pa-risc.s is the origional one which works fine. -pa-risc2.s is a new version that often generates warnings but if the -tests pass, it gives performance that is over 2 times faster than -pa-risc.s. -Both were generated using gcc :-) +There are 3 versions of assember for the HP PA-RISC. + +pa-risc.s is the origional one which works fine and generated using gcc :-) + +pa-risc2W.s and pa-risc2.s are 64 and 32-bit PA-RISC 2.0 implementations +by Chris Ruemmler from HP (with some help from the HP C compiler). diff --git a/crypto/bn/asm/pa-risc2.s b/crypto/bn/asm/pa-risc2.s index c2725996a4..7239aa2c76 100644 --- a/crypto/bn/asm/pa-risc2.s +++ b/crypto/bn/asm/pa-risc2.s @@ -1,416 +1,1618 @@ - .SPACE $PRIVATE$ - .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 - .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 - .SPACE $TEXT$ - .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 - .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY - .IMPORT $global$,DATA - .IMPORT $$dyncall,MILLICODE -; gcc_compiled.: - .SPACE $TEXT$ - .SUBSPA $CODE$ +; +; PA-RISC 2.0 implementation of bn_asm code, based on the +; 64-bit version of the code. This code is effectively the +; same as the 64-bit version except the register model is +; slightly different given all values must be 32-bit between +; function calls. Thus the 64-bit return values are returned +; in %ret0 and %ret1 vs just %ret0 as is done in 64-bit +; +; +; This code is approximately 2x faster than the C version +; for RSA/DSA. +; +; See http://devresource.hp.com/ for more details on the PA-RISC +; architecture. Also see the book "PA-RISC 2.0 Architecture" +; by Gerry Kane for information on the instruction set architecture. +; +; Code written by Chris Ruemmler (with some help from the HP C +; compiler). +; +; The code compiles with HP's assembler +; + + .level 2.0N + .space $TEXT$ + .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY + +; +; Global Register definitions used for the routines. +; +; Some information about HP's runtime architecture for 32-bits. +; +; "Caller save" means the calling function must save the register +; if it wants the register to be preserved. +; "Callee save" means if a function uses the register, it must save +; the value before using it. +; +; For the floating point registers +; +; "caller save" registers: fr4-fr11, fr22-fr31 +; "callee save" registers: fr12-fr21 +; "special" registers: fr0-fr3 (status and exception registers) +; +; For the integer registers +; value zero : r0 +; "caller save" registers: r1,r19-r26 +; "callee save" registers: r3-r18 +; return register : r2 (rp) +; return values ; r28,r29 (ret0,ret1) +; Stack pointer ; r30 (sp) +; millicode return ptr ; r31 (also a caller save register) + + +; +; Arguments to the routines +; +r_ptr .reg %r26 +a_ptr .reg %r25 +b_ptr .reg %r24 +num .reg %r24 +n .reg %r23 + +; +; Note that the "w" argument for bn_mul_add_words and bn_mul_words +; is passed on the stack at a delta of -56 from the top of stack +; as the routine is entered. +; + +; +; Globals used in some routines +; + +top_overflow .reg %r23 +high_mask .reg %r22 ; value 0xffffffff80000000L + + +;------------------------------------------------------------------------------ +; +; bn_mul_add_words +; +;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, +; int num, BN_ULONG w) +; +; arg0 = r_ptr +; arg1 = a_ptr +; arg3 = num +; -56(sp) = w +; +; Local register definitions +; + +fm1 .reg %fr22 +fm .reg %fr23 +ht_temp .reg %fr24 +ht_temp_1 .reg %fr25 +lt_temp .reg %fr26 +lt_temp_1 .reg %fr27 +fm1_1 .reg %fr28 +fm_1 .reg %fr29 + +fw_h .reg %fr7L +fw_l .reg %fr7R +fw .reg %fr7 + +fht_0 .reg %fr8L +flt_0 .reg %fr8R +t_float_0 .reg %fr8 + +fht_1 .reg %fr9L +flt_1 .reg %fr9R +t_float_1 .reg %fr9 + +tmp_0 .reg %r31 +tmp_1 .reg %r21 +m_0 .reg %r20 +m_1 .reg %r19 +ht_0 .reg %r1 +ht_1 .reg %r3 +lt_0 .reg %r4 +lt_1 .reg %r5 +m1_0 .reg %r6 +m1_1 .reg %r7 +rp_val .reg %r8 +rp_val_1 .reg %r9 - .align 4 - .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR bn_mul_add_words - .PROC - .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r4,64(0,%r30) - copy %r24,%r31 - stw %r3,-60(0,%r30) - ldi 0,%r20 - ldo 12(%r26),%r2 - stw %r23,-16(0,%r30) - copy %r25,%r3 - ldo 12(%r3),%r1 - fldws -16(0,%r30),%fr8L -L$0010 - copy %r20,%r25 - ldi 0,%r24 - fldws 0(0,%r3),%fr9L - ldw 0(0,%r26),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,0(0,%r26) - copy %r20,%r25 - ldi 0,%r24 - fldws -8(0,%r1),%fr9L - ldw -8(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,-8(0,%r2) - copy %r20,%r25 - ldi 0,%r24 - fldws -4(0,%r1),%fr9L - ldw -4(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,-4(0,%r2) - copy %r20,%r25 - ldi 0,%r24 - fldws 0(0,%r1),%fr9L - ldw 0(0,%r2),%r19 - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r19,%r23 - ldw -16(0,%r30),%r28 - ldw -12(0,%r30),%r29 - ldi 0,%r22 - add %r23,%r29,%r29 - addc %r22,%r28,%r28 - add %r25,%r29,%r29 - addc %r24,%r28,%r28 - copy %r28,%r21 - ldi 0,%r20 - copy %r21,%r20 - addib,= -1,%r31,L$0011 - stw %r29,0(0,%r2) - ldo 16(%r1),%r1 - ldo 16(%r3),%r3 - ldo 16(%r2),%r2 - bl L$0010,0 - ldo 16(%r26),%r26 -L$0011 - copy %r20,%r28 - ldw -84(0,%r30),%r2 - ldw -60(0,%r30),%r3 - bv 0(%r2) - ldwm -64(0,%r30),%r4 - .EXIT - .PROCEND - .align 4 - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR + .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN + .proc + .callinfo frame=128 + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP ; Needed to make the loop 16-byte aligned + NOP ; needed to make the loop 16-byte aligned + + STD %r5,16(%sp) ; save r5 + NOP + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + + STD %r8,40(%sp) ; save r8 + STD %r9,48(%sp) ; save r9 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit + LDO 128(%sp),%sp ; bump stack + + ; + ; The loop is unrolled twice, so if there is only 1 number + ; then go straight to the cleanup code. + ; + CMPIB,= 1,num,bn_mul_add_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_add_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDD 8(r_ptr),rp_val_1 ; rp[1] + + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1[0] + FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] + + XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h + XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m[0] + FSTD fm_1,-40(%sp) ; -40(sp) = m[1] + + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 + + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 + + LDD -8(%sp),m_0 ; m[0] + LDD -40(%sp),m_1 ; m[1] + LDD -16(%sp),m1_0 ; m1[0] + LDD -48(%sp),m1_1 ; m1[1] + + LDD -24(%sp),ht_0 ; ht[0] + LDD -56(%sp),ht_1 ; ht[1] + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; + + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) + ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) + + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) + ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) + EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 + + EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 + ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) + ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) + + ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + + ADD %ret1,lt_0,lt_0 ; lt[0] = lt[0] + c; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + + LDO -2(num),num ; num = num - 2; + ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + + ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] + ADD,DC ht_1,%r0,%ret1 ; ht[1]++ + LDO 16(a_ptr),a_ptr ; a_ptr += 2 + + STD lt_1,8(r_ptr) ; rp[1] = lt[1] + CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do + LDO 16(r_ptr),r_ptr ; r_ptr += 2 + + CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_add_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDO 8(a_ptr),a_ptr ; a_ptr++ + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 ; m1 = temp1 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD %ret1,tmp_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] + ADD,DC ht_0,%r0,%ret1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_add_words_exit + .EXIT + + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -80(%sp),%r9 ; restore r9 + LDD -88(%sp),%r8 ; restore r8 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +; +; arg0 = rp +; arg1 = ap +; arg3 = num +; w on stack at -56(sp) + bn_mul_words - .PROC - .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3 - .ENTRY - stw %r2,-20(0,%r30) - copy %r25,%r2 - stwm %r4,64(0,%r30) - copy %r24,%r19 - ldi 0,%r28 - stw %r23,-16(0,%r30) - ldo 12(%r26),%r31 - ldo 12(%r2),%r29 - fldws -16(0,%r30),%fr8L -L$0026 - fldws 0(0,%r2),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,0(0,%r26) - fldws -8(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,-8(0,%r31) - fldws -4(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,-4(0,%r31) - fldws 0(0,%r29),%fr9L - xmpyu %fr8L,%fr9L,%fr9 - fstds %fr9,-16(0,%r30) - copy %r28,%r21 - ldi 0,%r20 - ldw -16(0,%r30),%r24 - ldw -12(0,%r30),%r25 - add %r21,%r25,%r25 - addc %r20,%r24,%r24 - copy %r24,%r23 - ldi 0,%r22 - copy %r23,%r28 - addib,= -1,%r19,L$0027 - stw %r25,0(0,%r31) - ldo 16(%r29),%r29 - ldo 16(%r2),%r2 - ldo 16(%r31),%r31 - bl L$0026,0 - ldo 16(%r26),%r26 -L$0027 - ldw -84(0,%r30),%r2 - bv 0(%r2) - ldwm -64(0,%r30),%r4 - .EXIT - .PROCEND - .align 4 - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR + .proc + .callinfo frame=128 + .entry + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + COPY %r0,%ret1 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + + CMPIB,>= 0,num,bn_mul_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; See if only 1 word to do, thus just do cleanup + ; + CMPIB,= 1,num,bn_mul_words_single_top + FLDD -184(%sp),fw ; (-56-128) load up w into fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l + + FSTD fm1,-16(%sp) ; -16(sp) = m1 + FSTD fm1_1,-48(%sp) ; -48(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h + + FSTD fm,-8(%sp) ; -8(sp) = m + FSTD fm_1,-40(%sp) ; -40(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h + + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt + LDD -8(%sp),m_0 + LDD -40(%sp),m_1 + + LDD -16(%sp),m1_0 + LDD -48(%sp),m1_1 + LDD -24(%sp),ht_0 + LDD -56(%sp),ht_1 + + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) + ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + EXTRD,U tmp_1,31,32,m_1 ; m>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD lt_1,m1_1,lt_1 ; lt = lt+m1; + ADD,DC ht_1,%r0,ht_1 ; ht++ + ADD %ret1,lt_0,lt_0 ; lt = lt + c (ret1); + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) + ADD,DC ht_1,%r0,ht_1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + STD lt_1,8(r_ptr) ; rp[1] = lt + + COPY ht_1,%ret1 ; carry = ht + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_mul_words_unroll2 + LDO 16(r_ptr),r_ptr ; rp++ + + CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt= lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD %ret1,lt_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + COPY ht_0,%ret1 ; copy carry + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_words_exit + .EXIT + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND + +;---------------------------------------------------------------------------- +; +;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; + bn_sqr_words - .PROC - .CALLINFO FRAME=0,NO_CALLS - .ENTRY - ldo 28(%r26),%r19 - ldo 12(%r25),%r28 -L$0042 - fldws 0(0,%r25),%fr8L - fldws 0(0,%r25),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,0(0,%r26) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-24(0,%r19) - fldws -8(0,%r28),%fr8L - fldws -8(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-20(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-16(0,%r19) - fldws -4(0,%r28),%fr8L - fldws -4(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-12(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,-8(0,%r19) - fldws 0(0,%r28),%fr8L - fldws 0(0,%r28),%fr8R - xmpyu %fr8L,%fr8R,%fr8 - fstds %fr8,-16(0,%r30) - ldw -16(0,%r30),%r22 - ldw -12(0,%r30),%r23 - stw %r23,-4(0,%r19) - copy %r22,%r21 - ldi 0,%r20 - addib,= -1,%r24,L$0049 - stw %r21,0(0,%r19) - ldo 16(%r28),%r28 - ldo 16(%r25),%r25 - ldo 32(%r19),%r19 - bl L$0042,0 - ldo 32(%r26),%r26 -L$0049 - bv,n 0(%r2) - .EXIT - .PROCEND - .IMPORT BN_num_bits_word,CODE - .IMPORT fprintf,CODE - .IMPORT __iob,DATA - .SPACE $TEXT$ - .SUBSPA $LIT$ + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 - .align 4 -L$C0000 - .STRING "Division would overflow (%d)\x0a\x00" - .IMPORT abort,CODE - .SPACE $TEXT$ - .SUBSPA $CODE$ + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 - .align 4 - .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR -bn_div64 + CMPIB,>= 0,num,bn_sqr_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; If only 1, the goto straight to cleanup + ; + CMPIB,= 1,num,bn_sqr_words_single_top + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + +bn_sqr_words_unroll2 + FLDD 0(a_ptr),t_float_0 ; a[0] + FLDD 8(a_ptr),t_float_1 ; a[1] + XMPYU fht_0,flt_0,fm ; m[0] + XMPYU fht_1,flt_1,fm_1 ; m[1] + + FSTD fm,-24(%sp) ; store m[0] + FSTD fm_1,-56(%sp) ; store m[1] + XMPYU flt_0,flt_0,lt_temp ; lt[0] + XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] + + FSTD lt_temp,-16(%sp) ; store lt[0] + FSTD lt_temp_1,-48(%sp) ; store lt[1] + XMPYU fht_0,fht_0,ht_temp ; ht[0] + XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] + + FSTD ht_temp,-8(%sp) ; store ht[0] + FSTD ht_temp_1,-40(%sp) ; store ht[1] + LDD -24(%sp),m_0 + LDD -56(%sp),m_1 + + AND m_0,high_mask,tmp_0 ; m[0] & Mask + AND m_1,high_mask,tmp_1 ; m[1] & Mask + DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 + DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 + + LDD -16(%sp),lt_0 + LDD -48(%sp),lt_1 + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 + EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 + + LDD -8(%sp),ht_0 + LDD -40(%sp),ht_1 + ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 + ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 + + ADD lt_0,m_0,lt_0 ; lt = lt+m + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + STD ht_0,8(r_ptr) ; rp[1] = ht[1] + + ADD lt_1,m_1,lt_1 ; lt = lt+m + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_1,16(r_ptr) ; rp[2] = lt[1] + STD ht_1,24(r_ptr) ; rp[3] = ht[1] + + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_sqr_words_unroll2 + LDO 32(r_ptr),r_ptr ; rp += 4 + + CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_sqr_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,flt_0,fm ; m + FSTD fm,-24(%sp) ; store m + + XMPYU flt_0,flt_0,lt_temp ; lt + FSTD lt_temp,-16(%sp) ; store lt + + XMPYU fht_0,fht_0,ht_temp ; ht + FSTD ht_temp,-8(%sp) ; store ht + + LDD -24(%sp),m_0 ; load m + AND m_0,high_mask,tmp_0 ; m & Mask + DEPD,Z m_0,30,31,m_0 ; m << 32+1 + LDD -16(%sp),lt_0 ; lt + + LDD -8(%sp),ht_0 ; ht + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 + ADD m_0,lt_0,lt_0 ; lt = lt+m + ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 + ADD,DC ht_0,%r0,ht_0 ; ht++ + + STD lt_0,0(r_ptr) ; rp[0] = lt + STD ht_0,8(r_ptr) ; rp[1] = ht + +bn_sqr_words_exit + .EXIT + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + .PROCEND ;in=23,24,25,26,29;out=28; + + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t .reg %r22 +b .reg %r21 +l .reg %r20 + +bn_add_words + .proc + .entry + .callinfo + .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + CMPIB,>= 0,n,bn_add_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_add_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_add_words_unroll2 + LDD 0(a_ptr),t + LDD 0(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + + LDD 8(a_ptr),t + LDD 8(b_ptr),b + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_add_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_add_words_exit ; are we done? + +bn_add_words_single_top + LDD 0(a_ptr),t + LDD 0(b_ptr),b + + ADD t,%ret1,t ; t = t+c; + ADD,DC %r0,%r0,%ret1 ; set c to carry (could use CMPCLR??) + ADD t,b,l ; l = t + b[0] + ADD,DC %ret1,%r0,%ret1 ; c+= carry + STD l,0(r_ptr) + +bn_add_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t1 .reg %r22 +t2 .reg %r21 +sub_tmp1 .reg %r20 +sub_tmp2 .reg %r19 + + +bn_sub_words + .proc + .callinfo + .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + CMPIB,>= 0,n,bn_sub_words_exit + COPY %r0,%ret1 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_sub_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_sub_words_unroll2 + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,0(r_ptr) + + LDD 8(a_ptr),t1 + LDD 8(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + STD sub_tmp1,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_sub_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? + +bn_sub_words_single_top + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret1,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret1 + + STD sub_tmp1,0(r_ptr) + +bn_sub_words_exit + .EXIT + BVE (%rp) + EXTRD,U %ret1,31,32,%ret0 ; for 32-bit, return in ret0/ret1 + .PROCEND ;in=23,24,25,26,29;out=28; + +;------------------------------------------------------------------------------ +; +; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) +; +; arg0 = h +; arg1 = l +; arg2 = d +; +; This is mainly just output from the HP C compiler. +; +;------------------------------------------------------------------------------ +bn_div_words .PROC - .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r8,128(0,%r30) - stw %r7,-124(0,%r30) - stw %r4,-112(0,%r30) - stw %r3,-108(0,%r30) - copy %r26,%r3 - copy %r25,%r4 - stw %r6,-120(0,%r30) - ldi 0,%r7 - stw %r5,-116(0,%r30) - movb,<> %r24,%r5,L$0051 - ldi 2,%r6 - bl L$0068,0 - ldi -1,%r28 -L$0051 - .CALL ARGW0=GR - bl BN_num_bits_word,%r2 - copy %r5,%r26 - copy %r28,%r24 - ldi 32,%r19 - comb,= %r19,%r24,L$0052 - subi 31,%r24,%r19 - mtsar %r19 - zvdepi 1,32,%r19 - comb,>>= %r19,%r3,L$0052 - addil LR'__iob-$global$+32,%r27 - ldo RR'__iob-$global$+32(%r1),%r26 - ldil LR'L$C0000,%r25 - .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR - bl fprintf,%r2 - ldo RR'L$C0000(%r25),%r25 - .CALL - bl abort,%r2 - nop -L$0052 - comb,>> %r5,%r3,L$0053 - subi 32,%r24,%r24 - sub %r3,%r5,%r3 -L$0053 - comib,= 0,%r24,L$0054 - subi 31,%r24,%r19 - mtsar %r19 - zvdep %r5,32,%r5 - zvdep %r3,32,%r21 - subi 32,%r24,%r20 - mtsar %r20 - vshd 0,%r4,%r20 - or %r21,%r20,%r3 - mtsar %r19 - zvdep %r4,32,%r4 -L$0054 - extru %r5,15,16,%r23 - extru %r5,31,16,%r28 -L$0055 - extru %r3,15,16,%r19 - comb,<> %r23,%r19,L$0058 - copy %r3,%r26 - bl L$0059,0 - zdepi -1,31,16,%r29 -L$0058 - .IMPORT $$divU,MILLICODE - bl $$divU,%r31 - copy %r23,%r25 -L$0059 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r23,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr9 - ldw -16(0,%r30),%r8 - fstws %fr9R,-16(0,%r30) - copy %r8,%r22 - ldw -16(0,%r30),%r8 - extru %r4,15,16,%r24 - copy %r8,%r21 -L$0060 - sub %r3,%r21,%r20 - copy %r20,%r19 - depi 0,31,16,%r19 - comib,<> 0,%r19,L$0061 - zdep %r20,15,16,%r19 - addl %r19,%r24,%r19 - comb,>>= %r19,%r22,L$0061 - sub %r22,%r28,%r22 - sub %r21,%r23,%r21 - bl L$0060,0 - ldo -1(%r29),%r29 -L$0061 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r8 - stw %r23,-16(0,%r30) - fldws -16(0,%r30),%fr10R - copy %r8,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - extru %r19,15,16,%r20 - ldw -16(0,%r30),%r8 - zdep %r19,15,16,%r19 - addl %r8,%r20,%r20 - comclr,<<= %r19,%r4,0 - addi 1,%r20,%r20 - comb,<<= %r20,%r3,L$0066 - sub %r4,%r19,%r4 - addl %r3,%r5,%r3 - ldo -1(%r29),%r29 -L$0066 - addib,= -1,%r6,L$0056 - sub %r3,%r20,%r3 - zdep %r29,15,16,%r7 - shd %r3,%r4,16,%r3 - bl L$0055,0 - zdep %r4,15,16,%r4 -L$0056 - or %r7,%r29,%r28 -L$0068 - ldw -148(0,%r30),%r2 - ldw -124(0,%r30),%r7 - ldw -120(0,%r30),%r6 - ldw -116(0,%r30),%r5 - ldw -112(0,%r30),%r4 - ldw -108(0,%r30),%r3 - bv 0(%r2) - ldwm -128(0,%r30),%r8 - .EXIT - .PROCEND + .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR,LONG_RETURN + .IMPORT BN_num_bits_word,CODE + .IMPORT __iob,DATA + .IMPORT fprintf,CODE + .IMPORT abort,CODE + .IMPORT $$div2U,MILLICODE + .CALLINFO CALLER,FRAME=144,ENTRY_GR=%r9,SAVE_RP,ARGS_SAVED,ORDERING_AWARE + .ENTRY + STW %r2,-20(%r30) ;offset 0x8ec + STW,MA %r3,192(%r30) ;offset 0x8f0 + STW %r4,-188(%r30) ;offset 0x8f4 + DEPD %r5,31,32,%r6 ;offset 0x8f8 + STD %r6,-184(%r30) ;offset 0x8fc + DEPD %r7,31,32,%r8 ;offset 0x900 + STD %r8,-176(%r30) ;offset 0x904 + STW %r9,-168(%r30) ;offset 0x908 + LDD -248(%r30),%r3 ;offset 0x90c + COPY %r26,%r4 ;offset 0x910 + COPY %r24,%r5 ;offset 0x914 + DEPD %r25,31,32,%r4 ;offset 0x918 + CMPB,*<> %r3,%r0,$0006000C ;offset 0x91c + DEPD %r23,31,32,%r5 ;offset 0x920 + MOVIB,TR -1,%r29,$00060002 ;offset 0x924 + EXTRD,U %r29,31,32,%r28 ;offset 0x928 +$0006002A + LDO -1(%r29),%r29 ;offset 0x92c + SUB %r23,%r7,%r23 ;offset 0x930 +$00060024 + SUB %r4,%r31,%r25 ;offset 0x934 + AND %r25,%r19,%r26 ;offset 0x938 + CMPB,*<>,N %r0,%r26,$00060046 ;offset 0x93c + DEPD,Z %r25,31,32,%r20 ;offset 0x940 + OR %r20,%r24,%r21 ;offset 0x944 + CMPB,*<<,N %r21,%r23,$0006002A ;offset 0x948 + SUB %r31,%r2,%r31 ;offset 0x94c +$00060046 +$0006002E + DEPD,Z %r23,31,32,%r25 ;offset 0x950 + EXTRD,U %r23,31,32,%r26 ;offset 0x954 + AND %r25,%r19,%r24 ;offset 0x958 + ADD,L %r31,%r26,%r31 ;offset 0x95c + CMPCLR,*>>= %r5,%r24,%r0 ;offset 0x960 + LDO 1(%r31),%r31 ;offset 0x964 +$00060032 + CMPB,*<<=,N %r31,%r4,$00060036 ;offset 0x968 + LDO -1(%r29),%r29 ;offset 0x96c + ADD,L %r4,%r3,%r4 ;offset 0x970 +$00060036 + ADDIB,=,N -1,%r8,$D0 ;offset 0x974 + SUB %r5,%r24,%r28 ;offset 0x978 +$0006003A + SUB %r4,%r31,%r24 ;offset 0x97c + SHRPD %r24,%r28,32,%r4 ;offset 0x980 + DEPD,Z %r29,31,32,%r9 ;offset 0x984 + DEPD,Z %r28,31,32,%r5 ;offset 0x988 +$0006001C + EXTRD,U %r4,31,32,%r31 ;offset 0x98c + CMPB,*<>,N %r31,%r2,$00060020 ;offset 0x990 + MOVB,TR %r6,%r29,$D1 ;offset 0x994 + STD %r29,-152(%r30) ;offset 0x998 +$0006000C + EXTRD,U %r3,31,32,%r25 ;offset 0x99c + COPY %r3,%r26 ;offset 0x9a0 + EXTRD,U %r3,31,32,%r9 ;offset 0x9a4 + EXTRD,U %r4,31,32,%r8 ;offset 0x9a8 + .CALL ARGW0=GR,ARGW1=GR,RTNVAL=GR ;in=25,26;out=28; + B,L BN_num_bits_word,%r2 ;offset 0x9ac + EXTRD,U %r5,31,32,%r7 ;offset 0x9b0 + LDI 64,%r20 ;offset 0x9b4 + DEPD %r7,31,32,%r5 ;offset 0x9b8 + DEPD %r8,31,32,%r4 ;offset 0x9bc + DEPD %r9,31,32,%r3 ;offset 0x9c0 + CMPB,= %r28,%r20,$00060012 ;offset 0x9c4 + COPY %r28,%r24 ;offset 0x9c8 + MTSARCM %r24 ;offset 0x9cc + DEPDI,Z -1,%sar,1,%r19 ;offset 0x9d0 + CMPB,*>>,N %r4,%r19,$D2 ;offset 0x9d4 +$00060012 + SUBI 64,%r24,%r31 ;offset 0x9d8 + CMPCLR,*<< %r4,%r3,%r0 ;offset 0x9dc + SUB %r4,%r3,%r4 ;offset 0x9e0 +$00060016 + CMPB,= %r31,%r0,$0006001A ;offset 0x9e4 + COPY %r0,%r9 ;offset 0x9e8 + MTSARCM %r31 ;offset 0x9ec + DEPD,Z %r3,%sar,64,%r3 ;offset 0x9f0 + SUBI 64,%r31,%r26 ;offset 0x9f4 + MTSAR %r26 ;offset 0x9f8 + SHRPD %r4,%r5,%sar,%r4 ;offset 0x9fc + MTSARCM %r31 ;offset 0xa00 + DEPD,Z %r5,%sar,64,%r5 ;offset 0xa04 +$0006001A + DEPDI,Z -1,31,32,%r19 ;offset 0xa08 + AND %r3,%r19,%r29 ;offset 0xa0c + EXTRD,U %r29,31,32,%r2 ;offset 0xa10 + DEPDI,Z -1,63,32,%r6 ;offset 0xa14 + MOVIB,TR 2,%r8,$0006001C ;offset 0xa18 + EXTRD,U %r3,63,32,%r7 ;offset 0xa1c +$D2 + ADDIL LR'__iob-$global$,%r27,%r1 ;offset 0xa20 + LDIL LR'C$7,%r21 ;offset 0xa24 + LDO RR'__iob-$global$+32(%r1),%r26 ;offset 0xa28 + .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR ;in=24,25,26;out=28; + B,L fprintf,%r2 ;offset 0xa2c + LDO RR'C$7(%r21),%r25 ;offset 0xa30 + .CALL ; + B,L abort,%r2 ;offset 0xa34 + NOP ;offset 0xa38 + B $D3 ;offset 0xa3c + LDW -212(%r30),%r2 ;offset 0xa40 +$00060020 + COPY %r4,%r26 ;offset 0xa44 + EXTRD,U %r4,31,32,%r25 ;offset 0xa48 + COPY %r2,%r24 ;offset 0xa4c + .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) + B,L $$div2U,%r31 ;offset 0xa50 + EXTRD,U %r2,31,32,%r23 ;offset 0xa54 + DEPD %r28,31,32,%r29 ;offset 0xa58 +$00060022 + STD %r29,-152(%r30) ;offset 0xa5c +$D1 + AND %r5,%r19,%r24 ;offset 0xa60 + EXTRD,U %r24,31,32,%r24 ;offset 0xa64 + STW %r2,-160(%r30) ;offset 0xa68 + STW %r7,-128(%r30) ;offset 0xa6c + FLDD -152(%r30),%fr4 ;offset 0xa70 + FLDD -152(%r30),%fr7 ;offset 0xa74 + FLDW -160(%r30),%fr8L ;offset 0xa78 + FLDW -128(%r30),%fr5L ;offset 0xa7c + XMPYU %fr8L,%fr7L,%fr10 ;offset 0xa80 + FSTD %fr10,-136(%r30) ;offset 0xa84 + XMPYU %fr8L,%fr7R,%fr22 ;offset 0xa88 + FSTD %fr22,-144(%r30) ;offset 0xa8c + XMPYU %fr5L,%fr4L,%fr11 ;offset 0xa90 + XMPYU %fr5L,%fr4R,%fr23 ;offset 0xa94 + FSTD %fr11,-112(%r30) ;offset 0xa98 + FSTD %fr23,-120(%r30) ;offset 0xa9c + LDD -136(%r30),%r28 ;offset 0xaa0 + DEPD,Z %r28,31,32,%r31 ;offset 0xaa4 + LDD -144(%r30),%r20 ;offset 0xaa8 + ADD,L %r20,%r31,%r31 ;offset 0xaac + LDD -112(%r30),%r22 ;offset 0xab0 + DEPD,Z %r22,31,32,%r22 ;offset 0xab4 + LDD -120(%r30),%r21 ;offset 0xab8 + B $00060024 ;offset 0xabc + ADD,L %r21,%r22,%r23 ;offset 0xac0 +$D0 + OR %r9,%r29,%r29 ;offset 0xac4 +$00060040 + EXTRD,U %r29,31,32,%r28 ;offset 0xac8 +$00060002 +$L2 + LDW -212(%r30),%r2 ;offset 0xacc +$D3 + LDW -168(%r30),%r9 ;offset 0xad0 + LDD -176(%r30),%r8 ;offset 0xad4 + EXTRD,U %r8,31,32,%r7 ;offset 0xad8 + LDD -184(%r30),%r6 ;offset 0xadc + EXTRD,U %r6,31,32,%r5 ;offset 0xae0 + LDW -188(%r30),%r4 ;offset 0xae4 + BVE (%r2) ;offset 0xae8 + .EXIT + LDW,MB -192(%r30),%r3 ;offset 0xaec + .PROCEND ;in=23,25;out=28,29;fpin=105,107; + + + + +;---------------------------------------------------------------------------- +; +; Registers to hold 64-bit values to manipulate. The "L" part +; of the register corresponds to the upper 32-bits, while the "R" +; part corresponds to the lower 32-bits +; +; Note, that when using b6 and b7, the code must save these before +; using them because they are callee save registers +; +; +; Floating point registers to use to save values that +; are manipulated. These don't collide with ftemp1-6 and +; are all caller save registers +; +a0 .reg %fr22 +a0L .reg %fr22L +a0R .reg %fr22R + +a1 .reg %fr23 +a1L .reg %fr23L +a1R .reg %fr23R + +a2 .reg %fr24 +a2L .reg %fr24L +a2R .reg %fr24R + +a3 .reg %fr25 +a3L .reg %fr25L +a3R .reg %fr25R + +a4 .reg %fr26 +a4L .reg %fr26L +a4R .reg %fr26R + +a5 .reg %fr27 +a5L .reg %fr27L +a5R .reg %fr27R + +a6 .reg %fr28 +a6L .reg %fr28L +a6R .reg %fr28R + +a7 .reg %fr29 +a7L .reg %fr29L +a7R .reg %fr29R + +b0 .reg %fr30 +b0L .reg %fr30L +b0R .reg %fr30R + +b1 .reg %fr31 +b1L .reg %fr31L +b1R .reg %fr31R + +; +; Temporary floating point variables, these are all caller save +; registers +; +ftemp1 .reg %fr4 +ftemp2 .reg %fr5 +ftemp3 .reg %fr6 +ftemp4 .reg %fr7 + +; +; The B set of registers when used. +; + +b2 .reg %fr8 +b2L .reg %fr8L +b2R .reg %fr8R + +b3 .reg %fr9 +b3L .reg %fr9L +b3R .reg %fr9R + +b4 .reg %fr10 +b4L .reg %fr10L +b4R .reg %fr10R + +b5 .reg %fr11 +b5L .reg %fr11L +b5R .reg %fr11R + +b6 .reg %fr12 +b6L .reg %fr12L +b6R .reg %fr12R + +b7 .reg %fr13 +b7L .reg %fr13L +b7R .reg %fr13R + +c1 .reg %r21 ; only reg +temp1 .reg %r20 ; only reg +temp2 .reg %r19 ; only reg +temp3 .reg %r31 ; only reg + +m1 .reg %r28 +c2 .reg %r23 +high_one .reg %r1 +ht .reg %r6 +lt .reg %r5 +m .reg %r4 +c3 .reg %r3 + +SQR_ADD_C .macro A0L,A0R,C1,C2,C3 + XMPYU A0L,A0R,ftemp1 ; m + FSTD ftemp1,-24(%sp) ; store m + + XMPYU A0R,A0R,ftemp2 ; lt + FSTD ftemp2,-16(%sp) ; store lt + + XMPYU A0L,A0L,ftemp3 ; ht + FSTD ftemp3,-8(%sp) ; store ht + + LDD -24(%sp),m ; load m + AND m,high_mask,temp2 ; m & Mask + DEPD,Z m,30,31,temp3 ; m << 32+1 + LDD -16(%sp),lt ; lt + + LDD -8(%sp),ht ; ht + EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 + ADD temp3,lt,lt ; lt = lt+m + ADD,L ht,temp1,ht ; ht += temp1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; ht++ + + ADD C2,ht,C2 ; c2=c2+ht + ADD,DC C3,%r0,C3 ; c3++ +.endm + +SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 + XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,A1L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,A1R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,A1L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD ht,ht,ht ; ht=ht+ht; + ADD,DC C3,%r0,C3 ; add in carry (c3++) + + ADD lt,lt,lt ; lt=lt+lt; + ADD,DC ht,%r0,ht ; add in carry (ht++) + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) + LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + +; +;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba8 + .PROC + .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .ENTRY + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 + SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 + STD c2,56(r_ptr) ; r[7] = c2; + COPY %r0,c2 + + SQR_ADD_C a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 + SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 + STD c3,64(r_ptr) ; r[8] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 + STD c1,72(r_ptr) ; r[9] = c1; + COPY %r0,c1 + + SQR_ADD_C a5L,a5R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 + SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 + STD c2,80(r_ptr) ; r[10] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 + STD c3,88(r_ptr) ; r[11] = c3; + COPY %r0,c3 + + SQR_ADD_C a6L,a6R,c1,c2,c3 + SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 + STD c1,96(r_ptr) ; r[12] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 + STD c2,104(r_ptr) ; r[13] = c2; + COPY %r0,c2 + + SQR_ADD_C a7L,a7R,c3,c1,c2 + STD c3, 112(r_ptr) ; r[14] = c3 + STD c1, 120(r_ptr) ; r[15] = c1 + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + STD c2,56(r_ptr) ; r[7] = c2; + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + +;--------------------------------------------------------------------------- + +MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 + XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,B0L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,B0R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,B0L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + + +; +;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba8 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + FLDD 32(b_ptr),b4 + FLDD 40(b_ptr),b5 + FLDD 48(b_ptr),b6 + FLDD 56(b_ptr),b7 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 + STD c1,48(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 + STD c2,56(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 + STD c3,64(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 + STD c1,72(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 + STD c2,80(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 + STD c3,88(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 + STD c1,96(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 + STD c2,104(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 + STD c3,112(r_ptr) + STD c1,120(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + STD c1,48(r_ptr) + STD c2,56(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SPACE $PRIVATE$,SORT=16 + .IMPORT $global$,DATA + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16 +C$7 + .ALIGN 8 + .STRINGZ "Division would overflow (%d)\n" + .END diff --git a/crypto/bn/asm/pa-risc2.s.old b/crypto/bn/asm/pa-risc2.s.old new file mode 100644 index 0000000000..c2725996a4 --- /dev/null +++ b/crypto/bn/asm/pa-risc2.s.old @@ -0,0 +1,416 @@ + .SPACE $PRIVATE$ + .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 + .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 + .SPACE $TEXT$ + .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 + .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY + .IMPORT $global$,DATA + .IMPORT $$dyncall,MILLICODE +; gcc_compiled.: + .SPACE $TEXT$ + .SUBSPA $CODE$ + + .align 4 + .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +bn_mul_add_words + .PROC + .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4 + .ENTRY + stw %r2,-20(0,%r30) + stwm %r4,64(0,%r30) + copy %r24,%r31 + stw %r3,-60(0,%r30) + ldi 0,%r20 + ldo 12(%r26),%r2 + stw %r23,-16(0,%r30) + copy %r25,%r3 + ldo 12(%r3),%r1 + fldws -16(0,%r30),%fr8L +L$0010 + copy %r20,%r25 + ldi 0,%r24 + fldws 0(0,%r3),%fr9L + ldw 0(0,%r26),%r19 + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r19,%r23 + ldw -16(0,%r30),%r28 + ldw -12(0,%r30),%r29 + ldi 0,%r22 + add %r23,%r29,%r29 + addc %r22,%r28,%r28 + add %r25,%r29,%r29 + addc %r24,%r28,%r28 + copy %r28,%r21 + ldi 0,%r20 + copy %r21,%r20 + addib,= -1,%r31,L$0011 + stw %r29,0(0,%r26) + copy %r20,%r25 + ldi 0,%r24 + fldws -8(0,%r1),%fr9L + ldw -8(0,%r2),%r19 + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r19,%r23 + ldw -16(0,%r30),%r28 + ldw -12(0,%r30),%r29 + ldi 0,%r22 + add %r23,%r29,%r29 + addc %r22,%r28,%r28 + add %r25,%r29,%r29 + addc %r24,%r28,%r28 + copy %r28,%r21 + ldi 0,%r20 + copy %r21,%r20 + addib,= -1,%r31,L$0011 + stw %r29,-8(0,%r2) + copy %r20,%r25 + ldi 0,%r24 + fldws -4(0,%r1),%fr9L + ldw -4(0,%r2),%r19 + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r19,%r23 + ldw -16(0,%r30),%r28 + ldw -12(0,%r30),%r29 + ldi 0,%r22 + add %r23,%r29,%r29 + addc %r22,%r28,%r28 + add %r25,%r29,%r29 + addc %r24,%r28,%r28 + copy %r28,%r21 + ldi 0,%r20 + copy %r21,%r20 + addib,= -1,%r31,L$0011 + stw %r29,-4(0,%r2) + copy %r20,%r25 + ldi 0,%r24 + fldws 0(0,%r1),%fr9L + ldw 0(0,%r2),%r19 + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r19,%r23 + ldw -16(0,%r30),%r28 + ldw -12(0,%r30),%r29 + ldi 0,%r22 + add %r23,%r29,%r29 + addc %r22,%r28,%r28 + add %r25,%r29,%r29 + addc %r24,%r28,%r28 + copy %r28,%r21 + ldi 0,%r20 + copy %r21,%r20 + addib,= -1,%r31,L$0011 + stw %r29,0(0,%r2) + ldo 16(%r1),%r1 + ldo 16(%r3),%r3 + ldo 16(%r2),%r2 + bl L$0010,0 + ldo 16(%r26),%r26 +L$0011 + copy %r20,%r28 + ldw -84(0,%r30),%r2 + ldw -60(0,%r30),%r3 + bv 0(%r2) + ldwm -64(0,%r30),%r4 + .EXIT + .PROCEND + .align 4 + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +bn_mul_words + .PROC + .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3 + .ENTRY + stw %r2,-20(0,%r30) + copy %r25,%r2 + stwm %r4,64(0,%r30) + copy %r24,%r19 + ldi 0,%r28 + stw %r23,-16(0,%r30) + ldo 12(%r26),%r31 + ldo 12(%r2),%r29 + fldws -16(0,%r30),%fr8L +L$0026 + fldws 0(0,%r2),%fr9L + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r28,%r21 + ldi 0,%r20 + ldw -16(0,%r30),%r24 + ldw -12(0,%r30),%r25 + add %r21,%r25,%r25 + addc %r20,%r24,%r24 + copy %r24,%r23 + ldi 0,%r22 + copy %r23,%r28 + addib,= -1,%r19,L$0027 + stw %r25,0(0,%r26) + fldws -8(0,%r29),%fr9L + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r28,%r21 + ldi 0,%r20 + ldw -16(0,%r30),%r24 + ldw -12(0,%r30),%r25 + add %r21,%r25,%r25 + addc %r20,%r24,%r24 + copy %r24,%r23 + ldi 0,%r22 + copy %r23,%r28 + addib,= -1,%r19,L$0027 + stw %r25,-8(0,%r31) + fldws -4(0,%r29),%fr9L + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r28,%r21 + ldi 0,%r20 + ldw -16(0,%r30),%r24 + ldw -12(0,%r30),%r25 + add %r21,%r25,%r25 + addc %r20,%r24,%r24 + copy %r24,%r23 + ldi 0,%r22 + copy %r23,%r28 + addib,= -1,%r19,L$0027 + stw %r25,-4(0,%r31) + fldws 0(0,%r29),%fr9L + xmpyu %fr8L,%fr9L,%fr9 + fstds %fr9,-16(0,%r30) + copy %r28,%r21 + ldi 0,%r20 + ldw -16(0,%r30),%r24 + ldw -12(0,%r30),%r25 + add %r21,%r25,%r25 + addc %r20,%r24,%r24 + copy %r24,%r23 + ldi 0,%r22 + copy %r23,%r28 + addib,= -1,%r19,L$0027 + stw %r25,0(0,%r31) + ldo 16(%r29),%r29 + ldo 16(%r2),%r2 + ldo 16(%r31),%r31 + bl L$0026,0 + ldo 16(%r26),%r26 +L$0027 + ldw -84(0,%r30),%r2 + bv 0(%r2) + ldwm -64(0,%r30),%r4 + .EXIT + .PROCEND + .align 4 + .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR +bn_sqr_words + .PROC + .CALLINFO FRAME=0,NO_CALLS + .ENTRY + ldo 28(%r26),%r19 + ldo 12(%r25),%r28 +L$0042 + fldws 0(0,%r25),%fr8L + fldws 0(0,%r25),%fr8R + xmpyu %fr8L,%fr8R,%fr8 + fstds %fr8,-16(0,%r30) + ldw -16(0,%r30),%r22 + ldw -12(0,%r30),%r23 + stw %r23,0(0,%r26) + copy %r22,%r21 + ldi 0,%r20 + addib,= -1,%r24,L$0049 + stw %r21,-24(0,%r19) + fldws -8(0,%r28),%fr8L + fldws -8(0,%r28),%fr8R + xmpyu %fr8L,%fr8R,%fr8 + fstds %fr8,-16(0,%r30) + ldw -16(0,%r30),%r22 + ldw -12(0,%r30),%r23 + stw %r23,-20(0,%r19) + copy %r22,%r21 + ldi 0,%r20 + addib,= -1,%r24,L$0049 + stw %r21,-16(0,%r19) + fldws -4(0,%r28),%fr8L + fldws -4(0,%r28),%fr8R + xmpyu %fr8L,%fr8R,%fr8 + fstds %fr8,-16(0,%r30) + ldw -16(0,%r30),%r22 + ldw -12(0,%r30),%r23 + stw %r23,-12(0,%r19) + copy %r22,%r21 + ldi 0,%r20 + addib,= -1,%r24,L$0049 + stw %r21,-8(0,%r19) + fldws 0(0,%r28),%fr8L + fldws 0(0,%r28),%fr8R + xmpyu %fr8L,%fr8R,%fr8 + fstds %fr8,-16(0,%r30) + ldw -16(0,%r30),%r22 + ldw -12(0,%r30),%r23 + stw %r23,-4(0,%r19) + copy %r22,%r21 + ldi 0,%r20 + addib,= -1,%r24,L$0049 + stw %r21,0(0,%r19) + ldo 16(%r28),%r28 + ldo 16(%r25),%r25 + ldo 32(%r19),%r19 + bl L$0042,0 + ldo 32(%r26),%r26 +L$0049 + bv,n 0(%r2) + .EXIT + .PROCEND + .IMPORT BN_num_bits_word,CODE + .IMPORT fprintf,CODE + .IMPORT __iob,DATA + .SPACE $TEXT$ + .SUBSPA $LIT$ + + .align 4 +L$C0000 + .STRING "Division would overflow (%d)\x0a\x00" + .IMPORT abort,CODE + .SPACE $TEXT$ + .SUBSPA $CODE$ + + .align 4 + .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR +bn_div64 + .PROC + .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 + .ENTRY + stw %r2,-20(0,%r30) + stwm %r8,128(0,%r30) + stw %r7,-124(0,%r30) + stw %r4,-112(0,%r30) + stw %r3,-108(0,%r30) + copy %r26,%r3 + copy %r25,%r4 + stw %r6,-120(0,%r30) + ldi 0,%r7 + stw %r5,-116(0,%r30) + movb,<> %r24,%r5,L$0051 + ldi 2,%r6 + bl L$0068,0 + ldi -1,%r28 +L$0051 + .CALL ARGW0=GR + bl BN_num_bits_word,%r2 + copy %r5,%r26 + copy %r28,%r24 + ldi 32,%r19 + comb,= %r19,%r24,L$0052 + subi 31,%r24,%r19 + mtsar %r19 + zvdepi 1,32,%r19 + comb,>>= %r19,%r3,L$0052 + addil LR'__iob-$global$+32,%r27 + ldo RR'__iob-$global$+32(%r1),%r26 + ldil LR'L$C0000,%r25 + .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR + bl fprintf,%r2 + ldo RR'L$C0000(%r25),%r25 + .CALL + bl abort,%r2 + nop +L$0052 + comb,>> %r5,%r3,L$0053 + subi 32,%r24,%r24 + sub %r3,%r5,%r3 +L$0053 + comib,= 0,%r24,L$0054 + subi 31,%r24,%r19 + mtsar %r19 + zvdep %r5,32,%r5 + zvdep %r3,32,%r21 + subi 32,%r24,%r20 + mtsar %r20 + vshd 0,%r4,%r20 + or %r21,%r20,%r3 + mtsar %r19 + zvdep %r4,32,%r4 +L$0054 + extru %r5,15,16,%r23 + extru %r5,31,16,%r28 +L$0055 + extru %r3,15,16,%r19 + comb,<> %r23,%r19,L$0058 + copy %r3,%r26 + bl L$0059,0 + zdepi -1,31,16,%r29 +L$0058 + .IMPORT $$divU,MILLICODE + bl $$divU,%r31 + copy %r23,%r25 +L$0059 + stw %r29,-16(0,%r30) + fldws -16(0,%r30),%fr10L + stw %r28,-16(0,%r30) + fldws -16(0,%r30),%fr10R + stw %r23,-16(0,%r30) + xmpyu %fr10L,%fr10R,%fr8 + fldws -16(0,%r30),%fr10R + fstws %fr8R,-16(0,%r30) + xmpyu %fr10L,%fr10R,%fr9 + ldw -16(0,%r30),%r8 + fstws %fr9R,-16(0,%r30) + copy %r8,%r22 + ldw -16(0,%r30),%r8 + extru %r4,15,16,%r24 + copy %r8,%r21 +L$0060 + sub %r3,%r21,%r20 + copy %r20,%r19 + depi 0,31,16,%r19 + comib,<> 0,%r19,L$0061 + zdep %r20,15,16,%r19 + addl %r19,%r24,%r19 + comb,>>= %r19,%r22,L$0061 + sub %r22,%r28,%r22 + sub %r21,%r23,%r21 + bl L$0060,0 + ldo -1(%r29),%r29 +L$0061 + stw %r29,-16(0,%r30) + fldws -16(0,%r30),%fr10L + stw %r28,-16(0,%r30) + fldws -16(0,%r30),%fr10R + xmpyu %fr10L,%fr10R,%fr8 + fstws %fr8R,-16(0,%r30) + ldw -16(0,%r30),%r8 + stw %r23,-16(0,%r30) + fldws -16(0,%r30),%fr10R + copy %r8,%r19 + xmpyu %fr10L,%fr10R,%fr8 + fstws %fr8R,-16(0,%r30) + extru %r19,15,16,%r20 + ldw -16(0,%r30),%r8 + zdep %r19,15,16,%r19 + addl %r8,%r20,%r20 + comclr,<<= %r19,%r4,0 + addi 1,%r20,%r20 + comb,<<= %r20,%r3,L$0066 + sub %r4,%r19,%r4 + addl %r3,%r5,%r3 + ldo -1(%r29),%r29 +L$0066 + addib,= -1,%r6,L$0056 + sub %r3,%r20,%r3 + zdep %r29,15,16,%r7 + shd %r3,%r4,16,%r3 + bl L$0055,0 + zdep %r4,15,16,%r4 +L$0056 + or %r7,%r29,%r28 +L$0068 + ldw -148(0,%r30),%r2 + ldw -124(0,%r30),%r7 + ldw -120(0,%r30),%r6 + ldw -116(0,%r30),%r5 + ldw -112(0,%r30),%r4 + ldw -108(0,%r30),%r3 + bv 0(%r2) + ldwm -128(0,%r30),%r8 + .EXIT + .PROCEND diff --git a/crypto/bn/asm/pa-risc2W.s b/crypto/bn/asm/pa-risc2W.s new file mode 100644 index 0000000000..54b6606252 --- /dev/null +++ b/crypto/bn/asm/pa-risc2W.s @@ -0,0 +1,1605 @@ +; +; PA-RISC 64-bit implementation of bn_asm code +; +; This code is approximately 2x faster than the C version +; for RSA/DSA. +; +; See http://devresource.hp.com/ for more details on the PA-RISC +; architecture. Also see the book "PA-RISC 2.0 Architecture" +; by Gerry Kane for information on the instruction set architecture. +; +; Code written by Chris Ruemmler (with some help from the HP C +; compiler). +; +; The code compiles with HP's assembler +; + + .level 2.0W + .space $TEXT$ + .subspa $CODE$,QUAD=0,ALIGN=8,ACCESS=0x2c,CODE_ONLY + +; +; Global Register definitions used for the routines. +; +; Some information about HP's runtime architecture for 64-bits. +; +; "Caller save" means the calling function must save the register +; if it wants the register to be preserved. +; "Callee save" means if a function uses the register, it must save +; the value before using it. +; +; For the floating point registers +; +; "caller save" registers: fr4-fr11, fr22-fr31 +; "callee save" registers: fr12-fr21 +; "special" registers: fr0-fr3 (status and exception registers) +; +; For the integer registers +; value zero : r0 +; "caller save" registers: r1,r19-r26 +; "callee save" registers: r3-r18 +; return register : r2 (rp) +; return values ; r28 (ret0,ret1) +; Stack pointer ; r30 (sp) +; global data pointer ; r27 (dp) +; argument pointer ; r29 (ap) +; millicode return ptr ; r31 (also a caller save register) + + +; +; Arguments to the routines +; +r_ptr .reg %r26 +a_ptr .reg %r25 +b_ptr .reg %r24 +num .reg %r24 +w .reg %r23 +n .reg %r23 + + +; +; Globals used in some routines +; + +top_overflow .reg %r29 +high_mask .reg %r22 ; value 0xffffffff80000000L + + +;------------------------------------------------------------------------------ +; +; bn_mul_add_words +; +;BN_ULONG bn_mul_add_words(BN_ULONG *r_ptr, BN_ULONG *a_ptr, +; int num, BN_ULONG w) +; +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = num +; arg3 = w +; +; Local register definitions +; + +fm1 .reg %fr22 +fm .reg %fr23 +ht_temp .reg %fr24 +ht_temp_1 .reg %fr25 +lt_temp .reg %fr26 +lt_temp_1 .reg %fr27 +fm1_1 .reg %fr28 +fm_1 .reg %fr29 + +fw_h .reg %fr7L +fw_l .reg %fr7R +fw .reg %fr7 + +fht_0 .reg %fr8L +flt_0 .reg %fr8R +t_float_0 .reg %fr8 + +fht_1 .reg %fr9L +flt_1 .reg %fr9R +t_float_1 .reg %fr9 + +tmp_0 .reg %r31 +tmp_1 .reg %r21 +m_0 .reg %r20 +m_1 .reg %r19 +ht_0 .reg %r1 +ht_1 .reg %r3 +lt_0 .reg %r4 +lt_1 .reg %r5 +m1_0 .reg %r6 +m1_1 .reg %r7 +rp_val .reg %r8 +rp_val_1 .reg %r9 + +bn_mul_add_words + .export bn_mul_add_words,entry,NO_RELOCATION,LONG_RETURN + .proc + .callinfo frame=128 + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP ; Needed to make the loop 16-byte aligned + NOP ; Needed to make the loop 16-byte aligned + + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + STD %r7,32(%sp) ; save r7 + STD %r8,40(%sp) ; save r8 + + STD %r9,48(%sp) ; save r9 + COPY %r0,%ret0 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + STD w,56(%sp) ; store w on stack + + CMPIB,>= 0,num,bn_mul_add_words_exit ; if (num <= 0) then exit + LDO 128(%sp),%sp ; bump stack + + ; + ; The loop is unrolled twice, so if there is only 1 number + ; then go straight to the cleanup code. + ; + CMPIB,= 1,num,bn_mul_add_words_single_top + FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_add_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDD 8(r_ptr),rp_val_1 ; rp[1] + + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = fht_1*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1[0] + FSTD fm1_1,-48(%sp) ; -48(sp) = m1[1] + + XMPYU flt_0,fw_h,fm ; m[0] = flt_0*fw_h + XMPYU flt_1,fw_h,fm_1 ; m[1] = flt_1*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m[0] + FSTD fm_1,-40(%sp) ; -40(sp) = m[1] + + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp_1 = fht_1*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht_temp + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht_temp_1 + + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt_temp + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt_temp_1 + + LDD -8(%sp),m_0 ; m[0] + LDD -40(%sp),m_1 ; m[1] + LDD -16(%sp),m1_0 ; m1[0] + LDD -48(%sp),m1_1 ; m1[1] + + LDD -24(%sp),ht_0 ; ht[0] + LDD -56(%sp),ht_1 ; ht[1] + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m[0] + m1[0]; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m[1] + m1[1]; + + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m[0] < m1[0]) + ADD,L ht_0,top_overflow,ht_0 ; ht[0] += (1<<32) + + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m[1] < m1[1]) + ADD,L ht_1,top_overflow,ht_1 ; ht[1] += (1<<32) + EXTRD,U tmp_0,31,32,m_0 ; m[0]>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1[0] = m[0]<<32 + + EXTRD,U tmp_1,31,32,m_1 ; m[1]>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1[1] = m[1]<<32 + ADD,L ht_0,m_0,ht_0 ; ht[0]+= (m[0]>>32) + ADD,L ht_1,m_1,ht_1 ; ht[1]+= (m[1]>>32) + + ADD lt_0,m1_0,lt_0 ; lt[0] = lt[0]+m1[0]; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_1,m1_1,lt_1 ; lt[1] = lt[1]+m1[1]; + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + + ADD %ret0,lt_0,lt_0 ; lt[0] = lt[0] + c; + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + ADD lt_0,rp_val,lt_0 ; lt[0] = lt[0]+rp[0] + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + + LDO -2(num),num ; num = num - 2; + ADD ht_0,lt_1,lt_1 ; lt[1] = lt[1] + ht_0 (c); + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + + ADD lt_1,rp_val_1,lt_1 ; lt[1] = lt[1]+rp[1] + ADD,DC ht_1,%r0,%ret0 ; ht[1]++ + LDO 16(a_ptr),a_ptr ; a_ptr += 2 + + STD lt_1,8(r_ptr) ; rp[1] = lt[1] + CMPIB,<= 2,num,bn_mul_add_words_unroll2 ; go again if more to do + LDO 16(r_ptr),r_ptr ; r_ptr += 2 + + CMPIB,=,N 0,num,bn_mul_add_words_exit ; are we done, or cleanup last one + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_add_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + LDD 0(r_ptr),rp_val ; rp[0] + LDO 8(a_ptr),a_ptr ; a_ptr++ + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 ; m1 = temp1 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,tmp_0 ; tmp_0 = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD %ret0,tmp_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + ADD lt_0,rp_val,lt_0 ; lt = lt+rp[0] + ADD,DC ht_0,%r0,%ret0 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_add_words_exit + .EXIT + LDD -80(%sp),%r9 ; restore r9 + LDD -88(%sp),%r8 ; restore r8 + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; arg3 = w + +bn_mul_words + .proc + .callinfo frame=128 + .entry + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + STD %r7,32(%sp) ; save r7 + COPY %r0,%ret0 ; return 0 by default + DEPDI,Z 1,31,1,top_overflow ; top_overflow = 1 << 32 + STD w,56(%sp) ; w on stack + + CMPIB,>= 0,num,bn_mul_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; See if only 1 word to do, thus just do cleanup + ; + CMPIB,= 1,num,bn_mul_words_single_top + FLDD -72(%sp),fw ; load up w into fp register fw (fw_h/fw_l) + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + ; PA-RISC 2.0 chips have two fully pipelined multipliers, thus + ; two 32-bit mutiplies can be issued per cycle. + ; +bn_mul_words_unroll2 + + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + FLDD 8(a_ptr),t_float_1 ; load up 64-bit value (fr8L) ht(L)/lt(R) + XMPYU fht_0,fw_l,fm1 ; m1[0] = fht_0*fw_l + XMPYU fht_1,fw_l,fm1_1 ; m1[1] = ht*fw_l + + FSTD fm1,-16(%sp) ; -16(sp) = m1 + FSTD fm1_1,-48(%sp) ; -48(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + XMPYU flt_1,fw_h,fm_1 ; m = lt*fw_h + + FSTD fm,-8(%sp) ; -8(sp) = m + FSTD fm_1,-40(%sp) ; -40(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = fht_0*fw_h + XMPYU fht_1,fw_h,ht_temp_1 ; ht_temp = ht*fw_h + + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + FSTD ht_temp_1,-56(%sp) ; -56(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + XMPYU flt_1,fw_l,lt_temp_1 ; lt_temp = lt*fw_l + + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + FSTD lt_temp_1,-64(%sp) ; -64(sp) = lt + LDD -8(%sp),m_0 + LDD -40(%sp),m_1 + + LDD -16(%sp),m1_0 + LDD -48(%sp),m1_1 + LDD -24(%sp),ht_0 + LDD -56(%sp),ht_1 + + ADD,L m1_0,m_0,tmp_0 ; tmp_0 = m + m1; + ADD,L m1_1,m_1,tmp_1 ; tmp_1 = m + m1; + LDD -32(%sp),lt_0 + LDD -64(%sp),lt_1 + + CMPCLR,*>>= tmp_0,m1_0, %r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + CMPCLR,*>>= tmp_1,m1_1,%r0 ; if (m < m1) + ADD,L ht_1,top_overflow,ht_1 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + EXTRD,U tmp_1,31,32,m_1 ; m>>32 + DEPD,Z tmp_1,31,32,m1_1 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD,L ht_1,m_1,ht_1 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt = lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD lt_1,m1_1,lt_1 ; lt = lt+m1; + ADD,DC ht_1,%r0,ht_1 ; ht++ + ADD %ret0,lt_0,lt_0 ; lt = lt + c (ret0); + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD ht_0,lt_1,lt_1 ; lt = lt + c (ht_0) + ADD,DC ht_1,%r0,ht_1 ; ht++ + STD lt_0,0(r_ptr) ; rp[0] = lt + STD lt_1,8(r_ptr) ; rp[1] = lt + + COPY ht_1,%ret0 ; carry = ht + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_mul_words_unroll2 + LDO 16(r_ptr),r_ptr ; rp++ + + CMPIB,=,N 0,num,bn_mul_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_mul_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,fw_l,fm1 ; m1 = ht*fw_l + FSTD fm1,-16(%sp) ; -16(sp) = m1 + XMPYU flt_0,fw_h,fm ; m = lt*fw_h + FSTD fm,-8(%sp) ; -8(sp) = m + XMPYU fht_0,fw_h,ht_temp ; ht_temp = ht*fw_h + FSTD ht_temp,-24(%sp) ; -24(sp) = ht + XMPYU flt_0,fw_l,lt_temp ; lt_temp = lt*fw_l + FSTD lt_temp,-32(%sp) ; -32(sp) = lt + + LDD -8(%sp),m_0 + LDD -16(%sp),m1_0 + ADD,L m_0,m1_0,tmp_0 ; tmp_0 = m + m1; + LDD -24(%sp),ht_0 + LDD -32(%sp),lt_0 + + CMPCLR,*>>= tmp_0,m1_0,%r0 ; if (m < m1) + ADD,L ht_0,top_overflow,ht_0 ; ht += (1<<32) + + EXTRD,U tmp_0,31,32,m_0 ; m>>32 + DEPD,Z tmp_0,31,32,m1_0 ; m1 = m<<32 + + ADD,L ht_0,m_0,ht_0 ; ht+= (m>>32) + ADD lt_0,m1_0,lt_0 ; lt= lt+m1; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + ADD %ret0,lt_0,lt_0 ; lt = lt + c; + ADD,DC ht_0,%r0,ht_0 ; ht++ + + COPY ht_0,%ret0 ; copy carry + STD lt_0,0(r_ptr) ; rp[0] = lt + +bn_mul_words_exit + .EXIT + LDD -96(%sp),%r7 ; restore r7 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 ; restore r3 + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num) +; +; arg0 = rp +; arg1 = ap +; arg2 = num +; + +bn_sqr_words + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + NOP + STD %r5,16(%sp) ; save r5 + + CMPIB,>= 0,num,bn_sqr_words_exit + LDO 128(%sp),%sp ; bump stack + + ; + ; If only 1, the goto straight to cleanup + ; + CMPIB,= 1,num,bn_sqr_words_single_top + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; + +bn_sqr_words_unroll2 + FLDD 0(a_ptr),t_float_0 ; a[0] + FLDD 8(a_ptr),t_float_1 ; a[1] + XMPYU fht_0,flt_0,fm ; m[0] + XMPYU fht_1,flt_1,fm_1 ; m[1] + + FSTD fm,-24(%sp) ; store m[0] + FSTD fm_1,-56(%sp) ; store m[1] + XMPYU flt_0,flt_0,lt_temp ; lt[0] + XMPYU flt_1,flt_1,lt_temp_1 ; lt[1] + + FSTD lt_temp,-16(%sp) ; store lt[0] + FSTD lt_temp_1,-48(%sp) ; store lt[1] + XMPYU fht_0,fht_0,ht_temp ; ht[0] + XMPYU fht_1,fht_1,ht_temp_1 ; ht[1] + + FSTD ht_temp,-8(%sp) ; store ht[0] + FSTD ht_temp_1,-40(%sp) ; store ht[1] + LDD -24(%sp),m_0 + LDD -56(%sp),m_1 + + AND m_0,high_mask,tmp_0 ; m[0] & Mask + AND m_1,high_mask,tmp_1 ; m[1] & Mask + DEPD,Z m_0,30,31,m_0 ; m[0] << 32+1 + DEPD,Z m_1,30,31,m_1 ; m[1] << 32+1 + + LDD -16(%sp),lt_0 + LDD -48(%sp),lt_1 + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m[0]&Mask >> 32-1 + EXTRD,U tmp_1,32,33,tmp_1 ; tmp_1 = m[1]&Mask >> 32-1 + + LDD -8(%sp),ht_0 + LDD -40(%sp),ht_1 + ADD,L ht_0,tmp_0,ht_0 ; ht[0] += tmp_0 + ADD,L ht_1,tmp_1,ht_1 ; ht[1] += tmp_1 + + ADD lt_0,m_0,lt_0 ; lt = lt+m + ADD,DC ht_0,%r0,ht_0 ; ht[0]++ + STD lt_0,0(r_ptr) ; rp[0] = lt[0] + STD ht_0,8(r_ptr) ; rp[1] = ht[1] + + ADD lt_1,m_1,lt_1 ; lt = lt+m + ADD,DC ht_1,%r0,ht_1 ; ht[1]++ + STD lt_1,16(r_ptr) ; rp[2] = lt[1] + STD ht_1,24(r_ptr) ; rp[3] = ht[1] + + LDO -2(num),num ; num = num - 2; + LDO 16(a_ptr),a_ptr ; ap += 2 + CMPIB,<= 2,num,bn_sqr_words_unroll2 + LDO 32(r_ptr),r_ptr ; rp += 4 + + CMPIB,=,N 0,num,bn_sqr_words_exit ; are we done? + + ; + ; Top of loop aligned on 64-byte boundary + ; +bn_sqr_words_single_top + FLDD 0(a_ptr),t_float_0 ; load up 64-bit value (fr8L) ht(L)/lt(R) + + XMPYU fht_0,flt_0,fm ; m + FSTD fm,-24(%sp) ; store m + + XMPYU flt_0,flt_0,lt_temp ; lt + FSTD lt_temp,-16(%sp) ; store lt + + XMPYU fht_0,fht_0,ht_temp ; ht + FSTD ht_temp,-8(%sp) ; store ht + + LDD -24(%sp),m_0 ; load m + AND m_0,high_mask,tmp_0 ; m & Mask + DEPD,Z m_0,30,31,m_0 ; m << 32+1 + LDD -16(%sp),lt_0 ; lt + + LDD -8(%sp),ht_0 ; ht + EXTRD,U tmp_0,32,33,tmp_0 ; tmp_0 = m&Mask >> 32-1 + ADD m_0,lt_0,lt_0 ; lt = lt+m + ADD,L ht_0,tmp_0,ht_0 ; ht += tmp_0 + ADD,DC ht_0,%r0,ht_0 ; ht++ + + STD lt_0,0(r_ptr) ; rp[0] = lt + STD ht_0,8(r_ptr) ; rp[1] = ht + +bn_sqr_words_exit + .EXIT + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + .PROCEND ;in=23,24,25,26,29;out=28; + + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_add_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t .reg %r22 +b .reg %r21 +l .reg %r20 + +bn_add_words + .proc + .entry + .callinfo + .EXPORT bn_add_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .align 64 + + CMPIB,>= 0,n,bn_add_words_exit + COPY %r0,%ret0 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_add_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_add_words_unroll2 + LDD 0(a_ptr),t + LDD 0(b_ptr),b + ADD t,%ret0,t ; t = t+c; + ADD,DC %r0,%r0,%ret0 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret0,%r0,%ret0 ; c+= carry + STD l,0(r_ptr) + + LDD 8(a_ptr),t + LDD 8(b_ptr),b + ADD t,%ret0,t ; t = t+c; + ADD,DC %r0,%r0,%ret0 ; set c to carry + ADD t,b,l ; l = t + b[0] + ADD,DC %ret0,%r0,%ret0 ; c+= carry + STD l,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_add_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_add_words_exit ; are we done? + +bn_add_words_single_top + LDD 0(a_ptr),t + LDD 0(b_ptr),b + + ADD t,%ret0,t ; t = t+c; + ADD,DC %r0,%r0,%ret0 ; set c to carry (could use CMPCLR??) + ADD t,b,l ; l = t + b[0] + ADD,DC %ret0,%r0,%ret0 ; c+= carry + STD l,0(r_ptr) + +bn_add_words_exit + .EXIT + BVE (%rp) + NOP + .PROCEND ;in=23,24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +;BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) +; +; arg0 = rp +; arg1 = ap +; arg2 = bp +; arg3 = n + +t1 .reg %r22 +t2 .reg %r21 +sub_tmp1 .reg %r20 +sub_tmp2 .reg %r19 + + +bn_sub_words + .proc + .callinfo + .EXPORT bn_sub_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + CMPIB,>= 0,n,bn_sub_words_exit + COPY %r0,%ret0 ; return 0 by default + + ; + ; If 2 or more numbers do the loop + ; + CMPIB,= 1,n,bn_sub_words_single_top + NOP + + ; + ; This loop is unrolled 2 times (64-byte aligned as well) + ; +bn_sub_words_unroll2 + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; + + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret0 + STD sub_tmp1,0(r_ptr) + + LDD 8(a_ptr),t1 + LDD 8(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret0 + STD sub_tmp1,8(r_ptr) + + LDO -2(n),n + LDO 16(a_ptr),a_ptr + LDO 16(b_ptr),b_ptr + + CMPIB,<= 2,n,bn_sub_words_unroll2 + LDO 16(r_ptr),r_ptr + + CMPIB,=,N 0,n,bn_sub_words_exit ; are we done? + +bn_sub_words_single_top + LDD 0(a_ptr),t1 + LDD 0(b_ptr),t2 + SUB t1,t2,sub_tmp1 ; t3 = t1-t2; + SUB sub_tmp1,%ret0,sub_tmp1 ; t3 = t3- c; + CMPCLR,*>> t1,t2,sub_tmp2 ; clear if t1 > t2 + LDO 1(%r0),sub_tmp2 + + CMPCLR,*= t1,t2,%r0 + COPY sub_tmp2,%ret0 + + STD sub_tmp1,0(r_ptr) + +bn_sub_words_exit + .EXIT + BVE (%rp) + NOP + .PROCEND ;in=23,24,25,26,29;out=28; + +;------------------------------------------------------------------------------ +; +; unsigned long bn_div_words(unsigned long h, unsigned long l, unsigned long d) +; +; arg0 = h +; arg1 = l +; arg2 = d +; +; This is mainly just modified assembly from the compiler, thus the +; lack of variable names. +; +;------------------------------------------------------------------------------ +bn_div_words + .proc + .callinfo CALLER,FRAME=272,ENTRY_GR=%r10,SAVE_RP,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_div_words,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .IMPORT BN_num_bits_word,CODE,NO_RELOCATION + .IMPORT __iob,DATA + .IMPORT fprintf,CODE,NO_RELOCATION + .IMPORT abort,CODE,NO_RELOCATION + .IMPORT $$div2U,MILLICODE + .entry + STD %r2,-16(%r30) + STD,MA %r3,352(%r30) + STD %r4,-344(%r30) + STD %r5,-336(%r30) + STD %r6,-328(%r30) + STD %r7,-320(%r30) + STD %r8,-312(%r30) + STD %r9,-304(%r30) + STD %r10,-296(%r30) + + STD %r27,-288(%r30) ; save gp + + COPY %r24,%r3 ; save d + COPY %r26,%r4 ; save h (high 64-bits) + LDO -1(%r0),%ret0 ; return -1 by default + + CMPB,*= %r0,%arg2,$D3 ; if (d == 0) + COPY %r25,%r5 ; save l (low 64-bits) + + LDO -48(%r30),%r29 ; create ap + .CALL ;in=26,29;out=28; + B,L BN_num_bits_word,%r2 + COPY %r3,%r26 + LDD -288(%r30),%r27 ; restore gp + LDI 64,%r21 + + CMPB,= %r21,%ret0,$00000012 ;if (i == 64) (forward) + COPY %ret0,%r24 ; i + MTSARCM %r24 + DEPDI,Z -1,%sar,1,%r29 + CMPB,*<<,N %r29,%r4,bn_div_err_case ; if (h > 1<= d) + SUB %r4,%r3,%r4 ; h -= d + CMPB,= %r31,%r0,$0000001A ; if (i) + COPY %r0,%r10 ; ret = 0 + MTSARCM %r31 ; i to shift + DEPD,Z %r3,%sar,64,%r3 ; d <<= i; + SUBI 64,%r31,%r19 ; 64 - i; redundent + MTSAR %r19 ; (64 -i) to shift + SHRPD %r4,%r5,%sar,%r4 ; l>> (64-i) + MTSARCM %r31 ; i to shift + DEPD,Z %r5,%sar,64,%r5 ; l <<= i; + +$0000001A + DEPDI,Z -1,31,32,%r19 + EXTRD,U %r3,31,32,%r6 ; dh=(d&0xfff)>>32 + EXTRD,U %r3,63,32,%r8 ; dl = d&0xffffff + LDO 2(%r0),%r9 + STD %r3,-280(%r30) ; "d" to stack + +$0000001C + DEPDI,Z -1,63,32,%r29 ; + EXTRD,U %r4,31,32,%r31 ; h >> 32 + CMPB,*=,N %r31,%r6,$D2 ; if ((h>>32) != dh)(forward) div + COPY %r4,%r26 + EXTRD,U %r4,31,32,%r25 + COPY %r6,%r24 + .CALL ;in=23,24,25,26;out=20,21,22,28,29; (MILLICALL) + B,L $$div2U,%r2 + EXTRD,U %r6,31,32,%r23 + DEPD %r28,31,32,%r29 +$D2 + STD %r29,-272(%r30) ; q + AND %r5,%r19,%r24 ; t & 0xffffffff00000000; + EXTRD,U %r24,31,32,%r24 ; ??? + FLDD -272(%r30),%fr7 ; q + FLDD -280(%r30),%fr8 ; d + XMPYU %fr8L,%fr7L,%fr10 + FSTD %fr10,-256(%r30) + XMPYU %fr8L,%fr7R,%fr22 + FSTD %fr22,-264(%r30) + XMPYU %fr8R,%fr7L,%fr11 + XMPYU %fr8R,%fr7R,%fr23 + FSTD %fr11,-232(%r30) + FSTD %fr23,-240(%r30) + LDD -256(%r30),%r28 + DEPD,Z %r28,31,32,%r2 + LDD -264(%r30),%r20 + ADD,L %r20,%r2,%r31 + LDD -232(%r30),%r22 + DEPD,Z %r22,31,32,%r22 + LDD -240(%r30),%r21 + B $00000024 ; enter loop + ADD,L %r21,%r22,%r23 + +$0000002A + LDO -1(%r29),%r29 + SUB %r23,%r8,%r23 +$00000024 + SUB %r4,%r31,%r25 + AND %r25,%r19,%r26 + CMPB,*<>,N %r0,%r26,$00000046 ; (forward) + DEPD,Z %r25,31,32,%r20 + OR %r20,%r24,%r21 + CMPB,*<<,N %r21,%r23,$0000002A ;(backward) + SUB %r31,%r6,%r31 +;-------------Break path--------------------- + +$00000046 + DEPD,Z %r23,31,32,%r25 ;tl + EXTRD,U %r23,31,32,%r26 ;t + AND %r25,%r19,%r24 ;tl = (tl<<32)&0xfffffff0000000L + ADD,L %r31,%r26,%r31 ;th += t; + CMPCLR,*>>= %r5,%r24,%r0 ;if (l>32)); + DEPD,Z %r29,31,32,%r10 ; ret = q<<32 + b $0000001C + DEPD,Z %r28,31,32,%r5 ; l = l << 32 + +$D1 + OR %r10,%r29,%r28 ; ret |= q +$D3 + LDD -368(%r30),%r2 +$D0 + LDD -296(%r30),%r10 + LDD -304(%r30),%r9 + LDD -312(%r30),%r8 + LDD -320(%r30),%r7 + LDD -328(%r30),%r6 + LDD -336(%r30),%r5 + LDD -344(%r30),%r4 + BVE (%r2) + .EXIT + LDD,MB -352(%r30),%r3 + +bn_div_err_case + MFIA %r6 + ADDIL L'bn_div_words-bn_div_err_case,%r6,%r1 + LDO R'bn_div_words-bn_div_err_case(%r1),%r6 + ADDIL LT'__iob,%r27,%r1 + LDD RT'__iob(%r1),%r26 + ADDIL L'C$4-bn_div_words,%r6,%r1 + LDO R'C$4-bn_div_words(%r1),%r25 + LDO 64(%r26),%r26 + .CALL ;in=24,25,26,29;out=28; + B,L fprintf,%r2 + LDO -48(%r30),%r29 + LDD -288(%r30),%r27 + .CALL ;in=29; + B,L abort,%r2 + LDO -48(%r30),%r29 + LDD -288(%r30),%r27 + B $D0 + LDD -368(%r30),%r2 + .PROCEND ;in=24,25,26,29;out=28; + +;---------------------------------------------------------------------------- +; +; Registers to hold 64-bit values to manipulate. The "L" part +; of the register corresponds to the upper 32-bits, while the "R" +; part corresponds to the lower 32-bits +; +; Note, that when using b6 and b7, the code must save these before +; using them because they are callee save registers +; +; +; Floating point registers to use to save values that +; are manipulated. These don't collide with ftemp1-6 and +; are all caller save registers +; +a0 .reg %fr22 +a0L .reg %fr22L +a0R .reg %fr22R + +a1 .reg %fr23 +a1L .reg %fr23L +a1R .reg %fr23R + +a2 .reg %fr24 +a2L .reg %fr24L +a2R .reg %fr24R + +a3 .reg %fr25 +a3L .reg %fr25L +a3R .reg %fr25R + +a4 .reg %fr26 +a4L .reg %fr26L +a4R .reg %fr26R + +a5 .reg %fr27 +a5L .reg %fr27L +a5R .reg %fr27R + +a6 .reg %fr28 +a6L .reg %fr28L +a6R .reg %fr28R + +a7 .reg %fr29 +a7L .reg %fr29L +a7R .reg %fr29R + +b0 .reg %fr30 +b0L .reg %fr30L +b0R .reg %fr30R + +b1 .reg %fr31 +b1L .reg %fr31L +b1R .reg %fr31R + +; +; Temporary floating point variables, these are all caller save +; registers +; +ftemp1 .reg %fr4 +ftemp2 .reg %fr5 +ftemp3 .reg %fr6 +ftemp4 .reg %fr7 + +; +; The B set of registers when used. +; + +b2 .reg %fr8 +b2L .reg %fr8L +b2R .reg %fr8R + +b3 .reg %fr9 +b3L .reg %fr9L +b3R .reg %fr9R + +b4 .reg %fr10 +b4L .reg %fr10L +b4R .reg %fr10R + +b5 .reg %fr11 +b5L .reg %fr11L +b5R .reg %fr11R + +b6 .reg %fr12 +b6L .reg %fr12L +b6R .reg %fr12R + +b7 .reg %fr13 +b7L .reg %fr13L +b7R .reg %fr13R + +c1 .reg %r21 ; only reg +temp1 .reg %r20 ; only reg +temp2 .reg %r19 ; only reg +temp3 .reg %r31 ; only reg + +m1 .reg %r28 +c2 .reg %r23 +high_one .reg %r1 +ht .reg %r6 +lt .reg %r5 +m .reg %r4 +c3 .reg %r3 + +SQR_ADD_C .macro A0L,A0R,C1,C2,C3 + XMPYU A0L,A0R,ftemp1 ; m + FSTD ftemp1,-24(%sp) ; store m + + XMPYU A0R,A0R,ftemp2 ; lt + FSTD ftemp2,-16(%sp) ; store lt + + XMPYU A0L,A0L,ftemp3 ; ht + FSTD ftemp3,-8(%sp) ; store ht + + LDD -24(%sp),m ; load m + AND m,high_mask,temp2 ; m & Mask + DEPD,Z m,30,31,temp3 ; m << 32+1 + LDD -16(%sp),lt ; lt + + LDD -8(%sp),ht ; ht + EXTRD,U temp2,32,33,temp1 ; temp1 = m&Mask >> 32-1 + ADD temp3,lt,lt ; lt = lt+m + ADD,L ht,temp1,ht ; ht += temp1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; ht++ + + ADD C2,ht,C2 ; c2=c2+ht + ADD,DC C3,%r0,C3 ; c3++ +.endm + +SQR_ADD_C2 .macro A0L,A0R,A1L,A1R,C1,C2,C3 + XMPYU A0L,A1R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,A1L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,A1R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,A1L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD ht,ht,ht ; ht=ht+ht; + ADD,DC C3,%r0,C3 ; add in carry (c3++) + + ADD lt,lt,lt ; lt=lt+lt; + ADD,DC ht,%r0,ht ; add in carry (ht++) + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC,*NUV ht,%r0,ht ; add in carry (ht++) + LDO 1(C3),C3 ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + +; +;void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba8 + .PROC + .CALLINFO FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .ENTRY + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a0L,a0R,c2,c3,c1 + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a5L,a5R,a0L,a0R,c3,c1,c2 + SQR_ADD_C2 a4L,a4R,a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a4L,a4R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a1L,a1R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a0L,a0R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a0L,a0R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a1L,a1R,c2,c3,c1 + SQR_ADD_C2 a5L,a5R,a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a4L,a4R,a3L,a3R,c2,c3,c1 + STD c2,56(r_ptr) ; r[7] = c2; + COPY %r0,c2 + + SQR_ADD_C a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a5L,a5R,a3L,a3R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a2L,a2R,c3,c1,c2 + SQR_ADD_C2 a7L,a7R,a1L,a1R,c3,c1,c2 + STD c3,64(r_ptr) ; r[8] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a7L,a7R,a2L,a2R,c1,c2,c3 + SQR_ADD_C2 a6L,a6R,a3L,a3R,c1,c2,c3 + SQR_ADD_C2 a5L,a5R,a4L,a4R,c1,c2,c3 + STD c1,72(r_ptr) ; r[9] = c1; + COPY %r0,c1 + + SQR_ADD_C a5L,a5R,c2,c3,c1 + SQR_ADD_C2 a6L,a6R,a4L,a4R,c2,c3,c1 + SQR_ADD_C2 a7L,a7R,a3L,a3R,c2,c3,c1 + STD c2,80(r_ptr) ; r[10] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a7L,a7R,a4L,a4R,c3,c1,c2 + SQR_ADD_C2 a6L,a6R,a5L,a5R,c3,c1,c2 + STD c3,88(r_ptr) ; r[11] = c3; + COPY %r0,c3 + + SQR_ADD_C a6L,a6R,c1,c2,c3 + SQR_ADD_C2 a7L,a7R,a5L,a5R,c1,c2,c3 + STD c1,96(r_ptr) ; r[12] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a7L,a7R,a6L,a6R,c2,c3,c1 + STD c2,104(r_ptr) ; r[13] = c2; + COPY %r0,c2 + + SQR_ADD_C a7L,a7R,c3,c1,c2 + STD c3, 112(r_ptr) ; r[14] = c3 + STD c1, 120(r_ptr) ; r[15] = c1 + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +; arg0 = r_ptr +; arg1 = a_ptr +; + +bn_sqr_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_sqr_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z -1,32,33,high_mask ; Create Mask 0xffffffff80000000L + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + SQR_ADD_C a0L,a0R,c1,c2,c3 + + STD c1,0(r_ptr) ; r[0] = c1; + COPY %r0,c1 + + SQR_ADD_C2 a1L,a1R,a0L,a0R,c2,c3,c1 + + STD c2,8(r_ptr) ; r[1] = c2; + COPY %r0,c2 + + SQR_ADD_C a1L,a1R,c3,c1,c2 + SQR_ADD_C2 a2L,a2R,a0L,a0R,c3,c1,c2 + + STD c3,16(r_ptr) ; r[2] = c3; + COPY %r0,c3 + + SQR_ADD_C2 a3L,a3R,a0L,a0R,c1,c2,c3 + SQR_ADD_C2 a2L,a2R,a1L,a1R,c1,c2,c3 + + STD c1,24(r_ptr) ; r[3] = c1; + COPY %r0,c1 + + SQR_ADD_C a2L,a2R,c2,c3,c1 + SQR_ADD_C2 a3L,a3R,a1L,a1R,c2,c3,c1 + + STD c2,32(r_ptr) ; r[4] = c2; + COPY %r0,c2 + + SQR_ADD_C2 a3L,a3R,a2L,a2R,c3,c1,c2 + STD c3,40(r_ptr) ; r[5] = c3; + COPY %r0,c3 + + SQR_ADD_C a3L,a3R,c1,c2,c3 + STD c1,48(r_ptr) ; r[6] = c1; + STD c2,56(r_ptr) ; r[7] = c2; + + .EXIT + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + +;--------------------------------------------------------------------------- + +MUL_ADD_C .macro A0L,A0R,B0L,B0R,C1,C2,C3 + XMPYU A0L,B0R,ftemp1 ; m1 = bl*ht + FSTD ftemp1,-16(%sp) ; + XMPYU A0R,B0L,ftemp2 ; m = bh*lt + FSTD ftemp2,-8(%sp) ; + XMPYU A0R,B0R,ftemp3 ; lt = bl*lt + FSTD ftemp3,-32(%sp) + XMPYU A0L,B0L,ftemp4 ; ht = bh*ht + FSTD ftemp4,-24(%sp) ; + + LDD -8(%sp),m ; r21 = m + LDD -16(%sp),m1 ; r19 = m1 + ADD,L m,m1,m ; m+m1 + + DEPD,Z m,31,32,temp3 ; (m+m1<<32) + LDD -24(%sp),ht ; r24 = ht + + CMPCLR,*>>= m,m1,%r0 ; if (m < m1) + ADD,L ht,high_one,ht ; ht+=high_one + + EXTRD,U m,31,32,temp1 ; m >> 32 + LDD -32(%sp),lt ; lt + ADD,L ht,temp1,ht ; ht+= m>>32 + ADD lt,temp3,lt ; lt = lt+m1 + ADD,DC ht,%r0,ht ; ht++ + + ADD C1,lt,C1 ; c1=c1+lt + ADD,DC ht,%r0,ht ; bump c3 if overflow,nullify otherwise + + ADD C2,ht,C2 ; c2 = c2 + ht + ADD,DC C3,%r0,C3 ; add in carry (c3++) +.endm + + +; +;void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba8 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba8,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + FLDD 32(a_ptr),a4 + FLDD 40(a_ptr),a5 + FLDD 48(a_ptr),a6 + FLDD 56(a_ptr),a7 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + FLDD 32(b_ptr),b4 + FLDD 40(b_ptr),b5 + FLDD 48(b_ptr),b6 + FLDD 56(b_ptr),b7 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a4L,a4R,b0L,b0R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a0L,a0R,b4L,b4R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a0L,a0R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b0L,b0R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a6L,a6R,b0L,b0R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a0L,a0R,b6L,b6R,c1,c2,c3 + STD c1,48(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b0L,b0R,c2,c3,c1 + STD c2,56(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b2L,b2R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a4L,a4R,b4L,b4R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a2L,a2R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b7L,b7R,c3,c1,c2 + STD c3,64(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a2L,a2R,b7L,b7R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a4L,a4R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b4L,b4R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a7L,a7R,b2L,b2R,c1,c2,c3 + STD c1,72(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a7L,a7R,b3L,b3R,c2,c3,c1 + MUL_ADD_C a6L,a6R,b4L,b4R,c2,c3,c1 + MUL_ADD_C a5L,a5R,b5L,b5R,c2,c3,c1 + MUL_ADD_C a4L,a4R,b6L,b6R,c2,c3,c1 + MUL_ADD_C a3L,a3R,b7L,b7R,c2,c3,c1 + STD c2,80(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a4L,a4R,b7L,b7R,c3,c1,c2 + MUL_ADD_C a5L,a5R,b6L,b6R,c3,c1,c2 + MUL_ADD_C a6L,a6R,b5L,b5R,c3,c1,c2 + MUL_ADD_C a7L,a7R,b4L,b4R,c3,c1,c2 + STD c3,88(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a7L,a7R,b5L,b5R,c1,c2,c3 + MUL_ADD_C a6L,a6R,b6L,b6R,c1,c2,c3 + MUL_ADD_C a5L,a5R,b7L,b7R,c1,c2,c3 + STD c1,96(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a6L,a6R,b7L,b7R,c2,c3,c1 + MUL_ADD_C a7L,a7R,b6L,b6R,c2,c3,c1 + STD c2,104(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a7L,a7R,b7L,b7R,c3,c1,c2 + STD c3,112(r_ptr) + STD c1,120(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + +;----------------------------------------------------------------------------- +; +;void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) +; arg0 = r_ptr +; arg1 = a_ptr +; arg2 = b_ptr +; + +bn_mul_comba4 + .proc + .callinfo FRAME=128,ENTRY_GR=%r3,ARGS_SAVED,ORDERING_AWARE + .EXPORT bn_mul_comba4,ENTRY,PRIV_LEV=3,NO_RELOCATION,LONG_RETURN + .entry + .align 64 + + STD %r3,0(%sp) ; save r3 + STD %r4,8(%sp) ; save r4 + STD %r5,16(%sp) ; save r5 + STD %r6,24(%sp) ; save r6 + FSTD %fr12,32(%sp) ; save r6 + FSTD %fr13,40(%sp) ; save r7 + + ; + ; Zero out carries + ; + COPY %r0,c1 + COPY %r0,c2 + COPY %r0,c3 + + LDO 128(%sp),%sp ; bump stack + DEPDI,Z 1,31,1,high_one ; Create Value 1 << 32 + + ; + ; Load up all of the values we are going to use + ; + FLDD 0(a_ptr),a0 + FLDD 8(a_ptr),a1 + FLDD 16(a_ptr),a2 + FLDD 24(a_ptr),a3 + + FLDD 0(b_ptr),b0 + FLDD 8(b_ptr),b1 + FLDD 16(b_ptr),b2 + FLDD 24(b_ptr),b3 + + MUL_ADD_C a0L,a0R,b0L,b0R,c1,c2,c3 + STD c1,0(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a0L,a0R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b0L,b0R,c2,c3,c1 + STD c2,8(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b0L,b0R,c3,c1,c2 + MUL_ADD_C a1L,a1R,b1L,b1R,c3,c1,c2 + MUL_ADD_C a0L,a0R,b2L,b2R,c3,c1,c2 + STD c3,16(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a0L,a0R,b3L,b3R,c1,c2,c3 + MUL_ADD_C a1L,a1R,b2L,b2R,c1,c2,c3 + MUL_ADD_C a2L,a2R,b1L,b1R,c1,c2,c3 + MUL_ADD_C a3L,a3R,b0L,b0R,c1,c2,c3 + STD c1,24(r_ptr) + COPY %r0,c1 + + MUL_ADD_C a3L,a3R,b1L,b1R,c2,c3,c1 + MUL_ADD_C a2L,a2R,b2L,b2R,c2,c3,c1 + MUL_ADD_C a1L,a1R,b3L,b3R,c2,c3,c1 + STD c2,32(r_ptr) + COPY %r0,c2 + + MUL_ADD_C a2L,a2R,b3L,b3R,c3,c1,c2 + MUL_ADD_C a3L,a3R,b2L,b2R,c3,c1,c2 + STD c3,40(r_ptr) + COPY %r0,c3 + + MUL_ADD_C a3L,a3R,b3L,b3R,c1,c2,c3 + STD c1,48(r_ptr) + STD c2,56(r_ptr) + + .EXIT + FLDD -88(%sp),%fr13 + FLDD -96(%sp),%fr12 + LDD -104(%sp),%r6 ; restore r6 + LDD -112(%sp),%r5 ; restore r5 + LDD -120(%sp),%r4 ; restore r4 + BVE (%rp) + LDD,MB -128(%sp),%r3 + + .PROCEND + + + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SPACE $PRIVATE$,SORT=16 + .IMPORT $global$,DATA + .SPACE $TEXT$ + .SUBSPA $CODE$ + .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=0x2c,SORT=16 +C$4 + .ALIGN 8 + .STRINGZ "Division would overflow (%d)\n" + .END