;;; -*- Mode: Lisp; Package: bignum -*-
;;;
;;; **********************************************************************
;;; This code was written by Raymond Toy and Douglas T. Crosher and
;;; has been placed in the Public domain, and is provided 'as is'.
;;;
(ext:file-comment
  "$Header$")
;;;
;;; **********************************************************************
;;;
;;; Karatsuba multiplication.
;;; References:  Knuth, Seminumerical Algorithms.
;;;

;;; Based on an implementation by Raymond Toy, who's explanation follows:
;;;
;;; Let U = (b^n)*U1 + U0 and V = (b^n)*V1 + V0 be two positive
;;; integers consisting of b^(2n) base-b digits.  Typically, we use
;;; either base 2 digits or base 2^32 digits.
;;;
;;; The product U*V can be written
;;;
;;;    U*V = (b^(2n))*U1*V1 + (b^n)(U0*V1 + U1*V0) + U0*V0
;;;
;;; However, the middle term can be written as
;;;
;;;    U0*V1 + U1*V0 = U0*V0 + U1*V1.- (U0 - U1)*(V0 - V1) 
;;;
;;; Thus,
;;;
;;;    U*V = (b^(2n))*U1+V1 + (b^n)*[U1*V1 + U0*V0 - (U0 - U1)*(V0 - V1)]
;;;           U0*V0
;;;
;;; We can that now we require only three multiplies:  U1*V1, U0*V0, and
;;; (U0 - U1)*(V0 - V1).  This should speed up multiplication considerably.
;;;
;;; For implemenation, we want to work with only positive numbers.  Since
;;; U0-U1 or V0-V1 may be negative, we need to account for this.  To do so, we
;;; compute
;;;
;;;    U1*V1 + U0*V0 - Su*Sv*|U0 - U1|*|V0 - V1|
;;;
;;; where Su = sign (U0 - U1) and Sv = sign (V0 - V1).
;;;
;;; Note that some authors use the expression
;;;
;;;    U0*V1 + U1*V0 = (U0 + U1)*(V0 + V1) - U0*V0 - U1*V1
;;;
;;; We don't because if U0 and U1 each contain b^n digits, U0 + U1 can
;;; overflow to b^n + 1 digits.  This complicates the multiplication quite a
;;; bit, so we use |U0 - U1|.  This is guaranteed not to overflow since U0 and
;;; U1 are both positive with b^n digits.
;;;

(in-package "BIGNUM")


;;; karatsuba-internal  --  Internal.
;;;
;;; Main routine for Karatsuba multiplication.
;;; By Douglas Crosher.
;;;
;;; The result is placed in the result bignum starting at position RET, and is
;;; of size 2xD words. An extra D words are used during the calculations.
;;;
;;; The calculations are packed into this working space as follows:
;;;
;;; ------------------------------------------------------------------
;;; |    |   D/2   |   D/2   |  D/2    |   D/2   |   D/2   |  D/2    |
;;; |----+---------+---------+---------+---------+---------+---------|
;;; | A. | |u1-u0| | |v1-v0| |    x    |       |u1-u0|*|v1-v0|       | 
;;; | B. |           u0 * v0           |       |u1-u0|*|v1-v0|       |
;;; | C. | u0 * v0 - S|u1-u0|*|v1-v0|  |             x               |
;;; | D. | u0 * v0 - S|u1-u0|*|v1-v0|  |          v1 * v0            |
;;; | E. | u0 * v0 - S|u1-u0|*|v1-v0| + v1 * v0  |    x    |    x    |
;;; ------------------------------------------------------------------
;;;
;;; A. Calculate |u1-u0| place the result in the first D/2 words, and
;;;    calculate |v1-v0| placing the result the second D/2 words. The
;;;    calculate |u1-u0|*|v1-v0| using the last 3 D/2 words for the result and
;;;    subcalculate scratch area.
;;;
;;; B. Calculate the product u0*v0, using the first 3 D/2 words for the result
;;;    and subcalculate scratch area.
;;;
;;; C. Sum the results of first two stages into the first 3 D/2 words.
;;;    The calculation is (u0*v0) + ((u0*v0) - S|u1-u0|*|v1-v0|) *2^(d/2).
;;;
;;; D. Calculate the product u1*v1, using the last 3 D/2 words for the result
;;;    and subcalculate scratch area.
;;;
;;; E. Finally sum in u1*v1 to give the result in the first 4 D/2 words.
;;;
(defun karatsuba-internal (u v ret d result)
  (declare (type bignum-index u v ret)
	   (type bignum-index d)
	   (type bignum-type result)
	   (optimize (speed 3) (safety 0) (debug 0)))

  (labels ((bignum-multiply-in-place (u v ret d)
	     ;; Clear out the result area, which might contain garbage for
	     ;; other recursions!
	     (do ((i ret (1+ i))
		  (end (+ ret (* 2 d))))
		 ((>= i end))
	       (setf (%bignum-ref result i) 0))
	     ;; Stolen from bignum.lisp multiply-bignum: a straight forward
	     ;; multiplication via the classical schoolbook algorithm.
	     (dotimes (i d)
	       (declare (type bignum-index i))
	       (let ((carry-digit 0)
		     (x (%bignum-ref result (+ u i)))
		     (k (+ ret i)))
		 (declare (type bignum-index k)
			  (type bignum-element-type carry-digit x))
		 (do ((j 0 (1+ j))
		      (v+j v (1+ v+j)))
		     ((>= j d))
		   (declare (type bignum-index j v+j))
		   (multiple-value-bind (big-carry res-digit)
		       (%multiply-and-add x
					  (%bignum-ref result v+j)
					  (%bignum-ref result k)
					  carry-digit)
		     (declare (type bignum-element-type big-carry res-digit))
		     (setf (%bignum-ref result k) res-digit)
		     (setf carry-digit big-carry)
		     (incf k)))
		 (setf (%bignum-ref result k) carry-digit))))
	   (abs-diff (n x y res result)
	     (declare (type bignum-index n)
		      (type bignum-index x y res))
	     (flet ((bigger-p ()
		      ;; Return T if X >= Y.
		      (do ((i (1- (+ x n)) (1- i))
			   (j (1- (+ y n)) (1- j)))
			  (())
			(declare (type bignum-index i j))
			(let ((x-digit (%bignum-ref result i))
			      (y-digit (%bignum-ref result j)))
			  (when (/= x-digit y-digit)
			    (return (>= x-digit y-digit))))
			(when (= i x)
			  (return t)))))
	       (declare (inline bigger-p))
	       (let ((sign 1))
		 (when (not (bigger-p))
		   (rotatef x y)
		   (setf sign -1))
		 (let ((borrow 1))
		   (do ((i x (1+ i))
			(j y (1+ j))
			(k res (1+ k))
			(end (+ x n)))
		       ((>= i end))
		     (declare (type bignum-index i j k end))
		     (multiple-value-bind (diff new-borrow)
			 (%subtract-with-borrow (%bignum-ref result i)
						(%bignum-ref result j)
						borrow)
		       (setf (%bignum-ref result k) diff)
		       (setf borrow new-borrow)))
		   sign))))
	   (propagate-carry (start end carry)
	     (declare (type bignum-type result)
		      (type bignum-index start end)
		      (type (integer -4 4) carry))
	     (cond ((minusp carry)
		    (multiple-value-bind (sum borrow)
			(%subtract-with-borrow (%bignum-ref result start)
					       (- carry) 1)
		      (setf (%bignum-ref result start) sum)
		      (do ((i (1+ start) (1+ i)))
			  ((or (>= i end) (= borrow 1)) (- borrow 1))
			(declare (type bignum-index i))
			(multiple-value-bind (sum next-borrow)
			    (%subtract-with-borrow (%bignum-ref result i)
						   0 borrow)
			  (setf borrow next-borrow)
			  (setf (%bignum-ref result i) sum)))))
		   (t
		    (multiple-value-bind (sum carry)
			(%add-with-carry (%bignum-ref result start) carry 0)
		      (setf (%bignum-ref result start) sum)
		      (do ((i (1+ start) (1+ i)))
			  ((or (>= i end) (zerop carry)) carry)
			(declare (type bignum-index i))
			(multiple-value-bind (sum next-carry)
			    (%add-with-carry (%bignum-ref result i) 0 carry)
			  (setf carry next-carry)
			  (setf (%bignum-ref result i) sum)))))))
	   (stageC-add ()
	     (let ((carry11 0)
		   (carry12 0)
		   (carry2 0)
		   (d/2 (ash d -1)))
	       (do ((i0 ret (1+ i0))
		    (i1 (+ ret d/2) (1+ i1))
		    (i2 (+ ret d) (1+ i2))
		    (i3 (+ ret d d/2) (1+ i3))
		    (i4 (+ ret d d) (1+ i4))
		    (end (+ ret d/2)))
		   ((>= i0 end))
		 (declare (type bignum-index i0 i1 i2 i3 i4))
		 (let ((b (%bignum-ref result i1)))
		   (multiple-value-bind (sum c1)
		       (%add-with-carry b (%bignum-ref result i0) carry11)
		     (multiple-value-bind (sum c2)
			 (%add-with-carry sum (%bignum-ref result i3) carry12)
		       (setf carry11 c1)
		       (setf carry12 c2)
		       (setf (%bignum-ref result i1) sum)))
		   (multiple-value-bind (sum carry)
		       (%add-with-carry b (%bignum-ref result i4) carry2)
		     (setf (%bignum-ref result i2) sum)
		     (setf carry2 carry))))
	       (+ (propagate-carry (+ ret d) (+ ret d d/2) (+ carry11 carry12))
		  carry2)))
	   (stageC-subtract ()
	     (let ((carry11 0)
		   (borrow12 1)
		   (borrow2 1)
		   (d/2 (ash d -1)))
	       (do ((i0 ret (1+ i0))
		    (i1 (+ ret d/2) (1+ i1))
		    (i2 (+ ret d) (1+ i2))
		    (i3 (+ ret d d/2) (1+ i3))
		    (i4 (+ ret d d) (1+ i4))
		    (end (+ ret d/2)))
		   ((>= i0 end))
		 (declare (type bignum-index i0 i1 i2 i3 i4))
		 (let ((b (%bignum-ref result i1)))
		   (multiple-value-bind (sum c1)
		       (%add-with-carry b (%bignum-ref result i0) carry11)
		     (multiple-value-bind (sum b2)
			 (%subtract-with-borrow sum (%bignum-ref result i3)
						borrow12)
		       (setf carry11 c1)
		       (setf borrow12 b2)
		       (setf (%bignum-ref result i1) sum)))
		   (multiple-value-bind (sum borrow)
		       (%subtract-with-borrow b (%bignum-ref result i4)
					      borrow2)
		     (setf (%bignum-ref result i2) sum)
		     (setf borrow2 borrow))))
	       (+ (propagate-carry (+ ret d) (+ ret d d/2)
				   (+ carry11 borrow12 -1))
		  (- borrow2 1))))
	   (stageE-add (stageC-carry)
	     ;; Sum the above pieces, result to R0124.
	     (let ((carry1 0)
		   (carry21 0)
		   (carry22 0)
		   (d/2 (ash d -1)))
	       (declare (type (mod 2) carry1 carry21 carry22))
	       (do ((i0 ret (1+ i0))
		    (i1 (+ ret d/2) (1+ i1))
		    (i2 (+ ret d) (1+ i2))
		    (i3 (+ ret d d/2) (1+ i3))
		    (i4 (+ ret d d) (1+ i4))
		    (end (+ ret d/2)))
		   ((>= i0 end))
		 (declare (type bignum-index i0 i1 i2 i3 i4))
		 (multiple-value-bind (sum c1)
		     (%add-with-carry (%bignum-ref result i1)
				      (%bignum-ref result i3)
				      carry1)
		   (setf carry1 c1)
		   (setf (%bignum-ref result i1) sum))
		 (let ((e (%bignum-ref result i4)))
		   (multiple-value-bind (sum c1)
		       (%add-with-carry e (%bignum-ref result i2) carry21)
		     (multiple-value-bind (sum c2)
			 (%add-with-carry sum (%bignum-ref result i3) carry22)
		       (setf carry21 c1)
		       (setf carry22 c2)
		       (setf (%bignum-ref result i2) sum)))
		   (setf (%bignum-ref result i3) e)))
	       (assert (zerop (propagate-carry (+ ret d) (+ ret d d) carry1)))
	       (assert (zerop (propagate-carry (+ ret d d/2) (+ ret d d)
					       (+ carry21 carry22 stageC-carry)))))))

    (cond ((<= d 10)
	   (bignum-multiply-in-place u v ret d))
	  (t
	   (let* ((d/2 (ash d -1))
		  (ret1 (+ ret d/2))
		  (ret3 (+ ret d d/2))
		  (u-hi (+ u d/2))
		  (v-hi (+ v d/2))
		  (diff-sign (* (abs-diff d/2 u u-hi ret result)
				(abs-diff d/2 v v-hi ret1 result))))
	     (declare (type bignum-index d/2 ret1 ret3 u-hi v-hi)
		      (type (member -1 1) diff-sign))
	     (karatsuba-internal ret ret1 ret3 d/2 result)
	     (karatsuba-internal u v ret d/2 result)
	     (let ((stagec-carry (if (minusp diff-sign)
				     (stagec-add)
				     (stagec-subtract))))
	       (karatsuba-internal u-hi v-hi ret3 d/2 result)
	       (stageE-add stageC-carry))))))
  result)


;;; karatsuba  --  Interface.
;;;
;;; Multiply two bignums using the Karatsuba multiplication technique.
;;;
;;; The arguments are currently copied to the end of the working
;;; result causing some extra consing, and the arguments are both
;;; extended to have the same width implementation rather than
;;; exploiting a small width.
;;;
(defun karatsuba (x y)
  (declare (type bignum-type x y)
	   (optimize (speed 3) (safety 0) (debug 0)))
  (flet ((power-of-two (n)
	   ;; Compute the smallest power of two greater than or equal
	   ;; to the given number.
	   (declare (type bignum-index n))
	   (let ((power 1))
	     (declare (type bignum-index power))
	     (loop while (< power n) do
		   (setf power (ash power 1)))
	     power)))
    (let* ((x-plusp (%bignum-0-or-plusp x (%bignum-length x)))
	   (y-plusp (%bignum-0-or-plusp y (%bignum-length y)))
	   (x (if x-plusp x (negate-bignum x)))
	   (y (if y-plusp y (negate-bignum y)))
	   (negate-res (not (eq x-plusp y-plusp)))
	   (x-words (ceiling (integer-length x) 32))
	   (y-words (ceiling (integer-length y) 32))
	   (n (power-of-two (max x-words y-words))))
      (declare (type bignum-index n))

      (let ((result (%allocate-bignum (* 5 n))))

      ;; Copy X, Y to the end of the result area, so that we can use
      ;; indices off of the result area for accessing everything.
      (do ((k 0 (1+ k))
	   (x-save (* 3 n) (1+ x-save)))
	  ((>= k x-words))
	(setf (%bignum-ref result x-save)
	      (%bignum-ref x k)))
      
      (do ((k 0 (1+ k))
	   (save (* 4 n) (1+ save)))
	  ((>= k y-words))
	(setf (%bignum-ref result save)
	      (%bignum-ref y k)))
      ;; Do it.
      (karatsuba-internal (* 3 n) (* 4 n) 0 n result)
      ;; Convert the result into a bignum.
      (let ((new-len (1+ (* 2 n)))
	    (n5 (* n 5)))
	(do ((k (* 2 n) (1+ k)))
	    ((>= k n5))
	  (assert (< k (%bignum-length result)))
	  (setf (%bignum-ref result k) 0))
	(%bignum-set-length result new-len)
	(when negate-res
	  (negate-bignum-in-place result))
	(%normalize-bignum result (1+ (* 2 n))))))))


;;;; Link into the CMUCL bignum support.

(defun classical-multiply-bignums (a b)
  (declare (type bignum-type a b))
  (let* ((a-plusp (%bignum-0-or-plusp a (%bignum-length a)))
	 (b-plusp (%bignum-0-or-plusp b (%bignum-length b)))
	 (a (if a-plusp a (negate-bignum a)))
	 (b (if b-plusp b (negate-bignum b)))
	 (len-a (%bignum-length a))
	 (len-b (%bignum-length b))
	 (len-res (+ len-a len-b))
	 (res (%allocate-bignum len-res))
	 (negate-res (not (eq a-plusp b-plusp))))
    (declare (type bignum-index len-a len-b len-res))
    (dotimes (i len-a)
      (declare (type bignum-index i))
      (let ((carry-digit 0)
	    (x (%bignum-ref a i))
	    (k i))
	(declare (type bignum-index k)
		 (type bignum-element-type carry-digit x))
	(dotimes (j len-b)
	  (multiple-value-bind (big-carry res-digit)
			       (%multiply-and-add x (%bignum-ref b j)
						  (%bignum-ref res k)
						  carry-digit)
	    (declare (type bignum-element-type big-carry res-digit))
	    (setf (%bignum-ref res k) res-digit)
	    (setf carry-digit big-carry)
	    (incf k)))
	(setf (%bignum-ref res k) carry-digit)))
    (when negate-res (negate-bignum-in-place res))
    (%normalize-bignum res len-res)))

(defun multiply-bignums (a b)
  (declare (type bignum-type a b))
  ;; Use either the classical algorithm or the Karatsuba algorithm
  ;; depending on the size of the numbers.
  (let* ((len-a (%bignum-length a))
	 (len-b (%bignum-length b))
	 (min-len (min len-a len-b))
	 (max-len (max len-a len-b)))
    ;; If both numbers are short, use the classical algorithm.  If the
    ;; numbers vary greatly in length, use the classical algorithm.
    ;; Otherwise use the Karatsuba algorithm.
    (if (or (<= min-len #.(floor 512 32))
	    (>= max-len (* 4 min-len)))
	(classical-multiply-bignums a b)
	(karatsuba a b))))


#|
;;;; Test code.

(defun karat-time (n size)
  (declare (fixnum n))
  (let ((max-num (ash 1 size))
	(true 0)
	(kar 0))
    (time
     (dotimes (i n)
       (declare (fixnum i))
       (let ((x (random max-num))
	     (y (random max-num)))
	 (dotimes (j 10)
	   (declare (fixnum j))
	   (setf true (classical-multiply-bignums x y))))))
    (time
     (dotimes (i n)
       (declare (fixnum i))
       (let ((x (random max-num))
	     (y (random max-num)))
	 (dotimes (j 10)
	   (declare (fixnum j))
	   (setf kar (multiply-bignums x y))))))
    (let ((bad (/= kar true)))
      (format t "DIFF~%")
      #+nil
      (when bad
	(format t "(- (* ~X ~X) ~X)~%" x y kar)
	(format t "~X~%~X~%" (* x y) kar)))
    (values)))

(defun karat-test (n size)
  (declare (fixnum n))
  (dotimes (k n)
    (let* ((max-num (ash 1 size))
	   (x (random max-num))
	   (y (random max-num))
	   (true 0)
	   (kar 0))
      (declare (fixnum k))
      (setf true (classical-multiply-bignums x y))
      (setf kar (multiply-bignums x y))
      (let ((bad (/= kar true)))
	(when bad
	  (format t "(- (* ~X ~X) ~X)~%" x y kar)
	  (format t "~X~%~X~%" (* x y) kar)))))
  (values))
|#