Crypto++  5.6.4
Free C++ class library of cryptographic schemes
integer.cpp
1 // integer.cpp - written and placed in the public domain by Wei Dai
2 // contains public domain code contributed by Alister Lee and Leonard Janke
3 
4 #include "pch.h"
5 #include "config.h"
6 
7 #if CRYPTOPP_MSC_VERSION
8 # pragma warning(disable: 4100)
9 #endif
10 
11 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12 # pragma GCC diagnostic ignored "-Wunused"
13 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14 #endif
15 
16 #ifndef CRYPTOPP_IMPORTS
17 
18 #include "integer.h"
19 #include "secblock.h"
20 #include "modarith.h"
21 #include "nbtheory.h"
22 #include "smartptr.h"
23 #include "algparam.h"
24 #include "filters.h"
25 #include "asn.h"
26 #include "oids.h"
27 #include "words.h"
28 #include "pubkey.h" // for P1363_KDF2
29 #include "sha.h"
30 #include "cpu.h"
31 #include "misc.h"
32 
33 #include <iostream>
34 
35 #if (_MSC_VER >= 1400) && !defined(_M_ARM)
36  #include <intrin.h>
37 #endif
38 
39 #ifdef __DECCXX
40  #include <c_asm.h>
41 #endif
42 
43 #ifdef CRYPTOPP_MSVC6_NO_PP
44  #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45 #endif
46 
47 // "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
48 #if (__SUNPRO_CC >= 0x5130)
49 # define MAYBE_CONST
50 # define MAYBE_UNCONST_CAST const_cast<word*>
51 #else
52 # define MAYBE_CONST const
53 # define MAYBE_UNCONST_CAST
54 #endif
55 
56 // "Inline assembly operands don't work with .intel_syntax",
57 // http://llvm.org/bugs/show_bug.cgi?id=24232
58 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
59 # undef CRYPTOPP_X86_ASM_AVAILABLE
60 # undef CRYPTOPP_X32_ASM_AVAILABLE
61 # undef CRYPTOPP_X64_ASM_AVAILABLE
62 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
66 #else
67 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
68 #endif
69 
70 NAMESPACE_BEGIN(CryptoPP)
71 
72 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
73 {
74  if (valueType != typeid(Integer))
75  return false;
76  *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
77  return true;
78 }
79 
80 inline static int Compare(const word *A, const word *B, size_t N)
81 {
82  while (N--)
83  if (A[N] > B[N])
84  return 1;
85  else if (A[N] < B[N])
86  return -1;
87 
88  return 0;
89 }
90 
91 inline static int Increment(word *A, size_t N, word B=1)
92 {
93  assert(N);
94  word t = A[0];
95  A[0] = t+B;
96  if (A[0] >= t)
97  return 0;
98  for (unsigned i=1; i<N; i++)
99  if (++A[i])
100  return 0;
101  return 1;
102 }
103 
104 inline static int Decrement(word *A, size_t N, word B=1)
105 {
106  assert(N);
107  word t = A[0];
108  A[0] = t-B;
109  if (A[0] <= t)
110  return 0;
111  for (unsigned i=1; i<N; i++)
112  if (A[i]--)
113  return 0;
114  return 1;
115 }
116 
117 static void TwosComplement(word *A, size_t N)
118 {
119  Decrement(A, N);
120  for (unsigned i=0; i<N; i++)
121  A[i] = ~A[i];
122 }
123 
124 static word AtomicInverseModPower2(word A)
125 {
126  assert(A%2==1);
127 
128  word R=A%8;
129 
130  for (unsigned i=3; i<WORD_BITS; i*=2)
131  R = R*(2-R*A);
132 
133  assert(R*A==1);
134  return R;
135 }
136 
137 // ********************************************************
138 
139 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
140  #define Declare2Words(x) word x##0, x##1;
141  #define AssignWord(a, b) a##0 = b; a##1 = 0;
142  #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
143  #define LowWord(a) a##0
144  #define HighWord(a) a##1
145  #ifdef _MSC_VER
146  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
147  #ifndef __INTEL_COMPILER
148  #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
149  #endif
150  #elif defined(__DECCXX)
151  #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
152  #elif defined(__x86_64__)
153  #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
154  // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
155  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
156  #else
157  #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
158  #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159  #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
160  #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
161  #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
162  #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
163  #endif
164  #endif
165  #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
166  #ifndef Double3Words
167  #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
168  #endif
169  #ifndef Acc2WordsBy2
170  #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
171  #endif
172  #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
173  #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
174  #define GetCarry(u) u##1
175  #define GetBorrow(u) u##1
176 #else
177  #define Declare2Words(x) dword x;
178  #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
179  #define MultiplyWords(p, a, b) p = __emulu(a, b);
180  #else
181  #define MultiplyWords(p, a, b) p = (dword)a*b;
182  #endif
183  #define AssignWord(a, b) a = b;
184  #define Add2WordsBy1(a, b, c) a = b + c;
185  #define Acc2WordsBy2(a, b) a += b;
186  #define LowWord(a) word(a)
187  #define HighWord(a) word(a>>WORD_BITS)
188  #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
189  #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
190  #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
191  #define GetCarry(u) HighWord(u)
192  #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
193 #endif
194 #ifndef MulAcc
195  #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
196 #endif
197 #ifndef Acc2WordsBy1
198  #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
199 #endif
200 #ifndef Acc3WordsBy2
201  #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
202 #endif
203 
204 class DWord
205 {
206 public:
207  // Converity finding on default ctor. We've isntrumented the code,
208  // and cannot uncover a case where it affects a result.
209 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
210  // Repeating pattern of 1010 for debug builds to break things...
211  DWord() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
212 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
213  // Repeating pattern of 1010 for debug builds to break things...
214  DWord() : m_halfs() {memset(&m_halfs, 0xaa, sizeof(m_halfs));}
215 #else
216  DWord() {}
217 #endif
218 
219 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
220  explicit DWord(word low) : m_whole(low) {}
221 #else
222  explicit DWord(word low)
223  {
224  m_halfs.low = low;
225  m_halfs.high = 0;
226  }
227 #endif
228 
229  DWord(word low, word high)
230  {
231  m_halfs.low = low;
232  m_halfs.high = high;
233  }
234 
235  static DWord Multiply(word a, word b)
236  {
237  DWord r;
238  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
239  r.m_whole = (dword)a * b;
240  #elif defined(MultiplyWordsLoHi)
241  MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
242  #else
243  assert(0);
244  #endif
245  return r;
246  }
247 
248  static DWord MultiplyAndAdd(word a, word b, word c)
249  {
250  DWord r = Multiply(a, b);
251  return r += c;
252  }
253 
254  DWord & operator+=(word a)
255  {
256  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
257  m_whole = m_whole + a;
258  #else
259  m_halfs.low += a;
260  m_halfs.high += (m_halfs.low < a);
261  #endif
262  return *this;
263  }
264 
265  DWord operator+(word a)
266  {
267  DWord r;
268  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
269  r.m_whole = m_whole + a;
270  #else
271  r.m_halfs.low = m_halfs.low + a;
272  r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
273  #endif
274  return r;
275  }
276 
277  DWord operator-(DWord a)
278  {
279  DWord r;
280  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
281  r.m_whole = m_whole - a.m_whole;
282  #else
283  r.m_halfs.low = m_halfs.low - a.m_halfs.low;
284  r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
285  #endif
286  return r;
287  }
288 
289  DWord operator-(word a)
290  {
291  DWord r;
292  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
293  r.m_whole = m_whole - a;
294  #else
295  r.m_halfs.low = m_halfs.low - a;
296  r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
297  #endif
298  return r;
299  }
300 
301  // returns quotient, which must fit in a word
302  word operator/(word divisor);
303 
304  word operator%(word a);
305 
306  bool operator!() const
307  {
308  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
309  return !m_whole;
310  #else
311  return !m_halfs.high && !m_halfs.low;
312  #endif
313  }
314 
315  word GetLowHalf() const {return m_halfs.low;}
316  word GetHighHalf() const {return m_halfs.high;}
317  word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
318 
319 private:
320  union
321  {
322  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
323  dword m_whole;
324  #endif
325  struct
326  {
327  #ifdef IS_LITTLE_ENDIAN
328  word low;
329  word high;
330  #else
331  word high;
332  word low;
333  #endif
334  } m_halfs;
335  };
336 };
337 
338 class Word
339 {
340 public:
341  // Converity finding on default ctor. We've isntrumented the code,
342  // and cannot uncover a case where it affects a result.
343 #if defined(__COVERITY__)
344  Word() : m_whole(0) {}
345 #elif !defined(NDEBUG)
346  // Repeating pattern of 1010 for debug builds to break things...
347  Word() : m_whole(0) {memset(&m_whole, 0xaa, sizeof(m_whole));}
348 #else
349  Word() {}
350 #endif
351 
352  Word(word value) : m_whole(value) {}
353  Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
354 
355  static Word Multiply(hword a, hword b)
356  {
357  Word r;
358  r.m_whole = (word)a * b;
359  return r;
360  }
361 
362  Word operator-(Word a)
363  {
364  Word r;
365  r.m_whole = m_whole - a.m_whole;
366  return r;
367  }
368 
369  Word operator-(hword a)
370  {
371  Word r;
372  r.m_whole = m_whole - a;
373  return r;
374  }
375 
376  // returns quotient, which must fit in a word
377  hword operator/(hword divisor)
378  {
379  return hword(m_whole / divisor);
380  }
381 
382  bool operator!() const
383  {
384  return !m_whole;
385  }
386 
387  word GetWhole() const {return m_whole;}
388  hword GetLowHalf() const {return hword(m_whole);}
389  hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
390  hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
391 
392 private:
393  word m_whole;
394 };
395 
396 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
397 template <class S, class D>
398 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
399 {
400  CRYPTOPP_UNUSED(dummy);
401 
402  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
403  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
404 
405  // estimate the quotient: do a 2 S by 1 S divide
406  S Q;
407  if (S(B1+1) == 0)
408  Q = A[2];
409  else if (B1 > 0)
410  Q = D(A[1], A[2]) / S(B1+1);
411  else
412  Q = D(A[0], A[1]) / B0;
413 
414  // now subtract Q*B from A
415  D p = D::Multiply(B0, Q);
416  D u = (D) A[0] - p.GetLowHalf();
417  A[0] = u.GetLowHalf();
418  u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
419  A[1] = u.GetLowHalf();
420  A[2] += u.GetHighHalf();
421 
422  // Q <= actual quotient, so fix it
423  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
424  {
425  u = (D) A[0] - B0;
426  A[0] = u.GetLowHalf();
427  u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
428  A[1] = u.GetLowHalf();
429  A[2] += u.GetHighHalf();
430  Q++;
431  assert(Q); // shouldn't overflow
432  }
433 
434  return Q;
435 }
436 
437 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
438 template <class S, class D>
439 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
440 {
441  if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
442  return D(Ah.GetLowHalf(), Ah.GetHighHalf());
443  else
444  {
445  S Q[2];
446  T[0] = Al.GetLowHalf();
447  T[1] = Al.GetHighHalf();
448  T[2] = Ah.GetLowHalf();
449  T[3] = Ah.GetHighHalf();
450  Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
451  Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
452  return D(Q[0], Q[1]);
453  }
454 }
455 
456 // returns quotient, which must fit in a word
457 inline word DWord::operator/(word a)
458 {
459  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
460  return word(m_whole / a);
461  #else
462  hword r[4];
463  return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
464  #endif
465 }
466 
467 inline word DWord::operator%(word a)
468 {
469  #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
470  return word(m_whole % a);
471  #else
472  if (a < (word(1) << (WORD_BITS/2)))
473  {
474  hword h = hword(a);
475  word r = m_halfs.high % h;
476  r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
477  return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
478  }
479  else
480  {
481  hword r[4];
482  DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
483  return Word(r[0], r[1]).GetWhole();
484  }
485  #endif
486 }
487 
488 // ********************************************************
489 
490 // Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
491 #if defined(__GNUC__)
492  #define AddPrologue \
493  int result; \
494  __asm__ __volatile__ \
495  ( \
496  INTEL_NOPREFIX
497  #define AddEpilogue \
498  ATT_PREFIX \
499  : "=a" (result)\
500  : "d" (C), "a" (A), "D" (B), "c" (N) \
501  : "%esi", "memory", "cc" \
502  );\
503  return result;
504  #define MulPrologue \
505  __asm__ __volatile__ \
506  ( \
507  INTEL_NOPREFIX \
508  AS1( push ebx) \
509  AS2( mov ebx, edx)
510  #define MulEpilogue \
511  AS1( pop ebx) \
512  ATT_PREFIX \
513  : \
514  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
515  : "%esi", "memory", "cc" \
516  );
517  #define SquPrologue MulPrologue
518  #define SquEpilogue \
519  AS1( pop ebx) \
520  ATT_PREFIX \
521  : \
522  : "d" (s_maskLow16), "c" (C), "a" (A) \
523  : "%esi", "%edi", "memory", "cc" \
524  );
525  #define TopPrologue MulPrologue
526  #define TopEpilogue \
527  AS1( pop ebx) \
528  ATT_PREFIX \
529  : \
530  : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
531  : "memory", "cc" \
532  );
533 #else
534  #define AddPrologue \
535  __asm push edi \
536  __asm push esi \
537  __asm mov eax, [esp+12] \
538  __asm mov edi, [esp+16]
539  #define AddEpilogue \
540  __asm pop esi \
541  __asm pop edi \
542  __asm ret 8
543 #if _MSC_VER < 1300
544  #define SaveEBX __asm push ebx
545  #define RestoreEBX __asm pop ebx
546 #else
547  #define SaveEBX
548  #define RestoreEBX
549 #endif
550  #define SquPrologue \
551  AS2( mov eax, A) \
552  AS2( mov ecx, C) \
553  SaveEBX \
554  AS2( lea ebx, s_maskLow16)
555  #define MulPrologue \
556  AS2( mov eax, A) \
557  AS2( mov edi, B) \
558  AS2( mov ecx, C) \
559  SaveEBX \
560  AS2( lea ebx, s_maskLow16)
561  #define TopPrologue \
562  AS2( mov eax, A) \
563  AS2( mov edi, B) \
564  AS2( mov ecx, C) \
565  AS2( mov esi, L) \
566  SaveEBX \
567  AS2( lea ebx, s_maskLow16)
568  #define SquEpilogue RestoreEBX
569  #define MulEpilogue RestoreEBX
570  #define TopEpilogue RestoreEBX
571 #endif
572 
573 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
574 extern "C" {
575 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
576 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
577 }
578 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
579 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
580 {
581  word result;
582  __asm__ __volatile__
583  (
584  INTEL_NOPREFIX
585  AS1( neg %1)
586  ASJ( jz, 1, f)
587  AS2( mov %0,[%3+8*%1])
588  AS2( add %0,[%4+8*%1])
589  AS2( mov [%2+8*%1],%0)
590  ASL(0)
591  AS2( mov %0,[%3+8*%1+8])
592  AS2( adc %0,[%4+8*%1+8])
593  AS2( mov [%2+8*%1+8],%0)
594  AS2( lea %1,[%1+2])
595  ASJ( jrcxz, 1, f)
596  AS2( mov %0,[%3+8*%1])
597  AS2( adc %0,[%4+8*%1])
598  AS2( mov [%2+8*%1],%0)
599  ASJ( jmp, 0, b)
600  ASL(1)
601  AS2( mov %0, 0)
602  AS2( adc %0, %0)
603  ATT_NOPREFIX
604  : "=&r" (result), "+c" (N)
605  : "r" (C+N), "r" (A+N), "r" (B+N)
606  : "memory", "cc"
607  );
608  return (int)result;
609 }
610 
611 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
612 {
613  word result;
614  __asm__ __volatile__
615  (
616  INTEL_NOPREFIX
617  AS1( neg %1)
618  ASJ( jz, 1, f)
619  AS2( mov %0,[%3+8*%1])
620  AS2( sub %0,[%4+8*%1])
621  AS2( mov [%2+8*%1],%0)
622  ASL(0)
623  AS2( mov %0,[%3+8*%1+8])
624  AS2( sbb %0,[%4+8*%1+8])
625  AS2( mov [%2+8*%1+8],%0)
626  AS2( lea %1,[%1+2])
627  ASJ( jrcxz, 1, f)
628  AS2( mov %0,[%3+8*%1])
629  AS2( sbb %0,[%4+8*%1])
630  AS2( mov [%2+8*%1],%0)
631  ASJ( jmp, 0, b)
632  ASL(1)
633  AS2( mov %0, 0)
634  AS2( adc %0, %0)
635  ATT_NOPREFIX
636  : "=&r" (result), "+c" (N)
637  : "r" (C+N), "r" (A+N), "r" (B+N)
638  : "memory", "cc"
639  );
640  return (int)result;
641 }
642 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
643 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
644 {
645  AddPrologue
646 
647  // now: eax = A, edi = B, edx = C, ecx = N
648  AS2( lea eax, [eax+4*ecx])
649  AS2( lea edi, [edi+4*ecx])
650  AS2( lea edx, [edx+4*ecx])
651 
652  AS1( neg ecx) // ecx is negative index
653  AS2( test ecx, 2) // this clears carry flag
654  ASJ( jz, 0, f)
655  AS2( sub ecx, 2)
656  ASJ( jmp, 1, f)
657 
658  ASL(0)
659  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
660  AS2( mov esi,[eax+4*ecx])
661  AS2( adc esi,[edi+4*ecx])
662  AS2( mov [edx+4*ecx],esi)
663  AS2( mov esi,[eax+4*ecx+4])
664  AS2( adc esi,[edi+4*ecx+4])
665  AS2( mov [edx+4*ecx+4],esi)
666  ASL(1)
667  AS2( mov esi,[eax+4*ecx+8])
668  AS2( adc esi,[edi+4*ecx+8])
669  AS2( mov [edx+4*ecx+8],esi)
670  AS2( mov esi,[eax+4*ecx+12])
671  AS2( adc esi,[edi+4*ecx+12])
672  AS2( mov [edx+4*ecx+12],esi)
673 
674  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
675  ASJ( jmp, 0, b)
676 
677  ASL(2)
678  AS2( mov eax, 0)
679  AS1( setc al) // store carry into eax (return result register)
680 
681  AddEpilogue
682 }
683 
684 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
685 {
686  AddPrologue
687 
688  // now: eax = A, edi = B, edx = C, ecx = N
689  AS2( lea eax, [eax+4*ecx])
690  AS2( lea edi, [edi+4*ecx])
691  AS2( lea edx, [edx+4*ecx])
692 
693  AS1( neg ecx) // ecx is negative index
694  AS2( test ecx, 2) // this clears carry flag
695  ASJ( jz, 0, f)
696  AS2( sub ecx, 2)
697  ASJ( jmp, 1, f)
698 
699  ASL(0)
700  ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
701  AS2( mov esi,[eax+4*ecx])
702  AS2( sbb esi,[edi+4*ecx])
703  AS2( mov [edx+4*ecx],esi)
704  AS2( mov esi,[eax+4*ecx+4])
705  AS2( sbb esi,[edi+4*ecx+4])
706  AS2( mov [edx+4*ecx+4],esi)
707  ASL(1)
708  AS2( mov esi,[eax+4*ecx+8])
709  AS2( sbb esi,[edi+4*ecx+8])
710  AS2( mov [edx+4*ecx+8],esi)
711  AS2( mov esi,[eax+4*ecx+12])
712  AS2( sbb esi,[edi+4*ecx+12])
713  AS2( mov [edx+4*ecx+12],esi)
714 
715  AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
716  ASJ( jmp, 0, b)
717 
718  ASL(2)
719  AS2( mov eax, 0)
720  AS1( setc al) // store carry into eax (return result register)
721 
722  AddEpilogue
723 }
724 
725 #if CRYPTOPP_INTEGER_SSE2
726 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
727 {
728  AddPrologue
729 
730  // now: eax = A, edi = B, edx = C, ecx = N
731  AS2( lea eax, [eax+4*ecx])
732  AS2( lea edi, [edi+4*ecx])
733  AS2( lea edx, [edx+4*ecx])
734 
735  AS1( neg ecx) // ecx is negative index
736  AS2( pxor mm2, mm2)
737  ASJ( jz, 2, f)
738  AS2( test ecx, 2) // this clears carry flag
739  ASJ( jz, 0, f)
740  AS2( sub ecx, 2)
741  ASJ( jmp, 1, f)
742 
743  ASL(0)
744  AS2( movd mm0, DWORD PTR [eax+4*ecx])
745  AS2( movd mm1, DWORD PTR [edi+4*ecx])
746  AS2( paddq mm0, mm1)
747  AS2( paddq mm2, mm0)
748  AS2( movd DWORD PTR [edx+4*ecx], mm2)
749  AS2( psrlq mm2, 32)
750 
751  AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
752  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
753  AS2( paddq mm0, mm1)
754  AS2( paddq mm2, mm0)
755  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
756  AS2( psrlq mm2, 32)
757 
758  ASL(1)
759  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
760  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
761  AS2( paddq mm0, mm1)
762  AS2( paddq mm2, mm0)
763  AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
764  AS2( psrlq mm2, 32)
765 
766  AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
767  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
768  AS2( paddq mm0, mm1)
769  AS2( paddq mm2, mm0)
770  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
771  AS2( psrlq mm2, 32)
772 
773  AS2( add ecx, 4)
774  ASJ( jnz, 0, b)
775 
776  ASL(2)
777  AS2( movd eax, mm2)
778  AS1( emms)
779 
780  AddEpilogue
781 }
782 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
783 {
784  AddPrologue
785 
786  // now: eax = A, edi = B, edx = C, ecx = N
787  AS2( lea eax, [eax+4*ecx])
788  AS2( lea edi, [edi+4*ecx])
789  AS2( lea edx, [edx+4*ecx])
790 
791  AS1( neg ecx) // ecx is negative index
792  AS2( pxor mm2, mm2)
793  ASJ( jz, 2, f)
794  AS2( test ecx, 2) // this clears carry flag
795  ASJ( jz, 0, f)
796  AS2( sub ecx, 2)
797  ASJ( jmp, 1, f)
798 
799  ASL(0)
800  AS2( movd mm0, DWORD PTR [eax+4*ecx])
801  AS2( movd mm1, DWORD PTR [edi+4*ecx])
802  AS2( psubq mm0, mm1)
803  AS2( psubq mm0, mm2)
804  AS2( movd DWORD PTR [edx+4*ecx], mm0)
805  AS2( psrlq mm0, 63)
806 
807  AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
808  AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
809  AS2( psubq mm2, mm1)
810  AS2( psubq mm2, mm0)
811  AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
812  AS2( psrlq mm2, 63)
813 
814  ASL(1)
815  AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
816  AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
817  AS2( psubq mm0, mm1)
818  AS2( psubq mm0, mm2)
819  AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
820  AS2( psrlq mm0, 63)
821 
822  AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
823  AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
824  AS2( psubq mm2, mm1)
825  AS2( psubq mm2, mm0)
826  AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
827  AS2( psrlq mm2, 63)
828 
829  AS2( add ecx, 4)
830  ASJ( jnz, 0, b)
831 
832  ASL(2)
833  AS2( movd eax, mm2)
834  AS1( emms)
835 
836  AddEpilogue
837 }
838 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
839 #else
840 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
841 {
842  assert (N%2 == 0);
843 
844  Declare2Words(u);
845  AssignWord(u, 0);
846  for (size_t i=0; i<N; i+=2)
847  {
848  AddWithCarry(u, A[i], B[i]);
849  C[i] = LowWord(u);
850  AddWithCarry(u, A[i+1], B[i+1]);
851  C[i+1] = LowWord(u);
852  }
853  return int(GetCarry(u));
854 }
855 
856 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
857 {
858  assert (N%2 == 0);
859 
860  Declare2Words(u);
861  AssignWord(u, 0);
862  for (size_t i=0; i<N; i+=2)
863  {
864  SubtractWithBorrow(u, A[i], B[i]);
865  C[i] = LowWord(u);
866  SubtractWithBorrow(u, A[i+1], B[i+1]);
867  C[i+1] = LowWord(u);
868  }
869  return int(GetBorrow(u));
870 }
871 #endif
872 
873 static word LinearMultiply(word *C, const word *AA, word B, size_t N)
874 {
875  // http://github.com/weidai11/cryptopp/issues/188
876  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
877 
878  word carry=0;
879  for(unsigned i=0; i<N; i++)
880  {
881  Declare2Words(p);
882  MultiplyWords(p, A[i], B);
883  Acc2WordsBy1(p, carry);
884  C[i] = LowWord(p);
885  carry = HighWord(p);
886  }
887  return carry;
888 }
889 
890 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
891 
892 #define Mul_2 \
893  Mul_Begin(2) \
894  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
895  Mul_End(1, 1)
896 
897 #define Mul_4 \
898  Mul_Begin(4) \
899  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
900  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
901  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
902  Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
903  Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
904  Mul_End(5, 3)
905 
906 #define Mul_8 \
907  Mul_Begin(8) \
908  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
909  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
910  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
911  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
912  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
913  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
914  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
915  Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
916  Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
917  Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
918  Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
919  Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
920  Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
921  Mul_End(13, 7)
922 
923 #define Mul_16 \
924  Mul_Begin(16) \
925  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
926  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
927  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
928  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
929  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
930  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
931  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
932  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
933  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
934  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
935  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
936  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
937  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
938  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
939  Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
940  Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
941  Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
942  Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
943  Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
944  Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
945  Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
946  Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
947  Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
948  Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
949  Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
950  Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
951  Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
952  Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
953  Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
954  Mul_End(29, 15)
955 
956 #define Squ_2 \
957  Squ_Begin(2) \
958  Squ_End(2)
959 
960 #define Squ_4 \
961  Squ_Begin(4) \
962  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
963  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
964  Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
965  Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
966  Squ_End(4)
967 
968 #define Squ_8 \
969  Squ_Begin(8) \
970  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
971  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
972  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
973  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
974  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
975  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
976  Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
977  Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
978  Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
979  Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
980  Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
981  Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
982  Squ_End(8)
983 
984 #define Squ_16 \
985  Squ_Begin(16) \
986  Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
987  Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
988  Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
989  Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
990  Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
991  Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
992  Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
993  Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
994  Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
995  Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
996  Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
997  Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
998  Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
999  Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1000  Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1001  Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1002  Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1003  Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1004  Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1005  Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1006  Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1007  Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1008  Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1009  Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1010  Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1011  Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1012  Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1013  Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1014  Squ_End(16)
1015 
1016 #define Bot_2 \
1017  Mul_Begin(2) \
1018  Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1019  Bot_End(2)
1020 
1021 #define Bot_4 \
1022  Mul_Begin(4) \
1023  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1024  Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1025  Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1026  Bot_End(4)
1027 
1028 #define Bot_8 \
1029  Mul_Begin(8) \
1030  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1031  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1032  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1033  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1034  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1035  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1036  Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1037  Bot_End(8)
1038 
1039 #define Bot_16 \
1040  Mul_Begin(16) \
1041  Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1042  Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1043  Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1044  Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1045  Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1046  Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1047  Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1048  Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1049  Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1050  Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1051  Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1052  Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1053  Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1054  Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1055  Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1056  Bot_End(16)
1057 
1058 #endif
1059 
1060 #if 0
1061 #define Mul_Begin(n) \
1062  Declare2Words(p) \
1063  Declare2Words(c) \
1064  Declare2Words(d) \
1065  MultiplyWords(p, A[0], B[0]) \
1066  AssignWord(c, LowWord(p)) \
1067  AssignWord(d, HighWord(p))
1068 
1069 #define Mul_Acc(i, j) \
1070  MultiplyWords(p, A[i], B[j]) \
1071  Acc2WordsBy1(c, LowWord(p)) \
1072  Acc2WordsBy1(d, HighWord(p))
1073 
1074 #define Mul_SaveAcc(k, i, j) \
1075  R[k] = LowWord(c); \
1076  Add2WordsBy1(c, d, HighWord(c)) \
1077  MultiplyWords(p, A[i], B[j]) \
1078  AssignWord(d, HighWord(p)) \
1079  Acc2WordsBy1(c, LowWord(p))
1080 
1081 #define Mul_End(n) \
1082  R[2*n-3] = LowWord(c); \
1083  Acc2WordsBy1(d, HighWord(c)) \
1084  MultiplyWords(p, A[n-1], B[n-1])\
1085  Acc2WordsBy2(d, p) \
1086  R[2*n-2] = LowWord(d); \
1087  R[2*n-1] = HighWord(d);
1088 
1089 #define Bot_SaveAcc(k, i, j) \
1090  R[k] = LowWord(c); \
1091  word e = LowWord(d) + HighWord(c); \
1092  e += A[i] * B[j];
1093 
1094 #define Bot_Acc(i, j) \
1095  e += A[i] * B[j];
1096 
1097 #define Bot_End(n) \
1098  R[n-1] = e;
1099 #else
1100 #define Mul_Begin(n) \
1101  Declare2Words(p) \
1102  word c; \
1103  Declare2Words(d) \
1104  MultiplyWords(p, A[0], B[0]) \
1105  c = LowWord(p); \
1106  AssignWord(d, HighWord(p))
1107 
1108 #define Mul_Acc(i, j) \
1109  MulAcc(c, d, A[i], B[j])
1110 
1111 #define Mul_SaveAcc(k, i, j) \
1112  R[k] = c; \
1113  c = LowWord(d); \
1114  AssignWord(d, HighWord(d)) \
1115  MulAcc(c, d, A[i], B[j])
1116 
1117 #define Mul_End(k, i) \
1118  R[k] = c; \
1119  MultiplyWords(p, A[i], B[i]) \
1120  Acc2WordsBy2(p, d) \
1121  R[k+1] = LowWord(p); \
1122  R[k+2] = HighWord(p);
1123 
1124 #define Bot_SaveAcc(k, i, j) \
1125  R[k] = c; \
1126  c = LowWord(d); \
1127  c += A[i] * B[j];
1128 
1129 #define Bot_Acc(i, j) \
1130  c += A[i] * B[j];
1131 
1132 #define Bot_End(n) \
1133  R[n-1] = c;
1134 #endif
1135 
1136 #define Squ_Begin(n) \
1137  Declare2Words(p) \
1138  word c; \
1139  Declare2Words(d) \
1140  Declare2Words(e) \
1141  MultiplyWords(p, A[0], A[0]) \
1142  R[0] = LowWord(p); \
1143  AssignWord(e, HighWord(p)) \
1144  MultiplyWords(p, A[0], A[1]) \
1145  c = LowWord(p); \
1146  AssignWord(d, HighWord(p)) \
1147  Squ_NonDiag \
1148 
1149 #define Squ_NonDiag \
1150  Double3Words(c, d)
1151 
1152 #define Squ_SaveAcc(k, i, j) \
1153  Acc3WordsBy2(c, d, e) \
1154  R[k] = c; \
1155  MultiplyWords(p, A[i], A[j]) \
1156  c = LowWord(p); \
1157  AssignWord(d, HighWord(p)) \
1158 
1159 #define Squ_Acc(i, j) \
1160  MulAcc(c, d, A[i], A[j])
1161 
1162 #define Squ_Diag(i) \
1163  Squ_NonDiag \
1164  MulAcc(c, d, A[i], A[i])
1165 
1166 #define Squ_End(n) \
1167  Acc3WordsBy2(c, d, e) \
1168  R[2*n-3] = c; \
1169  MultiplyWords(p, A[n-1], A[n-1])\
1170  Acc2WordsBy2(p, e) \
1171  R[2*n-2] = LowWord(p); \
1172  R[2*n-1] = HighWord(p);
1173 
1174 
1175 void Baseline_Multiply2(word *R, const word *AA, const word *BB)
1176 {
1177  // http://github.com/weidai11/cryptopp/issues/188
1178  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1179  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1180 
1181  Mul_2
1182 }
1183 
1184 void Baseline_Multiply4(word *R, const word *AA, const word *BB)
1185 {
1186  // http://github.com/weidai11/cryptopp/issues/188
1187  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1188  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1189 
1190  Mul_4
1191 }
1192 
1193 void Baseline_Multiply8(word *R, const word *AA, const word *BB)
1194 {
1195  // http://github.com/weidai11/cryptopp/issues/188
1196  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1197  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1198 
1199  Mul_8
1200 }
1201 
1202 void Baseline_Square2(word *R, const word *AA)
1203 {
1204  // http://github.com/weidai11/cryptopp/issues/188
1205  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1206 
1207  Squ_2
1208 }
1209 
1210 void Baseline_Square4(word *R, const word *AA)
1211 {
1212  // http://github.com/weidai11/cryptopp/issues/188
1213  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1214 
1215  Squ_4
1216 }
1217 
1218 void Baseline_Square8(word *R, const word *AA)
1219 {
1220  // http://github.com/weidai11/cryptopp/issues/188
1221  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1222 
1223  Squ_8
1224 }
1225 
1226 void Baseline_MultiplyBottom2(word *R, const word *AA, const word *BB)
1227 {
1228  // http://github.com/weidai11/cryptopp/issues/188
1229  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1230  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1231 
1232  Bot_2
1233 }
1234 
1235 void Baseline_MultiplyBottom4(word *R, const word *AA, const word *BB)
1236 {
1237  // http://github.com/weidai11/cryptopp/issues/188
1238  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1239  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1240 
1241  Bot_4
1242 }
1243 
1244 void Baseline_MultiplyBottom8(word *R, const word *AA, const word *BB)
1245 {
1246  // http://github.com/weidai11/cryptopp/issues/188
1247  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1248  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1249 
1250  Bot_8
1251 }
1252 
1253 #define Top_Begin(n) \
1254  Declare2Words(p) \
1255  word c; \
1256  Declare2Words(d) \
1257  MultiplyWords(p, A[0], B[n-2]);\
1258  AssignWord(d, HighWord(p));
1259 
1260 #define Top_Acc(i, j) \
1261  MultiplyWords(p, A[i], B[j]);\
1262  Acc2WordsBy1(d, HighWord(p));
1263 
1264 #define Top_SaveAcc0(i, j) \
1265  c = LowWord(d); \
1266  AssignWord(d, HighWord(d)) \
1267  MulAcc(c, d, A[i], B[j])
1268 
1269 #define Top_SaveAcc1(i, j) \
1270  c = L<c; \
1271  Acc2WordsBy1(d, c); \
1272  c = LowWord(d); \
1273  AssignWord(d, HighWord(d)) \
1274  MulAcc(c, d, A[i], B[j])
1275 
1276 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
1277 {
1278  CRYPTOPP_UNUSED(L);
1279  word T[4];
1280  Baseline_Multiply2(T, A, B);
1281  R[0] = T[2];
1282  R[1] = T[3];
1283 }
1284 
1285 void Baseline_MultiplyTop4(word *R, const word *AA, const word *BB, word L)
1286 {
1287  // http://github.com/weidai11/cryptopp/issues/188
1288  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1289  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1290 
1291  Top_Begin(4)
1292  Top_Acc(1, 1) Top_Acc(2, 0) \
1293  Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1294  Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1295  Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1296  Mul_End(1, 3)
1297 }
1298 
1299 void Baseline_MultiplyTop8(word *R, const word *AA, const word *BB, word L)
1300 {
1301  // http://github.com/weidai11/cryptopp/issues/188
1302  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1303  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1304 
1305  Top_Begin(8)
1306  Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1307  Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1308  Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1309  Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1310  Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1311  Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1312  Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1313  Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1314  Mul_End(5, 7)
1315 }
1316 
1317 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1318 void Baseline_Multiply16(word *R, const word *AA, const word *BB)
1319 {
1320  // http://github.com/weidai11/cryptopp/issues/188
1321  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1322  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1323 
1324  Mul_16
1325 }
1326 
1327 void Baseline_Square16(word *R, const word *AA)
1328 {
1329  // http://github.com/weidai11/cryptopp/issues/188
1330  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1331 
1332  Squ_16
1333 }
1334 
1335 void Baseline_MultiplyBottom16(word *R, const word *AA, const word *BB)
1336 {
1337  // http://github.com/weidai11/cryptopp/issues/188
1338  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1339  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1340 
1341  Bot_16
1342 }
1343 
1344 void Baseline_MultiplyTop16(word *R, const word *AA, const word *BB, word L)
1345 {
1346  // http://github.com/weidai11/cryptopp/issues/188
1347  MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1348  MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1349 
1350  Top_Begin(16)
1351  Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1352  Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1353  Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1354  Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1355  Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1356  Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1357  Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1358  Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1359  Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1360  Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1361  Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1362  Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1363  Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1364  Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1365  Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1366  Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1367  Mul_End(13, 15)
1368 }
1369 #endif
1370 
1371 // ********************************************************
1372 
1373 #if CRYPTOPP_INTEGER_SSE2
1374 
1375 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1376 
1377 #undef Mul_Begin
1378 #undef Mul_Acc
1379 #undef Top_Begin
1380 #undef Top_Acc
1381 #undef Squ_Acc
1382 #undef Squ_NonDiag
1383 #undef Squ_Diag
1384 #undef Squ_SaveAcc
1385 #undef Squ_Begin
1386 #undef Mul_SaveAcc
1387 #undef Bot_Acc
1388 #undef Bot_SaveAcc
1389 #undef Bot_End
1390 #undef Squ_End
1391 #undef Mul_End
1392 
1393 #define SSE2_FinalSave(k) \
1394  AS2( psllq xmm5, 16) \
1395  AS2( paddq xmm4, xmm5) \
1396  AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1397 
1398 #define SSE2_SaveShift(k) \
1399  AS2( movq xmm0, xmm6) \
1400  AS2( punpckhqdq xmm6, xmm0) \
1401  AS2( movq xmm1, xmm7) \
1402  AS2( punpckhqdq xmm7, xmm1) \
1403  AS2( paddd xmm6, xmm0) \
1404  AS2( pslldq xmm6, 4) \
1405  AS2( paddd xmm7, xmm1) \
1406  AS2( paddd xmm4, xmm6) \
1407  AS2( pslldq xmm7, 4) \
1408  AS2( movq xmm6, xmm4) \
1409  AS2( paddd xmm5, xmm7) \
1410  AS2( movq xmm7, xmm5) \
1411  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1412  AS2( psrlq xmm6, 16) \
1413  AS2( paddq xmm6, xmm7) \
1414  AS2( punpckhqdq xmm4, xmm0) \
1415  AS2( punpckhqdq xmm5, xmm0) \
1416  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1417  AS2( psrlq xmm6, 3*16) \
1418  AS2( paddd xmm4, xmm6) \
1419 
1420 #define Squ_SSE2_SaveShift(k) \
1421  AS2( movq xmm0, xmm6) \
1422  AS2( punpckhqdq xmm6, xmm0) \
1423  AS2( movq xmm1, xmm7) \
1424  AS2( punpckhqdq xmm7, xmm1) \
1425  AS2( paddd xmm6, xmm0) \
1426  AS2( pslldq xmm6, 4) \
1427  AS2( paddd xmm7, xmm1) \
1428  AS2( paddd xmm4, xmm6) \
1429  AS2( pslldq xmm7, 4) \
1430  AS2( movhlps xmm6, xmm4) \
1431  AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1432  AS2( paddd xmm5, xmm7) \
1433  AS2( movhps QWORD PTR [esp+12], xmm5)\
1434  AS2( psrlq xmm4, 16) \
1435  AS2( paddq xmm4, xmm5) \
1436  AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1437  AS2( psrlq xmm4, 3*16) \
1438  AS2( paddd xmm4, xmm6) \
1439  AS2( movq QWORD PTR [esp+4], xmm4)\
1440 
1441 #define SSE2_FirstMultiply(i) \
1442  AS2( movdqa xmm7, [esi+(i)*16])\
1443  AS2( movdqa xmm5, [edi-(i)*16])\
1444  AS2( pmuludq xmm5, xmm7) \
1445  AS2( movdqa xmm4, [ebx])\
1446  AS2( movdqa xmm6, xmm4) \
1447  AS2( pand xmm4, xmm5) \
1448  AS2( psrld xmm5, 16) \
1449  AS2( pmuludq xmm7, [edx-(i)*16])\
1450  AS2( pand xmm6, xmm7) \
1451  AS2( psrld xmm7, 16)
1452 
1453 #define Squ_Begin(n) \
1454  SquPrologue \
1455  AS2( mov esi, esp)\
1456  AS2( and esp, 0xfffffff0)\
1457  AS2( lea edi, [esp-32*n])\
1458  AS2( sub esp, 32*n+16)\
1459  AS1( push esi)\
1460  AS2( mov esi, edi) \
1461  AS2( xor edx, edx) \
1462  ASL(1) \
1463  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1464  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1465  AS2( movdqa [edi+2*edx], xmm0) \
1466  AS2( psrlq xmm0, 32) \
1467  AS2( movdqa [edi+2*edx+16], xmm0) \
1468  AS2( movdqa [edi+16*n+2*edx], xmm1) \
1469  AS2( psrlq xmm1, 32) \
1470  AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
1471  AS2( add edx, 16) \
1472  AS2( cmp edx, 8*(n)) \
1473  ASJ( jne, 1, b) \
1474  AS2( lea edx, [edi+16*n])\
1475  SSE2_FirstMultiply(0) \
1476 
1477 #define Squ_Acc(i) \
1478  ASL(LSqu##i) \
1479  AS2( movdqa xmm1, [esi+(i)*16]) \
1480  AS2( movdqa xmm0, [edi-(i)*16]) \
1481  AS2( movdqa xmm2, [ebx]) \
1482  AS2( pmuludq xmm0, xmm1) \
1483  AS2( pmuludq xmm1, [edx-(i)*16]) \
1484  AS2( movdqa xmm3, xmm2) \
1485  AS2( pand xmm2, xmm0) \
1486  AS2( psrld xmm0, 16) \
1487  AS2( paddd xmm4, xmm2) \
1488  AS2( paddd xmm5, xmm0) \
1489  AS2( pand xmm3, xmm1) \
1490  AS2( psrld xmm1, 16) \
1491  AS2( paddd xmm6, xmm3) \
1492  AS2( paddd xmm7, xmm1) \
1493 
1494 #define Squ_Acc1(i)
1495 #define Squ_Acc2(i) ASC(call, LSqu##i)
1496 #define Squ_Acc3(i) Squ_Acc2(i)
1497 #define Squ_Acc4(i) Squ_Acc2(i)
1498 #define Squ_Acc5(i) Squ_Acc2(i)
1499 #define Squ_Acc6(i) Squ_Acc2(i)
1500 #define Squ_Acc7(i) Squ_Acc2(i)
1501 #define Squ_Acc8(i) Squ_Acc2(i)
1502 
1503 #define SSE2_End(E, n) \
1504  SSE2_SaveShift(2*(n)-3) \
1505  AS2( movdqa xmm7, [esi+16]) \
1506  AS2( movdqa xmm0, [edi]) \
1507  AS2( pmuludq xmm0, xmm7) \
1508  AS2( movdqa xmm2, [ebx]) \
1509  AS2( pmuludq xmm7, [edx]) \
1510  AS2( movdqa xmm6, xmm2) \
1511  AS2( pand xmm2, xmm0) \
1512  AS2( psrld xmm0, 16) \
1513  AS2( paddd xmm4, xmm2) \
1514  AS2( paddd xmm5, xmm0) \
1515  AS2( pand xmm6, xmm7) \
1516  AS2( psrld xmm7, 16) \
1517  SSE2_SaveShift(2*(n)-2) \
1518  SSE2_FinalSave(2*(n)-1) \
1519  AS1( pop esp)\
1520  E
1521 
1522 #define Squ_End(n) SSE2_End(SquEpilogue, n)
1523 #define Mul_End(n) SSE2_End(MulEpilogue, n)
1524 #define Top_End(n) SSE2_End(TopEpilogue, n)
1525 
1526 #define Squ_Column1(k, i) \
1527  Squ_SSE2_SaveShift(k) \
1528  AS2( add esi, 16) \
1529  SSE2_FirstMultiply(1)\
1530  Squ_Acc##i(i) \
1531  AS2( paddd xmm4, xmm4) \
1532  AS2( paddd xmm5, xmm5) \
1533  AS2( movdqa xmm3, [esi]) \
1534  AS2( movq xmm1, QWORD PTR [esi+8]) \
1535  AS2( pmuludq xmm1, xmm3) \
1536  AS2( pmuludq xmm3, xmm3) \
1537  AS2( movdqa xmm0, [ebx])\
1538  AS2( movdqa xmm2, xmm0) \
1539  AS2( pand xmm0, xmm1) \
1540  AS2( psrld xmm1, 16) \
1541  AS2( paddd xmm6, xmm0) \
1542  AS2( paddd xmm7, xmm1) \
1543  AS2( pand xmm2, xmm3) \
1544  AS2( psrld xmm3, 16) \
1545  AS2( paddd xmm6, xmm6) \
1546  AS2( paddd xmm7, xmm7) \
1547  AS2( paddd xmm4, xmm2) \
1548  AS2( paddd xmm5, xmm3) \
1549  AS2( movq xmm0, QWORD PTR [esp+4])\
1550  AS2( movq xmm1, QWORD PTR [esp+12])\
1551  AS2( paddd xmm4, xmm0)\
1552  AS2( paddd xmm5, xmm1)\
1553 
1554 #define Squ_Column0(k, i) \
1555  Squ_SSE2_SaveShift(k) \
1556  AS2( add edi, 16) \
1557  AS2( add edx, 16) \
1558  SSE2_FirstMultiply(1)\
1559  Squ_Acc##i(i) \
1560  AS2( paddd xmm6, xmm6) \
1561  AS2( paddd xmm7, xmm7) \
1562  AS2( paddd xmm4, xmm4) \
1563  AS2( paddd xmm5, xmm5) \
1564  AS2( movq xmm0, QWORD PTR [esp+4])\
1565  AS2( movq xmm1, QWORD PTR [esp+12])\
1566  AS2( paddd xmm4, xmm0)\
1567  AS2( paddd xmm5, xmm1)\
1568 
1569 #define SSE2_MulAdd45 \
1570  AS2( movdqa xmm7, [esi]) \
1571  AS2( movdqa xmm0, [edi]) \
1572  AS2( pmuludq xmm0, xmm7) \
1573  AS2( movdqa xmm2, [ebx]) \
1574  AS2( pmuludq xmm7, [edx]) \
1575  AS2( movdqa xmm6, xmm2) \
1576  AS2( pand xmm2, xmm0) \
1577  AS2( psrld xmm0, 16) \
1578  AS2( paddd xmm4, xmm2) \
1579  AS2( paddd xmm5, xmm0) \
1580  AS2( pand xmm6, xmm7) \
1581  AS2( psrld xmm7, 16)
1582 
1583 #define Mul_Begin(n) \
1584  MulPrologue \
1585  AS2( mov esi, esp)\
1586  AS2( and esp, 0xfffffff0)\
1587  AS2( sub esp, 48*n+16)\
1588  AS1( push esi)\
1589  AS2( xor edx, edx) \
1590  ASL(1) \
1591  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1592  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1593  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1594  AS2( movdqa [esp+20+2*edx], xmm0) \
1595  AS2( psrlq xmm0, 32) \
1596  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1597  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1598  AS2( psrlq xmm1, 32) \
1599  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1600  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1601  AS2( psrlq xmm2, 32) \
1602  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1603  AS2( add edx, 16) \
1604  AS2( cmp edx, 8*(n)) \
1605  ASJ( jne, 1, b) \
1606  AS2( lea edi, [esp+20])\
1607  AS2( lea edx, [esp+20+16*n])\
1608  AS2( lea esi, [esp+20+32*n])\
1609  SSE2_FirstMultiply(0) \
1610 
1611 #define Mul_Acc(i) \
1612  ASL(LMul##i) \
1613  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1614  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1615  AS2( movdqa xmm2, [ebx]) \
1616  AS2( pmuludq xmm0, xmm1) \
1617  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1618  AS2( movdqa xmm3, xmm2) \
1619  AS2( pand xmm2, xmm0) \
1620  AS2( psrld xmm0, 16) \
1621  AS2( paddd xmm4, xmm2) \
1622  AS2( paddd xmm5, xmm0) \
1623  AS2( pand xmm3, xmm1) \
1624  AS2( psrld xmm1, 16) \
1625  AS2( paddd xmm6, xmm3) \
1626  AS2( paddd xmm7, xmm1) \
1627 
1628 #define Mul_Acc1(i)
1629 #define Mul_Acc2(i) ASC(call, LMul##i)
1630 #define Mul_Acc3(i) Mul_Acc2(i)
1631 #define Mul_Acc4(i) Mul_Acc2(i)
1632 #define Mul_Acc5(i) Mul_Acc2(i)
1633 #define Mul_Acc6(i) Mul_Acc2(i)
1634 #define Mul_Acc7(i) Mul_Acc2(i)
1635 #define Mul_Acc8(i) Mul_Acc2(i)
1636 #define Mul_Acc9(i) Mul_Acc2(i)
1637 #define Mul_Acc10(i) Mul_Acc2(i)
1638 #define Mul_Acc11(i) Mul_Acc2(i)
1639 #define Mul_Acc12(i) Mul_Acc2(i)
1640 #define Mul_Acc13(i) Mul_Acc2(i)
1641 #define Mul_Acc14(i) Mul_Acc2(i)
1642 #define Mul_Acc15(i) Mul_Acc2(i)
1643 #define Mul_Acc16(i) Mul_Acc2(i)
1644 
1645 #define Mul_Column1(k, i) \
1646  SSE2_SaveShift(k) \
1647  AS2( add esi, 16) \
1648  SSE2_MulAdd45\
1649  Mul_Acc##i(i) \
1650 
1651 #define Mul_Column0(k, i) \
1652  SSE2_SaveShift(k) \
1653  AS2( add edi, 16) \
1654  AS2( add edx, 16) \
1655  SSE2_MulAdd45\
1656  Mul_Acc##i(i) \
1657 
1658 #define Bot_Acc(i) \
1659  AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
1660  AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
1661  AS2( pmuludq xmm0, xmm1) \
1662  AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1663  AS2( paddq xmm4, xmm0) \
1664  AS2( paddd xmm6, xmm1)
1665 
1666 #define Bot_SaveAcc(k) \
1667  SSE2_SaveShift(k) \
1668  AS2( add edi, 16) \
1669  AS2( add edx, 16) \
1670  AS2( movdqa xmm6, [esi]) \
1671  AS2( movdqa xmm0, [edi]) \
1672  AS2( pmuludq xmm0, xmm6) \
1673  AS2( paddq xmm4, xmm0) \
1674  AS2( psllq xmm5, 16) \
1675  AS2( paddq xmm4, xmm5) \
1676  AS2( pmuludq xmm6, [edx])
1677 
1678 #define Bot_End(n) \
1679  AS2( movhlps xmm7, xmm6) \
1680  AS2( paddd xmm6, xmm7) \
1681  AS2( psllq xmm6, 32) \
1682  AS2( paddd xmm4, xmm6) \
1683  AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1684  AS1( pop esp)\
1685  MulEpilogue
1686 
1687 #define Top_Begin(n) \
1688  TopPrologue \
1689  AS2( mov edx, esp)\
1690  AS2( and esp, 0xfffffff0)\
1691  AS2( sub esp, 48*n+16)\
1692  AS1( push edx)\
1693  AS2( xor edx, edx) \
1694  ASL(1) \
1695  ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1696  ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1697  ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1698  AS2( movdqa [esp+20+2*edx], xmm0) \
1699  AS2( psrlq xmm0, 32) \
1700  AS2( movdqa [esp+20+2*edx+16], xmm0) \
1701  AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
1702  AS2( psrlq xmm1, 32) \
1703  AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
1704  AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
1705  AS2( psrlq xmm2, 32) \
1706  AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
1707  AS2( add edx, 16) \
1708  AS2( cmp edx, 8*(n)) \
1709  ASJ( jne, 1, b) \
1710  AS2( mov eax, esi) \
1711  AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
1712  AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
1713  AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
1714  AS2( pxor xmm4, xmm4)\
1715  AS2( pxor xmm5, xmm5)
1716 
1717 #define Top_Acc(i) \
1718  AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
1719  AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
1720  AS2( psrlq xmm0, 48) \
1721  AS2( paddd xmm5, xmm0)\
1722 
1723 #define Top_Column0(i) \
1724  AS2( psllq xmm5, 32) \
1725  AS2( add edi, 16) \
1726  AS2( add edx, 16) \
1727  SSE2_MulAdd45\
1728  Mul_Acc##i(i) \
1729 
1730 #define Top_Column1(i) \
1731  SSE2_SaveShift(0) \
1732  AS2( add esi, 16) \
1733  SSE2_MulAdd45\
1734  Mul_Acc##i(i) \
1735  AS2( shr eax, 16) \
1736  AS2( movd xmm0, eax)\
1737  AS2( movd xmm1, [ecx+4])\
1738  AS2( psrld xmm1, 16)\
1739  AS2( pcmpgtd xmm1, xmm0)\
1740  AS2( psrld xmm1, 31)\
1741  AS2( paddd xmm4, xmm1)\
1742 
1743 void SSE2_Square4(word *C, const word *A)
1744 {
1745  Squ_Begin(2)
1746  Squ_Column0(0, 1)
1747  Squ_End(2)
1748 }
1749 
1750 void SSE2_Square8(word *C, const word *A)
1751 {
1752  Squ_Begin(4)
1753 #ifndef __GNUC__
1754  ASJ( jmp, 0, f)
1755  Squ_Acc(2)
1756  AS1( ret) ASL(0)
1757 #endif
1758  Squ_Column0(0, 1)
1759  Squ_Column1(1, 1)
1760  Squ_Column0(2, 2)
1761  Squ_Column1(3, 1)
1762  Squ_Column0(4, 1)
1763  Squ_End(4)
1764 }
1765 
1766 void SSE2_Square16(word *C, const word *A)
1767 {
1768  Squ_Begin(8)
1769 #ifndef __GNUC__
1770  ASJ( jmp, 0, f)
1771  Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1772  AS1( ret) ASL(0)
1773 #endif
1774  Squ_Column0(0, 1)
1775  Squ_Column1(1, 1)
1776  Squ_Column0(2, 2)
1777  Squ_Column1(3, 2)
1778  Squ_Column0(4, 3)
1779  Squ_Column1(5, 3)
1780  Squ_Column0(6, 4)
1781  Squ_Column1(7, 3)
1782  Squ_Column0(8, 3)
1783  Squ_Column1(9, 2)
1784  Squ_Column0(10, 2)
1785  Squ_Column1(11, 1)
1786  Squ_Column0(12, 1)
1787  Squ_End(8)
1788 }
1789 
1790 void SSE2_Square32(word *C, const word *A)
1791 {
1792  Squ_Begin(16)
1793  ASJ( jmp, 0, f)
1794  Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1795  AS1( ret) ASL(0)
1796  Squ_Column0(0, 1)
1797  Squ_Column1(1, 1)
1798  Squ_Column0(2, 2)
1799  Squ_Column1(3, 2)
1800  Squ_Column0(4, 3)
1801  Squ_Column1(5, 3)
1802  Squ_Column0(6, 4)
1803  Squ_Column1(7, 4)
1804  Squ_Column0(8, 5)
1805  Squ_Column1(9, 5)
1806  Squ_Column0(10, 6)
1807  Squ_Column1(11, 6)
1808  Squ_Column0(12, 7)
1809  Squ_Column1(13, 7)
1810  Squ_Column0(14, 8)
1811  Squ_Column1(15, 7)
1812  Squ_Column0(16, 7)
1813  Squ_Column1(17, 6)
1814  Squ_Column0(18, 6)
1815  Squ_Column1(19, 5)
1816  Squ_Column0(20, 5)
1817  Squ_Column1(21, 4)
1818  Squ_Column0(22, 4)
1819  Squ_Column1(23, 3)
1820  Squ_Column0(24, 3)
1821  Squ_Column1(25, 2)
1822  Squ_Column0(26, 2)
1823  Squ_Column1(27, 1)
1824  Squ_Column0(28, 1)
1825  Squ_End(16)
1826 }
1827 
1828 void SSE2_Multiply4(word *C, const word *A, const word *B)
1829 {
1830  Mul_Begin(2)
1831 #ifndef __GNUC__
1832  ASJ( jmp, 0, f)
1833  Mul_Acc(2)
1834  AS1( ret) ASL(0)
1835 #endif
1836  Mul_Column0(0, 2)
1837  Mul_End(2)
1838 }
1839 
1840 void SSE2_Multiply8(word *C, const word *A, const word *B)
1841 {
1842  Mul_Begin(4)
1843 #ifndef __GNUC__
1844  ASJ( jmp, 0, f)
1845  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1846  AS1( ret) ASL(0)
1847 #endif
1848  Mul_Column0(0, 2)
1849  Mul_Column1(1, 3)
1850  Mul_Column0(2, 4)
1851  Mul_Column1(3, 3)
1852  Mul_Column0(4, 2)
1853  Mul_End(4)
1854 }
1855 
1856 void SSE2_Multiply16(word *C, const word *A, const word *B)
1857 {
1858  Mul_Begin(8)
1859 #ifndef __GNUC__
1860  ASJ( jmp, 0, f)
1861  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1862  AS1( ret) ASL(0)
1863 #endif
1864  Mul_Column0(0, 2)
1865  Mul_Column1(1, 3)
1866  Mul_Column0(2, 4)
1867  Mul_Column1(3, 5)
1868  Mul_Column0(4, 6)
1869  Mul_Column1(5, 7)
1870  Mul_Column0(6, 8)
1871  Mul_Column1(7, 7)
1872  Mul_Column0(8, 6)
1873  Mul_Column1(9, 5)
1874  Mul_Column0(10, 4)
1875  Mul_Column1(11, 3)
1876  Mul_Column0(12, 2)
1877  Mul_End(8)
1878 }
1879 
1880 void SSE2_Multiply32(word *C, const word *A, const word *B)
1881 {
1882  Mul_Begin(16)
1883  ASJ( jmp, 0, f)
1884  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1885  AS1( ret) ASL(0)
1886  Mul_Column0(0, 2)
1887  Mul_Column1(1, 3)
1888  Mul_Column0(2, 4)
1889  Mul_Column1(3, 5)
1890  Mul_Column0(4, 6)
1891  Mul_Column1(5, 7)
1892  Mul_Column0(6, 8)
1893  Mul_Column1(7, 9)
1894  Mul_Column0(8, 10)
1895  Mul_Column1(9, 11)
1896  Mul_Column0(10, 12)
1897  Mul_Column1(11, 13)
1898  Mul_Column0(12, 14)
1899  Mul_Column1(13, 15)
1900  Mul_Column0(14, 16)
1901  Mul_Column1(15, 15)
1902  Mul_Column0(16, 14)
1903  Mul_Column1(17, 13)
1904  Mul_Column0(18, 12)
1905  Mul_Column1(19, 11)
1906  Mul_Column0(20, 10)
1907  Mul_Column1(21, 9)
1908  Mul_Column0(22, 8)
1909  Mul_Column1(23, 7)
1910  Mul_Column0(24, 6)
1911  Mul_Column1(25, 5)
1912  Mul_Column0(26, 4)
1913  Mul_Column1(27, 3)
1914  Mul_Column0(28, 2)
1915  Mul_End(16)
1916 }
1917 
1918 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
1919 {
1920  Mul_Begin(2)
1921  Bot_SaveAcc(0) Bot_Acc(2)
1922  Bot_End(2)
1923 }
1924 
1925 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
1926 {
1927  Mul_Begin(4)
1928 #ifndef __GNUC__
1929  ASJ( jmp, 0, f)
1930  Mul_Acc(3) Mul_Acc(2)
1931  AS1( ret) ASL(0)
1932 #endif
1933  Mul_Column0(0, 2)
1934  Mul_Column1(1, 3)
1935  Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1936  Bot_End(4)
1937 }
1938 
1939 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
1940 {
1941  Mul_Begin(8)
1942 #ifndef __GNUC__
1943  ASJ( jmp, 0, f)
1944  Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1945  AS1( ret) ASL(0)
1946 #endif
1947  Mul_Column0(0, 2)
1948  Mul_Column1(1, 3)
1949  Mul_Column0(2, 4)
1950  Mul_Column1(3, 5)
1951  Mul_Column0(4, 6)
1952  Mul_Column1(5, 7)
1953  Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1954  Bot_End(8)
1955 }
1956 
1957 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
1958 {
1959  Mul_Begin(16)
1960 #ifndef __GNUC__
1961  ASJ( jmp, 0, f)
1962  Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1963  AS1( ret) ASL(0)
1964 #endif
1965  Mul_Column0(0, 2)
1966  Mul_Column1(1, 3)
1967  Mul_Column0(2, 4)
1968  Mul_Column1(3, 5)
1969  Mul_Column0(4, 6)
1970  Mul_Column1(5, 7)
1971  Mul_Column0(6, 8)
1972  Mul_Column1(7, 9)
1973  Mul_Column0(8, 10)
1974  Mul_Column1(9, 11)
1975  Mul_Column0(10, 12)
1976  Mul_Column1(11, 13)
1977  Mul_Column0(12, 14)
1978  Mul_Column1(13, 15)
1979  Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1980  Bot_End(16)
1981 }
1982 
1983 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
1984 {
1985  Top_Begin(4)
1986  Top_Acc(3) Top_Acc(2) Top_Acc(1)
1987 #ifndef __GNUC__
1988  ASJ( jmp, 0, f)
1989  Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1990  AS1( ret) ASL(0)
1991 #endif
1992  Top_Column0(4)
1993  Top_Column1(3)
1994  Mul_Column0(0, 2)
1995  Top_End(2)
1996 }
1997 
1998 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
1999 {
2000  Top_Begin(8)
2001  Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2002 #ifndef __GNUC__
2003  ASJ( jmp, 0, f)
2004  Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2005  AS1( ret) ASL(0)
2006 #endif
2007  Top_Column0(8)
2008  Top_Column1(7)
2009  Mul_Column0(0, 6)
2010  Mul_Column1(1, 5)
2011  Mul_Column0(2, 4)
2012  Mul_Column1(3, 3)
2013  Mul_Column0(4, 2)
2014  Top_End(4)
2015 }
2016 
2017 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
2018 {
2019  Top_Begin(16)
2020  Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2021 #ifndef __GNUC__
2022  ASJ( jmp, 0, f)
2023  Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2024  AS1( ret) ASL(0)
2025 #endif
2026  Top_Column0(16)
2027  Top_Column1(15)
2028  Mul_Column0(0, 14)
2029  Mul_Column1(1, 13)
2030  Mul_Column0(2, 12)
2031  Mul_Column1(3, 11)
2032  Mul_Column0(4, 10)
2033  Mul_Column1(5, 9)
2034  Mul_Column0(6, 8)
2035  Mul_Column1(7, 7)
2036  Mul_Column0(8, 6)
2037  Mul_Column1(9, 5)
2038  Mul_Column0(10, 4)
2039  Mul_Column1(11, 3)
2040  Mul_Column0(12, 2)
2041  Top_End(8)
2042 }
2043 
2044 #endif // #if CRYPTOPP_INTEGER_SSE2
2045 
2046 // ********************************************************
2047 
2048 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
2049 typedef void (* PMul)(word *C, const word *A, const word *B);
2050 typedef void (* PSqu)(word *C, const word *A);
2051 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
2052 
2053 #if CRYPTOPP_INTEGER_SSE2
2054 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2055 static size_t s_recursionLimit = 8;
2056 #else
2057 static const size_t s_recursionLimit = 16;
2058 #endif
2059 
2060 static PMul s_pMul[9], s_pBot[9];
2061 static PSqu s_pSqu[9];
2062 static PMulTop s_pTop[9];
2063 
2064 static void SetFunctionPointers()
2065 {
2066  s_pMul[0] = &Baseline_Multiply2;
2067  s_pBot[0] = &Baseline_MultiplyBottom2;
2068  s_pSqu[0] = &Baseline_Square2;
2069  s_pTop[0] = &Baseline_MultiplyTop2;
2070  s_pTop[1] = &Baseline_MultiplyTop4;
2071 
2072 #if CRYPTOPP_INTEGER_SSE2
2073  if (HasSSE2())
2074  {
2075 #if _MSC_VER != 1200 || defined(NDEBUG)
2076  if (IsP4())
2077  {
2078  s_pAdd = &SSE2_Add;
2079  s_pSub = &SSE2_Sub;
2080  }
2081 #endif
2082 
2083  s_recursionLimit = 32;
2084 
2085  s_pMul[1] = &SSE2_Multiply4;
2086  s_pMul[2] = &SSE2_Multiply8;
2087  s_pMul[4] = &SSE2_Multiply16;
2088  s_pMul[8] = &SSE2_Multiply32;
2089 
2090  s_pBot[1] = &SSE2_MultiplyBottom4;
2091  s_pBot[2] = &SSE2_MultiplyBottom8;
2092  s_pBot[4] = &SSE2_MultiplyBottom16;
2093  s_pBot[8] = &SSE2_MultiplyBottom32;
2094 
2095  s_pSqu[1] = &SSE2_Square4;
2096  s_pSqu[2] = &SSE2_Square8;
2097  s_pSqu[4] = &SSE2_Square16;
2098  s_pSqu[8] = &SSE2_Square32;
2099 
2100  s_pTop[2] = &SSE2_MultiplyTop8;
2101  s_pTop[4] = &SSE2_MultiplyTop16;
2102  s_pTop[8] = &SSE2_MultiplyTop32;
2103  }
2104  else
2105 #endif
2106  {
2107  s_pMul[1] = &Baseline_Multiply4;
2108  s_pMul[2] = &Baseline_Multiply8;
2109 
2110  s_pBot[1] = &Baseline_MultiplyBottom4;
2111  s_pBot[2] = &Baseline_MultiplyBottom8;
2112 
2113  s_pSqu[1] = &Baseline_Square4;
2114  s_pSqu[2] = &Baseline_Square8;
2115 
2116  s_pTop[2] = &Baseline_MultiplyTop8;
2117 
2118 #if !CRYPTOPP_INTEGER_SSE2
2119  s_pMul[4] = &Baseline_Multiply16;
2120  s_pBot[4] = &Baseline_MultiplyBottom16;
2121  s_pSqu[4] = &Baseline_Square16;
2122  s_pTop[4] = &Baseline_MultiplyTop16;
2123 #endif
2124  }
2125 }
2126 
2127 inline int Add(word *C, const word *A, const word *B, size_t N)
2128 {
2129 #if CRYPTOPP_INTEGER_SSE2
2130  return s_pAdd(N, C, A, B);
2131 #else
2132  return Baseline_Add(N, C, A, B);
2133 #endif
2134 }
2135 
2136 inline int Subtract(word *C, const word *A, const word *B, size_t N)
2137 {
2138 #if CRYPTOPP_INTEGER_SSE2
2139  return s_pSub(N, C, A, B);
2140 #else
2141  return Baseline_Sub(N, C, A, B);
2142 #endif
2143 }
2144 
2145 // ********************************************************
2146 
2147 
2148 #define A0 A
2149 #define A1 (A+N2)
2150 #define B0 B
2151 #define B1 (B+N2)
2152 
2153 #define T0 T
2154 #define T1 (T+N2)
2155 #define T2 (T+N)
2156 #define T3 (T+N+N2)
2157 
2158 #define R0 R
2159 #define R1 (R+N2)
2160 #define R2 (R+N)
2161 #define R3 (R+N+N2)
2162 
2163 // R[2*N] - result = A*B
2164 // T[2*N] - temporary work space
2165 // A[N] --- multiplier
2166 // B[N] --- multiplicant
2167 
2168 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
2169 {
2170  assert(N>=2 && N%2==0);
2171 
2172  if (N <= s_recursionLimit)
2173  s_pMul[N/4](R, A, B);
2174  else
2175  {
2176  const size_t N2 = N/2;
2177 
2178  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2179  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2180 
2181  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2182  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2183 
2184  RecursiveMultiply(R2, T2, A1, B1, N2);
2185  RecursiveMultiply(T0, T2, R0, R1, N2);
2186  RecursiveMultiply(R0, T2, A0, B0, N2);
2187 
2188  // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
2189 
2190  int c2 = Add(R2, R2, R1, N2);
2191  int c3 = c2;
2192  c2 += Add(R1, R2, R0, N2);
2193  c3 += Add(R2, R2, R3, N2);
2194 
2195  if (AN2 == BN2)
2196  c3 -= Subtract(R1, R1, T0, N);
2197  else
2198  c3 += Add(R1, R1, T0, N);
2199 
2200  c3 += Increment(R2, N2, c2);
2201  assert (c3 >= 0 && c3 <= 2);
2202  Increment(R3, N2, c3);
2203  }
2204 }
2205 
2206 // R[2*N] - result = A*A
2207 // T[2*N] - temporary work space
2208 // A[N] --- number to be squared
2209 
2210 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
2211 {
2212  assert(N && N%2==0);
2213 
2214  if (N <= s_recursionLimit)
2215  s_pSqu[N/4](R, A);
2216  else
2217  {
2218  const size_t N2 = N/2;
2219 
2220  RecursiveSquare(R0, T2, A0, N2);
2221  RecursiveSquare(R2, T2, A1, N2);
2222  RecursiveMultiply(T0, T2, A0, A1, N2);
2223 
2224  int carry = Add(R1, R1, T0, N);
2225  carry += Add(R1, R1, T0, N);
2226  Increment(R3, N2, carry);
2227  }
2228 }
2229 
2230 // R[N] - bottom half of A*B
2231 // T[3*N/2] - temporary work space
2232 // A[N] - multiplier
2233 // B[N] - multiplicant
2234 
2235 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2236 {
2237  assert(N>=2 && N%2==0);
2238 
2239  if (N <= s_recursionLimit)
2240  s_pBot[N/4](R, A, B);
2241  else
2242  {
2243  const size_t N2 = N/2;
2244 
2245  RecursiveMultiply(R, T, A0, B0, N2);
2246  RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2247  Add(R1, R1, T0, N2);
2248  RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2249  Add(R1, R1, T0, N2);
2250  }
2251 }
2252 
2253 // R[N] --- upper half of A*B
2254 // T[2*N] - temporary work space
2255 // L[N] --- lower half of A*B
2256 // A[N] --- multiplier
2257 // B[N] --- multiplicant
2258 
2259 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
2260 {
2261  assert(N>=2 && N%2==0);
2262 
2263  if (N <= s_recursionLimit)
2264  s_pTop[N/4](R, A, B, L[N-1]);
2265  else
2266  {
2267  const size_t N2 = N/2;
2268 
2269  size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2270  Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2271 
2272  size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2273  Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2274 
2275  RecursiveMultiply(T0, T2, R0, R1, N2);
2276  RecursiveMultiply(R0, T2, A1, B1, N2);
2277 
2278  // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
2279 
2280  int t, c3;
2281  int c2 = Subtract(T2, L+N2, L, N2);
2282 
2283  if (AN2 == BN2)
2284  {
2285  c2 -= Add(T2, T2, T0, N2);
2286  t = (Compare(T2, R0, N2) == -1);
2287  c3 = t - Subtract(T2, T2, T1, N2);
2288  }
2289  else
2290  {
2291  c2 += Subtract(T2, T2, T0, N2);
2292  t = (Compare(T2, R0, N2) == -1);
2293  c3 = t + Add(T2, T2, T1, N2);
2294  }
2295 
2296  c2 += t;
2297  if (c2 >= 0)
2298  c3 += Increment(T2, N2, c2);
2299  else
2300  c3 -= Decrement(T2, N2, -c2);
2301  c3 += Add(R0, T2, R1, N2);
2302 
2303  assert (c3 >= 0 && c3 <= 2);
2304  Increment(R1, N2, c3);
2305  }
2306 }
2307 
2308 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
2309 {
2310  RecursiveMultiply(R, T, A, B, N);
2311 }
2312 
2313 inline void Square(word *R, word *T, const word *A, size_t N)
2314 {
2315  RecursiveSquare(R, T, A, N);
2316 }
2317 
2318 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
2319 {
2320  RecursiveMultiplyBottom(R, T, A, B, N);
2321 }
2322 
2323 // R[NA+NB] - result = A*B
2324 // T[NA+NB] - temporary work space
2325 // A[NA] ---- multiplier
2326 // B[NB] ---- multiplicant
2327 
2328 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
2329 {
2330  if (NA == NB)
2331  {
2332  if (A == B)
2333  Square(R, T, A, NA);
2334  else
2335  Multiply(R, T, A, B, NA);
2336 
2337  return;
2338  }
2339 
2340  if (NA > NB)
2341  {
2342  std::swap(A, B);
2343  std::swap(NA, NB);
2344  }
2345 
2346  assert(NB % NA == 0);
2347 
2348  if (NA==2 && !A[1])
2349  {
2350  switch (A[0])
2351  {
2352  case 0:
2353  SetWords(R, 0, NB+2);
2354  return;
2355  case 1:
2356  CopyWords(R, B, NB);
2357  R[NB] = R[NB+1] = 0;
2358  return;
2359  default:
2360  R[NB] = LinearMultiply(R, B, A[0], NB);
2361  R[NB+1] = 0;
2362  return;
2363  }
2364  }
2365 
2366  size_t i;
2367  if ((NB/NA)%2 == 0)
2368  {
2369  Multiply(R, T, A, B, NA);
2370  CopyWords(T+2*NA, R+NA, NA);
2371 
2372  for (i=2*NA; i<NB; i+=2*NA)
2373  Multiply(T+NA+i, T, A, B+i, NA);
2374  for (i=NA; i<NB; i+=2*NA)
2375  Multiply(R+i, T, A, B+i, NA);
2376  }
2377  else
2378  {
2379  for (i=0; i<NB; i+=2*NA)
2380  Multiply(R+i, T, A, B+i, NA);
2381  for (i=NA; i<NB; i+=2*NA)
2382  Multiply(T+NA+i, T, A, B+i, NA);
2383  }
2384 
2385  if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2386  Increment(R+NB, NA);
2387 }
2388 
2389 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
2390 // T[3*N/2] - temporary work space
2391 // A[N] ----- an odd number as input
2392 
2393 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
2394 {
2395  if (N==2)
2396  {
2397  T[0] = AtomicInverseModPower2(A[0]);
2398  T[1] = 0;
2399  s_pBot[0](T+2, T, A);
2400  TwosComplement(T+2, 2);
2401  Increment(T+2, 2, 2);
2402  s_pBot[0](R, T, T+2);
2403  }
2404  else
2405  {
2406  const size_t N2 = N/2;
2407  RecursiveInverseModPower2(R0, T0, A0, N2);
2408  T0[0] = 1;
2409  SetWords(T0+1, 0, N2-1);
2410  MultiplyTop(R1, T1, T0, R0, A0, N2);
2411  MultiplyBottom(T0, T1, R0, A1, N2);
2412  Add(T0, R1, T0, N2);
2413  TwosComplement(T0, N2);
2414  MultiplyBottom(R1, T1, R0, T0, N2);
2415  }
2416 }
2417 
2418 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
2419 // T[3*N] - temporary work space
2420 // X[2*N] - number to be reduced
2421 // M[N] --- modulus
2422 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
2423 
2424 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
2425 {
2426 #if 1
2427  MultiplyBottom(R, T, X, U, N);
2428  MultiplyTop(T, T+N, X, R, M, N);
2429  word borrow = Subtract(T, X+N, T, N);
2430  // defend against timing attack by doing this Add even when not needed
2431  word carry = Add(T+N, T, M, N);
2432  assert(carry | !borrow);
2433  CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2434  CopyWords(R, T + ((0-borrow) & N), N);
2435 #elif 0
2436  const word u = 0-U[0];
2437  Declare2Words(p)
2438  for (size_t i=0; i<N; i++)
2439  {
2440  const word t = u * X[i];
2441  word c = 0;
2442  for (size_t j=0; j<N; j+=2)
2443  {
2444  MultiplyWords(p, t, M[j]);
2445  Acc2WordsBy1(p, X[i+j]);
2446  Acc2WordsBy1(p, c);
2447  X[i+j] = LowWord(p);
2448  c = HighWord(p);
2449  MultiplyWords(p, t, M[j+1]);
2450  Acc2WordsBy1(p, X[i+j+1]);
2451  Acc2WordsBy1(p, c);
2452  X[i+j+1] = LowWord(p);
2453  c = HighWord(p);
2454  }
2455 
2456  if (Increment(X+N+i, N-i, c))
2457  while (!Subtract(X+N, X+N, M, N)) {}
2458  }
2459 
2460  memcpy(R, X+N, N*WORD_SIZE);
2461 #else
2462  __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2463  for (size_t i=0; i<N; i++)
2464  {
2465  __m64 t = _mm_cvtsi32_si64(X[i]);
2466  t = _mm_mul_su32(t, u);
2467  __m64 c = _mm_setzero_si64();
2468  for (size_t j=0; j<N; j+=2)
2469  {
2470  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2471  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2472  c = _mm_add_si64(c, p);
2473  X[i+j] = _mm_cvtsi64_si32(c);
2474  c = _mm_srli_si64(c, 32);
2475  p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2476  p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2477  c = _mm_add_si64(c, p);
2478  X[i+j+1] = _mm_cvtsi64_si32(c);
2479  c = _mm_srli_si64(c, 32);
2480  }
2481 
2482  if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2483  while (!Subtract(X+N, X+N, M, N)) {}
2484  }
2485 
2486  memcpy(R, X+N, N*WORD_SIZE);
2487  _mm_empty();
2488 #endif
2489 }
2490 
2491 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
2492 // T[2*N] - temporary work space
2493 // X[2*N] - number to be reduced
2494 // M[N] --- modulus
2495 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
2496 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
2497 
2498 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
2499 {
2500  assert(N%2==0 && N>=4);
2501 
2502 #define M0 M
2503 #define M1 (M+N2)
2504 #define V0 V
2505 #define V1 (V+N2)
2506 
2507 #define X0 X
2508 #define X1 (X+N2)
2509 #define X2 (X+N)
2510 #define X3 (X+N+N2)
2511 
2512  const size_t N2 = N/2;
2513  Multiply(T0, T2, V0, X3, N2);
2514  int c2 = Add(T0, T0, X0, N);
2515  MultiplyBottom(T3, T2, T0, U, N2);
2516  MultiplyTop(T2, R, T0, T3, M0, N2);
2517  c2 -= Subtract(T2, T1, T2, N2);
2518  Multiply(T0, R, T3, M1, N2);
2519  c2 -= Subtract(T0, T2, T0, N2);
2520  int c3 = -(int)Subtract(T1, X2, T1, N2);
2521  Multiply(R0, T2, V1, X3, N2);
2522  c3 += Add(R, R, T, N);
2523 
2524  if (c2>0)
2525  c3 += Increment(R1, N2);
2526  else if (c2<0)
2527  c3 -= Decrement(R1, N2, -c2);
2528 
2529  assert(c3>=-1 && c3<=1);
2530  if (c3>0)
2531  Subtract(R, R, M, N);
2532  else if (c3<0)
2533  Add(R, R, M, N);
2534 
2535 #undef M0
2536 #undef M1
2537 #undef V0
2538 #undef V1
2539 
2540 #undef X0
2541 #undef X1
2542 #undef X2
2543 #undef X3
2544 }
2545 
2546 #undef A0
2547 #undef A1
2548 #undef B0
2549 #undef B1
2550 
2551 #undef T0
2552 #undef T1
2553 #undef T2
2554 #undef T3
2555 
2556 #undef R0
2557 #undef R1
2558 #undef R2
2559 #undef R3
2560 
2561 /*
2562 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2563 static word SubatomicDivide(word *A, word B0, word B1)
2564 {
2565  // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2566  assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
2567 
2568  // estimate the quotient: do a 2 word by 1 word divide
2569  word Q;
2570  if (B1+1 == 0)
2571  Q = A[2];
2572  else
2573  Q = DWord(A[1], A[2]).DividedBy(B1+1);
2574 
2575  // now subtract Q*B from A
2576  DWord p = DWord::Multiply(B0, Q);
2577  DWord u = (DWord) A[0] - p.GetLowHalf();
2578  A[0] = u.GetLowHalf();
2579  u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2580  A[1] = u.GetLowHalf();
2581  A[2] += u.GetHighHalf();
2582 
2583  // Q <= actual quotient, so fix it
2584  while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
2585  {
2586  u = (DWord) A[0] - B0;
2587  A[0] = u.GetLowHalf();
2588  u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2589  A[1] = u.GetLowHalf();
2590  A[2] += u.GetHighHalf();
2591  Q++;
2592  assert(Q); // shouldn't overflow
2593  }
2594 
2595  return Q;
2596 }
2597 
2598 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2599 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2600 {
2601  if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
2602  {
2603  Q[0] = A[2];
2604  Q[1] = A[3];
2605  }
2606  else
2607  {
2608  word T[4];
2609  T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2610  Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2611  Q[0] = SubatomicDivide(T, B[0], B[1]);
2612 
2613 #ifndef NDEBUG
2614  // multiply quotient and divisor and add remainder, make sure it equals dividend
2615  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2616  word P[4];
2617  LowLevel::Multiply2(P, Q, B);
2618  Add(P, P, T, 4);
2619  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2620 #endif
2621  }
2622 }
2623 */
2624 
2625 static inline void AtomicDivide(word *Q, const word *A, const word *B)
2626 {
2627  word T[4];
2628  DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2629  Q[0] = q.GetLowHalf();
2630  Q[1] = q.GetHighHalf();
2631 
2632 #ifndef NDEBUG
2633  if (B[0] || B[1])
2634  {
2635  // multiply quotient and divisor and add remainder, make sure it equals dividend
2636  assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
2637  word P[4];
2638  s_pMul[0](P, Q, B);
2639  Add(P, P, T, 4);
2640  assert(memcmp(P, A, 4*WORD_SIZE)==0);
2641  }
2642 #endif
2643 }
2644 
2645 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2646 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
2647 {
2648  assert(N && N%2==0);
2649 
2650  AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2651 
2652  word borrow = Subtract(R, R, T, N+2);
2653  assert(!borrow && !R[N+1]);
2654  CRYPTOPP_UNUSED(borrow);
2655 
2656  while (R[N] || Compare(R, B, N) >= 0)
2657  {
2658  R[N] -= Subtract(R, R, B, N);
2659  Q[1] += (++Q[0]==0);
2660  assert(Q[0] || Q[1]); // no overflow
2661  }
2662 }
2663 
2664 // R[NB] -------- remainder = A%B
2665 // Q[NA-NB+2] --- quotient = A/B
2666 // T[NA+3*(NB+2)] - temp work space
2667 // A[NA] -------- dividend
2668 // B[NB] -------- divisor
2669 
2670 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
2671 {
2672  assert(NA && NB && NA%2==0 && NB%2==0);
2673  assert(B[NB-1] || B[NB-2]);
2674  assert(NB <= NA);
2675 
2676  // set up temporary work space
2677  word *const TA=T;
2678  word *const TB=T+NA+2;
2679  word *const TP=T+NA+2+NB;
2680 
2681  // copy B into TB and normalize it so that TB has highest bit set to 1
2682  unsigned shiftWords = (B[NB-1]==0);
2683  TB[0] = TB[NB-1] = 0;
2684  CopyWords(TB+shiftWords, B, NB-shiftWords);
2685  unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2686  assert(shiftBits < WORD_BITS);
2687  ShiftWordsLeftByBits(TB, NB, shiftBits);
2688 
2689  // copy A into TA and normalize it
2690  TA[0] = TA[NA] = TA[NA+1] = 0;
2691  CopyWords(TA+shiftWords, A, NA);
2692  ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2693 
2694  if (TA[NA+1]==0 && TA[NA] <= 1)
2695  {
2696  Q[NA-NB+1] = Q[NA-NB] = 0;
2697  while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
2698  {
2699  TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2700  ++Q[NA-NB];
2701  }
2702  }
2703  else
2704  {
2705  NA+=2;
2706  assert(Compare(TA+NA-NB, TB, NB) < 0);
2707  }
2708 
2709  word BT[2];
2710  BT[0] = TB[NB-2] + 1;
2711  BT[1] = TB[NB-1] + (BT[0]==0);
2712 
2713  // start reducing TA mod TB, 2 words at a time
2714  for (size_t i=NA-2; i>=NB; i-=2)
2715  {
2716  AtomicDivide(Q+i-NB, TA+i-2, BT);
2717  CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2718  }
2719 
2720  // copy TA into R, and denormalize it
2721  CopyWords(R, TA+shiftWords, NB);
2722  ShiftWordsRightByBits(R, NB, shiftBits);
2723 }
2724 
2725 static inline size_t EvenWordCount(const word *X, size_t N)
2726 {
2727  while (N && X[N-2]==0 && X[N-1]==0)
2728  N-=2;
2729  return N;
2730 }
2731 
2732 // return k
2733 // R[N] --- result = A^(-1) * 2^k mod M
2734 // T[4*N] - temporary work space
2735 // A[NA] -- number to take inverse of
2736 // M[N] --- modulus
2737 
2738 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
2739 {
2740  assert(NA<=N && N && N%2==0);
2741 
2742  word *b = T;
2743  word *c = T+N;
2744  word *f = T+2*N;
2745  word *g = T+3*N;
2746  size_t bcLen=2, fgLen=EvenWordCount(M, N);
2747  unsigned int k=0;
2748  bool s=false;
2749 
2750  SetWords(T, 0, 3*N);
2751  b[0]=1;
2752  CopyWords(f, A, NA);
2753  CopyWords(g, M, N);
2754 
2755  while (1)
2756  {
2757  word t=f[0];
2758  while (!t)
2759  {
2760  if (EvenWordCount(f, fgLen)==0)
2761  {
2762  SetWords(R, 0, N);
2763  return 0;
2764  }
2765 
2766  ShiftWordsRightByWords(f, fgLen, 1);
2767  bcLen += 2 * (c[bcLen-1] != 0);
2768  assert(bcLen <= N);
2769  ShiftWordsLeftByWords(c, bcLen, 1);
2770  k+=WORD_BITS;
2771  t=f[0];
2772  }
2773 
2774  // t must be non-0; otherwise undefined.
2775  unsigned int i = TrailingZeros(t);
2776  t >>= i;
2777  k += i;
2778 
2779  if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2780  {
2781  if (s)
2782  Subtract(R, M, b, N);
2783  else
2784  CopyWords(R, b, N);
2785  return k;
2786  }
2787 
2788  ShiftWordsRightByBits(f, fgLen, i);
2789  t = ShiftWordsLeftByBits(c, bcLen, i);
2790  c[bcLen] += t;
2791  bcLen += 2 * (t!=0);
2792  assert(bcLen <= N);
2793 
2794  bool swap = Compare(f, g, fgLen)==-1;
2795  ConditionalSwapPointers(swap, f, g);
2796  ConditionalSwapPointers(swap, b, c);
2797  s ^= swap;
2798 
2799  fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
2800 
2801  Subtract(f, f, g, fgLen);
2802  t = Add(b, b, c, bcLen);
2803  b[bcLen] += t;
2804  bcLen += 2*t;
2805  assert(bcLen <= N);
2806  }
2807 }
2808 
2809 // R[N] - result = A/(2^k) mod M
2810 // A[N] - input
2811 // M[N] - modulus
2812 
2813 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2814 {
2815  CopyWords(R, A, N);
2816 
2817  while (k--)
2818  {
2819  if (R[0]%2==0)
2820  ShiftWordsRightByBits(R, N, 1);
2821  else
2822  {
2823  word carry = Add(R, R, M, N);
2824  ShiftWordsRightByBits(R, N, 1);
2825  R[N-1] += carry<<(WORD_BITS-1);
2826  }
2827  }
2828 }
2829 
2830 // R[N] - result = A*(2^k) mod M
2831 // A[N] - input
2832 // M[N] - modulus
2833 
2834 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
2835 {
2836  CopyWords(R, A, N);
2837 
2838  while (k--)
2839  if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
2840  Subtract(R, R, M, N);
2841 }
2842 
2843 // ******************************************************************
2844 
2845 InitializeInteger::InitializeInteger()
2846 {
2847  if (!g_pAssignIntToInteger)
2848  {
2849  SetFunctionPointers();
2850  g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2851  }
2852 }
2853 
2854 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2855 
2856 static inline size_t RoundupSize(size_t n)
2857 {
2858  if (n<=8)
2859  return RoundupSizeTable[n];
2860  else if (n<=16)
2861  return 16;
2862  else if (n<=32)
2863  return 32;
2864  else if (n<=64)
2865  return 64;
2866  else return size_t(1) << BitPrecision(n-1);
2867 }
2868 
2870  : reg(2), sign(POSITIVE)
2871 {
2872  reg[0] = reg[1] = 0;
2873 }
2874 
2876  : reg(RoundupSize(t.WordCount())), sign(t.sign)
2877 {
2878  CopyWords(reg, t.reg, reg.size());
2879 }
2880 
2881 Integer::Integer(Sign s, lword value)
2882  : reg(2), sign(s)
2883 {
2884  reg[0] = word(value);
2885  reg[1] = word(SafeRightShift<WORD_BITS>(value));
2886 }
2887 
2888 Integer::Integer(signed long value)
2889  : reg(2)
2890 {
2891  if (value >= 0)
2892  sign = POSITIVE;
2893  else
2894  {
2895  sign = NEGATIVE;
2896  value = -value;
2897  }
2898  reg[0] = word(value);
2899  reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2900 }
2901 
2902 Integer::Integer(Sign s, word high, word low)
2903  : reg(2), sign(s)
2904 {
2905  reg[0] = low;
2906  reg[1] = high;
2907 }
2908 
2910 {
2911  if (ByteCount() > sizeof(long))
2912  return false;
2913 
2914  unsigned long value = (unsigned long)reg[0];
2915  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2916 
2917  if (sign==POSITIVE)
2918  return (signed long)value >= 0;
2919  else
2920  return -(signed long)value < 0;
2921 }
2922 
2923 signed long Integer::ConvertToLong() const
2924 {
2925  assert(IsConvertableToLong());
2926 
2927  unsigned long value = (unsigned long)reg[0];
2928  value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2929  return sign==POSITIVE ? value : -(signed long)value;
2930 }
2931 
2932 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
2933 {
2934  Decode(encodedInteger, byteCount, s);
2935 }
2936 
2937 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2938 {
2939  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2940 
2941  if(o == LITTLE_ENDIAN_ORDER)
2942  {
2943  SecByteBlock block(byteCount);
2944  encodedInteger.Get(block, block.size());
2945  std::reverse(block.begin(), block.begin()+block.size());
2946 
2947  Decode(block.begin(), block.size(), s);
2948  return;
2949  }
2950 
2951  Decode(encodedInteger, byteCount, s);
2952 }
2953 
2954 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
2955 {
2956  Decode(encodedInteger, byteCount, s);
2957 }
2958 
2959 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2960 {
2961  assert(o == BIG_ENDIAN_ORDER || o == LITTLE_ENDIAN_ORDER);
2962 
2963  if(o == LITTLE_ENDIAN_ORDER)
2964  {
2965  SecByteBlock block(byteCount);
2966 #if (CRYPTOPP_MSC_VERSION >= 1400)
2967  std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2968  stdext::make_checked_array_iterator(block.begin(), block.size()));
2969 #else
2970  std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2971 #endif
2972  Decode(block.begin(), block.size(), s);
2973  return;
2974  }
2975 
2976  Decode(encodedInteger, byteCount, s);
2977 }
2978 
2980 {
2981  BERDecode(bt);
2982 }
2983 
2985 {
2986  Randomize(rng, bitcount);
2987 }
2988 
2989 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
2990 {
2991  if (!Randomize(rng, min, max, rnType, equiv, mod))
2993 }
2994 
2996 {
2997  Integer r((word)0, BitsToWords(e+1));
2998  r.SetBit(e);
2999  return r;
3000 }
3001 
3002 template <long i>
3004 {
3005  Integer * operator()() const
3006  {
3007  return new Integer(i);
3008  }
3009 };
3010 
3012 {
3013  return Singleton<Integer>().Ref();
3014 }
3015 
3017 {
3018  return Singleton<Integer, NewInteger<1> >().Ref();
3019 }
3020 
3022 {
3023  return Singleton<Integer, NewInteger<2> >().Ref();
3024 }
3025 
3026 bool Integer::operator!() const
3027 {
3028  return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3029 }
3030 
3031 Integer& Integer::operator=(const Integer& t)
3032 {
3033  if (this != &t)
3034  {
3035  if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
3036  reg.New(RoundupSize(t.WordCount()));
3037  CopyWords(reg, t.reg, reg.size());
3038  sign = t.sign;
3039  }
3040  return *this;
3041 }
3042 
3043 bool Integer::GetBit(size_t n) const
3044 {
3045  if (n/WORD_BITS >= reg.size())
3046  return 0;
3047  else
3048  return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3049 }
3050 
3051 void Integer::SetBit(size_t n, bool value)
3052 {
3053  if (value)
3054  {
3055  reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3056  reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
3057  }
3058  else
3059  {
3060  if (n/WORD_BITS < reg.size())
3061  reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3062  }
3063 }
3064 
3065 byte Integer::GetByte(size_t n) const
3066 {
3067  if (n/WORD_SIZE >= reg.size())
3068  return 0;
3069  else
3070  return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3071 }
3072 
3073 void Integer::SetByte(size_t n, byte value)
3074 {
3075  reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3076  reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3077  reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
3078 }
3079 
3080 lword Integer::GetBits(size_t i, size_t n) const
3081 {
3082  lword v = 0;
3083  assert(n <= sizeof(v)*8);
3084  for (unsigned int j=0; j<n; j++)
3085  v |= lword(GetBit(i+j)) << j;
3086  return v;
3087 }
3088 
3089 Integer Integer::operator-() const
3090 {
3091  Integer result(*this);
3092  result.Negate();
3093  return result;
3094 }
3095 
3096 Integer Integer::AbsoluteValue() const
3097 {
3098  Integer result(*this);
3099  result.sign = POSITIVE;
3100  return result;
3101 }
3102 
3104 {
3105  reg.swap(a.reg);
3106  std::swap(sign, a.sign);
3107 }
3108 
3109 Integer::Integer(word value, size_t length)
3110  : reg(RoundupSize(length)), sign(POSITIVE)
3111 {
3112  reg[0] = value;
3113  SetWords(reg+1, 0, reg.size()-1);
3114 }
3115 
3116 template <class T>
3117 static Integer StringToInteger(const T *str, ByteOrder order = BIG_ENDIAN_ORDER)
3118 {
3119  assert( order == BIG_ENDIAN_ORDER || order == LITTLE_ENDIAN_ORDER );
3120 
3121  int radix, sign = 1;
3122  // GCC workaround
3123  // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3124  unsigned int length;
3125  for (length = 0; str[length] != 0; length++) {}
3126 
3127  Integer v;
3128 
3129  if (length == 0)
3130  return Integer::Zero();
3131 
3132  switch (str[length-1])
3133  {
3134  case 'h':
3135  case 'H':
3136  radix=16;
3137  break;
3138  case 'o':
3139  case 'O':
3140  radix=8;
3141  break;
3142  case 'b':
3143  case 'B':
3144  radix=2;
3145  break;
3146  default:
3147  radix=10;
3148  }
3149 
3150  // 'str' is of length 1 or more
3151  if (str[0] == '-')
3152  {
3153  sign = -1;
3154  str += 1, length -= 1;
3155  }
3156 
3157  if (length > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
3158  {
3159  radix = 16;
3160  str += 2, length -= 2;
3161  }
3162 
3163  if(order == BIG_ENDIAN_ORDER)
3164  {
3165  for (unsigned int i=0; i<length; i++)
3166  {
3167  int digit, ch = static_cast<int>(str[i]);
3168 
3169  if (ch >= '0' && ch <= '9')
3170  digit = ch - '0';
3171  else if (ch >= 'A' && ch <= 'F')
3172  digit = ch - 'A' + 10;
3173  else if (ch >= 'a' && ch <= 'f')
3174  digit = ch - 'a' + 10;
3175  else
3176  digit = radix;
3177 
3178  if (digit < radix)
3179  {
3180  v *= radix;
3181  v += digit;
3182  }
3183  }
3184  }
3185  else if(radix == 16 && order == LITTLE_ENDIAN_ORDER)
3186  {
3187  // Nibble high, low and count
3188  unsigned int nh = 0, nl = 0, nc = 0;
3189  Integer position(Integer::One());
3190 
3191  for (unsigned int i=0; i<length; i++)
3192  {
3193  int digit, ch = static_cast<int>(str[i]);
3194 
3195  if (ch >= '0' && ch <= '9')
3196  digit = ch - '0';
3197  else if (ch >= 'A' && ch <= 'F')
3198  digit = ch - 'A' + 10;
3199  else if (ch >= 'a' && ch <= 'f')
3200  digit = ch - 'a' + 10;
3201  else
3202  digit = radix;
3203 
3204  if (digit < radix)
3205  {
3206  if(nc++ == 0)
3207  nh = digit;
3208  else
3209  nl = digit;
3210 
3211  if(nc == 2)
3212  {
3213  v += position * (nh << 4 | nl);
3214  nc = 0, position <<= 8;
3215  }
3216  }
3217  }
3218 
3219  if(nc == 1)
3220  v += nh * position;
3221  }
3222  else // LITTLE_ENDIAN_ORDER && radix != 16
3223  {
3224  for (int i=static_cast<int>(length)-1; i>=0; i--)
3225  {
3226  int digit, ch = static_cast<int>(str[i]);
3227 
3228  if (ch >= '0' && ch <= '9')
3229  digit = ch - '0';
3230  else if (ch >= 'A' && ch <= 'F')
3231  digit = ch - 'A' + 10;
3232  else if (ch >= 'a' && ch <= 'f')
3233  digit = ch - 'a' + 10;
3234  else
3235  digit = radix;
3236 
3237  if (digit < radix)
3238  {
3239  v *= radix;
3240  v += digit;
3241  }
3242  }
3243  }
3244 
3245  if (sign == -1)
3246  v.Negate();
3247 
3248  return v;
3249 }
3250 
3251 Integer::Integer(const char *str)
3252  : reg(2), sign(POSITIVE)
3253 {
3254  *this = StringToInteger(str);
3255 }
3256 
3257 Integer::Integer(const char *str, ByteOrder order)
3258  : reg(2), sign(POSITIVE)
3259 {
3260  *this = StringToInteger(str,order);
3261 }
3262 
3263 Integer::Integer(const wchar_t *str)
3264  : reg(2), sign(POSITIVE)
3265 {
3266  *this = StringToInteger(str);
3267 }
3268 
3269 Integer::Integer(const wchar_t *str, ByteOrder order)
3270  : reg(2), sign(POSITIVE)
3271 {
3272  *this = StringToInteger(str,order);
3273 }
3274 
3275 unsigned int Integer::WordCount() const
3276 {
3277  return (unsigned int)CountWords(reg, reg.size());
3278 }
3279 
3280 unsigned int Integer::ByteCount() const
3281 {
3282  unsigned wordCount = WordCount();
3283  if (wordCount)
3284  return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3285  else
3286  return 0;
3287 }
3288 
3289 unsigned int Integer::BitCount() const
3290 {
3291  unsigned wordCount = WordCount();
3292  if (wordCount)
3293  return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3294  else
3295  return 0;
3296 }
3297 
3298 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3299 {
3300  StringStore store(input, inputLen);
3301  Decode(store, inputLen, s);
3302 }
3303 
3305 {
3306  assert(bt.MaxRetrievable() >= inputLen);
3307 
3308  byte b;
3309  bt.Peek(b);
3310  sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3311 
3312  while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3313  {
3314  bt.Skip(1);
3315  inputLen--;
3316  bt.Peek(b);
3317  }
3318 
3319  // The call to CleanNew is optimized away above -O0/-Og.
3320  const size_t size = RoundupSize(BytesToWords(inputLen));
3321  reg.CleanNew(size);
3322 
3323  assert(reg.SizeInBytes() >= inputLen);
3324  for (size_t i=inputLen; i > 0; i--)
3325  {
3326  bt.Get(b);
3327  reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
3328  }
3329 
3330  if (sign == NEGATIVE)
3331  {
3332  for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3333  reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
3334  TwosComplement(reg, reg.size());
3335  }
3336 }
3337 
3338 size_t Integer::MinEncodedSize(Signedness signedness) const
3339 {
3340  unsigned int outputLen = STDMAX(1U, ByteCount());
3341  if (signedness == UNSIGNED)
3342  return outputLen;
3343  if (NotNegative() && (GetByte(outputLen-1) & 0x80))
3344  outputLen++;
3345  if (IsNegative() && *this < -Power2(outputLen*8-1))
3346  outputLen++;
3347  return outputLen;
3348 }
3349 
3350 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3351 {
3352  assert(output && outputLen);
3353  ArraySink sink(output, outputLen);
3354  Encode(sink, outputLen, signedness);
3355 }
3356 
3357 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3358 {
3359  if (signedness == UNSIGNED || NotNegative())
3360  {
3361  for (size_t i=outputLen; i > 0; i--)
3362  bt.Put(GetByte(i-1));
3363  }
3364  else
3365  {
3366  // take two's complement of *this
3367  Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
3368  temp.Encode(bt, outputLen, UNSIGNED);
3369  }
3370 }
3371 
3373 {
3374  DERGeneralEncoder enc(bt, INTEGER);
3376  enc.MessageEnd();
3377 }
3378 
3379 void Integer::BERDecode(const byte *input, size_t len)
3380 {
3381  StringStore store(input, len);
3382  BERDecode(store);
3383 }
3384 
3386 {
3387  BERGeneralDecoder dec(bt, INTEGER);
3388  if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
3389  BERDecodeError();
3390  Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3391  dec.MessageEnd();
3392 }
3393 
3395 {
3396  DERGeneralEncoder enc(bt, OCTET_STRING);
3397  Encode(enc, length);
3398  enc.MessageEnd();
3399 }
3400 
3402 {
3403  BERGeneralDecoder dec(bt, OCTET_STRING);
3404  if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
3405  BERDecodeError();
3406  Decode(dec, length);
3407  dec.MessageEnd();
3408 }
3409 
3410 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3411 {
3412  ArraySink sink(output, len);
3413  return OpenPGPEncode(sink);
3414 }
3415 
3417 {
3418  word16 bitCount = word16(BitCount());
3419  bt.PutWord16(bitCount);
3420  size_t byteCount = BitsToBytes(bitCount);
3421  Encode(bt, byteCount);
3422  return 2 + byteCount;
3423 }
3424 
3425 void Integer::OpenPGPDecode(const byte *input, size_t len)
3426 {
3427  StringStore store(input, len);
3428  OpenPGPDecode(store);
3429 }
3430 
3432 {
3433  word16 bitCount;
3434  if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
3435  throw OpenPGPDecodeErr();
3436  Decode(bt, BitsToBytes(bitCount));
3437 }
3438 
3440 {
3441  const size_t nbytes = nbits/8 + 1;
3442  SecByteBlock buf(nbytes);
3443  rng.GenerateBlock(buf, nbytes);
3444  if (nbytes)
3445  buf[0] = (byte)Crop(buf[0], nbits % 8);
3446  Decode(buf, nbytes, UNSIGNED);
3447 }
3448 
3449 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3450 {
3451  if (min > max)
3452  throw InvalidArgument("Integer: Min must be no greater than Max");
3453 
3454  Integer range = max - min;
3455  const unsigned int nbits = range.BitCount();
3456 
3457  do
3458  {
3459  Randomize(rng, nbits);
3460  }
3461  while (*this > range);
3462 
3463  *this += min;
3464 }
3465 
3466 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3467 {
3468  return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3469 }
3470 
3472 {
3473 public:
3474  KDF2_RNG(const byte *seed, size_t seedSize)
3475  : m_counter(0), m_counterAndSeed(seedSize + 4)
3476  {
3477  memcpy(m_counterAndSeed + 4, seed, seedSize);
3478  }
3479 
3480  void GenerateBlock(byte *output, size_t size)
3481  {
3482  PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3483  ++m_counter;
3484  P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3485  }
3486 
3487 private:
3488  word32 m_counter;
3489  SecByteBlock m_counterAndSeed;
3490 };
3491 
3492 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3493 {
3494  Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3495  Integer max;
3496  if (!params.GetValue("Max", max))
3497  {
3498  int bitLength;
3499  if (params.GetIntValue("BitLength", bitLength))
3500  max = Integer::Power2(bitLength);
3501  else
3502  throw InvalidArgument("Integer: missing Max argument");
3503  }
3504  if (min > max)
3505  throw InvalidArgument("Integer: Min must be no greater than Max");
3506 
3507  Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3508  Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3509 
3510  if (equiv.IsNegative() || equiv >= mod)
3511  throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3512 
3513  Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3514 
3515  member_ptr<KDF2_RNG> kdf2Rng;
3517  if (params.GetValue(Name::Seed(), seed))
3518  {
3519  ByteQueue bq;
3520  DERSequenceEncoder seq(bq);
3521  min.DEREncode(seq);
3522  max.DEREncode(seq);
3523  equiv.DEREncode(seq);
3524  mod.DEREncode(seq);
3525  DEREncodeUnsigned(seq, rnType);
3526  DEREncodeOctetString(seq, seed.begin(), seed.size());
3527  seq.MessageEnd();
3528 
3529  SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3530  bq.Get(finalSeed, finalSeed.size());
3531  kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3532  }
3533  RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3534 
3535  switch (rnType)
3536  {
3537  case ANY:
3538  if (mod == One())
3539  Randomize(rng, min, max);
3540  else
3541  {
3542  Integer min1 = min + (equiv-min)%mod;
3543  if (max < min1)
3544  return false;
3545  Randomize(rng, Zero(), (max - min1) / mod);
3546  *this *= mod;
3547  *this += min1;
3548  }
3549  return true;
3550 
3551  case PRIME:
3552  {
3553  const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
3554 
3555  int i;
3556  i = 0;
3557  while (1)
3558  {
3559  if (++i==16)
3560  {
3561  // check if there are any suitable primes in [min, max]
3562  Integer first = min;
3563  if (FirstPrime(first, max, equiv, mod, pSelector))
3564  {
3565  // if there is only one suitable prime, we're done
3566  *this = first;
3567  if (!FirstPrime(first, max, equiv, mod, pSelector))
3568  return true;
3569  }
3570  else
3571  return false;
3572  }
3573 
3574  Randomize(rng, min, max);
3575  if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3576  return true;
3577  }
3578  }
3579 
3580  default:
3581  throw InvalidArgument("Integer: invalid RandomNumberType argument");
3582  }
3583 }
3584 
3585 std::istream& operator>>(std::istream& in, Integer &a)
3586 {
3587  char c;
3588  unsigned int length = 0;
3589  SecBlock<char> str(length + 16);
3590 
3591  std::ws(in);
3592 
3593  do
3594  {
3595  in.read(&c, 1);
3596  str[length++] = c;
3597  if (length >= str.size())
3598  str.Grow(length + 16);
3599  }
3600  while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
3601 
3602  if (in.gcount())
3603  in.putback(c);
3604  str[length-1] = '\0';
3605  a = Integer(str);
3606 
3607  return in;
3608 }
3609 
3610 std::ostream& operator<<(std::ostream& out, const Integer &a)
3611 {
3612  // Get relevant conversion specifications from ostream.
3613  const long f = out.flags() & std::ios::basefield; // Get base digits.
3614  int base, block;
3615  char suffix;
3616  switch(f)
3617  {
3618  case std::ios::oct :
3619  base = 8;
3620  block = 8;
3621  suffix = 'o';
3622  break;
3623  case std::ios::hex :
3624  base = 16;
3625  block = 4;
3626  suffix = 'h';
3627  break;
3628  default :
3629  base = 10;
3630  block = 3;
3631  suffix = '.';
3632  }
3633 
3634  Integer temp1=a, temp2;
3635 
3636  if (a.IsNegative())
3637  {
3638  out << '-';
3639  temp1.Negate();
3640  }
3641 
3642  if (!a)
3643  out << '0';
3644 
3645  static const char upper[]="0123456789ABCDEF";
3646  static const char lower[]="0123456789abcdef";
3647 
3648  const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3649  unsigned int i=0;
3650  SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3651 
3652  while (!!temp1)
3653  {
3654  word digit;
3655  Integer::Divide(digit, temp2, temp1, base);
3656  s[i++]=vec[digit];
3657  temp1.swap(temp2);
3658  }
3659 
3660  while (i--)
3661  {
3662  out << s[i];
3663 // if (i && !(i%block))
3664 // out << ",";
3665  }
3666 
3667 #ifdef CRYPTOPP_USE_STD_SHOWBASE
3668  if(out.flags() & std::ios_base::showbase)
3669  out << suffix;
3670 
3671  return out;
3672 #else
3673  return out << suffix;
3674 #endif
3675 }
3676 
3677 Integer& Integer::operator++()
3678 {
3679  if (NotNegative())
3680  {
3681  if (Increment(reg, reg.size()))
3682  {
3683  reg.CleanGrow(2*reg.size());
3684  reg[reg.size()/2]=1;
3685  }
3686  }
3687  else
3688  {
3689  word borrow = Decrement(reg, reg.size());
3690  assert(!borrow);
3691  CRYPTOPP_UNUSED(borrow);
3692 
3693  if (WordCount()==0)
3694  *this = Zero();
3695  }
3696  return *this;
3697 }
3698 
3699 Integer& Integer::operator--()
3700 {
3701  if (IsNegative())
3702  {
3703  if (Increment(reg, reg.size()))
3704  {
3705  reg.CleanGrow(2*reg.size());
3706  reg[reg.size()/2]=1;
3707  }
3708  }
3709  else
3710  {
3711  if (Decrement(reg, reg.size()))
3712  *this = -One();
3713  }
3714  return *this;
3715 }
3716 
3717 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3718 {
3719  int carry;
3720  if (a.reg.size() == b.reg.size())
3721  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3722  else if (a.reg.size() > b.reg.size())
3723  {
3724  carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3725  CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3726  carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3727  }
3728  else
3729  {
3730  carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3731  CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3732  carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3733  }
3734 
3735  if (carry)
3736  {
3737  sum.reg.CleanGrow(2*sum.reg.size());
3738  sum.reg[sum.reg.size()/2] = 1;
3739  }
3740  sum.sign = Integer::POSITIVE;
3741 }
3742 
3743 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3744 {
3745  unsigned aSize = a.WordCount();
3746  aSize += aSize%2;
3747  unsigned bSize = b.WordCount();
3748  bSize += bSize%2;
3749 
3750  if (aSize == bSize)
3751  {
3752  if (Compare(a.reg, b.reg, aSize) >= 0)
3753  {
3754  Subtract(diff.reg, a.reg, b.reg, aSize);
3755  diff.sign = Integer::POSITIVE;
3756  }
3757  else
3758  {
3759  Subtract(diff.reg, b.reg, a.reg, aSize);
3760  diff.sign = Integer::NEGATIVE;
3761  }
3762  }
3763  else if (aSize > bSize)
3764  {
3765  word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3766  CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3767  borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3768  assert(!borrow);
3769  diff.sign = Integer::POSITIVE;
3770  }
3771  else
3772  {
3773  word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3774  CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3775  borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3776  assert(!borrow);
3777  diff.sign = Integer::NEGATIVE;
3778  }
3779 }
3780 
3781 // MSVC .NET 2003 workaround
3782 template <class T> inline const T& STDMAX2(const T& a, const T& b)
3783 {
3784  return a < b ? b : a;
3785 }
3786 
3787 Integer Integer::Plus(const Integer& b) const
3788 {
3789  Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3790  if (NotNegative())
3791  {
3792  if (b.NotNegative())
3793  PositiveAdd(sum, *this, b);
3794  else
3795  PositiveSubtract(sum, *this, b);
3796  }
3797  else
3798  {
3799  if (b.NotNegative())
3800  PositiveSubtract(sum, b, *this);
3801  else
3802  {
3803  PositiveAdd(sum, *this, b);
3804  sum.sign = Integer::NEGATIVE;
3805  }
3806  }
3807  return sum;
3808 }
3809 
3810 Integer& Integer::operator+=(const Integer& t)
3811 {
3812  reg.CleanGrow(t.reg.size());
3813  if (NotNegative())
3814  {
3815  if (t.NotNegative())
3816  PositiveAdd(*this, *this, t);
3817  else
3818  PositiveSubtract(*this, *this, t);
3819  }
3820  else
3821  {
3822  if (t.NotNegative())
3823  PositiveSubtract(*this, t, *this);
3824  else
3825  {
3826  PositiveAdd(*this, *this, t);
3827  sign = Integer::NEGATIVE;
3828  }
3829  }
3830  return *this;
3831 }
3832 
3833 Integer Integer::Minus(const Integer& b) const
3834 {
3835  Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3836  if (NotNegative())
3837  {
3838  if (b.NotNegative())
3839  PositiveSubtract(diff, *this, b);
3840  else
3841  PositiveAdd(diff, *this, b);
3842  }
3843  else
3844  {
3845  if (b.NotNegative())
3846  {
3847  PositiveAdd(diff, *this, b);
3848  diff.sign = Integer::NEGATIVE;
3849  }
3850  else
3851  PositiveSubtract(diff, b, *this);
3852  }
3853  return diff;
3854 }
3855 
3856 Integer& Integer::operator-=(const Integer& t)
3857 {
3858  reg.CleanGrow(t.reg.size());
3859  if (NotNegative())
3860  {
3861  if (t.NotNegative())
3862  PositiveSubtract(*this, *this, t);
3863  else
3864  PositiveAdd(*this, *this, t);
3865  }
3866  else
3867  {
3868  if (t.NotNegative())
3869  {
3870  PositiveAdd(*this, *this, t);
3871  sign = Integer::NEGATIVE;
3872  }
3873  else
3874  PositiveSubtract(*this, t, *this);
3875  }
3876  return *this;
3877 }
3878 
3879 Integer& Integer::operator<<=(size_t n)
3880 {
3881  const size_t wordCount = WordCount();
3882  const size_t shiftWords = n / WORD_BITS;
3883  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3884 
3885  reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3886  ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3887  ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3888  return *this;
3889 }
3890 
3891 Integer& Integer::operator>>=(size_t n)
3892 {
3893  const size_t wordCount = WordCount();
3894  const size_t shiftWords = n / WORD_BITS;
3895  const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3896 
3897  ShiftWordsRightByWords(reg, wordCount, shiftWords);
3898  if (wordCount > shiftWords)
3899  ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3900  if (IsNegative() && WordCount()==0) // avoid -0
3901  *this = Zero();
3902  return *this;
3903 }
3904 
3905 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3906 {
3907  size_t aSize = RoundupSize(a.WordCount());
3908  size_t bSize = RoundupSize(b.WordCount());
3909 
3910  product.reg.CleanNew(RoundupSize(aSize+bSize));
3911  product.sign = Integer::POSITIVE;
3912 
3913  IntegerSecBlock workspace(aSize + bSize);
3914  AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3915 }
3916 
3917 void Multiply(Integer &product, const Integer &a, const Integer &b)
3918 {
3919  PositiveMultiply(product, a, b);
3920 
3921  if (a.NotNegative() != b.NotNegative())
3922  product.Negate();
3923 }
3924 
3926 {
3927  Integer product;
3928  Multiply(product, *this, b);
3929  return product;
3930 }
3931 
3932 /*
3933 void PositiveDivide(Integer &remainder, Integer &quotient,
3934  const Integer &dividend, const Integer &divisor)
3935 {
3936  remainder.reg.CleanNew(divisor.reg.size());
3937  remainder.sign = Integer::POSITIVE;
3938  quotient.reg.New(0);
3939  quotient.sign = Integer::POSITIVE;
3940  unsigned i=dividend.BitCount();
3941  while (i--)
3942  {
3943  word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3944  remainder.reg[0] |= dividend[i];
3945  if (overflow || remainder >= divisor)
3946  {
3947  Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3948  quotient.SetBit(i);
3949  }
3950  }
3951 }
3952 */
3953 
3954 void PositiveDivide(Integer &remainder, Integer &quotient,
3955  const Integer &a, const Integer &b)
3956 {
3957  unsigned aSize = a.WordCount();
3958  unsigned bSize = b.WordCount();
3959 
3960  if (!bSize)
3961  throw Integer::DivideByZero();
3962 
3963  if (aSize < bSize)
3964  {
3965  remainder = a;
3966  remainder.sign = Integer::POSITIVE;
3967  quotient = Integer::Zero();
3968  return;
3969  }
3970 
3971  aSize += aSize%2; // round up to next even number
3972  bSize += bSize%2;
3973 
3974  remainder.reg.CleanNew(RoundupSize(bSize));
3975  remainder.sign = Integer::POSITIVE;
3976  quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
3977  quotient.sign = Integer::POSITIVE;
3978 
3979  IntegerSecBlock T(aSize+3*(bSize+2));
3980  Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
3981 }
3982 
3983 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
3984 {
3985  PositiveDivide(remainder, quotient, dividend, divisor);
3986 
3987  if (dividend.IsNegative())
3988  {
3989  quotient.Negate();
3990  if (remainder.NotZero())
3991  {
3992  --quotient;
3993  remainder = divisor.AbsoluteValue() - remainder;
3994  }
3995  }
3996 
3997  if (divisor.IsNegative())
3998  quotient.Negate();
3999 }
4000 
4001 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4002 {
4003  q = a;
4004  q >>= n;
4005 
4006  const size_t wordCount = BitsToWords(n);
4007  if (wordCount <= a.WordCount())
4008  {
4009  r.reg.resize(RoundupSize(wordCount));
4010  CopyWords(r.reg, a.reg, wordCount);
4011  SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4012  if (n % WORD_BITS != 0)
4013  r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4014  }
4015  else
4016  {
4017  r.reg.resize(RoundupSize(a.WordCount()));
4018  CopyWords(r.reg, a.reg, r.reg.size());
4019  }
4020  r.sign = POSITIVE;
4021 
4022  if (a.IsNegative() && r.NotZero())
4023  {
4024  --q;
4025  r = Power2(n) - r;
4026  }
4027 }
4028 
4029 Integer Integer::DividedBy(const Integer &b) const
4030 {
4031  Integer remainder, quotient;
4032  Integer::Divide(remainder, quotient, *this, b);
4033  return quotient;
4034 }
4035 
4037 {
4038  Integer remainder, quotient;
4039  Integer::Divide(remainder, quotient, *this, b);
4040  return remainder;
4041 }
4042 
4043 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4044 {
4045  if (!divisor)
4046  throw Integer::DivideByZero();
4047 
4048  assert(divisor);
4049 
4050  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4051  {
4052  quotient = dividend >> (BitPrecision(divisor)-1);
4053  remainder = dividend.reg[0] & (divisor-1);
4054  return;
4055  }
4056 
4057  unsigned int i = dividend.WordCount();
4058  quotient.reg.CleanNew(RoundupSize(i));
4059  remainder = 0;
4060  while (i--)
4061  {
4062  quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4063  remainder = DWord(dividend.reg[i], remainder) % divisor;
4064  }
4065 
4066  if (dividend.NotNegative())
4067  quotient.sign = POSITIVE;
4068  else
4069  {
4070  quotient.sign = NEGATIVE;
4071  if (remainder)
4072  {
4073  --quotient;
4074  remainder = divisor - remainder;
4075  }
4076  }
4077 }
4078 
4079 Integer Integer::DividedBy(word b) const
4080 {
4081  word remainder;
4082  Integer quotient;
4083  Integer::Divide(remainder, quotient, *this, b);
4084  return quotient;
4085 }
4086 
4087 word Integer::Modulo(word divisor) const
4088 {
4089  if (!divisor)
4090  throw Integer::DivideByZero();
4091 
4092  assert(divisor);
4093 
4094  word remainder;
4095 
4096  if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4097  remainder = reg[0] & (divisor-1);
4098  else
4099  {
4100  unsigned int i = WordCount();
4101 
4102  if (divisor <= 5)
4103  {
4104  DWord sum(0, 0);
4105  while (i--)
4106  sum += reg[i];
4107  remainder = sum % divisor;
4108  }
4109  else
4110  {
4111  remainder = 0;
4112  while (i--)
4113  remainder = DWord(reg[i], remainder) % divisor;
4114  }
4115  }
4116 
4117  if (IsNegative() && remainder)
4118  remainder = divisor - remainder;
4119 
4120  return remainder;
4121 }
4122 
4124 {
4125  if (!!(*this)) // don't flip sign if *this==0
4126  sign = Sign(1-sign);
4127 }
4128 
4129 int Integer::PositiveCompare(const Integer& t) const
4130 {
4131  unsigned size = WordCount(), tSize = t.WordCount();
4132 
4133  if (size == tSize)
4134  return CryptoPP::Compare(reg, t.reg, size);
4135  else
4136  return size > tSize ? 1 : -1;
4137 }
4138 
4139 int Integer::Compare(const Integer& t) const
4140 {
4141  if (NotNegative())
4142  {
4143  if (t.NotNegative())
4144  return PositiveCompare(t);
4145  else
4146  return 1;
4147  }
4148  else
4149  {
4150  if (t.NotNegative())
4151  return -1;
4152  else
4153  return -PositiveCompare(t);
4154  }
4155 }
4156 
4158 {
4159  if (!IsPositive())
4160  return Zero();
4161 
4162  // overestimate square root
4163  Integer x, y = Power2((BitCount()+1)/2);
4164  assert(y*y >= *this);
4165 
4166  do
4167  {
4168  x = y;
4169  y = (x + *this/x) >> 1;
4170  } while (y<x);
4171 
4172  return x;
4173 }
4174 
4175 bool Integer::IsSquare() const
4176 {
4177  Integer r = SquareRoot();
4178  return *this == r.Squared();
4179 }
4180 
4181 bool Integer::IsUnit() const
4182 {
4183  return (WordCount() == 1) && (reg[0] == 1);
4184 }
4185 
4187 {
4188  return IsUnit() ? *this : Zero();
4189 }
4190 
4191 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4192 {
4193  return x*y%m;
4194 }
4195 
4196 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4197 {
4198  ModularArithmetic mr(m);
4199  return mr.Exponentiate(x, e);
4200 }
4201 
4203 {
4204  return EuclideanDomainOf<Integer>().Gcd(a, b);
4205 }
4206 
4208 {
4209  assert(m.NotNegative());
4210 
4211  if (IsNegative())
4212  return Modulo(m).InverseMod(m);
4213 
4214  if (m.IsEven())
4215  {
4216  if (!m || IsEven())
4217  return Zero(); // no inverse
4218  if (*this == One())
4219  return One();
4220 
4221  Integer u = m.Modulo(*this).InverseMod(*this);
4222  return !u ? Zero() : (m*(*this-u)+1)/(*this);
4223  }
4224 
4225  SecBlock<word> T(m.reg.size() * 4);
4226  Integer r((word)0, m.reg.size());
4227  unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4228  DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4229  return r;
4230 }
4231 
4232 word Integer::InverseMod(word mod) const
4233 {
4234  word g0 = mod, g1 = *this % mod;
4235  word v0 = 0, v1 = 1;
4236  word y;
4237 
4238  while (g1)
4239  {
4240  if (g1 == 1)
4241  return v1;
4242  y = g0 / g1;
4243  g0 = g0 % g1;
4244  v0 += y * v1;
4245 
4246  if (!g0)
4247  break;
4248  if (g0 == 1)
4249  return mod-v0;
4250  y = g1 / g0;
4251  g1 = g1 % g0;
4252  v1 += y * v0;
4253  }
4254  return 0;
4255 }
4256 
4257 // ********************************************************
4258 
4260 {
4261  BERSequenceDecoder seq(bt);
4262  OID oid(seq);
4263  if (oid != ASN1::prime_field())
4264  BERDecodeError();
4265  m_modulus.BERDecode(seq);
4266  seq.MessageEnd();
4267  m_result.reg.resize(m_modulus.reg.size());
4268 }
4269 
4271 {
4272  DERSequenceEncoder seq(bt);
4273  ASN1::prime_field().DEREncode(seq);
4274  m_modulus.DEREncode(seq);
4275  seq.MessageEnd();
4276 }
4277 
4279 {
4280  a.DEREncodeAsOctetString(out, MaxElementByteLength());
4281 }
4282 
4284 {
4285  a.BERDecodeAsOctetString(in, MaxElementByteLength());
4286 }
4287 
4289 {
4290  if (a.reg.size()==m_modulus.reg.size())
4291  {
4292  CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4293  return m_result;
4294  }
4295  else
4296  return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4297 }
4298 
4299 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4300 {
4301  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4302  {
4303  if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4304  || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4305  {
4306  CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4307  }
4308  return m_result;
4309  }
4310  else
4311  {
4312  m_result1 = a+b;
4313  if (m_result1 >= m_modulus)
4314  m_result1 -= m_modulus;
4315  return m_result1;
4316  }
4317 }
4318 
4320 {
4321  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4322  {
4323  if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4324  || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4325  {
4326  CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4327  }
4328  }
4329  else
4330  {
4331  a+=b;
4332  if (a>=m_modulus)
4333  a-=m_modulus;
4334  }
4335 
4336  return a;
4337 }
4338 
4339 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4340 {
4341  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4342  {
4343  if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4344  CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4345  return m_result;
4346  }
4347  else
4348  {
4349  m_result1 = a-b;
4350  if (m_result1.IsNegative())
4351  m_result1 += m_modulus;
4352  return m_result1;
4353  }
4354 }
4355 
4357 {
4358  if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4359  {
4360  if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4361  CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4362  }
4363  else
4364  {
4365  a-=b;
4366  if (a.IsNegative())
4367  a+=m_modulus;
4368  }
4369 
4370  return a;
4371 }
4372 
4374 {
4375  if (!a)
4376  return a;
4377 
4378  CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4379  if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4380  Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4381 
4382  return m_result;
4383 }
4384 
4385 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4386 {
4387  if (m_modulus.IsOdd())
4388  {
4389  MontgomeryRepresentation dr(m_modulus);
4390  return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4391  }
4392  else
4393  return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4394 }
4395 
4396 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
4397 {
4398  if (m_modulus.IsOdd())
4399  {
4400  MontgomeryRepresentation dr(m_modulus);
4401  dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4402  for (unsigned int i=0; i<exponentsCount; i++)
4403  results[i] = dr.ConvertOut(results[i]);
4404  }
4405  else
4406  AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4407 }
4408 
4410  : ModularArithmetic(m),
4411  m_u((word)0, m_modulus.reg.size()),
4412  m_workspace(5*m_modulus.reg.size())
4413 {
4414  if (!m_modulus.IsOdd())
4415  throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4416 
4417  RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4418 }
4419 
4421 {
4422  word *const T = m_workspace.begin();
4423  word *const R = m_result.reg.begin();
4424  const size_t N = m_modulus.reg.size();
4425  assert(a.reg.size()<=N && b.reg.size()<=N);
4426 
4427  AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4428  SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4429  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4430  return m_result;
4431 }
4432 
4434 {
4435  word *const T = m_workspace.begin();
4436  word *const R = m_result.reg.begin();
4437  const size_t N = m_modulus.reg.size();
4438  assert(a.reg.size()<=N);
4439 
4440  CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4441  SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
4442  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4443  return m_result;
4444 }
4445 
4447 {
4448  word *const T = m_workspace.begin();
4449  word *const R = m_result.reg.begin();
4450  const size_t N = m_modulus.reg.size();
4451  assert(a.reg.size()<=N);
4452 
4453  CopyWords(T, a.reg, a.reg.size());
4454  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4455  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4456  return m_result;
4457 }
4458 
4460 {
4461 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
4462  word *const T = m_workspace.begin();
4463  word *const R = m_result.reg.begin();
4464  const size_t N = m_modulus.reg.size();
4465  assert(a.reg.size()<=N);
4466 
4467  CopyWords(T, a.reg, a.reg.size());
4468  SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4469  MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4470  unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4471 
4472 // cout << "k=" << k << " N*32=" << 32*N << endl;
4473 
4474  if (k>N*WORD_BITS)
4475  DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4476  else
4477  MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4478 
4479  return m_result;
4480 }
4481 
4482 // Specialization declared in misc.h to allow us to print integers
4483 // with additional control options, like arbirary bases and uppercase.
4484 template <> CRYPTOPP_DLL
4485 std::string IntToString<Integer>(Integer value, unsigned int base)
4486 {
4487  // Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4488  static const unsigned int BIT_32 = (1U << 31);
4489  const bool UPPER = !!(base & BIT_32);
4490  static const unsigned int BIT_31 = (1U << 30);
4491  const bool BASE = !!(base & BIT_31);
4492 
4493  const char CH = UPPER ? 'A' : 'a';
4494  base &= ~(BIT_32|BIT_31);
4495  assert(base >= 2 && base <= 32);
4496 
4497  if (value == 0)
4498  return "0";
4499 
4500  bool negative = false, zero = false;
4501  if (value.IsNegative())
4502  {
4503  negative = true;
4504  value.Negate();
4505  }
4506 
4507  if (!value)
4508  zero = true;
4509 
4510  SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4511  Integer temp;
4512 
4513  unsigned int i=0;
4514  while (!!value)
4515  {
4516  word digit;
4517  Integer::Divide(digit, temp, value, word(base));
4518  s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4519  value.swap(temp);
4520  }
4521 
4522  std::string result;
4523  result.reserve(i+2);
4524 
4525  if (negative)
4526  result += '-';
4527 
4528  if (zero)
4529  result += '0';
4530 
4531  while (i--)
4532  result += s[i];
4533 
4534  if (BASE)
4535  {
4536  if (base == 10)
4537  result += '.';
4538  else if (base == 16)
4539  result += 'h';
4540  else if (base == 8)
4541  result += 'o';
4542  else if (base == 2)
4543  result += 'b';
4544  }
4545 
4546  return result;
4547 }
4548 
4549 // Specialization declared in misc.h to avoid Coverity findings.
4550 template <> CRYPTOPP_DLL
4551 std::string IntToString<word64>(word64 value, unsigned int base)
4552 {
4553  // Hack... set the high bit for uppercase.
4554  static const unsigned int HIGH_BIT = (1U << 31);
4555  const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4556  base &= ~HIGH_BIT;
4557 
4558  assert(base >= 2);
4559  if (value == 0)
4560  return "0";
4561 
4562  std::string result;
4563  while (value > 0)
4564  {
4565  word64 digit = value % base;
4566  result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4567  value /= base;
4568  }
4569  return result;
4570 }
4571 
4572 NAMESPACE_END
4573 
4574 #endif
Used to pass byte array input as part of a NameValuePairs object.
Definition: algparam.h:29
An invalid argument was detected.
Definition: cryptlib.h:182
void DEREncode(BufferedTransformation &bt) const
Encodes in DER format.
Definition: integer.cpp:4270
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: integer.cpp:4385
Classes for working with NameValuePairs.
const Integer & Square(const Integer &a) const
Square an element in the ring.
Definition: integer.cpp:4433
const Integer & MultiplicativeInverse(const Integer &a) const
Calculate the multiplicative inverse of an element in the ring.
Definition: integer.cpp:4459
a number which is probabilistically prime
Definition: integer.h:91
Utility functions for the Crypto++ library.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:123
Restricts the instantiation of a class to one static object without locks.
Definition: misc.h:264
bool NotZero() const
Determines if the Integer is non-0.
Definition: integer.h:333
bool GetBit(size_t i) const
Provides the i-th bit of the Integer.
Definition: integer.cpp:3043
void CleanNew(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:660
void Encode(byte *output, size_t outputLen, Signedness sign=UNSIGNED) const
Encode in big-endian format.
Definition: integer.cpp:3350
an unsigned value
Definition: integer.h:81
Integer & Reduce(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4356
virtual size_t Peek(byte &outByte) const
Peek a 8-bit byte.
Definition: cryptlib.cpp:538
T GetValueWithDefault(const char *name, T defaultValue) const
Get a named value.
Definition: cryptlib.h:348
size_t DEREncodeUnsigned(BufferedTransformation &out, T w, byte asnTag=INTEGER)
DER Encode unsigned value.
Definition: asn.h:443
virtual void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: cryptlib.cpp:330
size_t BitsToBytes(size_t bitCount)
Returns the number of 8-bit bytes or octets required for the specified number of bits.
Definition: misc.h:740
This file contains helper classes/functions for implementing public key algorithms.
bool FirstPrime(Integer &p, const Integer &max, const Integer &equiv, const Integer &mod, const PrimeSelector *pSelector)
Finds a random prime of special form.
Definition: nbtheory.cpp:381
static Integer Gcd(const Integer &a, const Integer &n)
greatest common divisor
Definition: integer.cpp:4202
void resize(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:705
size_t BitsToWords(size_t bitCount)
Returns the number of words required for the specified number of bits.
Definition: misc.h:760
const Integer & Subtract(const Integer &a, const Integer &b) const
Subtracts elements in the ring.
Definition: integer.cpp:4339
unsigned int BytePrecision(const T &value)
Returns the number of 8-bit bytes or octets required for a value.
Definition: misc.h:623
void CleanGrow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:689
bool IsEven() const
Determines if the Integer is even parity.
Definition: integer.h:348
Secure memory block with allocator and cleanup.
Definition: secblock.h:437
unsigned int WordCount() const
Determines the number of words required to represent the Integer.
Definition: integer.cpp:3275
void OpenPGPDecode(const byte *input, size_t inputLen)
Decode from OpenPGP format.
Definition: integer.cpp:3425
Signedness
Used when importing and exporting integers.
Definition: integer.h:79
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:524
ASN.1 object identifiers for algorthms and schemes.
Square block cipher.
Definition: square.h:24
Classes for automatic resource management.
size_t size() const
Length of the memory block.
Definition: algparam.h:93
bool IsP4()
Determines if the CPU is an Intel P4.
Definition: cpu.h:291
byte GetByte(size_t i) const
Provides the i-th byte of the Integer.
Definition: integer.cpp:3065
Library configuration file.
MontgomeryRepresentation(const Integer &modulus)
Construct a IsMontgomeryRepresentation.
Definition: integer.cpp:4409
static void DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
returns same result as Divide(r, q, a, Power2(n)), but faster
Definition: integer.cpp:4001
Ring of congruence classes modulo n.
Definition: modarith.h:34
STL namespace.
Interface for random number generators.
Definition: cryptlib.h:1186
size_t BytesToWords(size_t byteCount)
Returns the number of words required for the specified number of bytes.
Definition: misc.h:750
void Randomize(RandomNumberGenerator &rng, size_t bitCount)
Set this Integer to random integer.
Definition: integer.cpp:3439
std::string IntToString< word64 >(word64 value, unsigned int base)
Converts an unsigned value to a string.
Definition: integer.cpp:4551
size_t MinEncodedSize(Signedness sign=UNSIGNED) const
The minimum number of bytes to encode this integer.
Definition: integer.cpp:3338
void SetByte(size_t n, byte value)
Set the n-th byte to value.
Definition: integer.cpp:3073
the value is negative
Definition: integer.h:73
SecBlock<byte> typedef.
Definition: secblock.h:731
virtual void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the Ring.
Definition: algebra.cpp:334
BER Sequence Decoder.
Definition: asn.h:295
Interface for buffered transformations.
Definition: cryptlib.h:1352
void DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
encode absolute value as big-endian octet string
Definition: integer.cpp:3394
lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: queue.h:35
const byte * begin() const
Pointer to the first byte in the memory block.
Definition: algparam.h:89
bool IsConvertableToLong() const
Determines if the Integer is convertable to Long.
Definition: integer.cpp:2909
Integer MultiplicativeInverse() const
return inverse if 1 or -1, otherwise return 0
Definition: integer.cpp:4186
static const Integer & One()
Integer representing 1.
Definition: integer.cpp:3016
bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:371
const Integer & Add(const Integer &a, const Integer &b) const
Adds elements in the ring.
Definition: integer.cpp:4299
byte order is little-endian
Definition: cryptlib.h:125
Sign
Used internally to represent the integer.
Definition: integer.h:69
Pointer that overloads operator ->
Definition: smartptr.h:39
bool IsSquare() const
return whether this integer is a perfect square
Definition: integer.cpp:4175
size_t OpenPGPEncode(byte *output, size_t bufferSize) const
Encode absolute value in OpenPGP format.
Definition: integer.cpp:3410
Classes and functions for secure memory allocations.
unsigned int BitCount() const
Determines the number of bits required to represent the Integer.
Definition: integer.cpp:3289
void BERDecodeElement(BufferedTransformation &in, Element &a) const
Decodes element in DER format.
Definition: integer.cpp:4283
bool IsUnit() const
is 1 or -1
Definition: integer.cpp:4181
Copy input to a memory buffer.
Definition: filters.h:1016
Integer SquareRoot() const
extract square root, if negative return 0, else return floor of square root
Definition: integer.cpp:4157
bool IsPositive() const
Determines if the Integer is positive.
Definition: integer.h:342
bool GetValue(const char *name, T &value) const
Get a named value.
Definition: cryptlib.h:335
a number with no special properties
Definition: integer.h:89
Integer Squared() const
Definition: integer.h:500
size_t Put(byte inByte, bool blocking=true)
Input a byte for processing.
Definition: cryptlib.h:1378
bool IsNegative() const
Determines if the Integer is negative.
Definition: integer.h:336
AlgorithmParameters MakeParameters(const char *name, const T &value, bool throwIfNotUsed=true)
Create an object that implements NameValuePairs.
Definition: algparam.h:554
void swap(Integer &a)
Swaps this Integer with another Integer.
Definition: integer.cpp:3103
Integer()
Creates the zero integer.
Definition: integer.cpp:2869
unsigned int TrailingZeros(word32 v)
Determines the number of trailing 0-bits in a value.
Definition: misc.h:670
signed long ConvertToLong() const
Convert the Integer to Long.
Definition: integer.cpp:2923
Exception thrown when an error is encountered decoding an OpenPGP integer.
Definition: integer.h:282
void Negate()
Reverse the Sign of the Integer.
Definition: integer.cpp:4123
T Crop(T value, size_t bits)
Truncates the value to the specified number of bits.
Definition: misc.h:728
Integer Times(const Integer &b) const
Definition: integer.cpp:3925
void BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
Decode nonnegative value from big-endian octet string.
Definition: integer.cpp:3401
Application callback to signal suitability of a cabdidate prime.
Definition: nbtheory.h:80
void ConditionalSwapPointers(bool c, T &a, T &b)
Performs a branchless swap of pointers a and b if condition c is true.
Definition: misc.h:1054
static Integer Power2(size_t e)
Exponentiates to a power of 2.
Definition: integer.cpp:2995
Multiple precision integer with arithmetic operations.
Definition: integer.h:45
a signed value
Definition: integer.h:83
static const Integer & Two()
Integer representing 2.
Definition: integer.cpp:3021
Integer ConvertOut(const Integer &a) const
Reduces an element in the congruence class.
Definition: integer.cpp:4446
virtual Element CascadeExponentiate(const Element &x, const Integer &e1, const Element &y, const Integer &e2) const
TODO.
Definition: algebra.cpp:323
virtual lword Skip(lword skipMax=LWORD_MAX)
Discard skipMax bytes from the output buffer.
Definition: cryptlib.cpp:557
RandomNumberType
Properties of a random integer.
Definition: integer.h:87
const Integer & Inverse(const Integer &a) const
Inverts the element in the ring.
Definition: integer.cpp:4373
const char * Seed()
ConstByteArrayParameter.
Definition: argnames.h:19
byte order is big-endian
Definition: cryptlib.h:127
Safely right shift values when undefined behavior could occur.
const T & STDMIN(const T &a, const T &b)
Replacement function for std::min.
Definition: misc.h:467
String-based implementation of Store interface.
Definition: filters.h:1066
static void Divide(Integer &r, Integer &q, const Integer &a, const Integer &d)
calculate r and q such that (a == d*q + r) && (0 <= r < abs(d))
Definition: integer.cpp:3983
ModularArithmetic(const Integer &modulus=Integer::One())
Construct a ModularArithmetic.
Definition: modarith.h:47
void BERDecodeError()
Raises a BERDecodeErr.
Definition: asn.h:62
Data structure used to store byte strings.
Definition: queue.h:20
Functions for CPU features and intrinsics.
Classes and functions for working with ANS.1 objects.
Classes for SHA-1 and SHA-2 family of message digests.
void SetBit(size_t n, bool value=1)
Set the n-th bit to value.
Definition: integer.cpp:3051
size_t GetWord16(word16 &value, ByteOrder order=BIG_ENDIAN_ORDER)
Retrieve a 16-bit word.
Definition: cryptlib.cpp:754
iterator begin()
Provides an iterator pointing to the first element in the memory block.
Definition: secblock.h:499
Integer & Accumulate(Integer &a, const Integer &b) const
TODO.
Definition: integer.cpp:4319
const char * PointerToPrimeSelector()
const PrimeSelector *
Definition: argnames.h:41
Implementation of BufferedTransformation&#39;s attachment interface.
Classes and functions for number theoretic operations.
const Integer & Half(const Integer &a) const
TODO.
Definition: integer.cpp:4288
void DEREncode(BufferedTransformation &bt) const
Encode in DER format.
Definition: integer.cpp:3372
Exception thrown when division by 0 is encountered.
Definition: integer.h:51
DER Sequence Encoder.
Definition: asn.h:305
T1 SaturatingSubtract1(const T1 &a, const T2 &b)
Performs a saturating subtract clamped at 1.
Definition: misc.h:976
Exception thrown when a random number cannot be found that satisfies the condition.
Definition: integer.h:59
Performs modular arithmetic in Montgomery representation for increased speed.
Definition: modarith.h:274
DER General Encoder.
Definition: asn.h:272
bool HasSSE2()
Determines SSE2 availability.
Definition: cpu.h:236
Integer CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
TODO.
Definition: modarith.h:308
const Integer & Multiply(const Integer &a, const Integer &b) const
Multiplies elements in the ring.
Definition: integer.cpp:4420
void GenerateBlock(byte *output, size_t size)
Generate random array of bytes.
Definition: integer.cpp:3480
Integer InverseMod(const Integer &n) const
calculate multiplicative inverse of *this mod n
Definition: integer.cpp:4207
void Decode(const byte *input, size_t inputLen, Signedness sign=UNSIGNED)
Decode from big-endian byte array.
Definition: integer.cpp:3298
size_t PutWord16(word16 value, ByteOrder order=BIG_ENDIAN_ORDER, bool blocking=true)
Input a 16-bit word for processing.
Definition: cryptlib.cpp:718
size_t DEREncodeOctetString(BufferedTransformation &bt, const byte *str, size_t strLen)
DER encode octet string.
Definition: asn.cpp:104
virtual lword MaxRetrievable() const
Provides the number of bytes ready for retrieval.
Definition: cryptlib.cpp:500
Multiple precision integer with arithmetic operations.
void DEREncodeElement(BufferedTransformation &out, const Element &a) const
Encodes element in DER format.
Definition: integer.cpp:4278
Safely left shift values when undefined behavior could occur.
static const Integer & Zero()
Integer representing 0.
Definition: integer.cpp:3011
const T & STDMAX(const T &a, const T &b)
Replacement function for std::max.
Definition: misc.h:477
Euclidean domain.
Definition: algebra.h:315
void Grow(size_type newSize)
Change size and preserve contents.
Definition: secblock.h:673
void BERDecode(const byte *input, size_t inputLen)
Decode from BER format.
Definition: integer.cpp:3379
std::string IntToString< Integer >(Integer value, unsigned int base)
Converts an Integer to a string.
Definition: integer.cpp:4485
lword GetBits(size_t i, size_t n) const
Provides the low order bits of the Integer.
Definition: integer.cpp:3080
virtual size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: cryptlib.cpp:519
Class file for performing modular arithmetic.
Crypto++ library namespace.
size_type SizeInBytes() const
Provides the number of bytes in the SecBlock.
Definition: secblock.h:538
BER General Decoder.
Definition: asn.h:235
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: integer.cpp:4396
int Compare(const Integer &a) const
Perform signed comparison.
Definition: integer.cpp:4139
void SimultaneousExponentiate(Element *results, const Element &base, const Integer *exponents, unsigned int exponentsCount) const
Exponentiates a base to multiple exponents in the ring.
Definition: modarith.h:311
Object Identifier.
Definition: asn.h:159
Integer Modulo(const Integer &b) const
Definition: integer.cpp:4036
size_t Get(byte &outByte)
Retrieve a 8-bit byte.
Definition: queue.cpp:300
unsigned int BitPrecision(const T &value)
Returns the number of bits required for a value.
Definition: misc.h:645
unsigned int ByteCount() const
Determines the number of bytes required to represent the Integer.
Definition: integer.cpp:3280
virtual Element Exponentiate(const Element &a, const Integer &e) const
Raises a base to an exponent in the group.
Definition: algebra.cpp:316
Integer ConvertIn(const Integer &a) const
Reduces an element in the congruence class.
Definition: modarith.h:294
the value is positive or 0
Definition: integer.h:71
bool NotNegative() const
Determines if the Integer is non-negative.
Definition: integer.h:339
Interface for retrieving values given their names.
Definition: cryptlib.h:277