Rename VecPolyMultiplyLE to VecIntelMultiply (PR #908)

The LE multiplies are compatible with Intel's _mm_clmulepi64_si128
2025-02-17 03:48:38 +00:00 · 2019-10-26 22:57:58 -04:00 · 2019-10-26 22:57:58 -04:00 · 183fba44bf
commit 183fba44bf
parent fa39314b7a
4 changed files with 37 additions and 37 deletions
--- a/gcm_simd.cpp
+++ b/gcm_simd.cpp
@@ -190,8 +190,8 @@ bool CPU_ProbePMULL()
                             0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
        const uint32x4_p a2=VecLoad(wa2), b2=VecLoad(wb2);

-        const uint64x2_p r1 = VecPolyMultiply00LE(a1, b1);
-        const uint64x2_p r2 = VecPolyMultiply11LE((uint64x2_p)a2, (uint64x2_p)b2);
+        const uint64x2_p r1 = VecIntelMultiply00(a1, b1);
+        const uint64x2_p r2 = VecIntelMultiply11((uint64x2_p)a2, (uint64x2_p)b2);

        const uint64_t wc1[]={W64LIT(0x5300530053005300), W64LIT(0x5300530053005300)},
                       wc2[]={W64LIT(0x6c006c006c006c00), W64LIT(0x6c006c006c006c00)};
@@ -575,9 +575,9 @@ uint64x2_p GCM_Reduce_VMULL(uint64x2_p c0, uint64x2_p c1, uint64x2_p c2, uint64x
    const uint64x2_p m1 = {1,1}, m63 = {63,63};

    c1 = VecXor(c1, VecShiftRightOctet<8>(c0));
-    c1 = VecXor(c1, VecPolyMultiply10LE(c0, r));
+    c1 = VecXor(c1, VecIntelMultiply10(c0, r));
    c0 = VecXor(c1, VecShiftLeftOctet<8>(c0));
-    c0 = VecPolyMultiply00LE(vec_sl(c0, m1), r);
+    c0 = VecIntelMultiply00(vec_sl(c0, m1), r);
    c2 = VecXor(c2, c0);
    c2 = VecXor(c2, VecShiftLeftOctet<8>(c1));
    c1 = vec_sr(vec_mergeh(c1, c2), m63);
@@ -588,9 +588,9 @@ uint64x2_p GCM_Reduce_VMULL(uint64x2_p c0, uint64x2_p c1, uint64x2_p c2, uint64x

 inline uint64x2_p GCM_Multiply_VMULL(uint64x2_p x, uint64x2_p h, uint64x2_p r)
 {
-    const uint64x2_p c0 = VecPolyMultiply00LE(x, h);
-    const uint64x2_p c1 = VecXor(VecPolyMultiply01LE(x, h), VecPolyMultiply10LE(x, h));
-    const uint64x2_p c2 = VecPolyMultiply11LE(x, h);
+    const uint64x2_p c0 = VecIntelMultiply00(x, h);
+    const uint64x2_p c1 = VecXor(VecIntelMultiply01(x, h), VecIntelMultiply10(x, h));
+    const uint64x2_p c2 = VecIntelMultiply11(x, h);

    return GCM_Reduce_VMULL(c0, c1, c2, r);
 }
@@ -685,35 +685,35 @@ size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mt
            {
                d1 = LoadBuffer2(data);
                d1 = VecXor(d1, x);
-                c0 = VecXor(c0, VecPolyMultiply00LE(d1, h0));
-                c2 = VecXor(c2, VecPolyMultiply01LE(d1, h1));
+                c0 = VecXor(c0, VecIntelMultiply00(d1, h0));
+                c2 = VecXor(c2, VecIntelMultiply01(d1, h1));
                d1 = VecXor(d1, SwapWords(d1));
-                c1 = VecXor(c1, VecPolyMultiply00LE(d1, h2));
+                c1 = VecXor(c1, VecIntelMultiply00(d1, h2));
                break;
            }

            d1 = LoadBuffer1(data+(s-i)*16-8);
-            c0 = VecXor(c0, VecPolyMultiply01LE(d2, h0));
-            c2 = VecXor(c2, VecPolyMultiply01LE(d1, h1));
+            c0 = VecXor(c0, VecIntelMultiply01(d2, h0));
+            c2 = VecXor(c2, VecIntelMultiply01(d1, h1));
            d2 = VecXor(d2, d1);
-            c1 = VecXor(c1, VecPolyMultiply01LE(d2, h2));
+            c1 = VecXor(c1, VecIntelMultiply01(d2, h2));

            if (++i == s)
            {
                d1 = LoadBuffer2(data);
                d1 = VecXor(d1, x);
-                c0 = VecXor(c0, VecPolyMultiply10LE(d1, h0));
-                c2 = VecXor(c2, VecPolyMultiply11LE(d1, h1));
+                c0 = VecXor(c0, VecIntelMultiply10(d1, h0));
+                c2 = VecXor(c2, VecIntelMultiply11(d1, h1));
                d1 = VecXor(d1, SwapWords(d1));
-                c1 = VecXor(c1, VecPolyMultiply10LE(d1, h2));
+                c1 = VecXor(c1, VecIntelMultiply10(d1, h2));
                break;
            }

            d2 = LoadBuffer2(data+(s-i)*16-8);
-            c0 = VecXor(c0, VecPolyMultiply10LE(d1, h0));
-            c2 = VecXor(c2, VecPolyMultiply10LE(d2, h1));
+            c0 = VecXor(c0, VecIntelMultiply10(d1, h0));
+            c2 = VecXor(c2, VecIntelMultiply10(d2, h1));
            d1 = VecXor(d1, d2);
-            c1 = VecXor(c1, VecPolyMultiply10LE(d1, h2));
+            c1 = VecXor(c1, VecIntelMultiply10(d1, h2));
        }
        data += s*16;
        len -= s*16;
--- a/gf2n_simd.cpp
+++ b/gf2n_simd.cpp
@@ -325,8 +325,8 @@ using CryptoPP::VecMergeHigh;
 using CryptoPP::VecShiftLeft;
 using CryptoPP::VecShiftRight;

-using CryptoPP::VecPolyMultiply00LE;
-using CryptoPP::VecPolyMultiply11LE;
+using CryptoPP::VecIntelMultiply00;
+using CryptoPP::VecIntelMultiply11;

 // c1c0 = a * b
 inline void
@@ -335,13 +335,13 @@ F2N_Multiply_128x128_POWER8(uint64x2_p& c1, uint64x2_p& c0, const uint64x2_p& a,
    uint64x2_p t1, t2;
    const uint64x2_p z0={0};

-    c0 = VecPolyMultiply00LE(a, b);
-    c1 = VecPolyMultiply11LE(a, b);
+    c0 = VecIntelMultiply00(a, b);
+    c1 = VecIntelMultiply11(a, b);
    t1 = VecMergeLow(a, a);
    t1 = VecXor(a, t1);
    t2 = VecMergeLow(b, b);
    t2 = VecXor(b, t2);
-    t1 = VecPolyMultiply00LE(t1, t2);
+    t1 = VecIntelMultiply00(t1, t2);
    t1 = VecXor(c0, t1);
    t1 = VecXor(c1, t1);
    t2 = t1;
@@ -380,10 +380,10 @@ inline void
 F2N_Square_256_POWER8(uint64x2_p& c3, uint64x2_p& c2, uint64x2_p& c1,
    uint64x2_p& c0, const uint64x2_p& a1, const uint64x2_p& a0)
 {
-    c0 = VecPolyMultiply00LE(a0, a0);
-    c1 = VecPolyMultiply11LE(a0, a0);
-    c2 = VecPolyMultiply00LE(a1, a1);
-    c3 = VecPolyMultiply11LE(a1, a1);
+    c0 = VecIntelMultiply00(a0, a0);
+    c1 = VecIntelMultiply11(a0, a0);
+    c2 = VecIntelMultiply00(a1, a1);
+    c3 = VecIntelMultiply11(a1, a1);
 }

 // x = (x << n), z = 0
--- a/ppc_power7.cpp
+++ b/ppc_power7.cpp
@@ -73,7 +73,7 @@ bool CPU_ProbePower7()
            result = (0 == std::memcmp(b1+3, b2+1, 16));
        #else
            result = false;
-        #endif        
+        #endif
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
--- a/ppc_simd.h
+++ b/ppc_simd.h
@@ -1437,7 +1437,7 @@ inline uint64x2_p VecPolyMultiply(const uint64x2_p& a, const uint64x2_p& b)
 /// \param a the first term
 /// \param b the second term
 /// \returns vector product
-/// \details VecPolyMultiply00LE() performs polynomial multiplication and presents
+/// \details VecIntelMultiply00() performs polynomial multiplication and presents
 ///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x00)</tt>.
 ///  The <tt>0x00</tt> indicates the low 64-bits of <tt>a</tt> and <tt>b</tt>
 ///  are multiplied.
@@ -1446,7 +1446,7 @@ inline uint64x2_p VecPolyMultiply(const uint64x2_p& a, const uint64x2_p& b)
 /// \par Wraps
 ///  __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
 /// \since Crypto++ 8.0
-inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b)
+inline uint64x2_p VecIntelMultiply00(const uint64x2_p& a, const uint64x2_p& b)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(VecGetHigh(a), VecGetHigh(b)));
@@ -1459,7 +1459,7 @@ inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b)
 /// \param a the first term
 /// \param b the second term
 /// \returns vector product
-/// \details VecPolyMultiply01LE performs() polynomial multiplication and presents
+/// \details VecIntelMultiply01() performs polynomial multiplication and presents
 ///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x01)</tt>.
 ///  The <tt>0x01</tt> indicates the low 64-bits of <tt>a</tt> and high
 ///  64-bits of <tt>b</tt> are multiplied.
@@ -1468,7 +1468,7 @@ inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b)
 /// \par Wraps
 ///  __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
 /// \since Crypto++ 8.0
-inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b)
+inline uint64x2_p VecIntelMultiply01(const uint64x2_p& a, const uint64x2_p& b)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(a, VecGetHigh(b)));
@@ -1481,7 +1481,7 @@ inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b)
 /// \param a the first term
 /// \param b the second term
 /// \returns vector product
-/// \details VecPolyMultiply10LE() performs polynomial multiplication and presents
+/// \details VecIntelMultiply10() performs polynomial multiplication and presents
 ///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x10)</tt>.
 ///  The <tt>0x10</tt> indicates the high 64-bits of <tt>a</tt> and low
 ///  64-bits of <tt>b</tt> are multiplied.
@@ -1490,7 +1490,7 @@ inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b)
 /// \par Wraps
 ///  __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
 /// \since Crypto++ 8.0
-inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b)
+inline uint64x2_p VecIntelMultiply10(const uint64x2_p& a, const uint64x2_p& b)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(VecGetHigh(a), b));
@@ -1503,7 +1503,7 @@ inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b)
 /// \param a the first term
 /// \param b the second term
 /// \returns vector product
-/// \details VecPolyMultiply11LE() performs polynomial multiplication and presents
+/// \details VecIntelMultiply11() performs polynomial multiplication and presents
 ///  the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x11)</tt>.
 ///  The <tt>0x11</tt> indicates the high 64-bits of <tt>a</tt> and <tt>b</tt>
 ///  are multiplied.
@@ -1512,7 +1512,7 @@ inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b)
 /// \par Wraps
 ///  __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
 /// \since Crypto++ 8.0
-inline uint64x2_p VecPolyMultiply11LE(const uint64x2_p& a, const uint64x2_p& b)
+inline uint64x2_p VecIntelMultiply11(const uint64x2_p& a, const uint64x2_p& b)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
    return VecSwapWords(VecPolyMultiply(VecGetLow(a), b));