From f6e04e5f338d2573f182a2daabed3220ce3dda7e Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Thu, 15 Nov 2018 15:17:49 -0500 Subject: [PATCH] Rename PPC vector functions from VectorFunc to VecFunc --- adv_simd.h | 216 +++++++++++++++--------------- blake2b_simd.cpp | 124 ++++++++--------- blake2s_simd.cpp | 118 ++++++++-------- chacha_simd.cpp | 335 +++++++++++++++++++++++----------------------- gcm_simd.cpp | 136 +++++++++---------- lea_simd.cpp | 18 +-- ppc_simd.cpp | 2 +- ppc_simd.h | 314 +++++++++++++++++++++---------------------- rijndael_simd.cpp | 132 +++++++++--------- sha_simd.cpp | 290 +++++++++++++++++++-------------------- simon128_simd.cpp | 117 ++++++++-------- simon64_simd.cpp | 173 ++++++++++++------------ speck128_simd.cpp | 115 ++++++++-------- speck64_simd.cpp | 137 +++++++++---------- validat1.cpp | 48 +++---- 15 files changed, 1140 insertions(+), 1135 deletions(-) diff --git a/adv_simd.h b/adv_simd.h index c79a9f25..547cd262 100644 --- a/adv_simd.h +++ b/adv_simd.h @@ -1857,54 +1857,54 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6, // even harder without POWER8 due to lack of 64-bit elements. std::memcpy(temp+LowOffset, inBlocks, 8); std::memcpy(temp+HighOffset, inBlocks, 8); - uint32x4_p ctr = (uint32x4_p)VectorLoadBE(temp); + uint32x4_p ctr = (uint32x4_p)VecLoadBE(temp); // For 64-bit block ciphers we need to load the CTR block, // which is 8 bytes. After the dup load we have two counters // in the Altivec word. Then we need to increment the low ctr // by 0 and the high ctr by 1. - block0 = VectorAdd(s_one, ctr); + block0 = VecAdd(s_one, ctr); // After initial increment of {0,1} remaining counters // increment by {2,2}. - block1 = VectorAdd(s_two, block0); - block2 = VectorAdd(s_two, block1); - block3 = VectorAdd(s_two, block2); - block4 = VectorAdd(s_two, block3); - block5 = VectorAdd(s_two, block4); + block1 = VecAdd(s_two, block0); + block2 = VecAdd(s_two, block1); + block3 = VecAdd(s_two, block2); + block4 = VecAdd(s_two, block3); + block5 = VecAdd(s_two, block4); // Update the counter in the caller. 
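For readers following the counter arithmetic above: with a 64-bit block cipher two counter blocks share one 128-bit Altivec register, so the first register is advanced by {0,1} (s_one) and every later register by {2,2} (s_two), exactly as the comments state. A stand-alone scalar sketch of that schedule (illustration only, plain uint64_t in place of uint32x4_p and VecAdd):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint64_t ctr = 0;          // the caller's 8-byte counter block
        uint64_t lane[6][2];       // six "registers", two counters in each

        lane[0][0] = ctr + 0;      // VecAdd(s_one, ctr): low lane +0
        lane[0][1] = ctr + 1;      //                     high lane +1
        for (int i = 1; i < 6; ++i) {
            lane[i][0] = lane[i-1][0] + 2;   // VecAdd(s_two, previous block)
            lane[i][1] = lane[i-1][1] + 2;
        }

        // Six registers hold twelve counter values, so the caller's
        // counter advances by twelve once this batch is processed.
        ctr += 12;
        std::printf("next counter: %llu\n", (unsigned long long)ctr);
        return 0;
    }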
const_cast(inBlocks)[7] += 12; } else { - block0 = VectorLoadBE(inBlocks); + block0 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block1 = VectorLoadBE(inBlocks); + block1 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block2 = VectorLoadBE(inBlocks); + block2 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block3 = VectorLoadBE(inBlocks); + block3 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block4 = VectorLoadBE(inBlocks); + block4 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block5 = VectorLoadBE(inBlocks); + block5 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); } if (xorInput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block2 = VectorXor(block2, VectorLoadBE(xorBlocks)); + block2 = VecXor(block2, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block3 = VectorXor(block3, VectorLoadBE(xorBlocks)); + block3 = VecXor(block3, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block4 = VectorXor(block4, VectorLoadBE(xorBlocks)); + block4 = VecXor(block4, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block5 = VectorXor(block5, VectorLoadBE(xorBlocks)); + block5 = VecXor(block5, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } @@ -1912,31 +1912,31 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6, if (xorOutput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block2 = VectorXor(block2, VectorLoadBE(xorBlocks)); + block2 = VecXor(block2, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block3 = VectorXor(block3, VectorLoadBE(xorBlocks)); + block3 = VecXor(block3, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block4 = VectorXor(block4, VectorLoadBE(xorBlocks)); + block4 = VecXor(block4, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block5 = VectorXor(block5, VectorLoadBE(xorBlocks)); + block5 = VecXor(block5, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } - VectorStoreBE(block0, outBlocks); + VecStoreBE(block0, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block1, outBlocks); + VecStoreBE(block1, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block2, outBlocks); + VecStoreBE(block2, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block3, outBlocks); + VecStoreBE(block3, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block4, outBlocks); + VecStoreBE(block4, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block5, outBlocks); + VecStoreBE(block5, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); length -= 6*vsxBlockSize; @@ -1951,34 +1951,34 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6, // even harder without POWER8 due to lack of 64-bit elements. 
std::memcpy(temp+LowOffset, inBlocks, 8); std::memcpy(temp+HighOffset, inBlocks, 8); - uint32x4_p ctr = (uint32x4_p)VectorLoadBE(temp); + uint32x4_p ctr = (uint32x4_p)VecLoadBE(temp); // For 64-bit block ciphers we need to load the CTR block, // which is 8 bytes. After the dup load we have two counters // in the Altivec word. Then we need to increment the low ctr // by 0 and the high ctr by 1. - block0 = VectorAdd(s_one, ctr); + block0 = VecAdd(s_one, ctr); // After initial increment of {0,1} remaining counters // increment by {2,2}. - block1 = VectorAdd(s_two, block0); + block1 = VecAdd(s_two, block0); // Update the counter in the caller. const_cast(inBlocks)[7] += 4; } else { - block0 = VectorLoadBE(inBlocks); + block0 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block1 = VectorLoadBE(inBlocks); + block1 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); } if (xorInput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } @@ -1986,15 +1986,15 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6, if (xorOutput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } - VectorStoreBE(block0, outBlocks); + VecStoreBE(block0, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block1, outBlocks); + VecStoreBE(block1, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); length -= 2*vsxBlockSize; @@ -2030,14 +2030,14 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6, // initialize the block then it generates warnings. std::memcpy(temp+LowOffset, inBlocks, 8); std::memcpy(temp+HighOffset, inBlocks, 8); // don't care - block = (uint32x4_p)VectorLoadBE(temp); + block = (uint32x4_p)VecLoadBE(temp); if (xorInput) { std::memcpy(temp+LowOffset, xorBlocks, 8); std::memcpy(temp+HighOffset, xorBlocks, 8); // don't care - uint32x4_p x = (uint32x4_p)VectorLoadBE(temp); - block = VectorXor(block, x); + uint32x4_p x = (uint32x4_p)VecLoadBE(temp); + block = VecXor(block, x); } // Update the counter in the caller. @@ -2050,11 +2050,11 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6, { std::memcpy(temp+LowOffset, xorBlocks, 8); std::memcpy(temp+HighOffset, xorBlocks, 8); // don't care - uint32x4_p x = (uint32x4_p)VectorLoadBE(temp); - block = VectorXor(block, x); + uint32x4_p x = (uint32x4_p)VecLoadBE(temp); + block = VecXor(block, x); } - VectorStoreBE(block, temp); + VecStoreBE(block, temp); std::memcpy(outBlocks, temp+LowOffset, 8); inBlocks = PtrAdd(inBlocks, inIncrement); @@ -2120,10 +2120,10 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, if (flags & BT_InBlockIsCounter) { - block0 = VectorLoadBE(inBlocks); - block1 = VectorAdd(block0, s_one); - block2 = VectorAdd(block1, s_one); - block3 = VectorAdd(block2, s_one); + block0 = VecLoadBE(inBlocks); + block1 = VecAdd(block0, s_one); + block2 = VecAdd(block1, s_one); + block3 = VecAdd(block2, s_one); // Hack due to big-endian loads used by POWER8 (and maybe ARM-BE). 
// CTR_ModePolicy::OperateKeystream is wired such that after @@ -2137,25 +2137,25 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, } else { - block0 = VectorLoadBE(inBlocks); + block0 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block1 = VectorLoadBE(inBlocks); + block1 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block2 = VectorLoadBE(inBlocks); + block2 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block3 = VectorLoadBE(inBlocks); + block3 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); } if (xorInput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block2 = VectorXor(block2, VectorLoadBE(xorBlocks)); + block2 = VecXor(block2, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block3 = VectorXor(block3, VectorLoadBE(xorBlocks)); + block3 = VecXor(block3, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } @@ -2163,23 +2163,23 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, if (xorOutput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block2 = VectorXor(block2, VectorLoadBE(xorBlocks)); + block2 = VecXor(block2, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block3 = VectorXor(block3, VectorLoadBE(xorBlocks)); + block3 = VecXor(block3, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } - VectorStoreBE(block0, outBlocks); + VecStoreBE(block0, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block1, outBlocks); + VecStoreBE(block1, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block2, outBlocks); + VecStoreBE(block2, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block3, outBlocks); + VecStoreBE(block3, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); length -= 4*blockSize; @@ -2188,10 +2188,10 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, while (length >= blockSize) { - uint32x4_p block = VectorLoadBE(inBlocks); + uint32x4_p block = VecLoadBE(inBlocks); if (xorInput) - block = VectorXor(block, VectorLoadBE(xorBlocks)); + block = VecXor(block, VecLoadBE(xorBlocks)); if (flags & BT_InBlockIsCounter) const_cast(inBlocks)[15]++; @@ -2199,9 +2199,9 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, func1(block, subKeys, rounds); if (xorOutput) - block = VectorXor(block, VectorLoadBE(xorBlocks)); + block = VecXor(block, VecLoadBE(xorBlocks)); - VectorStoreBE(block, outBlocks); + VecStoreBE(block, outBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); outBlocks = PtrAdd(outBlocks, outIncrement); @@ -2265,12 +2265,12 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, if (flags & BT_InBlockIsCounter) { - block0 = VectorLoadBE(inBlocks); - block1 = VectorAdd(block0, s_one); - block2 = VectorAdd(block1, s_one); - block3 = VectorAdd(block2, s_one); - block4 = VectorAdd(block3, s_one); - block5 = 
VectorAdd(block4, s_one); + block0 = VecLoadBE(inBlocks); + block1 = VecAdd(block0, s_one); + block2 = VecAdd(block1, s_one); + block3 = VecAdd(block2, s_one); + block4 = VecAdd(block3, s_one); + block5 = VecAdd(block4, s_one); // Hack due to big-endian loads used by POWER8 (and maybe ARM-BE). // CTR_ModePolicy::OperateKeystream is wired such that after @@ -2286,38 +2286,38 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, // the issue. If the last octet was 0xFC then 4 would trigger it. // We dumb-lucked into the test with SPECK-128. The test case of // interest is the one with IV 348ECA9766C09F04 826520DE47A212FA. - uint8x16_p temp = VectorAdd((uint8x16_p)block5, (uint8x16_p)s_one); - VectorStoreBE(temp, const_cast(inBlocks)); + uint8x16_p temp = VecAdd((uint8x16_p)block5, (uint8x16_p)s_one); + VecStoreBE(temp, const_cast(inBlocks)); } else { - block0 = VectorLoadBE(inBlocks); + block0 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block1 = VectorLoadBE(inBlocks); + block1 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block2 = VectorLoadBE(inBlocks); + block2 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block3 = VectorLoadBE(inBlocks); + block3 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block4 = VectorLoadBE(inBlocks); + block4 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); - block5 = VectorLoadBE(inBlocks); + block5 = VecLoadBE(inBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); } if (xorInput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block2 = VectorXor(block2, VectorLoadBE(xorBlocks)); + block2 = VecXor(block2, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block3 = VectorXor(block3, VectorLoadBE(xorBlocks)); + block3 = VecXor(block3, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block4 = VectorXor(block4, VectorLoadBE(xorBlocks)); + block4 = VecXor(block4, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block5 = VectorXor(block5, VectorLoadBE(xorBlocks)); + block5 = VecXor(block5, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } @@ -2325,31 +2325,31 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, if (xorOutput) { - block0 = VectorXor(block0, VectorLoadBE(xorBlocks)); + block0 = VecXor(block0, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block1 = VectorXor(block1, VectorLoadBE(xorBlocks)); + block1 = VecXor(block1, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block2 = VectorXor(block2, VectorLoadBE(xorBlocks)); + block2 = VecXor(block2, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block3 = VectorXor(block3, VectorLoadBE(xorBlocks)); + block3 = VecXor(block3, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block4 = VectorXor(block4, VectorLoadBE(xorBlocks)); + block4 = VecXor(block4, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); - block5 = VectorXor(block5, VectorLoadBE(xorBlocks)); + block5 = VecXor(block5, VecLoadBE(xorBlocks)); xorBlocks = PtrAdd(xorBlocks, xorIncrement); } - VectorStoreBE(block0, outBlocks); + VecStoreBE(block0, outBlocks); outBlocks 
= PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block1, outBlocks); + VecStoreBE(block1, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block2, outBlocks); + VecStoreBE(block2, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block3, outBlocks); + VecStoreBE(block3, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block4, outBlocks); + VecStoreBE(block4, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); - VectorStoreBE(block5, outBlocks); + VecStoreBE(block5, outBlocks); outBlocks = PtrAdd(outBlocks, outIncrement); length -= 6*blockSize; @@ -2358,10 +2358,10 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, while (length >= blockSize) { - uint32x4_p block = VectorLoadBE(inBlocks); + uint32x4_p block = VecLoadBE(inBlocks); if (xorInput) - block = VectorXor(block, VectorLoadBE(xorBlocks)); + block = VecXor(block, VecLoadBE(xorBlocks)); if (flags & BT_InBlockIsCounter) const_cast(inBlocks)[15]++; @@ -2369,9 +2369,9 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, func1(block, subKeys, rounds); if (xorOutput) - block = VectorXor(block, VectorLoadBE(xorBlocks)); + block = VecXor(block, VecLoadBE(xorBlocks)); - VectorStoreBE(block, outBlocks); + VecStoreBE(block, outBlocks); inBlocks = PtrAdd(inBlocks, inIncrement); outBlocks = PtrAdd(outBlocks, outIncrement); diff --git a/blake2b_simd.cpp b/blake2b_simd.cpp index 853a4cb5..ee701bd4 100644 --- a/blake2b_simd.cpp +++ b/blake2b_simd.cpp @@ -742,7 +742,7 @@ void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state) #if (CRYPTOPP_POWER8_AVAILABLE) -inline uint64x2_p VectorLoad64(const void* p) +inline uint64x2_p VecLoad64(const void* p) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (uint64x2_p)vec_xl(0, (uint8_t*)p); @@ -751,18 +751,18 @@ inline uint64x2_p VectorLoad64(const void* p) #endif } -inline uint64x2_p VectorLoad64LE(const void* p) +inline uint64x2_p VecLoad64LE(const void* p) { #if __BIG_ENDIAN__ const uint8x16_p m = {7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8}; - const uint64x2_p v = VectorLoad64(p); - return vec_perm(v, v, m); + const uint64x2_p v = VecLoad64(p); + return VecPermute(v, v, m); #else - return VectorLoad64(p); + return VecLoad64(p); #endif } -inline void VectorStore64(void* p, const uint64x2_p x) +inline void VecStore64(void* p, const uint64x2_p x) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) vec_xst((uint8x16_p)x,0,(uint8_t*)p); @@ -771,18 +771,18 @@ inline void VectorStore64(void* p, const uint64x2_p x) #endif } -inline void VectorStore64LE(void* p, const uint64x2_p x) +inline void VecStore64LE(void* p, const uint64x2_p x) { #if __BIG_ENDIAN__ const uint8x16_p m = {7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8}; - VectorStore64(p, vec_perm(x, x, m)); + VecStore64(p, VecPermute(x, x, m)); #else - VectorStore64(p, x); + VecStore64(p, x); #endif } template -inline uint64x2_p VectorShiftLeftOctet(const uint64x2_p a, const uint64x2_p b) +inline uint64x2_p VecShiftLeftOctet(const uint64x2_p a, const uint64x2_p b) { #if __BIG_ENDIAN__ return (uint64x2_p)vec_sld((uint8x16_p)a, (uint8x16_p)b, C); @@ -791,18 +791,18 @@ inline uint64x2_p VectorShiftLeftOctet(const uint64x2_p a, const uint64x2_p b) #endif } -#define vec_shl_octet(a,b,c) VectorShiftLeftOctet(a, b) +#define vec_shl_octet(a,b,c) VecShiftLeftOctet(a, b) -// vec_mergeh(a,b) is equivalent to vec_perm(a,b,HH_MASK); and -// vec_mergel(a,b) is equivalent vec_perm(a,b,LL_MASK). 
Benchmarks +// vec_mergeh(a,b) is equivalent to VecPermute(a,b,HH_MASK); and +// vec_mergel(a,b) is equivalent VecPermute(a,b,LL_MASK). Benchmarks // show vec_mergeh and vec_mergel is faster on little-endian -// machines by 0.4 cpb. Benchmarks show vec_perm is faster on +// machines by 0.4 cpb. Benchmarks show VecPermute is faster on // big-endian machines by 1.5 cpb. The code that uses // vec_mergeh and vec_mergel is about 880 bytes shorter. #if defined(__GNUC__) && (__BIG_ENDIAN__) -# define vec_merge_hi(a,b) vec_perm(a,b, HH_MASK) -# define vec_merge_lo(a,b) vec_perm(a,b, LL_MASK) +# define vec_merge_hi(a,b) VecPermute(a,b, HH_MASK) +# define vec_merge_lo(a,b) VecPermute(a,b, LL_MASK) #else # define vec_merge_hi(a,b) vec_mergeh(a,b) # define vec_merge_lo(a,b) vec_mergel(a,b) @@ -878,12 +878,12 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_LOAD_MSG_2_2(b0, b1) \ do { \ b0 = vec_merge_hi(m4, m0); \ - b1 = vec_perm(m1, m6, HL_MASK); \ + b1 = VecPermute(m1, m6, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_2_3(b0, b1) \ do { \ - b0 = vec_perm(m5, m1, HL_MASK); \ + b0 = VecPermute(m5, m1, HL_MASK); \ b1 = vec_merge_lo(m3, m4); \ } while(0) @@ -907,8 +907,8 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_LOAD_MSG_3_3(b0, b1) \ do { \ - b0 = vec_perm(m1, m2, HL_MASK); \ - b1 = vec_perm(m2, m7, HL_MASK); \ + b0 = VecPermute(m1, m2, HL_MASK); \ + b1 = VecPermute(m2, m7, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_3_4(b0, b1) \ @@ -925,20 +925,20 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_LOAD_MSG_4_2(b0, b1) \ do { \ - b0 = vec_perm(m0, m3, HL_MASK); \ - b1 = vec_perm(m2, m7, HL_MASK); \ + b0 = VecPermute(m0, m3, HL_MASK); \ + b1 = VecPermute(m2, m7, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_4_3(b0, b1) \ do { \ - b0 = vec_perm(m7, m5, HL_MASK); \ - b1 = vec_perm(m3, m1, HL_MASK); \ + b0 = VecPermute(m7, m5, HL_MASK); \ + b1 = VecPermute(m3, m1, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_4_4(b0, b1) \ do { \ b0 = vec_shl_octet(m0, m6, 1); \ - b1 = vec_perm(m4, m6, HL_MASK); \ + b1 = VecPermute(m4, m6, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_5_1(b0, b1) \ @@ -955,19 +955,19 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_LOAD_MSG_5_3(b0, b1) \ do { \ - b0 = vec_perm(m2, m3, HL_MASK); \ + b0 = VecPermute(m2, m3, HL_MASK); \ b1 = vec_merge_lo(m7, m0); \ } while(0) #define BLAKE2B_LOAD_MSG_5_4(b0, b1) \ do { \ b0 = vec_merge_lo(m6, m2); \ - b1 = vec_perm(m7, m4, HL_MASK); \ + b1 = VecPermute(m7, m4, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_6_1(b0, b1) \ do { \ - b0 = vec_perm(m6, m0, HL_MASK); \ + b0 = VecPermute(m6, m0, HL_MASK); \ b1 = vec_merge_hi(m7, m2); \ } while(0) @@ -986,13 +986,13 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_LOAD_MSG_6_4(b0, b1) \ do { \ b0 = vec_merge_lo(m3, m1); \ - b1 = vec_perm(m1, m5, HL_MASK); \ + b1 = VecPermute(m1, m5, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_7_1(b0, b1) \ do { \ b0 = vec_merge_lo(m6, m3); \ - b1 = vec_perm(m6, m1, HL_MASK); \ + b1 = VecPermute(m6, m1, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_7_2(b0, b1) \ @@ -1033,7 +1033,7 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_LOAD_MSG_8_4(b0, b1) \ do { \ - b0 = vec_perm(m1, m3, HL_MASK); \ + b0 = VecPermute(m1, m3, HL_MASK); \ b1 = m2; \ } while(0) @@ -1046,7 +1046,7 @@ void 
BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_LOAD_MSG_9_2(b0, b1) \ do { \ b0 = vec_merge_hi(m1, m2); \ - b1 = vec_perm(m3, m2, HL_MASK); \ + b1 = VecPermute(m3, m2, HL_MASK); \ } while(0) #define BLAKE2B_LOAD_MSG_9_3(b0, b1) \ @@ -1122,23 +1122,23 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) #define BLAKE2B_G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ do { \ - row1l = vec_add(vec_add(row1l, b0), row2l); \ - row1h = vec_add(vec_add(row1h, b1), row2h); \ - row4l = vec_xor(row4l, row1l); row4h = vec_xor(row4h, row1h); \ + row1l = VecAdd(VecAdd(row1l, b0), row2l); \ + row1h = VecAdd(VecAdd(row1h, b1), row2h); \ + row4l = VecXor(row4l, row1l); row4h = VecXor(row4h, row1h); \ row4l = vec_ror_32(row4l); row4h = vec_ror_32(row4h); \ - row3l = vec_add(row3l, row4l); row3h = vec_add(row3h, row4h); \ - row2l = vec_xor(row2l, row3l); row2h = vec_xor(row2h, row3h); \ + row3l = VecAdd(row3l, row4l); row3h = VecAdd(row3h, row4h); \ + row2l = VecXor(row2l, row3l); row2h = VecXor(row2h, row3h); \ row2l = vec_ror_24(row2l); row2h = vec_ror_24(row2h); \ } while(0) #define BLAKE2B_G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ do { \ - row1l = vec_add(vec_add(row1l, b0), row2l); \ - row1h = vec_add(vec_add(row1h, b1), row2h); \ - row4l = vec_xor(row4l, row1l); row4h = vec_xor(row4h, row1h); \ + row1l = VecAdd(VecAdd(row1l, b0), row2l); \ + row1h = VecAdd(VecAdd(row1h, b1), row2h); \ + row4l = VecXor(row4l, row1l); row4h = VecXor(row4h, row1h); \ row4l = vec_ror_16(row4l); row4h = vec_ror_16(row4h); \ - row3l = vec_add(row3l, row4l); row3h = vec_add(row3h, row4h); \ - row2l = vec_xor(row2l, row3l); row2h = vec_xor(row2h, row3h); \ + row3l = VecAdd(row3l, row4l); row3h = VecAdd(row3h, row4h); \ + row2l = VecXor(row2l, row3l); row2h = VecXor(row2h, row3h); \ row2l = vec_ror_63(row2l); row2h = vec_ror_63(row2h); \ } while(0) @@ -1175,27 +1175,27 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) BLAKE2B_UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ } while(0) - const uint64x2_p m0 = VectorLoad64LE(input + 00); - const uint64x2_p m1 = VectorLoad64LE(input + 16); - const uint64x2_p m2 = VectorLoad64LE(input + 32); - const uint64x2_p m3 = VectorLoad64LE(input + 48); - const uint64x2_p m4 = VectorLoad64LE(input + 64); - const uint64x2_p m5 = VectorLoad64LE(input + 80); - const uint64x2_p m6 = VectorLoad64LE(input + 96); - const uint64x2_p m7 = VectorLoad64LE(input + 112); + const uint64x2_p m0 = VecLoad64LE(input + 00); + const uint64x2_p m1 = VecLoad64LE(input + 16); + const uint64x2_p m2 = VecLoad64LE(input + 32); + const uint64x2_p m3 = VecLoad64LE(input + 48); + const uint64x2_p m4 = VecLoad64LE(input + 64); + const uint64x2_p m5 = VecLoad64LE(input + 80); + const uint64x2_p m6 = VecLoad64LE(input + 96); + const uint64x2_p m7 = VecLoad64LE(input + 112); uint64x2_p row1l, row1h, row2l, row2h; uint64x2_p row3l, row3h, row4l, row4h; - const uint64x2_p h0 = row1l = VectorLoad64LE(&state.h[0]); - const uint64x2_p h1 = row1h = VectorLoad64LE(&state.h[2]); - const uint64x2_p h2 = row2l = VectorLoad64LE(&state.h[4]); - const uint64x2_p h3 = row2h = VectorLoad64LE(&state.h[6]); + const uint64x2_p h0 = row1l = VecLoad64LE(&state.h[0]); + const uint64x2_p h1 = row1h = VecLoad64LE(&state.h[2]); + const uint64x2_p h2 = row2l = VecLoad64LE(&state.h[4]); + const uint64x2_p h3 = row2h = VecLoad64LE(&state.h[6]); - row3l = VectorLoad64(&BLAKE2B_IV[0]); - row3h = VectorLoad64(&BLAKE2B_IV[2]); - 
row4l = vec_xor(VectorLoad64(&BLAKE2B_IV[4]), VectorLoad64(&state.tf[0])); - row4h = vec_xor(VectorLoad64(&BLAKE2B_IV[6]), VectorLoad64(&state.tf[2])); + row3l = VecLoad64(&BLAKE2B_IV[0]); + row3h = VecLoad64(&BLAKE2B_IV[2]); + row4l = VecXor(VecLoad64(&BLAKE2B_IV[4]), VecLoad64(&state.tf[0])); + row4h = VecXor(VecLoad64(&BLAKE2B_IV[6]), VecLoad64(&state.tf[2])); BLAKE2B_ROUND(0); BLAKE2B_ROUND(1); @@ -1210,10 +1210,10 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state) BLAKE2B_ROUND(10); BLAKE2B_ROUND(11); - VectorStore64LE(&state.h[0], vec_xor(h0, vec_xor(row1l, row3l))); - VectorStore64LE(&state.h[2], vec_xor(h1, vec_xor(row1h, row3h))); - VectorStore64LE(&state.h[4], vec_xor(h2, vec_xor(row2l, row4l))); - VectorStore64LE(&state.h[6], vec_xor(h3, vec_xor(row2h, row4h))); + VecStore64LE(&state.h[0], VecXor(h0, VecXor(row1l, row3l))); + VecStore64LE(&state.h[2], VecXor(h1, VecXor(row1h, row3h))); + VecStore64LE(&state.h[4], VecXor(h2, VecXor(row2l, row4l))); + VecStore64LE(&state.h[6], VecXor(h3, VecXor(row2h, row4h))); } #endif // CRYPTOPP_POWER8_AVAILABLE diff --git a/blake2s_simd.cpp b/blake2s_simd.cpp index cc1ec137..1456ad23 100644 --- a/blake2s_simd.cpp +++ b/blake2s_simd.cpp @@ -683,34 +683,34 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state) #if (CRYPTOPP_ALTIVEC_AVAILABLE) -inline uint32x4_p VectorLoad32(const void* p) +inline uint32x4_p VecLoad32(const void* p) { - return VectorLoad((const word32*)p); + return VecLoad((const word32*)p); } -inline uint32x4_p VectorLoad32LE(const void* p) +inline uint32x4_p VecLoad32LE(const void* p) { #if __BIG_ENDIAN__ const uint8x16_p m = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}; - const uint32x4_p v = VectorLoad((const word32*)p); - return vec_perm(v, v, m); + const uint32x4_p v = VecLoad((const word32*)p); + return VecPermute(v, v, m); #else - return VectorLoad((const word32*)p); + return VecLoad((const word32*)p); #endif } -inline void VectorStore32(void* p, const uint32x4_p x) +inline void VecStore32(void* p, const uint32x4_p x) { - VectorStore(x, (word32*)p); + VecStore(x, (word32*)p); } -inline void VectorStore32LE(void* p, const uint32x4_p x) +inline void VecStore32LE(void* p, const uint32x4_p x) { #if __BIG_ENDIAN__ const uint8x16_p m = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}; - VectorStore(vec_perm(x, x, m), (word32*)p); + VecStore(VecPermute(x, x, m), (word32*)p); #else - VectorStore(x, (word32*)p); + VecStore(x, (word32*)p); #endif } @@ -718,7 +718,7 @@ template inline uint32x4_p VectorSet32(const uint32x4_p a, const uint32x4_p b) { // Re-index. I'd like to use something like Z=Y*4 and then - // VectorShiftLeftOctet(b) but it crashes early Red Hat + // VecShiftLeftOctet(b) but it crashes early Red Hat // GCC compilers. 
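The masks used by VecLoad32LE and VecStore32LE above ({3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}) do nothing more than reverse the bytes inside each 32-bit word, so a big-endian machine ends up with the same little-endian view of the message that a little-endian machine gets from a plain load. A portable scalar equivalent of the load side (illustration only, not part of the patch):

    #include <cstdint>

    // Read four 32-bit words little-endian regardless of host byte order;
    // this is the net effect of VecLoad followed by the byte-reversal permute.
    void Load32LE(const uint8_t src[16], uint32_t out[4])
    {
        for (int i = 0; i < 4; ++i) {
            const uint8_t* p = src + 4*i;
            out[i] = (uint32_t)p[0]         | ((uint32_t)p[1] << 8) |
                     ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
        }
    }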
enum {X=E1&3, Y=E2&3}; @@ -729,88 +729,88 @@ inline uint32x4_p VectorSet32(const uint32x4_p a, const uint32x4_p b) if (X == 0 && Y == 0) { const uint8x16_p mask = {0,1,2,3, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, b, mask); + return VecPermute(a, b, mask); } else if (X == 0 && Y == 1) { const uint8x16_p mask = {0,1,2,3, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<4>(b), mask); + return VecPermute(a, VecShiftLeftOctet<4>(b), mask); } else if (X == 0 && Y == 2) { const uint8x16_p mask = {0,1,2,3, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<8>(b), mask); + return VecPermute(a, VecShiftLeftOctet<8>(b), mask); } else if (X == 0 && Y == 3) { const uint8x16_p mask = {0,1,2,3, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<12>(b), mask); + return VecPermute(a, VecShiftLeftOctet<12>(b), mask); } // Element 1 combinations else if (X == 1 && Y == 0) { const uint8x16_p mask = {4,5,6,7, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, b, mask); + return VecPermute(a, b, mask); } else if (X == 1 && Y == 1) { const uint8x16_p mask = {4,5,6,7, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<4>(b), mask); + return VecPermute(a, VecShiftLeftOctet<4>(b), mask); } else if (X == 1 && Y == 2) { const uint8x16_p mask = {4,5,6,7, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<8>(b), mask); + return VecPermute(a, VecShiftLeftOctet<8>(b), mask); } else if (X == 1 && Y == 3) { const uint8x16_p mask = {4,5,6,7, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<12>(b), mask); + return VecPermute(a, VecShiftLeftOctet<12>(b), mask); } // Element 2 combinations else if (X == 2 && Y == 0) { const uint8x16_p mask = {8,9,10,11, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, b, mask); + return VecPermute(a, b, mask); } else if (X == 2 && Y == 1) { const uint8x16_p mask = {8,9,10,11, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<4>(b), mask); + return VecPermute(a, VecShiftLeftOctet<4>(b), mask); } else if (X == 2 && Y == 2) { const uint8x16_p mask = {8,9,10,11, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<8>(b), mask); + return VecPermute(a, VecShiftLeftOctet<8>(b), mask); } else if (X == 2 && Y == 3) { const uint8x16_p mask = {8,9,10,11, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<12>(b), mask); + return VecPermute(a, VecShiftLeftOctet<12>(b), mask); } // Element 3 combinations else if (X == 3 && Y == 0) { const uint8x16_p mask = {12,13,14,15, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, b, mask); + return VecPermute(a, b, mask); } else if (X == 3 && Y == 1) { const uint8x16_p mask = {12,13,14,15, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<4>(b), mask); + return VecPermute(a, VecShiftLeftOctet<4>(b), mask); } else if (X == 3 && Y == 2) { const uint8x16_p mask = {12,13,14,15, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<8>(b), mask); + return VecPermute(a, VecShiftLeftOctet<8>(b), mask); } else if (X == 3 && Y == 3) { const uint8x16_p mask = {12,13,14,15, 16,17,18,19, DC,DC,DC,DC, DC,DC,DC,DC}; - return vec_perm(a, VectorShiftLeftOctet<12>(b), mask); + return VecPermute(a, VecShiftLeftOctet<12>(b), mask); } } @@ -826,7 +826,7 @@ inline uint32x4_p VectorSet32(const 
uint32x4_p a, const uint32x4_p b, // Power7 follows SSE2's implementation, and this is _mm_set_epi32. const uint8x16_p mask = {20,21,22,23, 16,17,18,19, 4,5,6,7, 0,1,2,3}; - return vec_perm(t0, t1, mask); + return VecPermute(t0, t1, mask); } template<> @@ -835,7 +835,7 @@ uint32x4_p VectorSet32<2,0,2,0>(const uint32x4_p a, const uint32x4_p b, { // a=b, c=d, mask is {2,0, 2,0} const uint8x16_p mask = {16,17,18,19, 24,25,26,27, 0,1,2,3, 8,9,10,11}; - return vec_perm(a, c, mask); + return VecPermute(a, c, mask); } template<> @@ -844,7 +844,7 @@ uint32x4_p VectorSet32<3,1,3,1>(const uint32x4_p a, const uint32x4_p b, { // a=b, c=d, mask is {3,1, 3,1} const uint8x16_p mask = {20,21,22,23, 28,29,30,31, 4,5,6,7, 12,13,14,15}; - return vec_perm(a, c, mask); + return VecPermute(a, c, mask); } void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) @@ -919,25 +919,25 @@ void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) #define BLAKE2S_LOAD_MSG_9_3(buf) buf = VectorSet32<13,3,9,15>(m13,m3,m9,m15) #define BLAKE2S_LOAD_MSG_9_4(buf) buf = VectorSet32<0,12,14,11>(m0,m12,m14,m11) - #define vec_ror_16(x) VectorRotateRight<16>(x) - #define vec_ror_12(x) VectorRotateRight<12>(x) - #define vec_ror_8(x) VectorRotateRight<8>(x) - #define vec_ror_7(x) VectorRotateRight<7>(x) + #define vec_ror_16(x) VecRotateRight<16>(x) + #define vec_ror_12(x) VecRotateRight<12>(x) + #define vec_ror_8(x) VecRotateRight<8>(x) + #define vec_ror_7(x) VecRotateRight<7>(x) #define BLAKE2S_G1(row1,row2,row3,row4,buf) \ - row1 = vec_add(vec_add(row1, buf), row2); \ - row4 = vec_xor(row4, row1); \ + row1 = VecAdd(VecAdd(row1, buf), row2); \ + row4 = VecXor(row4, row1); \ row4 = vec_ror_16(row4); \ - row3 = vec_add(row3, row4); \ - row2 = vec_xor(row2, row3); \ + row3 = VecAdd(row3, row4); \ + row2 = VecXor(row2, row3); \ row2 = vec_ror_12(row2); #define BLAKE2S_G2(row1,row2,row3,row4,buf) \ - row1 = vec_add(vec_add(row1, buf), row2); \ - row4 = vec_xor(row4, row1); \ + row1 = VecAdd(VecAdd(row1, buf), row2); \ + row4 = VecXor(row4, row1); \ row4 = vec_ror_8(row4); \ - row3 = vec_add(row3, row4); \ - row2 = vec_xor(row2, row3); \ + row3 = VecAdd(row3, row4); \ + row2 = VecXor(row2, row3); \ row2 = vec_ror_7(row2); const uint8x16_p D2103_MASK = {12,13,14,15, 0,1,2,3, 4,5,6,7, 8,9,10,11}; @@ -945,14 +945,14 @@ void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) const uint8x16_p D0321_MASK = {4,5,6,7, 8,9,10,11, 12,13,14,15, 0,1,2,3}; #define BLAKE2S_DIAGONALIZE(row1,row2,row3,row4) \ - row4 = vec_perm(row4, row4, D2103_MASK); \ - row3 = vec_perm(row3, row3, D1032_MASK); \ - row2 = vec_perm(row2, row2, D0321_MASK); + row4 = VecPermute(row4, row4, D2103_MASK); \ + row3 = VecPermute(row3, row3, D1032_MASK); \ + row2 = VecPermute(row2, row2, D0321_MASK); #define BLAKE2S_UNDIAGONALIZE(row1,row2,row3,row4) \ - row4 = vec_perm(row4, row4, D0321_MASK); \ - row3 = vec_perm(row3, row3, D1032_MASK); \ - row2 = vec_perm(row2, row2, D2103_MASK); + row4 = VecPermute(row4, row4, D0321_MASK); \ + row3 = VecPermute(row3, row3, D1032_MASK); \ + row2 = VecPermute(row2, row2, D2103_MASK); #define BLAKE2S_ROUND(r) \ BLAKE2S_LOAD_MSG_ ##r ##_1(buf1); \ @@ -970,15 +970,15 @@ void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) uint32x4_p buf1, buf2, buf3, buf4; uint32x4_p ff0, ff1; - const uint32x4_p m0 = VectorLoad32LE(input + 0); - const uint32x4_p m4 = VectorLoad32LE(input + 16); - const uint32x4_p m8 = VectorLoad32LE(input + 32); - const uint32x4_p m12 = VectorLoad32LE(input + 48); + 
const uint32x4_p m0 = VecLoad32LE(input + 0); + const uint32x4_p m4 = VecLoad32LE(input + 16); + const uint32x4_p m8 = VecLoad32LE(input + 32); + const uint32x4_p m12 = VecLoad32LE(input + 48); - row1 = ff0 = VectorLoad32LE(&state.h[0]); - row2 = ff1 = VectorLoad32LE(&state.h[4]); - row3 = VectorLoad32(&BLAKE2S_IV[0]); - row4 = vec_xor(VectorLoad32(&BLAKE2S_IV[4]), VectorLoad32(&state.tf[0])); + row1 = ff0 = VecLoad32LE(&state.h[0]); + row2 = ff1 = VecLoad32LE(&state.h[4]); + row3 = VecLoad32(&BLAKE2S_IV[0]); + row4 = VecXor(VecLoad32(&BLAKE2S_IV[4]), VecLoad32(&state.tf[0])); BLAKE2S_ROUND(0); BLAKE2S_ROUND(1); @@ -991,8 +991,8 @@ void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state) BLAKE2S_ROUND(8); BLAKE2S_ROUND(9); - VectorStore32LE(&state.h[0], vec_xor(ff0, vec_xor(row1, row3))); - VectorStore32LE(&state.h[4], vec_xor(ff1, vec_xor(row2, row4))); + VecStore32LE(&state.h[0], VecXor(ff0, VecXor(row1, row3))); + VecStore32LE(&state.h[4], VecXor(ff1, VecXor(row2, row4))); } #endif // CRYPTOPP_ALTIVEC_AVAILABLE diff --git a/chacha_simd.cpp b/chacha_simd.cpp index 97e78f49..9a0bd6c3 100644 --- a/chacha_simd.cpp +++ b/chacha_simd.cpp @@ -206,7 +206,7 @@ inline __m128i RotateLeft<16>(const __m128i val) #if (CRYPTOPP_ALTIVEC_AVAILABLE) // ChaCha_OperateKeystream_POWER7 is optimized for POWER7. However, Altivec -// is supported by using vec_ld and vec_st, and using a composite vec_add +// is supported by using vec_ld and vec_st, and using a composite VecAdd // that supports 64-bit element adds. vec_ld and vec_st add significant // overhead when memory is not aligned. Despite the drawbacks Altivec // is profitable. The numbers for ChaCha8 are: @@ -216,33 +216,34 @@ inline __m128i RotateLeft<16>(const __m128i val) using CryptoPP::uint8x16_p; using CryptoPP::uint32x4_p; -using CryptoPP::VectorLoad; -using CryptoPP::VectorStore; +using CryptoPP::VecLoad; +using CryptoPP::VecStore; +using CryptoPP::VecPermute; // Permutes bytes in packed 32-bit words to little endian. // State is already in proper endian order. Input and // output must be permuted during load and save. -inline uint32x4_p VectorLoad32LE(const uint8_t src[16]) +inline uint32x4_p VecLoad32LE(const uint8_t src[16]) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p mask = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}; - const uint32x4_p val = VectorLoad(src); - return vec_perm(val, val, mask); + const uint32x4_p val = VecLoad(src); + return VecPermute(val, val, mask); #else - return VectorLoad(src); + return VecLoad(src); #endif } // Permutes bytes in packed 32-bit words to little endian. // State is already in proper endian order. Input and // output must be permuted during load and save. 
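The comment above mentions a composite VecAdd that provides 64-bit element adds on plain Altivec, which only has 32-bit lanes. The composite is ordinary schoolbook addition: add the low 32-bit halves, detect the carry, and fold it into the high halves. A hedged scalar sketch of the idea (not the library's actual VecAdd64 code):

    #include <cstdint>

    // 64-bit add built from 32-bit halves with explicit carry propagation.
    void Add64(uint32_t a_hi, uint32_t a_lo, uint32_t b_hi, uint32_t b_lo,
               uint32_t& r_hi, uint32_t& r_lo)
    {
        r_lo = a_lo + b_lo;                    // low halves (mod 2^32)
        const uint32_t carry = (r_lo < a_lo);  // 1 if the low add wrapped
        r_hi = a_hi + b_hi + carry;            // high halves absorb the carry
    }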
-inline void VectorStore32LE(uint8_t dest[16], const uint32x4_p& val) +inline void VecStore32LE(uint8_t dest[16], const uint32x4_p& val) { #if (CRYPTOPP_BIG_ENDIAN) const uint8x16_p mask = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}; - VectorStore(vec_perm(val, val, mask), dest); + VecStore(VecPermute(val, val, mask), dest); #else - return VectorStore(val, dest); + return VecStore(val, dest); #endif } @@ -262,21 +263,21 @@ template <> inline uint32x4_p Shuffle<1>(const uint32x4_p& val) { const uint8x16_p mask = {4,5,6,7, 8,9,10,11, 12,13,14,15, 0,1,2,3}; - return vec_perm(val, val, mask); + return VecPermute(val, val, mask); } template <> inline uint32x4_p Shuffle<2>(const uint32x4_p& val) { const uint8x16_p mask = {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7}; - return vec_perm(val, val, mask); + return VecPermute(val, val, mask); } template <> inline uint32x4_p Shuffle<3>(const uint32x4_p& val) { const uint8x16_p mask = {12,13,14,15, 0,1,2,3, 4,5,6,7, 8,9,10,11}; - return vec_perm(val, val, mask); + return VecPermute(val, val, mask); } #endif // CRYPTOPP_ALTIVEC_AVAILABLE @@ -825,10 +826,10 @@ void ChaCha_OperateKeystream_SSE2(const word32 *state, const byte* input, byte * void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte *output, unsigned int rounds) { - const uint32x4_p state0 = VectorLoad(state + 0*4); - const uint32x4_p state1 = VectorLoad(state + 1*4); - const uint32x4_p state2 = VectorLoad(state + 2*4); - const uint32x4_p state3 = VectorLoad(state + 3*4); + const uint32x4_p state0 = VecLoad(state + 0*4); + const uint32x4_p state1 = VecLoad(state + 1*4); + const uint32x4_p state2 = VecLoad(state + 2*4); + const uint32x4_p state3 = VecLoad(state + 3*4); const uint32x4_p CTRS[3] = { {1,0,0,0}, {2,0,0,0}, {3,0,0,0} @@ -842,79 +843,79 @@ void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte uint32x4_p r1_0 = state0; uint32x4_p r1_1 = state1; uint32x4_p r1_2 = state2; - uint32x4_p r1_3 = VectorAdd64(r0_3, CTRS[0]); + uint32x4_p r1_3 = VecAdd64(r0_3, CTRS[0]); uint32x4_p r2_0 = state0; uint32x4_p r2_1 = state1; uint32x4_p r2_2 = state2; - uint32x4_p r2_3 = VectorAdd64(r0_3, CTRS[1]); + uint32x4_p r2_3 = VecAdd64(r0_3, CTRS[1]); uint32x4_p r3_0 = state0; uint32x4_p r3_1 = state1; uint32x4_p r3_2 = state2; - uint32x4_p r3_3 = VectorAdd64(r0_3, CTRS[2]); + uint32x4_p r3_3 = VecAdd64(r0_3, CTRS[2]); for (int i = static_cast(rounds); i > 0; i -= 2) { - r0_0 = VectorAdd(r0_0, r0_1); - r1_0 = VectorAdd(r1_0, r1_1); - r2_0 = VectorAdd(r2_0, r2_1); - r3_0 = VectorAdd(r3_0, r3_1); + r0_0 = VecAdd(r0_0, r0_1); + r1_0 = VecAdd(r1_0, r1_1); + r2_0 = VecAdd(r2_0, r2_1); + r3_0 = VecAdd(r3_0, r3_1); - r0_3 = VectorXor(r0_3, r0_0); - r1_3 = VectorXor(r1_3, r1_0); - r2_3 = VectorXor(r2_3, r2_0); - r3_3 = VectorXor(r3_3, r3_0); + r0_3 = VecXor(r0_3, r0_0); + r1_3 = VecXor(r1_3, r1_0); + r2_3 = VecXor(r2_3, r2_0); + r3_3 = VecXor(r3_3, r3_0); - r0_3 = VectorRotateLeft<16>(r0_3); - r1_3 = VectorRotateLeft<16>(r1_3); - r2_3 = VectorRotateLeft<16>(r2_3); - r3_3 = VectorRotateLeft<16>(r3_3); + r0_3 = VecRotateLeft<16>(r0_3); + r1_3 = VecRotateLeft<16>(r1_3); + r2_3 = VecRotateLeft<16>(r2_3); + r3_3 = VecRotateLeft<16>(r3_3); - r0_2 = VectorAdd(r0_2, r0_3); - r1_2 = VectorAdd(r1_2, r1_3); - r2_2 = VectorAdd(r2_2, r2_3); - r3_2 = VectorAdd(r3_2, r3_3); + r0_2 = VecAdd(r0_2, r0_3); + r1_2 = VecAdd(r1_2, r1_3); + r2_2 = VecAdd(r2_2, r2_3); + r3_2 = VecAdd(r3_2, r3_3); - r0_1 = VectorXor(r0_1, r0_2); - r1_1 = VectorXor(r1_1, r1_2); - r2_1 = VectorXor(r2_1, r2_2); 
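The long VecAdd/VecXor/VecRotateLeft<16,12,8,7> sequence being renamed here is four column (and later diagonal) copies of the standard ChaCha quarter round, run across whole rows at once. For reference, the scalar quarter round the vector code mirrors (illustration only, not part of the patch):

    #include <cstdint>

    static inline uint32_t rotl32(uint32_t v, unsigned n)
    {
        return (v << n) | (v >> (32u - n));   // n is always 7, 8, 12 or 16 here
    }

    void ChaChaQuarterRound(uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d)
    {
        a += b; d ^= a; d = rotl32(d, 16);
        c += d; b ^= c; b = rotl32(b, 12);
        a += b; d ^= a; d = rotl32(d, 8);
        c += d; b ^= c; b = rotl32(b, 7);
    }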
- r3_1 = VectorXor(r3_1, r3_2); + r0_1 = VecXor(r0_1, r0_2); + r1_1 = VecXor(r1_1, r1_2); + r2_1 = VecXor(r2_1, r2_2); + r3_1 = VecXor(r3_1, r3_2); - r0_1 = VectorRotateLeft<12>(r0_1); - r1_1 = VectorRotateLeft<12>(r1_1); - r2_1 = VectorRotateLeft<12>(r2_1); - r3_1 = VectorRotateLeft<12>(r3_1); + r0_1 = VecRotateLeft<12>(r0_1); + r1_1 = VecRotateLeft<12>(r1_1); + r2_1 = VecRotateLeft<12>(r2_1); + r3_1 = VecRotateLeft<12>(r3_1); - r0_0 = VectorAdd(r0_0, r0_1); - r1_0 = VectorAdd(r1_0, r1_1); - r2_0 = VectorAdd(r2_0, r2_1); - r3_0 = VectorAdd(r3_0, r3_1); + r0_0 = VecAdd(r0_0, r0_1); + r1_0 = VecAdd(r1_0, r1_1); + r2_0 = VecAdd(r2_0, r2_1); + r3_0 = VecAdd(r3_0, r3_1); - r0_3 = VectorXor(r0_3, r0_0); - r1_3 = VectorXor(r1_3, r1_0); - r2_3 = VectorXor(r2_3, r2_0); - r3_3 = VectorXor(r3_3, r3_0); + r0_3 = VecXor(r0_3, r0_0); + r1_3 = VecXor(r1_3, r1_0); + r2_3 = VecXor(r2_3, r2_0); + r3_3 = VecXor(r3_3, r3_0); - r0_3 = VectorRotateLeft<8>(r0_3); - r1_3 = VectorRotateLeft<8>(r1_3); - r2_3 = VectorRotateLeft<8>(r2_3); - r3_3 = VectorRotateLeft<8>(r3_3); + r0_3 = VecRotateLeft<8>(r0_3); + r1_3 = VecRotateLeft<8>(r1_3); + r2_3 = VecRotateLeft<8>(r2_3); + r3_3 = VecRotateLeft<8>(r3_3); - r0_2 = VectorAdd(r0_2, r0_3); - r1_2 = VectorAdd(r1_2, r1_3); - r2_2 = VectorAdd(r2_2, r2_3); - r3_2 = VectorAdd(r3_2, r3_3); + r0_2 = VecAdd(r0_2, r0_3); + r1_2 = VecAdd(r1_2, r1_3); + r2_2 = VecAdd(r2_2, r2_3); + r3_2 = VecAdd(r3_2, r3_3); - r0_1 = VectorXor(r0_1, r0_2); - r1_1 = VectorXor(r1_1, r1_2); - r2_1 = VectorXor(r2_1, r2_2); - r3_1 = VectorXor(r3_1, r3_2); + r0_1 = VecXor(r0_1, r0_2); + r1_1 = VecXor(r1_1, r1_2); + r2_1 = VecXor(r2_1, r2_2); + r3_1 = VecXor(r3_1, r3_2); - r0_1 = VectorRotateLeft<7>(r0_1); - r1_1 = VectorRotateLeft<7>(r1_1); - r2_1 = VectorRotateLeft<7>(r2_1); - r3_1 = VectorRotateLeft<7>(r3_1); + r0_1 = VecRotateLeft<7>(r0_1); + r1_1 = VecRotateLeft<7>(r1_1); + r2_1 = VecRotateLeft<7>(r2_1); + r3_1 = VecRotateLeft<7>(r3_1); r0_1 = Shuffle<1>(r0_1); r0_2 = Shuffle<2>(r0_2); @@ -932,65 +933,65 @@ void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte r3_2 = Shuffle<2>(r3_2); r3_3 = Shuffle<3>(r3_3); - r0_0 = VectorAdd(r0_0, r0_1); - r1_0 = VectorAdd(r1_0, r1_1); - r2_0 = VectorAdd(r2_0, r2_1); - r3_0 = VectorAdd(r3_0, r3_1); + r0_0 = VecAdd(r0_0, r0_1); + r1_0 = VecAdd(r1_0, r1_1); + r2_0 = VecAdd(r2_0, r2_1); + r3_0 = VecAdd(r3_0, r3_1); - r0_3 = VectorXor(r0_3, r0_0); - r1_3 = VectorXor(r1_3, r1_0); - r2_3 = VectorXor(r2_3, r2_0); - r3_3 = VectorXor(r3_3, r3_0); + r0_3 = VecXor(r0_3, r0_0); + r1_3 = VecXor(r1_3, r1_0); + r2_3 = VecXor(r2_3, r2_0); + r3_3 = VecXor(r3_3, r3_0); - r0_3 = VectorRotateLeft<16>(r0_3); - r1_3 = VectorRotateLeft<16>(r1_3); - r2_3 = VectorRotateLeft<16>(r2_3); - r3_3 = VectorRotateLeft<16>(r3_3); + r0_3 = VecRotateLeft<16>(r0_3); + r1_3 = VecRotateLeft<16>(r1_3); + r2_3 = VecRotateLeft<16>(r2_3); + r3_3 = VecRotateLeft<16>(r3_3); - r0_2 = VectorAdd(r0_2, r0_3); - r1_2 = VectorAdd(r1_2, r1_3); - r2_2 = VectorAdd(r2_2, r2_3); - r3_2 = VectorAdd(r3_2, r3_3); + r0_2 = VecAdd(r0_2, r0_3); + r1_2 = VecAdd(r1_2, r1_3); + r2_2 = VecAdd(r2_2, r2_3); + r3_2 = VecAdd(r3_2, r3_3); - r0_1 = VectorXor(r0_1, r0_2); - r1_1 = VectorXor(r1_1, r1_2); - r2_1 = VectorXor(r2_1, r2_2); - r3_1 = VectorXor(r3_1, r3_2); + r0_1 = VecXor(r0_1, r0_2); + r1_1 = VecXor(r1_1, r1_2); + r2_1 = VecXor(r2_1, r2_2); + r3_1 = VecXor(r3_1, r3_2); - r0_1 = VectorRotateLeft<12>(r0_1); - r1_1 = VectorRotateLeft<12>(r1_1); - r2_1 = VectorRotateLeft<12>(r2_1); - r3_1 = 
VectorRotateLeft<12>(r3_1); + r0_1 = VecRotateLeft<12>(r0_1); + r1_1 = VecRotateLeft<12>(r1_1); + r2_1 = VecRotateLeft<12>(r2_1); + r3_1 = VecRotateLeft<12>(r3_1); - r0_0 = VectorAdd(r0_0, r0_1); - r1_0 = VectorAdd(r1_0, r1_1); - r2_0 = VectorAdd(r2_0, r2_1); - r3_0 = VectorAdd(r3_0, r3_1); + r0_0 = VecAdd(r0_0, r0_1); + r1_0 = VecAdd(r1_0, r1_1); + r2_0 = VecAdd(r2_0, r2_1); + r3_0 = VecAdd(r3_0, r3_1); - r0_3 = VectorXor(r0_3, r0_0); - r1_3 = VectorXor(r1_3, r1_0); - r2_3 = VectorXor(r2_3, r2_0); - r3_3 = VectorXor(r3_3, r3_0); + r0_3 = VecXor(r0_3, r0_0); + r1_3 = VecXor(r1_3, r1_0); + r2_3 = VecXor(r2_3, r2_0); + r3_3 = VecXor(r3_3, r3_0); - r0_3 = VectorRotateLeft<8>(r0_3); - r1_3 = VectorRotateLeft<8>(r1_3); - r2_3 = VectorRotateLeft<8>(r2_3); - r3_3 = VectorRotateLeft<8>(r3_3); + r0_3 = VecRotateLeft<8>(r0_3); + r1_3 = VecRotateLeft<8>(r1_3); + r2_3 = VecRotateLeft<8>(r2_3); + r3_3 = VecRotateLeft<8>(r3_3); - r0_2 = VectorAdd(r0_2, r0_3); - r1_2 = VectorAdd(r1_2, r1_3); - r2_2 = VectorAdd(r2_2, r2_3); - r3_2 = VectorAdd(r3_2, r3_3); + r0_2 = VecAdd(r0_2, r0_3); + r1_2 = VecAdd(r1_2, r1_3); + r2_2 = VecAdd(r2_2, r2_3); + r3_2 = VecAdd(r3_2, r3_3); - r0_1 = VectorXor(r0_1, r0_2); - r1_1 = VectorXor(r1_1, r1_2); - r2_1 = VectorXor(r2_1, r2_2); - r3_1 = VectorXor(r3_1, r3_2); + r0_1 = VecXor(r0_1, r0_2); + r1_1 = VecXor(r1_1, r1_2); + r2_1 = VecXor(r2_1, r2_2); + r3_1 = VecXor(r3_1, r3_2); - r0_1 = VectorRotateLeft<7>(r0_1); - r1_1 = VectorRotateLeft<7>(r1_1); - r2_1 = VectorRotateLeft<7>(r2_1); - r3_1 = VectorRotateLeft<7>(r3_1); + r0_1 = VecRotateLeft<7>(r0_1); + r1_1 = VecRotateLeft<7>(r1_1); + r2_1 = VecRotateLeft<7>(r2_1); + r3_1 = VecRotateLeft<7>(r3_1); r0_1 = Shuffle<3>(r0_1); r0_2 = Shuffle<2>(r0_2); @@ -1009,80 +1010,80 @@ void ChaCha_OperateKeystream_POWER7(const word32 *state, const byte* input, byte r3_3 = Shuffle<1>(r3_3); } - r0_0 = VectorAdd(r0_0, state0); - r0_1 = VectorAdd(r0_1, state1); - r0_2 = VectorAdd(r0_2, state2); - r0_3 = VectorAdd(r0_3, state3); + r0_0 = VecAdd(r0_0, state0); + r0_1 = VecAdd(r0_1, state1); + r0_2 = VecAdd(r0_2, state2); + r0_3 = VecAdd(r0_3, state3); - r1_0 = VectorAdd(r1_0, state0); - r1_1 = VectorAdd(r1_1, state1); - r1_2 = VectorAdd(r1_2, state2); - r1_3 = VectorAdd(r1_3, state3); - r1_3 = VectorAdd64(r1_3, CTRS[0]); + r1_0 = VecAdd(r1_0, state0); + r1_1 = VecAdd(r1_1, state1); + r1_2 = VecAdd(r1_2, state2); + r1_3 = VecAdd(r1_3, state3); + r1_3 = VecAdd64(r1_3, CTRS[0]); - r2_0 = VectorAdd(r2_0, state0); - r2_1 = VectorAdd(r2_1, state1); - r2_2 = VectorAdd(r2_2, state2); - r2_3 = VectorAdd(r2_3, state3); - r2_3 = VectorAdd64(r2_3, CTRS[1]); + r2_0 = VecAdd(r2_0, state0); + r2_1 = VecAdd(r2_1, state1); + r2_2 = VecAdd(r2_2, state2); + r2_3 = VecAdd(r2_3, state3); + r2_3 = VecAdd64(r2_3, CTRS[1]); - r3_0 = VectorAdd(r3_0, state0); - r3_1 = VectorAdd(r3_1, state1); - r3_2 = VectorAdd(r3_2, state2); - r3_3 = VectorAdd(r3_3, state3); - r3_3 = VectorAdd64(r3_3, CTRS[2]); + r3_0 = VecAdd(r3_0, state0); + r3_1 = VecAdd(r3_1, state1); + r3_2 = VecAdd(r3_2, state2); + r3_3 = VecAdd(r3_3, state3); + r3_3 = VecAdd64(r3_3, CTRS[2]); if (input) { - r0_0 = VectorXor(VectorLoad32LE(input + 0*16), r0_0); - r0_1 = VectorXor(VectorLoad32LE(input + 1*16), r0_1); - r0_2 = VectorXor(VectorLoad32LE(input + 2*16), r0_2); - r0_3 = VectorXor(VectorLoad32LE(input + 3*16), r0_3); + r0_0 = VecXor(VecLoad32LE(input + 0*16), r0_0); + r0_1 = VecXor(VecLoad32LE(input + 1*16), r0_1); + r0_2 = VecXor(VecLoad32LE(input + 2*16), r0_2); + r0_3 = VecXor(VecLoad32LE(input + 
3*16), r0_3); } - VectorStore32LE(output + 0*16, r0_0); - VectorStore32LE(output + 1*16, r0_1); - VectorStore32LE(output + 2*16, r0_2); - VectorStore32LE(output + 3*16, r0_3); + VecStore32LE(output + 0*16, r0_0); + VecStore32LE(output + 1*16, r0_1); + VecStore32LE(output + 2*16, r0_2); + VecStore32LE(output + 3*16, r0_3); if (input) { - r1_0 = VectorXor(VectorLoad32LE(input + 4*16), r1_0); - r1_1 = VectorXor(VectorLoad32LE(input + 5*16), r1_1); - r1_2 = VectorXor(VectorLoad32LE(input + 6*16), r1_2); - r1_3 = VectorXor(VectorLoad32LE(input + 7*16), r1_3); + r1_0 = VecXor(VecLoad32LE(input + 4*16), r1_0); + r1_1 = VecXor(VecLoad32LE(input + 5*16), r1_1); + r1_2 = VecXor(VecLoad32LE(input + 6*16), r1_2); + r1_3 = VecXor(VecLoad32LE(input + 7*16), r1_3); } - VectorStore32LE(output + 4*16, r1_0); - VectorStore32LE(output + 5*16, r1_1); - VectorStore32LE(output + 6*16, r1_2); - VectorStore32LE(output + 7*16, r1_3); + VecStore32LE(output + 4*16, r1_0); + VecStore32LE(output + 5*16, r1_1); + VecStore32LE(output + 6*16, r1_2); + VecStore32LE(output + 7*16, r1_3); if (input) { - r2_0 = VectorXor(VectorLoad32LE(input + 8*16), r2_0); - r2_1 = VectorXor(VectorLoad32LE(input + 9*16), r2_1); - r2_2 = VectorXor(VectorLoad32LE(input + 10*16), r2_2); - r2_3 = VectorXor(VectorLoad32LE(input + 11*16), r2_3); + r2_0 = VecXor(VecLoad32LE(input + 8*16), r2_0); + r2_1 = VecXor(VecLoad32LE(input + 9*16), r2_1); + r2_2 = VecXor(VecLoad32LE(input + 10*16), r2_2); + r2_3 = VecXor(VecLoad32LE(input + 11*16), r2_3); } - VectorStore32LE(output + 8*16, r2_0); - VectorStore32LE(output + 9*16, r2_1); - VectorStore32LE(output + 10*16, r2_2); - VectorStore32LE(output + 11*16, r2_3); + VecStore32LE(output + 8*16, r2_0); + VecStore32LE(output + 9*16, r2_1); + VecStore32LE(output + 10*16, r2_2); + VecStore32LE(output + 11*16, r2_3); if (input) { - r3_0 = VectorXor(VectorLoad32LE(input + 12*16), r3_0); - r3_1 = VectorXor(VectorLoad32LE(input + 13*16), r3_1); - r3_2 = VectorXor(VectorLoad32LE(input + 14*16), r3_2); - r3_3 = VectorXor(VectorLoad32LE(input + 15*16), r3_3); + r3_0 = VecXor(VecLoad32LE(input + 12*16), r3_0); + r3_1 = VecXor(VecLoad32LE(input + 13*16), r3_1); + r3_2 = VecXor(VecLoad32LE(input + 14*16), r3_2); + r3_3 = VecXor(VecLoad32LE(input + 15*16), r3_3); } - VectorStore32LE(output + 12*16, r3_0); - VectorStore32LE(output + 13*16, r3_1); - VectorStore32LE(output + 14*16, r3_2); - VectorStore32LE(output + 15*16, r3_3); + VecStore32LE(output + 12*16, r3_0); + VecStore32LE(output + 13*16, r3_1); + VecStore32LE(output + 14*16, r3_2); + VecStore32LE(output + 15*16, r3_3); } #endif // CRYPTOPP_ALTIVEC_AVAILABLE diff --git a/gcm_simd.cpp b/gcm_simd.cpp index 2b054898..3e42f16b 100644 --- a/gcm_simd.cpp +++ b/gcm_simd.cpp @@ -171,16 +171,16 @@ inline uint64x2_t VEXT_U8(uint64x2_t a, uint64x2_t b) #if CRYPTOPP_POWER8_VMULL_AVAILABLE using CryptoPP::uint32x4_p; using CryptoPP::uint64x2_p; -using CryptoPP::VectorGetLow; -using CryptoPP::VectorGetHigh; -using CryptoPP::VectorRotateLeftOctet; +using CryptoPP::VecGetLow; +using CryptoPP::VecGetHigh; +using CryptoPP::VecRotateLeftOctet; // POWER8 GCM mode is confusing. The algorithm is reflected so // nearly everything we do is reversed for a little-endian system, // including on big-endian machines. 
VMULL2LE swaps dwords for a // little endian machine; VMULL_00LE, VMULL_01LE, VMULL_10LE and // VMULL_11LE are backwards and (1) read low words with -// VectorGetHigh, (2) read high words with VectorGetLow, and +// VecGetHigh, (2) read high words with VecGetLow, and // (3) yields a product that is endian swapped. The steps ensures // GCM parameters are presented in the correct order for the // algorithm on both big and little-endian systems, but it is @@ -192,7 +192,7 @@ using CryptoPP::VectorRotateLeftOctet; inline uint64x2_p VMULL2LE(const uint64x2_p& val) { #if (CRYPTOPP_BIG_ENDIAN) - return VectorRotateLeftOctet<8>(val); + return VecRotateLeftOctet<8>(val); #else return val; #endif @@ -202,48 +202,48 @@ inline uint64x2_p VMULL2LE(const uint64x2_p& val) inline uint64x2_p VMULL_00LE(const uint64x2_p& a, const uint64x2_p& b) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return VMULL2LE(__vpmsumd (VectorGetHigh(a), VectorGetHigh(b))); + return VMULL2LE(__vpmsumd (VecGetHigh(a), VecGetHigh(b))); #else - return VMULL2LE(__builtin_crypto_vpmsumd (VectorGetHigh(a), VectorGetHigh(b))); + return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), VecGetHigh(b))); #endif } // _mm_clmulepi64_si128(a, b, 0x01) inline uint64x2_p VMULL_01LE(const uint64x2_p& a, const uint64x2_p& b) { - // Small speedup. VectorGetHigh(b) ensures the high dword of 'b' is 0. + // Small speedup. VecGetHigh(b) ensures the high dword of 'b' is 0. // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'a' is "don't care". #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return VMULL2LE(__vpmsumd (a, VectorGetHigh(b))); + return VMULL2LE(__vpmsumd (a, VecGetHigh(b))); #else - return VMULL2LE(__builtin_crypto_vpmsumd (a, VectorGetHigh(b))); + return VMULL2LE(__builtin_crypto_vpmsumd (a, VecGetHigh(b))); #endif } // _mm_clmulepi64_si128(a, b, 0x10) inline uint64x2_p VMULL_10LE(const uint64x2_p& a, const uint64x2_p& b) { - // Small speedup. VectorGetHigh(a) ensures the high dword of 'a' is 0. + // Small speedup. VecGetHigh(a) ensures the high dword of 'a' is 0. // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'b' is "don't care". #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return VMULL2LE(__vpmsumd (VectorGetHigh(a), b)); + return VMULL2LE(__vpmsumd (VecGetHigh(a), b)); #else - return VMULL2LE(__builtin_crypto_vpmsumd (VectorGetHigh(a), b)); + return VMULL2LE(__builtin_crypto_vpmsumd (VecGetHigh(a), b)); #endif } // _mm_clmulepi64_si128(a, b, 0x11) inline uint64x2_p VMULL_11LE(const uint64x2_p& a, const uint64x2_p& b) { - // Small speedup. VectorGetLow(a) ensures the high dword of 'a' is 0. + // Small speedup. VecGetLow(a) ensures the high dword of 'a' is 0. // The 0 used in the vmull yields 0 for the high product, so the high // dword of 'b' is "don't care". 
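The VMULL_00LE/01LE/10LE/11LE helpers wrap vpmsumd, whose core operation is a 64x64 -> 128-bit carry-less (polynomial) multiply; the VecGetLow/VecGetHigh calls zero one doubleword so only a single product survives, mirroring the immediate of _mm_clmulepi64_si128. As a reference for what "carry-less" means, a portable scalar sketch (illustration only, far slower than the intrinsic):

    #include <cstdint>

    // Carry-less multiply: partial products are XORed instead of added.
    void CarrylessMul64(uint64_t a, uint64_t b, uint64_t& hi, uint64_t& lo)
    {
        hi = lo = 0;
        for (unsigned i = 0; i < 64; ++i) {
            if ((b >> i) & 1) {
                lo ^= a << i;
                if (i != 0)
                    hi ^= a >> (64u - i);   // bits pushed out of the low word
            }
        }
    }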
#if defined(__xlc__) || defined(__xlC__) || defined(__clang__) - return VMULL2LE(__vpmsumd (VectorGetLow(a), b)); + return VMULL2LE(__vpmsumd (VecGetLow(a), b)); #else - return VMULL2LE(__builtin_crypto_vpmsumd (VectorGetLow(a), b)); + return VMULL2LE(__builtin_crypto_vpmsumd (VecGetLow(a), b)); #endif } #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE @@ -373,7 +373,7 @@ bool CPU_ProbePMULL() const uint64x2_p r3 = VMULL_10LE((uint64x2_p)(a), (uint64x2_p)(b)); const uint64x2_p r4 = VMULL_11LE((uint64x2_p)(a), (uint64x2_p)(b)); - result = VectorNotEqual(r1, r2) && VectorNotEqual(r3, r4); + result = VecNotEqual(r1, r2) && VecNotEqual(r3, r4); } sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); @@ -743,7 +743,7 @@ void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer) #if CRYPTOPP_ALTIVEC_AVAILABLE void GCM_Xor16_ALTIVEC(byte *a, const byte *b, const byte *c) { - VectorStore(VectorXor(VectorLoad(b), VectorLoad(c)), a); + VecStore(VecXor(VecLoad(b), VecLoad(c)), a); } #endif // CRYPTOPP_ALTIVEC_AVAILABLE @@ -753,22 +753,22 @@ uint64x2_p GCM_Reduce_VMULL(uint64x2_p c0, uint64x2_p c1, uint64x2_p c2, uint64x { const uint64x2_p m1 = {1,1}, m63 = {63,63}; - c1 = VectorXor(c1, VectorShiftRightOctet<8>(c0)); - c1 = VectorXor(c1, VMULL_10LE(c0, r)); - c0 = VectorXor(c1, VectorShiftLeftOctet<8>(c0)); + c1 = VecXor(c1, VecShiftRightOctet<8>(c0)); + c1 = VecXor(c1, VMULL_10LE(c0, r)); + c0 = VecXor(c1, VecShiftLeftOctet<8>(c0)); c0 = VMULL_00LE(vec_sl(c0, m1), r); - c2 = VectorXor(c2, c0); - c2 = VectorXor(c2, VectorShiftLeftOctet<8>(c1)); + c2 = VecXor(c2, c0); + c2 = VecXor(c2, VecShiftLeftOctet<8>(c1)); c1 = vec_sr(vec_mergeh(c1, c2), m63); c2 = vec_sl(c2, m1); - return VectorXor(c2, c1); + return VecXor(c2, c1); } inline uint64x2_p GCM_Multiply_VMULL(uint64x2_p x, uint64x2_p h, uint64x2_p r) { const uint64x2_p c0 = VMULL_00LE(x, h); - const uint64x2_p c1 = VectorXor(VMULL_01LE(x, h), VMULL_10LE(x, h)); + const uint64x2_p c1 = VecXor(VMULL_01LE(x, h), VMULL_10LE(x, h)); const uint64x2_p c2 = VMULL_11LE(x, h); return GCM_Reduce_VMULL(c0, c1, c2, r); @@ -777,13 +777,13 @@ inline uint64x2_p GCM_Multiply_VMULL(uint64x2_p x, uint64x2_p h, uint64x2_p r) inline uint64x2_p LoadHashKey(const byte *hashKey) { #if (CRYPTOPP_BIG_ENDIAN) - const uint64x2_p key = (uint64x2_p)VectorLoad(hashKey); + const uint64x2_p key = (uint64x2_p)VecLoad(hashKey); const uint8x16_p mask = {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7}; - return vec_perm(key, key, mask); + return VecPermute(key, key, mask); #else - const uint64x2_p key = (uint64x2_p)VectorLoad(hashKey); + const uint64x2_p key = (uint64x2_p)VecLoad(hashKey); const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; - return vec_perm(key, key, mask); + return VecPermute(key, key, mask); #endif } @@ -798,21 +798,21 @@ void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned for (i=0; i inline T SwapWords(const T& data) { - return (T)VectorRotateLeftOctet<8>(data); + return (T)VecRotateLeftOctet<8>(data); } inline uint64x2_p LoadBuffer1(const byte *dataBuffer) { #if (CRYPTOPP_BIG_ENDIAN) - return (uint64x2_p)VectorLoad(dataBuffer); + return (uint64x2_p)VecLoad(dataBuffer); #else - const uint64x2_p data = (uint64x2_p)VectorLoad(dataBuffer); + const uint64x2_p data = (uint64x2_p)VecLoad(dataBuffer); const uint8x16_p mask = {7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8}; - return vec_perm(data, data, mask); + return VecPermute(data, data, mask); #endif } inline uint64x2_p LoadBuffer2(const byte *dataBuffer) { #if (CRYPTOPP_BIG_ENDIAN) - return 
(uint64x2_p)SwapWords(VectorLoadBE(dataBuffer)); + return (uint64x2_p)SwapWords(VecLoadBE(dataBuffer)); #else - return (uint64x2_p)VectorLoadBE(dataBuffer); + return (uint64x2_p)VecLoadBE(dataBuffer); #endif } size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer) { const uint64x2_p r = {0xe100000000000000ull, 0xc200000000000000ull}; - uint64x2_p x = (uint64x2_p)VectorLoad(hbuffer); + uint64x2_p x = (uint64x2_p)VecLoad(hbuffer); while (len >= 16) { @@ -856,59 +856,59 @@ size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mt while (true) { - const uint64x2_p h0 = (uint64x2_p)VectorLoad(mtable+(i+0)*16); - const uint64x2_p h1 = (uint64x2_p)VectorLoad(mtable+(i+1)*16); - const uint64x2_p h2 = (uint64x2_p)VectorXor(h0, h1); + const uint64x2_p h0 = (uint64x2_p)VecLoad(mtable+(i+0)*16); + const uint64x2_p h1 = (uint64x2_p)VecLoad(mtable+(i+1)*16); + const uint64x2_p h2 = (uint64x2_p)VecXor(h0, h1); if (++i == s) { d1 = LoadBuffer2(data); - d1 = VectorXor(d1, x); - c0 = VectorXor(c0, VMULL_00LE(d1, h0)); - c2 = VectorXor(c2, VMULL_01LE(d1, h1)); - d1 = VectorXor(d1, SwapWords(d1)); - c1 = VectorXor(c1, VMULL_00LE(d1, h2)); + d1 = VecXor(d1, x); + c0 = VecXor(c0, VMULL_00LE(d1, h0)); + c2 = VecXor(c2, VMULL_01LE(d1, h1)); + d1 = VecXor(d1, SwapWords(d1)); + c1 = VecXor(c1, VMULL_00LE(d1, h2)); break; } d1 = LoadBuffer1(data+(s-i)*16-8); - c0 = VectorXor(c0, VMULL_01LE(d2, h0)); - c2 = VectorXor(c2, VMULL_01LE(d1, h1)); - d2 = VectorXor(d2, d1); - c1 = VectorXor(c1, VMULL_01LE(d2, h2)); + c0 = VecXor(c0, VMULL_01LE(d2, h0)); + c2 = VecXor(c2, VMULL_01LE(d1, h1)); + d2 = VecXor(d2, d1); + c1 = VecXor(c1, VMULL_01LE(d2, h2)); if (++i == s) { d1 = LoadBuffer2(data); - d1 = VectorXor(d1, x); - c0 = VectorXor(c0, VMULL_10LE(d1, h0)); - c2 = VectorXor(c2, VMULL_11LE(d1, h1)); - d1 = VectorXor(d1, SwapWords(d1)); - c1 = VectorXor(c1, VMULL_10LE(d1, h2)); + d1 = VecXor(d1, x); + c0 = VecXor(c0, VMULL_10LE(d1, h0)); + c2 = VecXor(c2, VMULL_11LE(d1, h1)); + d1 = VecXor(d1, SwapWords(d1)); + c1 = VecXor(c1, VMULL_10LE(d1, h2)); break; } d2 = LoadBuffer2(data+(s-i)*16-8); - c0 = VectorXor(c0, VMULL_10LE(d1, h0)); - c2 = VectorXor(c2, VMULL_10LE(d2, h1)); - d1 = VectorXor(d1, d2); - c1 = VectorXor(c1, VMULL_10LE(d1, h2)); + c0 = VecXor(c0, VMULL_10LE(d1, h0)); + c2 = VecXor(c2, VMULL_10LE(d2, h1)); + d1 = VecXor(d1, d2); + c1 = VecXor(c1, VMULL_10LE(d1, h2)); } data += s*16; len -= s*16; - c1 = VectorXor(VectorXor(c1, c0), c2); + c1 = VecXor(VecXor(c1, c0), c2); x = GCM_Reduce_VMULL(c0, c1, c2, r); } - VectorStore(x, hbuffer); + VecStore(x, hbuffer); return len; } void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer) { const uint64x2_p mask = {0x08090a0b0c0d0e0full, 0x0001020304050607ull}; - VectorStore(VectorPermute(VectorLoad(hashBuffer), mask), hashBuffer); + VecStore(VecPermute(VecLoad(hashBuffer), mask), hashBuffer); } #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE diff --git a/lea_simd.cpp b/lea_simd.cpp index 245407bf..8278330e 100644 --- a/lea_simd.cpp +++ b/lea_simd.cpp @@ -439,17 +439,17 @@ using CryptoPP::uint64x2_p; inline uint32x4_p Xor(const uint32x4_p& a, const uint32x4_p& b) { - return vec_xor(a, b); + return VecXor(a, b); } inline uint32x4_p Add(const uint32x4_p& a, const uint32x4_p& b) { - return vec_add(a, b); + return VecAdd(a, b); } inline uint32x4_p Sub(const uint32x4_p& a, const uint32x4_p& b) { - return vec_sub(a, b); + return VecSub(a, b); } template @@ -479,7 +479,7 @@ inline uint32x4_p UnpackSIMD(const uint32x4_p& 
a, const uint32x4_p& b, const uin CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b); CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d); CRYPTOPP_ASSERT(0); - return vec_xor(a, a); + return VecXor(a, a); } template <> @@ -519,7 +519,7 @@ inline uint32x4_p UnpackSIMD(const uint32x4_p& v) { // Should not be instantiated CRYPTOPP_ASSERT(0); - return vec_xor(v, v); + return VecXor(v, v); } template <> @@ -527,7 +527,7 @@ inline uint32x4_p UnpackSIMD<0>(const uint32x4_p& v) { // Splat to all lanes const uint8x16_p m = {3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0}; - return (uint32x4_p)vec_perm(v, v, m); + return (uint32x4_p)VecPermute(v, v, m); } template <> @@ -535,7 +535,7 @@ inline uint32x4_p UnpackSIMD<1>(const uint32x4_p& v) { // Splat to all lanes const uint8x16_p m = {7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4}; - return (uint32x4_p)vec_perm(v, v, m); + return (uint32x4_p)VecPermute(v, v, m); } template <> @@ -543,7 +543,7 @@ inline uint32x4_p UnpackSIMD<2>(const uint32x4_p& v) { // Splat to all lanes const uint8x16_p m = {11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8}; - return (uint32x4_p)vec_perm(v, v, m); + return (uint32x4_p)VecPermute(v, v, m); } template <> @@ -551,7 +551,7 @@ inline uint32x4_p UnpackSIMD<3>(const uint32x4_p& v) { // Splat to all lanes const uint8x16_p m = {15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12}; - return (uint32x4_p)vec_perm(v, v, m); + return (uint32x4_p)VecPermute(v, v, m); } template diff --git a/ppc_simd.cpp b/ppc_simd.cpp index 99616c4f..7fcbedca 100644 --- a/ppc_simd.cpp +++ b/ppc_simd.cpp @@ -73,7 +73,7 @@ bool CPU_ProbeAltivec() // Specifically call the Altivec loads and stores const uint8x16_p v1 = (uint8x16_p)vec_ld(0, (byte*)b1); const uint8x16_p v2 = (uint8x16_p)vec_ld(0, (byte*)b2); - const uint8x16_p v3 = (uint8x16_p)vec_xor(v1, v2); + const uint8x16_p v3 = (uint8x16_p)VecXor(v1, v2); vec_st(v3, 0, b3); result = (0 == std::memcmp(b2, b3, 16)); diff --git a/ppc_simd.h b/ppc_simd.h index 7dcd37eb..d697f7e4 100644 --- a/ppc_simd.h +++ b/ppc_simd.h @@ -29,7 +29,7 @@ # undef bool #endif -// VectorLoad_ALTIVEC and VectorStore_ALTIVEC are +// VecLoad_ALTIVEC and VecStore_ALTIVEC are // too noisy on modern compilers #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE # pragma GCC diagnostic push @@ -49,14 +49,14 @@ typedef __vector unsigned int uint32x4_p; typedef __vector unsigned long long uint64x2_p; #endif // _ARCH_PWR8 -/// \brief Reverse a vector +/// \brief Reverse bytes in a vector /// \tparam T vector type /// \param src the vector /// \returns vector -/// \details Reverse() endian swaps the bytes in a vector +/// \details VecReverse() reverses the bytes in a vector /// \since Crypto++ 6.0 template -inline T Reverse(const T src) +inline T VecReverse(const T src) { const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; return (T)vec_perm(src, src, mask); @@ -67,16 +67,16 @@ inline T Reverse(const T src) /// \brief Loads a vector from a byte array /// \param src the byte array /// \details Loads a vector in native endian format from a byte array. -/// \details VectorLoad_ALTIVEC() uses vec_ld if the effective address +/// \details VecLoad_ALTIVEC() uses vec_ld if the effective address /// of dest is aligned, and uses vec_lvsl and vec_perm /// otherwise. /// vec_lvsl and vec_perm are relatively expensive so you should /// provide aligned memory adresses. -/// \details VectorLoad_ALTIVEC() is used automatically when POWER7 or above +/// \details VecLoad_ALTIVEC() is used automatically when POWER7 or above /// and unaligned loads is not available. 
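// Illustrative sketch (not part of this patch) of the vec_lvsl/vec_perm
// technique the surrounding comments describe for the classic big-endian
// Altivec path. The helper name is hypothetical.
inline uint32x4_p AltivecLoadUnaligned_Sketch(const byte* src)
{
    // vec_ld ignores the low four address bits, so fetch the two aligned
    // quadwords that straddle src and merge them with a permute whose
    // control vector vec_lvsl derives from the misalignment of src.
    const uint8x16_p perm = vec_lvsl(0, (byte*)src);
    const uint8x16_p low  = vec_ld(0, (byte*)src);
    const uint8x16_p high = vec_ld(15, (byte*)src);
    return (uint32x4_p)vec_perm(low, high, perm);
}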
-/// \note VectorLoad does not require an aligned array. +/// \note VecLoad does not require an aligned array. /// \since Crypto++ 6.0 -inline uint32x4_p VectorLoad_ALTIVEC(const byte src[16]) +inline uint32x4_p VecLoad_ALTIVEC(const byte src[16]) { if (IsAlignedOn(src, 16)) { @@ -96,14 +96,14 @@ inline uint32x4_p VectorLoad_ALTIVEC(const byte src[16]) /// \param src the byte array /// \param off offset into the src byte array /// \details Loads a vector in native endian format from a byte array. -/// \details VectorLoad_ALTIVEC() uses vec_ld if the effective address +/// \details VecLoad_ALTIVEC() uses vec_ld if the effective address /// of dest is aligned, and uses vec_lvsl and vec_perm /// otherwise. /// vec_lvsl and vec_perm are relatively expensive so you should /// provide aligned memory adresses. -/// \note VectorLoad does not require an aligned array. +/// \note VecLoad does not require an aligned array. /// \since Crypto++ 6.0 -inline uint32x4_p VectorLoad_ALTIVEC(int off, const byte src[16]) +inline uint32x4_p VecLoad_ALTIVEC(int off, const byte src[16]) { if (IsAlignedOn(src, 16)) { @@ -122,14 +122,14 @@ inline uint32x4_p VectorLoad_ALTIVEC(int off, const byte src[16]) /// \brief Loads a vector from a byte array /// \param src the byte array /// \details Loads a vector in native endian format from a byte array. -/// \details VectorLoad uses POWER7's vec_xl or +/// \details VecLoad uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// an aligned memory address. -/// \details VectorLoad_ALTIVEC() is used if POWER7 or above -/// is not available. VectorLoad_ALTIVEC() is relatively expensive. -/// \note VectorLoad does not require an aligned array. +/// \details VecLoad_ALTIVEC() is used if POWER7 or above +/// is not available. VecLoad_ALTIVEC() is relatively expensive. +/// \note VecLoad does not require an aligned array. /// \since Crypto++ 6.0 -inline uint32x4_p VectorLoad(const byte src[16]) +inline uint32x4_p VecLoad(const byte src[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -138,7 +138,7 @@ inline uint32x4_p VectorLoad(const byte src[16]) return (uint32x4_p)vec_vsx_ld(0, (byte*)src); # endif #else - return VectorLoad_ALTIVEC(src); + return VecLoad_ALTIVEC(src); #endif } @@ -146,14 +146,14 @@ inline uint32x4_p VectorLoad(const byte src[16]) /// \param src the byte array /// \param off offset into the byte array /// \details Loads a vector in native endian format from a byte array. -/// \details VectorLoad uses POWER7's vec_xl or +/// \details VecLoad uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// an aligned memory address. -/// \details VectorLoad_ALTIVEC() is used if POWER7 or above -/// is not available. VectorLoad_ALTIVEC() is relatively expensive. -/// \note VectorLoad does not require an aligned array. +/// \details VecLoad_ALTIVEC() is used if POWER7 or above +/// is not available. VecLoad_ALTIVEC() is relatively expensive. +/// \note VecLoad does not require an aligned array. 
/// \since Crypto++ 6.0 -inline uint32x4_p VectorLoad(int off, const byte src[16]) +inline uint32x4_p VecLoad(int off, const byte src[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -162,48 +162,48 @@ inline uint32x4_p VectorLoad(int off, const byte src[16]) return (uint32x4_p)vec_vsx_ld(off, (byte*)src); # endif #else - return VectorLoad_ALTIVEC(off, src); + return VecLoad_ALTIVEC(off, src); #endif } /// \brief Loads a vector from a byte array /// \param src the byte array /// \details Loads a vector in native endian format from a byte array. -/// \details VectorLoad uses POWER7's vec_xl or +/// \details VecLoad uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// an aligned memory address. -/// \details VectorLoad_ALTIVEC() is used if POWER7 or above -/// is not available. VectorLoad_ALTIVEC() is relatively expensive. -/// \note VectorLoad does not require an aligned array. +/// \details VecLoad_ALTIVEC() is used if POWER7 or above +/// is not available. VecLoad_ALTIVEC() is relatively expensive. +/// \note VecLoad does not require an aligned array. /// \since Crypto++ 8.0 -inline uint32x4_p VectorLoad(const word32 src[4]) +inline uint32x4_p VecLoad(const word32 src[4]) { - return VectorLoad((const byte*)src); + return VecLoad((const byte*)src); } /// \brief Loads a vector from a byte array /// \param src the byte array /// \param off offset into the byte array /// \details Loads a vector in native endian format from a byte array. -/// \note VectorLoad does not require an aligned array. +/// \note VecLoad does not require an aligned array. /// \since Crypto++ 8.0 -inline uint32x4_p VectorLoad(int off, const word32 src[4]) +inline uint32x4_p VecLoad(int off, const word32 src[4]) { - return VectorLoad(off, (const byte*)src); + return VecLoad(off, (const byte*)src); } /// \brief Loads a vector from a byte array /// \param src the byte array /// \details Loads a vector in big endian format from a byte array. -/// VectorLoadBE will swap all bytes on little endian systems. -/// \details VectorLoadBE uses POWER7's vec_xl or +/// VecLoadBE will swap all bytes on little endian systems. +/// \details VecLoadBE uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// an aligned memory address. -/// \details VectorLoad_ALTIVEC() is used if POWER7 or above -/// is not available. VectorLoad_ALTIVEC() is relatively expensive. -/// \note VectorLoadBE() does not require an aligned array. +/// \details VecLoad_ALTIVEC() is used if POWER7 or above +/// is not available. VecLoad_ALTIVEC() is relatively expensive. +/// \note VecLoadBE() does not require an aligned array. 
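// Minimal sketch (not from this patch) of what the big-endian load amounts
// to on a little-endian target: a native load followed by a full byte
// reversal, matching the VecReverse() mask defined earlier in this header.
// The helper name is hypothetical.
inline uint32x4_p LoadBE_Sketch(const byte src[16])
{
#if (CRYPTOPP_BIG_ENDIAN)
    return VecLoad(src);            // bytes are already in big-endian order
#else
    const uint8x16_p m = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
    const uint32x4_p v = VecLoad(src);
    return (uint32x4_p)VecPermute(v, v, m);   // byte-reverse the vector
#endif
}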
/// \since Crypto++ 6.0 -inline uint32x4_p VectorLoadBE(const byte src[16]) +inline uint32x4_p VecLoadBE(const byte src[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -212,14 +212,14 @@ inline uint32x4_p VectorLoadBE(const byte src[16]) # if (CRYPTOPP_BIG_ENDIAN) return (uint32x4_p)vec_vsx_ld(0, (byte*)src); # else - return (uint32x4_p)Reverse(vec_vsx_ld(0, (byte*)src)); + return (uint32x4_p)VecReverse(vec_vsx_ld(0, (byte*)src)); # endif # endif #else // _ARCH_PWR7 # if (CRYPTOPP_BIG_ENDIAN) - return (uint32x4_p)VectorLoad((const byte*)src); + return (uint32x4_p)VecLoad((const byte*)src); # else - return (uint32x4_p)Reverse(VectorLoad((const byte*)src)); + return (uint32x4_p)VecReverse(VecLoad((const byte*)src)); # endif #endif // _ARCH_PWR7 } @@ -228,15 +228,15 @@ inline uint32x4_p VectorLoadBE(const byte src[16]) /// \param src the byte array /// \param off offset into the src byte array /// \details Loads a vector in big endian format from a byte array. -/// VectorLoadBE will swap all bytes on little endian systems. -/// \details VectorLoadBE uses POWER7's vec_xl or +/// VecLoadBE will swap all bytes on little endian systems. +/// \details VecLoadBE uses POWER7's vec_xl or /// vec_vsx_ld if available. The instructions do not require /// an aligned memory address. -/// \details VectorLoad_ALTIVEC() is used if POWER7 or above -/// is not available. VectorLoad_ALTIVEC() is relatively expensive. -/// \note VectorLoadBE does not require an aligned array. +/// \details VecLoad_ALTIVEC() is used if POWER7 or above +/// is not available. VecLoad_ALTIVEC() is relatively expensive. +/// \note VecLoadBE does not require an aligned array. /// \since Crypto++ 6.0 -inline uint32x4_p VectorLoadBE(int off, const byte src[16]) +inline uint32x4_p VecLoadBE(int off, const byte src[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -245,14 +245,14 @@ inline uint32x4_p VectorLoadBE(int off, const byte src[16]) # if (CRYPTOPP_BIG_ENDIAN) return (uint32x4_p)vec_vsx_ld(off, (byte*)src); # else - return (uint32x4_p)Reverse(vec_vsx_ld(off, (byte*)src)); + return (uint32x4_p)VecReverse(vec_vsx_ld(off, (byte*)src)); # endif # endif #else // _ARCH_PWR7 # if (CRYPTOPP_BIG_ENDIAN) - return (uint32x4_p)VectorLoad(off, (const byte*)src); + return (uint32x4_p)VecLoad(off, (const byte*)src); # else - return (uint32x4_p)Reverse(VectorLoad(off, (const byte*)src)); + return (uint32x4_p)VecReverse(VecLoad(off, (const byte*)src)); # endif #endif // _ARCH_PWR7 } @@ -264,16 +264,16 @@ inline uint32x4_p VectorLoadBE(int off, const byte src[16]) /// \param data the vector /// \param dest the byte array /// \details Stores a vector in native endian format to a byte array. -/// \details VectorStore_ALTIVEC() uses vec_st if the effective address +/// \details VecStore_ALTIVEC() uses vec_st if the effective address /// of dest is aligned, and uses vec_ste otherwise. /// vec_ste is relatively expensive so you should provide aligned /// memory adresses. -/// \details VectorStore_ALTIVEC() is used automatically when POWER7 or above +/// \details VecStore_ALTIVEC() is used automatically when POWER7 or above /// and unaligned loads is not available. -/// \note VectorStore does not require an aligned array. +/// \note VecStore does not require an aligned array. 
/// \since Crypto++ 8.0 template -inline void VectorStore_ALTIVEC(const T data, byte dest[16]) +inline void VecStore_ALTIVEC(const T data, byte dest[16]) { if (IsAlignedOn(dest, 16)) { @@ -300,16 +300,16 @@ inline void VectorStore_ALTIVEC(const T data, byte dest[16]) /// \param off the byte offset into the array /// \param dest the byte array /// \details Stores a vector in native endian format to a byte array. -/// \details VectorStore_ALTIVEC() uses vec_st if the effective address +/// \details VecStore_ALTIVEC() uses vec_st if the effective address /// of dest is aligned, and uses vec_ste otherwise. /// vec_ste is relatively expensive so you should provide aligned /// memory adresses. -/// \details VectorStore_ALTIVEC() is used automatically when POWER7 or above +/// \details VecStore_ALTIVEC() is used automatically when POWER7 or above /// and unaligned loads is not available. -/// \note VectorStore does not require an aligned array. +/// \note VecStore does not require an aligned array. /// \since Crypto++ 8.0 template -inline void VectorStore_ALTIVEC(const T data, int off, byte dest[16]) +inline void VecStore_ALTIVEC(const T data, int off, byte dest[16]) { if (IsAlignedOn(dest, 16)) { @@ -335,15 +335,15 @@ inline void VectorStore_ALTIVEC(const T data, int off, byte dest[16]) /// \param data the vector /// \param dest the byte array /// \details Stores a vector in native endian format to a byte array. -/// \details VectorStore uses POWER7's vec_xst or +/// \details VecStore uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// an aligned memory address. -/// \details VectorStore_ALTIVEC() is used if POWER7 or above -/// is not available. VectorStore_ALTIVEC() is relatively expensive. -/// \note VectorStore does not require an aligned array. +/// \details VecStore_ALTIVEC() is used if POWER7 or above +/// is not available. VecStore_ALTIVEC() is relatively expensive. +/// \note VecStore does not require an aligned array. /// \since Crypto++ 6.0 template -inline void VectorStore(const T data, byte dest[16]) +inline void VecStore(const T data, byte dest[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -352,7 +352,7 @@ inline void VectorStore(const T data, byte dest[16]) vec_vsx_st((uint8x16_p)data, 0, (byte*)dest); # endif #else - return VectorStore_ALTIVEC(data, 0, dest); + return VecStore_ALTIVEC(data, 0, dest); #endif } @@ -362,15 +362,15 @@ inline void VectorStore(const T data, byte dest[16]) /// \param off the byte offset into the array /// \param dest the byte array /// \details Stores a vector in native endian format to a byte array. -/// \details VectorStore uses POWER7's vec_xst or +/// \details VecStore uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// an aligned memory address. -/// \details VectorStore_ALTIVEC() is used if POWER7 or above -/// is not available. VectorStore_ALTIVEC() is relatively expensive. -/// \note VectorStore does not require an aligned array. +/// \details VecStore_ALTIVEC() is used if POWER7 or above +/// is not available. VecStore_ALTIVEC() is relatively expensive. +/// \note VecStore does not require an aligned array. 
/// \since Crypto++ 6.0 template -inline void VectorStore(const T data, int off, byte dest[16]) +inline void VecStore(const T data, int off, byte dest[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -379,7 +379,7 @@ inline void VectorStore(const T data, int off, byte dest[16]) vec_vsx_st((uint8x16_p)data, off, (byte*)dest); # endif #else - return VectorStore_ALTIVEC(data, off, dest); + return VecStore_ALTIVEC(data, off, dest); #endif } @@ -388,17 +388,17 @@ inline void VectorStore(const T data, int off, byte dest[16]) /// \param data the vector /// \param dest the byte array /// \details Stores a vector in native endian format to a byte array. -/// \details VectorStore uses POWER7's vec_xst or +/// \details VecStore uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// an aligned memory address. -/// \details VectorStore_ALTIVEC() is used if POWER7 or above -/// is not available. VectorStore_ALTIVEC() is relatively expensive. -/// \note VectorStore does not require an aligned array. +/// \details VecStore_ALTIVEC() is used if POWER7 or above +/// is not available. VecStore_ALTIVEC() is relatively expensive. +/// \note VecStore does not require an aligned array. /// \since Crypto++ 8.0 template -inline void VectorStore(const T data, word32 dest[4]) +inline void VecStore(const T data, word32 dest[4]) { - VectorStore((uint8x16_p)data, 0, (byte*)dest); + VecStore((uint8x16_p)data, 0, (byte*)dest); } /// \brief Stores a vector to a word array @@ -407,17 +407,17 @@ inline void VectorStore(const T data, word32 dest[4]) /// \param off the byte offset into the array /// \param dest the byte array /// \details Stores a vector in native endian format to a byte array. -/// \details VectorStore uses POWER7's vec_xst or +/// \details VecStore uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// an aligned memory address. -/// \details VectorStore_ALTIVEC() is used if POWER7 or above -/// is not available. VectorStore_ALTIVEC() is relatively expensive. -/// \note VectorStore does not require an aligned array. +/// \details VecStore_ALTIVEC() is used if POWER7 or above +/// is not available. VecStore_ALTIVEC() is relatively expensive. +/// \note VecStore does not require an aligned array. /// \since Crypto++ 8.0 template -inline void VectorStore(const T data, int off, word32 dest[4]) +inline void VecStore(const T data, int off, word32 dest[4]) { - VectorStore((uint8x16_p)data, off, (byte*)dest); + VecStore((uint8x16_p)data, off, (byte*)dest); } /// \brief Stores a vector to a byte array @@ -425,16 +425,16 @@ inline void VectorStore(const T data, int off, word32 dest[4]) /// \param src the vector /// \param dest the byte array /// \details Stores a vector in big endian format to a byte array. -/// VectorStoreBE will swap all bytes on little endian systems. -/// \details VectorStoreBE uses POWER7's vec_xst or +/// VecStoreBE will swap all bytes on little endian systems. +/// \details VecStoreBE uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// an aligned memory address. -/// \details VectorStore_ALTIVEC() is used if POWER7 or above -/// is not available. VectorStore_ALTIVEC() is relatively expensive. -/// \note VectorStoreBE does not require an aligned array. +/// \details VecStore_ALTIVEC() is used if POWER7 or above +/// is not available. VecStore_ALTIVEC() is relatively expensive. +/// \note VecStoreBE does not require an aligned array. 
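// Usage sketch (not from this patch): loading and storing through the BE
// variants yields the same 16 bytes on big- and little-endian hosts alike,
// which is how the block-cipher code elsewhere in this patch keeps a stable
// wire format. The helper name is hypothetical.
inline void RoundTripBE_Sketch(const byte in[16], byte out[16])
{
    const uint32x4_p v = VecLoadBE(in);   // interpret the bytes as big-endian
    VecStoreBE(v, out);                   // write them back in big-endian order
}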
/// \since Crypto++ 6.0 template -inline void VectorStoreBE(const T src, byte dest[16]) +inline void VecStoreBE(const T src, byte dest[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -443,14 +443,14 @@ inline void VectorStoreBE(const T src, byte dest[16]) # if (CRYPTOPP_BIG_ENDIAN) vec_vsx_st((uint8x16_p)src, 0, (byte*)dest); # else - vec_vsx_st((uint8x16_p)Reverse(src), 0, (byte*)dest); + vec_vsx_st((uint8x16_p)VecReverse(src), 0, (byte*)dest); # endif # endif #else // _ARCH_PWR7 # if (CRYPTOPP_BIG_ENDIAN) - VectorStore((uint8x16_p)src, (byte*)dest); + VecStore((uint8x16_p)src, (byte*)dest); # else - VectorStore((uint8x16_p)Reverse(src), (byte*)dest); + VecStore((uint8x16_p)VecReverse(src), (byte*)dest); # endif #endif // _ARCH_PWR7 } @@ -461,16 +461,16 @@ inline void VectorStoreBE(const T src, byte dest[16]) /// \param off offset into the dest byte array /// \param dest the byte array /// \details Stores a vector in big endian format to a byte array. -/// VectorStoreBE will swap all bytes on little endian systems. -/// \details VectorStoreBE uses POWER7's vec_xst or +/// VecStoreBE will swap all bytes on little endian systems. +/// \details VecStoreBE uses POWER7's vec_xst or /// vec_vsx_st if available. The instructions do not require /// an aligned memory address. -/// \details VectorStore_ALTIVEC() is used if POWER7 or above -/// is not available. VectorStore_ALTIVEC() is relatively expensive. -/// \note VectorStoreBE does not require an aligned array. +/// \details VecStore_ALTIVEC() is used if POWER7 or above +/// is not available. VecStore_ALTIVEC() is relatively expensive. +/// \note VecStoreBE does not require an aligned array. /// \since Crypto++ 6.0 template -inline void VectorStoreBE(const T src, int off, byte dest[16]) +inline void VecStoreBE(const T src, int off, byte dest[16]) { #if defined(_ARCH_PWR7) # if defined(__xlc__) || defined(__xlC__) || defined(__clang__) @@ -479,14 +479,14 @@ inline void VectorStoreBE(const T src, int off, byte dest[16]) # if (CRYPTOPP_BIG_ENDIAN) vec_vsx_st((uint8x16_p)src, off, (byte*)dest); # else - vec_vsx_st((uint8x16_p)Reverse(src), off, (byte*)dest); + vec_vsx_st((uint8x16_p)VecReverse(src), off, (byte*)dest); # endif # endif #else // _ARCH_PWR7 # if (CRYPTOPP_BIG_ENDIAN) - VectorStore((uint8x16_p)src, off, (byte*)dest); + VecStore((uint8x16_p)src, off, (byte*)dest); # else - VectorStore((uint8x16_p)Reverse(src), off, (byte*)dest); + VecStore((uint8x16_p)VecReverse(src), off, (byte*)dest); # endif #endif // _ARCH_PWR7 } @@ -498,12 +498,12 @@ inline void VectorStoreBE(const T src, int off, byte dest[16]) /// \param vec the vector /// \param mask vector mask /// \returns vector -/// \details VectorPermute returns a new vector from vec based on +/// \details VecPermute returns a new vector from vec based on /// mask. mask is an uint8x16_p type vector. The return /// vector is the same type as vec. /// \since Crypto++ 6.0 template -inline T1 VectorPermute(const T1 vec, const T2 mask) +inline T1 VecPermute(const T1 vec, const T2 mask) { return (T1)vec_perm(vec, vec, (uint8x16_p)mask); } @@ -515,12 +515,12 @@ inline T1 VectorPermute(const T1 vec, const T2 mask) /// \param vec2 the second vector /// \param mask vector mask /// \returns vector -/// \details VectorPermute returns a new vector from vec1 and vec2 +/// \details VecPermute returns a new vector from vec1 and vec2 /// based on mask. mask is an uint8x16_p type vector. The return /// vector is the same type as vec1. 
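// Small example (not from this patch) of the two-input form: mask indices
// 0..15 select bytes from the first operand and 16..31 from the second, so
// the mask below concatenates bytes 8..15 of vec1 with bytes 0..7 of vec2.
// The helper name is hypothetical.
inline uint32x4_p MergeHalves_Sketch(const uint32x4_p vec1, const uint32x4_p vec2)
{
    const uint8x16_p mask = {8,9,10,11, 12,13,14,15, 16,17,18,19, 20,21,22,23};
    return VecPermute(vec1, vec2, mask);
}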
/// \since Crypto++ 6.0 template -inline T1 VectorPermute(const T1 vec1, const T1 vec2, const T2 mask) +inline T1 VecPermute(const T1 vec1, const T1 vec2, const T2 mask) { return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask); } @@ -531,11 +531,11 @@ inline T1 VectorPermute(const T1 vec1, const T1 vec2, const T2 mask) /// \param vec1 the first vector /// \param vec2 the second vector /// \returns vector -/// \details VectorAnd returns a new vector from vec1 and vec2. The return +/// \details VecAnd returns a new vector from vec1 and vec2. The return /// vector is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorAnd(const T1 vec1, const T2 vec2) +inline T1 VecAnd(const T1 vec1, const T2 vec2) { return (T1)vec_and(vec1, (T1)vec2); } @@ -546,11 +546,11 @@ inline T1 VectorAnd(const T1 vec1, const T2 vec2) /// \param vec1 the first vector /// \param vec2 the second vector /// \returns vector -/// \details VectorOr returns a new vector from vec1 and vec2. The return +/// \details VecOr returns a new vector from vec1 and vec2. The return /// vector is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorOr(const T1 vec1, const T2 vec2) +inline T1 VecOr(const T1 vec1, const T2 vec2) { return (T1)vec_or(vec1, (T1)vec2); } @@ -561,11 +561,11 @@ inline T1 VectorOr(const T1 vec1, const T2 vec2) /// \param vec1 the first vector /// \param vec2 the second vector /// \returns vector -/// \details VectorXor returns a new vector from vec1 and vec2. The return +/// \details VecXor returns a new vector from vec1 and vec2. The return /// vector is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorXor(const T1 vec1, const T2 vec2) +inline T1 VecXor(const T1 vec1, const T2 vec2) { return (T1)vec_xor(vec1, (T1)vec2); } @@ -576,12 +576,12 @@ inline T1 VectorXor(const T1 vec1, const T2 vec2) /// \param vec1 the first vector /// \param vec2 the second vector /// \returns vector -/// \details VectorAdd returns a new vector from vec1 and vec2. +/// \details VecAdd returns a new vector from vec1 and vec2. /// vec2 is cast to the same type as vec1. The return vector /// is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorAdd(const T1 vec1, const T2 vec2) +inline T1 VecAdd(const T1 vec1, const T2 vec2) { return (T1)vec_add(vec1, (T1)vec2); } @@ -591,12 +591,12 @@ inline T1 VectorAdd(const T1 vec1, const T2 vec2) /// \tparam T2 vector type /// \param vec1 the first vector /// \param vec2 the second vector -/// \details VectorSub returns a new vector from vec1 and vec2. +/// \details VecSub returns a new vector from vec1 and vec2. /// vec2 is cast to the same type as vec1. The return vector /// is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorSub(const T1 vec1, const T2 vec2) +inline T1 VecSub(const T1 vec1, const T2 vec2) { return (T1)vec_sub(vec1, (T1)vec2); } @@ -607,10 +607,10 @@ inline T1 VectorSub(const T1 vec1, const T2 vec2) /// \param vec1 the first vector /// \param vec2 the second vector /// \returns vector -/// \details VectorAdd64 returns a new vector from vec1 and vec2. +/// \details VecAdd64 returns a new vector from vec1 and vec2. /// vec1 and vec2 are added as uint64x2_p quantities. 
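// Usage sketch (not from this patch) showing why the 64-bit add matters:
// the operands are uint32x4_p, but the addition runs on two 64-bit lanes,
// so a carry out of the low 32-bit word of each lane is not lost. The
// values and helper name are illustrative only.
inline void Add64_Sketch()
{
    const uint32x4_p a = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF};
    const uint32x4_p b = {1, 1, 1, 1};
    const uint32x4_p s64 = VecAdd64(a, b);  // each 64-bit lane is 0x0000000100000000
    const uint32x4_p s32 = VecAdd(a, b);    // each 32-bit word wraps to 0
    CRYPTOPP_UNUSED(s64); CRYPTOPP_UNUSED(s32);
}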
/// \since Crypto++ 8.0 -inline uint32x4_p VectorAdd64(const uint32x4_p& vec1, const uint32x4_p& vec2) +inline uint32x4_p VecAdd64(const uint32x4_p& vec1, const uint32x4_p& vec2) { #if defined(_ARCH_PWR8) return (uint32x4_p)vec_add((uint64x2_p)vec1, (uint64x2_p)vec2); @@ -632,22 +632,22 @@ inline uint32x4_p VectorAdd64(const uint32x4_p& vec1, const uint32x4_p& vec2) /// \tparam T vector type /// \param vec the vector /// \returns vector -/// \details VectorShiftLeftOctet() returns a new vector after shifting the +/// \details VecShiftLeftOctet() returns a new vector after shifting the /// concatenation of the zero vector and the source vector by the specified /// number of bytes. The return vector is the same type as vec. -/// \details On big endian machines VectorShiftLeftOctet() is vec_sld(a, z, -/// c). On little endian machines VectorShiftLeftOctet() is translated to +/// \details On big endian machines VecShiftLeftOctet() is vec_sld(a, z, +/// c). On little endian machines VecShiftLeftOctet() is translated to /// vec_sld(z, a, 16-c). You should always call the function as /// if on a big endian machine as shown below. ///
-///    uint8x16_p x = VectorLoad(ptr);
-///    uint8x16_p y = VectorShiftLeftOctet<12>(x);
+///    uint8x16_p x = VecLoad(ptr);
+///    uint8x16_p y = VecShiftLeftOctet<12>(x);
 /// </pre>
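// Sketch (not from this patch) of the endian handling described above for a
// fixed 4-byte shift: callers reason in big-endian terms, and on little-endian
// targets the operands are swapped and the count reflected, per the vec_sld
// mapping in the comments. The helper name is hypothetical.
template <class T>
inline T ShiftLeftOctet4_Sketch(const T vec)
{
    const T zero = {0};
#if (CRYPTOPP_BIG_ENDIAN)
    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, 4);     // vec_sld(a, z, c)
#else
    return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, 16-4);  // vec_sld(z, a, 16-c)
#endif
}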
/// \sa Is vec_sld /// endian sensitive? on Stack Overflow /// \since Crypto++ 6.0 template -inline T VectorShiftLeftOctet(const T vec) +inline T VecShiftLeftOctet(const T vec) { const T zero = {0}; if (C >= 16) @@ -675,22 +675,22 @@ inline T VectorShiftLeftOctet(const T vec) /// \tparam T vector type /// \param vec the vector /// \returns vector -/// \details VectorShiftRightOctet() returns a new vector after shifting the +/// \details VecShiftRightOctet() returns a new vector after shifting the /// concatenation of the zero vector and the source vector by the specified /// number of bytes. The return vector is the same type as vec. -/// \details On big endian machines VectorShiftRightOctet() is vec_sld(a, z, -/// c). On little endian machines VectorShiftRightOctet() is translated to +/// \details On big endian machines VecShiftRightOctet() is vec_sld(a, z, +/// c). On little endian machines VecShiftRightOctet() is translated to /// vec_sld(z, a, 16-c). You should always call the function as /// if on a big endian machine as shown below. ///
-///    uint8x16_p x = VectorLoad(ptr);
-///    uint8x16_p y = VectorShiftRightOctet<12>(y);
+///    uint8x16_p x = VecLoad(ptr);
+///    uint8x16_p y = VecShiftRightOctet<12>(y);
 /// </pre>
/// \sa Is vec_sld /// endian sensitive? on Stack Overflow /// \since Crypto++ 6.0 template -inline T VectorShiftRightOctet(const T vec) +inline T VecShiftRightOctet(const T vec) { const T zero = {0}; if (C >= 16) @@ -718,14 +718,14 @@ inline T VectorShiftRightOctet(const T vec) /// \tparam T vector type /// \param vec the vector /// \returns vector -/// \details VectorRotateLeftOctet() returns a new vector after rotating the +/// \details VecRotateLeftOctet() returns a new vector after rotating the /// concatenation of the source vector with itself by the specified /// number of bytes. The return vector is the same type as vec. /// \sa Is vec_sld /// endian sensitive? on Stack Overflow /// \since Crypto++ 6.0 template -inline T VectorRotateLeftOctet(const T vec) +inline T VecRotateLeftOctet(const T vec) { enum { R = C&0xf }; #if (CRYPTOPP_BIG_ENDIAN) @@ -740,14 +740,14 @@ inline T VectorRotateLeftOctet(const T vec) /// \tparam T vector type /// \param vec the vector /// \returns vector -/// \details VectorRotateRightOctet() returns a new vector after rotating the +/// \details VecRotateRightOctet() returns a new vector after rotating the /// concatenation of the source vector with itself by the specified /// number of bytes. The return vector is the same type as vec. /// \sa Is vec_sld /// endian sensitive? on Stack Overflow /// \since Crypto++ 6.0 template -inline T VectorRotateRightOctet(const T vec) +inline T VecRotateRightOctet(const T vec) { enum { R = C&0xf }; #if (CRYPTOPP_BIG_ENDIAN) @@ -761,9 +761,9 @@ inline T VectorRotateRightOctet(const T vec) /// \tparam C shift bit count /// \param vec the vector /// \returns vector -/// \details VectorRotateLeft rotates each element in a packed vector by bit count. +/// \details VecRotateLeft rotates each element in a packed vector by bit count. template -inline uint32x4_p VectorRotateLeft(const uint32x4_p vec) +inline uint32x4_p VecRotateLeft(const uint32x4_p vec) { const uint32x4_p m = {C, C, C, C}; return vec_rl(vec, m); @@ -773,9 +773,9 @@ inline uint32x4_p VectorRotateLeft(const uint32x4_p vec) /// \tparam C shift bit count /// \param vec the vector /// \returns vector -/// \details VectorRotateRight rotates each element in a packed vector by bit count. +/// \details VecRotateRight rotates each element in a packed vector by bit count. template -inline uint32x4_p VectorRotateRight(const uint32x4_p vec) +inline uint32x4_p VecRotateRight(const uint32x4_p vec) { const uint32x4_p m = {32-C, 32-C, 32-C, 32-C}; return vec_rl(vec, m); @@ -787,7 +787,7 @@ inline uint32x4_p VectorRotateRight(const uint32x4_p vec) /// \returns vector /// \since Crypto++ 7.0 template -inline T VectorSwapWords(const T vec) +inline T VecSwapWords(const T vec) { return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)vec, 8); } @@ -796,34 +796,34 @@ inline T VectorSwapWords(const T vec) /// \tparam T vector type /// \param val the vector /// \returns vector created from low dword -/// \details VectorGetLow() extracts the low dword from a vector. The low dword +/// \details VecGetLow() extracts the low dword from a vector. The low dword /// is composed of the least significant bits and occupies bytes 8 through 15 /// when viewed as a big endian array. The return vector is the same type as /// the original vector and padded with 0's in the most significant bit positions. 
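// Worked example (not from this patch; values and helper name illustrative).
// Viewed as a big-endian byte array the high dword sits in bytes 0..7 and the
// low dword in bytes 8..15; each extraction ends up in the low position,
// zero-padded above.
inline void GetLowHigh_Sketch()
{
    const uint32x4_p val  = {0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f};
    const uint32x4_p high = VecGetHigh(val);  // {0, 0, 0x00010203, 0x04050607}
    const uint32x4_p low  = VecGetLow(val);   // {0, 0, 0x08090a0b, 0x0c0d0e0f}
    CRYPTOPP_UNUSED(high); CRYPTOPP_UNUSED(low);
}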
template -inline T VectorGetLow(const T val) +inline T VecGetLow(const T val) { //const T zero = {0}; //const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 8,9,10,11, 12,13,14,15 }; //return (T)vec_perm(zero, val, mask); - return VectorShiftRightOctet<8>(VectorShiftLeftOctet<8>(val)); + return VecShiftRightOctet<8>(VecShiftLeftOctet<8>(val)); } /// \brief Extract a dword from a vector /// \tparam T vector type /// \param val the vector /// \returns vector created from high dword -/// \details VectorGetHigh() extracts the high dword from a vector. The high dword +/// \details VecGetHigh() extracts the high dword from a vector. The high dword /// is composed of the most significant bits and occupies bytes 0 through 7 /// when viewed as a big endian array. The return vector is the same type as /// the original vector and padded with 0's in the most significant bit positions. template -inline T VectorGetHigh(const T val) +inline T VecGetHigh(const T val) { //const T zero = {0}; //const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 0,1,2,3, 4,5,6,7 }; //return (T)vec_perm(zero, val, mask); - return VectorShiftRightOctet<8>(val); + return VecShiftRightOctet<8>(val); } /// \brief Compare two vectors @@ -833,7 +833,7 @@ inline T VectorGetHigh(const T val) /// \param vec2 the second vector /// \returns true if vec1 equals vec2, false otherwise template -inline bool VectorEqual(const T1 vec1, const T2 vec2) +inline bool VecEqual(const T1 vec1, const T2 vec2) { return 1 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2); } @@ -845,7 +845,7 @@ inline bool VectorEqual(const T1 vec1, const T2 vec2) /// \param vec2 the second vector /// \returns true if vec1 does not equal vec2, false otherwise template -inline bool VectorNotEqual(const T1 vec1, const T2 vec2) +inline bool VecNotEqual(const T1 vec1, const T2 vec2) { return 0 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2); } @@ -859,11 +859,11 @@ inline bool VectorNotEqual(const T1 vec1, const T2 vec2) /// \tparam T2 vector type /// \param state the state vector /// \param key the subkey vector -/// \details VectorEncrypt performs one round of AES encryption of state +/// \details VecEncrypt performs one round of AES encryption of state /// using subkey key. The return vector is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorEncrypt(const T1 state, const T2 key) +inline T1 VecEncrypt(const T1 state, const T2 key) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (T1)__vcipher((uint8x16_p)state, (uint8x16_p)key); @@ -879,11 +879,11 @@ inline T1 VectorEncrypt(const T1 state, const T2 key) /// \tparam T2 vector type /// \param state the state vector /// \param key the subkey vector -/// \details VectorEncryptLast performs the final round of AES encryption +/// \details VecEncryptLast performs the final round of AES encryption /// of state using subkey key. The return vector is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorEncryptLast(const T1 state, const T2 key) +inline T1 VecEncryptLast(const T1 state, const T2 key) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (T1)__vcipherlast((uint8x16_p)state, (uint8x16_p)key); @@ -899,11 +899,11 @@ inline T1 VectorEncryptLast(const T1 state, const T2 key) /// \tparam T2 vector type /// \param state the state vector /// \param key the subkey vector -/// \details VectorDecrypt performs one round of AES decryption of state +/// \details VecDecrypt performs one round of AES decryption of state /// using subkey key. 
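// Hedged sketch (not from this patch) of a full encryption pass built from
// VecEncrypt/VecEncryptLast, mirroring the POWER8_Enc_Block pattern in
// rijndael_simd.cpp further down. The 16-bytes-per-round key layout and the
// helper name are assumptions for illustration.
inline uint32x4_p EncryptBlock_Sketch(uint32x4_p block, const byte* keys, unsigned int rounds)
{
    block = VecXor(block, VecLoad(0, keys));                  // whitening key
    for (unsigned int i = 1; i < rounds; ++i)
        block = VecEncrypt(block, VecLoad(i*16, keys));       // one vcipher round
    return VecEncryptLast(block, VecLoad(rounds*16, keys));   // vcipherlast
}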
The return vector is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorDecrypt(const T1 state, const T2 key) +inline T1 VecDecrypt(const T1 state, const T2 key) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (T1)__vncipher((uint8x16_p)state, (uint8x16_p)key); @@ -919,11 +919,11 @@ inline T1 VectorDecrypt(const T1 state, const T2 key) /// \tparam T2 vector type /// \param state the state vector /// \param key the subkey vector -/// \details VectorDecryptLast performs the final round of AES decryption +/// \details VecDecryptLast performs the final round of AES decryption /// of state using subkey key. The return vector is the same type as vec1. /// \since Crypto++ 6.0 template -inline T1 VectorDecryptLast(const T1 state, const T2 key) +inline T1 VecDecryptLast(const T1 state, const T2 key) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (T1)__vncipherlast((uint8x16_p)state, (uint8x16_p)key); @@ -939,11 +939,11 @@ inline T1 VectorDecryptLast(const T1 state, const T2 key) /// \tparam subfunc sub-function /// \tparam T vector type /// \param vec the block to transform -/// \details VectorSHA256 selects sigma0, sigma1, Sigma0, Sigma1 based on +/// \details VecSHA256 selects sigma0, sigma1, Sigma0, Sigma1 based on /// func and subfunc. The return vector is the same type as vec. /// \since Crypto++ 6.0 template -inline T VectorSHA256(const T vec) +inline T VecSHA256(const T vec) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (T)__vshasigmaw((uint32x4_p)vec, func, subfunc); @@ -959,11 +959,11 @@ inline T VectorSHA256(const T vec) /// \tparam subfunc sub-function /// \tparam T vector type /// \param vec the block to transform -/// \details VectorSHA512 selects sigma0, sigma1, Sigma0, Sigma1 based on +/// \details VecSHA512 selects sigma0, sigma1, Sigma0, Sigma1 based on /// func and subfunc. The return vector is the same type as vec. 
/// \since Crypto++ 6.0 template -inline T VectorSHA512(const T vec) +inline T VecSHA512(const T vec) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (T)__vshasigmad((uint64x2_p)vec, func, subfunc); diff --git a/rijndael_simd.cpp b/rijndael_simd.cpp index 3d122d70..f4c9a288 100644 --- a/rijndael_simd.cpp +++ b/rijndael_simd.cpp @@ -214,12 +214,12 @@ bool CPU_ProbePower8() #if defined(__xlc__) || defined(__xlC__) const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1); const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2); - const uint64x2_p v3 = vec_add(v1, v2); // 64-bit add + const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add vec_xst((uint8x16_p)v3, 0, (byte*)w3); #else const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1); const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2); - const uint64x2_p v3 = vec_add(v1, v2); // 64-bit add + const uint64x2_p v3 = VecAdd(v1, v2); // 64-bit add vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3); #endif @@ -265,13 +265,13 @@ bool CPU_ProbeAES() 0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08}; byte r[16] = {255}, z[16] = {}; - uint8x16_p k = (uint8x16_p)VectorLoad(0, key); - uint8x16_p s = (uint8x16_p)VectorLoad(0, state); - s = VectorEncrypt(s, k); - s = VectorEncryptLast(s, k); - s = VectorDecrypt(s, k); - s = VectorDecryptLast(s, k); - VectorStore(s, r); + uint8x16_p k = (uint8x16_p)VecLoad(0, key); + uint8x16_p s = (uint8x16_p)VecLoad(0, state); + s = VecEncrypt(s, k); + s = VecEncryptLast(s, k); + s = VecDecrypt(s, k); + s = VecDecryptLast(s, k); + VecStore(s, r); result = (0 != std::memcmp(r, z, 16)); } @@ -697,17 +697,17 @@ static inline void POWER8_Enc_Block(uint32x4_p &block, const word32 *subkeys, un CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16)); const byte *keys = reinterpret_cast(subkeys); - uint32x4_p k = VectorLoad(keys); - block = VectorXor(block, k); + uint32x4_p k = VecLoad(keys); + block = VecXor(block, k); for (size_t i=1; i(subkeys); - uint32x4_p k = VectorLoad(keys); - block0 = VectorXor(block0, k); - block1 = VectorXor(block1, k); - block2 = VectorXor(block2, k); - block3 = VectorXor(block3, k); - block4 = VectorXor(block4, k); - block5 = VectorXor(block5, k); + uint32x4_p k = VecLoad(keys); + block0 = VecXor(block0, k); + block1 = VecXor(block1, k); + block2 = VecXor(block2, k); + block3 = VecXor(block3, k); + block4 = VecXor(block4, k); + block5 = VecXor(block5, k); for (size_t i=1; i(subkeys); - uint32x4_p k = VectorLoad(rounds*16, keys); - block = VectorXor(block, k); + uint32x4_p k = VecLoad(rounds*16, keys); + block = VecXor(block, k); for (size_t i=rounds-1; i>1; i-=2) { - block = VectorDecrypt(block, VectorLoad( i*16, keys)); - block = VectorDecrypt(block, VectorLoad((i-1)*16, keys)); + block = VecDecrypt(block, VecLoad( i*16, keys)); + block = VecDecrypt(block, VecLoad((i-1)*16, keys)); } - block = VectorDecrypt(block, VectorLoad(16, keys)); - block = VectorDecryptLast(block, VectorLoad(0, keys)); + block = VecDecrypt(block, VecLoad(16, keys)); + block = VecDecryptLast(block, VecLoad(0, keys)); } static inline void POWER8_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -770,32 +770,32 @@ static inline void POWER8_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16)); const byte *keys = reinterpret_cast(subkeys); - uint32x4_p k = VectorLoad(rounds*16, keys); - block0 = VectorXor(block0, k); - block1 = VectorXor(block1, k); - block2 = VectorXor(block2, k); - block3 = VectorXor(block3, k); - block4 = VectorXor(block4, k); - block5 = VectorXor(block5, 
k); + uint32x4_p k = VecLoad(rounds*16, keys); + block0 = VecXor(block0, k); + block1 = VecXor(block1, k); + block2 = VecXor(block2, k); + block3 = VecXor(block3, k); + block4 = VecXor(block4, k); + block5 = VecXor(block5, k); for (size_t i=rounds-1; i>0; --i) { - k = VectorLoad(i*16, keys); - block0 = VectorDecrypt(block0, k); - block1 = VectorDecrypt(block1, k); - block2 = VectorDecrypt(block2, k); - block3 = VectorDecrypt(block3, k); - block4 = VectorDecrypt(block4, k); - block5 = VectorDecrypt(block5, k); + k = VecLoad(i*16, keys); + block0 = VecDecrypt(block0, k); + block1 = VecDecrypt(block1, k); + block2 = VecDecrypt(block2, k); + block3 = VecDecrypt(block3, k); + block4 = VecDecrypt(block4, k); + block5 = VecDecrypt(block5, k); } - k = VectorLoad(0, keys); - block0 = VectorDecryptLast(block0, k); - block1 = VectorDecryptLast(block1, k); - block2 = VectorDecryptLast(block2, k); - block3 = VectorDecryptLast(block3, k); - block4 = VectorDecryptLast(block4, k); - block5 = VectorDecryptLast(block5, k); + k = VecLoad(0, keys); + block0 = VecDecryptLast(block0, k); + block1 = VecDecryptLast(block1, k); + block2 = VecDecryptLast(block2, k); + block3 = VecDecryptLast(block3, k); + block4 = VecDecryptLast(block4, k); + block5 = VecDecryptLast(block5, k); } ANONYMOUS_NAMESPACE_END @@ -851,14 +851,14 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32* { const uint8x16_p d1 = vec_vsx_ld( 0, (uint8_t*)rkey); const uint8x16_p d2 = vec_vsx_ld(16, (uint8_t*)rkey); - vec_vsx_st(vec_perm(d1, zero, mask), 0, (uint8_t*)rkey); - vec_vsx_st(vec_perm(d2, zero, mask), 16, (uint8_t*)rkey); + vec_vsx_st(VecPermute(d1, zero, mask), 0, (uint8_t*)rkey); + vec_vsx_st(VecPermute(d2, zero, mask), 16, (uint8_t*)rkey); } for ( ; i(x); - x = VectorSHA256<0,1>(x); - x = VectorSHA256<1,0>(x); - x = VectorSHA256<1,1>(x); - VectorStore(x, r); + x = VecSHA256<0,0>(x); + x = VecSHA256<0,1>(x); + x = VecSHA256<1,0>(x); + x = VecSHA256<1,1>(x); + VecStore(x, r); result = (0 == std::memcmp(r, z, 16)); } @@ -268,11 +268,11 @@ bool CPU_ProbeSHA512() byte r[16], z[16] = {0}; uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); - x = VectorSHA512<0,0>(x); - x = VectorSHA512<0,1>(x); - x = VectorSHA512<1,0>(x); - x = VectorSHA512<1,1>(x); - VectorStore(x, r); + x = VecSHA512<0,0>(x); + x = VecSHA512<0,1>(x); + x = VecSHA512<1,0>(x); + x = VecSHA512<1,1>(x); + VecStore(x, r); result = (0 == std::memcmp(r, z, 16)); } @@ -1091,7 +1091,7 @@ typedef __vector unsigned long long uint64x2_p8; // Unaligned load template static inline -uint32x4_p8 VectorLoad32x4u(const T* data, int offset) +uint32x4_p8 VecLoad32x4u(const T* data, int offset) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (uint32x4_p8)vec_xl(offset, (uint8_t*)data); @@ -1102,7 +1102,7 @@ uint32x4_p8 VectorLoad32x4u(const T* data, int offset) // Unaligned store template static inline -void VectorStore32x4u(const uint32x4_p8 val, T* data, int offset) +void VecStore32x4u(const uint32x4_p8 val, T* data, int offset) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) vec_xst((uint8x16_p8)val, offset, (uint8_t*)data); @@ -1114,14 +1114,14 @@ void VectorStore32x4u(const uint32x4_p8 val, T* data, int offset) // Unaligned load of a user message. The load is big-endian, // and then the message is permuted for 32-bit words. 
template static inline -uint32x4_p8 VectorLoadMsg32x4(const T* data, int offset) +uint32x4_p8 VecLoadMsg32x4(const T* data, int offset) { #if (CRYPTOPP_LITTLE_ENDIAN) const uint8x16_p8 mask = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}; - const uint32x4_p8 r = VectorLoad32x4u(data, offset); - return (uint32x4_p8)vec_perm(r, r, mask); + const uint32x4_p8 r = VecLoad32x4u(data, offset); + return (uint32x4_p8)VecPermute(r, r, mask); #else - return VectorLoad32x4u(data, offset); + return VecLoad32x4u(data, offset); #endif } @@ -1136,7 +1136,7 @@ static inline uint32x4_p8 VectorMaj(const uint32x4_p8 x, const uint32x4_p8 y, const uint32x4_p8 z) { // The trick below is due to Andy Polyakov and Jack Lloyd - return vec_sel(y, z, vec_xor(x, y)); + return vec_sel(y, z, VecXor(x, y)); } static inline @@ -1185,7 +1185,7 @@ uint32x4_p8 VectorPack(const uint32x4_p8 a, const uint32x4_p8 b, { const uint8x16_p8 m1 = {0,1,2,3, 16,17,18,19, 0,0,0,0, 0,0,0,0}; const uint8x16_p8 m2 = {0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; - return vec_perm(vec_perm(a,b,m1), vec_perm(c,d,m1), m2); + return VecPermute(VecPermute(a,b,m1), VecPermute(c,d,m1), m2); } template static inline @@ -1231,8 +1231,8 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t const uint32_t* k = reinterpret_cast(SHA256_K); const uint32_t* m = reinterpret_cast(data); - uint32x4_p8 abcd = VectorLoad32x4u(state+0, 0); - uint32x4_p8 efgh = VectorLoad32x4u(state+4, 0); + uint32x4_p8 abcd = VecLoad32x4u(state+0, 0); + uint32x4_p8 efgh = VecLoad32x4u(state+4, 0); uint32x4_p8 W[16], S[8], vm, vk; size_t blocks = length / SHA256::BLOCKSIZE; @@ -1241,80 +1241,80 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t unsigned int offset=0; S[A] = abcd; S[E] = efgh; - S[B] = VectorShiftLeftOctet<4>(S[A]); - S[F] = VectorShiftLeftOctet<4>(S[E]); - S[C] = VectorShiftLeftOctet<4>(S[B]); - S[G] = VectorShiftLeftOctet<4>(S[F]); - S[D] = VectorShiftLeftOctet<4>(S[C]); - S[H] = VectorShiftLeftOctet<4>(S[G]); + S[B] = VecShiftLeftOctet<4>(S[A]); + S[F] = VecShiftLeftOctet<4>(S[E]); + S[C] = VecShiftLeftOctet<4>(S[B]); + S[G] = VecShiftLeftOctet<4>(S[F]); + S[D] = VecShiftLeftOctet<4>(S[C]); + S[H] = VecShiftLeftOctet<4>(S[G]); // Rounds 0-16 - vk = VectorLoad32x4u(k, offset); - vm = VectorLoadMsg32x4(m, offset); + vk = VecLoad32x4u(k, offset); + vm = VecLoadMsg32x4(m, offset); SHA256_ROUND1<0>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<1>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<2>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<3>(W,S, vk,vm); - vk = VectorLoad32x4u(k, offset); - vm = VectorLoadMsg32x4(m, offset); + vk = VecLoad32x4u(k, offset); + vm = VecLoadMsg32x4(m, offset); SHA256_ROUND1<4>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<5>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<6>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = 
VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<7>(W,S, vk,vm); - vk = VectorLoad32x4u(k, offset); - vm = VectorLoadMsg32x4(m, offset); + vk = VecLoad32x4u(k, offset); + vm = VecLoadMsg32x4(m, offset); SHA256_ROUND1<8>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<9>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<10>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<11>(W,S, vk,vm); - vk = VectorLoad32x4u(k, offset); - vm = VectorLoadMsg32x4(m, offset); + vk = VecLoad32x4u(k, offset); + vm = VecLoadMsg32x4(m, offset); SHA256_ROUND1<12>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<13>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<14>(W,S, vk,vm); - vk = VectorShiftLeftOctet<4>(vk); - vm = VectorShiftLeftOctet<4>(vm); + vk = VecShiftLeftOctet<4>(vk); + vm = VecShiftLeftOctet<4>(vm); SHA256_ROUND1<15>(W,S, vk,vm); m += 16; // 32-bit words, not bytes @@ -1322,32 +1322,32 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t // Rounds 16-64 for (unsigned int i=16; i<64; i+=16) { - vk = VectorLoad32x4u(k, offset); + vk = VecLoad32x4u(k, offset); SHA256_ROUND2<0>(W,S, vk); - SHA256_ROUND2<1>(W,S, VectorShiftLeftOctet<4>(vk)); - SHA256_ROUND2<2>(W,S, VectorShiftLeftOctet<8>(vk)); - SHA256_ROUND2<3>(W,S, VectorShiftLeftOctet<12>(vk)); + SHA256_ROUND2<1>(W,S, VecShiftLeftOctet<4>(vk)); + SHA256_ROUND2<2>(W,S, VecShiftLeftOctet<8>(vk)); + SHA256_ROUND2<3>(W,S, VecShiftLeftOctet<12>(vk)); offset+=16; - vk = VectorLoad32x4u(k, offset); + vk = VecLoad32x4u(k, offset); SHA256_ROUND2<4>(W,S, vk); - SHA256_ROUND2<5>(W,S, VectorShiftLeftOctet<4>(vk)); - SHA256_ROUND2<6>(W,S, VectorShiftLeftOctet<8>(vk)); - SHA256_ROUND2<7>(W,S, VectorShiftLeftOctet<12>(vk)); + SHA256_ROUND2<5>(W,S, VecShiftLeftOctet<4>(vk)); + SHA256_ROUND2<6>(W,S, VecShiftLeftOctet<8>(vk)); + SHA256_ROUND2<7>(W,S, VecShiftLeftOctet<12>(vk)); offset+=16; - vk = VectorLoad32x4u(k, offset); + vk = VecLoad32x4u(k, offset); SHA256_ROUND2<8>(W,S, vk); - SHA256_ROUND2<9>(W,S, VectorShiftLeftOctet<4>(vk)); - SHA256_ROUND2<10>(W,S, VectorShiftLeftOctet<8>(vk)); - SHA256_ROUND2<11>(W,S, VectorShiftLeftOctet<12>(vk)); + SHA256_ROUND2<9>(W,S, VecShiftLeftOctet<4>(vk)); + SHA256_ROUND2<10>(W,S, VecShiftLeftOctet<8>(vk)); + SHA256_ROUND2<11>(W,S, VecShiftLeftOctet<12>(vk)); offset+=16; - vk = VectorLoad32x4u(k, offset); + vk = VecLoad32x4u(k, offset); SHA256_ROUND2<12>(W,S, vk); - SHA256_ROUND2<13>(W,S, VectorShiftLeftOctet<4>(vk)); - SHA256_ROUND2<14>(W,S, VectorShiftLeftOctet<8>(vk)); - SHA256_ROUND2<15>(W,S, VectorShiftLeftOctet<12>(vk)); + SHA256_ROUND2<13>(W,S, VecShiftLeftOctet<4>(vk)); + SHA256_ROUND2<14>(W,S, VecShiftLeftOctet<8>(vk)); + SHA256_ROUND2<15>(W,S, VecShiftLeftOctet<12>(vk)); offset+=16; } @@ -1355,19 +1355,19 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t efgh += VectorPack(S[E],S[F],S[G],S[H]); } - VectorStore32x4u(abcd, state+0, 0); - 
VectorStore32x4u(efgh, state+4, 0); + VecStore32x4u(abcd, state+0, 0); + VecStore32x4u(efgh, state+4, 0); } static inline -uint64x2_p8 VectorPermute64x2(const uint64x2_p8 val, const uint8x16_p8 mask) +uint64x2_p8 VecPermute64x2(const uint64x2_p8 val, const uint8x16_p8 mask) { - return (uint64x2_p8)vec_perm(val, val, mask); + return (uint64x2_p8)VecPermute(val, val, mask); } // Unaligned load template static inline -uint64x2_p8 VectorLoad64x2u(const T* data, int offset) +uint64x2_p8 VecLoad64x2u(const T* data, int offset) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) return (uint64x2_p8)vec_xl(offset, (uint8_t*)data); @@ -1378,7 +1378,7 @@ uint64x2_p8 VectorLoad64x2u(const T* data, int offset) // Unaligned store template static inline -void VectorStore64x2u(const uint64x2_p8 val, T* data, int offset) +void VecStore64x2u(const uint64x2_p8 val, T* data, int offset) { #if defined(__xlc__) || defined(__xlC__) || defined(__clang__) vec_xst((uint8x16_p8)val, offset, (uint8_t*)data); @@ -1390,13 +1390,13 @@ void VectorStore64x2u(const uint64x2_p8 val, T* data, int offset) // Unaligned load of a user message. The load is big-endian, // and then the message is permuted for 32-bit words. template static inline -uint64x2_p8 VectorLoadMsg64x2(const T* data, int offset) +uint64x2_p8 VecLoadMsg64x2(const T* data, int offset) { #if (CRYPTOPP_LITTLE_ENDIAN) const uint8x16_p8 mask = {0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}; - return VectorPermute64x2(VectorLoad64x2u(data, offset), mask); + return VecPermute64x2(VecLoad64x2u(data, offset), mask); #else - return VectorLoad64x2u(data, offset); + return VecLoad64x2u(data, offset); #endif } @@ -1411,7 +1411,7 @@ static inline uint64x2_p8 VectorMaj(const uint64x2_p8 x, const uint64x2_p8 y, const uint64x2_p8 z) { // The trick below is due to Andy Polyakov and Jack Lloyd - return vec_sel(y, z, vec_xor(x, y)); + return vec_sel(y, z, VecXor(x, y)); } static inline @@ -1458,7 +1458,7 @@ static inline uint64x2_p8 VectorPack(const uint64x2_p8 x, const uint64x2_p8 y) { const uint8x16_p8 m = {0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; - return vec_perm(x,y,m); + return VecPermute(x,y,m); } template static inline @@ -1504,10 +1504,10 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t const uint64_t* k = reinterpret_cast(SHA512_K); const uint64_t* m = reinterpret_cast(data); - uint64x2_p8 ab = VectorLoad64x2u(state+0, 0); - uint64x2_p8 cd = VectorLoad64x2u(state+2, 0); - uint64x2_p8 ef = VectorLoad64x2u(state+4, 0); - uint64x2_p8 gh = VectorLoad64x2u(state+6, 0); + uint64x2_p8 ab = VecLoad64x2u(state+0, 0); + uint64x2_p8 cd = VecLoad64x2u(state+2, 0); + uint64x2_p8 ef = VecLoad64x2u(state+4, 0); + uint64x2_p8 gh = VecLoad64x2u(state+6, 0); uint64x2_p8 W[16], S[8], vm, vk; size_t blocks = length / SHA512::BLOCKSIZE; @@ -1517,82 +1517,82 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t S[A] = ab; S[C] = cd; S[E] = ef; S[G] = gh; - S[B] = VectorShiftLeftOctet<8>(S[A]); - S[D] = VectorShiftLeftOctet<8>(S[C]); - S[F] = VectorShiftLeftOctet<8>(S[E]); - S[H] = VectorShiftLeftOctet<8>(S[G]); + S[B] = VecShiftLeftOctet<8>(S[A]); + S[D] = VecShiftLeftOctet<8>(S[C]); + S[F] = VecShiftLeftOctet<8>(S[E]); + S[H] = VecShiftLeftOctet<8>(S[G]); // Rounds 0-16 - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<0>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = 
VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<1>(W,S, vk,vm); - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<2>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<3>(W,S, vk,vm); - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<4>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<5>(W,S, vk,vm); - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<6>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<7>(W,S, vk,vm); - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<8>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<9>(W,S, vk,vm); - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<10>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<11>(W,S, vk,vm); - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<12>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<13>(W,S, vk,vm); - vk = VectorLoad64x2u(k, offset); - vm = VectorLoadMsg64x2(m, offset); + vk = VecLoad64x2u(k, offset); + vm = VecLoadMsg64x2(m, offset); SHA512_ROUND1<14>(W,S, vk,vm); offset+=16; - vk = VectorShiftLeftOctet<8>(vk); - vm = VectorShiftLeftOctet<8>(vm); + vk = VecShiftLeftOctet<8>(vk); + vm = VecShiftLeftOctet<8>(vm); SHA512_ROUND1<15>(W,S, vk,vm); m += 16; // 64-bit words, not bytes @@ -1600,44 +1600,44 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t // Rounds 16-80 for (unsigned int i=16; i<80; i+=16) { - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<0>(W,S, vk); - SHA512_ROUND2<1>(W,S, VectorShiftLeftOctet<8>(vk)); + SHA512_ROUND2<1>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<2>(W,S, vk); - SHA512_ROUND2<3>(W,S, VectorShiftLeftOctet<8>(vk)); + SHA512_ROUND2<3>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<4>(W,S, vk); - SHA512_ROUND2<5>(W,S, VectorShiftLeftOctet<8>(vk)); + SHA512_ROUND2<5>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<6>(W,S, vk); - SHA512_ROUND2<7>(W,S, VectorShiftLeftOctet<8>(vk)); + 
SHA512_ROUND2<7>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<8>(W,S, vk); - SHA512_ROUND2<9>(W,S, VectorShiftLeftOctet<8>(vk)); + SHA512_ROUND2<9>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<10>(W,S, vk); - SHA512_ROUND2<11>(W,S, VectorShiftLeftOctet<8>(vk)); + SHA512_ROUND2<11>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<12>(W,S, vk); - SHA512_ROUND2<13>(W,S, VectorShiftLeftOctet<8>(vk)); + SHA512_ROUND2<13>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; - vk = VectorLoad64x2u(k, offset); + vk = VecLoad64x2u(k, offset); SHA512_ROUND2<14>(W,S, vk); - SHA512_ROUND2<15>(W,S, VectorShiftLeftOctet<8>(vk)); + SHA512_ROUND2<15>(W,S, VecShiftLeftOctet<8>(vk)); offset+=16; } @@ -1647,10 +1647,10 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t gh += VectorPack(S[G],S[H]); } - VectorStore64x2u(ab, state+0, 0); - VectorStore64x2u(cd, state+2, 0); - VectorStore64x2u(ef, state+4, 0); - VectorStore64x2u(gh, state+6, 0); + VecStore64x2u(ab, state+0, 0); + VecStore64x2u(cd, state+2, 0); + VecStore64x2u(ef, state+4, 0); + VecStore64x2u(gh, state+6, 0); } #endif // CRYPTOPP_POWER8_SHA_AVAILABLE diff --git a/simon128_simd.cpp b/simon128_simd.cpp index c1a80546..1277a234 100644 --- a/simon128_simd.cpp +++ b/simon128_simd.cpp @@ -548,8 +548,9 @@ using CryptoPP::uint8x16_p; using CryptoPP::uint32x4_p; using CryptoPP::uint64x2_p; -using CryptoPP::VectorAnd; -using CryptoPP::VectorXor; +using CryptoPP::VecAnd; +using CryptoPP::VecXor; +using CryptoPP::VecPermute; // Rotate left by bit count template @@ -569,8 +570,8 @@ CRYPTOPP_INLINE uint64x2_p RotateRight64(const uint64x2_p val) CRYPTOPP_INLINE uint64x2_p SIMON128_f(const uint64x2_p val) { - return VectorXor(RotateLeft64<2>(val), - VectorAnd(RotateLeft64<1>(val), RotateLeft64<8>(val))); + return VecXor(RotateLeft64<2>(val), + VecAnd(RotateLeft64<1>(val), RotateLeft64<8>(val))); } CRYPTOPP_INLINE void SIMON128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds) @@ -584,22 +585,22 @@ CRYPTOPP_INLINE void SIMON128_Enc_Block(uint32x4_p &block, const word64 *subkeys #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... - uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block, block, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block, block, m2); for (int i = 0; i < static_cast(rounds & ~1)-1; i += 2) { const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]); - y1 = VectorXor(VectorXor(y1, SIMON128_f(x1)), rk1); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1); const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]); - x1 = VectorXor(VectorXor(x1, SIMON128_f(y1)), rk2); + x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2); } if (rounds & 1) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); - y1 = VectorXor(VectorXor(y1, SIMON128_f(x1)), rk); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk); std::swap(x1, y1); } @@ -612,7 +613,7 @@ CRYPTOPP_INLINE void SIMON128_Enc_Block(uint32x4_p &block, const word64 *subkeys #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
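// [Annotation, not part of the patch] The hunks in this file swap direct
// vec_perm/vec_xor intrinsic calls and the old VectorXxx names for the renamed
// VecXxx helpers. A minimal sketch of what those helpers in ppc_simd.h are
// assumed to look like follows; the exact templates and casts may differ, so
// treat this as orientation only.
#include <altivec.h>

typedef __vector unsigned char uint8x16_p;

template <class T1, class T2>
inline T1 VecXor(const T1 vec1, const T2 vec2)
{
    return (T1)vec_xor(vec1, (T1)vec2);      // bitwise XOR, any lane width
}

template <class T1, class T2>
inline T1 VecAnd(const T1 vec1, const T2 vec2)
{
    return (T1)vec_and(vec1, (T1)vec2);      // bitwise AND, any lane width
}

template <class T1, class T2>
inline T1 VecPermute(const T1 vec1, const T1 vec2, const T2 mask)
{
    return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);  // byte-level shuffle
}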
- block = (uint32x4_p)vec_perm(x1, y1, m3); + block = (uint32x4_p)VecPermute(x1, y1, m3); } CRYPTOPP_INLINE void SIMON128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds) @@ -626,24 +627,24 @@ CRYPTOPP_INLINE void SIMON128_Dec_Block(uint32x4_p &block, const word64 *subkeys #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... - uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block, block, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block, block, m2); if (rounds & 1) { std::swap(x1, y1); const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); - y1 = VectorXor(VectorXor(y1, rk), SIMON128_f(x1)); + y1 = VecXor(VecXor(y1, rk), SIMON128_f(x1)); rounds--; } for (int i = static_cast(rounds-2); i >= 0; i -= 2) { const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i+1]); - x1 = VectorXor(VectorXor(x1, SIMON128_f(y1)), rk1); + x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk1); const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i]); - y1 = VectorXor(VectorXor(y1, SIMON128_f(x1)), rk2); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk2); } #if (CRYPTOPP_BIG_ENDIAN) @@ -655,7 +656,7 @@ CRYPTOPP_INLINE void SIMON128_Dec_Block(uint32x4_p &block, const word64 *subkeys #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block = (uint32x4_p)vec_perm(x1, y1, m3); + block = (uint32x4_p)VecPermute(x1, y1, m3); } CRYPTOPP_INLINE void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -671,32 +672,32 @@ CRYPTOPP_INLINE void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... - uint64x2_p x1 = (uint64x2_p)vec_perm(block0, block1, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block0, block1, m2); - uint64x2_p x2 = (uint64x2_p)vec_perm(block2, block3, m1); - uint64x2_p y2 = (uint64x2_p)vec_perm(block2, block3, m2); - uint64x2_p x3 = (uint64x2_p)vec_perm(block4, block5, m1); - uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block0, block1, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block0, block1, m2); + uint64x2_p x2 = (uint64x2_p)VecPermute(block2, block3, m1); + uint64x2_p y2 = (uint64x2_p)VecPermute(block2, block3, m2); + uint64x2_p x3 = (uint64x2_p)VecPermute(block4, block5, m1); + uint64x2_p y3 = (uint64x2_p)VecPermute(block4, block5, m2); for (int i = 0; i < static_cast(rounds & ~1)-1; i += 2) { const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i]); - y1 = VectorXor(VectorXor(y1, SIMON128_f(x1)), rk1); - y2 = VectorXor(VectorXor(y2, SIMON128_f(x2)), rk1); - y3 = VectorXor(VectorXor(y3, SIMON128_f(x3)), rk1); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk1); + y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk1); + y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk1); const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i+1]); - x1 = VectorXor(VectorXor(x1, SIMON128_f(y1)), rk2); - x2 = VectorXor(VectorXor(x2, SIMON128_f(y2)), rk2); - x3 = VectorXor(VectorXor(x3, SIMON128_f(y3)), rk2); + x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk2); + x2 = VecXor(VecXor(x2, SIMON128_f(y2)), rk2); + x3 = VecXor(VecXor(x3, SIMON128_f(y3)), rk2); } if (rounds & 1) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); - y1 = VectorXor(VectorXor(y1, SIMON128_f(x1)), rk); - y2 = VectorXor(VectorXor(y2, SIMON128_f(x2)), rk); - y3 = VectorXor(VectorXor(y3, SIMON128_f(x3)), rk); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk); + y2 = 
VecXor(VecXor(y2, SIMON128_f(x2)), rk); + y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk); std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3); } @@ -709,12 +710,12 @@ CRYPTOPP_INLINE void SIMON128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } CRYPTOPP_INLINE void SIMON128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -730,34 +731,34 @@ CRYPTOPP_INLINE void SIMON128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... - uint64x2_p x1 = (uint64x2_p)vec_perm(block0, block1, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block0, block1, m2); - uint64x2_p x2 = (uint64x2_p)vec_perm(block2, block3, m1); - uint64x2_p y2 = (uint64x2_p)vec_perm(block2, block3, m2); - uint64x2_p x3 = (uint64x2_p)vec_perm(block4, block5, m1); - uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block0, block1, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block0, block1, m2); + uint64x2_p x2 = (uint64x2_p)VecPermute(block2, block3, m1); + uint64x2_p y2 = (uint64x2_p)VecPermute(block2, block3, m2); + uint64x2_p x3 = (uint64x2_p)VecPermute(block4, block5, m1); + uint64x2_p y3 = (uint64x2_p)VecPermute(block4, block5, m2); if (rounds & 1) { std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3); const uint64x2_p rk = vec_splats((unsigned long long)subkeys[rounds-1]); - y1 = VectorXor(VectorXor(y1, rk), SIMON128_f(x1)); - y2 = VectorXor(VectorXor(y2, rk), SIMON128_f(x2)); - y3 = VectorXor(VectorXor(y3, rk), SIMON128_f(x3)); + y1 = VecXor(VecXor(y1, rk), SIMON128_f(x1)); + y2 = VecXor(VecXor(y2, rk), SIMON128_f(x2)); + y3 = VecXor(VecXor(y3, rk), SIMON128_f(x3)); rounds--; } for (int i = static_cast(rounds-2); i >= 0; i -= 2) { const uint64x2_p rk1 = vec_splats((unsigned long long)subkeys[i+1]); - x1 = VectorXor(VectorXor(x1, SIMON128_f(y1)), rk1); - x2 = VectorXor(VectorXor(x2, SIMON128_f(y2)), rk1); - x3 = VectorXor(VectorXor(x3, SIMON128_f(y3)), rk1); + x1 = VecXor(VecXor(x1, SIMON128_f(y1)), rk1); + x2 = VecXor(VecXor(x2, SIMON128_f(y2)), rk1); + x3 = VecXor(VecXor(x3, SIMON128_f(y3)), rk1); const uint64x2_p rk2 = vec_splats((unsigned long long)subkeys[i]); - y1 = VectorXor(VectorXor(y1, SIMON128_f(x1)), rk2); - y2 = VectorXor(VectorXor(y2, SIMON128_f(x2)), rk2); - y3 = VectorXor(VectorXor(y3, SIMON128_f(x3)), rk2); + y1 = VecXor(VecXor(y1, SIMON128_f(x1)), rk2); + y2 = VecXor(VecXor(y2, SIMON128_f(x2)), rk2); + y3 = VecXor(VecXor(y3, SIMON128_f(x3)), rk2); } #if (CRYPTOPP_BIG_ENDIAN) @@ -769,12 +770,12 @@ CRYPTOPP_INLINE void SIMON128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
- block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } #endif // CRYPTOPP_POWER8_AVAILABLE diff --git a/simon64_simd.cpp b/simon64_simd.cpp index 6b1a1db6..74bcf22b 100644 --- a/simon64_simd.cpp +++ b/simon64_simd.cpp @@ -538,10 +538,11 @@ CRYPTOPP_INLINE void SIMON64_Dec_6_Blocks(__m128i &block0, __m128i &block1, using CryptoPP::uint8x16_p; using CryptoPP::uint32x4_p; -using CryptoPP::VectorAnd; -using CryptoPP::VectorXor; -using CryptoPP::VectorLoad; -using CryptoPP::VectorLoadBE; +using CryptoPP::VecAnd; +using CryptoPP::VecXor; +using CryptoPP::VecLoad; +using CryptoPP::VecLoadBE; +using CryptoPP::VecPermute; // Rotate left by bit count template @@ -561,8 +562,8 @@ CRYPTOPP_INLINE uint32x4_p RotateRight32(const uint32x4_p val) CRYPTOPP_INLINE uint32x4_p SIMON64_f(const uint32x4_p val) { - return VectorXor(RotateLeft32<2>(val), - VectorAnd(RotateLeft32<1>(val), RotateLeft32<8>(val))); + return VecXor(RotateLeft32<2>(val), + VecAnd(RotateLeft32<1>(val), RotateLeft32<8>(val))); } CRYPTOPP_INLINE void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, @@ -577,8 +578,8 @@ CRYPTOPP_INLINE void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... - uint32x4_p x1 = vec_perm(block0, block1, m1); - uint32x4_p y1 = vec_perm(block0, block1, m2); + uint32x4_p x1 = VecPermute(block0, block1, m1); + uint32x4_p y1 = VecPermute(block0, block1, m2); for (int i = 0; i < static_cast(rounds & ~1)-1; i += 2) { @@ -587,13 +588,13 @@ CRYPTOPP_INLINE void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, const uint32x4_p rk2 = vec_splats(subkeys[i+1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk1 = VectorLoad(subkeys+i); - uint32x4_p rk2 = VectorLoad(subkeys+i+1); - rk1 = vec_perm(rk1, rk1, m); - rk2 = vec_perm(rk2, rk2, m); + uint32x4_p rk1 = VecLoad(subkeys+i); + uint32x4_p rk2 = VecLoad(subkeys+i+1); + rk1 = VecPermute(rk1, rk1, m); + rk2 = VecPermute(rk2, rk2, m); #endif - y1 = VectorXor(VectorXor(y1, SIMON64_f(x1)), rk1); - x1 = VectorXor(VectorXor(x1, SIMON64_f(y1)), rk2); + y1 = VecXor(VecXor(y1, SIMON64_f(x1)), rk1); + x1 = VecXor(VecXor(x1, SIMON64_f(y1)), rk2); } if (rounds & 1) @@ -602,10 +603,10 @@ CRYPTOPP_INLINE void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+rounds-1); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+rounds-1); + rk = VecPermute(rk, rk, m); #endif - y1 = VectorXor(VectorXor(y1, SIMON64_f(x1)), rk); + y1 = VecXor(VecXor(y1, SIMON64_f(x1)), rk); std::swap(x1, y1); } @@ -618,8 +619,8 @@ CRYPTOPP_INLINE void SIMON64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = 
(uint32x4_p)VecPermute(x1, y1, m4); } CRYPTOPP_INLINE void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, @@ -634,8 +635,8 @@ CRYPTOPP_INLINE void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... - uint32x4_p x1 = vec_perm(block0, block1, m1); - uint32x4_p y1 = vec_perm(block0, block1, m2); + uint32x4_p x1 = VecPermute(block0, block1, m1); + uint32x4_p y1 = VecPermute(block0, block1, m2); if (rounds & 1) { @@ -644,10 +645,10 @@ CRYPTOPP_INLINE void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+rounds-1); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+rounds-1); + rk = VecPermute(rk, rk, m); #endif - y1 = VectorXor(VectorXor(y1, rk), SIMON64_f(x1)); + y1 = VecXor(VecXor(y1, rk), SIMON64_f(x1)); rounds--; } @@ -658,13 +659,13 @@ CRYPTOPP_INLINE void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, const uint32x4_p rk2 = vec_splats(subkeys[i]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk1 = VectorLoad(subkeys+i+1); - uint32x4_p rk2 = VectorLoad(subkeys+i); - rk1 = vec_perm(rk1, rk1, m); - rk2 = vec_perm(rk2, rk2, m); + uint32x4_p rk1 = VecLoad(subkeys+i+1); + uint32x4_p rk2 = VecLoad(subkeys+i); + rk1 = VecPermute(rk1, rk1, m); + rk2 = VecPermute(rk2, rk2, m); #endif - x1 = VectorXor(VectorXor(x1, SIMON64_f(y1)), rk1); - y1 = VectorXor(VectorXor(y1, SIMON64_f(x1)), rk2); + x1 = VecXor(VecXor(x1, SIMON64_f(y1)), rk1); + y1 = VecXor(VecXor(y1, SIMON64_f(x1)), rk2); } #if (CRYPTOPP_BIG_ENDIAN) @@ -676,8 +677,8 @@ CRYPTOPP_INLINE void SIMON64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); } CRYPTOPP_INLINE void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -693,12 +694,12 @@ CRYPTOPP_INLINE void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... 
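// [Annotation, not part of the patch] Scalar reference for the SIMON64 round
// logic that the vector code in this file expresses with VecXor, VecAnd and
// the RotateLeft32 templates. The names below are illustrative only.
#include <stdint.h>

static inline uint32_t rotl32(uint32_t v, unsigned int r)
{
    return (uint32_t)((v << r) | (v >> (32 - r)));   // r in 1..31
}

// SIMON f(): (x <<< 1 AND x <<< 8) XOR (x <<< 2), matching SIMON64_f above.
static inline uint32_t simon64_f(uint32_t x)
{
    return (rotl32(x, 1) & rotl32(x, 8)) ^ rotl32(x, 2);
}

// One unrolled pair of encryption rounds, as in the vector loop:
// y ^= f(x) ^ rk[i]; then x ^= f(y) ^ rk[i+1].
static inline void simon64_enc_round_pair(uint32_t &x, uint32_t &y,
                                          uint32_t rk1, uint32_t rk2)
{
    y ^= simon64_f(x) ^ rk1;
    x ^= simon64_f(y) ^ rk2;
}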
- uint32x4_p x1 = (uint32x4_p)vec_perm(block0, block1, m1); - uint32x4_p y1 = (uint32x4_p)vec_perm(block0, block1, m2); - uint32x4_p x2 = (uint32x4_p)vec_perm(block2, block3, m1); - uint32x4_p y2 = (uint32x4_p)vec_perm(block2, block3, m2); - uint32x4_p x3 = (uint32x4_p)vec_perm(block4, block5, m1); - uint32x4_p y3 = (uint32x4_p)vec_perm(block4, block5, m2); + uint32x4_p x1 = (uint32x4_p)VecPermute(block0, block1, m1); + uint32x4_p y1 = (uint32x4_p)VecPermute(block0, block1, m2); + uint32x4_p x2 = (uint32x4_p)VecPermute(block2, block3, m1); + uint32x4_p y2 = (uint32x4_p)VecPermute(block2, block3, m2); + uint32x4_p x3 = (uint32x4_p)VecPermute(block4, block5, m1); + uint32x4_p y3 = (uint32x4_p)VecPermute(block4, block5, m2); for (int i = 0; i < static_cast(rounds & ~1)-1; i += 2) { @@ -707,18 +708,18 @@ CRYPTOPP_INLINE void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 const uint32x4_p rk2 = vec_splats(subkeys[i+1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk1 = VectorLoad(subkeys+i); - uint32x4_p rk2 = VectorLoad(subkeys+i+1); - rk1 = vec_perm(rk1, rk1, m); - rk2 = vec_perm(rk2, rk2, m); + uint32x4_p rk1 = VecLoad(subkeys+i); + uint32x4_p rk2 = VecLoad(subkeys+i+1); + rk1 = VecPermute(rk1, rk1, m); + rk2 = VecPermute(rk2, rk2, m); #endif - y1 = VectorXor(VectorXor(y1, SIMON64_f(x1)), rk1); - y2 = VectorXor(VectorXor(y2, SIMON64_f(x2)), rk1); - y3 = VectorXor(VectorXor(y3, SIMON64_f(x3)), rk1); + y1 = VecXor(VecXor(y1, SIMON64_f(x1)), rk1); + y2 = VecXor(VecXor(y2, SIMON64_f(x2)), rk1); + y3 = VecXor(VecXor(y3, SIMON64_f(x3)), rk1); - x1 = VectorXor(VectorXor(x1, SIMON64_f(y1)), rk2); - x2 = VectorXor(VectorXor(x2, SIMON64_f(y2)), rk2); - x3 = VectorXor(VectorXor(x3, SIMON64_f(y3)), rk2); + x1 = VecXor(VecXor(x1, SIMON64_f(y1)), rk2); + x2 = VecXor(VecXor(x2, SIMON64_f(y2)), rk2); + x3 = VecXor(VecXor(x3, SIMON64_f(y3)), rk2); } if (rounds & 1) @@ -727,12 +728,12 @@ CRYPTOPP_INLINE void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+rounds-1); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+rounds-1); + rk = VecPermute(rk, rk, m); #endif - y1 = VectorXor(VectorXor(y1, SIMON64_f(x1)), rk); - y2 = VectorXor(VectorXor(y2, SIMON64_f(x2)), rk); - y3 = VectorXor(VectorXor(y3, SIMON64_f(x3)), rk); + y1 = VecXor(VecXor(y1, SIMON64_f(x1)), rk); + y2 = VecXor(VecXor(y2, SIMON64_f(x2)), rk); + y3 = VecXor(VecXor(y3, SIMON64_f(x3)), rk); std::swap(x1, y1); std::swap(x2, y2); std::swap(x3, y3); } @@ -745,12 +746,12 @@ CRYPTOPP_INLINE void SIMON64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
- block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } CRYPTOPP_INLINE void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -766,12 +767,12 @@ CRYPTOPP_INLINE void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... - uint32x4_p x1 = (uint32x4_p)vec_perm(block0, block1, m1); - uint32x4_p y1 = (uint32x4_p)vec_perm(block0, block1, m2); - uint32x4_p x2 = (uint32x4_p)vec_perm(block2, block3, m1); - uint32x4_p y2 = (uint32x4_p)vec_perm(block2, block3, m2); - uint32x4_p x3 = (uint32x4_p)vec_perm(block4, block5, m1); - uint32x4_p y3 = (uint32x4_p)vec_perm(block4, block5, m2); + uint32x4_p x1 = (uint32x4_p)VecPermute(block0, block1, m1); + uint32x4_p y1 = (uint32x4_p)VecPermute(block0, block1, m2); + uint32x4_p x2 = (uint32x4_p)VecPermute(block2, block3, m1); + uint32x4_p y2 = (uint32x4_p)VecPermute(block2, block3, m2); + uint32x4_p x3 = (uint32x4_p)VecPermute(block4, block5, m1); + uint32x4_p y3 = (uint32x4_p)VecPermute(block4, block5, m2); if (rounds & 1) { @@ -781,12 +782,12 @@ CRYPTOPP_INLINE void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 const uint32x4_p rk = vec_splats(subkeys[rounds-1]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+rounds-1); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+rounds-1); + rk = VecPermute(rk, rk, m); #endif - y1 = VectorXor(VectorXor(y1, rk), SIMON64_f(x1)); - y2 = VectorXor(VectorXor(y2, rk), SIMON64_f(x2)); - y3 = VectorXor(VectorXor(y3, rk), SIMON64_f(x3)); + y1 = VecXor(VecXor(y1, rk), SIMON64_f(x1)); + y2 = VecXor(VecXor(y2, rk), SIMON64_f(x2)); + y3 = VecXor(VecXor(y3, rk), SIMON64_f(x3)); rounds--; } @@ -797,18 +798,18 @@ CRYPTOPP_INLINE void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 const uint32x4_p rk2 = vec_splats(subkeys[i]); #else const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk1 = VectorLoad(subkeys+i+1); - uint32x4_p rk2 = VectorLoad(subkeys+i); - rk1 = vec_perm(rk1, rk1, m); - rk2 = vec_perm(rk2, rk2, m); + uint32x4_p rk1 = VecLoad(subkeys+i+1); + uint32x4_p rk2 = VecLoad(subkeys+i); + rk1 = VecPermute(rk1, rk1, m); + rk2 = VecPermute(rk2, rk2, m); #endif - x1 = VectorXor(VectorXor(x1, SIMON64_f(y1)), rk1); - x2 = VectorXor(VectorXor(x2, SIMON64_f(y2)), rk1); - x3 = VectorXor(VectorXor(x3, SIMON64_f(y3)), rk1); + x1 = VecXor(VecXor(x1, SIMON64_f(y1)), rk1); + x2 = VecXor(VecXor(x2, SIMON64_f(y2)), rk1); + x3 = VecXor(VecXor(x3, SIMON64_f(y3)), rk1); - y1 = VectorXor(VectorXor(y1, SIMON64_f(x1)), rk2); - y2 = VectorXor(VectorXor(y2, SIMON64_f(x2)), rk2); - y3 = VectorXor(VectorXor(y3, SIMON64_f(x3)), rk2); + y1 = VecXor(VecXor(y1, SIMON64_f(x1)), rk2); + y2 = VecXor(VecXor(y2, SIMON64_f(x2)), rk2); + y3 = VecXor(VecXor(y3, SIMON64_f(x3)), rk2); } #if (CRYPTOPP_BIG_ENDIAN) @@ -820,12 +821,12 @@ CRYPTOPP_INLINE void SIMON64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1 #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... 
- block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } #endif // CRYPTOPP_ALTIVEC_AVAILABLE diff --git a/speck128_simd.cpp b/speck128_simd.cpp index dd1ef08c..d59b1b1e 100644 --- a/speck128_simd.cpp +++ b/speck128_simd.cpp @@ -479,9 +479,10 @@ using CryptoPP::uint8x16_p; using CryptoPP::uint32x4_p; using CryptoPP::uint64x2_p; -using CryptoPP::VectorAdd; -using CryptoPP::VectorSub; -using CryptoPP::VectorXor; +using CryptoPP::VecAdd; +using CryptoPP::VecSub; +using CryptoPP::VecXor; +using CryptoPP::VecPermute; // Rotate left by bit count template @@ -510,19 +511,19 @@ void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... - uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block, block, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block, block, m2); for (int i=0; i < static_cast(rounds); ++i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); x1 = RotateRight64<8>(x1); - x1 = VectorAdd(x1, y1); - x1 = VectorXor(x1, rk); + x1 = VecAdd(x1, y1); + x1 = VecXor(x1, rk); y1 = RotateLeft64<3>(y1); - y1 = VectorXor(y1, x1); + y1 = VecXor(y1, x1); } #if (CRYPTOPP_BIG_ENDIAN) @@ -534,7 +535,7 @@ void SPECK128_Enc_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block = (uint32x4_p)vec_perm(x1, y1, m3); + block = (uint32x4_p)VecPermute(x1, y1, m3); } void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int rounds) @@ -548,17 +549,17 @@ void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... - uint64x2_p x1 = (uint64x2_p)vec_perm(block, block, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block, block, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block, block, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block, block, m2); for (int i = static_cast(rounds-1); i >= 0; --i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); - y1 = VectorXor(y1, x1); + y1 = VecXor(y1, x1); y1 = RotateRight64<3>(y1); - x1 = VectorXor(x1, rk); - x1 = VectorSub(x1, y1); + x1 = VecXor(x1, rk); + x1 = VecSub(x1, y1); x1 = RotateLeft64<8>(x1); } @@ -571,7 +572,7 @@ void SPECK128_Dec_Block(uint32x4_p &block, const word64 *subkeys, unsigned int r #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block = (uint32x4_p)vec_perm(x1, y1, m3); + block = (uint32x4_p)VecPermute(x1, y1, m3); } void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -587,12 +588,12 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... 
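// [Annotation, not part of the patch] Scalar reference for the SPECK128 round
// that SPECK128_Enc_Block/SPECK128_Dec_Block express with VecAdd, VecSub,
// VecXor and the 64-bit rotate templates. Illustrative only.
#include <stdint.h>

static inline uint64_t rotl64(uint64_t v, unsigned int r) { return (v << r) | (v >> (64 - r)); }
static inline uint64_t rotr64(uint64_t v, unsigned int r) { return (v >> r) | (v << (64 - r)); }

// Encryption round: x = ((x >>> 8) + y) ^ k; y = (y <<< 3) ^ x.
static inline void speck128_enc_round(uint64_t &x, uint64_t &y, uint64_t k)
{
    x = rotr64(x, 8);
    x += y;
    x ^= k;
    y = rotl64(y, 3);
    y ^= x;
}

// Matching decryption round, run with the subkeys in reverse order.
static inline void speck128_dec_round(uint64_t &x, uint64_t &y, uint64_t k)
{
    y ^= x;
    y = rotr64(y, 3);
    x ^= k;
    x -= y;
    x = rotl64(x, 8);
}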
- uint64x2_p x1 = (uint64x2_p)vec_perm(block0, block1, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block0, block1, m2); - uint64x2_p x2 = (uint64x2_p)vec_perm(block2, block3, m1); - uint64x2_p y2 = (uint64x2_p)vec_perm(block2, block3, m2); - uint64x2_p x3 = (uint64x2_p)vec_perm(block4, block5, m1); - uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block0, block1, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block0, block1, m2); + uint64x2_p x2 = (uint64x2_p)VecPermute(block2, block3, m1); + uint64x2_p y2 = (uint64x2_p)VecPermute(block2, block3, m2); + uint64x2_p x3 = (uint64x2_p)VecPermute(block4, block5, m1); + uint64x2_p y3 = (uint64x2_p)VecPermute(block4, block5, m2); for (int i=0; i < static_cast(rounds); ++i) { @@ -601,19 +602,19 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, x1 = RotateRight64<8>(x1); x2 = RotateRight64<8>(x2); x3 = RotateRight64<8>(x3); - x1 = VectorAdd(x1, y1); - x2 = VectorAdd(x2, y2); - x3 = VectorAdd(x3, y3); - x1 = VectorXor(x1, rk); - x2 = VectorXor(x2, rk); - x3 = VectorXor(x3, rk); + x1 = VecAdd(x1, y1); + x2 = VecAdd(x2, y2); + x3 = VecAdd(x3, y3); + x1 = VecXor(x1, rk); + x2 = VecXor(x2, rk); + x3 = VecXor(x3, rk); y1 = RotateLeft64<3>(y1); y2 = RotateLeft64<3>(y2); y3 = RotateLeft64<3>(y3); - y1 = VectorXor(y1, x1); - y2 = VectorXor(y2, x2); - y3 = VectorXor(y3, x3); + y1 = VecXor(y1, x1); + y2 = VecXor(y2, x2); + y3 = VecXor(y3, x3); } #if (CRYPTOPP_BIG_ENDIAN) @@ -625,12 +626,12 @@ void SPECK128_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -646,30 +647,30 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ... 
- uint64x2_p x1 = (uint64x2_p)vec_perm(block0, block1, m1); - uint64x2_p y1 = (uint64x2_p)vec_perm(block0, block1, m2); - uint64x2_p x2 = (uint64x2_p)vec_perm(block2, block3, m1); - uint64x2_p y2 = (uint64x2_p)vec_perm(block2, block3, m2); - uint64x2_p x3 = (uint64x2_p)vec_perm(block4, block5, m1); - uint64x2_p y3 = (uint64x2_p)vec_perm(block4, block5, m2); + uint64x2_p x1 = (uint64x2_p)VecPermute(block0, block1, m1); + uint64x2_p y1 = (uint64x2_p)VecPermute(block0, block1, m2); + uint64x2_p x2 = (uint64x2_p)VecPermute(block2, block3, m1); + uint64x2_p y2 = (uint64x2_p)VecPermute(block2, block3, m2); + uint64x2_p x3 = (uint64x2_p)VecPermute(block4, block5, m1); + uint64x2_p y3 = (uint64x2_p)VecPermute(block4, block5, m2); for (int i = static_cast(rounds-1); i >= 0; --i) { const uint64x2_p rk = vec_splats((unsigned long long)subkeys[i]); - y1 = VectorXor(y1, x1); - y2 = VectorXor(y2, x2); - y3 = VectorXor(y3, x3); + y1 = VecXor(y1, x1); + y2 = VecXor(y2, x2); + y3 = VecXor(y3, x3); y1 = RotateRight64<3>(y1); y2 = RotateRight64<3>(y2); y3 = RotateRight64<3>(y3); - x1 = VectorXor(x1, rk); - x2 = VectorXor(x2, rk); - x3 = VectorXor(x3, rk); - x1 = VectorSub(x1, y1); - x2 = VectorSub(x2, y2); - x3 = VectorSub(x3, y3); + x1 = VecXor(x1, rk); + x2 = VecXor(x2, rk); + x3 = VecXor(x3, rk); + x1 = VecSub(x1, y1); + x2 = VecSub(x2, y2); + x3 = VecSub(x3, y3); x1 = RotateLeft64<8>(x1); x2 = RotateLeft64<8>(x2); x3 = RotateLeft64<8>(x3); @@ -684,12 +685,12 @@ void SPECK128_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ... - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } #endif // CRYPTOPP_POWER8_AVAILABLE diff --git a/speck64_simd.cpp b/speck64_simd.cpp index 0ed4f8d3..b8f8b2b2 100644 --- a/speck64_simd.cpp +++ b/speck64_simd.cpp @@ -483,10 +483,11 @@ CRYPTOPP_INLINE void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1, using CryptoPP::uint8x16_p; using CryptoPP::uint32x4_p; -using CryptoPP::VectorAdd; -using CryptoPP::VectorSub; -using CryptoPP::VectorXor; -using CryptoPP::VectorLoad; +using CryptoPP::VecAdd; +using CryptoPP::VecSub; +using CryptoPP::VecXor; +using CryptoPP::VecLoad; +using CryptoPP::VecPermute; // Rotate left by bit count template @@ -516,8 +517,8 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... 
- uint32x4_p x1 = vec_perm(block0, block1, m1); - uint32x4_p y1 = vec_perm(block0, block1, m2); + uint32x4_p x1 = VecPermute(block0, block1, m1); + uint32x4_p y1 = VecPermute(block0, block1, m2); for (int i=0; i < static_cast(rounds); ++i) { @@ -526,16 +527,16 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, #else // subkeys has extra elements so memory backs the last subkey const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+i); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+i); + rk = VecPermute(rk, rk, m); #endif x1 = RotateRight32<8>(x1); - x1 = VectorAdd(x1, y1); - x1 = VectorXor(x1, rk); + x1 = VecAdd(x1, y1); + x1 = VecXor(x1, rk); y1 = RotateLeft32<3>(y1); - y1 = VectorXor(y1, x1); + y1 = VecXor(y1, x1); } #if (CRYPTOPP_BIG_ENDIAN) @@ -547,8 +548,8 @@ void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); } void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, @@ -563,8 +564,8 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... - uint32x4_p x1 = vec_perm(block0, block1, m1); - uint32x4_p y1 = vec_perm(block0, block1, m2); + uint32x4_p x1 = VecPermute(block0, block1, m1); + uint32x4_p y1 = VecPermute(block0, block1, m2); for (int i = static_cast(rounds-1); i >= 0; --i) { @@ -573,15 +574,15 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, #else // subkeys has extra elements so memory backs the last subkey const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+i); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+i); + rk = VecPermute(rk, rk, m); #endif - y1 = VectorXor(y1, x1); + y1 = VecXor(y1, x1); y1 = RotateRight32<3>(y1); - x1 = VectorXor(x1, rk); - x1 = VectorSub(x1, y1); + x1 = VecXor(x1, rk); + x1 = VecSub(x1, y1); x1 = RotateLeft32<8>(x1); } @@ -594,8 +595,8 @@ void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); } void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -611,12 +612,12 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... 
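// [Annotation, not part of the patch] On the fallback path in these kernels
// (the #if condition is not visible in the hunk) the 32-bit subkey is loaded
// with VecLoad and then broadcast by repeating its byte indices {0,1,2,3} in a
// permute mask, instead of using vec_splats. A standalone sketch of that
// pattern, with a hypothetical helper name:
#include <altivec.h>

typedef __vector unsigned char uint8x16_p;
typedef __vector unsigned int  uint32x4_p;

static inline uint32x4_p SplatWord0(const uint32x4_p rk)
{
    // Replicate bytes 0..3 (the first 32-bit word) into every lane.
    const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
    return (uint32x4_p)vec_perm(rk, rk, m);
}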
- uint32x4_p x1 = (uint32x4_p)vec_perm(block0, block1, m1); - uint32x4_p y1 = (uint32x4_p)vec_perm(block0, block1, m2); - uint32x4_p x2 = (uint32x4_p)vec_perm(block2, block3, m1); - uint32x4_p y2 = (uint32x4_p)vec_perm(block2, block3, m2); - uint32x4_p x3 = (uint32x4_p)vec_perm(block4, block5, m1); - uint32x4_p y3 = (uint32x4_p)vec_perm(block4, block5, m2); + uint32x4_p x1 = (uint32x4_p)VecPermute(block0, block1, m1); + uint32x4_p y1 = (uint32x4_p)VecPermute(block0, block1, m2); + uint32x4_p x2 = (uint32x4_p)VecPermute(block2, block3, m1); + uint32x4_p y2 = (uint32x4_p)VecPermute(block2, block3, m2); + uint32x4_p x3 = (uint32x4_p)VecPermute(block4, block5, m1); + uint32x4_p y3 = (uint32x4_p)VecPermute(block4, block5, m2); for (int i=0; i < static_cast(rounds); ++i) { @@ -625,29 +626,29 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #else // subkeys has extra elements so memory backs the last subkey const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+i); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+i); + rk = VecPermute(rk, rk, m); #endif x1 = RotateRight32<8>(x1); x2 = RotateRight32<8>(x2); x3 = RotateRight32<8>(x3); - x1 = VectorAdd(x1, y1); - x2 = VectorAdd(x2, y2); - x3 = VectorAdd(x3, y3); + x1 = VecAdd(x1, y1); + x2 = VecAdd(x2, y2); + x3 = VecAdd(x3, y3); - x1 = VectorXor(x1, rk); - x2 = VectorXor(x2, rk); - x3 = VectorXor(x3, rk); + x1 = VecXor(x1, rk); + x2 = VecXor(x2, rk); + x3 = VecXor(x3, rk); y1 = RotateLeft32<3>(y1); y2 = RotateLeft32<3>(y2); y3 = RotateLeft32<3>(y3); - y1 = VectorXor(y1, x1); - y2 = VectorXor(y2, x2); - y3 = VectorXor(y3, x3); + y1 = VecXor(y1, x1); + y2 = VecXor(y2, x2); + y3 = VecXor(y3, x3); } #if (CRYPTOPP_BIG_ENDIAN) @@ -659,12 +660,12 @@ void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, @@ -680,12 +681,12 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ... 
- uint32x4_p x1 = (uint32x4_p)vec_perm(block0, block1, m1); - uint32x4_p y1 = (uint32x4_p)vec_perm(block0, block1, m2); - uint32x4_p x2 = (uint32x4_p)vec_perm(block2, block3, m1); - uint32x4_p y2 = (uint32x4_p)vec_perm(block2, block3, m2); - uint32x4_p x3 = (uint32x4_p)vec_perm(block4, block5, m1); - uint32x4_p y3 = (uint32x4_p)vec_perm(block4, block5, m2); + uint32x4_p x1 = (uint32x4_p)VecPermute(block0, block1, m1); + uint32x4_p y1 = (uint32x4_p)VecPermute(block0, block1, m2); + uint32x4_p x2 = (uint32x4_p)VecPermute(block2, block3, m1); + uint32x4_p y2 = (uint32x4_p)VecPermute(block2, block3, m2); + uint32x4_p x3 = (uint32x4_p)VecPermute(block4, block5, m1); + uint32x4_p y3 = (uint32x4_p)VecPermute(block4, block5, m2); for (int i = static_cast(rounds-1); i >= 0; --i) { @@ -694,25 +695,25 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #else // subkeys has extra elements so memory backs the last subkey const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3}; - uint32x4_p rk = VectorLoad(subkeys+i); - rk = vec_perm(rk, rk, m); + uint32x4_p rk = VecLoad(subkeys+i); + rk = VecPermute(rk, rk, m); #endif - y1 = VectorXor(y1, x1); - y2 = VectorXor(y2, x2); - y3 = VectorXor(y3, x3); + y1 = VecXor(y1, x1); + y2 = VecXor(y2, x2); + y3 = VecXor(y3, x3); y1 = RotateRight32<3>(y1); y2 = RotateRight32<3>(y2); y3 = RotateRight32<3>(y3); - x1 = VectorXor(x1, rk); - x2 = VectorXor(x2, rk); - x3 = VectorXor(x3, rk); + x1 = VecXor(x1, rk); + x2 = VecXor(x2, rk); + x3 = VecXor(x3, rk); - x1 = VectorSub(x1, y1); - x2 = VectorSub(x2, y2); - x3 = VectorSub(x3, y3); + x1 = VecSub(x1, y1); + x2 = VecSub(x2, y2); + x3 = VecSub(x3, y3); x1 = RotateLeft32<8>(x1); x2 = RotateLeft32<8>(x2); @@ -728,12 +729,12 @@ void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1, #endif // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4] - block0 = (uint32x4_p)vec_perm(x1, y1, m3); - block1 = (uint32x4_p)vec_perm(x1, y1, m4); - block2 = (uint32x4_p)vec_perm(x2, y2, m3); - block3 = (uint32x4_p)vec_perm(x2, y2, m4); - block4 = (uint32x4_p)vec_perm(x3, y3, m3); - block5 = (uint32x4_p)vec_perm(x3, y3, m4); + block0 = (uint32x4_p)VecPermute(x1, y1, m3); + block1 = (uint32x4_p)VecPermute(x1, y1, m4); + block2 = (uint32x4_p)VecPermute(x2, y2, m3); + block3 = (uint32x4_p)VecPermute(x2, y2, m4); + block4 = (uint32x4_p)VecPermute(x3, y3, m3); + block5 = (uint32x4_p)VecPermute(x3, y3, m4); } #endif // CRYPTOPP_ALTIVEC_AVAILABLE diff --git a/validat1.cpp b/validat1.cpp index 4432680f..39392b94 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -1089,44 +1089,44 @@ bool TestAltivecOps() const byte st2[16] ={21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6}; const byte st3[16] ={20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5}; - VectorStore(VectorLoad(src), dest); + VecStore(VecLoad(src), dest); pass1 = (0 == std::memcmp(src, dest, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - VectorStore(VectorLoad(src+1), dest+1); + VecStore(VecLoad(src+1), dest+1); pass1 = (0 == std::memcmp(st1, dest+1, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - VectorStore(VectorLoad(src+2), dest+2); + VecStore(VecLoad(src+2), dest+2); pass1 = (0 == std::memcmp(st2, dest+2, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - VectorStore(VectorLoad(src+3), dest+3); + VecStore(VecLoad(src+3), dest+3); pass1 = (0 == std::memcmp(st3, dest+3, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - VectorStoreBE(VectorLoadBE(src), dest); + VecStoreBE(VecLoadBE(src), dest); pass1 = (0 == std::memcmp(src, dest, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - 
VectorStoreBE(VectorLoadBE(src+1), dest+1); + VecStoreBE(VecLoadBE(src+1), dest+1); pass1 = (0 == std::memcmp(st1, dest+1, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - VectorStoreBE(VectorLoadBE(src+2), dest+2); + VecStoreBE(VecLoadBE(src+2), dest+2); pass1 = (0 == std::memcmp(st2, dest+2, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - VectorStoreBE(VectorLoadBE(src+3), dest+3); + VecStoreBE(VecLoadBE(src+3), dest+3); pass1 = (0 == std::memcmp(st3, dest+3, 16)) && pass1; CRYPTOPP_ASSERT(pass1); #if (CRYPTOPP_LITTLE_ENDIAN) - VectorStore(VectorLoadBE(src), dest); + VecStore(VecLoadBE(src), dest); pass1 = (0 != std::memcmp(src, dest, 16)) && pass1; CRYPTOPP_ASSERT(pass1); - VectorStoreBE(VectorLoad(src), dest); + VecStoreBE(VecLoad(src), dest); pass1 = (0 != std::memcmp(src, dest, 16)) && pass1; CRYPTOPP_ASSERT(pass1); #endif @@ -1143,9 +1143,9 @@ bool TestAltivecOps() uint8x16_p val = {0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff}; - pass2 = (VectorEqual(val, VectorShiftLeftOctet<0>(val))) && pass2; + pass2 = (VecEqual(val, VecShiftLeftOctet<0>(val))) && pass2; CRYPTOPP_ASSERT(pass2); - pass2 = (VectorEqual(val, VectorShiftRightOctet<0>(val))) && pass2; + pass2 = (VecEqual(val, VecShiftRightOctet<0>(val))) && pass2; CRYPTOPP_ASSERT(pass2); uint8x16_p lsh1 = {0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, @@ -1153,9 +1153,9 @@ bool TestAltivecOps() uint8x16_p rsh1 = {0x00,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff}; - pass2 = (VectorEqual(lsh1, VectorShiftLeftOctet<1>(val))) && pass2; + pass2 = (VecEqual(lsh1, VecShiftLeftOctet<1>(val))) && pass2; CRYPTOPP_ASSERT(pass2); - pass2 = (VectorEqual(rsh1, VectorShiftRightOctet<1>(val))) && pass2; + pass2 = (VecEqual(rsh1, VecShiftRightOctet<1>(val))) && pass2; CRYPTOPP_ASSERT(pass2); uint8x16_p lsh15 = {0xff,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, @@ -1163,9 +1163,9 @@ bool TestAltivecOps() uint8x16_p rsh15 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0xff}; - pass2 = (VectorEqual(lsh15, VectorShiftLeftOctet<15>(val))) && pass2; + pass2 = (VecEqual(lsh15, VecShiftLeftOctet<15>(val))) && pass2; CRYPTOPP_ASSERT(pass2); - pass2 = (VectorEqual(rsh15, VectorShiftRightOctet<15>(val))) && pass2; + pass2 = (VecEqual(rsh15, VecShiftRightOctet<15>(val))) && pass2; CRYPTOPP_ASSERT(pass2); uint8x16_p lsh16 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, @@ -1173,9 +1173,9 @@ bool TestAltivecOps() uint8x16_p rsh16 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}; - pass2 = (VectorEqual(lsh16, VectorShiftLeftOctet<16>(val))) && pass2; + pass2 = (VecEqual(lsh16, VecShiftLeftOctet<16>(val))) && pass2; CRYPTOPP_ASSERT(pass2); - pass2 = (VectorEqual(rsh16, VectorShiftRightOctet<16>(val))) && pass2; + pass2 = (VecEqual(rsh16, VecShiftRightOctet<16>(val))) && pass2; CRYPTOPP_ASSERT(pass2); if (!pass2) @@ -1194,16 +1194,16 @@ bool TestAltivecOps() uint8x16_p ex3 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x1f,0x1e,0x1d,0x1c, 0x1b,0x1a,0x19,0x18}; - pass3 = VectorEqual(ex2, VectorGetLow(ex1)) && pass3; + pass3 = VecEqual(ex2, VecGetLow(ex1)) && pass3; CRYPTOPP_ASSERT(pass3); - pass3 = VectorEqual(ex3, VectorGetHigh(ex1)) && pass3; + pass3 = VecEqual(ex3, VecGetHigh(ex1)) && pass3; CRYPTOPP_ASSERT(pass3); - uint8x16_p ex4 = VectorShiftRightOctet<8>(VectorShiftLeftOctet<8>(ex1)); - pass3 = VectorEqual(ex4, VectorGetLow(ex1)) && pass3; + uint8x16_p ex4 = VecShiftRightOctet<8>(VecShiftLeftOctet<8>(ex1)); + pass3 = VecEqual(ex4, VecGetLow(ex1)) && 
pass3; CRYPTOPP_ASSERT(pass3); - uint8x16_p ex5 = VectorShiftRightOctet<8>(ex1); - pass3 = VectorEqual(ex5, VectorGetHigh(ex1)) && pass3; + uint8x16_p ex5 = VecShiftRightOctet<8>(ex1); + pass3 = VecEqual(ex5, VecGetHigh(ex1)) && pass3; CRYPTOPP_ASSERT(pass3); if (!pass3)
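// [Annotation, not part of the patch] TestAltivecOps above checks, among other
// things, that the octet shifts agree with the low/high extractors. A minimal
// standalone restatement of those two identities, assuming ppc_simd.h is
// available; the helper name is illustrative.
#include "ppc_simd.h"

static inline bool CheckGetLowHighIdentities(const CryptoPP::uint8x16_p x)
{
    using CryptoPP::uint8x16_p;
    using CryptoPP::VecEqual;
    using CryptoPP::VecGetLow;
    using CryptoPP::VecGetHigh;
    using CryptoPP::VecShiftLeftOctet;
    using CryptoPP::VecShiftRightOctet;

    // Shifting left then right by 8 octets clears the high half -> VecGetLow.
    const uint8x16_p lo = VecShiftRightOctet<8>(VecShiftLeftOctet<8>(x));
    // Shifting right by 8 octets moves the high half down -> VecGetHigh.
    const uint8x16_p hi = VecShiftRightOctet<8>(x);

    return VecEqual(lo, VecGetLow(x)) && VecEqual(hi, VecGetHigh(x));
}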