Update documentation

2025-03-03 02:46:07 +00:00 · 2018-06-23 12:27:25 -04:00 · 2018-06-23 12:27:25 -04:00 · 527613df22
commit 527613df22
parent d0b5dac162
2 changed files with 12 additions and 11 deletions
--- a/adv-simd.h
+++ b/adv-simd.h
@@ -494,12 +494,13 @@ inline size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
 /// \tparam F1 function to process 1 128-bit block
 /// \tparam F4 function to process 4 128-bit blocks
 /// \tparam W word type of the subkey table
-/// \tparam V vector type of the NEON data type
-/// \details AdvancedProcessBlocks128_6x2_NEON processes 4 and 1 NEON SIMD words
+/// \tparam V vector type of the NEON datatype
+/// \details AdvancedProcessBlocks128_4x1_NEON processes 4 and 1 NEON SIMD words
 ///   at a time.
 /// \details The subkey type is usually word32 or word64. V is the vector type and it is
 ///   usually uint32x4_t or uint64x2_t. F1, F4, W and V must use the same word and
-///   vector type.
+///   vector type. The V parameter is used to avoid template argument
+///   deduction/substitution failures.
 template <typename F1, typename F4, typename W, typename V>
 inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
            const V& unused, const W *subKeys, size_t rounds, const byte *inBlocks,
--- a/cham-simd.cpp
+++ b/cham-simd.cpp
@@ -83,7 +83,7 @@ inline __m128i UnpackXMM<0>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpacklo_epi16(a, b);
    const __m128i r2 = _mm_unpacklo_epi16(c, d);
@@ -102,7 +102,7 @@ inline __m128i UnpackXMM<1>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpacklo_epi16(a, b);
    const __m128i r2 = _mm_unpacklo_epi16(c, d);
@@ -121,7 +121,7 @@ inline __m128i UnpackXMM<2>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpacklo_epi16(a, b);
    const __m128i r2 = _mm_unpacklo_epi16(c, d);
@@ -140,7 +140,7 @@ inline __m128i UnpackXMM<3>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpacklo_epi16(a, b);
    const __m128i r2 = _mm_unpacklo_epi16(c, d);
@@ -159,7 +159,7 @@ inline __m128i UnpackXMM<4>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpackhi_epi16(a, b);
    const __m128i r2 = _mm_unpackhi_epi16(c, d);
@@ -178,7 +178,7 @@ inline __m128i UnpackXMM<5>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpackhi_epi16(a, b);
    const __m128i r2 = _mm_unpackhi_epi16(c, d);
@@ -197,7 +197,7 @@ inline __m128i UnpackXMM<6>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpackhi_epi16(a, b);
    const __m128i r2 = _mm_unpackhi_epi16(c, d);
@@ -216,7 +216,7 @@ inline __m128i UnpackXMM<7>(const __m128i& a, const __m128i& b, const __m128i& c
 {
    // The shuffle converts to and from little-endian for SSE. A specialized
    // CHAM implementation can avoid the shuffle by framing the data for
-    // encryption, decrementryption and benchmarks. The library cannot take the
+    // encryption, decryption and benchmarks. The library cannot take the
    // speed-up because of the byte oriented API.
    const __m128i r1 = _mm_unpackhi_epi16(a, b);
    const __m128i r2 = _mm_unpackhi_epi16(c, d);