diff --git a/adv-simd.h b/adv-simd.h index e86c12ba..54eec0f1 100644 --- a/adv-simd.h +++ b/adv-simd.h @@ -494,12 +494,13 @@ inline size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6, /// \tparam F1 function to process 1 128-bit block /// \tparam F4 function to process 4 128-bit blocks /// \tparam W word type of the subkey table -/// \tparam V vector type of the NEON data type -/// \details AdvancedProcessBlocks128_6x2_NEON processes 4 and 1 NEON SIMD words +/// \tparam V vector type of the NEON datatype +/// \details AdvancedProcessBlocks128_4x1_NEON processes 4 and 1 NEON SIMD words /// at a time. /// \details The subkey type is usually word32 or word64. V is the vector type and it is /// usually uint32x4_t or uint64x2_t. F1, F4, W and V must use the same word and -/// vector type. +/// vector type. The V parameter is used to avoid template argument +/// deduction/substitution failures. template <typename F1, typename F4, typename W, typename V> inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4, const V& unused, const W *subKeys, size_t rounds, const byte *inBlocks, diff --git a/cham-simd.cpp b/cham-simd.cpp index a503aa4a..5b9f2e85 100644 --- a/cham-simd.cpp +++ b/cham-simd.cpp @@ -83,7 +83,7 @@ inline __m128i UnpackXMM<0>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpacklo_epi16(a, b); const __m128i r2 = _mm_unpacklo_epi16(c, d); @@ -102,7 +102,7 @@ inline __m128i UnpackXMM<1>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. 
The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpacklo_epi16(a, b); const __m128i r2 = _mm_unpacklo_epi16(c, d); @@ -121,7 +121,7 @@ inline __m128i UnpackXMM<2>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpacklo_epi16(a, b); const __m128i r2 = _mm_unpacklo_epi16(c, d); @@ -140,7 +140,7 @@ inline __m128i UnpackXMM<3>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpacklo_epi16(a, b); const __m128i r2 = _mm_unpacklo_epi16(c, d); @@ -159,7 +159,7 @@ inline __m128i UnpackXMM<4>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpackhi_epi16(a, b); const __m128i r2 = _mm_unpackhi_epi16(c, d); @@ -178,7 +178,7 @@ inline __m128i UnpackXMM<5>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. 
A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpackhi_epi16(a, b); const __m128i r2 = _mm_unpackhi_epi16(c, d); @@ -197,7 +197,7 @@ inline __m128i UnpackXMM<6>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpackhi_epi16(a, b); const __m128i r2 = _mm_unpackhi_epi16(c, d); @@ -216,7 +216,7 @@ inline __m128i UnpackXMM<7>(const __m128i& a, const __m128i& b, const __m128i& c { // The shuffle converts to and from little-endian for SSE. A specialized // CHAM implementation can avoid the shuffle by framing the data for - // encryption, decrementryption and benchmarks. The library cannot take the + // encryption, decryption and benchmarks. The library cannot take the // speed-up because of the byte oriented API. const __m128i r1 = _mm_unpackhi_epi16(a, b); const __m128i r2 = _mm_unpackhi_epi16(c, d);