s390x/tcg: Fix VECTOR MULTIPLY AND ADD *

We missed that we always read a "double-wide even-odd element
pair of the fourth operand". Fix it in all four variants.

Fixes: 1b430aec41 ("s390x/tcg: Implement VECTOR MULTIPLY AND ADD *")
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20191021085715.3797-3-david@redhat.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
This commit is contained in:
David Hildenbrand 2019-10-21 10:57:11 +02:00 committed by Cornelia Huck
parent 49a7ce4e03
commit 8b95251947

View File

@ -336,7 +336,7 @@ void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3, \
for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \
int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
s390_vec_write_element##TBITS(v1, i, a * b + c); \
} \
@ -354,7 +354,7 @@ void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3, \
for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) { \
uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \
uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
s390_vec_write_element##TBITS(v1, i, a * b + c); \
} \
@ -372,7 +372,7 @@ void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3, \
for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j); \
int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j); \
int##TBITS##_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, j); \
int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
s390_vec_write_element##TBITS(v1, i, a * b + c); \
} \
@ -390,7 +390,7 @@ void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3, \
for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) { \
uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j); \
uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j); \
uint##TBITS##_t c = s390_vec_read_element##BITS(v4, j); \
uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i); \
\
s390_vec_write_element##TBITS(v1, i, a * b + c); \
} \