mirror of
https://github.com/shadps4-emu/ext-zlib-ng.git
synced 2024-10-07 00:13:58 +00:00
Introduce zmemcpy to use unaligned access for architectures we know support unaligned access, otherwise use memcpy.
This commit is contained in:
parent
2fb95de5e8
commit
363a95fb9b
@ -25,19 +25,19 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
|
||||
|
||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||
uint16_t tmp;
|
||||
memcpy(&tmp, from, 2);
|
||||
zmemcpy_2(&tmp, from);
|
||||
*chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
|
||||
}
|
||||
|
||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||
uint32_t tmp;
|
||||
memcpy(&tmp, from, 4);
|
||||
zmemcpy_4(&tmp, from);
|
||||
*chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
|
||||
}
|
||||
|
||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||
uint64_t tmp;
|
||||
memcpy(&tmp, from, 8);
|
||||
zmemcpy_8(&tmp, from);
|
||||
*chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
|
||||
}
|
||||
|
||||
|
@ -21,19 +21,19 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
|
||||
|
||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||
uint16_t tmp;
|
||||
memcpy(&tmp, from, 2);
|
||||
zmemcpy_2(&tmp, from);
|
||||
*chunk = (vector unsigned char)vec_splats(tmp);
|
||||
}
|
||||
|
||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||
uint32_t tmp;
|
||||
memcpy(&tmp, from, 4);
|
||||
zmemcpy_4(&tmp, from);
|
||||
*chunk = (vector unsigned char)vec_splats(tmp);
|
||||
}
|
||||
|
||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||
uint64_t tmp;
|
||||
memcpy(&tmp, from, 8);
|
||||
zmemcpy_8(&tmp, from);
|
||||
*chunk = (vector unsigned char)vec_splats(tmp);
|
||||
}
|
||||
|
||||
|
10
chunkset.c
10
chunkset.c
@ -18,20 +18,20 @@ static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
|
||||
|
||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||
uint8_t *dest = (uint8_t *)chunk;
|
||||
memcpy(dest, from, sizeof(uint32_t));
|
||||
memcpy(dest+4, from, sizeof(uint32_t));
|
||||
zmemcpy_4(dest, from);
|
||||
zmemcpy_4(dest+4, from);
|
||||
}
|
||||
|
||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||
memcpy(chunk, from, sizeof(uint64_t));
|
||||
zmemcpy_8(chunk, from);
|
||||
}
|
||||
|
||||
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||
chunkmemset_8((uint8_t *)s, chunk);
|
||||
zmemcpy_8(chunk, (uint8_t *)s);
|
||||
}
|
||||
|
||||
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
|
||||
memcpy(out, chunk, sizeof(uint64_t));
|
||||
zmemcpy_8(out, chunk);
|
||||
}
|
||||
|
||||
#define CHUNKSIZE chunksize_c
|
||||
|
@ -60,20 +60,20 @@ Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned l
|
||||
#endif
|
||||
#if CHUNK_SIZE >= 8
|
||||
while (len >= 8) {
|
||||
memcpy(out, from, 8);
|
||||
zmemcpy_8(out, from);
|
||||
out += 8;
|
||||
from += 8;
|
||||
len -= 8;
|
||||
}
|
||||
#endif
|
||||
if (len >= 4) {
|
||||
memcpy(out, from, 4);
|
||||
zmemcpy_4(out, from);
|
||||
out += 4;
|
||||
from += 4;
|
||||
len -= 4;
|
||||
}
|
||||
if (len >= 2) {
|
||||
memcpy(out, from, 2);
|
||||
zmemcpy_2(out, from);
|
||||
out += 2;
|
||||
from += 2;
|
||||
len -= 2;
|
||||
|
@ -101,8 +101,8 @@ static inline uint32_t compare256_unaligned_32_static(const uint8_t *src0, const
|
||||
do {
|
||||
uint32_t sv, mv, diff;
|
||||
|
||||
memcpy(&sv, src0, sizeof(sv));
|
||||
memcpy(&mv, src1, sizeof(mv));
|
||||
zmemcpy_4(&sv, src0);
|
||||
zmemcpy_4(&mv, src1);
|
||||
|
||||
diff = sv ^ mv;
|
||||
if (diff) {
|
||||
@ -141,8 +141,8 @@ static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const
|
||||
do {
|
||||
uint64_t sv, mv, diff;
|
||||
|
||||
memcpy(&sv, src0, sizeof(sv));
|
||||
memcpy(&mv, src1, sizeof(mv));
|
||||
zmemcpy_8(&sv, src0);
|
||||
zmemcpy_8(&mv, src1);
|
||||
|
||||
diff = sv ^ mv;
|
||||
if (diff) {
|
||||
|
10
deflate.h
10
deflate.h
@ -305,7 +305,7 @@ static inline void put_short(deflate_state *s, uint16_t w) {
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
w = ZSWAP16(w);
|
||||
#endif
|
||||
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
|
||||
zmemcpy_2(&s->pending_buf[s->pending], &w);
|
||||
s->pending += 2;
|
||||
}
|
||||
|
||||
@ -317,7 +317,7 @@ static inline void put_short_msb(deflate_state *s, uint16_t w) {
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
w = ZSWAP16(w);
|
||||
#endif
|
||||
memcpy(&s->pending_buf[s->pending], &w, sizeof(w));
|
||||
zmemcpy_2(&s->pending_buf[s->pending], &w);
|
||||
s->pending += 2;
|
||||
}
|
||||
|
||||
@ -329,7 +329,7 @@ static inline void put_uint32(deflate_state *s, uint32_t dw) {
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
dw = ZSWAP32(dw);
|
||||
#endif
|
||||
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
|
||||
zmemcpy_4(&s->pending_buf[s->pending], &dw);
|
||||
s->pending += 4;
|
||||
}
|
||||
|
||||
@ -341,7 +341,7 @@ static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
dw = ZSWAP32(dw);
|
||||
#endif
|
||||
memcpy(&s->pending_buf[s->pending], &dw, sizeof(dw));
|
||||
zmemcpy_4(&s->pending_buf[s->pending], &dw);
|
||||
s->pending += 4;
|
||||
}
|
||||
|
||||
@ -353,7 +353,7 @@ static inline void put_uint64(deflate_state *s, uint64_t lld) {
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
lld = ZSWAP64(lld);
|
||||
#endif
|
||||
memcpy(&s->pending_buf[s->pending], &lld, sizeof(lld));
|
||||
zmemcpy_8(&s->pending_buf[s->pending], &lld);
|
||||
s->pending += 8;
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
|
||||
static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
|
||||
uint64_t chunk;
|
||||
memcpy(&chunk, in, sizeof(chunk));
|
||||
zmemcpy_8(&chunk, in);
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
return chunk << bits;
|
||||
|
@ -29,9 +29,9 @@
|
||||
# define HASH_CALC_MASK HASH_MASK
|
||||
#endif
|
||||
#ifndef HASH_CALC_READ
|
||||
# ifdef UNALIGNED_OK
|
||||
# if BYTE_ORDER == LITTLE_ENDIAN
|
||||
# define HASH_CALC_READ \
|
||||
memcpy(&val, strstart, sizeof(val));
|
||||
zmemcpy_4(&val, strstart);
|
||||
# else
|
||||
# define HASH_CALC_READ \
|
||||
val = ((uint32_t)(strstart[0])); \
|
||||
|
12
match_tpl.h
12
match_tpl.h
@ -74,11 +74,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
|
||||
#endif
|
||||
|
||||
#ifdef UNALIGNED64_OK
|
||||
memcpy(scan_start, scan, sizeof(uint64_t));
|
||||
memcpy(scan_end, scan+offset, sizeof(uint64_t));
|
||||
zmemcpy_8(scan_start, scan);
|
||||
zmemcpy_8(scan_end, scan+offset);
|
||||
#elif defined(UNALIGNED_OK)
|
||||
memcpy(scan_start, scan, sizeof(uint32_t));
|
||||
memcpy(scan_end, scan+offset, sizeof(uint32_t));
|
||||
zmemcpy_4(scan_start, scan);
|
||||
zmemcpy_4(scan_end, scan+offset);
|
||||
#else
|
||||
scan_end[0] = *(scan+offset);
|
||||
scan_end[1] = *(scan+offset+1);
|
||||
@ -201,9 +201,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
|
||||
#endif
|
||||
|
||||
#ifdef UNALIGNED64_OK
|
||||
memcpy(scan_end, scan+offset, sizeof(uint64_t));
|
||||
zmemcpy_8(scan_end, scan+offset);
|
||||
#elif defined(UNALIGNED_OK)
|
||||
memcpy(scan_end, scan+offset, sizeof(uint32_t));
|
||||
zmemcpy_4(scan_end, scan+offset);
|
||||
#else
|
||||
scan_end[0] = *(scan+offset);
|
||||
scan_end[1] = *(scan+offset+1);
|
||||
|
15
zbuild.h
15
zbuild.h
@ -194,4 +194,19 @@
|
||||
# define Tracecv(c, x)
|
||||
#endif
|
||||
|
||||
/* zmemcpy_N(dest, src): copy exactly N bytes (N = 2, 4 or 8) from src to dest.
 *
 * When UNALIGNED_OK is defined the copy is a direct integer load/store through
 * casted pointers; otherwise it is a plain memcpy() of N bytes.
 *
 * Every expansion is a single parenthesized expression and every parameter is
 * parenthesized, so that pointer-arithmetic arguments such as `p+4` are cast
 * as a whole instead of the cast binding to `p` alone (which would scale the
 * offset by the element size and touch the wrong address).
 *
 * The source side is read through a pointer-to-const so const-qualified
 * sources can be passed without discarding the qualifier.
 *
 * NOTE(review): the UNALIGNED_OK variants type-pun through integer pointers;
 * confirm the build's aliasing settings are compatible with that.
 */
#ifdef UNALIGNED_OK
#  define zmemcpy_2(dest, src)    (*((uint16_t *)(dest)) = *((const uint16_t *)(src)))
#  define zmemcpy_4(dest, src)    (*((uint32_t *)(dest)) = *((const uint32_t *)(src)))
#  if UINTPTR_MAX == UINT64_MAX
#    define zmemcpy_8(dest, src)  (*((uint64_t *)(dest)) = *((const uint64_t *)(src)))
#  else
/* Pointers are not 64 bits wide: copy the 8 bytes as two 32-bit halves.
 * Both arguments are evaluated twice here, so they must be free of side effects. */
#    define zmemcpy_8(dest, src)  (((uint32_t *)(dest))[0] = ((const uint32_t *)(src))[0], \
                                   ((uint32_t *)(dest))[1] = ((const uint32_t *)(src))[1])
#  endif
#else
#  define zmemcpy_2(dest, src)    memcpy((dest), (src), 2)
#  define zmemcpy_4(dest, src)    memcpy((dest), (src), 4)
#  define zmemcpy_8(dest, src)    memcpy((dest), (src), 8)
#endif
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user