mirror of
https://github.com/stenzek/duckstation.git
synced 2024-11-27 08:00:36 +00:00
19a0854528
And include XZ routines.
170 lines
5.3 KiB
C
170 lines
5.3 KiB
C
/* Ppmd.h -- PPMD codec common code
|
|
2023-03-05 : Igor Pavlov : Public domain
|
|
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
|
|
|
|
#ifndef ZIP7_INC_PPMD_H
|
|
#define ZIP7_INC_PPMD_H
|
|
|
|
#include "CpuArch.h"
|
|
|
|
EXTERN_C_BEGIN
|
|
|
|
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
|
|
/*
|
|
PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block.
|
|
if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields.
|
|
if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields.
|
|
if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed,
|
|
if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional,
|
|
and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit.
|
|
PPMD code works slightly faster in (PPMD_32BIT) mode.
|
|
*/
|
|
#define PPMD_32BIT
|
|
#endif
|
|
|
|
#define PPMD_INT_BITS 7
|
|
#define PPMD_PERIOD_BITS 7
|
|
#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS))
|
|
|
|
#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift))
|
|
#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2)
|
|
#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob))
|
|
#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob))
|
|
|
|
#define PPMD_N1 4
|
|
#define PPMD_N2 4
|
|
#define PPMD_N3 4
|
|
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
|
|
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
|
|
|
|
MY_CPU_pragma_pack_push_1
|
|
/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
|
|
|
|
/* SEE-contexts for PPM-contexts with masked symbols */
|
|
typedef struct
|
|
{
|
|
UInt16 Summ; /* Freq */
|
|
Byte Shift; /* Speed of Freq change; low Shift is for fast change */
|
|
Byte Count; /* Count to next change of Shift */
|
|
} CPpmd_See;
|
|
|
|
#define Ppmd_See_UPDATE(p) \
|
|
{ if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
|
|
{ (p)->Summ = (UInt16)((p)->Summ << 1); \
|
|
(p)->Count = (Byte)(3 << (p)->Shift++); }}
|
|
|
|
|
|
typedef struct
|
|
{
|
|
Byte Symbol;
|
|
Byte Freq;
|
|
UInt16 Successor_0;
|
|
UInt16 Successor_1;
|
|
} CPpmd_State;
|
|
|
|
typedef struct CPpmd_State2_
|
|
{
|
|
Byte Symbol;
|
|
Byte Freq;
|
|
} CPpmd_State2;
|
|
|
|
typedef struct CPpmd_State4_
|
|
{
|
|
UInt16 Successor_0;
|
|
UInt16 Successor_1;
|
|
} CPpmd_State4;
|
|
|
|
MY_CPU_pragma_pop
|
|
|
|
/*
|
|
PPMD code can write full CPpmd_State structure data to CPpmd*_Context
|
|
at (byte offset = 2) instead of some fields of original CPpmd*_Context structure.
|
|
|
|
If we use pointers to different types, but that point to shared
|
|
memory space, we can have aliasing problem (strict aliasing).
|
|
|
|
XLC compiler in -O2 mode can change the order of memory write instructions
|
|
in relation to read instructions, if we have use pointers to different types.
|
|
|
|
To solve that aliasing problem we use combined CPpmd*_Context structure
|
|
with unions that contain the fields from both structures:
|
|
the original CPpmd*_Context and CPpmd_State.
|
|
So we can access the fields from both structures via one pointer,
|
|
and the compiler doesn't change the order of write instructions
|
|
in relation to read instructions.
|
|
|
|
If we don't use memory write instructions to shared memory in
|
|
some local code, and we use only reading instructions (read only),
|
|
then probably it's safe to use pointers to different types for reading.
|
|
*/
|
|
|
|
|
|
|
|
#ifdef PPMD_32BIT
|
|
|
|
#define Ppmd_Ref_Type(type) type *
|
|
#define Ppmd_GetRef(p, ptr) (ptr)
|
|
#define Ppmd_GetPtr(p, ptr) (ptr)
|
|
#define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr)
|
|
|
|
#else
|
|
|
|
#define Ppmd_Ref_Type(type) UInt32
|
|
#define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
|
|
#define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
|
|
#define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs))
|
|
|
|
#endif // PPMD_32BIT
|
|
|
|
|
|
typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
|
|
typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref;
|
|
typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref;
|
|
|
|
|
|
/*
|
|
#ifdef MY_CPU_LE_UNALIGN
|
|
// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
|
|
#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
|
|
#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
|
|
|
|
#else
|
|
*/
|
|
|
|
/*
|
|
We can write 16-bit halves to 32-bit (Successor) field in any selected order.
|
|
But the native order is more consistent way.
|
|
So we use the native order, if LE/BE order can be detected here at compile time.
|
|
*/
|
|
|
|
#ifdef MY_CPU_BE
|
|
|
|
#define Ppmd_GET_SUCCESSOR(p) \
|
|
( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) )
|
|
|
|
#define Ppmd_SET_SUCCESSOR(p, v) { \
|
|
(p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \
|
|
(p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); }
|
|
|
|
#else
|
|
|
|
#define Ppmd_GET_SUCCESSOR(p) \
|
|
( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) )
|
|
|
|
#define Ppmd_SET_SUCCESSOR(p, v) { \
|
|
(p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \
|
|
(p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); }
|
|
|
|
#endif
|
|
|
|
// #endif
|
|
|
|
|
|
#define PPMD_SetAllBitsIn256Bytes(p) \
|
|
{ size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \
|
|
p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }}
|
|
|
|
EXTERN_C_END
|
|
|
|
#endif
|