Update ext/jpge

From aeb7d3b463

Fixes #14012

Also now comes with SSE2 SIMD
This commit is contained in:
Gleb Mazovetskiy 2021-01-30 11:39:01 +00:00
parent c251d69eab
commit 5a09bc3d30
7 changed files with 5097 additions and 4347 deletions

25
ext/jpge/README.md Normal file
View File

@ -0,0 +1,25 @@
# jpeg-compressor
This directory contains code from
https://github.com/richgel999/jpeg-compressor/tree/aeb7d3b463aa8228b87a28013c15ee50a7e6fcf3
with the following patches:
* https://github.com/richgel999/jpeg-compressor/pull/18
## License
The license of jpgd.cpp/.h and jpge.cpp/.h is either Public Domain or Apache 2.0. Choose whatever you want.
The license for the optional file jpgd_idct.h (and ONLY this file) is Copyright 2009 Intel Corporation:
Permission is granted to use, copy, distribute and prepare derivative works of
this software for any purpose and without fee, provided, that the above
copyright notice and this statement appear in all copies. Intel makes no
representations about the suitability of this software for any purpose. THIS
SOFTWARE IS PROVIDED "AS IS." INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES,
EXPRESS OR IMPLIED, AND ALL LIABILITY, INCLUDING CONSEQUENTIAL AND OTHER
INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE, INCLUDING LIABILITY FOR
INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not assume any
responsibility for any errors which may appear in this software nor any
responsibility to update it.

File diff suppressed because it is too large Load Diff

View File

@ -1,319 +1,351 @@
// jpgd.h - C++ class for JPEG decompression.
// Public domain, Rich Geldreich <richgel99@gmail.com>
// Richard Geldreich <richgel99@gmail.com>
// See jpgd.cpp for license (Public Domain or Apache 2.0).
#ifndef JPEG_DECODER_H
#define JPEG_DECODER_H
#include <stdlib.h>
#include <stdio.h>
#include <setjmp.h>
#include <assert.h>
#include <stdint.h>
#ifdef _MSC_VER
#define JPGD_NORETURN __declspec(noreturn)
#define JPGD_NORETURN __declspec(noreturn)
#elif defined(__GNUC__)
#define JPGD_NORETURN __attribute__ ((noreturn))
#define JPGD_NORETURN __attribute__ ((noreturn))
#else
#define JPGD_NORETURN
#define JPGD_NORETURN
#endif
#define JPGD_HUFF_TREE_MAX_LENGTH 512
#define JPGD_HUFF_CODE_SIZE_MAX_LENGTH 256
namespace jpgd
{
typedef unsigned char uint8;
typedef signed short int16;
typedef unsigned short uint16;
typedef unsigned int uint;
typedef signed int int32;
typedef unsigned char uint8;
typedef signed short int16;
typedef unsigned short uint16;
typedef unsigned int uint;
typedef signed int int32;
// Loads a JPEG image from a memory buffer or a file.
// req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
// On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB).
// Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly.
// Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps);
unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps);
// Loads a JPEG image from a memory buffer or a file.
// req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
// On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB).
// Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly.
// Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0);
unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0);
// Success/failure error codes.
enum jpgd_status
{
JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM
};
// Input stream interface.
// Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available.
// The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
// It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
// Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
class jpeg_decoder_stream
{
public:
jpeg_decoder_stream() { }
virtual ~jpeg_decoder_stream() { }
// Success/failure error codes.
enum jpgd_status
{
JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER,
JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS
};
// The read() method is called when the internal input buffer is empty.
// Parameters:
// pBuf - input buffer
// max_bytes_to_read - maximum bytes that can be written to pBuf
// pEOF_flag - set this to true if at end of stream (no more bytes remaining)
// Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
// Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0;
};
// Input stream interface.
// Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available.
// The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
// It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
// Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
class jpeg_decoder_stream
{
public:
jpeg_decoder_stream() { }
virtual ~jpeg_decoder_stream() { }
// stdio FILE stream class.
class jpeg_decoder_file_stream : public jpeg_decoder_stream
{
jpeg_decoder_file_stream(const jpeg_decoder_file_stream &);
jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &);
// The read() method is called when the internal input buffer is empty.
// Parameters:
// pBuf - input buffer
// max_bytes_to_read - maximum bytes that can be written to pBuf
// pEOF_flag - set this to true if at end of stream (no more bytes remaining)
// Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
// Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) = 0;
};
FILE *m_pFile;
bool m_eof_flag, m_error_flag;
// stdio FILE stream class.
class jpeg_decoder_file_stream : public jpeg_decoder_stream
{
jpeg_decoder_file_stream(const jpeg_decoder_file_stream&);
jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&);
public:
jpeg_decoder_file_stream();
virtual ~jpeg_decoder_file_stream();
bool open(const char *Pfilename);
void close();
FILE* m_pFile;
bool m_eof_flag, m_error_flag;
virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag);
};
public:
jpeg_decoder_file_stream();
virtual ~jpeg_decoder_file_stream();
// Memory stream class.
class jpeg_decoder_mem_stream : public jpeg_decoder_stream
{
const uint8 *m_pSrc_data;
uint m_ofs, m_size;
bool open(const char* Pfilename);
void close();
public:
jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { }
jpeg_decoder_mem_stream(const uint8 *pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { }
virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag);
};
virtual ~jpeg_decoder_mem_stream() { }
// Memory stream class.
class jpeg_decoder_mem_stream : public jpeg_decoder_stream
{
const uint8* m_pSrc_data;
uint m_ofs, m_size;
bool open(const uint8 *pSrc_data, uint size);
void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; }
virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag);
};
public:
jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { }
jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { }
// Loads JPEG file from a jpeg_decoder_stream.
unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps);
virtual ~jpeg_decoder_mem_stream() { }
enum
{
JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384
};
typedef int16 jpgd_quant_t;
typedef int16 jpgd_block_t;
bool open(const uint8* pSrc_data, uint size);
void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; }
class jpeg_decoder
{
public:
// Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
// methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
jpeg_decoder(jpeg_decoder_stream *pStream);
virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag);
};
~jpeg_decoder();
// Loads JPEG file from a jpeg_decoder_stream.
unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0);
// Call this method after constructing the object to begin decompression.
// If JPGD_SUCCESS is returned you may then call decode() on each scanline.
int begin_decoding();
enum
{
JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768
};
// Returns the next scan line.
// For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
// Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
// Returns JPGD_SUCCESS if a scan line has been returned.
// Returns JPGD_DONE if all scan lines have been returned.
// Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info.
int decode(const void** pScan_line, uint* pScan_line_len);
inline jpgd_status get_error_code() const { return m_error_code; }
typedef int16 jpgd_quant_t;
typedef int16 jpgd_block_coeff_t;
inline int get_width() const { return m_image_x_size; }
inline int get_height() const { return m_image_y_size; }
class jpeg_decoder
{
public:
enum
{
cFlagBoxChromaFiltering = 1,
cFlagDisableSIMD = 2
};
inline int get_num_components() const { return m_comps_in_frame; }
// Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
// methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = 0);
inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
~jpeg_decoder();
// Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
inline int get_total_bytes_read() const { return m_total_bytes_read; }
private:
jpeg_decoder(const jpeg_decoder &);
jpeg_decoder &operator =(const jpeg_decoder &);
// Call this method after constructing the object to begin decompression.
// If JPGD_SUCCESS is returned you may then call decode() on each scanline.
typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int);
int begin_decoding();
struct huff_tables
{
bool ac_table;
uint look_up[256];
uint look_up2[256];
uint8 code_size[256];
uint tree[512];
};
// Returns the next scan line.
// For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
// Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
// Returns JPGD_SUCCESS if a scan line has been returned.
// Returns JPGD_DONE if all scan lines have been returned.
// Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info.
int decode(const void** pScan_line, uint* pScan_line_len);
struct coeff_buf
{
uint8 *pData;
int block_num_x, block_num_y;
int block_len_x, block_len_y;
int block_size;
};
inline jpgd_status get_error_code() const { return m_error_code; }
struct mem_block
{
mem_block *m_pNext;
size_t m_used_count;
size_t m_size;
char m_data[1];
};
inline int get_width() const { return m_image_x_size; }
inline int get_height() const { return m_image_y_size; }
jmp_buf m_jmp_state;
mem_block *m_pMem_blocks;
int m_image_x_size;
int m_image_y_size;
jpeg_decoder_stream *m_pStream;
int m_progressive_flag;
uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES];
uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size
uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size
jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
int m_comps_in_frame; // # of components in frame
int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor
int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor
int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector
int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID
int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
int m_comps_in_scan; // # of components in scan
int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan
int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector
int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector
int m_spectral_start; // spectral selection start
int m_spectral_end; // spectral selection end
int m_successive_low; // successive approximation low
int m_successive_high; // successive approximation high
int m_max_mcu_x_size; // MCU's max. X size in pixels
int m_max_mcu_y_size; // MCU's max. Y size in pixels
int m_blocks_per_mcu;
int m_max_blocks_per_row;
int m_mcus_per_row, m_mcus_per_col;
int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
int m_total_lines_left; // total # lines left in image
int m_mcu_lines_left; // total # lines left in this MCU
int m_real_dest_bytes_per_scan_line;
int m_dest_bytes_per_scan_line; // rounded up
int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y)
huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
int m_eob_run;
int m_block_y_mcu[JPGD_MAX_COMPONENTS];
uint8* m_pIn_buf_ofs;
int m_in_buf_left;
int m_tem_flag;
bool m_eof_flag;
uint8 m_in_buf_pad_start[128];
uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128];
uint8 m_in_buf_pad_end[128];
int m_bits_left;
uint m_bit_buf;
int m_restart_interval;
int m_restarts_left;
int m_next_restart_num;
int m_max_mcus_per_row;
int m_max_blocks_per_mcu;
int m_expanded_blocks_per_mcu;
int m_expanded_blocks_per_row;
int m_expanded_blocks_per_component;
bool m_freq_domain_chroma_upsample;
int m_max_mcus_per_col;
uint m_last_dc_val[JPGD_MAX_COMPONENTS];
jpgd_block_t* m_pMCU_coefficients;
int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
uint8* m_pSample_buf;
int m_crr[256];
int m_cbb[256];
int m_crg[256];
int m_cbg[256];
uint8* m_pScan_line_0;
uint8* m_pScan_line_1;
jpgd_status m_error_code;
bool m_ready_flag;
int m_total_bytes_read;
inline int get_num_components() const { return m_comps_in_frame; }
inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
// Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
inline int get_total_bytes_read() const { return m_total_bytes_read; }
private:
jpeg_decoder(const jpeg_decoder&);
jpeg_decoder& operator =(const jpeg_decoder&);
typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int);
struct huff_tables
{
bool ac_table;
uint look_up[256];
uint look_up2[256];
uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH];
uint tree[JPGD_HUFF_TREE_MAX_LENGTH];
};
struct coeff_buf
{
uint8* pData;
int block_num_x, block_num_y;
int block_len_x, block_len_y;
int block_size;
};
struct mem_block
{
mem_block* m_pNext;
size_t m_used_count;
size_t m_size;
char m_data[1];
};
jmp_buf m_jmp_state;
uint32_t m_flags;
mem_block* m_pMem_blocks;
int m_image_x_size;
int m_image_y_size;
jpeg_decoder_stream* m_pStream;
int m_progressive_flag;
uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES];
uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size
uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size
jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
int m_comps_in_frame; // # of components in frame
int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor
int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor
int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector
int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID
int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
int m_comps_in_scan; // # of components in scan
int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan
int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector
int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector
int m_spectral_start; // spectral selection start
int m_spectral_end; // spectral selection end
int m_successive_low; // successive approximation low
int m_successive_high; // successive approximation high
int m_max_mcu_x_size; // MCU's max. X size in pixels
int m_max_mcu_y_size; // MCU's max. Y size in pixels
int m_blocks_per_mcu;
int m_max_blocks_per_row;
int m_mcus_per_row, m_mcus_per_col;
int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
int m_total_lines_left; // total # lines left in image
int m_mcu_lines_left; // total # lines left in this MCU
int m_num_buffered_scanlines;
int m_real_dest_bytes_per_scan_line;
int m_dest_bytes_per_scan_line; // rounded up
int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y)
huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
int m_eob_run;
int m_block_y_mcu[JPGD_MAX_COMPONENTS];
uint8* m_pIn_buf_ofs;
int m_in_buf_left;
int m_tem_flag;
uint8 m_in_buf_pad_start[64];
uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128];
uint8 m_in_buf_pad_end[64];
int m_bits_left;
uint m_bit_buf;
int m_restart_interval;
int m_restarts_left;
int m_next_restart_num;
int m_max_mcus_per_row;
int m_max_blocks_per_mcu;
int m_max_mcus_per_col;
uint m_last_dc_val[JPGD_MAX_COMPONENTS];
jpgd_block_coeff_t* m_pMCU_coefficients;
int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
uint8* m_pSample_buf;
uint8* m_pSample_buf_prev;
int m_crr[256];
int m_cbb[256];
int m_crg[256];
int m_cbg[256];
uint8* m_pScan_line_0;
uint8* m_pScan_line_1;
jpgd_status m_error_code;
int m_total_bytes_read;
bool m_ready_flag;
bool m_eof_flag;
bool m_sample_buf_prev_valid;
bool m_has_sse2;
inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; }
void free_all_blocks();
JPGD_NORETURN void stop_decoding(jpgd_status status);
void* alloc(size_t n, bool zero = false);
void* alloc_aligned(size_t nSize, uint32_t align = 16, bool zero = false);
void word_clear(void* p, uint16 c, uint n);
void prep_in_buffer();
void read_dht_marker();
void read_dqt_marker();
void read_sof_marker();
void skip_variable_marker();
void read_dri_marker();
void read_sos_marker();
int next_marker();
int process_markers();
void locate_soi_marker();
void locate_sof_marker();
int locate_sos_marker();
void init(jpeg_decoder_stream* pStream, uint32_t flags);
void create_look_ups();
void fix_in_buffer();
void transform_mcu(int mcu_row);
coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
inline jpgd_block_coeff_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y);
void load_next_row();
void decode_next_row();
void make_huff_table(int index, huff_tables* pH);
void check_quant_tables();
void check_huff_tables();
bool calc_mcu_block_order();
int init_scan();
void init_frame();
void process_restart();
void decode_scan(pDecode_block_func decode_block_func);
void init_progressive();
void init_sequential();
void decode_start();
void decode_init(jpeg_decoder_stream* pStream, uint32_t flags);
void H2V2Convert();
uint32_t H2V2ConvertFiltered();
void H2V1Convert();
void H2V1ConvertFiltered();
void H1V2Convert();
void H1V2ConvertFiltered();
void H1V1Convert();
void gray_convert();
void find_eoi();
inline uint get_char();
inline uint get_char(bool* pPadding_flag);
inline void stuff_char(uint8 q);
inline uint8 get_octet();
inline uint get_bits(int num_bits);
inline uint get_bits_no_markers(int numbits);
inline int huff_decode(huff_tables* pH);
inline int huff_decode(huff_tables* pH, int& extrabits);
// Clamps a value between 0-255.
static inline uint8 clamp(int i)
{
if (static_cast<uint>(i) > 255)
i = (((~i) >> 31) & 0xFF);
return static_cast<uint8>(i);
}
int decode_next_mcu_row();
static void decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y);
static void decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y);
static void decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y);
static void decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y);
};
void free_all_blocks();
JPGD_NORETURN void stop_decoding(jpgd_status status);
void *alloc(size_t n, bool zero = false);
void word_clear(void *p, uint16 c, uint n);
void prep_in_buffer();
void read_dht_marker();
void read_dqt_marker();
void read_sof_marker();
void skip_variable_marker();
void read_dri_marker();
void read_sos_marker();
int next_marker();
int process_markers();
void locate_soi_marker();
void locate_sof_marker();
int locate_sos_marker();
void init(jpeg_decoder_stream * pStream);
void create_look_ups();
void fix_in_buffer();
void transform_mcu(int mcu_row);
void transform_mcu_expand(int mcu_row);
coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y);
void load_next_row();
void decode_next_row();
void make_huff_table(int index, huff_tables *pH);
void check_quant_tables();
void check_huff_tables();
void calc_mcu_block_order();
int init_scan();
void init_frame();
void process_restart();
void decode_scan(pDecode_block_func decode_block_func);
void init_progressive();
void init_sequential();
void decode_start();
void decode_init(jpeg_decoder_stream * pStream);
void H2V2Convert();
void H2V1Convert();
void H1V2Convert();
void H1V1Convert();
void gray_convert();
void expanded_convert();
void find_eoi();
inline uint get_char();
inline uint get_char(bool *pPadding_flag);
inline void stuff_char(uint8 q);
inline uint8 get_octet();
inline uint get_bits(int num_bits);
inline uint get_bits_no_markers(int numbits);
inline int huff_decode(huff_tables *pH);
inline int huff_decode(huff_tables *pH, int& extrabits);
static inline uint8 clamp(int i);
static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
};
} // namespace jpgd
#endif // JPEG_DECODER_H

462
ext/jpge/jpgd_idct.h Normal file
View File

@ -0,0 +1,462 @@
// Copyright 2009 Intel Corporation
// All Rights Reserved
//
// Permission is granted to use, copy, distribute and prepare derivative works of this
// software for any purpose and without fee, provided, that the above copyright notice
// and this statement appear in all copies. Intel makes no representations about the
// suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS."
// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not
// assume any responsibility for any errors which may appear in this software nor any
// responsibility to update it.
//
// From:
// https://software.intel.com/sites/default/files/m/d/4/1/d/8/UsingIntelAVXToImplementIDCT-r1_5.pdf
// https://software.intel.com/file/29048
//
// Requires SSE
//
#ifdef _MSC_VER
#include <intrin.h>
#endif
#include <immintrin.h>
#ifdef _MSC_VER
#define JPGD_SIMD_ALIGN(type, name) __declspec(align(16)) type name
#else
#define JPGD_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
#endif
#define BITS_INV_ACC 4
#define SHIFT_INV_ROW 16 - BITS_INV_ACC
#define SHIFT_INV_COL 1 + BITS_INV_ACC
const short IRND_INV_ROW = 1024 * (6 - BITS_INV_ACC); //1 << (SHIFT_INV_ROW-1)
const short IRND_INV_COL = 16 * (BITS_INV_ACC - 3); // 1 << (SHIFT_INV_COL-1)
const short IRND_INV_CORR = IRND_INV_COL - 1; // correction -1.0 and round
JPGD_SIMD_ALIGN(short, shortM128_one_corr[8]) = {1, 1, 1, 1, 1, 1, 1, 1};
JPGD_SIMD_ALIGN(short, shortM128_round_inv_row[8]) = {IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0};
JPGD_SIMD_ALIGN(short, shortM128_round_inv_col[8]) = {IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL};
JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8])= {IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR};
JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = {13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036}; // tg * (2<<16) + 0.5
JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = {27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146}; // tg * (2<<16) + 0.5
JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746}; // tg * (2<<16) + 0.5
JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = {-19195, -19195, -19195, -19195, -19195, -19195, -19195, -19195};// cos * (2<<16) + 0.5
//-----------------------------------------------------------------------------
// Table for rows 0,4 - constants are multiplied on cos_4_16
// w15 w14 w11 w10 w07 w06 w03 w02
// w29 w28 w25 w24 w21 w20 w17 w16
// w31 w30 w27 w26 w23 w22 w19 w18
//movq -> w05 w04 w01 w00
JPGD_SIMD_ALIGN(short, shortM128_tab_i_04[]) = {
16384, 21407, 16384, 8867,
16384, -8867, 16384, -21407, // w13 w12 w09 w08
16384, 8867, -16384, -21407, // w07 w06 w03 w02
-16384, 21407, 16384, -8867, // w15 w14 w11 w10
22725, 19266, 19266, -4520, // w21 w20 w17 w16
12873, -22725, 4520, -12873, // w29 w28 w25 w24
12873, 4520, -22725, -12873, // w23 w22 w19 w18
4520, 19266, 19266, -22725}; // w31 w30 w27 w26
// Table for rows 1,7 - constants are multiplied on cos_1_16
//movq -> w05 w04 w01 w00
JPGD_SIMD_ALIGN(short, shortM128_tab_i_17[]) = {
22725, 29692, 22725, 12299,
22725, -12299, 22725, -29692, // w13 w12 w09 w08
22725, 12299, -22725, -29692, // w07 w06 w03 w02
-22725, 29692, 22725, -12299, // w15 w14 w11 w10
31521, 26722, 26722, -6270, // w21 w20 w17 w16
17855, -31521, 6270, -17855, // w29 w28 w25 w24
17855, 6270, -31521, -17855, // w23 w22 w19 w18
6270, 26722, 26722, -31521}; // w31 w30 w27 w26
// Table for rows 2,6 - constants are multiplied on cos_2_16
//movq -> w05 w04 w01 w00
JPGD_SIMD_ALIGN(short, shortM128_tab_i_26[]) = {
21407, 27969, 21407, 11585,
21407, -11585, 21407, -27969, // w13 w12 w09 w08
21407, 11585, -21407, -27969, // w07 w06 w03 w02
-21407, 27969, 21407, -11585, // w15 w14 w11 w10
29692, 25172, 25172, -5906, // w21 w20 w17 w16
16819, -29692, 5906, -16819, // w29 w28 w25 w24
16819, 5906, -29692, -16819, // w23 w22 w19 w18
5906, 25172, 25172, -29692}; // w31 w30 w27 w26
// Table for rows 3,5 - constants are multiplied on cos_3_16
//movq -> w05 w04 w01 w00
JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = {
19266, 25172, 19266, 10426,
19266, -10426, 19266, -25172, // w13 w12 w09 w08
19266, 10426, -19266, -25172, // w07 w06 w03 w02
-19266, 25172, 19266, -10426, // w15 w14 w11 w10
26722, 22654, 22654, -5315, // w21 w20 w17 w16
15137, -26722, 5315, -15137, // w29 w28 w25 w24
15137, 5315, -26722, -15137, // w23 w22 w19 w18
5315, 22654, 22654, -26722}; // w31 w30 w27 w26
JPGD_SIMD_ALIGN(short, shortM128_128[8]) = { 128, 128, 128, 128, 128, 128, 128, 128 };
void idctSSEShortU8(const short *pInput, uint8_t * pOutputUB)
{
__m128i r_xmm0, r_xmm4;
__m128i r_xmm1, r_xmm2, r_xmm3, r_xmm5, r_xmm6, r_xmm7;
__m128i row0, row1, row2, row3, row4, row5, row6, row7;
short * pTab_i_04 = shortM128_tab_i_04;
short * pTab_i_26 = shortM128_tab_i_26;
//Get pointers for this input and output
pTab_i_04 = shortM128_tab_i_04;
pTab_i_26 = shortM128_tab_i_26;
//Row 1 and Row 3
r_xmm0 = _mm_load_si128((__m128i *) pInput);
r_xmm4 = _mm_load_si128((__m128i *) (&pInput[2*8]));
// *** Work on the data in xmm0
//low shuffle mask = 0xd8 = 11 01 10 00
//get short 2 and short 0 into ls 32-bits
r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
// copy short 2 and short 0 to all locations
r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
// add to those copies
r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
// shuffle mask = 0x55 = 01 01 01 01
// copy short 3 and short 1 to all locations
r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
// high shuffle mask = 0xd8 = 11 01 10 00
// get short 6 and short 4 into bit positions 64-95
// get short 7 and short 5 into bit positions 96-127
r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
// add to short 3 and short 1
r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
// shuffle mask = 0xaa = 10 10 10 10
// copy short 6 and short 4 to all locations
r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
// shuffle mask = 0xaa = 11 11 11 11
// copy short 7 and short 5 to all locations
r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
// add to short 6 and short 4
r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
// *** Work on the data in xmm4
// high shuffle mask = 0xd8 11 01 10 00
// get short 6 and short 4 into bit positions 64-95
// get short 7 and short 5 into bit positions 96-127
r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
// (xmm0 short 2 and short 0 plus pSi) + some constants
r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
r_xmm2 = r_xmm1;
r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
r_xmm6 = r_xmm5;
r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
row0 = _mm_packs_epi32(r_xmm0, r_xmm2);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
row2 = _mm_packs_epi32(r_xmm4, r_xmm6);
//Row 5 and row 7
r_xmm0 = _mm_load_si128((__m128i *) (&pInput[4*8]));
r_xmm4 = _mm_load_si128((__m128i *) (&pInput[6*8]));
r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
r_xmm2 = r_xmm1;
r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
r_xmm6 = r_xmm5;
r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
row4 = _mm_packs_epi32(r_xmm0, r_xmm2);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
row6 = _mm_packs_epi32(r_xmm4, r_xmm6);
//Row 4 and row 2
pTab_i_04 = shortM128_tab_i_35;
pTab_i_26 = shortM128_tab_i_17;
r_xmm0 = _mm_load_si128((__m128i *) (&pInput[3*8]));
r_xmm4 = _mm_load_si128((__m128i *) (&pInput[1*8]));
r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
r_xmm2 = r_xmm1;
r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
r_xmm6 = r_xmm5;
r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
row3 = _mm_packs_epi32(r_xmm0, r_xmm2);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
row1 = _mm_packs_epi32(r_xmm4, r_xmm6);
//Row 6 and row 8
r_xmm0 = _mm_load_si128((__m128i *) (&pInput[5*8]));
r_xmm4 = _mm_load_si128((__m128i *) (&pInput[7*8]));
r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
r_xmm2 = r_xmm1;
r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16]));
r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
r_xmm6 = r_xmm5;
r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
row5 = _mm_packs_epi32(r_xmm0, r_xmm2);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
row7 = _mm_packs_epi32(r_xmm4, r_xmm6);
r_xmm1 = _mm_load_si128((__m128i *) shortM128_tg_3_16);
r_xmm2 = row5;
r_xmm3 = row3;
r_xmm0 = _mm_mulhi_epi16(row5, r_xmm1);
r_xmm1 = _mm_mulhi_epi16(r_xmm1, r_xmm3);
r_xmm5 = _mm_load_si128((__m128i *) shortM128_tg_1_16);
r_xmm6 = row7;
r_xmm4 = _mm_mulhi_epi16(row7, r_xmm5);
r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm2);
r_xmm5 = _mm_mulhi_epi16(r_xmm5, row1);
r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm3);
r_xmm7 = row6;
r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm3);
r_xmm3 = _mm_load_si128((__m128i *) shortM128_tg_2_16);
r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm1);
r_xmm7 = _mm_mulhi_epi16(r_xmm7, r_xmm3);
r_xmm1 = r_xmm0;
r_xmm3 = _mm_mulhi_epi16(r_xmm3, row2);
r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm6);
r_xmm4 = _mm_adds_epi16(r_xmm4, row1);
r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm4);
r_xmm0 = _mm_adds_epi16(r_xmm0, *((__m128i *) shortM128_one_corr));
r_xmm4 = _mm_subs_epi16(r_xmm4, r_xmm1);
r_xmm6 = r_xmm5;
r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm2);
r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_one_corr));
r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);
//Intermediate results, needed later
__m128i temp3, temp7;
temp7 = r_xmm0;
r_xmm1 = r_xmm4;
r_xmm0 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm5);
r_xmm2 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
r_xmm2 = _mm_mulhi_epi16(r_xmm2, r_xmm4);
//Intermediate results, needed later
temp3 = r_xmm6;
r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm5);
r_xmm7 = _mm_adds_epi16(r_xmm7, row2);
r_xmm3 = _mm_subs_epi16(r_xmm3, row6);
r_xmm6 = row0;
r_xmm0 = _mm_mulhi_epi16(r_xmm0, r_xmm1);
r_xmm5 = row4;
r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm6);
r_xmm6 = _mm_subs_epi16(r_xmm6, row4);
r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm2);
r_xmm4 = _mm_or_si128(r_xmm4, *((__m128i *) shortM128_one_corr));
r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm1);
r_xmm0 = _mm_or_si128(r_xmm0, *((__m128i *) shortM128_one_corr));
r_xmm2 = r_xmm5;
r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm7);
r_xmm1 = r_xmm6;
r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_round_inv_col));
r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm7);
r_xmm7 = temp7;
r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm3);
r_xmm6 = _mm_adds_epi16(r_xmm6, *((__m128i *) shortM128_round_inv_col));
r_xmm7 = _mm_adds_epi16(r_xmm7, r_xmm5);
r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);
r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm3);
r_xmm1 = _mm_adds_epi16(r_xmm1, *((__m128i *) shortM128_round_inv_corr));
r_xmm3 = r_xmm6;
r_xmm2 = _mm_adds_epi16(r_xmm2, *((__m128i *) shortM128_round_inv_corr));
r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm4);
//Store results for row 0
//_mm_store_si128((__m128i *) pOutput, r_xmm7);
__m128i r0 = r_xmm7;
r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL);
r_xmm7 = r_xmm1;
r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm0);
//Store results for row 1
//_mm_store_si128((__m128i *) (&pOutput[1*8]), r_xmm6);
__m128i r1 = r_xmm6;
r_xmm1 = _mm_srai_epi16(r_xmm1, SHIFT_INV_COL);
r_xmm6 = temp3;
r_xmm7 = _mm_subs_epi16(r_xmm7, r_xmm0);
r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);
//Store results for row 2
//_mm_store_si128((__m128i *) (&pOutput[2*8]), r_xmm1);
__m128i r2 = r_xmm1;
r_xmm5 = _mm_subs_epi16(r_xmm5, temp7);
r_xmm5 = _mm_srai_epi16(r_xmm5, SHIFT_INV_COL);
//Store results for row 7
//_mm_store_si128((__m128i *) (&pOutput[7*8]), r_xmm5);
__m128i r7 = r_xmm5;
r_xmm3 = _mm_subs_epi16(r_xmm3, r_xmm4);
r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);
r_xmm2 = _mm_subs_epi16(r_xmm2, temp3);
r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL);
r_xmm2 = _mm_srai_epi16(r_xmm2, SHIFT_INV_COL);
//Store results for row 3
//_mm_store_si128((__m128i *) (&pOutput[3*8]), r_xmm6);
__m128i r3 = r_xmm6;
r_xmm3 = _mm_srai_epi16(r_xmm3, SHIFT_INV_COL);
//Store results for rows 4, 5, and 6
//_mm_store_si128((__m128i *) (&pOutput[4*8]), r_xmm2);
//_mm_store_si128((__m128i *) (&pOutput[5*8]), r_xmm7);
//_mm_store_si128((__m128i *) (&pOutput[6*8]), r_xmm3);
__m128i r4 = r_xmm2;
__m128i r5 = r_xmm7;
__m128i r6 = r_xmm3;
r0 = _mm_add_epi16(*(const __m128i *)shortM128_128, r0);
r1 = _mm_add_epi16(*(const __m128i *)shortM128_128, r1);
r2 = _mm_add_epi16(*(const __m128i *)shortM128_128, r2);
r3 = _mm_add_epi16(*(const __m128i *)shortM128_128, r3);
r4 = _mm_add_epi16(*(const __m128i *)shortM128_128, r4);
r5 = _mm_add_epi16(*(const __m128i *)shortM128_128, r5);
r6 = _mm_add_epi16(*(const __m128i *)shortM128_128, r6);
r7 = _mm_add_epi16(*(const __m128i *)shortM128_128, r7);
((__m128i *)pOutputUB)[0] = _mm_packus_epi16(r0, r1);
((__m128i *)pOutputUB)[1] = _mm_packus_epi16(r2, r3);
((__m128i *)pOutputUB)[2] = _mm_packus_epi16(r4, r5);
((__m128i *)pOutputUB)[3] = _mm_packus_epi16(r6, r7);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,168 +1,172 @@
// jpge.h - C++ class for JPEG compression.
// Public domain, Rich Geldreich <richgel99@gmail.com>
// Public Domain or Apache 2.0, Richard Geldreich <richgel99@gmail.com>
// Alex Evans: Added RGBA support, linear memory allocator.
#ifndef JPEG_ENCODER_H
#define JPEG_ENCODER_H
namespace jpge
{
typedef unsigned char uint8;
typedef signed short int16;
typedef signed int int32;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned int uint;
// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
enum subsampling_t { Y_ONLY = 0, H1V1 = 1, H2V1 = 2, H2V2 = 3 };
typedef unsigned char uint8;
typedef signed short int16;
typedef signed int int32;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned int uint;
// JPEG compression parameters structure.
struct params
{
inline params() : m_quality(85), m_subsampling(H2V2), m_no_chroma_discrim_flag(false), m_two_pass_flag(false) { }
// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
enum subsampling_t { Y_ONLY = 0, H1V1 = 1, H2V1 = 2, H2V2 = 3 };
inline bool check() const
{
if ((m_quality < 1) || (m_quality > 100)) return false;
if ((uint)m_subsampling > (uint)H2V2) return false;
return true;
}
// JPEG compression parameters structure.
struct params
{
inline params() : m_quality(85), m_subsampling(H2V2), m_no_chroma_discrim_flag(false), m_two_pass_flag(false), m_use_std_tables(false) { }
// Quality: 1-100, higher is better. Typical values are around 50-95.
int m_quality;
inline bool check() const
{
if ((m_quality < 1) || (m_quality > 100)) return false;
if ((uint)m_subsampling > (uint)H2V2) return false;
return true;
}
// m_subsampling:
// 0 = Y (grayscale) only
// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
subsampling_t m_subsampling;
// Quality: 1-100, higher is better. Typical values are around 50-95.
int m_quality;
// Disables CbCr discrimination - only intended for testing.
// If true, the Y quantization table is also used for the CbCr channels.
bool m_no_chroma_discrim_flag;
// m_subsampling:
// 0 = Y (grayscale) only
// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
subsampling_t m_subsampling;
bool m_two_pass_flag;
};
// Writes JPEG image to a file.
// num_channels must be 1 (Y) or 3 (RGB), image pitch must be width*num_channels.
bool compress_image_to_jpeg_file(const char *pFilename, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params = params());
// Disables CbCr discrimination - only intended for testing.
// If true, the Y quantization table is also used for the CbCr channels.
bool m_no_chroma_discrim_flag;
// Writes JPEG image to memory buffer.
// On entry, buf_size is the size of the output buffer pointed at by pBuf, which should be at least ~1024 bytes.
// If return value is true, buf_size will be set to the size of the compressed data.
bool compress_image_to_jpeg_file_in_memory(void *pBuf, int &buf_size, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params = params());
// Output stream abstract class - used by the jpeg_encoder class to write to the output stream.
// put_buf() is generally called with len==JPGE_OUT_BUF_SIZE bytes, but for headers it'll be called with smaller amounts.
class output_stream
{
public:
virtual ~output_stream() { };
virtual bool put_buf(const void* Pbuf, int len) = 0;
template<class T> inline bool put_obj(const T& obj) { return put_buf(&obj, sizeof(T)); }
};
// Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
class jpeg_encoder
{
public:
jpeg_encoder();
~jpeg_encoder();
bool m_two_pass_flag;
// Initializes the compressor.
// pStream: The stream object to use for writing compressed data.
// params - Compression parameters structure, defined above.
// width, height - Image dimensions.
// channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data.
// Returns false on out of memory or if a stream write fails.
bool init(output_stream *pStream, int width, int height, int src_channels, const params &comp_params = params());
const params &get_params() const { return m_params; }
// Deinitializes the compressor, freeing any allocated memory. May be called at any time.
void deinit();
// By default we use the same quantization tables as mozjpeg's default.
// Set to true to use the traditional tables from JPEG Annex K.
bool m_use_std_tables;
};
uint get_total_passes() const { return m_params.m_two_pass_flag ? 2 : 1; }
inline uint get_cur_pass() { return m_pass_num; }
// Writes JPEG image to a file.
// num_channels must be 1 (Y) or 3 (RGB), image pitch must be width*num_channels.
bool compress_image_to_jpeg_file(const char* pFilename, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params = params());
// Call this method with each source scanline.
// width * src_channels bytes per scanline is expected (RGB or Y format).
// You must call with NULL after all scanlines are processed to finish compression.
// Returns false on out of memory or if a stream write fails.
bool process_scanline(const void* pScanline);
private:
jpeg_encoder(const jpeg_encoder &);
jpeg_encoder &operator =(const jpeg_encoder &);
// Writes JPEG image to memory buffer.
// On entry, buf_size is the size of the output buffer pointed at by pBuf, which should be at least ~1024 bytes.
// If return value is true, buf_size will be set to the size of the compressed data.
bool compress_image_to_jpeg_file_in_memory(void* pBuf, int& buf_size, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params = params());
typedef int32 sample_array_t;
output_stream *m_pStream;
params m_params;
uint8 m_num_components;
uint8 m_comp_h_samp[3], m_comp_v_samp[3];
int m_image_x, m_image_y, m_image_bpp, m_image_bpl;
int m_image_x_mcu, m_image_y_mcu;
int m_image_bpl_xlt, m_image_bpl_mcu;
int m_mcus_per_row;
int m_mcu_x, m_mcu_y;
uint8 *m_mcu_lines[16];
uint8 m_mcu_y_ofs;
sample_array_t m_sample_array[64];
int16 m_coefficient_array[64];
int32 m_quantization_tables[2][64];
uint m_huff_codes[4][256];
uint8 m_huff_code_sizes[4][256];
uint8 m_huff_bits[4][17];
uint8 m_huff_val[4][256];
uint32 m_huff_count[4][256];
int m_last_dc_val[3];
enum { JPGE_OUT_BUF_SIZE = 2048 };
uint8 m_out_buf[JPGE_OUT_BUF_SIZE];
uint8 *m_pOut_buf;
uint m_out_buf_left;
uint32 m_bit_buffer;
uint m_bits_in;
uint8 m_pass_num;
bool m_all_stream_writes_succeeded;
void optimize_huffman_table(int table_num, int table_len);
void emit_byte(uint8 i);
void emit_word(uint i);
void emit_marker(int marker);
void emit_jfif_app0();
void emit_dqt();
void emit_sof();
void emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag);
void emit_dhts();
void emit_sos();
void emit_markers();
void compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val);
void compute_quant_table(int32 *dst, int16 *src);
void adjust_quant_table(int32 *dst, int32 *src);
void first_pass_init();
bool second_pass_init();
bool jpg_open(int p_x_res, int p_y_res, int src_channels);
void load_block_8_8_grey(int x);
void load_block_8_8(int x, int y, int c);
void load_block_16_8(int x, int c);
void load_block_16_8_8(int x, int c);
void load_quantized_coefficients(int component_num);
void flush_output_buffer();
void put_bits(uint bits, uint len);
void code_coefficients_pass_one(int component_num);
void code_coefficients_pass_two(int component_num);
void code_block(int component_num);
void process_mcu_row();
bool terminate_pass_one();
bool terminate_pass_two();
bool process_end_of_image();
void load_mcu(const void* src);
void clear();
void init();
};
// Output stream abstract class - used by the jpeg_encoder class to write to the output stream.
// put_buf() is generally called with len==JPGE_OUT_BUF_SIZE bytes, but for headers it'll be called with smaller amounts.
class output_stream
{
public:
virtual ~output_stream() { };
virtual bool put_buf(const void* Pbuf, int len) = 0;
template<class T> inline bool put_obj(const T& obj) { return put_buf(&obj, sizeof(T)); }
};
// Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
class jpeg_encoder
{
public:
jpeg_encoder();
~jpeg_encoder();
// Initializes the compressor.
// pStream: The stream object to use for writing compressed data.
// params - Compression parameters structure, defined above.
// width, height - Image dimensions.
// channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data.
// Returns false on out of memory or if a stream write fails.
bool init(output_stream* pStream, int width, int height, int src_channels, const params& comp_params = params());
const params& get_params() const { return m_params; }
// Deinitializes the compressor, freeing any allocated memory. May be called at any time.
void deinit();
uint get_total_passes() const { return m_params.m_two_pass_flag ? 2 : 1; }
inline uint get_cur_pass() { return m_pass_num; }
// Call this method with each source scanline.
// width * src_channels bytes per scanline is expected (RGB or Y format).
// You must call with NULL after all scanlines are processed to finish compression.
// Returns false on out of memory or if a stream write fails.
bool process_scanline(const void* pScanline);
private:
jpeg_encoder(const jpeg_encoder&);
jpeg_encoder& operator =(const jpeg_encoder&);
typedef int32 sample_array_t;
output_stream* m_pStream;
params m_params;
uint8 m_num_components;
uint8 m_comp_h_samp[3], m_comp_v_samp[3];
int m_image_x, m_image_y, m_image_bpp, m_image_bpl;
int m_image_x_mcu, m_image_y_mcu;
int m_image_bpl_xlt, m_image_bpl_mcu;
int m_mcus_per_row;
int m_mcu_x, m_mcu_y;
uint8* m_mcu_lines[16];
uint8 m_mcu_y_ofs;
sample_array_t m_sample_array[64];
int16 m_coefficient_array[64];
int32 m_quantization_tables[2][64];
uint m_huff_codes[4][256];
uint8 m_huff_code_sizes[4][256];
uint8 m_huff_bits[4][17];
uint8 m_huff_val[4][256];
uint32 m_huff_count[4][256];
int m_last_dc_val[3];
enum { JPGE_OUT_BUF_SIZE = 2048 };
uint8 m_out_buf[JPGE_OUT_BUF_SIZE];
uint8* m_pOut_buf;
uint m_out_buf_left;
uint32 m_bit_buffer;
uint m_bits_in;
uint8 m_pass_num;
bool m_all_stream_writes_succeeded;
void optimize_huffman_table(int table_num, int table_len);
void emit_byte(uint8 i);
void emit_word(uint i);
void emit_marker(int marker);
void emit_jfif_app0();
void emit_dqt();
void emit_sof();
void emit_dht(uint8* bits, uint8* val, int index, bool ac_flag);
void emit_dhts();
void emit_sos();
void emit_markers();
void compute_huffman_table(uint* codes, uint8* code_sizes, uint8* bits, uint8* val);
void compute_quant_table(int32* dst, int16* src);
void adjust_quant_table(int32* dst, int32* src);
void first_pass_init();
bool second_pass_init();
bool jpg_open(int p_x_res, int p_y_res, int src_channels);
void load_block_8_8_grey(int x);
void load_block_8_8(int x, int y, int c);
void load_block_16_8(int x, int c);
void load_block_16_8_8(int x, int c);
void load_quantized_coefficients(int component_num);
void flush_output_buffer();
void put_bits(uint bits, uint len);
void code_coefficients_pass_one(int component_num);
void code_coefficients_pass_two(int component_num);
void code_block(int component_num);
void process_mcu_row();
bool terminate_pass_one();
bool terminate_pass_two();
bool process_end_of_image();
void load_mcu(const void* src);
void clear();
void init();
};
} // namespace jpge

View File

@ -1 +0,0 @@
This is https://code.google.com/p/jpeg-compressor/ , public domain code.