Bug 1508198 - Update libjpeg-turbo to 2.0.3. r=aosmond

Differential Revision: https://phabricator.services.mozilla.com/D44851

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Ryan VanderMeulen 2019-09-05 16:49:12 +00:00
parent 5f190b63ff
commit 15c1a17047
25 changed files with 185 additions and 188 deletions

View File

@ -1,57 +0,0 @@
Bug 1520760 - Fix AVX2 detection to ensure we have all required CPU parameters.
diff --git simd/i386/jsimdcpu.asm simd/i386/jsimdcpu.asm
--- simd/i386/jsimdcpu.asm
+++ simd/i386/jsimdcpu.asm
@@ -75,6 +75,12 @@ EXTN(jpeg_simd_cpu_support):
or edi, byte JSIMD_SSE2
.no_sse2:
+ ; Check for level 7 support
+ xor eax, eax
+ cpuid
+ cmp eax, 7
+ jl short .no_avx2
+
; Check for AVX2 instruction support
mov eax, 7
xor ecx, ecx
@@ -87,8 +93,10 @@ EXTN(jpeg_simd_cpu_support):
mov eax, 1
xor ecx, ecx
cpuid
- test ecx, 1<<27
+ test ecx, 1<<26
jz short .no_avx2 ; O/S does not support XSAVE
+ test ecx, 1<<27
+ jz short .no_avx2 ; O/S does not support OSXSAVE
test ecx, 1<<28
jz short .no_avx2 ; CPU does not support AVX2
diff --git simd/x86_64/jsimdcpu.asm simd/x86_64/jsimdcpu.asm
--- simd/x86_64/jsimdcpu.asm
+++ simd/x86_64/jsimdcpu.asm
@@ -38,14 +38,20 @@ EXTN(jpeg_simd_cpu_support):
xor rdi, rdi ; simd support flag
+ ; Check for level 7 support
+ xor rax, rax
+ cpuid
+
+ or rdi, JSIMD_SSE2
+ or rdi, JSIMD_SSE
+ cmp rax, 7
+ jl short .return
+
; Check for AVX2 instruction support
mov rax, 7
xor rcx, rcx
cpuid
mov rax, rbx ; rax = Extended feature flags
-
- or rdi, JSIMD_SSE2
- or rdi, JSIMD_SSE
test rax, 1<<5 ; bit5:AVX2
jz short .return

View File

@ -14,7 +14,7 @@ libjpeg-turbo is covered by three compatible BSD-style open source licenses:
This license covers the TurboJPEG API library and associated programs, as
well as the build system.
- The zlib License, which is listed below
- The [zlib License](https://opensource.org/licenses/Zlib)
This license is a subset of the other two, and it covers the libjpeg-turbo
SIMD extensions.
@ -66,7 +66,7 @@ best of our understanding.
2. If your binary distribution includes or uses the TurboJPEG API, then
your product documentation must include the text of the Modified BSD
License.
License (see below.)
**Origin**
- Clause 2 of the Modified BSD License
@ -91,7 +91,8 @@ best of our understanding.
The Modified (3-clause) BSD License
===================================
Copyright (C)\<YEAR\> \<AUTHOR\>. All Rights Reserved.
Copyright (C)2009-2019 D. R. Commander. All Rights Reserved.
Copyright (C)2015 Viktor Szathmáry. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@ -118,28 +119,6 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
The zlib License
================
Copyright (C) \<YEAR\>, \<AUTHOR\>.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Why Three Licenses?
===================

View File

@ -48,6 +48,10 @@ To upgrade to a new revision of libjpeg-turbo, do the following:
$ hg addremove
== September 5, 2019 (libjpeg-turbo v2.0.3 5db6a6819d0f904e0b58f34ae928fea234adb1a0 2019-09-04) ==
* Updated to v2.0.3 release.
== October 4, 2018 (libjpeg-turbo v2.0.0 574f3a772c96dc9db2c98ef24706feb3f6dbda9a 2018-06-27) ==
* Updated to v2.0.0 release.

View File

@ -135,12 +135,11 @@ without recompiling. libjpeg-turbo does not claim to support all of the
libjpeg v7+ features, nor to produce identical output to libjpeg v7+ in all
cases (see below.)
By passing an argument of `--with-jpeg7` or `--with-jpeg8` to `configure`, or
an argument of `-DWITH_JPEG7=1` or `-DWITH_JPEG8=1` to `cmake`, you can build a
version of libjpeg-turbo that emulates the libjpeg v7 or v8 ABI, so that
programs that are built against libjpeg v7 or v8 can be run with libjpeg-turbo.
The following section describes which libjpeg v7+ features are supported and
which aren't.
By passing an argument of `-DWITH_JPEG7=1` or `-DWITH_JPEG8=1` to `cmake`, you
can build a version of libjpeg-turbo that emulates the libjpeg v7 or v8 ABI, so
that programs that are built against libjpeg v7 or v8 can be run with
libjpeg-turbo. The following section describes which libjpeg v7+ features are
supported and which aren't.
### Support for libjpeg v7 and v8 Features
@ -247,9 +246,8 @@ don't, and it allows those functions to be provided in the "official"
libjpeg-turbo binaries.
Those who are concerned about maintaining strict conformance with the libjpeg
v6b or v7 API can pass an argument of `--without-mem-srcdst` to `configure` or
an argument of `-DWITH_MEM_SRCDST=0` to `cmake` prior to building
libjpeg-turbo. This will restore the pre-1.3 behavior, in which
v6b or v7 API can pass an argument of `-DWITH_MEM_SRCDST=0` to `cmake` prior to
building libjpeg-turbo. This will restore the pre-1.3 behavior, in which
`jpeg_mem_src()` and `jpeg_mem_dest()` are only included when emulating the
libjpeg v8 API/ABI.
@ -344,3 +342,15 @@ quality of 98-100. Thus, libjpeg-turbo must use the non-SIMD quantization
function in those cases. This causes performance to drop by as much as 40%.
It is therefore strongly advised that you use the slow integer forward DCT
whenever encoding images with a JPEG quality of 98 or higher.
Memory Debugger Pitfalls
========================
Valgrind and Memory Sanitizer (MSan) can generate false positives
(specifically, incorrect reports of uninitialized memory accesses) when used
with libjpeg-turbo's SIMD extensions. When testing libjpeg-turbo with
Valgrind, MSan, or other memory debuggers, it is therefore generally
recommended that the SIMD extensions be disabled, either by passing an argument
of `-DWITH_SIMD=0` to `cmake` when configuring the build or by setting the
environment variable `JSIMD_FORCENONE` to `1` at run time.

View File

@ -4,16 +4,16 @@
* This file was part of the Independent JPEG Group's software:
* Developed 1997-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, D. R. Commander.
* Copyright (C) 2015, 2018, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
* This file contains probability estimation tables for common use in
* arithmetic entropy encoding and decoding routines.
*
* This data represents Table D.2 in the JPEG spec (ISO/IEC IS 10918-1
* and CCITT Recommendation ITU-T T.81) and Table 24 in the JBIG spec
* (ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82).
* This data represents Table D.2 in
* Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 and Table 24 in
* Recommendation ITU-T T.82 (1993) | ISO/IEC 11544:1993.
*/
#define JPEG_INTERNALS
@ -29,9 +29,10 @@
* implementation (jbig_tab.c).
*/
#define V(i,a,b,c,d) (((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b)
#define V(i, a, b, c, d) \
(((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b)
const JLONG jpeg_aritab[113+1] = {
const JLONG jpeg_aritab[113 + 1] = {
/*
* Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
*/

View File

@ -31,7 +31,7 @@
*/
GLOBAL(void)
jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize)
{
int i;
@ -41,7 +41,7 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
if (structsize != sizeof(struct jpeg_compress_struct))
ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
(int) sizeof(struct jpeg_compress_struct), (int) structsize);
(int)sizeof(struct jpeg_compress_struct), (int)structsize);
/* For debugging purposes, we zero the whole master structure.
* But the application has already set the err pointer, and may have set
@ -59,7 +59,7 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
cinfo->is_decompressor = FALSE;
/* Initialize a memory manager instance for this object */
jinit_memory_mgr((j_common_ptr) cinfo);
jinit_memory_mgr((j_common_ptr)cinfo);
/* Zero out pointers to permanent structures. */
cinfo->progress = NULL;
@ -83,7 +83,7 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
/* Must do it here for emit_dqt in case jpeg_write_tables is used */
cinfo->block_size = DCTSIZE;
cinfo->natural_order = jpeg_natural_order;
cinfo->lim_Se = DCTSIZE2-1;
cinfo->lim_Se = DCTSIZE2 - 1;
#endif
cinfo->script_space = NULL;
@ -100,9 +100,9 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
*/
GLOBAL(void)
jpeg_destroy_compress (j_compress_ptr cinfo)
jpeg_destroy_compress(j_compress_ptr cinfo)
{
jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
jpeg_destroy((j_common_ptr)cinfo); /* use common routine */
}
@ -112,9 +112,9 @@ jpeg_destroy_compress (j_compress_ptr cinfo)
*/
GLOBAL(void)
jpeg_abort_compress (j_compress_ptr cinfo)
jpeg_abort_compress(j_compress_ptr cinfo)
{
jpeg_abort((j_common_ptr) cinfo); /* use common routine */
jpeg_abort((j_common_ptr)cinfo); /* use common routine */
}
@ -131,7 +131,7 @@ jpeg_abort_compress (j_compress_ptr cinfo)
*/
GLOBAL(void)
jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress)
{
int i;
JQUANT_TBL *qtbl;
@ -159,7 +159,7 @@ jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
*/
GLOBAL(void)
jpeg_finish_compress (j_compress_ptr cinfo)
jpeg_finish_compress(j_compress_ptr cinfo)
{
JDIMENSION iMCU_row;
@ -172,18 +172,18 @@ jpeg_finish_compress (j_compress_ptr cinfo)
} else if (cinfo->global_state != CSTATE_WRCOEFS)
ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
/* Perform any remaining passes */
while (! cinfo->master->is_last_pass) {
while (!cinfo->master->is_last_pass) {
(*cinfo->master->prepare_for_pass) (cinfo);
for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
if (cinfo->progress != NULL) {
cinfo->progress->pass_counter = (long) iMCU_row;
cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
cinfo->progress->pass_counter = (long)iMCU_row;
cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows;
(*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
}
/* We bypass the main controller and invoke coef controller directly;
* all work is being done from the coefficient buffer.
*/
if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
if (!(*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE)NULL))
ERREXIT(cinfo, JERR_CANT_SUSPEND);
}
(*cinfo->master->finish_pass) (cinfo);
@ -192,7 +192,7 @@ jpeg_finish_compress (j_compress_ptr cinfo)
(*cinfo->marker->write_file_trailer) (cinfo);
(*cinfo->dest->term_destination) (cinfo);
/* We can use jpeg_abort to release memory and reset global_state */
jpeg_abort((j_common_ptr) cinfo);
jpeg_abort((j_common_ptr)cinfo);
}
@ -204,8 +204,8 @@ jpeg_finish_compress (j_compress_ptr cinfo)
*/
GLOBAL(void)
jpeg_write_marker (j_compress_ptr cinfo, int marker,
const JOCTET *dataptr, unsigned int datalen)
jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr,
unsigned int datalen)
{
void (*write_marker_byte) (j_compress_ptr info, int val);
@ -226,7 +226,7 @@ jpeg_write_marker (j_compress_ptr cinfo, int marker,
/* Same, but piecemeal. */
GLOBAL(void)
jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
jpeg_write_m_header(j_compress_ptr cinfo, int marker, unsigned int datalen)
{
if (cinfo->next_scanline != 0 ||
(cinfo->global_state != CSTATE_SCANNING &&
@ -238,7 +238,7 @@ jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
}
GLOBAL(void)
jpeg_write_m_byte (j_compress_ptr cinfo, int val)
jpeg_write_m_byte(j_compress_ptr cinfo, int val)
{
(*cinfo->marker->write_marker_byte) (cinfo, val);
}
@ -266,13 +266,13 @@ jpeg_write_m_byte (j_compress_ptr cinfo, int val)
*/
GLOBAL(void)
jpeg_write_tables (j_compress_ptr cinfo)
jpeg_write_tables(j_compress_ptr cinfo)
{
if (cinfo->global_state != CSTATE_START)
ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
/* (Re)initialize error mgr and destination modules */
(*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
(*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
(*cinfo->dest->init_destination) (cinfo);
/* Initialize the marker writer ... bit of a crock to do it here. */
jinit_marker_writer(cinfo);

View File

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander.
* Copyright (C) 2009-2011, 2014-2016, 2018-2019, D. R. Commander.
* Copyright (C) 2015, Matthieu Darbois.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
@ -356,6 +356,8 @@ dump_buffer(working_state *state)
put_buffer = (put_buffer << size) | code; \
}
#if SIZEOF_SIZE_T != 8 && !defined(_WIN64)
#define CHECKBUF15() { \
if (put_bits > 15) { \
EMIT_BYTE() \
@ -363,6 +365,8 @@ dump_buffer(working_state *state)
} \
}
#endif
#define CHECKBUF31() { \
if (put_bits > 31) { \
EMIT_BYTE() \

View File

@ -492,8 +492,8 @@ prepare_for_pass(j_compress_ptr cinfo)
*/
master->pass_type = output_pass;
master->pass_number++;
/*FALLTHROUGH*/
#endif
/*FALLTHROUGH*/
case output_pass:
/* Do a data-output pass. */
/* We need not repeat per-scan setup if prior optimization pass did it. */

View File

@ -1,5 +1,5 @@
/* libjpeg-turbo build number */
#define BUILD "20181004"
#define BUILD "20190905"
/* Need to use Mozilla-specific function inlining. */
#include "mozilla/Attributes.h"
@ -9,7 +9,7 @@
#define PACKAGE_NAME "libjpeg-turbo"
/* Version number of package */
#define VERSION "2.0.0"
#define VERSION "2.0.3"
/* The size of `size_t', as computed by sizeof. */
#ifdef HAVE_64BIT_BUILD

View File

@ -592,7 +592,7 @@ ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Declarations for ordered dithering
*
* We use a 4x4 ordered dither array packed into 32 bits. This array is
* sufficent for dithering RGB888 to RGB565.
* sufficient for dithering RGB888 to RGB565.
*/
#define DITHER_MASK 0x3

View File

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1997, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2009-2011, 2016, 2018, D. R. Commander.
* Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -589,7 +589,11 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
/* Convert DC difference to actual value, update last_dc_val */
int ci = cinfo->MCU_membership[blkn];
s += state.last_dc_val[ci];
/* This is really just
* s += state.last_dc_val[ci];
* It is written this way in order to shut up UBSan.
*/
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
state.last_dc_val[ci] = s;
if (block) {
/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
@ -684,7 +688,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
if (entropy->dc_needed[blkn]) {
int ci = cinfo->MCU_membership[blkn];
s += state.last_dc_val[ci];
s = (int)((unsigned int)s + (unsigned int)state.last_dc_val[ci]);
state.last_dc_val[ci] = s;
if (block)
(*block)[0] = (JCOEF)s;

View File

@ -429,8 +429,6 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
#define PACK_TWO_PIXELS_LE(l, r) ((r << 16) | l)
#define PACK_TWO_PIXELS_BE(l, r) ((l << 16) | r)
#define PACK_NEED_ALIGNMENT(ptr) (((size_t)(ptr)) & 3)
#define WRITE_TWO_PIXELS_LE(addr, pixels) { \
((INT16 *)(addr))[0] = (INT16)(pixels); \
((INT16 *)(addr))[1] = (INT16)((pixels) >> 16); \
@ -448,7 +446,7 @@ h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
/* Declarations for ordered dithering
*
* We use a 4x4 ordered dither array packed into 32 bits. This array is
* sufficent for dithering RGB888 to RGB565.
* sufficient for dithering RGB888 to RGB565.
*/
#define DITHER_MASK 0x3

View File

@ -8,6 +8,7 @@
* Copyright (C) 2010, 2015-2016, D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California.
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2019, Arm Limited.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -315,9 +316,9 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
JSAMPARRAY output_data = *output_data_ptr;
JSAMPROW inptr0, inptr1, outptr;
#if BITS_IN_JSAMPLE == 8
int thiscolsum;
int thiscolsum, bias;
#else
JLONG thiscolsum;
JLONG thiscolsum, bias;
#endif
JDIMENSION colctr;
int inrow, outrow, v;
@ -327,15 +328,18 @@ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
for (v = 0; v < 2; v++) {
/* inptr0 points to nearest input row, inptr1 points to next nearest */
inptr0 = input_data[inrow];
if (v == 0) /* next nearest is row above */
if (v == 0) { /* next nearest is row above */
inptr1 = input_data[inrow - 1];
else /* next nearest is row below */
bias = 1;
} else { /* next nearest is row below */
inptr1 = input_data[inrow + 1];
bias = 2;
}
outptr = output_data[outrow++];
for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
*outptr++ = (JSAMPLE)((thiscolsum + 1) >> 2);
*outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
}
}
inrow++;

View File

@ -154,7 +154,7 @@ typedef struct {
*/
boolean is_padded; /* is the colorindex padded for odither? */
int Ncolors[MAX_Q_COMPS]; /* # of values alloced to each component */
int Ncolors[MAX_Q_COMPS]; /* # of values allocated to each component */
/* Variables for ordered dithering */
int row_index; /* cur row's vertical index in dither matrix */

View File

@ -4,7 +4,7 @@
* This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
* libjpeg-turbo Modifications:
* Copyright (C) 2010, 2012-2018, D. R. Commander.
* Copyright (C) 2010, 2012-2019, D. R. Commander.
* For conditions of distribution and use, see the accompanying README.ijg
* file.
*
@ -36,7 +36,7 @@
*/
#define JCOPYRIGHT \
"Copyright (C) 2009-2018 D. R. Commander\n" \
"Copyright (C) 2009-2019 D. R. Commander\n" \
"Copyright (C) 2011-2016 Siarhei Siamashka\n" \
"Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
"Copyright (C) 2015 Intel Corporation\n" \
@ -49,4 +49,4 @@
"Copyright (C) 1991-2016 Thomas G. Lane, Guido Vollbeding"
#define JCOPYRIGHT_SHORT \
"Copyright (C) 1991-2018 The libjpeg-turbo Project and many others"
"Copyright (C) 1991-2019 The libjpeg-turbo Project and many others"

View File

@ -5,6 +5,7 @@
* Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, D. R. Commander.
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
* Copyright (C) 2019, Google LLC.
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -30,7 +31,7 @@
static unsigned int simd_support = ~0;
static unsigned int simd_huffman = 1;
#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
@ -105,7 +106,7 @@ init_simd(void)
#ifndef NO_GETENV
char *env = NULL;
#endif
#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
int bufsize = 1024; /* an initial guess for the line buffer size limit */
#endif

View File

@ -63,7 +63,7 @@ _\fname:
trn2 \x1\literal, \xi\literal, \x1\literal
.endm
/* Transpose elements of 2 differnet registers */
/* Transpose elements of 2 different registers */
.macro transpose x0, x1, xi, xilen, literal
mov \xi\xilen, \x0\xilen
trn1 \x0\literal, \x0\literal, \x1\literal

View File

@ -329,6 +329,8 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
add LUT, 16*SIZEOF_INT
dec K
jnz .BLOOP16
test LEN, 15
je .PADDING
.ELOOP16:
mov LENEND, LEN
and LENEND, 7

View File

@ -51,35 +51,14 @@ EXTN(jpeg_simd_cpu_support):
xor eax, edx
jz near .return ; CPUID is not supported
; Check for MMX instruction support
; Check whether CPUID leaf 07H is supported
; (leaf 07H is used to check for AVX2 instruction support)
xor eax, eax
cpuid
test eax, eax
jz near .return
xor eax, eax
inc eax
cpuid
mov eax, edx ; eax = Standard feature flags
test eax, 1<<23 ; bit23:MMX
jz short .no_mmx
or edi, byte JSIMD_MMX
.no_mmx:
test eax, 1<<25 ; bit25:SSE
jz short .no_sse
or edi, byte JSIMD_SSE
.no_sse:
test eax, 1<<26 ; bit26:SSE2
jz short .no_sse2
or edi, byte JSIMD_SSE2
.no_sse2:
; Check for level 7 support
xor eax, eax
cpuid
cmp eax, 7
jl short .no_avx2
jl short .no_avx2 ; Maximum leaf < 07H
; Check for AVX2 instruction support
mov eax, 7
@ -93,10 +72,8 @@ EXTN(jpeg_simd_cpu_support):
mov eax, 1
xor ecx, ecx
cpuid
test ecx, 1<<26
jz short .no_avx2 ; O/S does not support XSAVE
test ecx, 1<<27
jz short .no_avx2 ; O/S does not support OSXSAVE
jz short .no_avx2 ; O/S does not support XSAVE
test ecx, 1<<28
jz short .no_avx2 ; CPU does not support AVX (required for AVX2)
@ -110,6 +87,26 @@ EXTN(jpeg_simd_cpu_support):
or edi, JSIMD_AVX2
.no_avx2:
; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
xor eax, eax
inc eax
cpuid
mov eax, edx ; eax = Standard feature flags
; Check for MMX instruction support
test eax, 1<<23 ; bit23:MMX
jz short .no_mmx
or edi, byte JSIMD_MMX
.no_mmx:
test eax, 1<<25 ; bit25:SSE
jz short .no_sse
or edi, byte JSIMD_SSE
.no_sse:
test eax, 1<<26 ; bit26:SSE2
jz short .no_sse2
or edi, byte JSIMD_SSE2
.no_sse2:
; Check for 3DNow! instruction support
mov eax, 0x80000000
cpuid

View File

@ -2,12 +2,13 @@
* Loongson MMI optimizations for libjpeg-turbo
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved.
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
* Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
* Authors: ZhuChen <zhuchen@loongson.cn>
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
* CaiWanwei <caiwanwei@loongson.cn>
* ZhangLixia <zhanglixia-hf@loongson.cn>
*
* Based on the x86 SIMD extension for IJG JPEG library
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
"$14", "memory"
);
} else {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmG = _mm_load_si64((__m64 *)&inptr[8]);
mmF = _mm_load_si64((__m64 *)&inptr[16]);
if (!(((long)inptr) & 7)) {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmG = _mm_load_si64((__m64 *)&inptr[8]);
mmF = _mm_load_si64((__m64 *)&inptr[16]);
} else {
mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
mmG = _mm_loadu_si64((__m64 *)&inptr[8]);
mmF = _mm_loadu_si64((__m64 *)&inptr[16]);
}
inptr += RGB_PIXELSIZE * 8;
}
mmD = mmA;
@ -268,10 +275,17 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
: "$f0", "$f2", "$8", "$9", "$10", "$11", "$13", "memory"
);
} else {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmF = _mm_load_si64((__m64 *)&inptr[8]);
mmD = _mm_load_si64((__m64 *)&inptr[16]);
mmC = _mm_load_si64((__m64 *)&inptr[24]);
if (!(((long)inptr) & 7)) {
mmA = _mm_load_si64((__m64 *)&inptr[0]);
mmF = _mm_load_si64((__m64 *)&inptr[8]);
mmD = _mm_load_si64((__m64 *)&inptr[16]);
mmC = _mm_load_si64((__m64 *)&inptr[24]);
} else {
mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
mmF = _mm_loadu_si64((__m64 *)&inptr[8]);
mmD = _mm_loadu_si64((__m64 *)&inptr[16]);
mmC = _mm_loadu_si64((__m64 *)&inptr[24]);
}
inptr += RGB_PIXELSIZE * 8;
}
mmB = mmA;

View File

@ -1,8 +1,9 @@
/*
* Loongson MMI optimizations for libjpeg-turbo
*
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
* All Rights Reserved.
* Copyright (C) 2019, D. R. Commander. All Rights Reserved.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
@ -41,7 +42,7 @@ typedef float __m32;
/********** Set Operations **********/
extern __inline __m64
extern __inline __m64 FUNCTION_ATTRIBS
_mm_setzero_si64(void)
{
return 0.0;
@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src)
asm("ldc1 %0, %1\n\t"
: "=f" (ret)
: "m" (*src)
: "memory"
);
return ret;
}
extern __inline __m64 FUNCTION_ATTRIBS
_mm_loadu_si64(const __m64 *src)
{
__m64 ret;
asm("gsldlc1 %0, 7(%1)\n\t"
"gsldrc1 %0, 0(%1)\n\t"
: "=f" (ret)
: "r" (src)
: "memory"
);
return ret;

View File

@ -692,8 +692,10 @@ jsimd_can_convsamp_float(void)
if (sizeof(ISLOW_MULT_TYPE) != 2)
return 0;
#ifndef __mips_soft_float
if (simd_support & JSIMD_DSPR2)
return 1;
#endif
return 0;
}
@ -709,7 +711,9 @@ GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
FAST_FLOAT *workspace)
{
#ifndef __mips_soft_float
jsimd_convsamp_float_dspr2(sample_data, start_col, workspace);
#endif
}
GLOBAL(int)
@ -805,8 +809,10 @@ jsimd_can_quantize_float(void)
if (sizeof(ISLOW_MULT_TYPE) != 2)
return 0;
#ifndef __mips_soft_float
if (simd_support & JSIMD_DSPR2)
return 1;
#endif
return 0;
}
@ -821,7 +827,9 @@ GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
FAST_FLOAT *workspace)
{
#ifndef __mips_soft_float
jsimd_quantize_float_dspr2(coef_block, divisors, workspace);
#endif
}
GLOBAL(int)

View File

@ -2810,6 +2810,8 @@ LEAF_DSPR2(jsimd_quantize_dspr2)
END(jsimd_quantize_dspr2)
#ifndef __mips_soft_float
/*****************************************************************************/
LEAF_DSPR2(jsimd_quantize_float_dspr2)
/*
@ -2890,6 +2892,8 @@ LEAF_DSPR2(jsimd_quantize_float_dspr2)
END(jsimd_quantize_float_dspr2)
#endif
/*****************************************************************************/
LEAF_DSPR2(jsimd_idct_2x2_dspr2)
@ -4110,6 +4114,8 @@ LEAF_DSPR2(jsimd_convsamp_dspr2)
END(jsimd_convsamp_dspr2)
#ifndef __mips_soft_float
/*****************************************************************************/
LEAF_DSPR2(jsimd_convsamp_float_dspr2)
/*
@ -4468,4 +4474,6 @@ LEAF_DSPR2(jsimd_convsamp_float_dspr2)
END(jsimd_convsamp_float_dspr2)
#endif
/*****************************************************************************/

View File

@ -322,6 +322,8 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
add LUT, 16*SIZEOF_INT
dec K
jnz .BLOOP16
test LEN, 15
je .PADDING
.ELOOP16:
test LEN, 8
jz .TRY7

View File

@ -38,20 +38,23 @@ EXTN(jpeg_simd_cpu_support):
xor rdi, rdi ; simd support flag
; Check for level 7 support
xor rax, rax
cpuid
; Assume that all x86-64 processors support SSE & SSE2 instructions
or rdi, JSIMD_SSE2
or rdi, JSIMD_SSE
; Check whether CPUID leaf 07H is supported
; (leaf 07H is used to check for AVX2 instruction support)
mov rax, 0
cpuid
cmp rax, 7
jl short .return
jl short .return ; Maximum leaf < 07H
; Check for AVX2 instruction support
mov rax, 7
xor rcx, rcx
cpuid
mov rax, rbx ; rax = Extended feature flags
test rax, 1<<5 ; bit5:AVX2
jz short .return
@ -59,10 +62,8 @@ EXTN(jpeg_simd_cpu_support):
mov rax, 1
xor rcx, rcx
cpuid
test rcx, 1<<26
jz short .return ; O/S does not support XSAVE
test rcx, 1<<27
jz short .return ; O/S does not support OSXSAVE
jz short .return ; O/S does not support XSAVE
test rcx, 1<<28
jz short .return ; CPU does not support AVX (required for AVX2)