update source code

Signed-off-by: lizimian <lizimian@huawei.com>
This commit is contained in:
lizimian
2022-05-23 20:52:55 +08:00
parent 86fd1eab80
commit 91b197a3de
122 changed files with 17236 additions and 6955 deletions
@@ -1,45 +1,43 @@
@echo off
rem *************************************************************************************************
rem usage:
rem AutoBuildForWPAndWindows.bat % Configuration %
rem AutoBuildForWindows.bat Configuration [-winsdk_version=winsdk_version] [-vc_version=vc_version]
rem --For debug version:
rem Win32-C-Only: AutoBuildForWPAndWindows.bat Win32-Debug-C
rem Win32-ASM: AutoBuildForWPAndWindows.bat Win32-Debug-ASM
rem Win64-C-Only: AutoBuildForWPAndWindows.bat Win64-Debug-C
rem Win64-ASM: AutoBuildForWPAndWindows.bat Win64-Debug-ASM
rem ARM-C-Only(WP8): AutoBuildForWPAndWindows.bat ARM-Debug-C
rem ARM-ASM(WP8): AutoBuildForWPAndWindows.bat ARM-Debug-ASM
rem Win32-C-Only: AutoBuildForWindows.bat Win32-Debug-C
rem Win32-ASM: AutoBuildForWindows.bat Win32-Debug-ASM
rem Win64-C-Only: AutoBuildForWindows.bat Win64-Debug-C
rem Win64-ASM: AutoBuildForWindows.bat Win64-Debug-ASM
rem ARM64-C-Only: AutoBuildForWindows.bat ARM64-Debug-C
rem ARM64-ASM: AutoBuildForWindows.bat ARM64-Debug-ASM
rem --For release version:
rem Win32-C-Only: AutoBuildForWPAndWindows.bat Win32-Release-C
rem Win32-ASM: AutoBuildForWPAndWindows.bat Win32-Release-ASM
rem Win64-C-Only: AutoBuildForWPAndWindows.bat Win64-Release-C
rem Win64-ASM(WP8): AutoBuildForWPAndWindows.bat Win64-Release-ASM
rem ARM-C-Only(WP8): AutoBuildForWPAndWindows.bat ARM-Release-C
rem ARM-ASM(WP8): AutoBuildForWPAndWindows.bat ARM-Release-ASM
rem Win32-C-Only: AutoBuildForWindows.bat Win32-Release-C
rem Win32-ASM: AutoBuildForWindows.bat Win32-Release-ASM
rem Win64-C-Only: AutoBuildForWindows.bat Win64-Release-C
rem Win64-ASM: AutoBuildForWindows.bat Win64-Release-ASM
rem ARM64-C-Only: AutoBuildForWindows.bat ARM64-Release-C
rem ARM64-ASM: AutoBuildForWindows.bat ARM64-Release-ASM
rem --For debug and release version:
rem Win32-C-Only: AutoBuildForWPAndWindows.bat Win32-All-C
rem Win32-ASM: AutoBuildForWPAndWindows.bat Win32-All-ASM
rem Win64-C-Only: AutoBuildForWPAndWindows.bat Win64-All-C
rem Win64-ASM: AutoBuildForWPAndWindows.bat Win64-All-ASM
rem ARM-C-Only(WP8): AutoBuildForWPAndWindows.bat ARM-All-C
rem ARM-ASM(WP8): AutoBuildForWPAndWindows.bat ARM-All-ASM
rem Win32-C-Only: AutoBuildForWindows.bat Win32-All-C
rem Win32-ASM: AutoBuildForWindows.bat Win32-All-ASM
rem Win64-C-Only: AutoBuildForWindows.bat Win64-All-C
rem Win64-ASM: AutoBuildForWindows.bat Win64-All-ASM
rem ARM64-C-Only: AutoBuildForWindows.bat ARM64-All-C
rem ARM64-ASM: AutoBuildForWindows.bat ARM64-All-ASM
rem --For default:
rem AutoBuildForWPAndWindows.bat
rem ARM-All-ASM(WP8)
rem AutoBuildForWindows.bat
rem Win32-Release-ASM
rem
rem --lib/dll files will be copied to folder .\bin
rem --win32 folder bin\i386*
rem --win64 folder bin\x86_64*
rem --arm folder bin\arm*
rem --win32 folder bin\Win32
rem --win64 folder bin\x64
rem --arm64 folder bin\ARM64
rem
rem [winsdk_version] : full Windows 10 SDK number (e.g. 10.0.17763.0)
rem [vc_version] : Specify a VC++ version
rem VC16 for VC++ 2019
rem VC15 for VC++ 2017
rem
rem Environment:
rem ----for windows phone, Visual studio with update 3 or later is needed
rem ----gas-preprocessor(windows phone build only)
rem --you can clone it from git://git.libav.org/gas-preprocessor.git
rem --for more detail, please refer to https://git.libav.org/?p=gas-preprocessor.git
rem -- and then set gas-preprocessor path to the %GasScriptPath% variable in this script
rem or just copy to VC2013 bin's path,you can refer to variable %VC12Path%
rem
rem ----MinGW
rem --install MinGW tools
rem --more detail, please refer to http://www.mingw.org/
@@ -47,7 +45,7 @@ rem
rem 2015/03/15 huashi@cisco.com
rem *************************************************************************************************
set WP8Flag=0
set "OPENH264_BUILD_ARGS_LIST=%*"
call :BasicSetting
call :PathSetting
call :SetBuildOption %1
@@ -55,6 +53,7 @@ if not %ERRORLEVEL%==0 (
echo not suppot option!
goto :ErrorReturn
)
call :ParseAdditionalArgs
call :EnvSetting %1
call :BuildResultInit
call :RunBuild
@@ -97,7 +96,25 @@ goto :EOF
:EnvSetting
set MinGWPath=C:\MinGW\bin
set MsysPath=C:\MinGW\msys\1.0\bin
set VC16BuildToolsPath=C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC
set VC16CommunityPath=C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC
set VC16ProfessionalPath=C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC
set VC16EnterprisePath=C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC
if exist "%VC16BuildToolsPath%" set VC16PATH=%VC16BuildToolsPath%
if exist "%VC16CommunityPath%" set VC16PATH=%VC16CommunityPath%
if exist "%VC16ProfessionalPath%" set VC16PATH=%VC16ProfessionalPath%
if exist "%VC16EnterprisePath%" set VC16PATH=%VC16EnterprisePath%
set GitPath=C:\Program Files (x86)\Git\bin
set VC15BuildToolsPath=C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC
set VC15CommunityPath=C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC
set VC15ProfessionalPath=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\VC
set VC15EnterprisePath=C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC
if exist "%VC15BuildToolsPath%" set VC15PATH=%VC15BuildToolsPath%
if exist "%VC15CommunityPath%" set VC15PATH=%VC15CommunityPath%
if exist "%VC15ProfessionalPath%" set VC15PATH=%VC15ProfessionalPath%
if exist "%VC15EnterprisePath%" set VC15PATH=%VC15EnterprisePath%
set VC14Path=C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC
set VC12Path=C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC
@@ -105,55 +122,58 @@ goto :EOF
set VC10Path=C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC
set VC9Path=C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC
set VC12ArmLib01=C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\lib\store\arm
set VC12ArmLib02=C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\lib\arm
set WP8KitLib=C:\Program Files (x86)\Windows Phone Kits\8.1\lib\arm
if exist "%VC9Path%" set VCPATH=%VC9Path%
if exist "%VC10Path%" set VCPATH=%VC10Path%
if exist "%VC11Path%" set VCPATH=%VC11Path%
if exist "%VC12Path%" set VCPATH=%VC12Path%
if exist "%VC14Path%" set VCPATH=%VC14Path%
if %WP8Flag%==1 set VCPATH=%VC12Path%
set VCVARSPATH=%VCPATH%
if exist "%VC15Path%" set VCVARSPATH=%VC15Path%\Auxiliary\Build
if exist "%VC16Path%" set VCVARSPATH=%VC16Path%\Auxiliary\Build
if /I "%OPENH264_VC_VERSION%" == "VC15" (
set VCPATH=
set "VCVARSPATH=%VC15Path%\Auxiliary\Build"
) else if /I "%OPENH264_VC_VERSION%" == "VC12" (
set "VCPATH=%VC12Path%"
set "VCVARSPATH=%VCPATH%"
)
set GasScriptPath=%VCPATH%\bin
if "%vArcType%" =="i386" set PATH=%MinGWPath%;%MsysPath%;%VCPATH%\bin;%GitPath%;%PATH%
if "%vArcType%" =="x86_64" set PATH=%MinGWPath%;%MsysPath%;%VCPATH%\bin;%GitPath%;%PATH%
if "%vArcType%" =="arm" set PATH=%MinGWPath%;%MsysPath%;%VCPATH%\bin;%GitPath%;%PATH%
rem if "%vArcType%" =="arm" set PATH=C:\MinGW\bin;C:\MinGW\msys\1.0\bin;C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin;C:\Program Files (x86)\Git\bin;%PATH%
if "%VCPATH%" NEQ "" (
set "VCPATH_BIN=%VCPATH%\bin"
)
if "%vArcType%" == "arm64" (
set "CLANG_BIN=%VC16PATH%\tools\llvm\bin\"
)
if "%vArcType%" =="i386" call "%VCPATH%\vcvarsall.bat" x86
if "%vArcType%" =="x86_64" call "%VCPATH%\vcvarsall.bat" x64
if "%vArcType%" =="arm" call "%VCPATH%\vcvarsall.bat" x86_arm
if %WP8Flag%==1 call :WPSetting
if "%vArcType%" =="i386" set "PATH=%MinGWPath%;%MsysPath%;%VCPATH_BIN%;%GitPath%;%PATH%"
if "%vArcType%" =="x86_64" set "PATH=%MinGWPath%;%MsysPath%;%VCPATH_BIN%;%GitPath%;%PATH%"
if "%vArcType%" =="arm64" set "PATH=%MinGWPath%;%MsysPath%;%VCPATH_BIN%;%GitPath%;%CLANG_BIN%;%PATH%"
if "%vArcType%" =="i386" call "%VCVARSPATH%\vcvarsall.bat" x86 %OPENH264_WINSDK_VERSION%
if "%vArcType%" =="x86_64" call "%VCVARSPATH%\vcvarsall.bat" x64 %OPENH264_WINSDK_VERSION%
if "%vArcType%" =="arm64" (
if "%vEnable64BitFlag%" == "Yes" (
call "%VCVARSPATH%\vcvarsall.bat" amd64_arm64 %OPENH264_WINSDK_VERSION%
)
if "%vEnable64BitFlag%" == "No" (
call "%VCVARSPATH%\vcvarsall.bat" x86_arm64 %OPENH264_WINSDK_VERSION%
)
)
echo PATH is %PATH%
echo LIB is %LIB%
goto :EOF
:WPSetting
set LIB=%VC12ArmLib01%;%VC12ArmLib02%;%WP8KitLib%
echo LIB setting for wp8 is:
echo %LIB%
if not exist "%VC12Path%" (
echo VC12 does not exist,
echo ******************************************
echo VC12 does not exist,
echo which is needed for windows phone
echo ******************************************
goto :ErrorReturn
)
goto :EOF
:SetBuildOption
if "aaa%1"=="aaa" (
set aConfigurationList=Debug Release
set vArcType=arm
set vOSType=msvc-wp
set aConfigurationList=Release
set vArcType=i386
set vOSType=msvc
set vEnable64BitFlag=No
set vASMFlag=Yes
set WP8Flag=1
echo default setting
) else if "%1"=="Win32-Debug-C" (
set aConfigurationList=Debug
@@ -183,22 +203,20 @@ goto :EOF
set vEnable64BitFlag=Yes
set vASMFlag=No
echo Win64-Release-C setting
) else if "%1"=="ARM-Debug-C" (
) else if "%1"=="ARM64-Debug-C" (
set aConfigurationList=Debug
set vArcType=arm
set vOSType=msvc-wp
set vArcType=arm64
set vOSType=msvc
set vEnable64BitFlag=No
set vASMFlag=No
set WP8Flag=1
echo ARM-Debug-C setting
) else if "%1"=="ARM-Release-C" (
set aConfigurationList=Debug Release
set vArcType=arm
set vOSType=msvc-wp
echo ARM64-Debug-C setting
) else if "%1"=="ARM64-Release-C" (
set aConfigurationList=Release
set vArcType=arm64
set vOSType=msvc
set vEnable64BitFlag=No
set vASMFlag=No
set WP8Flag=1
echo ARM-Release-C setting
echo ARM64-Release-C setting
) else if "%1"=="Win32-All-C" (
set aConfigurationList=Debug Release
set vArcType=i386
@@ -213,14 +231,13 @@ goto :EOF
set vEnable64BitFlag=Yes
set vASMFlag=No
echo All-C setting
) else if "%1"=="ARM-All-C" (
) else if "%1"=="ARM64-All-C" (
set aConfigurationList=Debug Release
set vArcType=arm
set vOSType=msvc-wp
set vArcType=arm64
set vOSType=msvc
set vEnable64BitFlag=No
set vASMFlag=No
set WP8Flag=1
echo ARM-All-C setting
echo ARM64-All-C setting
) else if "%1"=="Win32-Debug-ASM" (
set aConfigurationList=Debug
set vArcType=i386
@@ -249,22 +266,20 @@ goto :EOF
set vEnable64BitFlag=Yes
set vASMFlag=Yes
echo Win64-Release-ASM setting
) else if "%1"=="ARM-Debug-ASM" (
) else if "%1"=="ARM64-Debug-ASM" (
set aConfigurationList=Debug
set vArcType=arm
set vOSType=msvc-wp
set vArcType=arm64
set vOSType=msvc
set vEnable64BitFlag=No
set vASMFlag=Yes
set WP8Flag=1
echo ARM-Debug-ASM setting
) else if "%1"=="ARM-Release-ASM" (
echo ARM64-Debug-ASM setting
) else if "%1"=="ARM64-Release-ASM" (
set aConfigurationList=Release
set vArcType=arm
set vOSType=msvc-wp
set vArcType=arm64
set vOSType=msvc
set vEnable64BitFlag=No
set vASMFlag=Yes
set WP8Flag=1
echo ARM-Release-ASM setting
echo ARM64-Release-ASM setting
) else if "%1"=="Win32-All-ASM" (
set aConfigurationList=Debug Release
set vArcType=i386
@@ -279,14 +294,13 @@ goto :EOF
set vEnable64BitFlag=Yes
set vASMFlag=Yes
echo All-ASM setting
) else if "%1"=="ARM-All-ASM" (
) else if "%1"=="ARM64-All-ASM" (
set aConfigurationList=Debug Release
set vArcType=arm
set vOSType=msvc-wp
set vArcType=arm64
set vOSType=msvc
set vEnable64BitFlag=No
set vASMFlag=Yes
set WP8Flag=1
echo ARM-All-ASM setting
echo ARM64-All-ASM setting
) else (
call :help
goto :ErrorReturn
@@ -351,31 +365,36 @@ rem ***********************************************
:help
echo *******************************************************************************
echo usage:
echo AutoBuildForWPAndWindows.bat % Configuration %
echo AutoBuildForWindows.bat Configuration [-winsdk_version=winsdk_version] [-vc_version=vc_version]
echo --For debug version:
echo Win32-C-Only: AutoBuildForWPAndWindows.bat Win32-Debug-C
echo Win32-ASM: AutoBuildForWPAndWindows.bat Win32-Debug-ASM
echo Win64-C-Only: AutoBuildForWPAndWindows.bat Win64-Debug-C
echo Win64-ASM: AutoBuildForWPAndWindows.bat Win64-Debug-ASM
echo ARM-C-Only(WP8): AutoBuildForWPAndWindows.bat ARM-Debug-C
echo ARM-ASM(WP8): AutoBuildForWPAndWindows.bat ARM-Debug-ASM
echo Win32-C-Only: AutoBuildForWindows.bat Win32-Debug-C
echo Win32-ASM: AutoBuildForWindows.bat Win32-Debug-ASM
echo Win64-C-Only: AutoBuildForWindows.bat Win64-Debug-C
echo Win64-ASM: AutoBuildForWindows.bat Win64-Debug-ASM
echo ARM64-C-Only: AutoBuildForWindows.bat ARM64-Debug-C
echo ARM64-ASM: AutoBuildForWindows.bat ARM64-Debug-ASM
echo --For release version:
echo Win32-C-Only: AutoBuildForWPAndWindows.bat Win32-Release-C
echo Win32-ASM: AutoBuildForWPAndWindows.bat Win32-Release-ASM
echo Win64-C-Only: AutoBuildForWPAndWindows.bat Win64-Release-C
echo Win64-ASM: AutoBuildForWPAndWindows.bat Win64-Release-ASM
echo ARM-C-Only(WP8): AutoBuildForWPAndWindows.bat ARM-Release-C
echo ARM-ASM(WP8): AutoBuildForWPAndWindows.bat ARM-Release-ASM
echo Win32-C-Only: AutoBuildForWindows.bat Win32-Release-C
echo Win32-ASM: AutoBuildForWindows.bat Win32-Release-ASM
echo Win64-C-Only: AutoBuildForWindows.bat Win64-Release-C
echo Win64-ASM: AutoBuildForWindows.bat Win64-Release-ASM
echo ARM64-C-Only: AutoBuildForWindows.bat ARM64-Release-C
echo ARM64-ASM: AutoBuildForWindows.bat ARM64-Release-ASM
echo --For debug and release version:
echo Win32-C-Only: AutoBuildForWPAndWindows.bat Win32-All-C
echo Win32-ASM: AutoBuildForWPAndWindows.bat Win32-All-ASM
echo Win64-C-Only: AutoBuildForWPAndWindows.bat Win64-All-C
echo Win64-ASM: AutoBuildForWPAndWindows.bat Win64-All-ASM
echo ARM-C-Only(WP8): AutoBuildForWPAndWindows.bat ARM-All-C
echo ARM-ASM(WP8): AutoBuildForWPAndWindows.bat ARM-All-ASM
echo Win32-C-Only: AutoBuildForWindows.bat Win32-All-C
echo Win32-ASM: AutoBuildForWindows.bat Win32-All-ASM
echo Win64-C-Only: AutoBuildForWindows.bat Win64-All-C
echo Win64-ASM: AutoBuildForWindows.bat Win64-All-ASM
echo ARM64-C-Only: AutoBuildForWindows.bat ARM64-All-C
echo ARM64-ASM: AutoBuildForWindows.bat ARM64-All-ASM
echo --For default:
echo AutoBuildForWPAndWindows.bat
echo ARM-All-ASM(WP8)
echo AutoBuildForWindows.bat
echo Win32-Release-ASM
echo [winsdk_version] : full Windows 10 SDK number (e.g. 10.0.17763.0)
echo [vc_version] : Specify a VC++ version
echo VC16 for VC++ 2019
echo VC15 for VC++ 2017
echo *******************************************************************************
goto :EOF
@@ -384,12 +403,12 @@ rem ***********************************************
:Build
set vConfiguration=%1
cd %RootDir%
echo bash -c "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% clean"
echo bash -c "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration%"
echo bash -c "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% plugin"
bash -c "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% clean"
bash -c "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration%"
bash -c "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% plugin"
echo "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% clean"
echo "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% %NATIVE_OPTIONS%"
echo "make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% %NATIVE_OPTIONS% plugin"
make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% clean
make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% %NATIVE_OPTIONS%
make OS=%vOSType% ARCH=%vArcType% USE_ASM=%vASMFlag% BUILDTYPE=%vConfiguration% %NATIVE_OPTIONS% plugin
if not %ERRORLEVEL%==0 (
set BuildFlag=1
)
@@ -400,8 +419,8 @@ goto :EOF
set vConfiguration=%1
set vBuildOption=%2
cd %RootDir%
if "%vArcType%"=="arm" (
set vBinDirName=ARM
if "%vArcType%"=="arm64" (
set vBinDirName=ARM64
) else if "%vArcType%"=="i386" (
set vBinDirName=Win32
) else (
@@ -421,17 +440,35 @@ goto :EOF
cd
set DestDir=bin/%vBinDirName%/%vConfiguration%
echo DestDir is %DestDir%
if "%vOSType%"=="msvc-wp" (
set aFileList=%DllFile% %LibFile% %PDBFile% %UTDllFile%
) else (
set aFileList=%DllFile% %LibFile% %PDBFile% %UTBinFile% %EncBinFile% %DecBinFile%
)
set aFileList=%DllFile% %LibFile% %PDBFile% %UTBinFile% %EncBinFile% %DecBinFile%
for %%k in (%aFileList%) do (
bash -c "cp -f %%k %DestDir%"
)
cd %WorkingDir%
goto :EOF
:ParseAdditionalArgs
for /F "tokens=1,* delims= " %%a in ("%OPENH264_BUILD_ARGS_LIST%") do (
if "%%a" == "--" (
set "NATIVE_OPTIONS=%%b"
set "OPENH264_BUILD_ARGS_LIST="
) else (
call :ParseArgument %%a
set "OPENH264_BUILD_ARGS_LIST=%%b"
)
goto :ParseAdditionalArgs
)
goto :EOF
:ParseArgument
if /I "%1" == "-winsdk_version" (
set OPENH264_WINSDK_VERSION=%2
)
if /I "%1" == "-vc_version" (
set OPENH264_VC_VERSION=%2
)
goto :EOF
:ErrorReturn
endlocal
exit /b 2
+42 -6
View File
@@ -13,7 +13,7 @@ endif
endif
#for arm
ifneq ($(filter-out arm64, $(filter arm%, $(ARCH))),)
ifneq ($(filter-out arm64 arm64e, $(filter arm%, $(ARCH))),)
ifeq ($(USE_ASM), Yes)
ASM_ARCH = arm
ASMFLAGS += -I$(SRC_PATH)codec/common/arm/
@@ -22,7 +22,7 @@ endif
endif
#for arm64
ifneq ($(filter arm64 aarch64, $(ARCH)),)
ifneq ($(filter arm64 aarch64 arm64e, $(ARCH)),)
ifeq ($(USE_ASM), Yes)
ASM_ARCH = arm64
ASMFLAGS += -I$(SRC_PATH)codec/common/arm64/
@@ -30,14 +30,50 @@ CFLAGS += -DHAVE_NEON_AARCH64
endif
endif
#for loongson
#for mips
ifneq ($(filter mips mips64, $(ARCH)),)
ifeq ($(USE_ASM), Yes)
ENABLE_MMI=Yes
ENABLE_MSA=Yes
ASM_ARCH = mips
ASMFLAGS += -I$(SRC_PATH)codec/common/mips/
LOONGSON3A = $(shell g++ -dM -E - < /dev/null | grep '_MIPS_TUNE ' | cut -f 3 -d " ")
ifeq ($(LOONGSON3A), "loongson3a")
CFLAGS += -DHAVE_MMI
#mmi
ifeq ($(ENABLE_MMI), Yes)
ENABLE_MMI = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) mmi)
ifeq ($(ENABLE_MMI), Yes)
CFLAGS += -DHAVE_MMI -march=loongson3a
endif
endif
#msa
ifeq ($(ENABLE_MSA), Yes)
ENABLE_MSA = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) msa)
ifeq ($(ENABLE_MSA), Yes)
CFLAGS += -DHAVE_MSA -mmsa
endif
endif
endif
endif
#for loongarch
ifneq ($(filter loongarch64, $(ARCH)),)
ifeq ($(USE_ASM), Yes)
ENABLE_LSX=Yes
ENABLE_LASX=Yes
ASM_ARCH = loongarch
ASMFLAGS += -I$(SRC_PATH)codec/common/loongarch/
#lsx
ifeq ($(ENABLE_LSX), Yes)
ENABLE_LSX = $(shell $(SRC_PATH)build/loongarch-simd-check.sh $(CC) lsx)
ifeq ($(ENABLE_LSX), Yes)
CFLAGS += -DHAVE_LSX -mlsx
endif
endif
#lasx
ifeq ($(ENABLE_LASX), Yes)
ENABLE_LASX = $(shell $(SRC_PATH)build/loongarch-simd-check.sh $(CC) lasx)
ifeq ($(ENABLE_LASX), Yes)
CFLAGS += -DHAVE_LASX -mlasx
endif
endif
endif
endif
+36
View File
@@ -0,0 +1,36 @@
#!/bin/bash
# Copyright (c) 2021 Loongson Technology Corporation Limited
# Contributed by Xiwei Gu <guxiwei-hf@loongson.cn>
# Contributed by Lu Wang <wanglu@loongson.cn>
#
#***************************************************************************************
# This script is used in build/arch.mk for loongarch to detect the simd instructions:
# lsx, lasx (maybe more in the future).
#
# --usage:
#   ./loongarch-simd-check.sh $(CC) lsx
# or  ./loongarch-simd-check.sh $(CC) lasx
#
# --output:
#   prints "Yes" on stdout when the compiler given as $1 can build a tiny
#   program containing one instruction of the requested extension; prints
#   nothing otherwise (unknown/missing extension name, compile failure, or
#   scratch files that cannot be created).
#
# date: 11/23/2021 Created
#***************************************************************************************

# Select the compiler flag and the probe instruction for the requested
# extension. "$2" is quoted so that a missing argument is treated as
# "unsupported" instead of producing a `[` syntax error.
case "$2" in
  lsx)
    SIMD_FLAG=-mlsx
    SIMD_INSN='vadd.b $vr0, $vr1, $vr1'
    ;;
  lasx)
    SIMD_FLAG=-mlasx
    SIMD_INSN='xvadd.b $xr0, $xr1, $xr1'
    ;;
  *)
    exit 0
    ;;
esac

# Scratch files live in the current directory. If they cannot be created,
# report "unsupported" (empty output) rather than redirecting to an empty path.
TMPC=$(mktemp tmp.XXXXXX.c) || exit 0
TMPO=$(mktemp tmp.XXXXXX.o) || { rm -f "$TMPC"; exit 0; }

# Remove the scratch files whenever the script exits, not only on the
# normal path, so an aborted probe does not leave tmp.* files behind.
trap 'rm -f "$TMPC" "$TMPO"' EXIT

# Standard-conforming `int main` keeps the probe from tripping over
# compilers that reject or warn about `void main`.
printf 'int main(void){ __asm__ volatile("%s"); return 0; }\n' "$SIMD_INSN" > "$TMPC"

# $1 is left unquoted on purpose: a compiler passed as one quoted
# multi-word string (e.g. "ccache gcc") must still split into words.
$1 "$SIMD_FLAG" "$TMPC" -o "$TMPO" &> /dev/null

# mktemp created TMPO empty, so a non-empty file means the compiler
# actually produced output for the probe.
if test -s "$TMPO"
then
  echo "Yes"
fi

exit 0
+32
View File
@@ -0,0 +1,32 @@
#!/bin/bash
#**********************************************************************************
# This script is used in build/arch.mk for mips to detect the simd instructions:
# mmi, msa (maybe more in the future).
#
# --usage:
#   ./mips-simd-check.sh $(CC) mmi
# or  ./mips-simd-check.sh $(CC) msa
#
# --output:
#   prints "Yes" on stdout when the compiler given as $1 can build a tiny
#   program containing one instruction of the requested extension; prints
#   nothing otherwise (unknown/missing extension name, compile failure, or
#   scratch files that cannot be created).
#
# date: 10/17/2019 Created
#**********************************************************************************

# Select the compiler flag and the probe instruction for the requested
# extension. "$2" is quoted so that a missing argument is treated as
# "unsupported" instead of producing a `[` syntax error.
case "$2" in
  mmi)
    SIMD_FLAG=-march=loongson3a
    SIMD_INSN='punpcklhw $f0, $f0, $f0'
    ;;
  msa)
    SIMD_FLAG=-mmsa
    SIMD_INSN='addvi.b $w0, $w1, 1'
    ;;
  *)
    exit 0
    ;;
esac

# Scratch files live in the current directory. If they cannot be created,
# report "unsupported" (empty output) rather than redirecting to an empty path.
TMPC=$(mktemp tmp.XXXXXX.c) || exit 0
TMPO=$(mktemp tmp.XXXXXX.o) || { rm -f "$TMPC"; exit 0; }

# Remove the scratch files whenever the script exits, not only on the
# normal path, so an aborted probe does not leave tmp.* files behind.
trap 'rm -f "$TMPC" "$TMPO"' EXIT

# Standard-conforming `int main` keeps the probe from tripping over
# compilers that reject or warn about `void main`.
printf 'int main(void){ __asm__ volatile("%s"); return 0; }\n' "$SIMD_INSN" > "$TMPC"

# $1 is left unquoted on purpose: a compiler passed as one quoted
# multi-word string (e.g. "ccache gcc") must still split into words.
$1 "$SIMD_FLAG" "$TMPC" -o "$TMPO" &> /dev/null

# mktemp created TMPO empty, so a non-empty file means the compiler
# actually produced output for the probe.
if test -s "$TMPO"
then
  echo "Yes"
fi

exit 0
+68 -14
View File
@@ -119,12 +119,16 @@ for file in sfiles:
armfiles.append(file)
mipsfiles = []
for file in cfiles:
c = file.split('/')
if 'mips' in c:
mipsfiles.append(file)
c = file.split('/')
if 'mips' in c:
mipsfiles.append(file)
cfiles = [x for x in cfiles if x not in mipsfiles]
loongarchfiles = []
for file in cfiles:
c = file.split('/')
if 'loongarch' in c:
loongarchfiles.append(file)
cfiles = [x for x in cfiles if x not in loongarchfiles]
f = open(OUTFILE, "w")
@@ -181,15 +185,65 @@ if len(arm64files) > 0:
f.write("OBJS += $(%s_OBJSARM64)\n\n"%(PREFIX))
if len(mipsfiles) > 0:
f.write("%s_ASM_MIPS_SRCS=\\\n"%(PREFIX))
for c in mipsfiles:
f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
f.write("\n")
f.write("%s_OBJSMIPS += $(%s_ASM_MIPS_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
f.write("ifeq ($(ASM_ARCH), mips)\n")
f.write("%s_OBJS += $(%s_OBJSMIPS)\n"%(PREFIX,PREFIX))
f.write("endif\n")
f.write("OBJS += $(%s_OBJSMIPS)\n\n"%(PREFIX))
mmifiles = []
for file in mipsfiles:
if '_mmi' in file:
mmifiles.append(file)
f.write("%s_ASM_MIPS_MMI_SRCS=\\\n"%(PREFIX))
for c in mmifiles:
f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
f.write("\n")
f.write("%s_OBJSMIPS_MMI += $(%s_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))\n\n"%(PREFIX, PREFIX))
msafiles = []
for file in mipsfiles:
if '_msa' in file:
msafiles.append(file)
f.write("%s_ASM_MIPS_MSA_SRCS=\\\n"%(PREFIX))
for c in msafiles:
f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
f.write("\n")
f.write("%s_OBJSMIPS_MSA += $(%s_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
f.write("ifeq ($(ASM_ARCH), mips)\n")
f.write("ifeq ($(ENABLE_MMI), Yes)\n")
f.write("%s_OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX,PREFIX))
f.write("endif\n")
f.write("ifeq ($(ENABLE_MSA), Yes)\n")
f.write("%s_OBJS += $(%s_OBJSMIPS_MSA)\n"%(PREFIX,PREFIX))
f.write("endif\n")
f.write("endif\n")
f.write("OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX))
f.write("OBJS += $(%s_OBJSMIPS_MSA)\n\n"%(PREFIX))
if len(loongarchfiles) > 0:
lsxfiles = []
for file in loongarchfiles:
if '_lsx' in file:
lsxfiles.append(file)
f.write("%s_ASM_LOONGARCH_LSX_SRCS=\\\n"%(PREFIX))
for c in lsxfiles:
f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
f.write("\n")
f.write("%s_OBJSLOONGARCH_LSX += $(%s_ASM_LOONGARCH_LSX_SRCS:.c=.$(OBJ))\n\n"%(PREFIX, PREFIX))
lasxfiles = []
for file in loongarchfiles:
if '_lasx' in file:
lasxfiles.append(file)
f.write("%s_ASM_LOONGARCH_LASX_SRCS=\\\n"%(PREFIX))
for c in lasxfiles:
f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
f.write("\n")
f.write("%s_OBJSLOONGARCH_LASX += $(%s_ASM_LOONGARCH_LASX_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
f.write("ifeq ($(ASM_ARCH), loongarch)\n")
f.write("ifeq ($(ENABLE_LSX), Yes)\n")
f.write("%s_OBJS += $(%s_OBJSLOONGARCH_LSX)\n"%(PREFIX,PREFIX))
f.write("endif\n")
f.write("ifeq ($(ENABLE_LASX), Yes)\n")
f.write("%s_OBJS += $(%s_OBJSLOONGARCH_LASX)\n"%(PREFIX,PREFIX))
f.write("endif\n")
f.write("endif\n")
f.write("OBJS += $(%s_OBJSLOONGARCH_LSX)\n"%(PREFIX))
f.write("OBJS += $(%s_OBJSLOONGARCH_LASX)\n\n"%(PREFIX))
f.write("OBJS += $(%s_OBJS)\n\n"%(PREFIX))
write_cpp_rule_pattern(f)
+14 -7
View File
@@ -12,15 +12,18 @@ ifeq ($(ASM_ARCH), arm)
CCAS = gas-preprocessor.pl -as-type armasm -force-thumb -- armasm
CCASFLAGS = -nologo -DHAVE_NEON -ignore 4509
endif
ifeq ($(ASM_ARCH), arm64)
CCAS = gas-preprocessor.pl -as-type armasm -arch aarch64 -- armasm64
CCASFLAGS = -nologo -DHAVE_NEON_AARCH64
endif
CC=cl
CXX=cl
AR=lib
CXX_O=-Fo$@
ifeq ($(ASM_ARCH), arm64)
CCAS = clang-cl
CCASFLAGS = -nologo -DHAVE_NEON_AARCH64 --target=arm64-windows
endif
# -D_VARIADIC_MAX=10 is required to fix building gtest on MSVC 2012, but
# since we don't (easily) know which version of MSVC we use here, we add
# it unconditionally. The same issue can also be worked around by adding
@@ -29,8 +32,8 @@ CXX_O=-Fo$@
CFLAGS += -nologo -W3 -EHsc -fp:precise -Zc:wchar_t -Zc:forScope -D_VARIADIC_MAX=10
CXX_LINK_O=-nologo -Fe$@
AR_OPTS=-nologo -out:$@
CFLAGS_OPT=-O2 -Ob1 -Oy- -Zi -GF -Gm- -GS -Gy -DNDEBUG
CFLAGS_DEBUG=-Od -Oy- -Zi -RTC1 -D_DEBUG
CFLAGS_OPT=-O2 -Ob1 -Oy- -Zi -FS -GF -GS -Gy -DNDEBUG
CFLAGS_DEBUG=-Od -Oy- -Zi -FS -RTC1 -D_DEBUG
CFLAGS_M32=
CFLAGS_M64=
LINK_LOCAL_DIR=
@@ -48,7 +51,11 @@ EXTRA_LIBRARY=$(PROJECT_NAME)_dll.lib
LDFLAGS += -link
SHLDFLAGS=-debug -map -opt:ref -opt:icf -def:$(SRC_PATH)openh264.def -implib:$(EXTRA_LIBRARY)
STATIC_LDFLAGS=
CODEC_UNITTEST_CFLAGS=-D_CRT_SECURE_NO_WARNINGS
CODEC_UNITTEST_CFLAGS+=-D_CRT_SECURE_NO_WARNINGS
ifneq ($(filter %86 x86_64, $(ARCH)),)
LDFLAGS += -cetcompat
endif
%.res: %.rc
$(QUIET_RC)rc -fo $@ $<
+16 -2
View File
@@ -44,11 +44,15 @@ SYSROOT = $(NDKROOT)/platforms/android-$(NDKLEVEL)/arch-$(ARCH)
CXX = $(TOOLCHAINPREFIX)g++
CC = $(TOOLCHAINPREFIX)gcc
AR = $(TOOLCHAINPREFIX)ar
CFLAGS += -DANDROID_NDK -fpic --sysroot=$(SYSROOT) -MMD -MP -fstack-protector-all
CFLAGS += -DANDROID_NDK -fpic --sysroot=$(SYSROOT) -MMD -MP
ifeq ($(USE_STACK_PROTECTOR), Yes)
CFLAGS += -fstack-protector-all
endif
CFLAGS += -isystem $(NDKROOT)/sysroot/usr/include -isystem $(NDKROOT)/sysroot/usr/include/$(TOOLCHAIN_NAME) -D__ANDROID_API__=$(NDKLEVEL)
CXXFLAGS += -fno-rtti -fno-exceptions
LDFLAGS += --sysroot=$(SYSROOT)
SHLDFLAGS = -Wl,--no-undefined -Wl,-z,relro -Wl,-z,now -Wl,-soname,lib$(PROJECT_NAME).so
UTSHLDFLAGS = -Wl,-soname,libut.so
ifeq ($(NDK_TOOLCHAIN_VERSION), clang)
HOST_OS = $(shell uname -s | tr [A-Z] [a-z])
@@ -70,9 +74,11 @@ ifeq ($(NDK_TOOLCHAIN_VERSION), clang)
CFLAGS += -target $(TARGET_NAME)
LDFLAGS += -target $(TARGET_NAME) -gcc-toolchain $(GCC_TOOLCHAIN_PATH)
LDFLAGS += -Wl,--exclude-libs,libgcc.a -Wl,--exclude-libs,libunwind.a
endif
# background reading: https://android.googlesource.com/platform/ndk/+/master/docs/BuildSystemMaintainers.md#unwinding
LDFLAGS += -Wl,--exclude-libs,libgcc.a -Wl,--exclude-libs,libunwind.a
ifneq ($(findstring /,$(CXX)),$(findstring \,$(CXX)))
ifneq ($(CXX),$(wildcard $(CXX)))
ifneq ($(CXX).exe,$(wildcard $(CXX).exe))
@@ -81,10 +87,18 @@ endif
endif
endif
ifeq ($(NDK_TOOLCHAIN_VERSION), clang)
STL_INCLUDES = \
-I$(NDKROOT)/sources/cxx-stl/llvm-libc++/include \
-I$(NDKROOT)/sources/cxx-stl/llvm-libc++abi/include
STL_LIB = \
$(NDKROOT)/sources/cxx-stl/llvm-libc++/libs/$(APP_ABI)/libc++_static.a
else
STL_INCLUDES = \
-I$(NDKROOT)/sources/cxx-stl/stlport/stlport
STL_LIB = \
$(NDKROOT)/sources/cxx-stl/stlport/libs/$(APP_ABI)/libstlport_static.a
endif
GTEST_INCLUDES = $(STL_INCLUDES)
CODEC_UNITTEST_INCLUDES = $(STL_INCLUDES)
+4 -1
View File
@@ -3,7 +3,10 @@ SHAREDLIBSUFFIX = so
SHAREDLIBSUFFIXFULLVER=$(SHAREDLIBSUFFIX).$(FULL_VERSION)
SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIBSUFFIX).$(SHAREDLIB_MAJORVERSION)
SHLDFLAGS = -Wl,-soname,$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER)
CFLAGS += -fPIC -fstack-protector-all
CFLAGS += -fPIC
ifeq ($(USE_STACK_PROTECTOR), Yes)
CFLAGS += -fstack-protector-all
endif
LDFLAGS += -lpthread
STATIC_LDFLAGS += -lpthread -lm
ifeq ($(ASM_ARCH), x86)
+10 -3
View File
@@ -3,14 +3,21 @@ SHAREDLIB_DIR = $(PREFIX)/lib
SHAREDLIBSUFFIX = dylib
SHAREDLIBSUFFIXFULLVER=$(FULL_VERSION).$(SHAREDLIBSUFFIX)
SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIB_MAJORVERSION).$(SHAREDLIBSUFFIX)
CURRENT_VERSION := 2.0.0
COMPATIBILITY_VERSION := 2.0.0
CURRENT_VERSION := 2.2.0
COMPATIBILITY_VERSION := 2.2.0
SHLDFLAGS = -dynamiclib -twolevel_namespace -undefined dynamic_lookup \
-fno-common -headerpad_max_install_names -install_name \
$(SHAREDLIB_DIR)/$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER)
SHARED = -dynamiclib
SHARED += -current_version $(CURRENT_VERSION) -compatibility_version $(COMPATIBILITY_VERSION)
CFLAGS += -Wall -fPIC -MMD -MP -fstack-protector-all
CFLAGS += -Wall -fPIC -MMD -MP
ifeq ($(ARCH), arm64)
CFLAGS += -arch arm64
LDFLAGS += -arch arm64
endif
ifeq ($(USE_STACK_PROTECTOR), Yes)
CFLAGS += -fstack-protector-all
endif
ifeq ($(ASM_ARCH), x86)
ASMFLAGS += -DPREFIX
ifeq ($(ARCH), x86_64)
+4 -1
View File
@@ -3,7 +3,10 @@ SHAREDLIBSUFFIX = so
SHAREDLIBSUFFIXFULLVER=$(SHAREDLIBSUFFIX).$(FULL_VERSION)
SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIBSUFFIX).$(SHAREDLIB_MAJORVERSION)
SHLDFLAGS = -Wl,-soname,$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER)
CFLAGS += -Wall -fno-strict-aliasing -fPIC -MMD -MP -fstack-protector-all
CFLAGS += -Wall -fno-strict-aliasing -fPIC -MMD -MP
ifeq ($(USE_STACK_PROTECTOR), Yes)
CFLAGS += -fstack-protector-all
endif
LDFLAGS += -lpthread
STATIC_LDFLAGS += -lpthread -lm
AR_OPTS = crD $@
+7 -1
View File
@@ -1,4 +1,4 @@
include $(SRC_PATH)build/x86-common.mk
include $(SRC_PATH)build/arch.mk
SHAREDLIB_DIR = $(PREFIX)/bin
SHAREDLIBSUFFIX = dll
SHAREDLIBSUFFIXFULLVER=$(SHAREDLIBSUFFIX)
@@ -7,6 +7,7 @@ EXTRA_LIBRARY=$(LIBPREFIX)$(PROJECT_NAME).dll.a
SHLDFLAGS = -Wl,--out-implib,$(EXTRA_LIBRARY)
CFLAGS += -MMD -MP
LDFLAGS +=
ifeq ($(ASM_ARCH), x86)
ifeq ($(ARCH), x86_64)
ASMFLAGS += -f win64
ASMFLAGS_PLATFORM = -DWIN64
@@ -16,5 +17,10 @@ AR = x86_64-w64-mingw32-ar
else
ASMFLAGS += -f win32 -DPREFIX
endif
endif
ifeq ($(ASM_ARCH), arm)
CCAS = gas-preprocessor.pl -as-type clang -force-thumb -- $(CC)
CCASFLAGS = -DHAVE_NEON -mimplicit-it=always
endif
EXEEXT = .exe
+2 -2
View File
@@ -167,8 +167,8 @@ typedef enum {
DECODER_OPTION_LEVEL, ///< get current AU level info,only is used in GetOption
DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
DECODER_OPTION_IS_REF_PIC, ///< feedback current frame is ref pic or not
DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
DECODER_OPTION_NUM_OF_THREADS, ///< number of decoding threads. The maximum thread count is equal or less than lesser of (cpu core counts and 16).
} DECODER_OPTION;
/**
+1
View File
@@ -201,6 +201,7 @@ typedef struct TagBufferInfo {
union {
SSysMEMBuffer sSystemBuffer; ///< memory info for one picture
} UsrData; ///< output buffer info
unsigned char* pDst[3]; //point to picture YUV data
} SBufferInfo;
+4 -4
View File
@@ -4,12 +4,12 @@
#include "codec_app_def.h"
static const OpenH264Version g_stCodecVersion = {2, 0, 0, 1905};
static const char* const g_strCodecVer = "OpenH264 version:2.0.0.1905";
static const OpenH264Version g_stCodecVersion = {2, 2, 0, 2201};
static const char* const g_strCodecVer = "OpenH264 version:2.2.0.2201";
#define OPENH264_MAJOR (2)
#define OPENH264_MINOR (0)
#define OPENH264_MINOR (2)
#define OPENH264_REVISION (0)
#define OPENH264_RESERVED (1905)
#define OPENH264_RESERVED (2201)
#endif // CODEC_VER_H
+12
View File
@@ -860,6 +860,10 @@
RelativePath="..\..\..\common\inc\wels_const_common.h"
>
</File>
<File
RelativePath="..\..\..\decoder\core\inc\wels_decoder_thread.h"
>
</File>
</Filter>
<Filter
Name="Source Files"
@@ -977,6 +981,14 @@
RelativePath="..\..\..\common\src\utils.cpp"
>
</File>
<File
RelativePath="..\..\..\common\src\WelsThreadLib.cpp"
>
</File>
<File
RelativePath="..\..\..\decoder\core\src\wels_decoder_thread.cpp"
>
</File>
</Filter>
</Files>
<Globals>
+6
View File
@@ -50,16 +50,22 @@ mov pc, lr
.endm
#else
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits // Mark stack as non-executable
#endif
.text
#ifdef __ELF__
.arch armv7-a
.fpu neon
#endif
.macro WELS_ASM_FUNC_BEGIN funcName
.align 2
.arm
.global \funcName
#ifdef __ELF__
.type \funcName, %function
#endif
#ifndef __clang__
.func \funcName
#endif
@@ -45,13 +45,17 @@ ret
.endm
#else
#ifdef __ELF__
.section .note.GNU-stack,"",%progbits // Mark stack as non-executable
#endif
.text
.macro WELS_ASM_AARCH64_FUNC_BEGIN funcName
.align 2
.global \funcName
#ifdef __ELF__
.type \funcName, %function
#endif
#ifndef __clang__
.func \funcName
#endif
+13
View File
@@ -60,6 +60,19 @@ typedef HANDLE WELS_EVENT;
#define WELS_THREAD_ROUTINE_TYPE DWORD WINAPI
#define WELS_THREAD_ROUTINE_RETURN(rc) return (DWORD)rc;
#ifdef WINAPI_FAMILY
#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define WP80
#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
#define GetSystemInfo(x) GetNativeSystemInfo(x)
#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS)
#define CreateSemaphore(a, b, c, d) CreateSemaphoreEx(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS)
#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE)
#endif
#endif
#else // NON-WINDOWS
#include <stdlib.h>
+12 -7
View File
@@ -288,9 +288,9 @@
/**
* backup register
*/
#if defined(_ABI64) && _MIPS_SIM == _ABI64
#define BACKUP_REG \
double __back_temp[8]; \
if (_MIPS_SIM == _ABI64) \
double __attribute__((aligned(16))) __back_temp[8]; \
__asm__ volatile ( \
"gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
"gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
@@ -299,8 +299,10 @@
: \
: [temp]"r"(__back_temp) \
: "memory" \
); \
else \
);
#else
#define BACKUP_REG \
double __attribute__((aligned(16))) __back_temp[8]; \
__asm__ volatile ( \
"gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
"gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
@@ -309,12 +311,13 @@
: [temp]"r"(__back_temp) \
: "memory" \
);
#endif
/**
* recover register
*/
#if defined(_ABI64) && _MIPS_SIM == _ABI64
#define RECOVER_REG \
if (_MIPS_SIM == _ABI64) \
__asm__ volatile ( \
"gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
"gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
@@ -323,8 +326,9 @@
: \
: [temp]"r"(__back_temp) \
: "memory" \
); \
else \
);
#else
#define RECOVER_REG \
__asm__ volatile ( \
"gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
"gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
@@ -333,6 +337,7 @@
: [temp]"r"(__back_temp) \
: "memory" \
);
#endif
# define OK 1
# define NOTOK 0
+14
View File
@@ -82,6 +82,20 @@ void WelsCopy16x8NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src,
void WelsCopy16x16_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
#endif//HAVE_MMI
#if defined (HAVE_MSA)
void WelsCopy8x8_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy8x16_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x8_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
#endif//HAVE_MSA
#if defined (HAVE_LSX)
void WelsCopy8x8_lsx (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
void WelsCopy16x16_lsx (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
void WelsCopy16x16NotAligned_lsx (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS);
#endif
#if defined(__cplusplus)
}
#endif//__cplusplus
+1
View File
@@ -59,6 +59,7 @@ void WelsCPUId (uint32_t uiIndex, uint32_t* pFeatureA, uint32_t* pFeatureB, uint
int32_t WelsCPUSupportAVX (uint32_t eax, uint32_t ecx);
int32_t WelsCPUSupportFMA (uint32_t eax, uint32_t ecx);
uint32_t WelsCPUDetectAVX512();
void WelsEmms();
+9
View File
@@ -74,6 +74,12 @@
#define WELS_CPU_AVX2 0x00000000 /* !AVX2 */
#endif
#define WELS_CPU_AVX512F 0x00080000 /* AVX512F */
#define WELS_CPU_AVX512CD 0x00100000 /* AVX512CD */
#define WELS_CPU_AVX512DQ 0x00200000 /* AVX512DQ */
#define WELS_CPU_AVX512BW 0x00400000 /* AVX512BW */
#define WELS_CPU_AVX512VL 0x00800000 /* AVX512VL */
#define WELS_CPU_CACHELINE_16 0x10000000 /* CacheLine Size 16 */
#define WELS_CPU_CACHELINE_32 0x20000000 /* CacheLine Size 32 */
#define WELS_CPU_CACHELINE_64 0x40000000 /* CacheLine Size 64 */
@@ -86,6 +92,9 @@
/* For loongson */
/*
 * NOTE(review): the neighbouring WELS_CPU_* feature values are single-bit masks,
 * but WELS_CPU_LSX (0x3) is numerically WELS_CPU_MMI | WELS_CPU_MSA. A test such
 * as (flags & WELS_CPU_LSX) is therefore also non-zero when only MMI or only MSA
 * is set, and setting LSX makes the MMI and MSA tests succeed as well.
 * Presumably harmless if MMI/MSA and LSX/LASX are never detected in the same
 * build -- TODO confirm, or move LSX/LASX to distinct bits.
 */
#define WELS_CPU_MMI 0x00000001 /* mmi */
#define WELS_CPU_MSA 0x00000002 /* msa */
#define WELS_CPU_LSX 0x00000003 /* lsx */
#define WELS_CPU_LASX 0x00000004 /* lasx */
/*
* Interfaces for CPU core feature detection as below
+14
View File
@@ -91,6 +91,20 @@ void DeblockChromaLt4H_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, i
int8_t* pTC);
void WelsNonZeroCount_mmi (int8_t* pNonZeroCount);
#endif//HAVE_MMI
#if defined(HAVE_MSA)
void DeblockLumaLt4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockLumaLt4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc);
void DeblockLumaEq4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaEq4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void DeblockChromaEq4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta);
void DeblockChromaLt4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta,
int8_t* pTC);
void WelsNonZeroCount_msa (int8_t* pNonZeroCount);
#endif//HAVE_MSA
#if defined(__cplusplus)
}
#endif//__cplusplus
+5
View File
@@ -47,6 +47,7 @@ extern "C" {
#endif//__cplusplus
#define PADDING_LENGTH 32 // reference extension
#define CHROMA_PADDING_LENGTH 16 // chroma reference extension
#if defined(X86_ASM)
void ExpandPictureLuma_sse2 (uint8_t* pDst,
@@ -89,6 +90,10 @@ typedef struct TagExpandPicFunc {
PExpandPictureFunc pfExpandChromaPicture[2];
} SExpandPicFunc;
void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight);
void ExpandReferencingPicture (uint8_t* pData[3], int32_t iWidth, int32_t iHeight, int32_t iStride[3],
PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]);
File diff suppressed because it is too large Load Diff
+14
View File
@@ -356,6 +356,20 @@ void McHorVer20Width17U8ToS16_avx2 (const uint8_t* pSrc, int32_t iSrcStride, int
#endif //X86_ASM
//***************************************************************************//
// LSX definition //
//***************************************************************************//
#if defined(HAVE_LSX)
void McCopyWidthEq4_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McCopyWidthEq8_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McCopyWidthEq16_lsx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
void McChromaWidthEq4_lsx (const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride,
const uint8_t *pABCD, int32_t iHeight);
void McChromaWidthEq8_lsx (const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride,
const uint8_t *pABCD, int32_t iHeight);
#endif//HAVE_LSX
#if defined(__cplusplus)
}
#endif//__cplusplus
File diff suppressed because it is too large Load Diff
+15
View File
@@ -117,6 +117,21 @@ void WelsSampleSadFour16x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x8_mmi (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
#endif//HAVE_MMI
#if defined (HAVE_LASX)
int32_t WelsSampleSad4x4_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x8_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x16_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x8_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x16_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
void WelsSampleSadFour4x4_lasx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x8_lasx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x16_lasx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour16x8_lasx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour16x16_lasx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
#endif
#if defined(__cplusplus)
}
#endif//__cplusplus
+194
View File
@@ -0,0 +1,194 @@
/*!
* \copy
* Copyright (c) 2009-2018, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file copy_mb_lsx.c
*
* \brief Loongson optimization
*
* \date 12/18/2021 Created
*
*************************************************************************************
*/
#include <stdint.h>
#include "loongson_intrinsics.h"
/*!
 * \brief Copy an 8x8 block of bytes from pSrc to pDst with LSX.
 *
 * All eight source rows are fetched first, each with a full 128-bit load;
 * afterwards only the low 64 bits (8 bytes) of every row are written out.
 *
 * \param pDst     address of the top-left destination byte
 * \param iStrideD distance in bytes between two destination rows
 * \param pSrc     address of the top-left source byte
 * \param iStrideS distance in bytes between two source rows
 */
void WelsCopy8x8_lsx (uint8_t* pDst, int32_t iStrideD,
                      uint8_t* pSrc, int32_t iStrideS) {
  const int32_t kiSrcStrideX2 = iStrideS << 1;
  const int32_t kiSrcStrideX3 = kiSrcStrideX2 + iStrideS;
  const int32_t kiDstStrideX2 = iStrideD << 1;
  uint8_t* pSrcLower = pSrc + (kiSrcStrideX2 << 1);  /* first byte of row 4 */
  uint8_t* pOut = pDst;
  __m128i vRow0, vRow1, vRow2, vRow3, vRow4, vRow5, vRow6, vRow7;

  /* rows 0..3 */
  vRow0 = __lsx_vldx (pSrc, 0);
  vRow1 = __lsx_vldx (pSrc, iStrideS);
  vRow2 = __lsx_vldx (pSrc, kiSrcStrideX2);
  vRow3 = __lsx_vldx (pSrc, kiSrcStrideX3);

  /* rows 4..7 */
  vRow4 = __lsx_vldx (pSrcLower, 0);
  vRow5 = __lsx_vldx (pSrcLower, iStrideS);
  vRow6 = __lsx_vldx (pSrcLower, kiSrcStrideX2);
  vRow7 = __lsx_vldx (pSrcLower, kiSrcStrideX3);

  /* write element 0 (low doubleword) of each row, two rows per step */
  __lsx_vstelm_d (vRow0, pOut, 0, 0);
  __lsx_vstelm_d (vRow1, pOut + iStrideD, 0, 0);
  pOut += kiDstStrideX2;
  __lsx_vstelm_d (vRow2, pOut, 0, 0);
  __lsx_vstelm_d (vRow3, pOut + iStrideD, 0, 0);
  pOut += kiDstStrideX2;
  __lsx_vstelm_d (vRow4, pOut, 0, 0);
  __lsx_vstelm_d (vRow5, pOut + iStrideD, 0, 0);
  pOut += kiDstStrideX2;
  __lsx_vstelm_d (vRow6, pOut, 0, 0);
  __lsx_vstelm_d (vRow7, pOut + iStrideD, 0, 0);
}
/*!
 * \brief Copy a 16x16 block of bytes from pSrc to pDst with LSX.
 *
 * The sixteen source rows are loaded (one 128-bit vector per row) before any
 * destination row is stored.
 *
 * \param pDst     address of the top-left destination byte
 * \param iStrideD distance in bytes between two destination rows
 * \param pSrc     address of the top-left source byte
 * \param iStrideS distance in bytes between two source rows
 */
void WelsCopy16x16_lsx (uint8_t* pDst, int32_t iStrideD,
                        uint8_t* pSrc, int32_t iStrideS) {
  const int32_t kiSrcX2 = iStrideS << 1;
  const int32_t kiSrcX3 = kiSrcX2 + iStrideS;
  const int32_t kiSrcX4 = iStrideS << 2;
  const int32_t kiDstX2 = iStrideD << 1;
  const int32_t kiDstX3 = kiDstX2 + iStrideD;
  const int32_t kiDstX4 = iStrideD << 2;
  uint8_t* pIn = pSrc;
  uint8_t* pOut = pDst;
  __m128i vRow0, vRow1, vRow2, vRow3, vRow4, vRow5, vRow6, vRow7;
  __m128i vRow8, vRow9, vRow10, vRow11, vRow12, vRow13, vRow14, vRow15;

  /* load phase: four rows per group, then advance by four source rows */
  vRow0 = __lsx_vldx (pIn, 0);
  vRow1 = __lsx_vldx (pIn, iStrideS);
  vRow2 = __lsx_vldx (pIn, kiSrcX2);
  vRow3 = __lsx_vldx (pIn, kiSrcX3);
  pIn += kiSrcX4;
  vRow4 = __lsx_vldx (pIn, 0);
  vRow5 = __lsx_vldx (pIn, iStrideS);
  vRow6 = __lsx_vldx (pIn, kiSrcX2);
  vRow7 = __lsx_vldx (pIn, kiSrcX3);
  pIn += kiSrcX4;
  vRow8 = __lsx_vldx (pIn, 0);
  vRow9 = __lsx_vldx (pIn, iStrideS);
  vRow10 = __lsx_vldx (pIn, kiSrcX2);
  vRow11 = __lsx_vldx (pIn, kiSrcX3);
  pIn += kiSrcX4;
  vRow12 = __lsx_vldx (pIn, 0);
  vRow13 = __lsx_vldx (pIn, iStrideS);
  vRow14 = __lsx_vldx (pIn, kiSrcX2);
  vRow15 = __lsx_vldx (pIn, kiSrcX3);

  /* store phase: same grouping, using the destination stride */
  __lsx_vstx (vRow0, pOut, 0);
  __lsx_vstx (vRow1, pOut, iStrideD);
  __lsx_vstx (vRow2, pOut, kiDstX2);
  __lsx_vstx (vRow3, pOut, kiDstX3);
  pOut += kiDstX4;
  __lsx_vstx (vRow4, pOut, 0);
  __lsx_vstx (vRow5, pOut, iStrideD);
  __lsx_vstx (vRow6, pOut, kiDstX2);
  __lsx_vstx (vRow7, pOut, kiDstX3);
  pOut += kiDstX4;
  __lsx_vstx (vRow8, pOut, 0);
  __lsx_vstx (vRow9, pOut, iStrideD);
  __lsx_vstx (vRow10, pOut, kiDstX2);
  __lsx_vstx (vRow11, pOut, kiDstX3);
  pOut += kiDstX4;
  __lsx_vstx (vRow12, pOut, 0);
  __lsx_vstx (vRow13, pOut, iStrideD);
  __lsx_vstx (vRow14, pOut, kiDstX2);
  __lsx_vstx (vRow15, pOut, kiDstX3);
}
void WelsCopy16x16NotAligned_lsx (uint8_t* pDst, int32_t iStrideD,
uint8_t* pSrc, int32_t iStrideS) {
int32_t iStride0 = 0;
int32_t iStride1 = iStrideS;
int32_t iStride2 = iStrideS << 1;
int32_t iStride3 = iStride2 + iStrideS;
int32_t iStride4 = iStrideS << 2;
v16u8_b src0, src1, src2, src3, src4, src5, src6, src7;
v16u8_b src8, src9, src10, src11, src12, src13, src14, src15;
DUP4_ARG2((v16u8_b)__lsx_vldx,
pSrc, iStride0, pSrc, iStride1,
pSrc, iStride2, pSrc, iStride3,
src0, src1, src2, src3);
pSrc += iStride4;
DUP4_ARG2((v16u8_b)__lsx_vldx,
pSrc, iStride0, pSrc, iStride1,
pSrc, iStride2, pSrc, iStride3,
src4, src5, src6, src7);
pSrc += iStride4;
DUP4_ARG2((v16u8_b)__lsx_vldx,
pSrc, iStride0, pSrc, iStride1,
pSrc, iStride2, pSrc, iStride3,
src8, src9, src10, src11);
pSrc += iStride4;
DUP4_ARG2((v16u8_b)__lsx_vldx,
pSrc, iStride0, pSrc, iStride1,
pSrc, iStride2, pSrc, iStride3,
src12, src13, src14, src15);
iStride1 = iStrideD;
iStride2 = iStrideD << 1;
iStride3 = iStride2 + iStrideD;
iStride4 = iStrideD << 2;
__lsx_vstx((__m128i)src0, pDst, iStride0);
__lsx_vstx((__m128i)src1, pDst, iStride1);
__lsx_vstx((__m128i)src2, pDst, iStride2);
__lsx_vstx((__m128i)src3, pDst, iStride3);
pDst += iStride4;
__lsx_vstx((__m128i)src4, pDst, iStride0);
__lsx_vstx((__m128i)src5, pDst, iStride1);
__lsx_vstx((__m128i)src6, pDst, iStride2);
__lsx_vstx((__m128i)src7, pDst, iStride3);
pDst += iStride4;
__lsx_vstx((__m128i)src8, pDst, iStride0);
__lsx_vstx((__m128i)src9, pDst, iStride1);
__lsx_vstx((__m128i)src10, pDst, iStride2);
__lsx_vstx((__m128i)src11, pDst, iStride3);
pDst += iStride4;
__lsx_vstx((__m128i)src12, pDst, iStride0);
__lsx_vstx((__m128i)src13, pDst, iStride1);
__lsx_vstx((__m128i)src14, pDst, iStride2);
__lsx_vstx((__m128i)src15, pDst, iStride3);
}
+192
View File
@@ -0,0 +1,192 @@
/*!
**********************************************************************************
* Copyright (c) 2021 Loongson Technology Corporation Limited
* Contributed by Lu Wang <wanglu@loongson.cn>
*
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* \file mc_chroma_lsx.c
*
* \brief Loongson optimization
*
* \date 12/23/2021 Created
*
**********************************************************************************
*/
#include "stdint.h"
#include "loongson_intrinsics.h"
/*!
 * \brief Copy a 4-byte-wide column of iHeight rows from pSrc to pDst (LSX).
 *
 * Rows are handled in pairs (iHeight >> 1 iterations), so an odd trailing row
 * is not copied. Each row is read with a full 128-bit load; only element 0
 * (the low 32 bits) is written.
 */
void McCopyWidthEq4_lsx(const uint8_t *pSrc, int iSrcStride,
                        uint8_t *pDst, int iDstStride, int iHeight) {
  const int kiSrcStep = iSrcStride << 1;
  const int kiDstStep = iDstStride << 1;
  int iPairsLeft;
  __m128i vUpper, vLower;

  for (iPairsLeft = iHeight >> 1; iPairsLeft > 0; --iPairsLeft) {
    vUpper = __lsx_vld(pSrc, 0);
    vLower = __lsx_vld(pSrc + iSrcStride, 0);
    __lsx_vstelm_w(vUpper, pDst, 0, 0);
    __lsx_vstelm_w(vLower, pDst + iDstStride, 0, 0);
    pSrc += kiSrcStep;
    pDst += kiDstStep;
  }
}
/*!
 * \brief Copy an 8-byte-wide column of iHeight rows from pSrc to pDst (LSX).
 *
 * Rows are handled in pairs (iHeight >> 1 iterations), so an odd trailing row
 * is not copied. Each row is read with a full 128-bit load; only element 0
 * (the low 64 bits) is written.
 */
void McCopyWidthEq8_lsx(const uint8_t *pSrc, int iSrcStride,
                        uint8_t *pDst, int iDstStride, int iHeight) {
  const int kiSrcStep = iSrcStride << 1;
  const int kiDstStep = iDstStride << 1;
  int iPairsLeft;
  __m128i vUpper, vLower;

  for (iPairsLeft = iHeight >> 1; iPairsLeft > 0; --iPairsLeft) {
    vUpper = __lsx_vld(pSrc, 0);
    vLower = __lsx_vld(pSrc + iSrcStride, 0);
    __lsx_vstelm_d(vUpper, pDst, 0, 0);
    __lsx_vstelm_d(vLower, pDst + iDstStride, 0, 0);
    pSrc += kiSrcStep;
    pDst += kiDstStep;
  }
}
/*!
 * \brief Copy a 16-byte-wide column of iHeight rows from pSrc to pDst (LSX).
 *
 * Rows are handled in pairs (iHeight >> 1 iterations), so an odd trailing row
 * is not copied. Each row is moved as one whole 128-bit vector.
 */
void McCopyWidthEq16_lsx(const uint8_t *pSrc, int iSrcStride,
                         uint8_t *pDst, int iDstStride, int iHeight) {
  const int kiSrcStep = iSrcStride << 1;
  const int kiDstStep = iDstStride << 1;
  int iPairsLeft;
  __m128i vUpper, vLower;

  for (iPairsLeft = iHeight >> 1; iPairsLeft > 0; --iPairsLeft) {
    vUpper = __lsx_vld(pSrc, 0);
    vLower = __lsx_vld(pSrc + iSrcStride, 0);
    __lsx_vst(vUpper, pDst, 0);
    __lsx_vstx(vLower, pDst, iDstStride);
    pSrc += kiSrcStep;
    pDst += kiDstStep;
  }
}
/*!
 * \brief Weighted 2x2 chroma interpolation for a 4-pixel-wide block (LSX).
 *
 * For every output pixel the code evaluates
 *   (A * cur[x] + B * cur[x + 1] + C * below[x] + D * below[x + 1] + 32) >> 6
 * where cur/below are two consecutive source rows and A..D are pABCD[0..3].
 * Two output rows are produced per iteration (iHeight >> 1 iterations), so an
 * odd trailing row is not written. Source rows are read with full 128-bit
 * loads, and one row below the last output row is read as well.
 *
 * \param pSrc       top-left source byte
 * \param iSrcStride distance in bytes between source rows
 * \param pDst       top-left destination byte
 * \param iDstStride distance in bytes between destination rows
 * \param pABCD      the four unsigned byte weights A, B, C, D
 * \param iHeight    number of rows to produce
 */
void McChromaWidthEq4_lsx(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst,
                          int32_t iDstStride, const uint8_t *pABCD, int32_t iHeight) {
  int32_t i;
  /* Weights stay unsigned: with int8_t a weight >= 128 would sign-extend in
   * (iB << 8) | iA and overwrite the upper byte of the packed pair. The dot
   * product below treats both operands as unsigned bytes anyway. */
  uint8_t iA, iB, iC, iD;
  uint16_t iFillData;
  int32_t iSrcStride_x2 = iSrcStride << 1;
  int32_t iDstStride_x2 = iDstStride << 1;
  __m128i paramAB, paramCD, paramConst;
  __m128i src0, src1, src2, src0Even, src1Even, src2Even;
  __m128i out0, outNext0, out1, outNext1;
  __m128i shift;

  iA = pABCD[0];
  iB = pABCD[1];
  iC = pABCD[2];
  iD = pABCD[3];
  /* pack (A,B) and (C,D) as byte pairs and replicate them over all halfwords */
  iFillData = (iB << 8) | iA;
  paramAB = __lsx_vreplgr2vr_h(iFillData);
  iFillData = (iD << 8) | iC;
  paramCD = __lsx_vreplgr2vr_h(iFillData);
  paramConst = __lsx_vreplgr2vr_h(32);   /* rounding term */
  shift = __lsx_vreplgr2vr_h(6);
  iHeight = iHeight >> 1;

  for (i = 0; i < iHeight; i++) {
    /* three consecutive rows starting at x ... */
    DUP2_ARG2(__lsx_vld, pSrc, 0, pSrc + iSrcStride, 0, src0, src1);
    src2 = __lsx_vld(pSrc + iSrcStride_x2, 0);
    /* ... and the same rows starting at x + 1 */
    pSrc += 1;
    DUP2_ARG2(__lsx_vld, pSrc, 0, pSrc + iSrcStride, 0, src0Even, src1Even);
    src2Even = __lsx_vld(pSrc + iSrcStride_x2, 0);
    pSrc -= 1;
    /* interleave the low words so every byte pair is (p[x], p[x + 1]) */
    DUP2_ARG2(__lsx_vilvl_w, src0Even, src0, src1Even, src1, src0, src1);
    src2 = __lsx_vilvl_w(src2Even, src2);
    /* row r weighted by (A,B), row r + 1 weighted by (C,D), for both output rows */
    DUP4_ARG2(__lsx_vdp2_h_bu, src0, paramAB, src1, paramCD, src1, paramAB, src2,
              paramCD, out0, outNext0, out1, outNext1);
    DUP4_ARG2(__lsx_vadd_h, out0, outNext0, out0, paramConst, out1, outNext1, out1,
              paramConst, out0, out0, out1, out1);
    DUP2_ARG2(__lsx_vsrl_h, out0, shift, out1, shift, out0, out1);
    /* paramConst is reused as the shift-count vector here; presumably the
     * per-doubleword count taken from it is 32, which moves the odd-pixel
     * results down next to the even ones -- TODO confirm against the ISA manual */
    DUP2_ARG2(__lsx_vsrl_d, out0, paramConst, out1, paramConst, outNext0, outNext1);
    DUP2_ARG2(__lsx_vpackev_b, outNext0, out0, outNext1, out1, out0, out1);
    __lsx_vstelm_w(out0, pDst, 0, 0);
    __lsx_vstelm_w(out1, pDst + iDstStride, 0, 0);
    pDst += iDstStride_x2;
    pSrc += iSrcStride_x2;
  }
}
/*!
 * \brief Weighted 2x2 chroma interpolation for an 8-pixel-wide block (LSX).
 *
 * For every output pixel the code evaluates
 *   (A * cur[x] + B * cur[x + 1] + C * below[x] + D * below[x + 1] + 32) >> 6
 * where cur/below are two consecutive source rows and A..D are pABCD[0..3].
 * Two output rows are produced per iteration (iHeight >> 1 iterations), so an
 * odd trailing row is not written. Source rows are read with full 128-bit
 * loads, and one row below the last output row is read as well.
 *
 * \param pSrc       top-left source byte
 * \param iSrcStride distance in bytes between source rows
 * \param pDst       top-left destination byte
 * \param iDstStride distance in bytes between destination rows
 * \param pABCD      the four unsigned byte weights A, B, C, D
 * \param iHeight    number of rows to produce
 */
void McChromaWidthEq8_lsx(const uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst,
                          int32_t iDstStride, const uint8_t *pABCD, int32_t iHeight) {
  int32_t i;
  /* Weights stay unsigned: with int8_t a weight >= 128 would sign-extend in
   * (iB << 8) | iA and overwrite the upper byte of the packed pair. The dot
   * product below treats both operands as unsigned bytes anyway. */
  uint8_t iA, iB, iC, iD;
  uint16_t iFillData;
  int32_t iSrcStride_x2 = iSrcStride << 1;
  int32_t iDstStride_x2 = iDstStride << 1;
  __m128i paramAB, paramCD, paramConst;
  __m128i src0, src1, src2, src0Even, src1Even, src2Even;
  __m128i out0, outNext0, out1, outNext1;
  __m128i shift;

  iA = pABCD[0];
  iB = pABCD[1];
  iC = pABCD[2];
  iD = pABCD[3];
  /* pack (A,B) and (C,D) as byte pairs and replicate them over all halfwords */
  iFillData = (iB << 8) | iA;
  paramAB = __lsx_vreplgr2vr_h(iFillData);
  iFillData = (iD << 8) | iC;
  paramCD = __lsx_vreplgr2vr_h(iFillData);
  paramConst = __lsx_vreplgr2vr_h(32);   /* rounding term */
  shift = __lsx_vreplgr2vr_h(6);
  iHeight = iHeight >> 1;

  for (i = 0; i < iHeight; i++) {
    /* three consecutive rows starting at x ... */
    DUP2_ARG2(__lsx_vld, pSrc, 0, pSrc + iSrcStride, 0, src0, src1);
    src2 = __lsx_vld(pSrc + iSrcStride_x2, 0);
    /* ... and the same rows starting at x + 1 */
    pSrc += 1;
    DUP2_ARG2(__lsx_vld, pSrc, 0, pSrc + iSrcStride, 0, src0Even, src1Even);
    src2Even = __lsx_vld(pSrc + iSrcStride_x2, 0);
    pSrc -= 1;
    /* low doubleword: row at x (even output pixels); high: row at x + 1 (odd ones) */
    DUP2_ARG2(__lsx_vilvl_d, src0Even, src0, src1Even, src1, src0, src1);
    src2 = __lsx_vilvl_d(src2Even, src2);
    /* row r weighted by (A,B), row r + 1 weighted by (C,D), for both output rows */
    DUP4_ARG2(__lsx_vdp2_h_bu, src0, paramAB, src1, paramCD, src1, paramAB, src2,
              paramCD, out0, outNext0, out1, outNext1);
    DUP4_ARG2(__lsx_vadd_h, out0, outNext0, out0, paramConst, out1, outNext1, out1,
              paramConst, out0, out0, out1, out1);
    DUP2_ARG2(__lsx_vsrl_h, out0, shift, out1, shift, out0, out1);
    /* bring the odd-pixel half down, then merge even/odd results byte by byte */
    DUP2_ARG2(__lsx_vilvh_d, out0, out0, out1, out1, outNext0, outNext1);
    DUP2_ARG2(__lsx_vpackev_b, outNext0, out0, outNext1, out1, out0, out1);
    __lsx_vstelm_d(out0, pDst, 0, 0);
    __lsx_vstelm_d(out1, pDst + iDstStride, 0, 0);
    pDst += iDstStride_x2;
    pSrc += iSrcStride_x2;
  }
}
+567
View File
@@ -0,0 +1,567 @@
/*!
* \copy
* Copyright (c) 2009-2018, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file satd_sad_lasx.c
*
* \brief Loongson optimization
*
* \date 12/10/2021 Created
*
*************************************************************************************
*/
#include <stdint.h>
#include "loongson_intrinsics.h"
#define HORISUM(in0, in1, out0) \
out0 = __lasx_xvabsd_bu(in0, in1); \
out0 = __lasx_xvhaddw_hu_bu(out0, out0); \
out0 = __lasx_xvhaddw_wu_hu(out0, out0); \
out0 = __lasx_xvhaddw_du_wu(out0, out0); \
/*!
 * \brief Sum of absolute differences between two 4x4 byte blocks (LASX).
 *
 * Four rows of each block are read with full 256-bit loads and packed so that
 * one vector holds each block; HORISUM then produces per-doubleword partial
 * sums, which are folded once more. Only element 0 of the final vector is
 * returned.
 *
 * \param pSample1 top-left byte of the first block
 * \param iStride1 distance in bytes between rows of the first block
 * \param pSample2 top-left byte of the second block
 * \param iStride2 distance in bytes between rows of the second block
 * \return the accumulated sum read from element 0
 */
int32_t WelsSampleSad4x4_lasx (uint8_t* pSample1, int32_t iStride1,
                               uint8_t* pSample2, int32_t iStride2) {
  const int32_t kiOff1X2 = iStride1 << 1;
  const int32_t kiOff2X2 = iStride2 << 1;
  /* rows 0..3 of both blocks */
  __m256i vA0 = __lasx_xvldx (pSample1, 0);
  __m256i vA1 = __lasx_xvldx (pSample1, iStride1);
  __m256i vA2 = __lasx_xvldx (pSample1, kiOff1X2);
  __m256i vA3 = __lasx_xvldx (pSample1, kiOff1X2 + iStride1);
  __m256i vB0 = __lasx_xvldx (pSample2, 0);
  __m256i vB1 = __lasx_xvldx (pSample2, iStride2);
  __m256i vB2 = __lasx_xvldx (pSample2, kiOff2X2);
  __m256i vB3 = __lasx_xvldx (pSample2, kiOff2X2 + iStride2);
  /* pair up rows word-wise, then pair the pairs doubleword-wise */
  __m256i vA01 = __lasx_xvpackev_w (vA0, vA1);
  __m256i vA23 = __lasx_xvpackev_w (vA2, vA3);
  __m256i vB01 = __lasx_xvpackev_w (vB0, vB1);
  __m256i vB23 = __lasx_xvpackev_w (vB2, vB3);
  __m256i vBlockA = __lasx_xvpackev_d (vA01, vA23);
  __m256i vBlockB = __lasx_xvpackev_d (vB01, vB23);
  __m256i vSad;

  HORISUM (vBlockA, vBlockB, vSad);
  vSad = __lasx_xvhaddw_qu_du (vSad, vSad);
  return __lasx_xvpickve2gr_d (vSad, 0);
}
/*!
 * \brief SAD helper over eight rows, two rows per 256-bit vector (LASX).
 *
 * It is the building block of the 16x8 and 16x16 entry points in this file.
 * Eight rows of each sample are loaded, consecutive rows are merged pairwise
 * with xvpermi_q (selector 0x20), HORISUM yields per-doubleword partial sums
 * for each pair, and the partial sums are added up. The result is the sum of
 * elements 0 and 2 of the final vector.
 */
static inline
int32_t WelsSampleSad8x8x2_lasx (uint8_t* pSample1, int32_t iStride1,
                                 uint8_t* pSample2, int32_t iStride2) {
  /* byte offsets of rows 2..7 in each sample */
  const int32_t kiOffA2 = iStride1 << 1;
  const int32_t kiOffA3 = kiOffA2 + iStride1;
  const int32_t kiOffA4 = iStride1 << 2;
  const int32_t kiOffA5 = kiOffA4 + iStride1;
  const int32_t kiOffA6 = kiOffA5 + iStride1;
  const int32_t kiOffA7 = kiOffA6 + iStride1;
  const int32_t kiOffB2 = iStride2 << 1;
  const int32_t kiOffB3 = kiOffB2 + iStride2;
  const int32_t kiOffB4 = iStride2 << 2;
  const int32_t kiOffB5 = kiOffB4 + iStride2;
  const int32_t kiOffB6 = kiOffB5 + iStride2;
  const int32_t kiOffB7 = kiOffB6 + iStride2;
  /* first sample, rows 0..7 */
  __m256i vA0 = __lasx_xvldx (pSample1, 0);
  __m256i vA1 = __lasx_xvldx (pSample1, iStride1);
  __m256i vA2 = __lasx_xvldx (pSample1, kiOffA2);
  __m256i vA3 = __lasx_xvldx (pSample1, kiOffA3);
  __m256i vA4 = __lasx_xvldx (pSample1, kiOffA4);
  __m256i vA5 = __lasx_xvldx (pSample1, kiOffA5);
  __m256i vA6 = __lasx_xvldx (pSample1, kiOffA6);
  __m256i vA7 = __lasx_xvldx (pSample1, kiOffA7);
  /* second sample, rows 0..7 */
  __m256i vB0 = __lasx_xvldx (pSample2, 0);
  __m256i vB1 = __lasx_xvldx (pSample2, iStride2);
  __m256i vB2 = __lasx_xvldx (pSample2, kiOffB2);
  __m256i vB3 = __lasx_xvldx (pSample2, kiOffB3);
  __m256i vB4 = __lasx_xvldx (pSample2, kiOffB4);
  __m256i vB5 = __lasx_xvldx (pSample2, kiOffB5);
  __m256i vB6 = __lasx_xvldx (pSample2, kiOffB6);
  __m256i vB7 = __lasx_xvldx (pSample2, kiOffB7);
  /* merge consecutive rows into one vector each */
  __m256i vA01 = __lasx_xvpermi_q (vA0, vA1, 0x20);
  __m256i vA23 = __lasx_xvpermi_q (vA2, vA3, 0x20);
  __m256i vA45 = __lasx_xvpermi_q (vA4, vA5, 0x20);
  __m256i vA67 = __lasx_xvpermi_q (vA6, vA7, 0x20);
  __m256i vB01 = __lasx_xvpermi_q (vB0, vB1, 0x20);
  __m256i vB23 = __lasx_xvpermi_q (vB2, vB3, 0x20);
  __m256i vB45 = __lasx_xvpermi_q (vB4, vB5, 0x20);
  __m256i vB67 = __lasx_xvpermi_q (vB6, vB7, 0x20);
  __m256i vSad01, vSad23, vSad45, vSad67, vSum;

  HORISUM (vA01, vB01, vSad01);
  HORISUM (vA23, vB23, vSad23);
  HORISUM (vA45, vB45, vSad45);
  HORISUM (vA67, vB67, vSad67);

  vSum = __lasx_xvadd_d (vSad01, vSad23);
  vSum = __lasx_xvadd_d (vSum, vSad45);
  vSum = __lasx_xvadd_d (vSum, vSad67);
  vSum = __lasx_xvhaddw_qu_du (vSum, vSum);

  return (__lasx_xvpickve2gr_d (vSum, 0) +
          __lasx_xvpickve2gr_d (vSum, 2));
}
/*!
 * \brief Sum of absolute differences between two 8x8 byte blocks (LASX).
 *
 * Eight rows of each block are loaded; rows are paired with xvpackev_d and the
 * pairs are merged with xvpermi_q (selector 0x20), leaving two vectors per
 * block. HORISUM yields per-doubleword partial sums which are added together;
 * the result is the sum of elements 0 and 2 of the final vector.
 *
 * \param pSample1 top-left byte of the first block
 * \param iStride1 distance in bytes between rows of the first block
 * \param pSample2 top-left byte of the second block
 * \param iStride2 distance in bytes between rows of the second block
 */
int32_t WelsSampleSad8x8_lasx (uint8_t* pSample1, int32_t iStride1,
                               uint8_t* pSample2, int32_t iStride2) {
  /* byte offsets of rows 2..7 in each block */
  const int32_t kiOffA2 = iStride1 << 1;
  const int32_t kiOffA3 = kiOffA2 + iStride1;
  const int32_t kiOffA4 = iStride1 << 2;
  const int32_t kiOffA5 = kiOffA4 + iStride1;
  const int32_t kiOffA6 = kiOffA5 + iStride1;
  const int32_t kiOffA7 = kiOffA6 + iStride1;
  const int32_t kiOffB2 = iStride2 << 1;
  const int32_t kiOffB3 = kiOffB2 + iStride2;
  const int32_t kiOffB4 = iStride2 << 2;
  const int32_t kiOffB5 = kiOffB4 + iStride2;
  const int32_t kiOffB6 = kiOffB5 + iStride2;
  const int32_t kiOffB7 = kiOffB6 + iStride2;
  /* first block, rows 0..7 */
  __m256i vA0 = __lasx_xvldx (pSample1, 0);
  __m256i vA1 = __lasx_xvldx (pSample1, iStride1);
  __m256i vA2 = __lasx_xvldx (pSample1, kiOffA2);
  __m256i vA3 = __lasx_xvldx (pSample1, kiOffA3);
  __m256i vA4 = __lasx_xvldx (pSample1, kiOffA4);
  __m256i vA5 = __lasx_xvldx (pSample1, kiOffA5);
  __m256i vA6 = __lasx_xvldx (pSample1, kiOffA6);
  __m256i vA7 = __lasx_xvldx (pSample1, kiOffA7);
  /* second block, rows 0..7 */
  __m256i vB0 = __lasx_xvldx (pSample2, 0);
  __m256i vB1 = __lasx_xvldx (pSample2, iStride2);
  __m256i vB2 = __lasx_xvldx (pSample2, kiOffB2);
  __m256i vB3 = __lasx_xvldx (pSample2, kiOffB3);
  __m256i vB4 = __lasx_xvldx (pSample2, kiOffB4);
  __m256i vB5 = __lasx_xvldx (pSample2, kiOffB5);
  __m256i vB6 = __lasx_xvldx (pSample2, kiOffB6);
  __m256i vB7 = __lasx_xvldx (pSample2, kiOffB7);
  /* pair rows doubleword-wise ... */
  __m256i vA01 = __lasx_xvpackev_d (vA0, vA1);
  __m256i vA23 = __lasx_xvpackev_d (vA2, vA3);
  __m256i vA45 = __lasx_xvpackev_d (vA4, vA5);
  __m256i vA67 = __lasx_xvpackev_d (vA6, vA7);
  __m256i vB01 = __lasx_xvpackev_d (vB0, vB1);
  __m256i vB23 = __lasx_xvpackev_d (vB2, vB3);
  __m256i vB45 = __lasx_xvpackev_d (vB4, vB5);
  __m256i vB67 = __lasx_xvpackev_d (vB6, vB7);
  /* ... then merge two pairs into one vector */
  __m256i vATop = __lasx_xvpermi_q (vA01, vA23, 0x20);
  __m256i vABot = __lasx_xvpermi_q (vA45, vA67, 0x20);
  __m256i vBTop = __lasx_xvpermi_q (vB01, vB23, 0x20);
  __m256i vBBot = __lasx_xvpermi_q (vB45, vB67, 0x20);
  __m256i vSadTop, vSadBot, vSum;

  HORISUM (vATop, vBTop, vSadTop);
  HORISUM (vABot, vBBot, vSadBot);

  vSum = __lasx_xvadd_d (vSadTop, vSadBot);
  vSum = __lasx_xvhaddw_qu_du (vSum, vSum);

  return (__lasx_xvpickve2gr_d (vSum, 0) +
          __lasx_xvpickve2gr_d (vSum, 2));
}
/*!
 * Computes a transform-domain difference measure between two 4x4 byte blocks
 * (presumably the 4x4 SATD, going by the function name -- TODO confirm).
 *
 * Steps visible in the body: load four rows of each block, pack each block
 * into one vector, take widened byte differences, run two rounds of
 * horizontal add/sub stages with re-shuffling in between, take absolute
 * values, sum everything and return (sum + 1) >> 1.
 *
 * pSample1/iStride1 and pSample2/iStride2 give the top-left byte and the row
 * distance in bytes of the two blocks.
 */
int32_t WelsSampleSatd4x4_lasx (uint8_t* pSample1, int32_t iStride1,
uint8_t* pSample2, int32_t iStride2) {
int32_t iSatdSum;
uint8_t* pSrc1 = pSample1;
uint8_t* pSrc2 = pSample2;
int32_t iStride0 = 0;
int32_t iStride1_tmp = iStride1 << 1;
int32_t iStride2_tmp = iStride2 << 1;
__m256i src1_0, src1_1, src1_2, src1_3;
__m256i src2_0, src2_1, src2_2, src2_3;
__m256i iSample01, iSample23;
__m256i tmp0, tmp1, tmp2, tmp3;
/* all-zero vector, used below as the second operand of xvabsd_w */
__m256i zero = __lasx_xvldi(0);
/* halfword selector for xvshuf_h; the indices swap neighbouring pairs */
v16i16 mask= {1, 0, 3, 2, 5, 4, 7, 6, 1, 0, 3, 2, 5, 4, 7, 6};
/* rows 0..3 of the first block (full 256-bit loads) */
DUP4_ARG2(__lasx_xvldx,
pSrc1, iStride0,
pSrc1, iStride1,
pSrc1, iStride1_tmp,
pSrc1, iStride1_tmp + iStride1,
src1_0, src1_1, src1_2, src1_3);
/* rows 0..3 of the second block */
DUP4_ARG2(__lasx_xvldx,
pSrc2, iStride0,
pSrc2, iStride2,
pSrc2, iStride2_tmp,
pSrc2, iStride2_tmp + iStride2,
src2_0, src2_1, src2_2, src2_3);
/* pack the four rows of each block: first word-wise, then doubleword-wise */
DUP4_ARG2(__lasx_xvpackev_w,
src1_0, src1_1,
src1_2, src1_3,
src2_0, src2_1,
src2_2, src2_3,
src1_0, src1_2, src2_0, src2_2);
DUP2_ARG2(__lasx_xvpackev_d,
src1_0, src1_2,
src2_0, src2_2,
src1_0, src2_0);
/* widened differences block1 - block2, even and odd bytes separately */
tmp0 = __lasx_xvsubwev_h_bu(src1_0, src2_0);
tmp1 = __lasx_xvsubwod_h_bu(src1_0, src2_0);
tmp2 = __lasx_xvilvl_w(tmp0, tmp1);
tmp3 = __lasx_xvilvh_w(tmp0, tmp1);
tmp0 = __lasx_xvpermi_q(tmp3, tmp2, 0x20);
tmp0 = __lasx_xvshuf_h((__m256i)mask, tmp0, tmp0);
/* first round of horizontal add/sub stages */
iSample01 = __lasx_xvhaddw_w_h(tmp0, tmp0);
iSample23 = __lasx_xvhsubw_w_h(tmp0, tmp0);
tmp0 = __lasx_xvhaddw_d_w(iSample01, iSample01);
tmp1 = __lasx_xvhaddw_d_w(iSample23, iSample23);
tmp2 = __lasx_xvhsubw_d_w(iSample23, iSample23);
tmp3 = __lasx_xvhsubw_d_w(iSample01, iSample01);
/* regroup the intermediate values before the second round */
tmp1 = __lasx_xvpackev_w(tmp1, tmp0);
tmp3 = __lasx_xvpackev_w(tmp3, tmp2);
tmp0 = __lasx_xvpermi_q(tmp3, tmp1, 0x20);
tmp2 = __lasx_xvpermi_q(tmp3, tmp1, 0x31);
tmp0 = __lasx_xvpermi_w(tmp0, tmp0, 0x72);
tmp2 = __lasx_xvpermi_w(tmp2, tmp2, 0x72);
/* second round of add/sub stages */
iSample01 = __lasx_xvadd_w(tmp0, tmp2);
iSample23 = __lasx_xvsub_w(tmp0, tmp2);
tmp0 = __lasx_xvhaddw_d_w(iSample01, iSample01);
tmp1 = __lasx_xvhaddw_d_w(iSample23, iSample23);
tmp2 = __lasx_xvhsubw_d_w(iSample23, iSample23);
tmp3 = __lasx_xvhsubw_d_w(iSample01, iSample01);
tmp0 = __lasx_xvpackev_w(tmp0, tmp1);
tmp2 = __lasx_xvpackev_w(tmp2, tmp3);
/* absolute values (difference against zero), then accumulate */
tmp0 = __lasx_xvabsd_w(tmp0, zero);
tmp2 = __lasx_xvabsd_w(tmp2, zero);
tmp0 = __lasx_xvadd_w(tmp0, tmp2);
tmp0 = __lasx_xvhaddw_d_w(tmp0, tmp0);
tmp0 = __lasx_xvhaddw_q_d(tmp0, tmp0);
/* combine elements 0 and 2 of the final vector */
iSatdSum = __lasx_xvpickve2gr_d(tmp0, 0) +
__lasx_xvpickve2gr_d(tmp0, 2);
/* halve with rounding */
return ((iSatdSum + 1) >> 1);
}
/*!
 * \brief SAD entry point for 16x8 blocks (LASX).
 *
 * Forwards its arguments unchanged to WelsSampleSad8x8x2_lasx and returns
 * that result.
 */
int32_t WelsSampleSad16x8_lasx (uint8_t* pSample1, int32_t iStride1,
                                uint8_t* pSample2, int32_t iStride2) {
  const int32_t kiSad = WelsSampleSad8x8x2_lasx (pSample1, iStride1,
                                                 pSample2, iStride2);
  return kiSad;
}
/*!
 * \brief SAD entry point for 8x16 blocks (LASX).
 *
 * Adds the WelsSampleSad8x8_lasx result of the upper eight rows to that of
 * the lower eight rows (both pointers advanced by eight strides).
 */
int32_t WelsSampleSad8x16_lasx (uint8_t* pSample1, int32_t iStride1,
                                uint8_t* pSample2, int32_t iStride2) {
  uint8_t* pLower1 = pSample1 + (iStride1 << 3);
  uint8_t* pLower2 = pSample2 + (iStride2 << 3);
  const int32_t kiUpperSad = WelsSampleSad8x8_lasx (pSample1, iStride1,
                                                    pSample2, iStride2);
  const int32_t kiLowerSad = WelsSampleSad8x8_lasx (pLower1, iStride1,
                                                    pLower2, iStride2);
  return kiUpperSad + kiLowerSad;
}
/*!
 * \brief SAD entry point for 16x16 blocks (LASX).
 *
 * Adds the WelsSampleSad8x8x2_lasx result of the upper eight rows to that of
 * the lower eight rows (both pointers advanced by eight strides).
 */
int32_t WelsSampleSad16x16_lasx (uint8_t* pSample1, int32_t iStride1,
                                 uint8_t* pSample2, int32_t iStride2) {
  uint8_t* pLower1 = pSample1 + (iStride1 << 3);
  uint8_t* pLower2 = pSample2 + (iStride2 << 3);
  const int32_t kiUpperSad = WelsSampleSad8x8x2_lasx (pSample1, iStride1,
                                                      pSample2, iStride2);
  const int32_t kiLowerSad = WelsSampleSad8x8x2_lasx (pLower1, iStride1,
                                                      pLower2, iStride2);
  return kiUpperSad + kiLowerSad;
}
/*
 * Computes four 4x4 SADs in one call and stores them in pSad[0..3].
 *
 * The block at iSample1 is compared against the reference block at iSample2
 * shifted by one row up (iSample2 - iStride2), one row down
 * (iSample2 + iStride2), one pixel left (iSample2 - 1) and one pixel right
 * (iSample2 + 1), in that order - the same candidate order that
 * WelsSampleSadFour8x8_lasx uses in this file.
 *
 * iSample1 / iStride1: first block and the byte offset between its rows.
 * iSample2 / iStride2: reference block and the byte offset between its rows.
 * pSad:                receives the four results.
 *
 * NOTE(review): every __lasx_xvldx produces a full __m256i although the
 * packing below appears to keep only the leading 32-bit word of each row -
 * confirm callers guarantee the extra bytes are readable.
 * NOTE(review): DUP2_ARG2 / DUP4_ARG2 / HORISUM are macros defined outside
 * this view; the comments below assume DUPn_ARG2 applies the instruction to
 * consecutive argument pairs and HORISUM reduces absolute differences of its
 * first two operands into the third - confirm against their definitions.
 */
void WelsSampleSadFour4x4_lasx (uint8_t* iSample1, int32_t iStride1,
uint8_t* iSample2, int32_t iStride2,
int32_t* pSad) {
uint8_t *pSrc1 = iSample1;
// The four displaced reference positions: up, down, left, right.
uint8_t *pSrc2 = iSample2 - iStride2;
uint8_t *pSrc3 = iSample2 + iStride2;
uint8_t *pSrc4 = iSample2 - 1;
uint8_t *pSrc5 = iSample2 + 1;
// Row offsets 0, s, 2s and 3s are used for the four rows of each block.
int32_t iStride0 = 0;
int32_t iStride1_tmp = iStride1 << 1;
int32_t iStride2_tmp = iStride2 << 1;
__m256i src1_0, src1_1, src1_2, src1_3;
__m256i src2_0, src2_1, src2_2, src2_3;
__m256i cb0, cb1, cb2, cb3, cb4, cb5, cb6, cb7;
// Candidate 0 (one row up): load 4 rows of pSrc1 and 4 rows of pSrc2.
DUP4_ARG2(__lasx_xvldx,
pSrc1, iStride0,
pSrc1, iStride1,
pSrc1, iStride1_tmp,
pSrc1, iStride1_tmp + iStride1,
src1_0, src1_1, src1_2, src1_3);
DUP4_ARG2(__lasx_xvldx,
pSrc2, iStride0,
pSrc2, iStride2,
pSrc2, iStride2_tmp,
pSrc2, iStride2_tmp + iStride2,
src2_0, src2_1, src2_2, src2_3);
// Interleave row pairs word-wise, then doubleword-wise, gathering the rows
// of each block into cb0 (from pSrc1) and cb1 (from pSrc2).
DUP4_ARG2(__lasx_xvpackev_w,
src1_0, src1_1, src1_2, src1_3,
src2_0, src2_1, src2_2, src2_3,
src1_0, src1_2, src2_0, src2_2);
DUP2_ARG2(__lasx_xvpackev_d,
src1_0, src1_2, src2_0, src2_2,
cb0, cb1); //16 16
// Candidate 1 (one row down): same sequence with pSrc3 -> cb2 / cb3.
// The pSrc1 rows are loaded again because src1_0 / src1_2 were overwritten
// by the packing step above.
DUP4_ARG2(__lasx_xvldx,
pSrc1, iStride0,
pSrc1, iStride1,
pSrc1, iStride1_tmp,
pSrc1, iStride1_tmp + iStride1,
src1_0, src1_1, src1_2, src1_3);
DUP4_ARG2(__lasx_xvldx,
pSrc3, iStride0,
pSrc3, iStride2,
pSrc3, iStride2_tmp,
pSrc3, iStride2_tmp + iStride2,
src2_0, src2_1, src2_2, src2_3);
DUP4_ARG2(__lasx_xvpackev_w,
src1_0, src1_1, src1_2, src1_3,
src2_0, src2_1, src2_2, src2_3,
src1_0, src1_2, src2_0, src2_2);
DUP2_ARG2(__lasx_xvpackev_d,
src1_0, src1_2, src2_0, src2_2,
cb2, cb3); //16 16
// Candidate 2 (one pixel left): same sequence with pSrc4 -> cb4 / cb5.
DUP4_ARG2(__lasx_xvldx,
pSrc1, iStride0,
pSrc1, iStride1,
pSrc1, iStride1_tmp,
pSrc1, iStride1_tmp + iStride1,
src1_0, src1_1, src1_2, src1_3);
DUP4_ARG2(__lasx_xvldx,
pSrc4, iStride0,
pSrc4, iStride2,
pSrc4, iStride2_tmp,
pSrc4, iStride2_tmp + iStride2,
src2_0, src2_1, src2_2, src2_3);
DUP4_ARG2(__lasx_xvpackev_w,
src1_0, src1_1, src1_2, src1_3,
src2_0, src2_1, src2_2, src2_3,
src1_0, src1_2, src2_0, src2_2);
DUP2_ARG2(__lasx_xvpackev_d,
src1_0, src1_2, src2_0, src2_2,
cb4, cb5); //16 16
// Candidate 3 (one pixel right): same sequence with pSrc5 -> cb6 / cb7.
DUP4_ARG2(__lasx_xvldx,
pSrc1, iStride0,
pSrc1, iStride1,
pSrc1, iStride1_tmp,
pSrc1, iStride1_tmp + iStride1,
src1_0, src1_1, src1_2, src1_3);
DUP4_ARG2(__lasx_xvldx,
pSrc5, iStride0,
pSrc5, iStride2,
pSrc5, iStride2_tmp,
pSrc5, iStride2_tmp + iStride2,
src2_0, src2_1, src2_2, src2_3);
DUP4_ARG2(__lasx_xvpackev_w,
src1_0, src1_1, src1_2, src1_3,
src2_0, src2_1, src2_2, src2_3,
src1_0, src1_2, src2_0, src2_2);
DUP2_ARG2(__lasx_xvpackev_d,
src1_0, src1_2, src2_0, src2_2,
cb6, cb7); //16 16
// Merge two candidates per 256-bit register (one per 128-bit half) so that
// two reductions cover all four candidates: cb0/cb1 hold candidates 0 and 1,
// cb4/cb5 hold candidates 2 and 3.
cb0 = __lasx_xvpermi_q(cb2, cb0, 0x20);
cb1 = __lasx_xvpermi_q(cb3, cb1, 0x20);
cb4 = __lasx_xvpermi_q(cb6, cb4, 0x20);
cb5 = __lasx_xvpermi_q(cb7, cb5, 0x20);
HORISUM(cb0, cb1, cb0);
HORISUM(cb4, cb5, cb4);
DUP2_ARG2(__lasx_xvhaddw_qu_du,
cb0, cb0, cb4, cb4,
cb0, cb4);
// 64-bit elements 0 and 2 are the low doubleword of each 128-bit half.
* (pSad) = __lasx_xvpickve2gr_d(cb0, 0);
* (pSad + 1) = __lasx_xvpickve2gr_d(cb0, 2);
* (pSad + 2) = __lasx_xvpickve2gr_d(cb4, 0);
* (pSad + 3) = __lasx_xvpickve2gr_d(cb4, 2);
}
/*
 * Four 8x8 SADs of iSample1 against the reference block displaced by one row
 * up, one row down, one pixel left and one pixel right; results go to
 * pSad[0], pSad[1], pSad[2] and pSad[3] respectively.
 */
void WelsSampleSadFour8x8_lasx (uint8_t* iSample1, int32_t iStride1,
                                uint8_t* iSample2, int32_t iStride2,
                                int32_t* pSad) {
  uint8_t* pRefUp    = iSample2 - iStride2;
  uint8_t* pRefDown  = iSample2 + iStride2;
  uint8_t* pRefLeft  = iSample2 - 1;
  uint8_t* pRefRight = iSample2 + 1;

  pSad[0] = WelsSampleSad8x8_lasx (iSample1, iStride1, pRefUp, iStride2);
  pSad[1] = WelsSampleSad8x8_lasx (iSample1, iStride1, pRefDown, iStride2);
  pSad[2] = WelsSampleSad8x8_lasx (iSample1, iStride1, pRefLeft, iStride2);
  pSad[3] = WelsSampleSad8x8_lasx (iSample1, iStride1, pRefRight, iStride2);
}
/*
 * Four 8x16 SADs of iSample1 against the reference block displaced by one row
 * up, one row down, one pixel left and one pixel right; results go to
 * pSad[0], pSad[1], pSad[2] and pSad[3] respectively.
 */
void WelsSampleSadFour8x16_lasx (uint8_t* iSample1, int32_t iStride1,
                                 uint8_t* iSample2, int32_t iStride2,
                                 int32_t* pSad) {
  uint8_t* pRefUp    = iSample2 - iStride2;
  uint8_t* pRefDown  = iSample2 + iStride2;
  uint8_t* pRefLeft  = iSample2 - 1;
  uint8_t* pRefRight = iSample2 + 1;

  pSad[0] = WelsSampleSad8x16_lasx (iSample1, iStride1, pRefUp, iStride2);
  pSad[1] = WelsSampleSad8x16_lasx (iSample1, iStride1, pRefDown, iStride2);
  pSad[2] = WelsSampleSad8x16_lasx (iSample1, iStride1, pRefLeft, iStride2);
  pSad[3] = WelsSampleSad8x16_lasx (iSample1, iStride1, pRefRight, iStride2);
}
/*
 * Four 16x8 SADs of iSample1 against the reference block displaced by one row
 * up, one row down, one pixel left and one pixel right; results go to
 * pSad[0], pSad[1], pSad[2] and pSad[3] respectively.
 */
void WelsSampleSadFour16x8_lasx (uint8_t* iSample1, int32_t iStride1,
                                 uint8_t* iSample2, int32_t iStride2,
                                 int32_t* pSad) {
  uint8_t* pRefUp    = iSample2 - iStride2;
  uint8_t* pRefDown  = iSample2 + iStride2;
  uint8_t* pRefLeft  = iSample2 - 1;
  uint8_t* pRefRight = iSample2 + 1;

  pSad[0] = WelsSampleSad16x8_lasx (iSample1, iStride1, pRefUp, iStride2);
  pSad[1] = WelsSampleSad16x8_lasx (iSample1, iStride1, pRefDown, iStride2);
  pSad[2] = WelsSampleSad16x8_lasx (iSample1, iStride1, pRefLeft, iStride2);
  pSad[3] = WelsSampleSad16x8_lasx (iSample1, iStride1, pRefRight, iStride2);
}
/*
 * Four 16x16 SADs of iSample1 against the reference block displaced by one row
 * up, one row down, one pixel left and one pixel right; results go to
 * pSad[0], pSad[1], pSad[2] and pSad[3] respectively.
 */
void WelsSampleSadFour16x16_lasx (uint8_t* iSample1, int32_t iStride1,
                                  uint8_t* iSample2, int32_t iStride2,
                                  int32_t* pSad) {
  uint8_t* pRefUp    = iSample2 - iStride2;
  uint8_t* pRefDown  = iSample2 + iStride2;
  uint8_t* pRefLeft  = iSample2 - 1;
  uint8_t* pRefRight = iSample2 + 1;

  pSad[0] = WelsSampleSad16x16_lasx (iSample1, iStride1, pRefUp, iStride2);
  pSad[1] = WelsSampleSad16x16_lasx (iSample1, iStride1, pRefDown, iStride2);
  pSad[2] = WelsSampleSad16x16_lasx (iSample1, iStride1, pRefLeft, iStride2);
  pSad[3] = WelsSampleSad16x16_lasx (iSample1, iStride1, pRefRight, iStride2);
}
/*
 * SATD for an 8x8 block: the sum of WelsSampleSatd4x4_lasx over the four 4x4
 * quadrants (top-left, top-right, bottom-left, bottom-right, in that order).
 */
int32_t WelsSampleSatd8x8_lasx (uint8_t* pSample1, int32_t iStride1,
                                uint8_t* pSample2, int32_t iStride2) {
  /* Start of the lower pair of quadrants: 4 rows down in each buffer. */
  uint8_t* pLower1 = pSample1 + (iStride1 << 2);
  uint8_t* pLower2 = pSample2 + (iStride2 << 2);

  const int32_t iTopLeft     = WelsSampleSatd4x4_lasx (pSample1, iStride1, pSample2, iStride2);
  const int32_t iTopRight    = WelsSampleSatd4x4_lasx (pSample1 + 4, iStride1, pSample2 + 4, iStride2);
  const int32_t iBottomLeft  = WelsSampleSatd4x4_lasx (pLower1, iStride1, pLower2, iStride2);
  const int32_t iBottomRight = WelsSampleSatd4x4_lasx (pLower1 + 4, iStride1, pLower2 + 4, iStride2);

  return iTopLeft + iTopRight + iBottomLeft + iBottomRight;
}
/*
 * SATD for a 16x8 block: the sum of WelsSampleSatd8x8_lasx over the left 8x8
 * half and the right 8x8 half (8 bytes further along the row).
 */
int32_t WelsSampleSatd16x8_lasx (uint8_t* pSample1, int32_t iStride1,
                                 uint8_t* pSample2, int32_t iStride2) {
  const int32_t iLeft  = WelsSampleSatd8x8_lasx (pSample1, iStride1, pSample2, iStride2);
  const int32_t iRight = WelsSampleSatd8x8_lasx (pSample1 + 8, iStride1, pSample2 + 8, iStride2);
  return iLeft + iRight;
}
/*
 * SATD for an 8x16 block: the sum of WelsSampleSatd8x8_lasx over the upper 8x8
 * half and the lower 8x8 half (8 rows, i.e. stride << 3, further down).
 */
int32_t WelsSampleSatd8x16_lasx (uint8_t* pSample1, int32_t iStride1,
                                 uint8_t* pSample2, int32_t iStride2) {
  const int32_t iUpper = WelsSampleSatd8x8_lasx (pSample1, iStride1, pSample2, iStride2);
  const int32_t iLower = WelsSampleSatd8x8_lasx (pSample1 + (iStride1 << 3), iStride1,
                                                 pSample2 + (iStride2 << 3), iStride2);
  return iUpper + iLower;
}
/*
 * SATD for a 16x16 block: the sum of WelsSampleSatd8x8_lasx over the four 8x8
 * quadrants (top-left, top-right, bottom-left, bottom-right, in that order).
 */
int32_t WelsSampleSatd16x16_lasx (uint8_t* pSample1, int32_t iStride1,
                                  uint8_t* pSample2, int32_t iStride2) {
  /* Start of the lower pair of quadrants: 8 rows down in each buffer. */
  uint8_t* pLower1 = pSample1 + (iStride1 << 3);
  uint8_t* pLower2 = pSample2 + (iStride2 << 3);

  const int32_t iTopLeft     = WelsSampleSatd8x8_lasx (pSample1, iStride1, pSample2, iStride2);
  const int32_t iTopRight    = WelsSampleSatd8x8_lasx (pSample1 + 8, iStride1, pSample2 + 8, iStride2);
  const int32_t iBottomLeft  = WelsSampleSatd8x8_lasx (pLower1, iStride1, pLower2, iStride2);
  const int32_t iBottomRight = WelsSampleSatd8x8_lasx (pLower1 + 8, iStride1, pLower2 + 8, iStride2);

  return iTopLeft + iTopRight + iBottomLeft + iBottomRight;
}
+45 -15
View File
@@ -17,21 +17,51 @@ cpp_sources = [
'src/WelsThreadPool.cpp',
]
asm_sources = [
'x86/cpuid.asm',
'x86/dct.asm',
'x86/deblock.asm',
'x86/expand_picture.asm',
'x86/intra_pred_com.asm',
'x86/mb_copy.asm',
'x86/mc_chroma.asm',
'x86/mc_luma.asm',
'x86/satd_sad.asm',
'x86/vaa.asm',
]
objs_asm = asm_gen.process(asm_sources)
objs_asm = []
if cpu_family in ['x86', 'x86_64']
asm_sources = [
'x86/cpuid.asm',
'x86/dct.asm',
'x86/deblock.asm',
'x86/expand_picture.asm',
'x86/intra_pred_com.asm',
'x86/mb_copy.asm',
'x86/mc_chroma.asm',
'x86/mc_luma.asm',
'x86/satd_sad.asm',
'x86/vaa.asm',
]
objs_asm += asm_gen.process(asm_sources)
elif cpu_family == 'arm'
asm_sources = [
'arm/copy_mb_neon.S',
'arm/deblocking_neon.S',
'arm/expand_picture_neon.S',
'arm/intra_pred_common_neon.S',
'arm/mc_neon.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
elif cpu_family == 'aarch64'
asm_sources = [
'arm64/copy_mb_aarch64_neon.S',
'arm64/deblocking_aarch64_neon.S',
'arm64/expand_picture_aarch64_neon.S',
'arm64/intra_pred_common_aarch64_neon.S',
'arm64/mc_aarch64_neon.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
else
error('Unsupported cpu_family @0@'.format(cpu_family))
endif
libcommon = static_library('common', cpp_sources, objs_asm,
include_directories: inc,
include_directories: [inc, casm_inc],
dependencies: deps)
+80
View File
@@ -0,0 +1,80 @@
/*!
* \copy
* Copyright (C) 2020 Loongson Technology Co. Ltd.
* Contributed by Gu Xiwei(guxiwei-hf@loongson.cn)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file copy_mb_msa.c
*
* \brief MIPS MSA optimizations
*
* \date 14/05/2020 Created
*
*************************************************************************************
*/
#include <stdint.h>
#include "msa_macros.h"
/*
 * Copies an 8x8 block from pSrc (row stride iStrideS) to pDst (row stride
 * iStrideD), two rows per iteration: both rows are loaded with MSA_LD_V2 and
 * element 0 of each vector is stored through MSA_ST_D.
 */
void WelsCopy8x8_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
                     int32_t iStrideS ) {
  v16u8 src0, src1;
  int iPairsLeft = 4;   /* 4 iterations x 2 rows = 8 rows */

  while (iPairsLeft-- > 0) {
    MSA_LD_V2(v16u8, pSrc, iStrideS, src0, src1);
    MSA_ST_D(src0, 0, pDst);
    MSA_ST_D(src1, 0, pDst + iStrideD);
    pSrc += 2 * iStrideS;
    pDst += 2 * iStrideD;
  }
}
/*
 * Copies an 8x16 block as two stacked 8x8 copies: rows 0-7 first, then the
 * rows starting 8 strides further down in both buffers.
 */
void WelsCopy8x16_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
                      int32_t iStrideS) {
  uint8_t* pDstLower = pDst + 8 * iStrideD;
  uint8_t* pSrcLower = pSrc + 8 * iStrideS;

  WelsCopy8x8_msa(pDst, iStrideD, pSrc, iStrideS);
  WelsCopy8x8_msa(pDstLower, iStrideD, pSrcLower, iStrideS);
}
/*
 * Copies a 16x8 block from pSrc (row stride iStrideS) to pDst (row stride
 * iStrideD), two rows per iteration, using MSA_LD_V2 to load a row pair and
 * MSA_ST_V2 to store it.
 */
void WelsCopy16x8_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
                      int32_t iStrideS) {
  v16u8 src0, src1;
  int iPairsLeft = 4;   /* 4 iterations x 2 rows = 8 rows */

  while (iPairsLeft-- > 0) {
    MSA_LD_V2(v16u8, pSrc, iStrideS, src0, src1);
    MSA_ST_V2(v16u8, src0, src1, pDst, iStrideD);
    pSrc += 2 * iStrideS;
    pDst += 2 * iStrideD;
  }
}
/*
 * Copies a 16x16 block as two stacked 16x8 copies: rows 0-7 first, then the
 * rows starting 8 strides further down in both buffers.
 *
 * pDst / iStrideD: destination and its row stride in bytes.
 * pSrc / iStrideS: source and its row stride in bytes.
 */
void WelsCopy16x16_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc,
                       int32_t iStrideS) {
  WelsCopy16x8_msa(pDst, iStrideD, pSrc, iStrideS);
  WelsCopy16x8_msa(pDst + 8 * iStrideD, iStrideD,
                   pSrc + 8 * iStrideS, iStrideS);
}
File diff suppressed because it is too large Load Diff
-12
View File
@@ -71,18 +71,6 @@
#if defined(_WIN32) || defined(__CYGWIN__)
#ifdef WINAPI_FAMILY
#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define WP80
#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0)
#define GetSystemInfo(x) GetNativeSystemInfo(x)
#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS)
#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE)
#endif
#endif
WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex) {
InitializeCriticalSection (mutex);
+75 -8
View File
@@ -145,6 +145,15 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
}
}
if (uiMaxCpuidLevel >= 7) {
uiFeatureC = WelsCPUDetectAVX512();
if (uiFeatureC & 0x10000) uiCPU |= WELS_CPU_AVX512F;
if (uiFeatureC & 0x10000000) uiCPU |= WELS_CPU_AVX512CD;
if (uiFeatureC & 0x20000) uiCPU |= WELS_CPU_AVX512DQ;
if (uiFeatureC & 0x40000000) uiCPU |= WELS_CPU_AVX512BW;
if (uiFeatureC & 0x80000000) uiCPU |= WELS_CPU_AVX512VL;
}
if (pNumberOfLogicProcessors != NULL) {
if (uiCPU & WELS_CPU_HTT) {
*pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX
@@ -308,13 +317,73 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
}
#elif defined(mips)
/* for loongson */
/* Get cpu features from cpuinfo. */
// Derives WELS_CPU_* flags from /proc/cpuinfo (Linux only; returns 0 elsewhere
// or when the file cannot be opened).
//
//  - first "model name" line: Loongson-3A / Loongson-3B / Loongson-2K
//    sets WELS_CPU_MMI;
//  - first "ASEs implemented" line: "loongson-mmi" together with
//    "loongson-ext" sets WELS_CPU_MMI, "msa" sets WELS_CPU_MSA.
//
// Both keys are looked for in a single pass over the file. Scanning them in
// two consecutive loops would consume the whole file while searching for
// "model name" on kernels that do not print that key, so the
// "ASEs implemented" line would never be examined.
static uint32_t get_cpu_flags_from_cpuinfo(void)
{
  uint32_t flags = 0;
# ifdef __linux__
  FILE* fp = fopen("/proc/cpuinfo", "r");
  if (!fp)
    return flags;

  char buf[200];
  bool seen_model = false;
  bool seen_ases = false;
  // Only the first occurrence of each key is used; stop once both were seen.
  while (!(seen_model && seen_ases) && fgets(buf, sizeof(buf), fp)) {
    if (!seen_model && !strncmp(buf, "model name", strlen("model name"))) {
      seen_model = true;
      if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") ||
          strstr(buf, "Loongson-2K")) {
        flags |= WELS_CPU_MMI;
      }
    } else if (!seen_ases && !strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) {
      seen_ases = true;
      if (strstr(buf, "loongson-mmi") && strstr(buf, "loongson-ext")) {
        flags |= WELS_CPU_MMI;
      }
      if (strstr(buf, "msa")) {
        flags |= WELS_CPU_MSA;
      }
    }
  }
  fclose(fp);
# endif

  return flags;
}
// MIPS / Loongson CPU feature detection.
// Returns the flags found at run time by get_cpu_flags_from_cpuinfo(). The
// former compile-time HAVE_MMI returns are dropped: they came first and made
// the run-time detection call unreachable.
// pNumberOfLogicProcessors is not written by this implementation.
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  return get_cpu_flags_from_cpuinfo();
}
#elif defined(__loongarch__) && defined(__linux__)
/* The CPUCFG instruction is used to dynamically identify the characteristics
* of the loongarch in the running processor during software execution. */
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_CFG2_LSX (1<<6)
#define LOONGARCH_CFG2_LASX (1<<7)
// Reads CPUCFG word LOONGARCH_CFG2 and translates its LSX / LASX bits
// (LOONGARCH_CFG2_LSX, LOONGARCH_CFG2_LASX) into WELS_CPU_LSX / WELS_CPU_LASX.
// Returns the resulting WELS_CPU_* bit set (0 when neither bit is set).
static uint32_t get_cpu_flags_from_cpucfg(void) {
uint32_t reg = 0;
uint32_t flags = 0;
// reg is bound as a read-write, early-clobber operand ("+&r"), which is why
// it is initialised above; the configuration word index is passed in a
// separate input register.
__asm__ volatile(
"cpucfg %0, %1 \n\t"
: "+&r"(reg)
: "r"(LOONGARCH_CFG2)
);
if (reg & LOONGARCH_CFG2_LSX)
flags |= WELS_CPU_LSX;
if (reg & LOONGARCH_CFG2_LASX)
flags |= WELS_CPU_LASX;
return flags;
}
// LoongArch CPU feature detection: returns whatever get_cpu_flags_from_cpucfg()
// reports. pNumberOfLogicProcessors is not written by this implementation.
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
  const uint32_t uiCpuFlags = get_cpu_flags_from_cpucfg();
  return uiCpuFlags;
}
#else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */
@@ -324,5 +393,3 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
}
#endif
+234
View File
@@ -33,6 +33,240 @@
#include "expand_pic.h"
#include "cpu_core.h"
// Top-left luma corner: for PADDING_LENGTH rows above pDst, copies the first
// 16 bytes of the row at pDst and fills the PADDING_LENGTH bytes to their left
// with the value of pDst[0].
static inline void MBPadTopLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride) {
  const uint8_t kuiCorner = pDst[0];
  uint8_t* pRow = pDst;
  int32_t iRowsDone = 0;
  do {
    pRow -= kiStride;                                          // one row further up
    memcpy (pRow, pDst, 16); // confirmed_safe_unsafe_usage
    memset (pRow - PADDING_LENGTH, kuiCorner, PADDING_LENGTH); // left of that row
    ++iRowsDone;
  } while (iRowsDone < PADDING_LENGTH);
}
// Top luma edge: copies the 16 bytes at column (kiMbX << 4) of the row at pDst
// into each of the PADDING_LENGTH rows above it.
static inline void MBPadTopLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) {
  uint8_t* pEdge = pDst + (kiMbX << 4);
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow -= kiStride;                 // one row further up
    memcpy (pRow, pEdge, 16); // confirmed_safe_unsafe_usage
    ++iRowsDone;
  } while (iRowsDone < PADDING_LENGTH);
}
// Bottom luma edge: copies the 16 bytes at column (kiMbX << 4) of the last
// picture row (row kiPicH - 1) into each of the PADDING_LENGTH rows below it.
static inline void MBPadBottomLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX,
                                      const int32_t& kiPicH) {
  uint8_t* pEdge = pDst + (kiPicH - 1) * kiStride + (kiMbX << 4);
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow += kiStride;                 // one row further down
    memcpy (pRow, pEdge, 16); // confirmed_safe_unsafe_usage
    ++iRowsDone;
  } while (iRowsDone < PADDING_LENGTH);
}
// Top-right luma corner: for PADDING_LENGTH rows above pDst, copies the last
// 16 bytes of the row at pDst (ending at column kiPicW) and fills the
// PADDING_LENGTH bytes to their right with the last pixel of that row.
static inline void MBPadTopRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) {
  uint8_t* pEdge = pDst + kiPicW;          // one past the last pixel of the row
  const uint8_t kuiCorner = pEdge[-1];
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow -= kiStride;                      // one row further up
    memcpy (pRow - 16, pEdge - 16, 16); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, PADDING_LENGTH); // right of that row
    ++iRowsDone;
  } while (iRowsDone < PADDING_LENGTH);
}
// Bottom-left luma corner: for PADDING_LENGTH rows below the last picture row
// (row kiPicH - 1), copies that row's first 16 bytes and fills the
// PADDING_LENGTH bytes to their left with that row's first pixel.
static inline void MBPadBottomLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) {
  uint8_t* pEdge = pDst + (kiPicH - 1) * kiStride;
  const uint8_t kuiCorner = pEdge[0];
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow += kiStride;                                          // one row further down
    memcpy (pRow, pEdge, 16); // confirmed_safe_unsafe_usage
    memset (pRow - PADDING_LENGTH, kuiCorner, PADDING_LENGTH); // left of that row
    ++iRowsDone;
  } while (iRowsDone < PADDING_LENGTH);
}
// Bottom-right luma corner: for PADDING_LENGTH rows below the last picture row
// (row kiPicH - 1), copies that row's last 16 bytes (ending at column kiPicW)
// and fills the PADDING_LENGTH bytes to their right with that row's last pixel.
static inline void MBPadBottomRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW,
                                           const int32_t& kiPicH) {
  uint8_t* pEdge = pDst + (kiPicH - 1) * kiStride + kiPicW; // one past the last pixel
  const uint8_t kuiCorner = pEdge[-1];
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow += kiStride;                      // one row further down
    memcpy (pRow - 16, pEdge - 16, 16); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, PADDING_LENGTH); // right of that row
    ++iRowsDone;
  } while (iRowsDone < PADDING_LENGTH);
}
// Left luma edge: for the 16 rows starting at row (kiMbY << 4), fills the
// PADDING_LENGTH bytes to the left of each row with that row's first pixel.
static inline void MBPadLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) {
  uint8_t* pRow = pDst + (kiMbY << 4) * kiStride;
  int32_t iRowsLeft = 16;
  while (iRowsLeft-- > 0) {
    memset (pRow - PADDING_LENGTH, pRow[0], PADDING_LENGTH);
    pRow += kiStride;
  }
}
// Right luma edge: for the 16 rows starting at row (kiMbY << 4), fills the
// PADDING_LENGTH bytes starting at column kiPicW with that row's last pixel.
static inline void MBPadRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY,
                                     const int32_t& kiPicW) {
  uint8_t* pRowEnd = pDst + (kiMbY << 4) * kiStride + kiPicW;
  int32_t iRowsLeft = 16;
  while (iRowsLeft-- > 0) {
    memset (pRowEnd, pRowEnd[-1], PADDING_LENGTH);
    pRowEnd += kiStride;
  }
}
// Top chroma edge: copies the 8 bytes at column (kiMbX << 3) of the row at
// pDst into each of the CHROMA_PADDING_LENGTH rows above it.
static inline void MBPadTopChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) {
  uint8_t* pEdge = pDst + (kiMbX << 3);
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow -= kiStride;                // one row further up
    memcpy (pRow, pEdge, 8); // confirmed_safe_unsafe_usage
    ++iRowsDone;
  } while (iRowsDone < CHROMA_PADDING_LENGTH);
}
// Bottom chroma edge: copies the 8 bytes at column (kiMbX << 3) of the last
// picture row (row kiPicH - 1) into each of the CHROMA_PADDING_LENGTH rows
// below it.
static inline void MBPadBottomChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX,
                                        const int32_t& kiPicH) {
  uint8_t* pEdge = pDst + (kiPicH - 1) * kiStride + (kiMbX << 3);
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow += kiStride;                // one row further down
    memcpy (pRow, pEdge, 8); // confirmed_safe_unsafe_usage
    ++iRowsDone;
  } while (iRowsDone < CHROMA_PADDING_LENGTH);
}
// Top-left chroma corner: for CHROMA_PADDING_LENGTH rows above pDst, copies
// the first 8 bytes of the row at pDst and fills the CHROMA_PADDING_LENGTH
// bytes to their left with the value of pDst[0].
static inline void MBPadTopLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride) {
  const uint8_t kuiCorner = pDst[0];
  uint8_t* pRow = pDst;
  int32_t iRowsDone = 0;
  do {
    pRow -= kiStride;                // one row further up
    memcpy (pRow, pDst, 8); // confirmed_safe_unsafe_usage
    memset (pRow - CHROMA_PADDING_LENGTH, kuiCorner, CHROMA_PADDING_LENGTH); // left of that row
    ++iRowsDone;
  } while (iRowsDone < CHROMA_PADDING_LENGTH);
}
// Top-right chroma corner: for CHROMA_PADDING_LENGTH rows above pDst, copies
// the last 8 bytes of the row at pDst (ending at column kiPicW) and fills the
// CHROMA_PADDING_LENGTH bytes to their right with the last pixel of that row.
static inline void MBPadTopRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) {
  uint8_t* pEdge = pDst + kiPicW;          // one past the last pixel of the row
  const uint8_t kuiCorner = pEdge[-1];
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow -= kiStride;                      // one row further up
    memcpy (pRow - 8, pEdge - 8, 8); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, CHROMA_PADDING_LENGTH); // right of that row
    ++iRowsDone;
  } while (iRowsDone < CHROMA_PADDING_LENGTH);
}
// Bottom-left chroma corner: for CHROMA_PADDING_LENGTH rows below the last
// picture row (row kiPicH - 1), copies that row's first 8 bytes and fills the
// CHROMA_PADDING_LENGTH bytes to their left with that row's first pixel.
static inline void MBPadBottomLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) {
  uint8_t* pEdge = pDst + (kiPicH - 1) * kiStride;
  const uint8_t kuiCorner = pEdge[0];
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow += kiStride;                // one row further down
    memcpy (pRow, pEdge, 8); // confirmed_safe_unsafe_usage
    memset (pRow - CHROMA_PADDING_LENGTH, kuiCorner, CHROMA_PADDING_LENGTH); // left of that row
    ++iRowsDone;
  } while (iRowsDone < CHROMA_PADDING_LENGTH);
}
// Bottom-right chroma corner: for CHROMA_PADDING_LENGTH rows below the last
// picture row (row kiPicH - 1), copies that row's last 8 bytes (ending at
// column kiPicW) and fills the CHROMA_PADDING_LENGTH bytes to their right with
// that row's last pixel.
static inline void MBPadBottomRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW,
                                             const int32_t kiPicH) {
  uint8_t* pEdge = pDst + (kiPicH - 1) * kiStride + kiPicW; // one past the last pixel
  const uint8_t kuiCorner = pEdge[-1];
  uint8_t* pRow = pEdge;
  int32_t iRowsDone = 0;
  do {
    pRow += kiStride;                      // one row further down
    memcpy (pRow - 8, pEdge - 8, 8); // confirmed_safe_unsafe_usage
    memset (pRow, kuiCorner, CHROMA_PADDING_LENGTH); // right of that row
    ++iRowsDone;
  } while (iRowsDone < CHROMA_PADDING_LENGTH);
}
// Left chroma edge: for the 8 rows starting at row (kiMbY << 3), fills the
// CHROMA_PADDING_LENGTH bytes to the left of each row with that row's first
// pixel.
static inline void MBPadLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) {
  uint8_t* pRow = pDst + (kiMbY << 3) * kiStride;
  int32_t iRowsLeft = 8;
  while (iRowsLeft-- > 0) {
    memset (pRow - CHROMA_PADDING_LENGTH, pRow[0], CHROMA_PADDING_LENGTH);
    pRow += kiStride;
  }
}
// Right chroma edge: for the 8 rows starting at row (kiMbY << 3), fills the
// CHROMA_PADDING_LENGTH bytes starting at column kiPicW with that row's last
// pixel.
static inline void MBPadRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY,
                                       const int32_t& kiPicW) {
  uint8_t* pRowEnd = pDst + (kiMbY << 3) * kiStride + kiPicW;
  int32_t iRowsLeft = 8;
  while (iRowsLeft-- > 0) {
    memset (pRowEnd, pRowEnd[-1], CHROMA_PADDING_LENGTH);
    pRowEnd += kiStride;
  }
}
// Pads the luma picture border that belongs to macroblock (kiMbX, kiMbY).
//
//  pDst        top-left pixel of the picture plane
//  kiStride    byte offset between rows
//  kiPicW/H    picture width / height in pixels
//  kiMbX/Y     macroblock column / row
//  kiMBWidth / kiMBHeight   picture size in macroblocks
//
// A macroblock on a picture corner pads that corner, a macroblock in the first
// or last column pads the left / right side, and a macroblock in the first or
// last row that is not in the first or last column pads the top / bottom.
//
// Every edge is tested independently. When the picture is a single macroblock
// wide (or tall) the same macroblock lies on both opposite edges, so an
// else-if chain would leave the right (or bottom) border unpadded. For larger
// pictures the conditions are mutually exclusive and the calls made are the
// same as with a chained test.
void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
                  const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) {
  const bool kbLeft   = (kiMbX == 0);
  const bool kbRight  = (kiMbX == kiMBWidth - 1);
  const bool kbTop    = (kiMbY == 0);
  const bool kbBottom = (kiMbY == kiMBHeight - 1);

  // corners
  if (kbTop && kbLeft) {
    MBPadTopLeftLuma_c (pDst, kiStride);
  }
  if (kbTop && kbRight) {
    MBPadTopRightLuma_c (pDst, kiStride, kiPicW);
  }
  if (kbBottom && kbLeft) {
    MBPadBottomLeftLuma_c (pDst, kiStride, kiPicH);
  }
  if (kbBottom && kbRight) {
    MBPadBottomRightLuma_c (pDst, kiStride, kiPicW, kiPicH);
  }

  // left / right sides
  if (kbLeft) {
    MBPadLeftLuma_c (pDst, kiStride, kiMbY);
  }
  if (kbRight) {
    MBPadRightLuma_c (pDst, kiStride, kiMbY, kiPicW);
  }

  // top / bottom for columns that are not already covered by a corner
  if (kiMbX > 0 && kiMbX < kiMBWidth - 1) {
    if (kbTop) {
      MBPadTopLuma_c (pDst, kiStride, kiMbX);
    }
    if (kbBottom) {
      MBPadBottomLuma_c (pDst, kiStride, kiMbX, kiPicH);
    }
  }
}
// Pads the chroma picture border that belongs to macroblock (kiMbX, kiMbY).
//
//  pDst        top-left pixel of the chroma plane
//  kiStride    byte offset between rows
//  kiPicW/H    plane width / height in pixels
//  kiMbX/Y     macroblock column / row
//  kiMBWidth / kiMBHeight   picture size in macroblocks
//
// A macroblock on a picture corner pads that corner, a macroblock in the first
// or last column pads the left / right side, and a macroblock in the first or
// last row that is not in the first or last column pads the top / bottom.
//
// Every edge is tested independently. When the picture is a single macroblock
// wide (or tall) the same macroblock lies on both opposite edges, so an
// else-if chain would leave the right (or bottom) border unpadded. For larger
// pictures the conditions are mutually exclusive and the calls made are the
// same as with a chained test.
void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH,
                    const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) {
  const bool kbLeft   = (kiMbX == 0);
  const bool kbRight  = (kiMbX == kiMBWidth - 1);
  const bool kbTop    = (kiMbY == 0);
  const bool kbBottom = (kiMbY == kiMBHeight - 1);

  // corners
  if (kbTop && kbLeft) {
    MBPadTopLeftChroma_c (pDst, kiStride);
  }
  if (kbTop && kbRight) {
    MBPadTopRightChroma_c (pDst, kiStride, kiPicW);
  }
  if (kbBottom && kbLeft) {
    MBPadBottomLeftChroma_c (pDst, kiStride, kiPicH);
  }
  if (kbBottom && kbRight) {
    MBPadBottomRightChroma_c (pDst, kiStride, kiPicW, kiPicH);
  }

  // left / right sides
  if (kbLeft) {
    MBPadLeftChroma_c (pDst, kiStride, kiMbY);
  }
  if (kbRight) {
    MBPadRightChroma_c (pDst, kiStride, kiMbY, kiPicW);
  }

  // top / bottom for columns that are not already covered by a corner
  if (kiMbX > 0 && kiMbX < kiMBWidth - 1) {
    if (kbTop) {
      MBPadTopChroma_c (pDst, kiStride, kiMbX);
    }
    if (kbBottom) {
      MBPadBottomChroma_c (pDst, kiStride, kiMbX, kiPicH);
    }
  }
}
// Rewritten (split into luma & chroma) to make mmx/sse2 optimization easier, 9/27/2009
static inline void ExpandPictureLuma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
const int32_t kiPicH) {
+42
View File
@@ -4195,6 +4195,42 @@ void PixelAvg_mmi(uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32
kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
}
#endif//HAVE_MMI
#if defined(HAVE_LSX)
// Plain block copy, dispatched on block width: 16, 8 and 4 use the LSX copy
// routines, every other width goes to McCopyWidthEq2_c.
static inline void McCopy_lsx(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst,
                              int32_t iDstStride, int32_t iWidth, int32_t iHeight) {
  switch (iWidth) {
  case 16:
    McCopyWidthEq16_lsx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
    break;
  case 8:
    McCopyWidthEq8_lsx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
    break;
  case 4:
    McCopyWidthEq4_lsx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
    break;
  default:
    McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
    break;
  }
}
void McChroma_lsx(const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst,
int32_t iDstStride, int16_t iMvX, int16_t iMvY,
int32_t iWidth, int32_t iHeight) {
static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = {
McChromaWidthEq4_lsx,
McChromaWidthEq8_lsx
};
const int32_t kiD8x = iMvX & 0x07;
const int32_t kiD8y = iMvY & 0x07;
if (kiD8x == 0 && kiD8y == 0) {
McCopy_lsx (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
return;
}
if (iWidth != 2) {
kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride,
g_kuiABCD[kiD8y][kiD8x], iHeight);
} else
McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY,
iWidth, iHeight);
}
#endif//HAVE_LSX
} // anon ns.
void WelsCommon::InitMcFunc (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
@@ -4263,4 +4299,10 @@ void WelsCommon::InitMcFunc (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
pMcFuncs->pMcLumaFunc = McLuma_mmi;
}
#endif//HAVE_MMI
#if defined(HAVE_LSX)
if (uiCpuFlag & WELS_CPU_LSX) {
pMcFuncs->pMcChromaFunc = McChroma_lsx;
}
#endif//HAVE_LSX
}
+5 -5
View File
@@ -62,11 +62,11 @@ CMemoryAlign::~CMemoryAlign() {
}
void* WelsMalloc (const uint32_t kuiSize, const char* kpTag, const uint32_t kiAlign) {
const int32_t kiSizeOfVoidPointer = sizeof (void**);
const int32_t kiSizeOfInt = sizeof (int32_t);
const int32_t kiAlignedBytes = kiAlign - 1;
const int32_t kiTrialRequestedSize = kuiSize + kiAlignedBytes + kiSizeOfVoidPointer + kiSizeOfInt;
const int32_t kiActualRequestedSize = kiTrialRequestedSize;
const uint32_t kiSizeOfVoidPointer = sizeof (void**);
const uint32_t kiSizeOfInt = sizeof (int32_t);
const uint32_t kiAlignedBytes = kiAlign - 1;
const uint32_t kiTrialRequestedSize = kuiSize + kiAlignedBytes + kiSizeOfVoidPointer + kiSizeOfInt;
const uint32_t kiActualRequestedSize = kiTrialRequestedSize;
const uint32_t kiPayloadSize = kuiSize;
uint8_t* pBuf = (uint8_t*) malloc (kiActualRequestedSize);
+35 -4
View File
@@ -66,18 +66,49 @@ COMMON_OBJS += $(COMMON_OBJSARM64)
endif
OBJS += $(COMMON_OBJSARM64)
COMMON_ASM_MIPS_SRCS=\
COMMON_ASM_MIPS_MMI_SRCS=\
$(COMMON_SRCDIR)/mips/copy_mb_mmi.c\
$(COMMON_SRCDIR)/mips/deblock_mmi.c\
$(COMMON_SRCDIR)/mips/expand_picture_mmi.c\
$(COMMON_SRCDIR)/mips/intra_pred_com_mmi.c\
$(COMMON_SRCDIR)/mips/satd_sad_mmi.c\
COMMON_OBJSMIPS += $(COMMON_ASM_MIPS_SRCS:.c=.$(OBJ))
COMMON_OBJSMIPS_MMI += $(COMMON_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
COMMON_ASM_MIPS_MSA_SRCS=\
$(COMMON_SRCDIR)/mips/copy_mb_msa.c\
$(COMMON_SRCDIR)/mips/deblock_msa.c\
COMMON_OBJSMIPS_MSA += $(COMMON_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
COMMON_OBJS += $(COMMON_OBJSMIPS)
ifeq ($(ENABLE_MMI), Yes)
COMMON_OBJS += $(COMMON_OBJSMIPS_MMI)
endif
ifeq ($(ENABLE_MSA), Yes)
COMMON_OBJS += $(COMMON_OBJSMIPS_MSA)
endif
endif
OBJS += $(COMMON_OBJSMIPS_MMI)
OBJS += $(COMMON_OBJSMIPS_MSA)
COMMON_ASM_LOONGARCH_LSX_SRCS=\
$(COMMON_SRCDIR)/loongarch/mc_chroma_lsx.c\
$(COMMON_SRCDIR)/loongarch/copy_mb_lsx.c\
COMMON_OBJSLOONGARCH_LSX += $(COMMON_ASM_LOONGARCH_LSX_SRCS:.c=.$(OBJ))
COMMON_ASM_LOONGARCH_LASX_SRCS=\
$(COMMON_SRCDIR)/loongarch/satd_sad_lasx.c\
COMMON_OBJSLOONGARCH_LASX += $(COMMON_ASM_LOONGARCH_LASX_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), loongarch)
ifeq ($(ENABLE_LSX), Yes)
COMMON_OBJS += $(COMMON_OBJSLOONGARCH_LSX)
endif
ifeq ($(ENABLE_LASX), Yes)
COMMON_OBJS += $(COMMON_OBJSLOONGARCH_LASX)
endif
endif
OBJS += $(COMMON_OBJSMIPS)
OBJS += $(COMMON_OBJS)
+10 -5
View File
@@ -485,15 +485,20 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non-
%endmacro
; WELS_EXTERN name
;   Aligns to 16 bytes (padding with nop), declares `name` as a global symbol
;   and emits its label. When PREFIX is defined the exported symbol is `_name`
;   and `name` is %define'd to `_name` so later uses resolve to it.
;   WELS_PRIVATE_EXTERN, when defined, is appended to the global declaration.
;
; NOTE(review): this macro body contains both a `global X WELS_PRIVATE_EXTERN`
; line and an %ifdef'd `global X: WELS_PRIVATE_EXTERN` / `global X` pair for
; each PREFIX branch, and the leading %ifndef always defines
; WELS_PRIVATE_EXTERN, so the %else branches cannot be taken. This looks like
; the old and new variants of a change interleaved - confirm which variant is
; intended before relying on it.
%macro WELS_EXTERN 1
%ifndef WELS_PRIVATE_EXTERN
%define WELS_PRIVATE_EXTERN
%endif
ALIGN 16, nop
%ifdef PREFIX
global _%1 WELS_PRIVATE_EXTERN
%ifdef WELS_PRIVATE_EXTERN
global _%1: WELS_PRIVATE_EXTERN
%else
global _%1
%endif
%define %1 _%1
%else
global %1 WELS_PRIVATE_EXTERN
%ifdef WELS_PRIVATE_EXTERN
global %1: WELS_PRIVATE_EXTERN
%else
global %1
%endif
%endif
%1:
%endmacro
+51
View File
@@ -210,3 +210,54 @@ WELS_EXTERN WelsEmms
emms ; empty mmx technology states
ret
;*****************************************************************************
; int32_t WelsCPUDetectAVX512()
;*****************************************************************************
; Returns in EAX the AVX-512 feature bits of CPUID.(EAX=7,ECX=0):EBX, masked to
; F (bit 16), DQ (bit 17), CD (bit 28), BW (bit 30) and VL (bit 31), or 0 when
; the OS has not enabled the required register state.
; EBX/RBX is callee-saved and is preserved around CPUID.
WELS_EXTERN WelsCPUDetectAVX512
%ifdef     X86_32
    push    ebx
%else
    push    rbx
%endif

    ; XGETBV may only be executed when CPUID.1:ECX.OSXSAVE[bit 27] is set
    mov     eax, 1
    mov     ecx, 0
    cpuid
    and     ecx, 0x08000000
    cmp     ecx, 0x08000000
    jne     avx512_not_supported

    ; check XMM/YMM/ZMM/opmask state in XCR0:
    ;   bit 1 SSE, bit 2 AVX, bit 5 opmask, bit 6 ZMM_Hi256, bit 7 Hi16_ZMM
    ; => mask 0xE6 (0xD6 would test bit 4, the MPX BNDCSR state, instead of
    ;    the opmask bit)
    mov     ecx, 0
    XGETBV                          ; result in EDX:EAX
    and     eax, 0x0E6
    cmp     eax, 0x0E6
    jne     avx512_not_supported

    ; read the AVX512 flags from CPUID.7:EBX (ECX is still 0 => sub-leaf 0)
    ; EBX[bit 16]: AVX512F
    ; EBX[bit 28]: AVX512CD
    ; EBX[bit 17]: AVX512DQ
    ; EBX[bit 30]: AVX512BW
    ; EBX[bit 31]: AVX512VL
    mov     eax, 7
    cpuid
    and     ebx, 0xD0030000         ; keep bits 31, 30, 28, 17 and 16
    mov     eax, ebx

%ifdef     X86_32
    pop     ebx
%else
    pop     rbx
%endif
    ret

avx512_not_supported:
    mov     eax, 0
%ifdef     X86_32
    pop     ebx
%else
    pop     rbx
%endif
    ret
+187 -49
View File
@@ -52,7 +52,6 @@
#include "measure_time.h"
#include "d3d9_utils.h"
using namespace std;
#if defined (WINDOWS_PHONE)
@@ -69,6 +68,153 @@ int g_iDecodedFrameNum = 0;
#endif
//using namespace WelsDec;
// Returns the bit at position curBit of pBufPtr (bit 0 is the most significant
// bit of pBufPtr[0]) and advances curBit by one.
int32_t readBit (uint8_t* pBufPtr, int32_t& curBit) {
  const int32_t kiBitPos = curBit;
  ++curBit;
  const int kiByteIdx = kiBitPos / 8;
  const int kiShift = 7 - kiBitPos % 8;   // MSB-first within the byte
  return (pBufPtr[kiByteIdx] >> kiShift) & 0x01;
}
// Reads n bits starting at curBit, most significant bit first, and returns them
// as an integer; curBit is advanced by one per bit read (through readBit).
// n is only read, never written.
int32_t readBits (uint8_t* pBufPtr, int32_t& n, int32_t& curBit) {
  int iValue = 0;
  int iRead = 0;
  while (iRead < n) {
    const int kiBit = readBit (pBufPtr, curBit);
    iValue |= kiBit << (n - iRead - 1);
    ++iRead;
  }
  return iValue;
}
// Decodes one unsigned Exp-Golomb code starting at bit position curBit and
// advances curBit past it.
//
// Leading zero bits are counted until the first 1 bit (at most 32 zeros are
// counted), then that many suffix bits are read and the result is
// (2^zeros - 1) + suffix.
//
// The final sum is formed in unsigned arithmetic: with the input bytes coming
// from a bitstream file, 31 or 32 leading zeros are possible, and
// `(1 << zeros) - 1` on a 32-bit int would then overflow or shift by the full
// type width, both undefined. For up to 30 leading zeros the returned value is
// unchanged.
int32_t bsGetUe (uint8_t* pBufPtr, int32_t& curBit) {
  int32_t iLeadingZeros = 0;
  while ((readBit (pBufPtr, curBit) == 0) && (iLeadingZeros < 32)) {
    iLeadingZeros++;
  }
  const uint32_t kuiSuffix = static_cast<uint32_t> (readBits (pBufPtr, iLeadingZeros, curBit));
  // 64-bit shift so that a shift count of 32 is well defined.
  const uint32_t kuiBase = static_cast<uint32_t> ((static_cast<uint64_t> (1) << iLeadingZeros) - 1);
  return static_cast<int32_t> (kuiSuffix + kuiBase);
}
// Returns the first Exp-Golomb value (decoded with bsGetUe) found after
// skipping one byte at pSliceNalPtr.
int32_t readFirstMbInSlice (uint8_t* pSliceNalPtr) {
  int32_t iBitPos = 0;
  uint8_t* pPayload = pSliceNalPtr + 1;   // skip the first byte
  return bsGetUe (pPayload, iBitPos);
}
// Scans pBuf from offset bufPos for start-code-delimited NAL units and returns
// how many bytes, counted from bufPos, precede the start code at which the
// scan decides to stop; returns all remaining bytes (iFileSize - bufPos) when
// no such stop point is found or fewer than 4 bytes remain.
//
//  pBuf / iFileSize  whole input buffer and its size in bytes
//  bufPos            offset at which scanning starts
//  pSpsBuf           out: points just past the start code of the last NAL of
//                    type 7 seen during the scan, NULL if none
//  sps_byte_count    out: distance from pSpsBuf to the start code of the first
//                    type-8 NAL, set only when exactly one type-7 NAL was seen
//                    before it; 0 otherwise
//
// The scan stops (returning read_bytes) at:
//  - a type-1 or type-5 slice NAL whose first_mb_in_slice is 0 once a slice of
//    the other type has already been counted, or at the second such slice of
//    the same type;
//  - a type-7 NAL once any slice has been counted, or at the second type-7 NAL;
//  - a type-8 NAL once any slice has been counted;
//  - the second type-9 NAL.
int32_t readPicture (uint8_t* pBuf, const int32_t& iFileSize, const int32_t& bufPos, uint8_t*& pSpsBuf,
int32_t& sps_byte_count) {
int32_t bytes_available = iFileSize - bufPos;
if (bytes_available < 4) {
return bytes_available;
}
uint8_t* ptr = pBuf + bufPos;
int32_t read_bytes = 0;
int32_t sps_count = 0;
int32_t pps_count = 0;
int32_t non_idr_pict_count = 0;
int32_t idr_pict_count = 0;
int32_t nal_deliminator = 0;
pSpsBuf = NULL;
sps_byte_count = 0;
// The bound keeps ptr[0..4] inside the available bytes.
while (read_bytes < bytes_available - 4) {
bool has4ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 1;
bool has3ByteStartCode = false;
if (!has4ByteStartCode) {
has3ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 1;
}
if (has4ByteStartCode || has3ByteStartCode) {
int32_t byteOffset = has4ByteStartCode ? 4 : 3;
// NAL type is the low 5 bits of the byte right after the start code.
uint8_t nal_unit_type = has4ByteStartCode ? (ptr[4] & 0x1F) : (ptr[3] & 0x1F);
if (nal_unit_type == 1) {
// NOTE(review): readFirstMbInSlice reads past ptr + byteOffset without a
// length bound - confirm the buffer always has enough bytes here.
int32_t firstMBInSlice = readFirstMbInSlice (ptr + byteOffset);
if (++non_idr_pict_count >= 1 && idr_pict_count >= 1 && firstMBInSlice == 0) {
return read_bytes;
}
if (non_idr_pict_count >= 2 && firstMBInSlice == 0) {
return read_bytes;
}
} else if (nal_unit_type == 5) {
int32_t firstMBInSlice = readFirstMbInSlice (ptr + byteOffset);
if (++idr_pict_count >= 1 && non_idr_pict_count >= 1 && firstMBInSlice == 0) {
return read_bytes;
}
if (idr_pict_count >= 2 && firstMBInSlice == 0) {
return read_bytes;
}
} else if (nal_unit_type == 7) {
// Remember where this NAL's payload starts, even if we return right below.
pSpsBuf = ptr + (has4ByteStartCode ? 4 : 3);
if ((++sps_count >= 1) && (non_idr_pict_count >= 1 || idr_pict_count >= 1)) {
return read_bytes;
}
if (sps_count == 2) {
return read_bytes;
}
} else if (nal_unit_type == 8) {
// sps_count == 1 guarantees pSpsBuf was set by the branch above.
if (++pps_count == 1 && sps_count == 1) {
sps_byte_count = int32_t (ptr - pSpsBuf);
}
if (pps_count >= 1 && (non_idr_pict_count >= 1 || idr_pict_count >= 1)) {
return read_bytes;
}
} else if (nal_unit_type == 9) {
if (++nal_deliminator == 2) {
return read_bytes;
}
}
// NOTE(review): this condition is the negation of the loop condition and
// read_bytes has not changed since it was tested, so this return looks
// unreachable.
if (read_bytes >= bytes_available - 4) {
return bytes_available;
}
// Advance by 4 bytes for both start-code lengths.
read_bytes += 4;
ptr += 4;
} else {
++ptr;
++read_bytes;
}
}
return bytes_available;
}
// Drains the frames still buffered inside the decoder.
//
// Queries DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER and calls
// FlushFrame() that many times. Each frame reported with iBufferStatus == 1 is
// passed to CUtils::Process together with pYuvFile and counted in iFrameCount.
//
//  iTotal        in/out: accumulates the WelsTime() delta spent in each flush
//  pYuvFile      file handed to CUtils::Process for every flushed frame
//  pOptionFile   optional; receives (iFrameCount, iWidth, iHeight) records
//  iFrameCount   in/out: incremented per flushed frame
//  uiTimeStamp   copied into sDstBufInfo.uiInBsTimeStamp for every flush
//  iWidth/iHeight in/out: size of the last flushed frame
//  iLastWidth    in/out: last width written to pOptionFile
//  iLastHeight   last height written to pOptionFile
//
// NOTE(review): iLastHeight is taken by value while iLastWidth is a reference,
// so the update of iLastHeight below is not visible to the caller - confirm
// whether that is intended.
// NOTE(review): the option-file record is written only when BOTH width and
// height differ from the last recorded values (&&); a change in one dimension
// only is not recorded - confirm whether || was meant.
void FlushFrames (ISVCDecoder* pDecoder, int64_t& iTotal, FILE* pYuvFile, FILE* pOptionFile, int32_t& iFrameCount,
unsigned long long& uiTimeStamp, int32_t& iWidth, int32_t& iHeight, int32_t& iLastWidth, int32_t iLastHeight) {
uint8_t* pData[3] = { NULL };
uint8_t* pDst[3] = { NULL };
SBufferInfo sDstBufInfo;
int32_t num_of_frames_in_buffer = 0;
CUtils cOutputModule;
pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
// Only the FlushFrame call and the pointer copy are timed; file output
// below is outside the measured interval.
int64_t iStart = WelsTime();
pData[0] = NULL;
pData[1] = NULL;
pData[2] = NULL;
memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
pDecoder->FlushFrame (pData, &sDstBufInfo);
if (sDstBufInfo.iBufferStatus == 1) {
pDst[0] = sDstBufInfo.pDst[0];
pDst[1] = sDstBufInfo.pDst[1];
pDst[2] = sDstBufInfo.pDst[2];
}
int64_t iEnd = WelsTime();
iTotal += iEnd - iStart;
if (sDstBufInfo.iBufferStatus == 1) {
cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
if (pOptionFile != NULL) {
if (iWidth != iLastWidth && iHeight != iLastHeight) {
fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
iLastWidth = iWidth;
iLastHeight = iHeight;
}
}
++iFrameCount;
}
}
}
void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, const char* kpOuputFileName,
int32_t& iWidth, int32_t& iHeight, const char* pOptionFileName, const char* pLengthFileName,
int32_t iErrorConMethod,
@@ -95,14 +241,17 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
int32_t iBufPos = 0;
int32_t iFileSize;
int32_t i = 0;
int32_t iLastWidth = 0, iLastHeight = 0;
int32_t iFrameCount = 0;
int32_t iEndOfStreamFlag = 0;
int32_t num_of_frames_in_buffer = 0;
pDecoder->SetOption (DECODER_OPTION_ERROR_CON_IDC, &iErrorConMethod);
CUtils cOutputModule;
double dElapsed = 0;
uint8_t uLastSpsBuf[32];
int32_t iLastSpsByteCount = 0;
int32_t iThreadCount = 1;
pDecoder->GetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
if (kpH264FileName) {
pH264File = fopen (kpH264FileName, "rb");
@@ -181,13 +330,32 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
goto label_exit;
iSliceSize = static_cast<int32_t> (pInfo[2]);
} else {
for (i = 0; i < iFileSize; i++) {
if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
&& i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
break;
if (iThreadCount >= 1) {
uint8_t* uSpsPtr = NULL;
int32_t iSpsByteCount = 0;
iSliceSize = readPicture (pBuf, iFileSize, iBufPos, uSpsPtr, iSpsByteCount);
if (iLastSpsByteCount > 0 && iSpsByteCount > 0) {
if (iSpsByteCount != iLastSpsByteCount || memcmp (uSpsPtr, uLastSpsBuf, iLastSpsByteCount) != 0) {
//whenever new sequence is different from preceding sequence. All pending frames must be flushed out before the new sequence can start to decode.
FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
iLastHeight);
}
}
if (iSpsByteCount > 0 && uSpsPtr != NULL) {
if (iSpsByteCount > 32) iSpsByteCount = 32;
iLastSpsByteCount = iSpsByteCount;
memcpy (uLastSpsBuf, uSpsPtr, iSpsByteCount);
}
} else {
int i = 0;
for (i = 0; i < iFileSize; i++) {
if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
&& i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
break;
}
}
iSliceSize = i;
}
iSliceSize = i;
}
if (iSliceSize < 4) { //too small size, no effective data, ignore
iBufPos += iSliceSize;
@@ -225,9 +393,9 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
}
if (sDstBufInfo.iBufferStatus == 1) {
pDst[0] = pData[0];
pDst[1] = pData[1];
pDst[2] = pData[2];
pDst[0] = sDstBufInfo.pDst[0];
pDst[1] = sDstBufInfo.pDst[1];
pDst[2] = sDstBufInfo.pDst[2];
}
iEnd = WelsTime();
iTotal += iEnd - iStart;
@@ -257,9 +425,9 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
pDecoder->DecodeFrame2 (NULL, 0, pData, &sDstBufInfo);
if (sDstBufInfo.iBufferStatus == 1) {
pDst[0] = pData[0];
pDst[1] = pData[1];
pDst[2] = pData[2];
pDst[0] = sDstBufInfo.pDst[0];
pDst[1] = sDstBufInfo.pDst[1];
pDst[2] = sDstBufInfo.pDst[2];
}
iEnd = WelsTime();
iTotal += iEnd - iStart;
@@ -283,41 +451,8 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons
iBufPos += iSliceSize;
++ iSliceIndex;
}
pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
iStart = WelsTime();
pData[0] = NULL;
pData[1] = NULL;
pData[2] = NULL;
memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
sDstBufInfo.iBufferStatus = 1;
pDecoder->FlushFrame (pData, &sDstBufInfo);
if (sDstBufInfo.iBufferStatus == 1) {
pDst[0] = pData[0];
pDst[1] = pData[1];
pDst[2] = pData[2];
}
iEnd = WelsTime();
iTotal += iEnd - iStart;
if (sDstBufInfo.iBufferStatus == 1) {
cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
if (pOptionFile != NULL) {
if (iWidth != iLastWidth && iHeight != iLastHeight) {
fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
iLastWidth = iWidth;
iLastHeight = iHeight;
}
}
++iFrameCount;
}
}
FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
iLastHeight);
dElapsed = iTotal / 1e6;
fprintf (stderr, "-------------------------------------------------------\n");
fprintf (stderr, "iWidth:\t\t%d\nheight:\t\t%d\nFrames:\t\t%d\ndecode time:\t%f sec\nFPS:\t\t%f fps\n",
@@ -489,6 +624,9 @@ int32_t main (int32_t iArgC, char* pArgV[]) {
pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting);
}
int32_t iThreadCount = 0;
pDecoder->SetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
if (pDecoder->Initialize (&sDecParam)) {
printf ("Decoder initialization failed.\n");
return 1;
+1
View File
@@ -126,6 +126,7 @@ WELS_ASM_FUNC_END
.align 3
//The table for SIMD instruction {(8,7,6,5,4,3,2,1) * 5}
CONST0_GET_I16X16_LUMA_PRED_PLANE: .long 0x191e2328, 0x050a0f14
+29 -1
View File
@@ -66,6 +66,26 @@ void DeblockingInit (PDeblockingFunc pDeblockingFunc, int32_t iCpu);
*/
void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb);
/*!
* \brief AVC slice init deblocking filtering target layer
*
* \in and out param SDeblockingFilter
* \in and out param iFilterIdc
*
* \return NONE
*/
void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc);
/*!
* \brief AVC MB deblocking filtering target layer
*
* \param DqLayer which has the current location of MB to be deblocked.
*
* \return NONE
*/
void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc,
PDeblockingFilterMbFunc pDeblockMb);
/*!
* \brief pixel deblocking filtering
*
@@ -77,13 +97,21 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun
* \return NONE
*/
uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy);
uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
int32_t iNeighMb, int32_t iMbXy);
uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy);
int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc);
void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag);
/*!
 * \brief Return the non-zero-count row of macroblock iMbXy.
 *
 * The table attached to the layer's current picture (pDec->pNzc) is used when both the picture
 * and its table exist; otherwise the layer's own pNzc table is used.
 */
inline int8_t* GetPNzc (PDqLayer pCurDqLayer, int32_t iMbXy) {
  if (pCurDqLayer->pDec == NULL || pCurDqLayer->pDec->pNzc == NULL) {
    return pCurDqLayer->pNzc[iMbXy];
  }
  return pCurDqLayer->pDec->pNzc[iMbXy];
}
} // namespace WelsDec
#endif //WELS_DEBLOCKING_H__
+5 -4
View File
@@ -58,15 +58,16 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv
int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx); //construction based on slice
int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur);
int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx);
int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx);
int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput);
int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput);
int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer,
uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC);
int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer);
int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer);
void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx);
int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer);
int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer);
void WelsChromaDcIdct (int16_t* pBlock);
bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx);
+20 -1
View File
@@ -59,6 +59,21 @@ int32_t DecoderConfigParam (PWelsDecoderContext pCtx, const SDecodingParam* kpPa
*/
void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx);
/*
* fill last decoded picture info
*/
void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo);
/*!
* \brief fill data fields in SPS and PPS default for decoder context
*/
void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx);
/*!
* \brief copy SpsPps from one Ctx to another ctx for threaded code
*/
void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx);
/*!
*************************************************************************************
* \brief Initialize Wels decoder parameters and memory
@@ -157,7 +172,11 @@ void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx);
//update decoder statistics information
void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput);
//Destroy picture buffer
void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa);
void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa);
//reset picture reordering buffer list
void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo,
const bool& bFullReset);
#ifdef __cplusplus
}
#endif//__cplusplus
+145 -43
View File
@@ -56,6 +56,7 @@
#include "expand_pic.h"
#include "mc.h"
#include "memory_align.h"
#include "wels_decoder_thread.h"
namespace WelsDec {
#define MAX_PRED_MODE_ID_I16x16 3
@@ -64,6 +65,7 @@ namespace WelsDec {
#define WELS_QP_MAX 51
#define LONG_TERM_REF
#define IMinInt32 -0x7FFFFFFF
typedef struct SWels_Cabac_Element {
uint8_t uiState;
uint8_t uiMPS;
@@ -172,7 +174,7 @@ typedef struct tagDeblockingFilter {
int8_t iChromaQP[2];
int8_t iLumaQP;
struct TagDeblockingFunc* pLoopf;
PPicture *pRefPics[LIST_A];
PPicture* pRefPics[LIST_A];
} SDeblockingFilter, *PDeblockingFilter;
typedef void (*PDeblockingFilterMbFunc) (PDqLayer pCurDqLayer, PDeblockingFilter filter, int32_t boundry_flag);
@@ -215,7 +217,7 @@ typedef struct TagBlockFunc {
} SBlockFunc;
typedef void (*PWelsFillNeighborMbInfoIntra4x4Func) (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int8_t* pIntraPredMode, PDqLayer pCurLayer);
int8_t* pIntraPredMode, PDqLayer pCurDqLayer);
typedef void (*PWelsMapNeighToSample) (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail);
typedef void (*PWelsMap16NeighToSample) (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail);
typedef int32_t (*PWelsParseIntra4x4ModeFunc) (PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
@@ -229,6 +231,73 @@ enum {
OVERWRITE_SUBSETSPS = 1 << 2
};
// Decoder-wide SPS / PPS / subset-SPS context.
// Groups the parameter-set storage and its bookkeeping (availability flags, error counters,
// invalid-id tracking) in one structure; SWelsDecoderContext embeds it as sSpsPpsCtx.
// NOTE(review): presumably kept separate so that CopySpsPps() can share it between
// per-thread decoder contexts - confirm against the implementation.
typedef struct tagWelsWelsDecoderSpsPpsCTX {
SPosOffset sFrameCrop;
// Parameter-set storage; each buffer holds one entry more than the corresponding MAX_*_COUNT.
SSps sSpsBuffer[MAX_SPS_COUNT + 1];
SPps sPpsBuffer[MAX_PPS_COUNT + 1];
SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1];
SNalUnit sPrefixNal;
PSps pActiveLayerSps[MAX_LAYER_NUM]; // one SPS pointer per layer; reset by ResetActiveSPSForEachLayer()
bool bAvcBasedFlag; // For decoding bitstream:
// for EC parameter sets
bool bSpsExistAheadFlag; // whether does SPS NAL exist ahead of sequence?
bool bSubspsExistAheadFlag;// whether does Subset SPS NAL exist ahead of sequence?
bool bPpsExistAheadFlag; // whether does PPS NAL exist ahead of sequence?
int32_t iSpsErrorIgnored;
int32_t iSubSpsErrorIgnored;
int32_t iPpsErrorIgnored;
// Per-id availability flags for SPS, subset SPS and PPS.
bool bSpsAvailFlags[MAX_SPS_COUNT];
bool bSubspsAvailFlags[MAX_SPS_COUNT];
bool bPpsAvailFlags[MAX_PPS_COUNT];
// Last invalid id and number of invalid parameter sets, tracked separately per kind.
int32_t iPPSLastInvalidId;
int32_t iPPSInvalidNum;
int32_t iSPSLastInvalidId;
int32_t iSPSInvalidNum;
int32_t iSubSPSLastInvalidId;
int32_t iSubSPSInvalidNum;
int32_t iSeqId; //sequence id
int iOverwriteFlags; // presumably a bit mask of the OVERWRITE_* enum values - TODO confirm
} SWelsDecoderSpsPpsCTX, *PWelsDecoderSpsPpsCTX;
// Information about the last decoded picture.
// SWelsDecoderContext refers to it through the pLastDecPicInfo pointer.
// NOTE(review): held by pointer rather than by value, presumably so several decoder
// contexts can share one instance - confirm.
typedef struct tagSWelsLastDecPicInfo {
// Save the last nal header info
SNalUnitHeaderExt sLastNalHdrExt;
SSliceHeader sLastSliceHeader;
// Picture order count MSB / LSB of the previous picture.
int32_t iPrevPicOrderCntMsb;
int32_t iPrevPicOrderCntLsb;
PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
int32_t iPrevFrameNum;// frame number of previous frame well decoded for non-truncated mode yet
bool bLastHasMmco5;
uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
} SWelsLastDecPicInfo, *PWelsLastDecPicInfo;
// One entry of the picture list referenced by SWelsDecoderContext::pPictInfoList.
// NOTE(review): presumably describes a decoded picture that is waiting to be output
// in display order - confirm against the reordering code.
typedef struct tagPictInfo {
SBufferInfo sBufferInfo; // output buffer description stored for this picture
int32_t iPOC; // picture order count of this picture
int32_t iPicBuffIdx; // index of the picture buffer (cf. SPicture::iPicBuffIdx)
uint32_t uiDecodingTimeStamp; // relative decoding time stamp (cf. SPicture::uiDecodingTimeStamp)
bool bLastGOP; // presumably marks a picture belonging to the previous GOP - TODO confirm
} SPictInfo, *PPictInfo;
// Bookkeeping for the picture reordering buffer list; reset by ResetReorderingPictureBuffers().
// NOTE(review): field meanings below are derived from the field names only - confirm
// against the code that maintains them.
typedef struct tagPictReoderingStatus {
int32_t iPictInfoIndex; // index into the SPictInfo list
int32_t iMinPOC; // smallest picture order count among the tracked pictures (presumably)
int32_t iNumOfPicts; // number of pictures currently tracked (presumably)
int32_t iLastGOPRemainPicts; // pictures of the previous GOP still pending (presumably)
int32_t iLastWrittenPOC; // POC of the last picture that was output (presumably)
int32_t iLargestBufferedPicIndex; // highest list index currently in use (presumably)
bool bHasBSlice; // presumably set once a B slice has been seen - TODO confirm
} SPictReoderingStatus, *PPictReoderingStatus;
/*
* SWelsDecoderContext: maintains the data of all modules across the decoder framework
*/
@@ -263,9 +332,6 @@ typedef struct TagWelsDecoderContext {
EWelsSliceType eSliceType; // Slice type
bool bUsedAsRef; //flag as ref
int32_t iFrameNum;
int32_t
iPrevFrameNum; // frame number of previous frame well decoded for non-truncated mode yet
bool bLastHasMmco5; //
int32_t iErrorCode; // error code return while decoding in case packets lost
SFmo sFmoList[MAX_PPS_COUNT]; // list for FMO storage
PFmo pFmo; // current fmo context after parsed slice_header
@@ -305,6 +371,7 @@ typedef struct TagWelsDecoderContext {
uint32_t iMbHeight;
} sMb;
// reconstruction picture
PPicture pDec; //pointer to current picture being reconstructed
@@ -313,65 +380,44 @@ typedef struct TagWelsDecoderContext {
// reference pictures
SRefPic sRefPic;
SVlcTable sVlcTable; // vlc table
SRefPic sTmpRefPic; //used to temporarily save RefPic for next active thread
SVlcTable* pVlcTable; // vlc table
SBitStringAux sBs;
int32_t iMaxBsBufferSizeInByte; //actual memory size for BS buffer
/* Global memory external */
SWelsDecoderSpsPpsCTX sSpsPpsCtx;
bool bHasNewSps;
SPosOffset sFrameCrop;
SSps sSpsBuffer[MAX_SPS_COUNT + 1];
SPps sPpsBuffer[MAX_PPS_COUNT + 1];
PSliceHeader pSliceHeader;
PPicBuff pPicBuff; // Initially allocated memory for pictures which are used in decoding.
int32_t iPicQueueNumber;
SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1];
SNalUnit sPrefixNal;
PAccessUnit pAccessUnitList; // current access unit list to be performed
PSps pActiveLayerSps[MAX_LAYER_NUM];
//PSps pActiveLayerSps[MAX_LAYER_NUM];
PSps pSps; // used by current AU
PPps pPps; // used by current AU
// Memory for pAccessUnitList is dynamically held till decoder destruction.
PDqLayer
pCurDqLayer; // current DQ layer representation, also carry reference base layer if applicable
PDqLayer pDqLayersList[LAYER_NUM_EXCHANGEABLE]; // DQ layers list with memory allocated
PNalUnit pNalCur; // point to current NAL Nnit
uint8_t uiNalRefIdc; // NalRefIdc for easy access;
int32_t iPicWidthReq; // picture width have requested the memory
int32_t iPicHeightReq; // picture height have requested the memory
uint8_t uiTargetDqId; // maximal DQ ID in current access unit, meaning target layer ID
bool bAvcBasedFlag; // For decoding bitstream:
//bool bAvcBasedFlag; // For decoding bitstream:
bool bEndOfStreamFlag; // Flag on end of stream requested by external application layer
bool bInstantDecFlag; // Flag for no-delay decoding
bool bInitialDqLayersMem; // dq layers related memory is available?
bool bOnlyOneLayerInCurAuFlag; //only one layer in current AU: 1
// for EC parameter sets
bool bSpsExistAheadFlag; // whether does SPS NAL exist ahead of sequence?
bool bSubspsExistAheadFlag;// whether does Subset SPS NAL exist ahead of sequence?
bool bPpsExistAheadFlag; // whether does PPS NAL exist ahead of sequence?
int32_t iSpsErrorIgnored;
int32_t iSubSpsErrorIgnored;
int32_t iPpsErrorIgnored;
bool bSpsAvailFlags[MAX_SPS_COUNT];
bool bSubspsAvailFlags[MAX_SPS_COUNT];
bool bPpsAvailFlags[MAX_PPS_COUNT];
int32_t iPPSLastInvalidId;
int32_t iPPSInvalidNum;
int32_t iSPSLastInvalidId;
int32_t iSPSInvalidNum;
int32_t iSubSPSLastInvalidId;
int32_t iSubSPSInvalidNum;
bool bReferenceLostAtT0Flag;
int32_t iTotalNumMbRec; //record current number of decoded MB
#ifdef LONG_TERM_REF
@@ -385,7 +431,6 @@ typedef struct TagWelsDecoderContext {
#endif
bool bNewSeqBegin;
bool bNextNewSeqBegin;
int iOverwriteFlags;
//for Parse only
bool bFramePending;
@@ -397,7 +442,7 @@ typedef struct TagWelsDecoderContext {
SPpsBsInfo sPpsBsInfo [MAX_PPS_COUNT];
SParserBsInfo* pParserBsInfo;
PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
//PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment
PGetIntraPredFunc pGetI16x16LumaPredFunc[7]; //h264_predict_copy_16x16;
PGetIntraPredFunc pGetI4x4LumaPredFunc[14]; // h264_predict_4x4_t
PGetIntraPredFunc pGetIChromaPredFunc[7]; // h264_predict_8x8_t
@@ -437,18 +482,14 @@ typedef struct TagWelsDecoderContext {
//trace handle
void* pTraceHandle;
//Save the last nal header info
SNalUnitHeaderExt sLastNalHdrExt;
SSliceHeader sLastSliceHeader;
int32_t iPrevPicOrderCntMsb;
int32_t iPrevPicOrderCntLsb;
PWelsLastDecPicInfo pLastDecPicInfo;
SWelsCabacCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT];
bool bCabacInited;
SWelsCabacCtx pCabacCtx[WELS_CONTEXT_COUNT];
PWelsCabacDecEngine pCabacDecEngine;
double dDecTime;
SDecoderStatistics sDecoderStatistics;// For real time debugging
SDecoderStatistics* pDecoderStatistics; // For real time debugging
int32_t iMbEcedNum;
int32_t iMbEcedPropNum;
int32_t iMbNum;
@@ -457,6 +498,7 @@ typedef struct TagWelsDecoderContext {
int32_t iECMVs[16][2];
PPicture pECRefPic[16];
unsigned long long uiTimeStamp;
uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
// To support scaling list HP
uint16_t pDequant_coeff_buffer4x4[6][52][16];
uint16_t pDequant_coeff_buffer8x8[6][52][64];
@@ -466,15 +508,75 @@ typedef struct TagWelsDecoderContext {
bool bDequantCoeff4x4Init;
bool bUseScalingList;
CMemoryAlign* pMemAlign;
void* pThreadCtx;
void* pLastThreadCtx;
WELS_MUTEX* pCsDecoder;
int16_t lastReadyHeightOffset[LIST_A][MAX_REF_PIC_COUNT]; //last ready reference MB offset
PPictInfo pPictInfoList;
PPictReoderingStatus pPictReoderingStatus;
} SWelsDecoderContext, *PWelsDecoderContext;
// Per-thread control block of a decoder worker thread.
typedef struct tagSWelsDecThread {
// Synchronisation primitives. sIsBusy is a pointer while the other two are owned by value.
// NOTE(review): sIsBusy is presumably shared between all worker threads - confirm.
SWelsDecSemphore* sIsBusy;
SWelsDecSemphore sIsActivated;
SWelsDecSemphore sIsIdle;
SWelsDecThread sThrHandle; // handle of the worker thread
uint32_t uiCommand; // presumably one of WELS_DEC_THREAD_COMMAND_RUN / _ABORT - TODO confirm
uint32_t uiThrNum; // index of this thread; GetPrevFrameNum() uses it to address sibling thread contexts
uint32_t uiThrMaxNum; // total number of threads; returned by GetThreadCount()
uint32_t uiThrStackSize;
DECLARE_PROCTHREAD_PTR (pThrProcMain); // thread entry routine
} SWelsDecThreadInfo, *PWelsDecThreadInfo;
// Context of one decoder worker thread: the thread control block, the decoder context it
// works on and the arguments / results of the decode call it executes.
// GetPrevFrameNum() indexes neighbouring instances relative to this one, which assumes
// that all thread contexts are stored in one contiguous array.
typedef struct tagSWelsDecThreadCtx {
SWelsDecThreadInfo sThreadInfo;
PWelsDecoderContext pCtx; // decoder context used by this thread
void* threadCtxOwner; // opaque pointer to the owner of this thread context
// Input bitstream and output description of the decode request (names mirror the decode call arguments).
uint8_t* kpSrc;
int32_t kiSrcLen;
uint8_t** ppDst;
SBufferInfo sDstInfo;
PPicture pDec;
// Events used to signal decoding progress.
// NOTE(review): exact signalling protocol is not visible here - confirm in the thread code.
SWelsDecEvent sImageReady;
SWelsDecEvent sSliceDecodeStart;
SWelsDecEvent sSliceDecodeFinish;
int32_t iPicBuffIdx; //picBuff Index
} SWelsDecoderThreadCTX, *PWelsDecoderThreadCTX;
// Clear the active SPS pointer of every layer, but only while no macroblock has been
// reconstructed yet (iTotalNumMbRec == 0).
// NOTE(review): two assignments are present - one on pCtx->pActiveLayerSps and one on
// pCtx->sSpsPpsCtx.pActiveLayerSps. This text comes from a rendered diff, so the first
// one may be the superseded pre-change line; confirm which member still exists.
static inline void ResetActiveSPSForEachLayer (PWelsDecoderContext pCtx) {
if (pCtx->iTotalNumMbRec == 0) {
for (int i = 0; i < MAX_LAYER_NUM; i++) {
pCtx->pActiveLayerSps[i] = NULL;
pCtx->sSpsPpsCtx.pActiveLayerSps[i] = NULL;
}
}
}
// Number of decoder threads configured for pCtx: uiThrMaxNum of the attached thread
// context, or 0 when no thread context is attached.
static inline int32_t GetThreadCount (PWelsDecoderContext pCtx) {
  if (pCtx->pThreadCtx == NULL) {
    return 0;
  }
  const PWelsDecoderThreadCTX pOwnThreadCtx = static_cast<PWelsDecoderThreadCTX> (pCtx->pThreadCtx);
  return static_cast<int32_t> (pOwnThreadCtx->sThreadInfo.uiThrMaxNum);
}
// GetPrevFrameNum only applies when thread count >= 2.
//
// Returns the frame number of the picture that precedes pCtx in decoding order.
// The sibling thread contexts are searched for the decoder context whose decoding time stamp
// is exactly one less than pCtx's; its picture's iFrameNum is returned when that picture exists
// and the value is non-negative, otherwise that context's own iFrameNum.
// When no such sibling is found (or no thread context is attached) the value stored in
// pLastDecPicInfo->iPrevFrameNum is returned.
//
// Sibling contexts are addressed relative to this thread's own entry (index i - uiThrNum),
// which assumes that all thread contexts live in one contiguous array.
static inline int32_t GetPrevFrameNum (PWelsDecoderContext pCtx) {
  PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx;
  // pThreadCtx is NULL when no thread context is attached (GetThreadCount() treats it the same
  // way), so it must not be dereferenced unconditionally.
  if (pCtx->uiDecodingTimeStamp > 0 && pThreadCtx != NULL) {
    const int32_t iThreadCount = int32_t (pThreadCtx->sThreadInfo.uiThrMaxNum);
    const int32_t iOwnThrNum = int32_t (pThreadCtx->sThreadInfo.uiThrNum);
    for (int32_t i = 0; i < iThreadCount; ++i) {
      const int32_t id = i - iOwnThrNum; // offset of thread i relative to this thread's entry
      if (id == 0) {
        continue; // skip ourselves
      }
      PWelsDecoderContext pPeerCtx = pThreadCtx[id].pCtx;
      if (pPeerCtx == NULL) {
        continue; // sibling without a decoder context cannot hold the previous picture
      }
      if (pPeerCtx->uiDecodingTimeStamp == pCtx->uiDecodingTimeStamp - 1) {
        if (pPeerCtx->pDec != NULL) {
          const int32_t iFrameNum = pPeerCtx->pDec->iFrameNum;
          if (iFrameNum >= 0) return iFrameNum;
        }
        return pPeerCtx->iFrameNum;
      }
    }
  }
  return pCtx->pLastDecPicInfo->iPrevFrameNum;
}
//#ifdef __cplusplus
//}
//#endif//__cplusplus
+28
View File
@@ -129,6 +129,34 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
*/
bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kpDst, PNalUnit const kpSrc);
/*
* WelsDecodeInitAccessUnitStart
* check and (re)allocate picture buffers on new sequence begin
* bit_len: size in bit length of data
* buf_len: size in byte length of data
* coded_au: mark an Access Unit decoding finished
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo);
/*
* AllocPicBuffOnNewSeqBegin
* check and (re)allocate picture buffers on new sequence begin
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx);
/*
* InitConstructAccessUnit
* Init before constructing an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to
* joint a collective access unit.
* parameter\
* SBufferInfo: Buffer info
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo);
/*
* ConstructAccessUnit
+2 -1
View File
@@ -48,11 +48,12 @@
namespace WelsDec {
void WelsResetRefPic (PWelsDecoderContext pCtx);
void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx);
int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc);
int32_t WelsInitBSliceRefList (PWelsDecoderContext pCtx, int32_t iPoc);
int32_t WelsReorderRefList (PWelsDecoderContext pCtx);
int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx);
int32_t WelsMarkAsRef (PWelsDecoderContext pCtx);
int32_t WelsMarkAsRef (PWelsDecoderContext pCtx, PPicture pLastDec = NULL);
} // namespace WelsDec
+16 -3
View File
@@ -44,6 +44,10 @@
#include "dec_frame.h"
#include "decoder_context.h"
// Return the "invalid reference index" MB-data error from the enclosing function when the
// given pointer expression is NULL. The argument is parenthesized so that compound expressions
// (e.g. "cond ? a : b") are compared as a whole.
// NOTE: this expands to a bare 'if' statement; do not use it directly in front of an 'else'.
#define RETURN_ERR_IF_NULL(pRefPic0) \
  if ((pRefPic0) == NULL) \
    return GENERATE_ERROR_NO(ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX)
namespace WelsDec {
/*!
@@ -91,7 +95,7 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
* \param
* \param output iMvp[]
*/
void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]);
void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]);
/*!
* \brief get the motion predictor and reference for B-slice direct mode version 2
@@ -149,7 +153,7 @@ void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[
* \param
* \param output motion vector cache and motion vector deviation cache
*/
void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A],
int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]);
@@ -158,7 +162,8 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
* \param
* \param output motion vector cache and motion vector deviation cache
*/
void FillTemporalDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount,
const int8_t& iPartW,
const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2],
int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]);
@@ -177,6 +182,14 @@ int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0,
*/
void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef);
/*!
 * \brief Return the macroblock-type table to use for the layer: the one of the layer's
 *        current picture (pDec) when a picture is attached, the layer's own table otherwise.
 */
inline uint32_t* GetMbType (PDqLayer& pCurDqLayer) {
  if (pCurDqLayer->pDec == NULL) {
    return pCurDqLayer->pMbType;
  }
  return pCurDqLayer->pDec->pMbType;
}
} // namespace WelsDec
#endif//WELS_MV_PRED_H__
+7 -7
View File
@@ -51,18 +51,18 @@ namespace WelsDec {
void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer);
void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer);
void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer);
void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurDqLayer);
void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer);
PDqLayer pCurDqLayer);
void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer);
PDqLayer pCurDqLayer);
void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
PDqLayer pCurLayer);
void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer);
PDqLayer pCurDqLayer);
void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer);
void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer);
/*!
* \brief check iPredMode for intra16x16 eligible or not
+3
View File
@@ -53,6 +53,9 @@ typedef struct TagPicBuff {
*/
PPicture PrefetchPic (PPicBuff pPicBuff); // To get current node applicable
PPicture PrefetchPicForThread (PPicBuff pPicBuff); // To get current node applicable in the case of threaded mode
PPicture PrefetchLastPicForThread (PPicBuff pPicBuff,
const int32_t& iLast); // To get last node applicable in the case of threaded mode
} // namespace WelsDec
+9 -2
View File
@@ -37,6 +37,7 @@
#include "typedefs.h"
#include "wels_common_defs.h"
#include "wels_const_common.h"
#include "wels_decoder_thread.h"
using namespace WelsCommon;
@@ -68,8 +69,7 @@ struct SPicture {
/*******************************sef_definition for misc use****************************/
bool bUsedAsRef; //for ref pic management
bool bIsLongRef; // long term reference frame flag //for ref pic management
uint8_t uiRefCount;
bool bAvailableFlag; // indicate whether it is available in this picture memory block.
int8_t iRefCount;
bool bIsComplete; // indicate whether current picture is complete, not from EC
/*******************************for future use****************************/
@@ -85,15 +85,22 @@ struct SPicture {
int32_t iSpsId; //against mosaic caused by cross-IDR interval reference.
int32_t iPpsId;
unsigned long long uiTimeStamp;
uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps
int32_t iPicBuffIdx;
EWelsSliceType eSliceType;
bool bIsUngroupedMultiSlice; //multi-slice picture with each each slice group contains one slice.
bool bNewSeqBegin;
int32_t iMbEcedNum;
int32_t iMbEcedPropNum;
int32_t iMbNum;
bool* pMbCorrectlyDecodedFlag;
int8_t (*pNzc)[24];
uint32_t* pMbType; // mb type used for direct mode
int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; // used for direct mode
int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; //used for direct mode
struct SPicture* pRefPic[LIST_A][17]; //ref pictures used for direct mode
SWelsDecEvent* pReadyEvent; //MB line ready event
};// "Picture" declaration is comflict with Mac system
+3 -2
View File
@@ -74,10 +74,11 @@ typedef struct TagMCRefMember {
int32_t iPicHeight;
} sMCRefMember;
void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx,
int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]);
void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer);
void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer);
int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
+2 -2
View File
@@ -275,7 +275,7 @@ static const SPartMbInfo g_ksInterBMbTypeInfo[] = {
{ MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, 4 } //B_8x8
};
//Table 7.17 Sub-macroblock types in P macroblocks.
//Table 7.17 Sub-macroblock types in P macroblocks.
static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = {
{SUB_MB_TYPE_8x8, 1, 2},
{SUB_MB_TYPE_8x4, 2, 2},
@@ -283,7 +283,7 @@ static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = {
{SUB_MB_TYPE_4x4, 4, 1},
};
//Table 7.18 Sub-macroblock types in B macroblocks.
//Table 7.18 Sub-macroblock types in B macroblocks.
static const SPartMbInfo g_ksInterBSubMbTypeInfo[] = {
{ MB_TYPE_DIRECT, 1, 2 }, //B_Direct_8x8
{ SUB_MB_TYPE_8x8 | MB_TYPE_P0L0, 1, 2 }, //B_L0_8x8
@@ -0,0 +1,170 @@
/*!
* \copy
* Copyright (c) 2009-2019, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file wels_decoder_thread.h
*
* \brief Interfaces introduced in thread programming
*
* \date 08/06/2018 Created
*
*************************************************************************************
*/
#ifndef _WELS_DECODER_THREAD_H_
#define _WELS_DECODER_THREAD_H_
#include "WelsThreadLib.h"
#ifdef __cplusplus
extern "C" {
#endif
#define WELS_DEC_MAX_NUM_CPU 16
#define WELS_DEC_MAX_THREAD_STACK_SIZE 4096
#define WELS_DEC_THREAD_COMMAND_RUN 0
#define WELS_DEC_THREAD_COMMAND_ABORT 1
#if defined(_WIN32) || defined(__CYGWIN__)
// Windows / Cygwin flavour of the decoder threading primitives: each one wraps a
// WELS_THREAD_HANDLE.
typedef struct tagWelsDecSemphore {
WELS_THREAD_HANDLE h;
} SWelsDecSemphore;
typedef struct tagWelsDecEvent {
WELS_THREAD_HANDLE h;
int isSignaled; // signalled state kept next to the handle
} SWelsDecEvent;
typedef struct tagWelsDecThread {
WELS_THREAD_HANDLE h;
} SWelsDecThread;
#define WelsDecThreadFunc(fn,a) DWORD WINAPI fn(LPVOID a)
#define WelsDecThreadFuncArg(a) LPWELS_THREAD_ROUTINE a
#define WELS_DEC_THREAD_WAIT_TIMEDOUT WAIT_TIMEOUT
#define WELS_DEC_THREAD_WAIT_SIGNALED WAIT_OBJECT_0
#define WELS_DEC_THREAD_WAIT_INFINITE INFINITE
#else // NON-WINDOWS
typedef pthread_mutexattr_t WELS_MUTEX_ATTR;
typedef struct tagWelsDecSemphore {
long max;
long v;
WELS_EVENT e;
WELS_MUTEX m;
} SWelsDecSemphore;
typedef struct tagWelsDecEvent {
int manualReset;
int isSignaled;
pthread_cond_t c;
WELS_MUTEX m;
} SWelsDecEvent;
typedef struct tagWelsDecThread {
WELS_THREAD_HANDLE h;
} SWelsDecThread;
#define WelsDecThreadFunc(fn,a) void* fn(void* a)
#define WelsDecThreadFuncArg(a) void* (*a)(void*)
#define WELS_DEC_THREAD_WAIT_TIMEDOUT ETIMEDOUT
#define WELS_DEC_THREAD_WAIT_SIGNALED EINTR
#define WELS_DEC_THREAD_WAIT_INFINITE -1
#endif//_WIN32
// Expands to WELS_THREAD_ROUTINE_RETURN(0) followed by a semicolon. The semicolon
// is part of the macro, so a use written as `WelsDecThreadReturn;` yields an extra
// empty statement.
#define WelsDecThreadReturn WELS_THREAD_ROUTINE_RETURN(0);
// NOTE(review): the implementations are not in this header; the meaning of the
// int return values below (status code vs. wait result) should be confirmed there.
int32_t GetCPUCount();
// Event
// `manualReset` and `initialState` are passed as ints; `timeout` is an int32_t
// (see WELS_DEC_THREAD_WAIT_INFINITE for the platform's "no timeout" value).
int EventCreate (SWelsDecEvent* e, int manualReset, int initialState);
void EventPost (SWelsDecEvent* e);
int EventWait (SWelsDecEvent* e, int32_t timeout);
void EventReset (SWelsDecEvent* e);
void EventDestroy (SWelsDecEvent* e);
// Semaphore
// `prev_count` is an out-pointer; the RELEASE_SEMAPHORE wrapper passes NULL for it.
int SemCreate (SWelsDecSemphore* s, long value, long max);
int SemWait (SWelsDecSemphore* s, int32_t timeout);
void SemRelease (SWelsDecSemphore* s, long* prev_count);
void SemDestroy (SWelsDecSemphore* s);
// Thread
// `tf` is the thread entry routine and `ta` the opaque argument handed to it.
int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta);
int ThreadWait (SWelsDecThread* t);
// Convenience wrappers over ThreadCreate / EventCreate / SemCreate and friends.
// Every cast is applied to a parenthesized macro argument so that an expression
// argument (e.g. `base + 1` or `a - b`) is converted as a whole rather than only
// its first operand.

// Declares/defines a thread entry function `name` with parameter `argument`.
#define DECLARE_PROCTHREAD(name, argument) \
WelsDecThreadFunc(name,argument)
// Declares a thread-routine pointer variable/parameter called `name`.
#define DECLARE_PROCTHREAD_PTR(name) \
LPWELS_THREAD_ROUTINE name
// Starts `threadproc` on thread object `ph`, passing `argument` as void*.
#define CREATE_THREAD(ph, threadproc,argument) \
ThreadCreate(ph, threadproc, (void*)(argument))
// Creates event `ph`; the `name` argument is accepted but ignored.
#define CREATE_EVENT(ph, manualreset,initial_state,name) \
EventCreate(ph,(int)(manualreset),(int)(initial_state))
// Creates semaphore `ph`; the `name` argument is accepted but ignored.
#define CREATE_SEMAPHORE(ph, initial_count,max_count, name) \
SemCreate(ph, (long)(initial_count),(long)(max_count))
#define CLOSE_EVENT(ph) \
EventDestroy(ph)
#define CLOSE_SEMAPHORE(ph) \
SemDestroy(ph)
#define SET_EVENT(ph) \
EventPost(ph)
#define RESET_EVENT(ph) \
EventReset(ph)
// Releases the semaphore without asking for the previous count.
#define RELEASE_SEMAPHORE(ph) \
SemRelease(ph,NULL)
#define WAIT_EVENT(ph,timeout) \
EventWait(ph, (int32_t)(timeout))
#define WAIT_THREAD(ph) \
ThreadWait(ph)
#define WAIT_SEMAPHORE(ph,timeout) \
SemWait(ph,(int32_t)(timeout))
#ifdef __cplusplus
}
#endif
#endif
+63 -60
View File
@@ -148,48 +148,50 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade
++ (*pConsumedBytes);
if (! (IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_SPS_NAL (pNalUnitHeader->eNalUnitType)
|| IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->bSpsExistAheadFlag)) {
if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iSpsErrorIgnored == 0) {
|| IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bSpsExistAheadFlag)) {
if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSpsErrorIgnored == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING,
"parse_nal(), no exist Sequence Parameter Sets ahead of sequence when try to decode NAL(type:%d).",
pNalUnitHeader->eNalUnitType);
} else {
pCtx->iSpsErrorIgnored++;
pCtx->sSpsPpsCtx.iSpsErrorIgnored++;
}
pCtx->sDecoderStatistics.iSpsNoExistNalNum++;
pCtx->pDecoderStatistics->iSpsNoExistNalNum++;
pCtx->iErrorCode = dsNoParamSets;
return NULL;
}
pCtx->iSpsErrorIgnored = 0;
pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0;
if (! (IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_PARAM_SETS_NALS (pNalUnitHeader->eNalUnitType)
|| IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->bPpsExistAheadFlag)) {
if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iPpsErrorIgnored == 0) {
|| IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) {
if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iPpsErrorIgnored == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING,
"parse_nal(), no exist Picture Parameter Sets ahead of sequence when try to decode NAL(type:%d).",
pNalUnitHeader->eNalUnitType);
} else {
pCtx->iPpsErrorIgnored++;
pCtx->sSpsPpsCtx.iPpsErrorIgnored++;
}
pCtx->sDecoderStatistics.iPpsNoExistNalNum++;
pCtx->pDecoderStatistics->iPpsNoExistNalNum++;
pCtx->iErrorCode = dsNoParamSets;
return NULL;
}
pCtx->iPpsErrorIgnored = 0;
if ((IS_VCL_NAL_AVC_BASE (pNalUnitHeader->eNalUnitType) && ! (pCtx->bSpsExistAheadFlag || pCtx->bPpsExistAheadFlag)) ||
(IS_NEW_INTRODUCED_SVC_NAL (pNalUnitHeader->eNalUnitType) && ! (pCtx->bSpsExistAheadFlag || pCtx->bSubspsExistAheadFlag
|| pCtx->bPpsExistAheadFlag))) {
if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iSubSpsErrorIgnored == 0) {
pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0;
if ((IS_VCL_NAL_AVC_BASE (pNalUnitHeader->eNalUnitType) && ! (pCtx->sSpsPpsCtx.bSpsExistAheadFlag
|| pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) ||
(IS_NEW_INTRODUCED_SVC_NAL (pNalUnitHeader->eNalUnitType) && ! (pCtx->sSpsPpsCtx.bSpsExistAheadFlag
|| pCtx->sSpsPpsCtx.bSubspsExistAheadFlag
|| pCtx->sSpsPpsCtx.bPpsExistAheadFlag))) {
if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSubSpsErrorIgnored == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING,
"ParseNalHeader(), no exist Parameter Sets ahead of sequence when try to decode slice(type:%d).",
pNalUnitHeader->eNalUnitType);
} else {
pCtx->iSubSpsErrorIgnored++;
pCtx->sSpsPpsCtx.iSubSpsErrorIgnored++;
}
pCtx->sDecoderStatistics.iSubSpsNoExistNalNum++;
pCtx->pDecoderStatistics->iSubSpsNoExistNalNum++;
pCtx->iErrorCode |= dsNoParamSets;
return NULL;
}
pCtx->iSubSpsErrorIgnored = 0;
pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0;
switch (pNalUnitHeader->eNalUnitType) {
case NAL_UNIT_AU_DELIMITER:
@@ -201,7 +203,7 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade
break;
case NAL_UNIT_PREFIX:
pCurNal = &pCtx->sPrefixNal;
pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal;
pCurNal->uiTimeStamp = pCtx->uiTimeStamp;
if (iNalSize < NAL_UNIT_HEADER_EXT_SIZE) {
@@ -365,9 +367,9 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade
memcpy (pSavedData->pCurPos + iStartDeltaByte, pSrcNal, iActualLen);
pSavedData->pCurPos += iStartDeltaByte + iActualLen;
}
if (NAL_UNIT_PREFIX == pCtx->sPrefixNal.sNalHeaderExt.sNalUnitHeader.eNalUnitType) {
if (pCtx->sPrefixNal.sNalData.sPrefixNal.bPrefixNalCorrectFlag) {
PrefetchNalHeaderExtSyntax (pCtx, pCurNal, &pCtx->sPrefixNal);
if (NAL_UNIT_PREFIX == pCtx->sSpsPpsCtx.sPrefixNal.sNalHeaderExt.sNalUnitHeader.eNalUnitType) {
if (pCtx->sSpsPpsCtx.sPrefixNal.sNalData.sPrefixNal.bPrefixNalCorrectFlag) {
PrefetchNalHeaderExtSyntax (pCtx, pCurNal, &pCtx->sSpsPpsCtx.sPrefixNal);
}
}
@@ -496,8 +498,8 @@ bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal,
const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt;
const SSliceHeader* kpLastSliceHeader = &kpLastNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
const SSliceHeader* kpCurSliceHeader = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
if (pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
&& pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) {
if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
&& pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) {
return true; // the active sps changed, new sequence begins, so the current au is ready
}
@@ -548,8 +550,8 @@ bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal,
bool CheckNextAuNewSeq (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PSps kpSps) {
const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt;
if (pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
&& pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps)
if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL
&& pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps)
return true;
if (kpCurNalHeaderExt->bIdrFlag)
return true;
@@ -606,7 +608,7 @@ int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t
pCtx->iErrorCode |= dsBitstreamError;
return iErr;
}
pCtx->bHasNewSps = true;
break;
case NAL_UNIT_PPS:
@@ -620,17 +622,18 @@ int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t
return iErr;
}
}
iErr = ParsePps (pCtx, &pCtx->sPpsBuffer[0], pBs, pSrcNal, kSrcNalLen);
iErr = ParsePps (pCtx, &pCtx->sSpsPpsCtx.sPpsBuffer[0], pBs, pSrcNal, kSrcNalLen);
if (ERR_NONE != iErr) { // modified for pps invalid, 12/1/2009
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE)
pCtx->iErrorCode |= dsNoParamSets;
else
pCtx->iErrorCode |= dsBitstreamError;
pCtx->bHasNewSps = false;
return iErr;
}
pCtx->bPpsExistAheadFlag = true;
pCtx->sSpsPpsCtx.bPpsExistAheadFlag = true;
++ (pCtx->sSpsPpsCtx.iSeqId);
break;
case NAL_UNIT_SEI:
@@ -683,7 +686,7 @@ int32_t ParseRefBasePicMarking (PBitStringAux pBs, PRefBasePicMarking pRefBasePi
}
int32_t ParsePrefixNalUnit (PWelsDecoderContext pCtx, PBitStringAux pBs) {
PNalUnit pCurNal = &pCtx->sPrefixNal;
PNalUnit pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal;
uint32_t uiCode;
if (pCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
@@ -834,12 +837,12 @@ const SLevelLimits* GetLevelLimits (int32_t iLevelIdx, bool bConstraint3) {
bool CheckSpsActive (PWelsDecoderContext pCtx, PSps pSps, bool bUseSubsetFlag) {
for (int i = 0; i < MAX_LAYER_NUM; i++) {
if (pCtx->pActiveLayerSps[i] == pSps)
if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == pSps)
return true;
}
// Pre-active, will be used soon
if (bUseSubsetFlag) {
if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->bSubspsAvailFlags[pSps->iSpsId]) {
if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->sSpsPpsCtx.bSubspsAvailFlags[pSps->iSpsId]) {
if (pCtx->iTotalNumMbRec > 0) {
return true;
}
@@ -857,7 +860,7 @@ bool CheckSpsActive (PWelsDecoderContext pCtx, PSps pSps, bool bUseSubsetFlag) {
}
}
} else {
if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->bSpsAvailFlags[pSps->iSpsId]) {
if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->sSpsPpsCtx.bSpsAvailFlags[pSps->iSpsId]) {
if (pCtx->iTotalNumMbRec > 0) {
return true;
}
@@ -1251,57 +1254,57 @@ int32_t ParseSps (PWelsDecoderContext pCtx, PBitStringAux pBsAux, int32_t* pPicW
if (PRO_SCALABLE_BASELINE == uiProfileIdc || PRO_SCALABLE_HIGH == uiProfileIdc)
pCtx->bAvcBasedFlag = false;
pCtx->sSpsPpsCtx.bAvcBasedFlag = false;
*pPicWidth = pSps->iMbWidth << 4;
*pPicHeight = pSps->iMbHeight << 4;
PSps pTmpSps = NULL;
if (kbUseSubsetFlag) {
pTmpSps = &pCtx->sSubsetSpsBuffer[iSpsId].sSps;
pTmpSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId].sSps;
} else {
pTmpSps = &pCtx->sSpsBuffer[iSpsId];
pTmpSps = &pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId];
}
if (CheckSpsActive (pCtx, pTmpSps, kbUseSubsetFlag)) {
// we are overwriting the active sps, copy a temp buffer
if (kbUseSubsetFlag) {
if (memcmp (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)) != 0) {
if (memcmp (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)) != 0) {
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
memcpy (&pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
pCtx->bAuReadyFlag = true;
pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1;
pCtx->iOverwriteFlags |= OVERWRITE_SUBSETSPS;
pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS;
} else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSubsetSps->sSps.iSpsId)) {
memcpy (&pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
pCtx->iOverwriteFlags |= OVERWRITE_SUBSETSPS;
memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps));
pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS;
} else {
memcpy (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
}
}
} else {
if (memcmp (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps)) != 0) {
if (memcmp (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)) != 0) {
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
memcpy (&pCtx->sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
pCtx->iOverwriteFlags |= OVERWRITE_SPS;
memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS;
pCtx->bAuReadyFlag = true;
pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1;
} else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSps->iSpsId)) {
memcpy (&pCtx->sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
pCtx->iOverwriteFlags |= OVERWRITE_SPS;
memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps));
pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS;
} else {
memcpy (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps));
memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps));
}
}
}
}
// Not overwrite active sps, just copy to final place
else if (kbUseSubsetFlag) {
memcpy (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
pCtx->bSubspsAvailFlags[iSpsId] = true;
pCtx->bSubspsExistAheadFlag = true;
memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps));
pCtx->sSpsPpsCtx.bSubspsAvailFlags[iSpsId] = true;
pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = true;
} else {
memcpy (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps));
pCtx->bSpsAvailFlags[iSpsId] = true;
pCtx->bSpsExistAheadFlag = true;
memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps));
pCtx->sSpsPpsCtx.bSpsAvailFlags[iSpsId] = true;
pCtx->sSpsPpsCtx.bSpsExistAheadFlag = true;
}
return ERR_NONE;
}
@@ -1421,8 +1424,8 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux,
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_scaling_matrix_present_flag
pPps->bPicScalingMatrixPresentFlag = !!uiCode;
if (pPps->bPicScalingMatrixPresentFlag) {
if (pCtx->bSpsAvailFlags[pPps->iSpsId]) {
WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bTransform8x8ModeFlag,
if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId]) {
WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bTransform8x8ModeFlag,
pPps->bPicScalingListPresentFlag, pPps->iScalingList4x4, pPps->iScalingList8x8));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
@@ -1440,16 +1443,16 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux,
if (pCtx->pPps != NULL && pCtx->pPps->iPpsId == pPps->iPpsId) {
if (memcmp (pCtx->pPps, pPps, sizeof (*pPps)) != 0) {
memcpy (&pCtx->sPpsBuffer[MAX_PPS_COUNT], pPps, sizeof (SPps));
pCtx->iOverwriteFlags |= OVERWRITE_PPS;
memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], pPps, sizeof (SPps));
pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_PPS;
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
pCtx->bAuReadyFlag = true;
pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1;
}
}
} else {
memcpy (&pCtx->sPpsBuffer[uiPpsId], pPps, sizeof (SPps));
pCtx->bPpsAvailFlags[uiPpsId] = true;
memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[uiPpsId], pPps, sizeof (SPps));
pCtx->sSpsPpsCtx.bPpsAvailFlags[uiPpsId] = true;
}
if (pCtx->pParam->bParseOnly) {
if (kSrcNalLen >= SPS_PPS_BS_SIZE - 4) { //pps bs exceeds
+287 -308
View File
@@ -62,6 +62,30 @@ namespace WelsDec {
( WELS_ABS( iMotionVector[iMbXy][iIndex][1] - iMotionVector[iMbBn][iNeighIndex][1] ) >= 4 )\
)
// Non-zero when the vector stored at iMV_A[iMbXy][iIndex] and the vector stored at
// iMV_B[iMbBn][iNeighIndex] differ by 4 or more (per WELS_ABS) in component [0] or
// in component [1]. The two vectors are taken from different MB indices
// (iMbXy vs. iMbBn).
// NOTE(review): the threshold 4 presumably corresponds to one full sample in
// quarter-sample MV units -- confirm against the MV storage format.
#define ON_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
(( WELS_ABS( iMV_A[iMbXy][iIndex][0] - iMV_B[iMbBn][iNeighIndex][0] ) >= 4 ) || \
( WELS_ABS( iMV_A[iMbXy][iIndex][1] - iMV_B[iMbBn][iNeighIndex][1] ) >= 4 ))\
)
// Same test as ON_MB_BS_MV_DIFF, but both vectors are read from the same MB index:
// non-zero when iMV_A[iMbXy][iIndex] and iMV_B[iMbXy][iNeighIndex] differ by 4 or
// more (per WELS_ABS) in component [0] or in component [1].
#define IN_MB_BS_MV_DIFF(iMV_A, iMV_B, iMbXy, iIndex, iNeighIndex) \
(\
(( WELS_ABS( iMV_A[iMbXy][iIndex][0] - iMV_B[iMbXy][iNeighIndex][0] ) >= 4 ) || \
( WELS_ABS( iMV_A[iMbXy][iIndex][1] - iMV_B[iMbXy][iNeighIndex][1] ) >= 4 )) \
)
// Motion-vector part of the boundary-strength decision on an MB boundary
// (used for B slices). mv0 / mv1 are the two motion-vector arrays that are handed
// to ON_MB_BS_MV_DIFF.
//  - ref_p0 != ref_p1 and ref_p0 == ref_q0: compare same-list vectors
//    (mv0 vs. mv0, mv1 vs. mv1); non-zero if either pair differs.
//  - ref_p0 != ref_p1 and ref_p0 != ref_q0: compare cross-list vectors
//    (mv0 vs. mv1, mv1 vs. mv0); non-zero if either pair differs.
//  - ref_p0 == ref_p1: non-zero only if BOTH the same-list and the cross-list
//    comparisons report a difference.
// Note: ref_q1 is accepted as a parameter but never referenced in the expansion.
#define ON_MB_BS(ref_p0, ref_q0, ref_p1, ref_q1, mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) \
(\
(ref_p0 != ref_p1) ? \
((ref_p0 == ref_q0) ? \
(ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) : \
(ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) : \
((ON_MB_BS_MV_DIFF (mv0, mv0, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv1, iMbXy, iMbBn, iIndex, iNeighIndex)) && \
(ON_MB_BS_MV_DIFF (mv0, mv1, iMbXy, iMbBn, iIndex, iNeighIndex) || ON_MB_BS_MV_DIFF (mv1, mv0, iMbXy, iMbBn, iIndex, iNeighIndex))) \
)
#if defined(SAME_MB_DIFF_REFIDX)
#define SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex) \
(\
@@ -76,9 +100,40 @@ namespace WelsDec {
)
#endif
// Edge test between two blocks (iIndex, iNeigborIndex) of the same MB using both
// reference lists. `refs` is indexed as refs[LIST_x][block] and `mv` as
// mv[LIST_x][iMbXy][block][component].
//  - If the two blocks do not use the same pair of references (neither list-for-list
//    nor with the lists swapped), the result is 1.
//  - Otherwise the motion vectors are compared with IN_MB_BS_MV_DIFF:
//      * block's LIST_0 and LIST_1 references differ, and its LIST_0 reference equals
//        the neighbour's LIST_0 reference: same-list comparison (L0/L0, L1/L1);
//      * block's LIST_0 and LIST_1 references differ, otherwise: cross-list
//        comparison (L0/L1, L1/L0);
//      * block's LIST_0 and LIST_1 references are equal: both the same-list and the
//        cross-list comparison must report a difference.
// The two preprocessor branches differ only in the `!!` applied to the
// reference-match condition in the #else branch; since that condition is only used
// as the ternary's test, both branches evaluate to the same result.
#if defined(SAME_MB_DIFF_REFIDX)
#define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \
(\
(((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \
((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \
((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \
((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \
((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \
)
#else
#define IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex) \
(\
!!(((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_1][iNeigborIndex])) || \
((refs[LIST_0][iIndex] == refs[LIST_1][iNeigborIndex]) && (refs[LIST_1][iIndex] == refs[LIST_0][iNeigborIndex]))) ? \
((refs[LIST_0][iIndex] != refs[LIST_1][iIndex]) ? \
((refs[LIST_0][iIndex] == refs[LIST_0][iNeigborIndex]) ? \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) : \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex))) : \
((IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_0], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_1], iMbXy, iIndex, iNeigborIndex)) && \
(IN_MB_BS_MV_DIFF (mv[LIST_0], mv[LIST_1], iMbXy, iIndex, iNeigborIndex) || IN_MB_BS_MV_DIFF (mv[LIST_1], mv[LIST_0], iMbXy, iIndex, iNeigborIndex)))) : 1 \
)
#endif
// Combines the flag `bsx1` with the SMB_EDGE_MV edge test: the two are OR-ed and the
// result is shifted left by one when `bsx1` is non-zero (so a non-zero `bsx1` with a
// 0/1 edge test yields at least 2, a zero `bsx1` yields the edge test itself).
// `bsx1` is parenthesized at every use so that an expression argument such as
// `a | b` is evaluated as a whole under both `|` and `!!`.
#define BS_EDGE(bsx1, pRefPics, iMotionVector, iIndex, iNeighIndex) \
( ((bsx1)|SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!(bsx1))))
// Boundary strength for an edge inside an MB (used for B slices).
// Combines the flag `bsx1` with the IN_SMB_EDGE_MV edge test: the two are OR-ed and
// the result is shifted left by one when `bsx1` is non-zero.
// `bsx1` is parenthesized at every use so that an expression argument such as
// `a | b` is evaluated as a whole under both `|` and `!!`.
#define IN_BS_EDGE(bsx1, refs, mv, iMbXy, iIndex, iNeigborIndex) \
( ((bsx1)|IN_SMB_EDGE_MV(refs, mv, iMbXy, iIndex, iNeigborIndex))<<((uint8_t)(!!(bsx1))))
#define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \
{\
iIndex = (iQp + iAlphaOffset);\
@@ -201,11 +256,12 @@ void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4
nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor;
}
void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4],
int8_t* pNnzTab,
int32_t iMbXy) {
uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
int8_t* iRefIdx = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
void *iRefs[MB_BLOCK4x4_NUM];
int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy];
void* iRefs[MB_BLOCK4x4_NUM];
int i;
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
@@ -226,15 +282,15 @@ void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLa
pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
}
//vertical
nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pMv[LIST_0][iMbXy],
nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pMv[LIST_0][iMbXy],
nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
//horizontal
nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pMv[LIST_0][iMbXy],
nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pMv[LIST_0][iMbXy],
nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy],
g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
} else {
uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
@@ -244,60 +300,61 @@ void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLa
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 1, 0);
nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 2, 1);
nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 3, 2);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 4);
nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 5);
nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 6);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 8);
nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 9);
nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 10);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 12);
nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 13);
nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 14);
// horizontal
* (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 4, 0);
nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 1);
nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 2);
nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 3);
* (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 8, 4);
nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 5);
nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 6);
nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 7);
* (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 12, 8);
nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 9);
nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 10);
nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 11);
}
}
void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer,
uint8_t nBS[2][4][4], int8_t* pNnzTab,
int32_t iMbXy) {
uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
void *iRefs[LIST_A][MB_BLOCK4x4_NUM];
void* iRefs[LIST_A][MB_BLOCK4x4_NUM];
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
int8_t i8x8NnzTab[4];
int l;
for (l = 0; l < LIST_A; l++) {
int8_t* iRefIdx = pCurDqLayer->pRefIndex[l][iMbXy];
int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[l][iMbXy];
int i;
/* Look up each reference picture based on indices */
for (i = 0; i < MB_BLOCK4x4_NUM; i++) {
@@ -317,51 +374,23 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter,
//vertical
int8_t iIndex = g_kuiMbCountScan4Idx[1 << 2];
int8_t iNeigborIndex = g_kuiMbCountScan4Idx[0];
nBS[0][2][0] = nBS[0][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs[listIdx],
pCurDqLayer->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
}
nBS[0][2][0] = nBS[0][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
iIndex, iNeigborIndex);
iIndex = g_kuiMbCountScan4Idx[3 << 2];
iNeigborIndex = g_kuiMbCountScan4Idx[2 << 2];
nBS[0][2][2] = nBS[0][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs[listIdx],
pCurDqLayer->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
}
nBS[0][2][2] = nBS[0][2][3] = IN_BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
iIndex, iNeigborIndex);
//horizontal
iIndex = g_kuiMbCountScan4Idx[2 << 2];
iNeigborIndex = g_kuiMbCountScan4Idx[0];
nBS[1][2][0] = nBS[1][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs[listIdx],
pCurDqLayer->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
}
nBS[1][2][0] = nBS[1][2][1] = IN_BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
iIndex, iNeigborIndex);
iIndex = g_kuiMbCountScan4Idx[3 << 2];
iNeigborIndex = g_kuiMbCountScan4Idx[1 << 2];
nBS[1][2][2] = nBS[1][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) {
nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs[listIdx],
pCurDqLayer->pMv[listIdx][iMbXy],
iIndex, iNeigborIndex);
break;
}
}
nBS[1][2][2] = nBS[1][2][3] = IN_BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv, iMbXy,
iIndex, iNeigborIndex);
} else {
uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
@@ -370,195 +399,57 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter,
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
nBS[0][1][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][1] && iRefs[listIdx][0]) {
nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 1, 0);
break;
}
}
nBS[0][2][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][2] && iRefs[listIdx][1]) {
nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 2, 1);
break;
}
}
nBS[0][3][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][3] && iRefs[listIdx][2]) {
nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 3, 2);
break;
}
}
nBS[0][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 1, 0);
nBS[0][2][0] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 2, 1);
nBS[0][3][0] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 3, 2);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
nBS[0][1][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][5] && iRefs[listIdx][4]) {
nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 4);
break;
}
}
nBS[0][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][6] && iRefs[listIdx][5]) {
nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 5);
break;
}
}
nBS[0][3][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][7] && iRefs[listIdx][6]) {
nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 6);
break;
}
}
nBS[0][1][1] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 4);
nBS[0][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 5);
nBS[0][3][1] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 6);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
nBS[0][1][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][9] && iRefs[listIdx][8]) {
nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 8);
break;
}
}
nBS[0][2][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][10] && iRefs[listIdx][9]) {
nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 9);
break;
}
}
nBS[0][3][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][11] && iRefs[listIdx][10]) {
nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 10);
break;
}
}
nBS[0][1][2] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 8);
nBS[0][2][2] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 9);
nBS[0][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 11, 10);
for (int i = 0; i < 3; i++)
uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
nBS[0][1][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][13] && iRefs[listIdx][12]) {
nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 12);
break;
}
}
nBS[0][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][14] && iRefs[listIdx][13]) {
nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 13);
break;
}
}
nBS[0][3][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][15] && iRefs[listIdx][14]) {
nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 14);
break;
}
}
nBS[0][1][3] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 12);
nBS[0][2][3] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 13);
nBS[0][3][3] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 14);
// horizontal
* (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
nBS[1][1][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][4] && iRefs[listIdx][0]) {
nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 4, 0);
break;
}
}
nBS[1][1][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][5] && iRefs[listIdx][1]) {
nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 1);
break;
}
}
nBS[1][1][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][6] && iRefs[listIdx][2]) {
nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 2);
break;
}
}
nBS[1][1][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][7] && iRefs[listIdx][3]) {
nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 3);
break;
}
}
nBS[1][1][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 4, 0);
nBS[1][1][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 5, 1);
nBS[1][1][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 6, 2);
nBS[1][1][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 7, 3);
* (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
nBS[1][2][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][8] && iRefs[listIdx][4]) {
nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 8, 4);
break;
}
}
nBS[1][2][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][9] && iRefs[listIdx][5]) {
nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 5);
break;
}
}
nBS[1][2][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][10] && iRefs[listIdx][6]) {
nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 6);
break;
}
}
nBS[1][2][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][11] && iRefs[listIdx][7]) {
nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 7);
break;
}
}
nBS[1][2][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 8, 4);
nBS[1][2][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 9, 5);
nBS[1][2][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 10, 6);
nBS[1][2][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 11, 7);
* (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
nBS[1][3][0] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][12] && iRefs[listIdx][8]) {
nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 12, 8);
break;
}
}
nBS[1][3][1] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][13] && iRefs[listIdx][9]) {
nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 9);
break;
}
}
nBS[1][3][2] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][14] && iRefs[listIdx][10]) {
nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 10);
break;
}
}
nBS[1][3][3] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (iRefs[listIdx][15] && iRefs[listIdx][11]) {
nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 11);
break;
}
}
nBS[1][3][0] = IN_BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 12, 8);
nBS[1][3][1] = IN_BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 13, 9);
nBS[1][3][2] = IN_BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 14, 10);
nBS[1][3][3] = IN_BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv, iMbXy, 15, 11);
for (int ii = 0; ii < 2; ii++)
for (int jj = 1; jj < 4; jj++)
for (int kk = 0; kk < 4; kk++)
if (nBS[ii][jj][kk] > 1)
nBS[ii][jj][kk] = nBS[ii][jj][kk];
}
}
uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
int32_t iNeighMb, int32_t iMbXy) {
int32_t i, j;
uint32_t uiBSx4;
uint8_t* pBS = (uint8_t*) (&uiBSx4);
@@ -566,21 +457,23 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCu
const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pRefIndex[LIST_0];
int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pRefIndex[LIST_0] :
pCurDqLayer->pRefIndex[LIST_0];
if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
}
if (uiNzc) {
pBS[i << 1] = pBS[1 + (i << 1)] = 2;
} else {
PPicture ref0, ref1;
ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] : NULL;
pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb,
ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
NULL;
pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[LIST_0], iMbXy, iNeighMb,
*pB8x8Idx, *pBn8x8Idx);
}
pB8x8Idx += 4;
@@ -590,16 +483,17 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCu
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[j + (i << 1)] = 2;
} else {
PPicture ref0, ref1;
ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL;
ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx,
pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
(pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pB8x8Idx,
*pBnIdx);
}
pBnIdx++;
@@ -610,16 +504,18 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCu
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
pBS[j + (i << 1)] = 2;
} else {
PPicture ref0, ref1;
ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] : NULL;
pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] :
NULL;
pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1,
(pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pBIdx,
*pBn8x8Idx);
}
pBIdx++;
@@ -629,13 +525,14 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCu
} else {
// only 4x4 transform
for (i = 0; i < 4; i++) {
if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[i] = 2;
} else {
PPicture ref0, ref1;
ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL;
ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL;
pBS[i] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx, *pBnIdx);
pBS[i] = MB_BS_MV (ref0, ref1, (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]),
iMbXy, iNeighMb, *pBIdx, *pBnIdx);
}
pBIdx++;
pBnIdx++;
@@ -644,7 +541,8 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCu
return uiBSx4;
}
uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge,
int32_t iNeighMb, int32_t iMbXy) {
int32_t i, j;
uint32_t uiBSx4;
uint8_t* pBS = (uint8_t*) (&uiBSx4);
@@ -652,28 +550,33 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLay
const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
PPicture ref0, ref1;
PPicture ref_p0, ref_p1, ref_q0, ref_q1;
int8_t (*iRefIdx0)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_0];
int8_t (*iRefIdx1)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[LIST_1];
if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]);
}
if (uiNzc) {
pBS[i << 1] = pBS[1 + (i << 1)] = 2;
} else {
pBS[i << 1] = pBS[1 + (i << 1)] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
&& pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) {
int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pRefIndex[listIdx];
ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pB8x8Idx]];
ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBn8x8Idx]];
pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb,
*pB8x8Idx, *pBn8x8Idx);
break;
}
ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL;
ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] :
NULL;
ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL;
ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] :
NULL;
if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
pCurDqLayer->pMv[LIST_0];
int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
pCurDqLayer->pMv[LIST_1];
pBS[i << 1] = pBS[1 + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx,
*pBn8x8Idx);
}
}
pB8x8Idx += 4;
@@ -683,23 +586,25 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLay
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[j + (i << 1)] = 2;
} else {
pBS[j + (i << 1)] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
&& pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) {
int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pRefIndex[listIdx];
ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pB8x8Idx]];
ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBnIdx]];
pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pB8x8Idx,
*pBnIdx);
break;
}
ref_p0 = iRefIdx0[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pB8x8Idx]] : NULL;
ref_q0 = iRefIdx0[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] :
NULL;
ref_p1 = iRefIdx1[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pB8x8Idx]] : NULL;
ref_q1 = iRefIdx1[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] :
NULL;
if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
pCurDqLayer->pMv[LIST_0];
int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
pCurDqLayer->pMv[LIST_1];
pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pB8x8Idx, *pBnIdx);
}
}
pBnIdx++;
@@ -710,22 +615,25 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLay
for (i = 0; i < 2; i++) {
uint8_t uiNzc = 0;
for (j = 0; uiNzc == 0 && j < 4; j++) {
uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)];
}
for (j = 0; j < 2; j++) {
if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) {
pBS[j + (i << 1)] = 2;
} else {
pBS[j + (i << 1)] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
&& pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) {
int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pRefIndex[listIdx];
ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pBIdx]];
ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBn8x8Idx]];
pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx);
break;
}
ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL;
ref_q0 = iRefIdx0[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBn8x8Idx]] :
NULL;
ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL;
ref_q1 = iRefIdx1[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBn8x8Idx]] :
NULL;
if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
pCurDqLayer->pMv[LIST_0];
int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
pCurDqLayer->pMv[LIST_1];
pBS[j + (i << 1)] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx);
}
}
pBIdx++;
@@ -735,19 +643,22 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLay
} else {
// only 4x4 transform
for (i = 0; i < 4; i++) {
if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) {
pBS[i] = 2;
} else {
pBS[i] = 1;
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
&& pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) {
int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pRefIndex[listIdx];
ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pBIdx]];
ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBnIdx]];
pBS[i] = MB_BS_MV (ref0, ref1, pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBnIdx);
break;
}
ref_p0 = iRefIdx0[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iMbXy][*pBIdx]] : NULL;
ref_q0 = iRefIdx0[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_0][iRefIdx0[iNeighMb][*pBnIdx]] :
NULL;
ref_p1 = iRefIdx1[iMbXy][*pBIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iMbXy][*pBIdx]] : NULL;
ref_q1 = iRefIdx1[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST ? pFilter->pRefPics[LIST_1][iRefIdx1[iNeighMb][*pBnIdx]] :
NULL;
if (((ref_p0 == ref_q0) && (ref_p1 == ref_q1)) || ((ref_p0 == ref_q1) && (ref_p1 == ref_q0))) {
int16_t (*pMv0)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] :
pCurDqLayer->pMv[LIST_0];
int16_t (*pMv1)[MB_BLOCK4x4_NUM][MV_A] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_1] :
pCurDqLayer->pMv[LIST_1];
pBS[i] = ON_MB_BS (ref_p0, ref_q0, ref_p1, ref_q1, pMv0, pMv1, iMbXy, iNeighMb, *pBIdx, *pBnIdx);
}
}
pBIdx++;
@@ -972,7 +883,7 @@ void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uin
static void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4],
int32_t iBoundryFlag) {
int32_t iBoundryFlag) {
int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
int32_t iMbX = pCurDqLayer->iMbX;
int32_t iMbY = pCurDqLayer->iMbY;
@@ -1224,7 +1135,8 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
uint8_t nBS[2][4][4] = {{{ 0 }}};
int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
uint32_t iCurMbType = pCurDqLayer->pMbType[iMbXyIndex];
uint32_t iCurMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbXyIndex] :
pCurDqLayer->pMbType[iMbXyIndex];
int32_t iMbNb;
PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
@@ -1242,11 +1154,13 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
if (iBoundryFlag & LEFT_FLAG_MASK) {
iMbNb = iMbXyIndex - 1;
uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
if (bBSlice) {
* (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase (
* (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 :
DeblockingBSliceBsMarginalMBAvcbase (
pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
} else {
* (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
* (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex);
}
} else {
@@ -1254,11 +1168,13 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
}
if (iBoundryFlag & TOP_FLAG_MASK) {
iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth;
uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb];
if (bBSlice) {
* (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase (
* (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 :
DeblockingBSliceBsMarginalMBAvcbase (
pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
} else {
* (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
* (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex);
}
} else {
@@ -1271,16 +1187,16 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t
} else {
if (IS_INTER_16x16 (iCurMbType)) {
if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
DeblockingBSInsideMBAvsbase (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
} else {
DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
DeblockingBSInsideMBAvsbase8x8 (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1);
}
} else {
if (bBSlice) {
DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
} else {
DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex);
}
}
}
@@ -1360,6 +1276,56 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun
} while (1);
}
}
/*!
 * \brief   Fill a deblocking filter descriptor from the decoder context for the current layer/slice.
 *
 * \param   pCtx        decoder context; its current DQ layer, the picture being decoded (pCtx->pDec),
 *                      its deblocking function table and its reference lists are read
 * \param   pFilter     [out] cleared to zero, then loaded with plane pointers, strides, slice type,
 *                      alpha/beta offsets, the loop-filter function table and both reference lists
 * \param   iFilterIdc  [out] receives uiDisableDeblockingFilterIdc from the current slice header
 *
 * \return  NONE
 */
void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc) {
  PDqLayer pLayer = pCtx->pCurDqLayer;
  PSliceHeaderExt pHeaderExt = &pLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;

  // Start from an all-zero descriptor so every field not set below is 0/NULL.
  memset (&pFilter, 0, sizeof (pFilter));

  iFilterIdc = pHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc;

  // Three plane pointers are copied but only two strides (indices 0 and 1).
  for (int32_t iPlane = 0; iPlane < 3; ++iPlane) {
    pFilter.pCsData[iPlane] = pCtx->pDec->pData[iPlane];
  }
  for (int32_t iPlane = 0; iPlane < 2; ++iPlane) {
    pFilter.iCsStride[iPlane] = pCtx->pDec->iLinesize[iPlane];
  }

  pFilter.eSliceType          = (EWelsSliceType)pLayer->sLayerInfo.sSliceInLayer.eSliceType;
  pFilter.iSliceAlphaC0Offset = pHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
  pFilter.iSliceBetaOffset    = pHeaderExt->sSliceHeader.iSliceBetaOffset;
  pFilter.pLoopf              = &pCtx->sDeblockingFunc;

  // Reference picture lists at indices 0 and 1.
  for (int32_t iList = 0; iList < 2; ++iList) {
    pFilter.pRefPics[iList] = pCtx->sRefPic.pRefList[iList];
  }
}
/*!
 * \brief   Deblock the macroblock the given layer currently points at, if filtering is enabled for it.
 *
 * \param   pCurDqLayer  layer holding the position of the MB to be deblocked
 * \param   pFilter      filter descriptor handed on (by address) to pDeblockMb
 * \param   iFilterIdc   filter idc; the MB is processed only for the values 0 and 2,
 *                       any other value makes this function a no-op
 * \param   pDeblockMb   per-MB deblocking routine to invoke
 *
 * \return  NONE
 */
void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc,
                             PDeblockingFilterMbFunc pDeblockMb) {
  const bool kbFilterThisMb = (iFilterIdc == 0) || (iFilterIdc == 2);
  if (!kbFilterThisMb) {
    return;
  }

  // The boundary flag is computed first and then handed to the per-MB routine.
  const int32_t kiBoundaryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc);
  pDeblockMb (pCurDqLayer, &pFilter, kiBoundaryFlag);
}
/*!
* \brief deblocking module initialize
*
@@ -1438,6 +1404,19 @@ void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) {
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
}
#endif//HAVE_MMI
#if defined(HAVE_MSA)
if (iCpu & WELS_CPU_MSA) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
}
#endif//HAVE_MSA
}
} // namespace WelsDec
File diff suppressed because it is too large Load Diff
+151 -50
View File
@@ -52,6 +52,7 @@
#include "decode_slice.h"
#include "error_concealment.h"
#include "memory_align.h"
#include "wels_decoder_thread.h"
namespace WelsDec {
@@ -61,6 +62,7 @@ extern void FreePicture (PPicture pPic, CMemoryAlign* pMa);
static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, const int32_t kiSize,
const int32_t kiPicWidth, const int32_t kiPicHeight) {
PPicBuff pPicBuf = NULL;
int32_t iPicIdx = 0;
if (kiSize <= 0 || kiPicWidth <= 0 || kiPicHeight <= 0) {
@@ -79,7 +81,7 @@ static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, cons
if (NULL == pPicBuf->ppPic) {
pPicBuf->iCapacity = 0;
DestroyPicBuff (&pPicBuf, pMa);
DestroyPicBuff (pCtx, &pPicBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
@@ -88,7 +90,7 @@ static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, cons
if (NULL == pPic) {
// init capacity first for free memory
pPicBuf->iCapacity = iPicIdx;
DestroyPicBuff (&pPicBuf, pMa);
DestroyPicBuff (pCtx, &pPicBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
pPicBuf->ppPic[iPicIdx] = pPic;
@@ -122,7 +124,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
if (NULL == pPicNewBuf->ppPic) {
pPicNewBuf->iCapacity = 0;
DestroyPicBuff (&pPicNewBuf, pMa);
DestroyPicBuff (pCtx, &pPicNewBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
@@ -132,7 +134,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
if (NULL == pPic) {
// Set maximum capacity as the new malloc memory at the tail
pPicNewBuf->iCapacity = iPicIdx;
DestroyPicBuff (&pPicNewBuf, pMa);
DestroyPicBuff (pCtx, &pPicNewBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
pPicNewBuf->ppPic[iPicIdx] = pPic;
@@ -149,8 +151,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) {
pPicNewBuf->ppPic[i]->bUsedAsRef = false;
pPicNewBuf->ppPic[i]->bIsLongRef = false;
pPicNewBuf->ppPic[i]->uiRefCount = 0;
pPicNewBuf->ppPic[i]->bAvailableFlag = true;
pPicNewBuf->ppPic[i]->iRefCount = 0;
pPicNewBuf->ppPic[i]->bIsComplete = false;
}
// remove old PicBuf
@@ -186,13 +187,15 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
if (NULL == pPicNewBuf->ppPic) {
pPicNewBuf->iCapacity = 0;
DestroyPicBuff (&pPicNewBuf, pMa);
DestroyPicBuff (pCtx, &pPicNewBuf, pMa);
return ERR_INFO_OUT_OF_MEMORY;
}
ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false);
int32_t iPrevPicIdx = -1;
for (iPrevPicIdx = 0; iPrevPicIdx < kiOldSize; ++iPrevPicIdx) {
if (pCtx->pPreviousDecodedPictureInDpb == pPicOldBuf->ppPic[iPrevPicIdx]) {
if (pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb == pPicOldBuf->ppPic[iPrevPicIdx]) {
break;
}
}
@@ -210,22 +213,12 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
}
//update references due to allocation changes
//all references' references have to be reset oss-buzz 14423
for (int32_t i = 0; i < kiNewSize; i++) {
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
int32_t j = -1;
while (++j < MAX_DPB_COUNT && pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] != NULL) {
unsigned long long uiTimeStamp = pPicNewBuf->ppPic[i]->pRefPic[listIdx][j]->uiTimeStamp;
bool foundThePic = false;
for (int32_t k = 0; k < kiNewSize; k++) {
if (pPicNewBuf->ppPic[k]->uiTimeStamp == uiTimeStamp) {
pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] = pPicNewBuf->ppPic[k];
foundThePic = true;
break;
}
}
if (!foundThePic) {
pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] = NULL;
}
pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] = NULL;
}
}
}
@@ -246,8 +239,7 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) {
pPicNewBuf->ppPic[i]->bUsedAsRef = false;
pPicNewBuf->ppPic[i]->bIsLongRef = false;
pPicNewBuf->ppPic[i]->uiRefCount = 0;
pPicNewBuf->ppPic[i]->bAvailableFlag = true;
pPicNewBuf->ppPic[i]->iRefCount = 0;
pPicNewBuf->ppPic[i]->bIsComplete = false;
}
// remove old PicBuf
@@ -263,9 +255,11 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co
return ERR_NONE;
}
void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa) {
void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa) {
PPicBuff pPicBuf = NULL;
ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false);
if (NULL == ppPicBuf || NULL == *ppPicBuf)
return;
@@ -294,6 +288,26 @@ void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa) {
*ppPicBuf = NULL;
}
/*!
 * \brief   Reset the picture reordering status and the associated picture info list.
 *
 * \param   pPictReoderingStatus  reordering bookkeeping to reset; nothing is done if NULL
 * \param   pPictInfo             picture info list to reset; nothing is done if NULL
 * \param   fullReset             true: clear 16 list entries;
 *                                false: clear entries 0..iLargestBufferedPicIndex only
 *
 * \return  NONE
 */
void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo,
                                    const bool& fullReset) {
  if (NULL == pPictReoderingStatus || NULL == pPictInfo) {
    return;
  }

  // Must be evaluated before iLargestBufferedPicIndex is cleared further down.
  const int32_t kiEntriesToClear = fullReset ? 16 : (pPictReoderingStatus->iLargestBufferedPicIndex + 1);

  int32_t iEntry = 0;
  while (iEntry < kiEntriesToClear) {
    pPictInfo[iEntry].iPOC     = IMinInt32;
    pPictInfo[iEntry].bLastGOP = false;
    ++iEntry;
  }
  // Only the first entry's buffer status is cleared.
  pPictInfo->sBufferInfo.iBufferStatus = 0;

  pPictReoderingStatus->iPictInfoIndex           = 0;
  pPictReoderingStatus->iNumOfPicts              = 0;
  pPictReoderingStatus->iLastGOPRemainPicts      = 0;
  pPictReoderingStatus->iLargestBufferedPicIndex = 0;
  pPictReoderingStatus->iMinPOC                  = IMinInt32;
  pPictReoderingStatus->iLastWrittenPOC          = IMinInt32;
  pPictReoderingStatus->bHasBSlice               = false;
}
/*
* fill data fields in default for decoder context
*/
@@ -318,7 +332,7 @@ void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx) {
pCtx->bFreezeOutput = true;
pCtx->iFrameNum = -1;
pCtx->iPrevFrameNum = -1;
pCtx->pLastDecPicInfo->iPrevFrameNum = -1;
pCtx->iErrorCode = ERR_NONE;
pCtx->pDec = NULL;
@@ -331,31 +345,91 @@ void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx) {
pCtx->pPicBuff = NULL;
pCtx->bAvcBasedFlag = true;
pCtx->pPreviousDecodedPictureInDpb = NULL;
pCtx->sDecoderStatistics.iAvgLumaQp = -1;
pCtx->sDecoderStatistics.iStatisticsLogInterval = 1000;
//pCtx->sSpsPpsCtx.bAvcBasedFlag = true;
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL;
pCtx->pDecoderStatistics->iAvgLumaQp = -1;
pCtx->pDecoderStatistics->iStatisticsLogInterval = 1000;
pCtx->bUseScalingList = false;
pCtx->iSpsErrorIgnored = 0;
pCtx->iSubSpsErrorIgnored = 0;
pCtx->iPpsErrorIgnored = 0;
pCtx->iPPSInvalidNum = 0;
pCtx->iPPSLastInvalidId = -1;
pCtx->iSPSInvalidNum = 0;
pCtx->iSPSLastInvalidId = -1;
pCtx->iSubSPSInvalidNum = 0;
pCtx->iSubSPSLastInvalidId = -1;
/*pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0;
pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0;
pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0;
pCtx->sSpsPpsCtx.iPPSInvalidNum = 0;
pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1;
pCtx->sSpsPpsCtx.iSPSInvalidNum = 0;
pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1;
pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0;
pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1;
*/
pCtx->iFeedbackNalRefIdc = -1; //initialize
pCtx->iPrevPicOrderCntMsb = 0;
pCtx->iPrevPicOrderCntLsb = 0;
pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0;
pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0;
}
/*!
 * \brief   Load default values into the SPS/PPS portion of a decoder context.
 *
 * \param   sSpsPpsCtx  [out] SPS/PPS context whose flags, error counters and ids are reset
 *
 * \return  NONE
 */
void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx) {
  // presence flags
  sSpsPpsCtx.bSpsExistAheadFlag    = false;
  sSpsPpsCtx.bSubspsExistAheadFlag = false;
  sSpsPpsCtx.bPpsExistAheadFlag    = false;
  sSpsPpsCtx.bAvcBasedFlag         = true;

  // counters start at zero
  sSpsPpsCtx.iSpsErrorIgnored    = 0;
  sSpsPpsCtx.iSubSpsErrorIgnored = 0;
  sSpsPpsCtx.iPpsErrorIgnored    = 0;
  sSpsPpsCtx.iSPSInvalidNum      = 0;
  sSpsPpsCtx.iSubSPSInvalidNum   = 0;
  sSpsPpsCtx.iPPSInvalidNum      = 0;

  // ids start at -1
  sSpsPpsCtx.iSPSLastInvalidId    = -1;
  sSpsPpsCtx.iSubSPSLastInvalidId = -1;
  sSpsPpsCtx.iPPSLastInvalidId    = -1;
  sSpsPpsCtx.iSeqId               = -1;
}
/*!
 * \brief   Load default values into the "last decoded picture" record.
 *
 * \param   sLastDecPicInfo  [out] record to reset: POC msb/lsb and timestamp to 0, frame number to -1,
 *                           previous-picture pointer to NULL, MMCO5 flag to false
 *
 * \return  NONE
 */
void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo) {
  sLastDecPicInfo.pPreviousDecodedPictureInDpb = NULL;
  sLastDecPicInfo.bLastHasMmco5                = false;
  sLastDecPicInfo.iPrevFrameNum                = -1;
  sLastDecPicInfo.iPrevPicOrderCntLsb          = 0;
  sLastDecPicInfo.iPrevPicOrderCntMsb          = 0;
  sLastDecPicInfo.uiDecodingTimeStamp          = 0;
}
/*!
 * \brief   Copy the SPS/PPS context from one decoder context to another (used by the threaded code).
 *
 * After the struct copy, the SPS used by each dependency layer of the source context's access unit
 * is located; when it lives in the source sSpsBuffer, the pointer is translated to the same slot of
 * the destination sSpsBuffer before being stored in the destination's pActiveLayerSps.
 *
 * \param   pFromCtx  source decoder context
 * \param   pToCtx    destination decoder context; its sSpsPpsCtx is overwritten
 *
 * \return  NONE
 */
void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx) {
  pToCtx->sSpsPpsCtx = pFromCtx->sSpsPpsCtx;

  PAccessUnit pSrcAu = pFromCtx->pAccessUnitList;
  PSps pLayerSps[MAX_LAYER_NUM] = { NULL };

  // Walk the NAL units of the source access unit; a later NAL of the same layer overrides an earlier one.
  for (unsigned int uiNal = pSrcAu->uiStartPos; uiNal <= pSrcAu->uiEndPos; ++uiNal) {
    const uint32_t kuiDid = pSrcAu->pNalUnitsList[uiNal]->sNalHeaderExt.uiDependencyId;
    PSps pSrcSps = pSrcAu->pNalUnitsList[uiNal]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;

    // Look for the slot of the source buffer this SPS pointer refers to.
    unsigned int uiSlot = 0;
    while (uiSlot < MAX_SPS_COUNT + 1 && &pFromCtx->sSpsPpsCtx.sSpsBuffer[uiSlot] != pSrcSps) {
      ++uiSlot;
    }

    if (uiSlot < MAX_SPS_COUNT + 1) {
      pLayerSps[kuiDid] = &pToCtx->sSpsPpsCtx.sSpsBuffer[uiSlot];
    } else {
      // Not found in the source buffer: the original pointer is kept unchanged.
      pLayerSps[kuiDid] = pSrcSps;
    }
  }

  // Only layers that were seen (non-NULL) replace the copied active SPS entries.
  for (int iLayer = 0; iLayer < MAX_LAYER_NUM; ++iLayer) {
    if (NULL == pLayerSps[iLayer]) {
      continue;
    }
    pToCtx->sSpsPpsCtx.pActiveLayerSps[iLayer] = pLayerSps[iLayer];
  }
}
/*
* destory_mb_blocks
*/
/*
* get size of reference picture list in target layer incoming, = (iNumRefFrames
*/
@@ -366,6 +440,10 @@ static inline int32_t GetTargetRefListSize (PWelsDecoderContext pCtx) {
iNumRefFrames = MAX_REF_PIC_COUNT + 2;
} else {
iNumRefFrames = pCtx->pSps->iNumRefFrames + 2;
int32_t iThreadCount = GetThreadCount (pCtx);
if (iThreadCount > 1) {
iNumRefFrames = MAX_REF_PIC_COUNT;
}
}
#ifdef LONG_TERM_REF
@@ -407,7 +485,9 @@ int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const
&& kiPicHeight == pCtx->iImgHeightInPixel) && (!bNeedChangePicQueue)) // have same scaled buffer
// sync update pRefList
WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
if (GetThreadCount (pCtx) <= 1) {
WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
}
if (pCtx->bHaveGotMemory && (kiPicWidth == pCtx->iImgWidthInPixel && kiPicHeight == pCtx->iImgHeightInPixel)
&& pCtx->pPicBuff != NULL && pCtx->pPicBuff->iCapacity != iPicQueueSize) {
@@ -435,11 +515,11 @@ int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const
// for Recycled_Pic_Queue
PPicBuff* ppPic = &pCtx->pPicBuff;
if (NULL != ppPic && NULL != *ppPic) {
DestroyPicBuff (ppPic, pMa);
DestroyPicBuff (pCtx, ppPic, pMa);
}
pCtx->pPreviousDecodedPictureInDpb = NULL;
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL;
// currently only active for LIST_0 due to have no B frames
iErr = CreatePicBuff (pCtx, &pCtx->pPicBuff, iPicQueueSize, kiPicWidth, kiPicHeight);
@@ -481,7 +561,18 @@ void WelsFreeDynamicMemory (PWelsDecoderContext pCtx) {
PPicBuff* pPicBuff = &pCtx->pPicBuff;
if (NULL != pPicBuff && NULL != *pPicBuff) {
DestroyPicBuff (pPicBuff, pMa);
DestroyPicBuff (pCtx, pPicBuff, pMa);
}
if (GetThreadCount (pCtx) > 1) {
//prevent from double destruction of PPicBuff
PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pThreadCtx);
int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum;
int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
for (int32_t i = 0; i < threadCount; ++i) {
if (pThreadCtx[i - id].pCtx != NULL) {
pThreadCtx[i - id].pCtx->pPicBuff = NULL;
}
}
}
if (pCtx->pTempDec) {
@@ -510,7 +601,7 @@ int32_t WelsOpenDecoder (PWelsDecoderContext pCtx, SLogContext* pLogCtx) {
InitDecFuncs (pCtx, pCtx->uiCpuFlag);
// vlc tables
InitVlcTable (&pCtx->sVlcTable);
InitVlcTable (pCtx->pVlcTable);
// static memory
iRet = WelsInitStaticMemory (pCtx);
@@ -725,7 +816,11 @@ int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const in
}
CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) {
ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
if (GetThreadCount (pCtx) <= 1) {
ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
} else {
pCtx->pAccessUnitList->uiAvailUnitsNum = 1;
}
}
}
DecodeFinishUpdate (pCtx);
@@ -781,9 +876,15 @@ int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const in
if (IS_PARAM_SETS_NALS (pCtx->sCurNalHead.eNalUnitType)) {
iRet = ParseNonVclNal (pCtx, pNalPayload, iDstIdx - iConsumedBytes, pSrcNal - 3, iSrcIdx + 3);
}
CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
if (GetThreadCount (pCtx) <= 1) {
CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
}
if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) {
ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
if (GetThreadCount (pCtx) <= 1) {
ConstructAccessUnit (pCtx, ppDst, pDstBufInfo);
} else {
pCtx->pAccessUnitList->uiAvailUnitsNum = 1;
}
}
}
DecodeFinishUpdate (pCtx);
@@ -1098,7 +1199,7 @@ void UpdateDecStatFreezingInfo (const bool kbIdrFlag, SDecoderStatistics* pDecSt
void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx) {
PDqLayer pCurDq = pCtx->pCurDqLayer;
PPicture pPic = pCtx->pDec;
SDecoderStatistics* pDecStat = &pCtx->sDecoderStatistics;
SDecoderStatistics* pDecStat = pCtx->pDecoderStatistics;
if (pDecStat->iAvgLumaQp == -1) //first correct frame received
pDecStat->iAvgLumaQp = 0;
@@ -1140,7 +1241,7 @@ void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx) {
//update decoder statistics information
void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput) {
if (pCtx->bFreezeOutput)
UpdateDecStatFreezingInfo (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag, &pCtx->sDecoderStatistics);
UpdateDecStatFreezingInfo (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag, pCtx->pDecoderStatistics);
else if (kbOutput)
UpdateDecStatNoFreezingInfo (pCtx);
}
+394 -184
View File
@@ -77,11 +77,11 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
if ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
|| (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight)) {
pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
if ((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth)
|| (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight)) {
pCtx->pDecoderStatistics->uiResolutionChangeTimes++;
pCtx->pDecoderStatistics->uiWidth = kiActualWidth;
pCtx->pDecoderStatistics->uiHeight = kiActualHeight;
}
UpdateDecStatNoFreezingInfo (pCtx);
}
@@ -194,8 +194,9 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
"DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ",
pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight);
bFrameCompleteFlag = false; //return later after output buffer is done
if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice
if (pCtx->bInstantDecFlag) { //no-delay decoding, wait for new slice
return ERR_INFO_MB_NUM_INADEQUATE;
}
} else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag
&& (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done
pCtx->pDec->bIsComplete = true;
@@ -219,10 +220,30 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2;
ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
for (int i = 0; i < 3; ++i) {
pDstInfo->pDst[i] = ppDst[i];
}
pDstInfo->iBufferStatus = 1;
bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
|| (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
if (GetThreadCount (pCtx) > 1 && pPic->bIsComplete == false) {
pPic->bIsComplete = true;
}
if (GetThreadCount (pCtx) > 1) {
uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
for (uint32_t i = 0; i < uiMbHeight; ++i) {
SET_EVENT (&pCtx->pDec->pReadyEvent[i]);
}
}
bool bOutResChange = false;
if (GetThreadCount (pCtx) <= 1 || pCtx->pLastThreadCtx == NULL) {
bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
|| (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
} else {
if (pCtx->pLastThreadCtx != NULL) {
PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx);
bOutResChange = (pLastThreadCtx->pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
|| (pLastThreadCtx->pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
}
}
pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth;
pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight;
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete
@@ -250,11 +271,11 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t
pCtx->iMbNum = pPic->iMbNum;
pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum;
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
if (pDstInfo->iBufferStatus && ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
|| (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight))) {
pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
if (pDstInfo->iBufferStatus && ((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth)
|| (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight))) {
pCtx->pDecoderStatistics->uiResolutionChangeTimes++;
pCtx->pDecoderStatistics->uiWidth = kiActualWidth;
pCtx->pDecoderStatistics->uiHeight = kiActualHeight;
}
UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0);
}
@@ -384,7 +405,7 @@ void CreateImplicitWeightTable (PWelsDecoderContext pCtx) {
//fix Bugzilla 1485229 check if pointers are NULL
if (pCtx->sRefPic.pRefList[LIST_0][0] && pCtx->sRefPic.pRefList[LIST_1][0]) {
if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1
&& pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc + pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc == 2 * iPoc) {
&& int64_t(pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc) + int64_t(pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc) == 2 * int64_t(iPoc)) {
pCurDqLayer->bUseWeightedBiPredIdc = false;
return;
}
@@ -522,13 +543,18 @@ int32_t ParseDecRefPicMarking (PWelsDecoderContext pCtx, PBitStringAux pBs, PSli
WELS_VERIFY_RETURN_IF (-1, bMmco4Exist);
bMmco4Exist = true;
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //max_long_term_frame_idx_plus1
kpRefMarking->sMmcoRef[iIdx].iMaxLongTermFrameIdx = -1 + uiCode;
int32_t iMaxLongTermFrameIdx = -1 + uiCode;
if (iMaxLongTermFrameIdx > int32_t (pSps->uiLog2MaxFrameNum)) {
//ISO/IEC 14496-10:2009(E) 7.4.3.3 Decoded reference picture marking semantics page 96
return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_MARKING);
}
kpRefMarking->sMmcoRef[iIdx].iMaxLongTermFrameIdx = iMaxLongTermFrameIdx;
} else if (kuiMmco == MMCO_RESET) {
WELS_VERIFY_RETURN_IF (-1, (!bAllowMmco5 || bMmco5Exist));
bMmco5Exist = true;
pCtx->iPrevPicOrderCntLsb = 0;
pCtx->iPrevPicOrderCntMsb = 0;
pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0;
pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0;
pSh->iPicOrderCntLsb = 0;
if (pCtx->pSliceHeader)
pCtx->pSliceHeader->iPicOrderCntLsb = 0;
@@ -846,8 +872,9 @@ void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatis
* Parse slice header of bitstream in avc for storing data structure
*/
int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) {
PNalUnit const kpCurNal = pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
1];
PNalUnit const kpCurNal =
pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
1];
PNalUnitHeaderExt pNalHeaderExt = NULL;
PSliceHeader pSliceHead = NULL;
@@ -924,22 +951,22 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
iPpsId = uiCode;
//add check PPS available here
if (pCtx->bPpsAvailFlags[iPpsId] == false) {
pCtx->sDecoderStatistics.iPpsReportErrorNum++;
if (pCtx->iPPSLastInvalidId != iPpsId) {
if (pCtx->sSpsPpsCtx.bPpsAvailFlags[iPpsId] == false) {
pCtx->pDecoderStatistics->iPpsReportErrorNum++;
if (pCtx->sSpsPpsCtx.iPPSLastInvalidId != iPpsId) {
WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId,
pCtx->iPPSLastInvalidId, pCtx->iPPSInvalidNum);
pCtx->iPPSLastInvalidId = iPpsId;
pCtx->iPPSInvalidNum = 0;
pCtx->sSpsPpsCtx.iPPSLastInvalidId, pCtx->sSpsPpsCtx.iPPSInvalidNum);
pCtx->sSpsPpsCtx.iPPSLastInvalidId = iPpsId;
pCtx->sSpsPpsCtx.iPPSInvalidNum = 0;
} else {
pCtx->iPPSInvalidNum++;
pCtx->sSpsPpsCtx.iPPSInvalidNum++;
}
pCtx->iErrorCode |= dsNoParamSets;
return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID);
}
pCtx->iPPSLastInvalidId = -1;
pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1;
pPps = &pCtx->sPpsBuffer[iPpsId];
pPps = &pCtx->sSpsPpsCtx.sPpsBuffer[iPpsId];
if (pPps->uiNumSliceGroups == 0) {
WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced");
@@ -948,38 +975,38 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
}
if (kbExtensionFlag) {
pSubsetSps = &pCtx->sSubsetSpsBuffer[pPps->iSpsId];
pSubsetSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pPps->iSpsId];
pSps = &pSubsetSps->sSps;
if (pCtx->bSubspsAvailFlags[pPps->iSpsId] == false) {
pCtx->sDecoderStatistics.iSubSpsReportErrorNum++;
if (pCtx->iSubSPSLastInvalidId != pPps->iSpsId) {
if (pCtx->sSpsPpsCtx.bSubspsAvailFlags[pPps->iSpsId] == false) {
pCtx->pDecoderStatistics->iSubSpsReportErrorNum++;
if (pCtx->sSpsPpsCtx.iSubSPSLastInvalidId != pPps->iSpsId) {
WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
pCtx->iSubSPSLastInvalidId, pCtx->iSubSPSInvalidNum);
pCtx->iSubSPSLastInvalidId = pPps->iSpsId;
pCtx->iSubSPSInvalidNum = 0;
pCtx->sSpsPpsCtx.iSubSPSLastInvalidId, pCtx->sSpsPpsCtx.iSubSPSInvalidNum);
pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = pPps->iSpsId;
pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0;
} else {
pCtx->iSubSPSInvalidNum++;
pCtx->sSpsPpsCtx.iSubSPSInvalidNum++;
}
pCtx->iErrorCode |= dsNoParamSets;
return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
}
pCtx->iSubSPSLastInvalidId = -1;
pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1;
} else {
if (pCtx->bSpsAvailFlags[pPps->iSpsId] == false) {
pCtx->sDecoderStatistics.iSpsReportErrorNum++;
if (pCtx->iSPSLastInvalidId != pPps->iSpsId) {
if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId] == false) {
pCtx->pDecoderStatistics->iSpsReportErrorNum++;
if (pCtx->sSpsPpsCtx.iSPSLastInvalidId != pPps->iSpsId) {
WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
pCtx->iSPSLastInvalidId, pCtx->iSPSInvalidNum);
pCtx->iSPSLastInvalidId = pPps->iSpsId;
pCtx->iSPSInvalidNum = 0;
pCtx->sSpsPpsCtx.iSPSLastInvalidId, pCtx->sSpsPpsCtx.iSPSInvalidNum);
pCtx->sSpsPpsCtx.iSPSLastInvalidId = pPps->iSpsId;
pCtx->sSpsPpsCtx.iSPSInvalidNum = 0;
} else {
pCtx->iSPSInvalidNum++;
pCtx->sSpsPpsCtx.iSPSInvalidNum++;
}
pCtx->iErrorCode |= dsNoParamSets;
return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
}
pCtx->iSPSLastInvalidId = -1;
pSps = &pCtx->sSpsBuffer[pPps->iSpsId];
pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1;
pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId];
}
pSliceHead->iPpsId = iPpsId;
pSliceHead->iSpsId = pPps->iSpsId;
@@ -1049,16 +1076,18 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
//Calculate poc if necessary
int32_t pocLsb = pSliceHead->iPicOrderCntLsb;
if (pSliceHead->bIdrFlag || kpCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) {
pCtx->iPrevPicOrderCntMsb = 0;
pCtx->iPrevPicOrderCntLsb = 0;
pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0;
pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0;
}
int32_t pocMsb;
if (pocLsb < pCtx->iPrevPicOrderCntLsb && pCtx->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2)
pocMsb = pCtx->iPrevPicOrderCntMsb + iMaxPocLsb;
else if (pocLsb > pCtx->iPrevPicOrderCntLsb && pocLsb - pCtx->iPrevPicOrderCntLsb > iMaxPocLsb / 2)
pocMsb = pCtx->iPrevPicOrderCntMsb - iMaxPocLsb;
if (pocLsb < pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb
&& pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2)
pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb + iMaxPocLsb;
else if (pocLsb > pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb
&& pocLsb - pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb > iMaxPocLsb / 2)
pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb - iMaxPocLsb;
else
pocMsb = pCtx->iPrevPicOrderCntMsb;
pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb;
pSliceHead->iPicOrderCntLsb = pocMsb + pocLsb;
if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
@@ -1066,8 +1095,8 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co
}
if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
pCtx->iPrevPicOrderCntLsb = pocLsb;
pCtx->iPrevPicOrderCntMsb = pocMsb;
pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = pocLsb;
pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = pocMsb;
}
//End of Calculating poc
} else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) {
@@ -1376,7 +1405,7 @@ bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst
pNalHdrExtS = &kpSrc->sNalHeaderExt;
pShExtD = &kppDst->sNalData.sVclNal.sSliceHeaderExt;
pPrefixS = &kpSrc->sNalData.sPrefixNal;
pSps = &pCtx->sSpsBuffer[pCtx->sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId];
pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId];
pNalHdrExtD->uiDependencyId = pNalHdrExtS->uiDependencyId;
pNalHdrExtD->uiQualityId = pNalHdrExtS->uiQualityId;
@@ -1438,7 +1467,7 @@ int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
if (uiActualIdx ==
pCurAu->uiActualUnitsNum) { // no found IDR nal within incoming AU, need exit to avoid mosaic issue, 11/19/2009
pCtx->sDecoderStatistics.uiIDRLostNum++;
pCtx->pDecoderStatistics->uiIDRLostNum++;
if (!pCtx->bParamSetsLostFlag)
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
"UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU.");
@@ -1460,7 +1489,6 @@ int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) {
int32_t i = 0;
WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0))
pCtx->sMb.iMbWidth = (kiMaxWidth + 15) >> 4;
pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4;
@@ -1506,7 +1534,8 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
sizeof (
bool),
"pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
bool),
"pCtx->sMb.pTransformSize8x8Flag[]");
pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int8_t) * 2,
@@ -1517,9 +1546,11 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t),
"pCtx->sMb.pCbfDc[]");
pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int8_t) * 24,
"pCtx->sMb.pNzc[]");
pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int8_t) * 24,
"pCtx->sMb.pNzcRs[]");
pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
pCtx->sMb.iMbHeight *
@@ -1537,20 +1568,24 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
"pCtx->sMb.pChromaPredMode[]");
pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pCbp[]");
pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
pCtx->sMb.iMbHeight *
sizeof (
uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]");
pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t),
"pCtx->sMb.pSliceIdc[]"); // using int32_t for slice_idc, 4/21/2010
pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int8_t),
"pCtx->sMb.pResidualPredFlag[]");
pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
sizeof (
int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
bool),
"pCtx->sMb.pMbCorrectlyDecodedFlag[]");
pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
bool),
"pCtx->pMbRefConcealedFlag[]");
// check memory block valid due above allocated..
@@ -1597,6 +1632,8 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid
return ERR_NONE;
}
void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
int32_t i = 0;
CMemoryAlign* pMa = pCtx->pMemAlign;
@@ -1825,9 +1862,9 @@ void ForceClearCurrentNal (PAccessUnit pAu) {
}
void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) {
pCtx->bSpsExistAheadFlag = false;
pCtx->bSubspsExistAheadFlag = false;
pCtx->bPpsExistAheadFlag = false;
pCtx->sSpsPpsCtx.bSpsExistAheadFlag = false;
pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = false;
pCtx->sSpsPpsCtx.bPpsExistAheadFlag = false;
// Force clear the AU list
pCtx->pAccessUnitList->uiAvailUnitsNum = 0;
@@ -2101,14 +2138,14 @@ int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) {
return iRet;
pCtx->pAccessUnitList->uiStartPos = 0;
if (!pCtx->bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) {
if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) {
pCtx->iErrorCode |= dsBitstreamError;
return dsBitstreamError;
}
//check current AU has only one layer or not
//If YES, can use deblocking based on AVC
if (!pCtx->bAvcBasedFlag) {
if (!pCtx->sSpsPpsCtx.bAvcBasedFlag) {
CheckOnlyOneLayerInAu (pCtx);
}
@@ -2119,8 +2156,8 @@ void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) {
//save previous header info
PAccessUnit pCurAu = pCtx->pAccessUnitList;
PNalUnit pCurNal = pCurAu->pNalUnitsList[pCurAu->uiEndPos];
memcpy (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
memcpy (&pCtx->sLastSliceHeader,
memcpy (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
memcpy (&pCtx->pLastDecPicInfo->sLastSliceHeader,
&pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader, sizeof (SSliceHeader));
// uninitialize context of current access unit and rbsp buffer clean
ResetCurrentAccessUnit (pCtx);
@@ -2147,7 +2184,7 @@ static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
}
int iMaxActiveLayer = 0, iMaxCurrentLayer = 0;
for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
if (pCtx->pActiveLayerSps[i] != NULL) {
if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] != NULL) {
iMaxActiveLayer = i;
break;
}
@@ -2159,37 +2196,39 @@ static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
}
}
if ((iMaxCurrentLayer != iMaxActiveLayer)
|| (pTmpLayerSps[iMaxCurrentLayer] != pCtx->pActiveLayerSps[iMaxActiveLayer])) {
|| (pTmpLayerSps[iMaxCurrentLayer] != pCtx->sSpsPpsCtx.pActiveLayerSps[iMaxActiveLayer])) {
bNewSeq = true;
}
// fill active sps if the current sps is not null while active layer is null
if (!bNewSeq) {
for (int i = 0; i < MAX_LAYER_NUM; i++) {
if (pCtx->pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) {
pCtx->pActiveLayerSps[i] = pTmpLayerSps[i];
if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) {
pCtx->sSpsPpsCtx.pActiveLayerSps[i] = pTmpLayerSps[i];
}
}
} else {
// UpdateActiveLayerSps if new sequence start
memcpy (&pCtx->pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
memcpy (&pCtx->sSpsPpsCtx.pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
}
return bNewSeq;
}
static void WriteBackActiveParameters (PWelsDecoderContext pCtx) {
if (pCtx->iOverwriteFlags & OVERWRITE_PPS) {
memcpy (&pCtx->sPpsBuffer[pCtx->sPpsBuffer[MAX_PPS_COUNT].iPpsId], &pCtx->sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps));
if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS) {
memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT].iPpsId],
&pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps));
}
if (pCtx->iOverwriteFlags & OVERWRITE_SPS) {
memcpy (&pCtx->sSpsBuffer[pCtx->sSpsBuffer[MAX_SPS_COUNT].iSpsId], &pCtx->sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps));
if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS) {
memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT].iSpsId],
&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps));
pCtx->bNewSeqBegin = true;
}
if (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) {
memcpy (&pCtx->sSubsetSpsBuffer[pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId],
&pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps));
if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS) {
memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId],
&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps));
pCtx->bNewSeqBegin = true;
}
pCtx->iOverwriteFlags = OVERWRITE_NONE;
pCtx->sSpsPpsCtx.iOverwriteFlags = OVERWRITE_NONE;
}
/*
@@ -2208,22 +2247,19 @@ void DecodeFinishUpdate (PWelsDecoderContext pCtx) {
}
/*
* ConstructAccessUnit
* construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to
* joint a collective access unit.
* parameter\
* buf: bitstream data buffer
* bit_len: size in bit length of data
* buf_len: size in byte length of data
* coded_au: mark an Access Unit decoding finished
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
int32_t iErr;
* WelsDecodeInitAccessUnitStart
* check and (re)allocate picture buffers on new sequence begin
* bit_len: size in bit length of data
* buf_len: size in byte length of data
* coded_au: mark an Access Unit decoding finished
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) {
int32_t iErr = ERR_NONE;
PAccessUnit pCurAu = pCtx->pAccessUnitList;
pCtx->bAuReadyFlag = false;
pCtx->bLastHasMmco5 = false;
pCtx->pLastDecPicInfo->bLastHasMmco5 = false;
bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx);
pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin;
iErr = WelsDecodeAccessUnitStart (pCtx);
@@ -2243,17 +2279,82 @@ int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferI
pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps;
//try to allocate or relocate DPB memory only when new sequence is coming.
if (pCtx->bNewSeqBegin) {
WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL
iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
return iErr;
}
/*
 * AllocPicBuffOnNewSeqBegin
 *  synchronize the picture resolution with the active SPS when a new sequence begins.
 *  When GetThreadCount() reports at most one thread, WelsResetRefPic() is called first.
 *  pCtx->pSps is dereferenced without a check, so it must be valid on entry.
 * parameter\
 *  pCtx: decoder context
 * return:
 *  0 - success; otherwise the error returned by SyncPictureResolutionExt()
 *  (a warning is logged in that case)
 */
int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx) {
  if (GetThreadCount (pCtx) <= 1) {
    WelsResetRefPic (pCtx); // clear ref pictures, single-thread path only
  }

  const int32_t kiRet = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
  if (kiRet != ERR_NONE) {
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", kiRet);
  }
  return kiRet;
}
/*
 * InitConstructAccessUnit
 *  preparation step run before an access unit is decoded: calls
 *  WelsDecodeInitAccessUnitStart() and, if pCtx->bNewSeqBegin is set afterwards,
 *  AllocPicBuffOnNewSeqBegin().
 * parameter\
 *  pCtx:     decoder context
 *  pDstInfo: buffer info, forwarded to WelsDecodeInitAccessUnitStart()
 * return:
 *  0 - success; otherwise the error_no of the first step that failed
 */
int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) {
  int32_t iRet = WelsDecodeInitAccessUnitStart (pCtx, pDstInfo);
  // stop here on failure, or when no new sequence begins (nothing to allocate)
  if (iRet != ERR_NONE || !pCtx->bNewSeqBegin) {
    return iRet;
  }

  iRet = AllocPicBuffOnNewSeqBegin (pCtx);
  if (iRet != ERR_NONE) {
    // NOTE(review): AllocPicBuffOnNewSeqBegin appears to log this same warning already - confirm whether both are wanted
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", iRet);
  }
  return iRet;
}
/*
* ConstructAccessUnit
* construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to
* joint a collective access unit.
* parameter\
* buf: bitstream data buffer
* bit_len: size in bit length of data
* buf_len: size in byte length of data
* coded_au: mark an Access Unit decoding finished
* return:
* 0 - success; otherwise returned error_no defined in error_no.h
*/
int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
int32_t iErr = ERR_NONE;
if (GetThreadCount (pCtx) <= 1) {
iErr = InitConstructAccessUnit (pCtx, pDstInfo);
if (ERR_NONE != iErr) {
return iErr;
}
}
if (pCtx->pCabacDecEngine == NULL) {
pCtx->pCabacDecEngine = (SWelsCabacDecEngine*)pCtx->pMemAlign->WelsMallocz (sizeof (SWelsCabacDecEngine),
"pCtx->pCabacDecEngine");
WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, (NULL == pCtx->pCabacDecEngine))
}
iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo);
WelsDecodeAccessUnitEnd (pCtx);
@@ -2320,8 +2421,7 @@ void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pS
pCtx->bUsedAsRef = false;
pCtx->iFrameNum = pSh->iFrameNum;
UpdateDecoderStatisticsForActiveParaset (& (pCtx->sDecoderStatistics),
pSps, pPps);
UpdateDecoderStatisticsForActiveParaset (pCtx->pDecoderStatistics, pSps, pPps);
}
int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
@@ -2380,13 +2480,27 @@ void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
* Decode current access unit when current AU is completed.
*/
int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
int32_t iRefCount[LIST_A];
PNalUnit pNalCur = NULL;
PNalUnit pNalCur = pCtx->pNalCur = NULL;
PAccessUnit pCurAu = pCtx->pAccessUnitList;
int32_t iIdx = pCurAu->uiStartPos;
int32_t iEndIdx = pCurAu->uiEndPos;
//get current thread ctx
PWelsDecoderThreadCTX pThreadCtx = NULL;
if (pCtx->pThreadCtx != NULL) {
pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx;
}
//get last thread ctx
PWelsDecoderThreadCTX pLastThreadCtx = NULL;
if (pCtx->pLastThreadCtx != NULL) {
pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx);
if (pLastThreadCtx->pDec == NULL) {
pLastThreadCtx->pDec = PrefetchLastPicForThread (pCtx->pPicBuff,
pLastThreadCtx->iPicBuffIdx);
}
}
int32_t iThreadCount = GetThreadCount (pCtx);
int32_t iPpsId = 0;
int32_t iRet = ERR_NONE;
@@ -2396,12 +2510,12 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4;
int16_t iLastIdD = -1, iLastIdQ = -1;
int16_t iCurrIdD = 0, iCurrIdQ = 0;
uint8_t uiNalRefIdc = 0;
pCtx->uiNalRefIdc = 0;
bool bFreshSliceAvailable =
true; // Another fresh slice comingup for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008
//update pCurDqLayer at the starting of AU decoding
if (pCtx->bInitialDqLayersMem) {
if (pCtx->bInitialDqLayersMem || pCtx->pCurDqLayer == NULL) {
pCtx->pCurDqLayer = pCtx->pDqLayersList[0];
}
@@ -2413,9 +2527,36 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
SLayerInfo pLayerInfo;
PSliceHeaderExt pShExt = NULL;
PSliceHeader pSh = NULL;
bool isNewFrame = true;
if (iThreadCount > 1) {
isNewFrame = pCtx->pDec == NULL;
}
if (pCtx->pDec == NULL) {
//call PrefetchPic before updating the reference lists in threaded mode;
//this prevents a possible hang in threaded decoding
pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
if (pLastThreadCtx != NULL) {
pLastThreadCtx->pDec->bUsedAsRef = pLastThreadCtx->pCtx->uiNalRefIdc > 0;
if (pLastThreadCtx->pDec->bUsedAsRef) {
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
uint32_t i = 0;
while (i < MAX_REF_PIC_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) {
pLastThreadCtx->pDec->pRefPic[listIdx][i] = pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i];
++i;
}
}
pLastThreadCtx->pCtx->sTmpRefPic = pLastThreadCtx->pCtx->sRefPic;
WelsMarkAsRef (pLastThreadCtx->pCtx, pLastThreadCtx->pDec);
pCtx->sRefPic = pLastThreadCtx->pCtx->sTmpRefPic;
} else {
pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
}
}
//WelsResetRefPic needs to be called when a new sequence is encountered;
//otherwise artifacts are observed in the decoded yuv in a couple of unit tests with multiple-slice frames
if (GetThreadCount (pCtx) > 1 && pCtx->bNewSeqBegin) {
WelsResetRefPic (pCtx);
}
if (pCtx->iTotalNumMbRec != 0)
pCtx->iTotalNumMbRec = 0;
@@ -2427,17 +2568,32 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
pCtx->iErrorCode |= dsOutOfMemory;
return ERR_INFO_REF_COUNT_OVERFLOW;
}
if (pThreadCtx != NULL) {
pThreadCtx->pDec = pCtx->pDec;
if (iThreadCount > 1) ++pCtx->pDec->iRefCount;
uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
for (uint32_t i = 0; i < uiMbHeight; ++i) {
RESET_EVENT (&pCtx->pDec->pReadyEvent[i]);
}
}
pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
} else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start
pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
}
pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp;
pCtx->pDec->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
if (pThreadCtx != NULL) {
pThreadCtx->iPicBuffIdx = pCtx->pDec->iPicBuffIdx;
pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag = pCtx->pDec->pMbCorrectlyDecodedFlag;
}
if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode
for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i)
memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
memset (pCtx->pDec->pRefPic[LIST_0], 0, sizeof (PPicture) * MAX_DPB_COUNT);
memset (pCtx->pDec->pRefPic[LIST_1], 0, sizeof (PPicture) * MAX_DPB_COUNT);
pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
pCtx->pDec->iMbEcedNum = 0;
pCtx->pDec->iMbEcedPropNum = 0;
@@ -2468,6 +2624,7 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
pCtx->pDec->iFrameNum = pSh->iFrameNum;
pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2
pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag;
pCtx->pDec->eSliceType = pSh->eSliceType;
memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage
pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag;
@@ -2475,7 +2632,7 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
pLayerInfo.sSliceInLayer.iLastMbQp = pSh->iSliceQp;
dq_cur->pBitStringAux = &pNalCur->sNalData.sVclNal.sSliceBitsRead;
uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
pCtx->uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
iPpsId = pSh->iPpsId;
@@ -2500,11 +2657,9 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
bFreshSliceAvailable = (iCurrIdD != iLastIdD
|| iCurrIdQ != iLastIdQ); // do not need condition of (first_mb == 0) due multiple slices might be disorder
WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps);
if (iCurrIdQ == BASE_QUALITY_ID) {
ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount));
}
if ((iLastIdD < 0) || //case 1: first layer
(iLastIdD == iCurrIdD)) { //case 2: same uiDId
@@ -2514,11 +2669,18 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag
|| (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR);
// Subclause 8.2.5.2 Decoding process for gaps in frame_num
int32_t iPrevFrameNum = pCtx->pLastDecPicInfo->iPrevFrameNum;
if (pLastThreadCtx != NULL) {
//call GetPrevFrameNum() to get correct iPrevFrameNum to prevent frame gap warning
iPrevFrameNum = pCtx->bNewSeqBegin ? 0 : GetPrevFrameNum (pCtx);
}
if (!kbIdrFlag &&
pSh->iFrameNum != pCtx->iPrevFrameNum &&
pSh->iFrameNum != ((pCtx->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - 1))) {
pSh->iFrameNum != iPrevFrameNum &&
pSh->iFrameNum != ((iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) -
1))) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
"referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", pCtx->iPrevFrameNum,
"referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d",
iPrevFrameNum,
pSh->iFrameNum);
bAllRefComplete = false;
@@ -2534,8 +2696,9 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
}
}
if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) {
iRet = InitRefPicList (pCtx, uiNalRefIdc, pSh->iPicOrderCntLsb);
if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID && isNewFrame) {
iRet = InitRefPicList (pCtx, pCtx->uiNalRefIdc, pSh->iPicOrderCntLsb);
if (iThreadCount > 1) isNewFrame = false;
if (iRet) {
pCtx->bRPLRError = true;
bAllRefComplete = false; // RPLR error, set ref pictures complete flag false
@@ -2554,7 +2717,15 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
if (pSh->eSliceType == B_SLICE && !pSh->iDirectSpatialMvPredFlag)
ComputeColocatedTemporalScaling (pCtx);
iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
if (iThreadCount > 1) {
if (iIdx == 0) {
memset (&pCtx->lastReadyHeightOffset[0][0], -1, LIST_A * MAX_REF_PIC_COUNT * sizeof (int16_t));
SET_EVENT (&pThreadCtx->sSliceDecodeStart);
}
iRet = WelsDecodeAndConstructSlice (pCtx);
} else {
iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
}
//Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case
if (iRet != ERR_NONE) {
@@ -2570,17 +2741,19 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
}
}
if (bReconstructSlice) {
if (iThreadCount <= 1 && bReconstructSlice) {
if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
return iRet;
}
}
if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
bAllRefComplete &= CheckRefPicturesComplete (pCtx);
} else {
bAllRefComplete = false;
if (iThreadCount <= 1) {
if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
bAllRefComplete &= CheckRefPicturesComplete (pCtx);
} else {
bAllRefComplete = false;
}
}
}
}
@@ -2632,55 +2805,80 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf
}
}
if (iThreadCount >= 1) {
int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
for (int32_t i = 0; i < iThreadCount; ++i) {
if (i == id || pThreadCtx[i - id].pCtx->uiDecodingTimeStamp == 0) continue;
if (pThreadCtx[i - id].pCtx->uiDecodingTimeStamp < pCtx->uiDecodingTimeStamp) {
WAIT_EVENT (&pThreadCtx[i - id].sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE);
}
}
pCtx->pLastDecPicInfo->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
}
iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
if (iRet)
if (iRet) {
if (iThreadCount > 1) {
SET_EVENT (&pThreadCtx->sSliceDecodeFinish);
}
return iRet;
}
pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
pCtx->bUsedAsRef = false;
if (uiNalRefIdc > 0) {
pCtx->bUsedAsRef = true;
//save MBType, MV and RefIndex for use in B-Slice direct mode
memcpy (pCtx->pDec->pMbType, pCtx->pCurDqLayer->pMbType, pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t));
memcpy (pCtx->pDec->pMv[LIST_0], pCtx->pCurDqLayer->pMv[LIST_0],
pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
memcpy (pCtx->pDec->pMv[LIST_1], pCtx->pCurDqLayer->pMv[LIST_1],
pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
memcpy (pCtx->pDec->pRefIndex[LIST_0], pCtx->pCurDqLayer->pRefIndex[LIST_0],
pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
memcpy (pCtx->pDec->pRefIndex[LIST_1], pCtx->pCurDqLayer->pRefIndex[LIST_1],
pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
uint32_t i = 0;
while (i < MAX_DPB_COUNT && pCtx->sRefPic.pRefList[listIdx][i]) {
pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
++i;
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
pCtx->bUsedAsRef = pCtx->uiNalRefIdc > 0;
if (iThreadCount <= 1) {
if (pCtx->bUsedAsRef) {
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
uint32_t i = 0;
while (i < MAX_DPB_COUNT && pCtx->sRefPic.pRefList[listIdx][i]) {
pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
++i;
}
}
}
iRet = WelsMarkAsRef (pCtx);
if (iRet != ERR_NONE) {
if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
pCtx->iErrorCode |= dsBitstreamError;
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
pCtx->pDec = NULL;
return iRet;
iRet = WelsMarkAsRef (pCtx);
if (iRet != ERR_NONE) {
if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
pCtx->iErrorCode |= dsBitstreamError;
if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
pCtx->pDec = NULL;
return iRet;
}
}
if (!pCtx->pParam->bParseOnly)
ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
pCtx->pDec->iLinesize,
pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
}
if (!pCtx->pParam->bParseOnly)
ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
pCtx->pDec->iLinesize,
pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
} else if (iThreadCount > 1) {
SET_EVENT (&pThreadCtx->sImageReady);
}
pCtx->pDec = NULL; //after frame decoding, always set to NULL
}
// need update frame_num due current frame is well decoded
if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
pCtx->iPrevFrameNum = pSh->iFrameNum;
if (pCtx->bLastHasMmco5)
pCtx->iPrevFrameNum = 0;
pCtx->pLastDecPicInfo->iPrevFrameNum = pSh->iFrameNum;
if (pCtx->pLastDecPicInfo->bLastHasMmco5)
pCtx->pLastDecPicInfo->iPrevFrameNum = 0;
if (iThreadCount > 1) {
int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
for (int32_t i = 0; i < iThreadCount; ++i) {
if (pThreadCtx[i - id].pCtx != NULL) {
unsigned long long uiTimeStamp = pThreadCtx[i - id].pCtx->uiTimeStamp;
if (uiTimeStamp > 0 && pThreadCtx[i - id].pCtx->sSpsPpsCtx.iSeqId > pCtx->sSpsPpsCtx.iSeqId) {
CopySpsPps (pThreadCtx[i - id].pCtx, pCtx);
if (pCtx->pPicBuff != pThreadCtx[i - id].pCtx->pPicBuff) {
pCtx->pPicBuff = pThreadCtx[i - id].pCtx->pPicBuff;
}
InitialDqLayersContext (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
break;
}
}
}
}
if (iThreadCount > 1) {
SET_EVENT (&pThreadCtx->sSliceDecodeFinish);
}
}
return ERR_NONE;
}
@@ -2690,7 +2888,8 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) { //VCL data, AU list should have data
PNalUnit pCurNal = pAu->pNalUnitsList[pAu->uiEndPos];
bAuBoundaryFlag = (pCtx->iTotalNumMbRec != 0)
&& (CheckAccessUnitBoundaryExt (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, &pCtx->sLastSliceHeader,
&& (CheckAccessUnitBoundaryExt (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt,
&pCtx->pLastDecPicInfo->sLastSliceHeader,
&pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader));
} else { //non VCL
if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_AU_DELIMITER) {
@@ -2698,11 +2897,11 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SEI) {
bAuBoundaryFlag = true;
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SPS) {
bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SPS);
bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS);
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SUBSET_SPS) {
bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS);
bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS);
} else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_PPS) {
bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_PPS);
bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS);
}
if (bAuBoundaryFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { //Construct remaining data first
ConstructAccessUnit (pCtx, ppDst, pDstInfo);
@@ -2718,8 +2917,8 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use
if (pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use
if (pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
if (MarkECFrameAsRef (pCtx) == ERR_INFO_INVALID_PTR) {
pCtx->iErrorCode |= dsRefListNullPtrs;
return false;
@@ -2730,7 +2929,8 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
pCtx->bFrameFinish = true; //clear frame pending status here!
} else {
if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) {
if ((pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) && (pCtx->sLastNalHdrExt.uiTemporalId == 0))
if ((pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0)
&& (pCtx->pLastDecPicInfo->sLastNalHdrExt.uiTemporalId == 0))
pCtx->iErrorCode |= dsNoParamSets;
else
pCtx->iErrorCode |= dsBitstreamError;
@@ -2740,9 +2940,9 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn
}
pCtx->pDec = NULL;
if (pAu->pNalUnitsList[pAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
pCtx->iPrevFrameNum = pCtx->sLastSliceHeader.iFrameNum; //save frame_num
if (pCtx->bLastHasMmco5)
pCtx->iPrevFrameNum = 0;
pCtx->pLastDecPicInfo->iPrevFrameNum = pCtx->pLastDecPicInfo->sLastSliceHeader.iFrameNum; //save frame_num
if (pCtx->pLastDecPicInfo->bLastHasMmco5)
pCtx->pLastDecPicInfo->iPrevFrameNum = 0;
}
return ERR_NONE;
}
@@ -2753,28 +2953,37 @@ bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
int32_t iRealMbIdx = pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
for (int32_t iMbIdx = 0; bAllRefComplete
&& iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) {
switch (pCtx->pCurDqLayer->pMbType[iRealMbIdx]) {
switch (pCtx->pCurDqLayer->pDec->pMbType[iRealMbIdx]) {
case MB_TYPE_SKIP:
case MB_TYPE_16x16:
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
break;
case MB_TYPE_16x8:
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
break;
case MB_TYPE_8x16:
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
break;
case MB_TYPE_8x8:
case MB_TYPE_8x8_REF0:
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
bAllRefComplete &=
pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
break;
default:
@@ -2785,6 +2994,7 @@ bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb()
return false;
}
return bAllRefComplete;
}
} // namespace WelsDec
+41 -34
View File
@@ -76,6 +76,13 @@ void InitErrorCon (PWelsDecoderContext pCtx) {
pCtx->sCopyFunc.pCopyChromaFunc = WelsCopy8x8_AArch64_neon; //aligned
}
#endif //HAVE_NEON_AARCH64
#if defined(HAVE_LSX)
if (pCtx->uiCpuFlag & WELS_CPU_LSX) {
pCtx->sCopyFunc.pCopyChromaFunc = WelsCopy8x8_lsx; //aligned
pCtx->sCopyFunc.pCopyLumaFunc = WelsCopy16x16_lsx; //aligned
}
#endif// HAVE_LSX
} //TODO add more methods here
return;
}
@@ -83,7 +90,7 @@ void InitErrorCon (PWelsDecoderContext pCtx) {
//Do error concealment using frame copy method
void DoErrorConFrameCopy (PWelsDecoderContext pCtx) {
PPicture pDstPic = pCtx->pDec;
PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb;
PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb;
uint32_t uiHeightInPixelY = (pCtx->pSps->iMbHeight) << 4;
int32_t iStrideY = pDstPic->iLinesize[0];
int32_t iStrideUV = pDstPic->iLinesize[1];
@@ -109,7 +116,7 @@ void DoErrorConSliceCopy (PWelsDecoderContext pCtx) {
int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth;
int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight;
PPicture pDstPic = pCtx->pDec;
PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb;
PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb;
if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY) && (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag))
pSrcPic = NULL; //no cross IDR method, should fill in data instead of copy
@@ -245,7 +252,7 @@ void DoMbECMvCopy (PWelsDecoderContext pCtx, PPicture pDec, PPicture pRef, int32
}
iMVs[0] = iFullMVx - (iMbXInPix << 2);
iMVs[1] = iFullMVy - (iMbYInPix << 2);
BaseMC (pMCRefMem, iMbXInPix, iMbYInPix, &pCtx->sMcFunc, 16, 16, iMVs);
BaseMC (pCtx, pMCRefMem, -1, -1, iMbXInPix, iMbYInPix, &pCtx->sMcFunc, 16, 16, iMVs);
}
return;
}
@@ -266,40 +273,40 @@ void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx) {
for (int32_t iMbY = 0; iMbY < iMbHeight; ++iMbY) {
for (int32_t iMbX = 0; iMbX < iMbWidth; ++iMbX) {
iMbXyIndex = iMbY * iMbWidth + iMbX;
if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER (pCurDqLayer->pMbType[iMbXyIndex])) {
uint32_t iMBType = pCurDqLayer->pMbType[iMbXyIndex];
if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER (pCurDqLayer->pDec->pMbType[iMbXyIndex])) {
uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMbXyIndex];
switch (iMBType) {
case MB_TYPE_SKIP:
case MB_TYPE_16x16:
iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1];
iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
break;
case MB_TYPE_16x8:
iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1];
iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][8];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][8][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][8][1];
iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][8];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
break;
case MB_TYPE_8x16:
iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1];
iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][2];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][2][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][2][1];
iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][2];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][1];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
iInterMbCorrectNum[iRefIdx]++;
break;
@@ -311,39 +318,39 @@ void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx) {
for (i = 0; i < 4; i++) {
iSubMBType = pCurDqLayer->pSubMbType[iMbXyIndex][i];
iIIdx = ((i >> 1) << 3) + ((i & 1) << 1);
iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][iIIdx];
iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][iIIdx];
pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
switch (iSubMBType) {
case SUB_MB_TYPE_8x8:
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1];
iInterMbCorrectNum[iRefIdx]++;
break;
case SUB_MB_TYPE_8x4:
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 4][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 4][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][1];
iInterMbCorrectNum[iRefIdx] += 2;
break;
case SUB_MB_TYPE_4x8:
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 1][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 1][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][1];
iInterMbCorrectNum[iRefIdx] += 2;
break;
case SUB_MB_TYPE_4x4: {
for (j = 0; j < 4; j++) {
iJIdx = ((j >> 1) << 2) + (j & 1);
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + iJIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + iJIdx][1];
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][0];
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][1];
}
iInterMbCorrectNum[iRefIdx] += 4;
}
@@ -372,7 +379,7 @@ void DoErrorConSliceMVCopy (PWelsDecoderContext pCtx) {
int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth;
int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight;
PPicture pDstPic = pCtx->pDec;
PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb;
PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb;
bool* pMbCorrectlyDecodedFlag = pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag;
int32_t iMbXyIndex;
+93 -50
View File
@@ -42,6 +42,7 @@
#include "manage_dec_ref.h"
#include "error_concealment.h"
#include "error_code.h"
#include "decoder.h"
namespace WelsDec {
@@ -50,10 +51,10 @@ static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameId
static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum);
static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx);
static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking);
static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking);
static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic pRefPic, uint32_t uiMmcoType,
int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx);
static int32_t SlidingWindow (PWelsDecoderContext pCtx);
static int32_t SlidingWindow (PWelsDecoderContext pCtx, PRefPic pRefPic);
static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic);
static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx, uint32_t uiLongTermPicNum);
@@ -63,7 +64,7 @@ static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx);
#ifdef LONG_TERM_REF
int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum);
#endif
static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx);
static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic);
static void SetUnRef (PPicture pRef) {
if (NULL != pRef) {
@@ -79,9 +80,19 @@ static void SetUnRef (PPicture pRef) {
pRef->uiSpatialId = -1;
pRef->iSpsId = -1;
pRef->bIsComplete = false;
pRef->iRefCount = 0;
if (pRef->eSliceType == I_SLICE) {
return;
}
int32_t lists = pRef->eSliceType == P_SLICE ? 1 : 2;
for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) {
pRef->pRefPic[LIST_0][i] = NULL;
pRef->pRefPic[LIST_1][i] = NULL;
for (int32_t list = 0; list < lists; ++list) {
if (pRef->pRefPic[list][i] != NULL) {
pRef->pRefPic[list][i]->iRefCount = 0;
pRef->pRefPic[list][i] = NULL;
}
}
}
}
}
@@ -115,9 +126,29 @@ void WelsResetRefPic (PWelsDecoderContext pCtx) {
pRefPic->uiLongRefCount[LIST_0] = 0;
}
/*!
 * \brief  Empty the reference bookkeeping held in pCtx->sRefPic.
 *
 * Zeroes uiRefCount for LIST_0 and LIST_1, and clears the LIST_0 short-term and
 * long-term reference lists together with their counters. The list slots are only
 * set to NULL; SetUnRef is not called on the pictures they pointed to.
 *
 * \param  pCtx  decoder context whose sRefPic is reset
 */
void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx) {
  PRefPic pRefPic = &pCtx->sRefPic;

  // forget every LIST_0 short-term / long-term entry without modifying the pictures
  for (int32_t iSlot = 0; iSlot < MAX_DPB_COUNT; ++iSlot) {
    pRefPic->pShortRefList[LIST_0][iSlot] = NULL;
    pRefPic->pLongRefList[LIST_0][iSlot] = NULL;
  }

  // all counters go back to zero
  pRefPic->uiShortRefCount[LIST_0] = 0;
  pRefPic->uiLongRefCount[LIST_0] = 0;
  pRefPic->uiRefCount[LIST_0] = 0;
  pRefPic->uiRefCount[LIST_1] = 0;
}
static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) {
if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0) && (pCtx->eSliceType != I_SLICE
&& pCtx->eSliceType != SI_SLICE)) {
if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0)
&& (pCtx->eSliceType != I_SLICE
&& pCtx->eSliceType != SI_SLICE)) {
if (pCtx->pParam->eEcActiveIdc !=
ERROR_CON_DISABLE) { //IDR lost!, recover it for future decoding with data all set to 0
PPicture pRef = PrefetchPic (pCtx->pPicBuff);
@@ -140,24 +171,29 @@ static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) {
|| (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc))
&& (NULL != pCtx->pPreviousDecodedPictureInDpb);
bCopyPrevious = bCopyPrevious && (pRef->iWidthInPixel == pCtx->pPreviousDecodedPictureInDpb->iWidthInPixel)
&& (pRef->iHeightInPixel == pCtx->pPreviousDecodedPictureInDpb->iHeightInPixel);
&& (NULL != pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb);
bCopyPrevious = bCopyPrevious
&& (pRef->iWidthInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iWidthInPixel)
&& (pRef->iHeightInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iHeightInPixel);
if (!bCopyPrevious) {
memset (pRef->pData[0], 128, pRef->iLinesize[0] * pRef->iHeightInPixel);
memset (pRef->pData[1], 128, pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
memset (pRef->pData[2], 128, pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
} else if (pRef == pCtx->pPreviousDecodedPictureInDpb) {
} else if (pRef == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsInitRefList()::EC memcpy overlap.");
} else {
memcpy (pRef->pData[0], pCtx->pPreviousDecodedPictureInDpb->pData[0], pRef->iLinesize[0] * pRef->iHeightInPixel);
memcpy (pRef->pData[1], pCtx->pPreviousDecodedPictureInDpb->pData[1], pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
memcpy (pRef->pData[2], pCtx->pPreviousDecodedPictureInDpb->pData[2], pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
memcpy (pRef->pData[0], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[0],
pRef->iLinesize[0] * pRef->iHeightInPixel);
memcpy (pRef->pData[1], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[1],
pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
memcpy (pRef->pData[2], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[2],
pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
}
pRef->iFrameNum = 0;
pRef->iFramePoc = 0;
pRef->uiTemporalId = pRef->uiQualityId = 0;
pRef->eSliceType = pCtx->eSliceType;
ExpandReferencingPicture (pRef->pData, pRef->iWidthInPixel, pRef->iHeightInPixel, pRef->iLinesize,
pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
AddShortTermToList (&pCtx->sRefPic, pRef);
@@ -358,7 +394,10 @@ int32_t WelsReorderRefList (PWelsDecoderContext pCtx) {
for (int32_t listIdx = 0; listIdx < ListCount; ++listIdx) {
PPicture pPic = NULL;
PPicture* ppRefList = pCtx->sRefPic.pRefList[listIdx];
int32_t iMaxRefIdx = pCtx->iPicQueueNumber;
int32_t iMaxRefIdx = pCtx->iPicQueueNumber;
if (iMaxRefIdx >= MAX_REF_PIC_COUNT) {
iMaxRefIdx = MAX_REF_PIC_COUNT - 1;
}
int32_t iRefCount = pSliceHeader->uiRefCount[listIdx];
int32_t iPredFrameNum = pSliceHeader->iFrameNum;
int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum;
@@ -532,13 +571,20 @@ int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx) {
for (i = WELS_MAX (1, WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx])); i < iRefCount; i++)
ppRefList[i] = ppRefList[i - 1];
pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]), iRefCount);
pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]),
iRefCount);
}
return ERR_NONE;
}
int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
PRefPic pRefPic = &pCtx->sRefPic;
int32_t WelsMarkAsRef (PWelsDecoderContext pCtx, PPicture pLastDec) {
PPicture pDec = pLastDec;
bool isThreadCtx = true;
if (pDec == NULL) {
pDec = pCtx->pDec;
isThreadCtx = false;
}
PRefPic pRefPic = isThreadCtx ? &pCtx->sTmpRefPic : &pCtx->sRefPic;
PRefPicMarking pRefPicMarking = pCtx->pCurDqLayer->pRefPicMarking;
PAccessUnit pCurAU = pCtx->pAccessUnitList;
bool bIsIDRAU = false;
@@ -546,10 +592,10 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
int32_t iRet = ERR_NONE;
pCtx->pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
pCtx->pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId;
pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId;
pDec->iSpsId = pCtx->pSps->iSpsId;
pDec->iPpsId = pCtx->pPps->iPpsId;
for (j = pCurAU->uiStartPos; j <= pCurAU->uiEndPos; j++) {
if (pCurAU->pNalUnitsList[j]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR
@@ -560,33 +606,33 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
}
if (bIsIDRAU) {
if (pRefPicMarking->bLongTermRefFlag) {
pCtx->sRefPic.iMaxLongTermFrameIdx = 0;
AddLongTermToList (pRefPic, pCtx->pDec, 0, 0);
pRefPic->iMaxLongTermFrameIdx = 0;
AddLongTermToList (pRefPic, pDec, 0, 0);
} else {
pCtx->sRefPic.iMaxLongTermFrameIdx = -1;
pRefPic->iMaxLongTermFrameIdx = -1;
}
} else {
if (pRefPicMarking->bAdaptiveRefPicMarkingModeFlag) {
iRet = MMCO (pCtx, pRefPicMarking);
iRet = MMCO (pCtx, pRefPic, pRefPicMarking);
if (iRet != ERR_NONE) {
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iRet = RemainOneBufferInDpbForEC (pCtx);
iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic);
WELS_VERIFY_RETURN_IF (iRet, iRet);
} else {
return iRet;
}
}
if (pCtx->bLastHasMmco5) {
pCtx->pDec->iFrameNum = 0;
pCtx->pDec->iFramePoc = 0;
if (pCtx->pLastDecPicInfo->bLastHasMmco5) {
pDec->iFrameNum = 0;
pDec->iFramePoc = 0;
}
} else {
iRet = SlidingWindow (pCtx);
iRet = SlidingWindow (pCtx, pRefPic);
if (iRet != ERR_NONE) {
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iRet = RemainOneBufferInDpbForEC (pCtx);
iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic);
WELS_VERIFY_RETURN_IF (iRet, iRet);
} else {
return iRet;
@@ -595,22 +641,22 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
}
}
if (!pCtx->pDec->bIsLongRef) {
if (!pDec->bIsLongRef) {
if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) {
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iRet = RemainOneBufferInDpbForEC (pCtx);
iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic);
WELS_VERIFY_RETURN_IF (iRet, iRet);
} else {
return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
}
}
iRet = AddShortTermToList (pRefPic, pCtx->pDec);
iRet = AddShortTermToList (pRefPic, pDec);
}
return iRet;
}
static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking) {
PSps pSps = pCtx->pCurDqLayer->sLayerInfo.pSps;
int32_t i = 0;
int32_t iRet = ERR_NONE;
@@ -624,7 +670,8 @@ static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
if (uiMmcoType > MMCO_LONG) {
return ERR_INFO_INVALID_MMCO_OPCODE_BASE;
}
iRet = MMCOProcess (pCtx, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx, iMaxLongTermFrameIdx);
iRet = MMCOProcess (pCtx, pRefPic, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx,
iMaxLongTermFrameIdx);
if (iRet != ERR_NONE) {
return iRet;
}
@@ -635,9 +682,8 @@ static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
return ERR_NONE;
}
static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic pRefPic, uint32_t uiMmcoType,
int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx) {
PRefPic pRefPic = &pCtx->sRefPic;
PPicture pPic = NULL;
int32_t i = 0;
int32_t iRet = ERR_NONE;
@@ -684,7 +730,7 @@ static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
break;
case MMCO_RESET:
WelsResetRefPic (pCtx);
pCtx->bLastHasMmco5 = true;
pCtx->pLastDecPicInfo->bLastHasMmco5 = true;
break;
case MMCO_LONG:
if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
@@ -709,13 +755,12 @@ static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
return iRet;
}
static int32_t SlidingWindow (PWelsDecoderContext pCtx) {
PRefPic pRefPic = &pCtx->sRefPic;
static int32_t SlidingWindow (PWelsDecoderContext pCtx, PRefPic pRefPic) {
PPicture pPic = NULL;
int32_t i = 0;
if (pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) {
if (pCtx->sRefPic.uiShortRefCount[LIST_0] == 0) {
if (pRefPic->uiShortRefCount[LIST_0] + pRefPic->uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) {
if (pRefPic->uiShortRefCount[LIST_0] == 0) {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "No reference picture in short term list when sliding window");
return ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH;
}
@@ -740,8 +785,8 @@ static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) {
for (i = 0; i < pRefPic->uiShortRefCount[LIST_0]; i++) {
if (pRefPic->pShortRefList[LIST_0][i]->iFrameNum == iFrameNum) {
iMoveSize = pRefPic->uiShortRefCount[LIST_0] - i - 1;
pRefPic->pShortRefList[LIST_0][i]->bUsedAsRef = false;
pPic = pRefPic->pShortRefList[LIST_0][i];
pPic->bUsedAsRef = false;
pRefPic->pShortRefList[LIST_0][i] = NULL;
if (iMoveSize > 0) {
memmove (&pRefPic->pShortRefList[LIST_0][i], &pRefPic->pShortRefList[LIST_0][i + 1],
@@ -752,7 +797,6 @@ static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) {
break;
}
}
return pPic;
}
@@ -878,14 +922,13 @@ int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum) {
}
#endif
static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx) {
static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic) {
int32_t iRet = ERR_NONE;
PRefPic pRefPic = &pCtx->sRefPic;
if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] < pCtx->pSps->iNumRefFrames)
return iRet;
if (pRefPic->uiShortRefCount[0] > 0) {
iRet = SlidingWindow (pCtx);
iRet = SlidingWindow (pCtx, pRefPic);
} else { //all LTR, remove the smallest long_term_frame_idx
int32_t iLongTermFrameIdx = 0;
int32_t iMaxLongTermFrameIdx = pRefPic->iMaxLongTermFrameIdx;
+250 -196
View File
@@ -155,7 +155,7 @@ void CopyRectBlock4Cols (void* vdst, void* vsrc, const int32_t stride_dst, const
memcpy (&dst[stride_dst * 3], &src[stride_src * 3], 16);
}
}
void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]) {
bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
@@ -170,14 +170,14 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
int8_t iMatchRef;
int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2];
iCurXy = pCurLayer->iMbXyIndex;
iCurX = pCurLayer->iMbX;
iCurY = pCurLayer->iMbY;
iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];
iCurXy = pCurDqLayer->iMbXyIndex;
iCurX = pCurDqLayer->iMbX;
iCurY = pCurDqLayer->iMbY;
iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
if (iCurX != 0) {
iLeftXy = iCurXy - 1;
iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
} else {
bLeftAvail = 0;
@@ -185,19 +185,19 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
}
if (iCurY != 0) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
bTopAvail = (iTopSliceIdc == iCurSliceIdc);
if (iCurX != 0) {
iLeftTopXy = iTopXy - 1;
iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
} else {
bLeftTopAvail = 0;
}
if (iCurX != (pCurLayer->iMbWidth - 1)) {
if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
iRightTopXy = iTopXy + 1;
iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
} else {
bRightTopAvail = 0;
@@ -208,18 +208,18 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
bRightTopAvail = 0;
}
iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0);
iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0);
iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
? pCurLayer->pMbType[iLeftTopXy] : 0);
iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
? pCurLayer->pMbType[iRightTopXy] : 0);
? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
/*get neb mv&iRefIdxArray*/
/*left*/
if (bLeftAvail && IS_INTER (iLeftType)) {
ST32 (iMvA, LD32 (pCurLayer->pMv[0][iLeftXy][3]));
iLeftRef = pCurLayer->pRefIndex[0][iLeftXy][3];
ST32 (iMvA, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftXy][3] : pCurDqLayer->pMv[0][iLeftXy][3]));
iLeftRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftXy][3] : pCurDqLayer->pRefIndex[0][iLeftXy][3];
} else {
ST32 (iMvA, 0);
if (0 == bLeftAvail) { //not available
@@ -236,8 +236,8 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
/*top*/
if (bTopAvail && IS_INTER (iTopType)) {
ST32 (iMvB, LD32 (pCurLayer->pMv[0][iTopXy][12]));
iTopRef = pCurLayer->pRefIndex[0][iTopXy][12];
ST32 (iMvB, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iTopXy][12] : pCurDqLayer->pMv[0][iTopXy][12]));
iTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iTopXy][12] : pCurDqLayer->pRefIndex[0][iTopXy][12];
} else {
ST32 (iMvB, 0);
if (0 == bTopAvail) { //not available
@@ -254,8 +254,10 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
/*right_top*/
if (bRightTopAvail && IS_INTER (iRightTopType)) {
ST32 (iMvC, LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
iRightTopRef = pCurLayer->pRefIndex[0][iRightTopXy][12];
ST32 (iMvC, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iRightTopXy][12] :
pCurDqLayer->pMv[0][iRightTopXy][12]));
iRightTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iRightTopXy][12] :
pCurDqLayer->pRefIndex[0][iRightTopXy][12];
} else {
ST32 (iMvC, 0);
if (0 == bRightTopAvail) { //not available
@@ -267,8 +269,9 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
/*left_top*/
if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
ST32 (iMvD, LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
iLeftTopRef = pCurLayer->pRefIndex[0][iLeftTopXy][15];
ST32 (iMvD, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftTopXy][15] : pCurDqLayer->pMv[0][iLeftTopXy][15]));
iLeftTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftTopXy][15] :
pCurDqLayer->pRefIndex[0][iLeftTopXy][15];
} else {
ST32 (iMvD, 0);
if (0 == bLeftTopAvail) { //not available
@@ -305,13 +308,21 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
}
int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType) {
PDqLayer pCurLayer = pCtx->pCurDqLayer;
int32_t iMbXy = pCurLayer->iMbXyIndex;
PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
uint32_t is8x8 = IS_Inter_8x8 (pCurLayer->pMbType[iMbXy]);
mbType = pCurLayer->pMbType[iMbXy];
uint32_t is8x8 = IS_Inter_8x8 (GetMbType (pCurDqLayer)[iMbXy]);
mbType = GetMbType (pCurDqLayer)[iMbXy];
PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0];
if (GetThreadCount (pCtx) > 1) {
if (16 * pCurDqLayer->iMbY > pCtx->lastReadyHeightOffset[1][0]) {
if (colocPic->pReadyEvent[pCurDqLayer->iMbY].isSignaled != 1) {
WAIT_EVENT (&colocPic->pReadyEvent[pCurDqLayer->iMbY], WELS_DEC_THREAD_WAIT_INFINITE);
}
pCtx->lastReadyHeightOffset[1][0] = 16 * pCurDqLayer->iMbY;
}
}
if (colocPic == NULL) {
SLogContext* pLogCtx = & (pCtx->sLogCtx);
@@ -336,43 +347,43 @@ int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& sub
}
if (IS_INTRA (coloc_mbType)) {
SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t));
SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t));
return ERR_NONE;
}
SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t));
SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t));
if (IS_INTER_16x16 (mbType)) {
int16_t iMVZero[2] = { 0 };
int16_t* pMv = IS_TYPE_L1 (coloc_mbType) ? colocPic->pMv[LIST_1][iMbXy][0] : iMVZero;
ST32 (pCurLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0]));
ST32 (pCurLayer->iColocMv[LIST_1][0], LD32 (pMv));
pCurLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0];
pCurLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] :
REF_NOT_IN_LIST;
ST32 (pCurDqLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0]));
ST32 (pCurDqLayer->iColocMv[LIST_1][0], LD32 (pMv));
pCurDqLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0];
pCurDqLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] :
REF_NOT_IN_LIST;
} else {
if (!pCtx->pSps->bDirect8x8InferenceFlag) {
CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4);
CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1);
CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4);
CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1);
if (IS_TYPE_L1 (coloc_mbType)) {
CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4);
CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1);
CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4);
CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1);
} else { // only forward prediction
SetRectBlock (pCurLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
SetRectBlock (pCurDqLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
}
} else {
for (int32_t listIdx = 0; listIdx < 1 + !! (coloc_mbType & MB_TYPE_L1); listIdx++) {
SetRectBlock (pCurLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4);
SetRectBlock (pCurLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4);
SetRectBlock (pCurLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4);
SetRectBlock (pCurLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4);
SetRectBlock (pCurDqLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4);
SetRectBlock (pCurDqLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4);
SetRectBlock (pCurDqLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4);
SetRectBlock (pCurDqLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4);
SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1);
SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1);
SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1);
SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1);
SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1);
SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1);
SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1);
SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1);
}
if (! (coloc_mbType & MB_TYPE_L1)) // only forward prediction
SetRectBlock (&pCurLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
SetRectBlock (&pCurDqLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
}
}
return ERR_NONE;
@@ -382,9 +393,9 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
SubMbType& subMbType) {
int32_t ret = ERR_NONE;
PDqLayer pCurLayer = pCtx->pCurDqLayer;
int32_t iMbXy = pCurLayer->iMbXyIndex;
bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0;
PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
MbType mbType;
ret = GetColocatedMb (pCtx, mbType, subMbType);
@@ -404,15 +415,15 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
int8_t iDiagonalRef[LIST_A];
int16_t iMvA[LIST_A][2], iMvB[LIST_A][2], iMvC[LIST_A][2], iMvD[LIST_A][2];
iCurXy = pCurLayer->iMbXyIndex;
iCurXy = pCurDqLayer->iMbXyIndex;
iCurX = pCurLayer->iMbX;
iCurY = pCurLayer->iMbY;
iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];
iCurX = pCurDqLayer->iMbX;
iCurY = pCurDqLayer->iMbY;
iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
if (iCurX != 0) {
iLeftXy = iCurXy - 1;
iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
} else {
bLeftAvail = 0;
@@ -420,19 +431,19 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
}
if (iCurY != 0) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
bTopAvail = (iTopSliceIdc == iCurSliceIdc);
if (iCurX != 0) {
iLeftTopXy = iTopXy - 1;
iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
} else {
bLeftTopAvail = 0;
}
if (iCurX != (pCurLayer->iMbWidth - 1)) {
if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
iRightTopXy = iTopXy + 1;
iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
} else {
bRightTopAvail = 0;
@@ -443,20 +454,22 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
bRightTopAvail = 0;
}
iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0);
iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0);
iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0);
iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0);
iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
? pCurLayer->pMbType[iLeftTopXy] : 0);
iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
? pCurLayer->pMbType[iRightTopXy] : 0);
? GetMbType (pCurDqLayer)[iLeftTopXy] : 0);
iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
? GetMbType (pCurDqLayer)[iRightTopXy] : 0);
/*get neb mv&iRefIdxArray*/
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
/*left*/
if (bLeftAvail && IS_INTER (iLeftType)) {
ST32 (iMvA[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
iLeftRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
ST32 (iMvA[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3] :
pCurDqLayer->pMv[listIdx][iLeftXy][3]));
iLeftRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3] :
pCurDqLayer->pRefIndex[listIdx][iLeftXy][3];
} else {
ST32 (iMvA[listIdx], 0);
if (0 == bLeftAvail) { //not available
@@ -468,8 +481,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
/*top*/
if (bTopAvail && IS_INTER (iTopType)) {
ST32 (iMvB[listIdx], LD32 (pCurLayer->pMv[listIdx][iTopXy][12]));
iTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iTopXy][12];
ST32 (iMvB[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iTopXy][12] :
pCurDqLayer->pMv[listIdx][iTopXy][12]));
iTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12] :
pCurDqLayer->pRefIndex[listIdx][iTopXy][12];
} else {
ST32 (iMvB[listIdx], 0);
if (0 == bTopAvail) { //not available
@@ -481,8 +496,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
/*right_top*/
if (bRightTopAvail && IS_INTER (iRightTopType)) {
ST32 (iMvC[listIdx], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
iRightTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
ST32 (iMvC[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12] :
pCurDqLayer->pMv[listIdx][iRightTopXy][12]));
iRightTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12] :
pCurDqLayer->pRefIndex[listIdx][iRightTopXy][12];
} else {
ST32 (iMvC[listIdx], 0);
if (0 == bRightTopAvail) { //not available
@@ -493,8 +510,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
}
/*left_top*/
if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
ST32 (iMvD[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
iLeftTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
ST32 (iMvD[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15] :
pCurDqLayer->pMv[listIdx][iLeftTopXy][15]));
iLeftTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15] :
pCurDqLayer->pRefIndex[listIdx][iLeftTopXy][15];
} else {
ST32 (iMvD[listIdx], 0);
if (0 == bLeftTopAvail) { //not available
@@ -543,7 +562,7 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
mbType &= ~MB_TYPE_L0;
subMbType &= ~MB_TYPE_L0;
}
pCurLayer->pMbType[iMbXy] = mbType;
GetMbType (pCurDqLayer)[iMbXy] = mbType;
int16_t pMvd[4] = { 0 };
@@ -551,31 +570,31 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
if (IS_INTER_16x16 (mbType)) {
if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) {
if (0 == pCurLayer->iColocIntra[0] && !bIsLongRef
&& ((pCurLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurLayer->iColocMv[LIST_0][0][0] + 1) <= 2
&& (unsigned) (pCurLayer->iColocMv[LIST_0][0][1] + 1) <= 2)
|| (pCurLayer->iColocRefIndex[LIST_0][0] < 0 && pCurLayer->iColocRefIndex[LIST_1][0] == 0
&& (unsigned) (pCurLayer->iColocMv[LIST_1][0][0] + 1) <= 2
&& (unsigned) (pCurLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) {
if (0 == pCurDqLayer->iColocIntra[0] && !bIsLongRef
&& ((pCurDqLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][0] + 1) <= 2
&& (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][1] + 1) <= 2)
|| (pCurDqLayer->iColocRefIndex[LIST_0][0] < 0 && pCurDqLayer->iColocRefIndex[LIST_1][0] == 0
&& (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][0] + 1) <= 2
&& (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) {
if (0 >= ref[0]) * (uint32_t*)iMvp[LIST_0] = 0;
if (0 >= ref[1]) * (uint32_t*)iMvp[LIST_1] = 0;
}
}
UpdateP16x16DirectCabac (pCurLayer);
UpdateP16x16DirectCabac (pCurDqLayer);
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
UpdateP16x16MotionInfo (pCurLayer, listIdx, ref[listIdx], iMvp[listIdx]);
UpdateP16x16MvdCabac (pCurLayer, pMvd, listIdx);
UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref[listIdx], iMvp[listIdx]);
UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx);
}
} else {
if (bSkipOrDirect) {
int8_t pSubPartCount[4], pPartW[4];
for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
int16_t iIdx8 = i << 2;
pCurLayer->pSubMbType[iMbXy][i] = subMbType;
pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
int8_t pRefIndex[LIST_A][30];
UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
UpdateP8x8DirectCabac (pCurLayer, iIdx8);
UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
@@ -584,7 +603,7 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
pSubPartCount[i] = 4;
pPartW[i] = 1;
}
FillSpatialDirect8x8Mv (pCurLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, bIsLongRef, iMvp, ref, NULL, NULL);
FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, bIsLongRef, iMvp, ref, NULL, NULL);
}
}
}
@@ -594,9 +613,9 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A],
SubMbType& subMbType) {
int32_t ret = ERR_NONE;
PDqLayer pCurLayer = pCtx->pCurDqLayer;
int32_t iMbXy = pCurLayer->iMbXyIndex;
bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0;
PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0;
MbType mbType;
ret = GetColocatedMb (pCtx, mbType, subMbType);
@@ -604,42 +623,42 @@ int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
return ret;
}
pCurLayer->pMbType[iMbXy] = mbType;
GetMbType (pCurDqLayer)[iMbXy] = mbType;
PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
int16_t pMvd[4] = { 0 };
const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]);
if (IS_INTER_16x16 (mbType)) {
ref[LIST_0] = 0;
ref[LIST_1] = 0;
UpdateP16x16DirectCabac (pCurLayer);
UpdateP16x16RefIdx (pCurLayer, LIST_1, ref[LIST_1]);
UpdateP16x16DirectCabac (pCurDqLayer);
UpdateP16x16RefIdx (pCurDqLayer, LIST_1, ref[LIST_1]);
ST64 (iMvp, 0);
if (pCurLayer->iColocIntra[0]) {
UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]);
UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]);
UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]);
if (pCurDqLayer->iColocIntra[0]) {
UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
} else {
ref[LIST_0] = 0;
int16_t* mv = pCurLayer->iColocMv[LIST_0][0];
int8_t colocRefIndexL0 = pCurLayer->iColocRefIndex[LIST_0][0];
int16_t* mv = pCurDqLayer->iColocMv[LIST_0][0];
int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][0];
if (colocRefIndexL0 >= 0) {
ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
} else {
mv = pCurLayer->iColocMv[LIST_1][0];
mv = pCurDqLayer->iColocMv[LIST_1][0];
}
UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]);
UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]);
iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]);
UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]);
iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0];
iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]);
UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]);
}
UpdateP16x16MvdCabac (pCurLayer, pMvd, LIST_0);
UpdateP16x16MvdCabac (pCurLayer, pMvd, LIST_1);
UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0);
UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_1);
} else {
if (bSkipOrDirect) {
int8_t pSubPartCount[4], pPartW[4];
@@ -647,27 +666,27 @@ int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
for (int32_t i = 0; i < 4; i++) {
int16_t iIdx8 = i << 2;
const uint8_t iScan4Idx = g_kuiScan4[iIdx8];
pCurLayer->pSubMbType[iMbXy][i] = subMbType;
pCurDqLayer->pSubMbType[iMbXy][i] = subMbType;
int16_t (*mvColoc)[2] = pCurLayer->iColocMv[LIST_0];
int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
ref[LIST_1] = 0;
UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
if (pCurLayer->iColocIntra[iScan4Idx]) {
UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
if (pCurDqLayer->iColocIntra[iScan4Idx]) {
ref[LIST_0] = 0;
UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
ST64 (iMvp, 0);
} else {
ref[LIST_0] = 0;
int8_t colocRefIndexL0 = pCurLayer->iColocRefIndex[LIST_0][iScan4Idx];
int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][iScan4Idx];
if (colocRefIndexL0 >= 0) {
ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count);
} else {
mvColoc = pCurLayer->iColocMv[LIST_1];
mvColoc = pCurDqLayer->iColocMv[LIST_1];
}
UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
}
UpdateP8x8DirectCabac (pCurLayer, iIdx8);
UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
@@ -676,7 +695,7 @@ int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2],
pSubPartCount[i] = 4;
pPartW[i] = 1;
}
FillTemporalDirect8x8Mv (pCurLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, ref, mvColoc, NULL, NULL);
FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, ref, mvColoc, NULL, NULL);
}
}
}
@@ -785,14 +804,23 @@ void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef,
//mb
const uint8_t kuiScan4Idx = g_kuiScan4[i];
const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
if (pCurDqLayer->pDec != NULL) {
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
} else {
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
}
}
}
@@ -808,8 +836,8 @@ void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef) {
const uint8_t kuiScan4Idx = g_kuiScan4[i];
const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
}
}
@@ -824,11 +852,17 @@ void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs
//mb
const uint8_t kuiScan4Idx = g_kuiScan4[i];
const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
if (pCurDqLayer->pDec != NULL) {
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
} else {
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
}
}
}
@@ -848,12 +882,21 @@ void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
//mb
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
if (pCurDqLayer->pDec != NULL) {
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
} else {
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
}
//cache
ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
@@ -879,12 +922,21 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
//mb
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
if (pCurDqLayer->pDec != NULL) {
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
} else {
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
}
//cache
ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
@@ -895,10 +947,10 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][
}
}
void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A],
int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]) {
int32_t iMbXy = pCurLayer->iMbXyIndex;
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
for (int32_t j = 0; j < iPartCount; j++) {
int8_t iPartIdx = iIdx8 + j * iPartW;
uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
@@ -909,10 +961,10 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
if (IS_SUB_8x8 (subMbType)) {
* (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
ST32 ((pMV + 2), LD32 (pMV));
ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
if (pMotionVector != NULL) {
ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
@@ -923,10 +975,10 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
}
* (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
ST32 ((pMV + 2), LD32 (pMV));
ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
if (pMotionVector != NULL) {
ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
@@ -937,8 +989,8 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
}
} else { //SUB_4x4
* (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
if (pMotionVector != NULL) {
ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV));
}
@@ -946,8 +998,8 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
}
* (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
if (pMotionVector != NULL) {
ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV));
}
@@ -956,19 +1008,19 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
}
}
if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) {
uint32_t uiColZeroFlag = (0 == pCurLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
(pCurLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
&& pCurLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
const int16_t (*mvColoc)[2] = 0 == pCurLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurLayer->iColocMv[LIST_0] :
pCurLayer->iColocMv[LIST_1];
uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
(pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
&& pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
const int16_t (*mvColoc)[2] = 0 == pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurDqLayer->iColocMv[LIST_0] :
pCurDqLayer->iColocMv[LIST_1];
const int16_t* mv = mvColoc[iColocIdx];
if (IS_SUB_8x8 (subMbType)) {
if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
if (iRef[LIST_0] == 0) {
ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
if (pMotionVector != NULL) {
ST64 (pMotionVector[LIST_0][iCacheIdx], 0);
ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0);
@@ -980,10 +1032,10 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
}
if (iRef[LIST_1] == 0) {
ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
if (pMotionVector != NULL) {
ST64 (pMotionVector[LIST_1][iCacheIdx], 0);
ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0);
@@ -997,8 +1049,8 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
} else {
if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
if (iRef[LIST_0] == 0) {
ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
if (pMotionVector != NULL) {
ST32 (pMotionVector[LIST_0][iCacheIdx], 0);
}
@@ -1007,8 +1059,8 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
}
}
if (iRef[LIST_1] == 0) {
ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
if (pMotionVector != NULL) {
ST32 (pMotionVector[LIST_1][iCacheIdx], 0);
}
@@ -1022,11 +1074,12 @@ void FillSpatialDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int
}
}
void FillTemporalDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW,
void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount,
const int8_t& iPartW,
const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], int16_t pMotionVector[LIST_A][30][MV_A],
int16_t pMvdCache[LIST_A][30][MV_A]) {
PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
int32_t iMbXy = pCurLayer->iMbXyIndex;
PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
for (int32_t j = 0; j < iPartCount; j++) {
int8_t iPartIdx = iIdx8 + j * iPartW;
@@ -1038,16 +1091,16 @@ void FillTemporalDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const in
int16_t pMV[4] = { 0 };
if (IS_SUB_8x8 (subMbType)) {
if (!pCurLayer->iColocIntra[iColocIdx]) {
if (!pCurDqLayer->iColocIntra[iColocIdx]) {
pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
}
ST32 (pMV, LD32 (pMvDirect[LIST_0]));
ST32 ((pMV + 2), LD32 (pMvDirect[LIST_0]));
ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
if (pMotionVector != NULL) {
ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
@@ -1056,16 +1109,16 @@ void FillTemporalDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const in
ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
}
if (!pCurLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
}
ST32 (pMV, LD32 (pMvDirect[LIST_1]));
ST32 ((pMV + 2), LD32 (pMvDirect[LIST_1]));
ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
if (pMotionVector != NULL) {
ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
@@ -1075,24 +1128,24 @@ void FillTemporalDirect8x8Mv (PDqLayer pCurLayer, const int16_t& iIdx8, const in
ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
}
} else { //SUB_4x4
if (!pCurLayer->iColocIntra[iColocIdx]) {
if (!pCurDqLayer->iColocIntra[iColocIdx]) {
pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
}
ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_0]));
ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_0]));
ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
if (pMotionVector != NULL) {
ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMvDirect[LIST_0]));
}
if (pMvdCache != NULL) {
ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
}
if (!pCurLayer->iColocIntra[iColocIdx]) {
if (!pCurDqLayer->iColocIntra[iColocIdx]) {
pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0];
pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1];
}
ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_1]));
ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_1]));
ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
if (pMotionVector != NULL) {
ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMvDirect[LIST_1]));
}
@@ -1122,8 +1175,9 @@ int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0,
// Stores iRef as the reference index for one partition of the current macroblock.
//   pCurDqLayer - layer whose iMbXyIndex selects the macroblock being written
//   iPartIdx    - partition index; translated through g_kuiScan4 into a block offset
//   listIdx     - selects which reference list's table is updated
//   iRef        - reference index value to store
// The value is written at offsets +0, +1, +4 and +5 from the translated offset
// (presumably the 2x2 group of 4x4 blocks of an 8x8 partition in a 4-wide raster - TODO confirm).
void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef) {
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
// NOTE(review): this text is a diff listing with its +/- markers stripped. The
// assignment below (through pCurDqLayer->pRefIndex) looks like the removed line and
// the following one (through pCurDqLayer->pDec->pRefIndex) like its replacement;
// confirm against the real source before relying on both stores being present.
pCurDqLayer->pRefIndex[listIdx][iMbXy][iScan4Idx] = pCurDqLayer->pRefIndex[listIdx][iMbXy][iScan4Idx + 1] =
pCurDqLayer->pRefIndex[listIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pRefIndex[listIdx][iMbXy][iScan4Idx + 5] = iRef;
// NOTE(review): pDec is dereferenced here without the `pDec != NULL` guard that
// UpdateP16x16MotionInfo uses in this same listing - verify callers guarantee it is set.
pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 1] =
pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx +
5] = iRef;
}
} // namespace WelsDec
+52 -41
View File
@@ -35,6 +35,7 @@
#include "mv_pred.h"
#include "error_code.h"
#include <stdio.h>
namespace WelsDec {
#define IDX_UNUSED -1
@@ -110,8 +111,8 @@ void UpdateP16x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30],
const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
const uint8_t iCacheIdx6 = 6 + iCacheIdx;
//mb
ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes);
ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes);
ST32 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes);
ST32 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes);
//cache
ST32 (&pRefIndex[iListIdx][iCacheIdx ], iRef4Bytes);
ST32 (&pRefIndex[iListIdx][iCacheIdx6], iRef4Bytes);
@@ -129,8 +130,8 @@ void UpdateP8x16RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30],
const uint8_t iScan4Idx4 = 4 + iScan4Idx;
const uint8_t iCacheIdx6 = 6 + iCacheIdx;
//mb
ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes);
ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes);
ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes);
ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes);
//cache
ST16 (&pRefIndex[iListIdx][iCacheIdx ], iRef2Bytes);
ST16 (&pRefIndex[iListIdx][iCacheIdx6], iRef2Bytes);
@@ -141,8 +142,10 @@ void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30],
const int8_t iListIdx) {
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] =
pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 5] = iRef;
pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1]
=
pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx +
5] = iRef;
}
void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx) {
@@ -476,7 +479,7 @@ int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeigh
uint32_t uiCode;
int32_t iIdxA, iIdxB, iCtxInc;
int8_t* pChromaPredMode = pCtx->pCurDqLayer->pChromaPredMode;
uint32_t* pMbType = pCtx->pCurDqLayer->pMbType;
uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType;
int32_t iLeftAvail = uiNeighAvail & 0x04;
int32_t iTopAvail = uiNeighAvail & 0x01;
@@ -532,7 +535,9 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
pRefCount[0] = pSliceHeader->uiRefCount[0];
pRefCount[1] = pSliceHeader->uiRefCount[1];
switch (pCurDqLayer->pMbType[iMbXy]) {
bool bIsPending = GetThreadCount (pCtx) > 1;
switch (pCurDqLayer->pDec->pMbType[iMbXy]) {
case MB_TYPE_16x16: {
iPartIdx = 0;
WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0,
@@ -547,7 +552,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[0]]
&& ppRefPic[iRef[0]]->bIsComplete);
&& (ppRefPic[iRef[0]]->bIsComplete || bIsPending));
PredMv (pMotionVector, pRefIndex, LIST_0, 0, 4, iRef[0], pMv);
WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
@@ -573,7 +578,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
&& ppRefPic[iRef[i]]->bIsComplete);
&& (ppRefPic[iRef[i]]->bIsComplete || bIsPending));
UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
}
for (i = 0; i < 2; i++) {
@@ -603,7 +608,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
&& ppRefPic[iRef[i]]->bIsComplete);
&& (ppRefPic[iRef[i]]->bIsComplete || bIsPending));
UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
}
for (i = 0; i < 2; i++) {
@@ -651,7 +656,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[pRefIdx[i]]
&& ppRefPic[pRefIdx[i]]->bIsComplete);
&& (ppRefPic[pRefIdx[i]]->bIsComplete || bIsPending));
UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, pRefIdx[i], LIST_0);
}
//mv
@@ -677,8 +682,8 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (SUB_MB_TYPE_8x8 == uiSubMbType) {
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx], LD64 (pMv));
ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv));
ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx], LD64 (pMv));
ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx], LD64 (pMvd));
ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD64 (pMvd));
ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv));
@@ -688,13 +693,13 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
} else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD64 (pMv));
ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD64 (pMvd));
ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv));
ST64 (pMvdCache[0][iCacheIdx ], LD64 (pMvd));
} else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd));
ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD32 (pMvd));
ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv));
@@ -702,7 +707,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd));
ST32 (pMvdCache[0][iCacheIdx + 6], LD32 (pMvd));
} else { //SUB_MB_TYPE_4x4
ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd));
ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv));
ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd));
@@ -734,7 +739,9 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
pRefCount[0] = pSliceHeader->uiRefCount[0];
pRefCount[1] = pSliceHeader->uiRefCount[1];
MbType mbType = pCurDqLayer->pMbType[iMbXy];
MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy];
bool bIsPending = GetThreadCount (pCtx) > 1;
if (IS_DIRECT (mbType)) {
@@ -767,12 +774,13 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iRef[listIdx] = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]
&& pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete);
&& (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete || bIsPending));
}
}
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
@@ -804,12 +812,13 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
ref_idx = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][ref_idx]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx]
&& pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete);
&& (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending));
}
UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx);
ref_idx_list[listIdx][i] = ref_idx;
@@ -848,12 +857,13 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
ref_idx = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][ref_idx]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx]
&& pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete);
&& (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending));
}
UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx);
ref_idx_list[listIdx][i] = ref_idx;
@@ -982,12 +992,13 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iref = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(pCtx->sRefPic.pRefList[listIdx][iref]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][iref]
&& pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete);
&& (pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete || bIsPending));
}
Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iref);
ref_idx_list[listIdx][i] = iref;
@@ -1030,8 +1041,8 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv));
ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd));
ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMvd));
ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv));
@@ -1039,13 +1050,13 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd));
ST64 (pMvdCache[listIdx][iCacheIdx + 6], LD64 (pMvd));
} else if (IS_SUB_4x4 (subMbType)) { //MB_TYPE_4x4
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd));
ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv));
ST32 (pMvdCache[listIdx][iCacheIdx], LD32 (pMvd));
} else if (IS_SUB_4x8 (subMbType)) { //MB_TYPE_4x8 5, 7, 9
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv));
ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd));
ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMvd));
ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv));
@@ -1055,7 +1066,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN
} else { //MB_TYPE_8x4 4, 6, 8
ST32 ((pMv + 2), LD32 (pMv));
ST32 ((pMvd + 2), LD32 (pMvd));
ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd));
ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv));
ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd));
@@ -1077,7 +1088,7 @@ int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail,
uint32_t uiCode;
int32_t iIdxA = 0, iIdxB = 0;
int32_t iCtxInc = 0;
int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pRefIndex[iListIdx][pCtx->pCurDqLayer->iMbXyIndex];
int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pDec->pRefIndex[iListIdx][pCtx->pCurDqLayer->iMbXyIndex];
int8_t* pDirect = pCtx->pCurDqLayer->pDirect[pCtx->pCurDqLayer->iMbXyIndex];
if (iZOrderIdx == 0) {
iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM
@@ -1270,7 +1281,7 @@ int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int3
int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring
int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring
uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
uint32_t* pMbType = pCtx->pCurDqLayer->pMbType;
uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType;
int32_t iCtxInc;
uiCbfBit = 0;
nA = nB = (int8_t)!!IS_INTRA (pMbType[iCurrBlkXy]);
@@ -1493,12 +1504,12 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
int32_t i;
PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
SBitStringAux* pBsAux = pCtx->pCurDqLayer->pBitStringAux;
SDqLayer* pCurLayer = pCtx->pCurDqLayer;
int32_t iDstStrideLuma = pCurLayer->pDec->iLinesize[0];
int32_t iDstStrideChroma = pCurLayer->pDec->iLinesize[1];
int32_t iMbX = pCurLayer->iMbX;
int32_t iMbY = pCurLayer->iMbY;
int32_t iMbXy = pCurLayer->iMbXyIndex;
SDqLayer* pCurDqLayer = pCtx->pCurDqLayer;
int32_t iDstStrideLuma = pCurDqLayer->pDec->iLinesize[0];
int32_t iDstStrideChroma = pCurDqLayer->pDec->iLinesize[1];
int32_t iMbX = pCurDqLayer->iMbX;
int32_t iMbY = pCurDqLayer->iMbY;
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
int32_t iMbOffsetLuma = (iMbX + iMbY * iDstStrideLuma) << 4;
int32_t iMbOffsetChroma = (iMbX + iMbY * iDstStrideChroma) << 3;
@@ -1509,7 +1520,7 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
uint8_t* pPtrSrc;
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
RestoreCabacDecEngineToBS (pCabacDecEngine, pBsAux);
intX_t iBytesLeft = pBsAux->pEndBuf - pBsAux->pCurBuf;
if (iBytesLeft < 384) {
@@ -1536,9 +1547,9 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
pBsAux->pCurBuf += 384;
pCurLayer->pLumaQp[iMbXy] = 0;
pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy]));
pCurDqLayer->pLumaQp[iMbXy] = 0;
pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0;
memset (pCurDqLayer->pNzc[iMbXy], 16, sizeof (pCurDqLayer->pNzc[iMbXy]));
//step 4: cabac engine init
WELS_READ_VERIFY (InitReadBits (pBsAux, 1));
+155 -146
View File
@@ -53,20 +53,20 @@ typedef struct TagReadBitsCache {
uint8_t* pBuf;
} SReadBitsCache;
void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) {
void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer) {
int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
int32_t iCurXy, iTopXy = 0, iLeftXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
int32_t iCurX, iCurY;
iCurXy = pCurLayer->iMbXyIndex;
iCurX = pCurLayer->iMbX;
iCurY = pCurLayer->iMbY;
iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];
iCurXy = pCurDqLayer->iMbXyIndex;
iCurX = pCurDqLayer->iMbX;
iCurY = pCurDqLayer->iMbY;
iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy];
if (iCurX != 0) {
iLeftXy = iCurXy - 1;
iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy];
pNeighAvail->iLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
pNeighAvail->iLeftCbp = pNeighAvail->iLeftAvail ? pCurLayer->pCbp[iLeftXy] : 0;
pNeighAvail->iLeftCbp = pNeighAvail->iLeftAvail ? pCurDqLayer->pCbp[iLeftXy] : 0;
} else {
pNeighAvail->iLeftAvail = 0;
pNeighAvail->iLeftTopAvail = 0;
@@ -74,20 +74,20 @@ void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) {
}
if (iCurY != 0) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy];
pNeighAvail->iTopAvail = (iTopSliceIdc == iCurSliceIdc);
pNeighAvail->iTopCbp = pNeighAvail->iTopAvail ? pCurLayer->pCbp[iTopXy] : 0;
pNeighAvail->iTopCbp = pNeighAvail->iTopAvail ? pCurDqLayer->pCbp[iTopXy] : 0;
if (iCurX != 0) {
iLeftTopXy = iTopXy - 1;
iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy];
pNeighAvail->iLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
} else {
pNeighAvail->iLeftTopAvail = 0;
}
if (iCurX != (pCurLayer->iMbWidth - 1)) {
if (iCurX != (pCurDqLayer->iMbWidth - 1)) {
iRightTopXy = iTopXy + 1;
iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy];
pNeighAvail->iRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
} else {
pNeighAvail->iRightTopAvail = 0;
@@ -99,18 +99,18 @@ void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) {
pNeighAvail->iTopCbp = 0;
}
pNeighAvail->iLeftType = (pNeighAvail->iLeftAvail ? pCurLayer->pMbType[iLeftXy] : 0);
pNeighAvail->iTopType = (pNeighAvail->iTopAvail ? pCurLayer->pMbType[iTopXy] : 0);
pNeighAvail->iLeftTopType = (pNeighAvail->iLeftTopAvail ? pCurLayer->pMbType[iLeftTopXy] : 0);
pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? pCurLayer->pMbType[iRightTopXy] : 0);
pNeighAvail->iLeftType = (pNeighAvail->iLeftAvail ? pCurDqLayer->pDec->pMbType[iLeftXy] : 0);
pNeighAvail->iTopType = (pNeighAvail->iTopAvail ? pCurDqLayer->pDec->pMbType[iTopXy] : 0);
pNeighAvail->iLeftTopType = (pNeighAvail->iLeftTopAvail ? pCurDqLayer->pDec->pMbType[iLeftTopXy] : 0);
pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? pCurDqLayer->pDec->pMbType[iRightTopXy] : 0);
}
void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
PDqLayer pCurLayer) { //no matter slice type, intra_pred_constrained_flag
int32_t iCurXy = pCurLayer->iMbXyIndex;
PDqLayer pCurDqLayer) { //no matter slice type, intra_pred_constrained_flag
int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
if (pNeighAvail->iTopAvail) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
@@ -118,10 +118,10 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo
//stuff non_zero_coeff_count from pNeighAvail(left and top)
if (pNeighAvail->iTopAvail) {
ST32 (&pNonZeroCount[1], LD32 (&pCurLayer->pNzc[iTopXy][12]));
ST32 (&pNonZeroCount[1], LD32 (&pCurDqLayer->pNzc[iTopXy][12]));
pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0;
ST16 (&pNonZeroCount[6], LD16 (&pCurLayer->pNzc[iTopXy][20]));
ST16 (&pNonZeroCount[30], LD16 (&pCurLayer->pNzc[iTopXy][22]));
ST16 (&pNonZeroCount[6], LD16 (&pCurDqLayer->pNzc[iTopXy][20]));
ST16 (&pNonZeroCount[30], LD16 (&pCurDqLayer->pNzc[iTopXy][22]));
} else {
ST32 (&pNonZeroCount[1], 0xFFFFFFFFU);
pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0xFF;
@@ -130,15 +130,15 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo
}
if (pNeighAvail->iLeftAvail) {
pNonZeroCount[8 * 1] = pCurLayer->pNzc[iLeftXy][3];
pNonZeroCount[8 * 2] = pCurLayer->pNzc[iLeftXy][7];
pNonZeroCount[8 * 3] = pCurLayer->pNzc[iLeftXy][11];
pNonZeroCount[8 * 4] = pCurLayer->pNzc[iLeftXy][15];
pNonZeroCount[8 * 1] = pCurDqLayer->pNzc[iLeftXy][3];
pNonZeroCount[8 * 2] = pCurDqLayer->pNzc[iLeftXy][7];
pNonZeroCount[8 * 3] = pCurDqLayer->pNzc[iLeftXy][11];
pNonZeroCount[8 * 4] = pCurDqLayer->pNzc[iLeftXy][15];
pNonZeroCount[5 + 8 * 1] = pCurLayer->pNzc[iLeftXy][17];
pNonZeroCount[5 + 8 * 2] = pCurLayer->pNzc[iLeftXy][21];
pNonZeroCount[5 + 8 * 4] = pCurLayer->pNzc[iLeftXy][19];
pNonZeroCount[5 + 8 * 5] = pCurLayer->pNzc[iLeftXy][23];
pNonZeroCount[5 + 8 * 1] = pCurDqLayer->pNzc[iLeftXy][17];
pNonZeroCount[5 + 8 * 2] = pCurDqLayer->pNzc[iLeftXy][21];
pNonZeroCount[5 + 8 * 4] = pCurDqLayer->pNzc[iLeftXy][19];
pNonZeroCount[5 + 8 * 5] = pCurDqLayer->pNzc[iLeftXy][23];
} else {
pNonZeroCount[8 * 1] =
pNonZeroCount[8 * 2] =
@@ -153,16 +153,16 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo
}
}
void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer) { //no matter slice type
int32_t iCurXy = pCurLayer->iMbXyIndex;
PDqLayer pCurDqLayer) { //no matter slice type
int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
//stuff non_zero_coeff_count from pNeighAvail(left and top)
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
@@ -170,7 +170,7 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
//intraNxN_pred_mode
if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top
ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0]));
ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0]));
} else {
int32_t iPred;
if (IS_INTRA16x16 (pNeighAvail->iTopType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iTopType))
@@ -181,10 +181,10 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left
pIntraPredMode[ 0 + 8 ] = pCurLayer->pIntraPredMode[iLeftXy][4];
pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5];
pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6];
pIntraPredMode[ 0 + 8 * 4] = pCurLayer->pIntraPredMode[iLeftXy][3];
pIntraPredMode[ 0 + 8 ] = pCurDqLayer->pIntraPredMode[iLeftXy][4];
pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5];
pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6];
pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3];
} else {
int8_t iPred;
if (IS_INTRA16x16 (pNeighAvail->iLeftType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iLeftType))
@@ -199,16 +199,16 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer) { //no matter slice type
int32_t iCurXy = pCurLayer->iMbXyIndex;
PDqLayer pCurDqLayer) { //no matter slice type
int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
//stuff non_zero_coeff_count from pNeighAvail(left and top)
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
@@ -216,7 +216,7 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
//intra4x4_pred_mode
if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top
ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0]));
ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0]));
} else {
int32_t iPred;
if (pNeighAvail->iTopAvail)
@@ -227,10 +227,10 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left
pIntraPredMode[ 0 + 8 * 1] = pCurLayer->pIntraPredMode[iLeftXy][4];
pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5];
pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6];
pIntraPredMode[ 0 + 8 * 4] = pCurLayer->pIntraPredMode[iLeftXy][3];
pIntraPredMode[ 0 + 8 * 1] = pCurDqLayer->pIntraPredMode[iLeftXy][4];
pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5];
pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6];
pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3];
} else {
int8_t iPred;
if (pNeighAvail->iLeftAvail)
@@ -245,52 +245,52 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon
}
void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int16_t iMvArray[LIST_A][30][MV_A],
int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) {
int32_t iCurXy = pCurLayer->iMbXyIndex;
int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) {
int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
int32_t iLeftTopXy = 0;
int32_t iRightTopXy = 0;
PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
int32_t listCount = 1;
if (pSliceHeader->eSliceType == B_SLICE) {
listCount = 2;
}
//stuff non_zero_coeff_count from pNeighAvail(left and top)
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
}
if (pNeighAvail->iLeftTopAvail) {
iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iRightTopAvail) {
iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth;
}
for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) {
//stuff mv_cache and iRefIdxArray from left and top (inter)
if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
ST32 (iMvArray[listIdx][6], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
ST32 (iMvArray[listIdx][12], LD32 (pCurLayer->pMv[listIdx][iLeftXy][7]));
ST32 (iMvArray[listIdx][18], LD32 (pCurLayer->pMv[listIdx][iLeftXy][11]));
ST32 (iMvArray[listIdx][24], LD32 (pCurLayer->pMv[listIdx][iLeftXy][15]));
ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3]));
ST32 (iMvArray[listIdx][12], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7]));
ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11]));
ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15]));
ST32 (iMvdCache[listIdx][6], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][3]));
ST32 (iMvdCache[listIdx][12], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][7]));
ST32 (iMvdCache[listIdx][18], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][11]));
ST32 (iMvdCache[listIdx][24], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][15]));
ST32 (iMvdCache[listIdx][6], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][3]));
ST32 (iMvdCache[listIdx][12], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][7]));
ST32 (iMvdCache[listIdx][18], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][11]));
ST32 (iMvdCache[listIdx][24], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][15]));
iRefIdxArray[listIdx][6] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
iRefIdxArray[listIdx][12] = pCurLayer->pRefIndex[listIdx][iLeftXy][7];
iRefIdxArray[listIdx][18] = pCurLayer->pRefIndex[listIdx][iLeftXy][11];
iRefIdxArray[listIdx][24] = pCurLayer->pRefIndex[listIdx][iLeftXy][15];
iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3];
iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7];
iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11];
iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15];
} else {
ST32 (iMvArray[listIdx][6], 0);
ST32 (iMvArray[listIdx][12], 0);
@@ -316,9 +316,9 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
}
if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
ST32 (iMvArray[listIdx][0], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
ST32 (iMvdCache[listIdx][0], LD32 (pCurLayer->pMvd[listIdx][iLeftTopXy][15]));
iRefIdxArray[listIdx][0] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15]));
ST32 (iMvdCache[listIdx][0], LD32 (pCurDqLayer->pMvd[listIdx][iLeftTopXy][15]));
iRefIdxArray[listIdx][0] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15];
} else {
ST32 (iMvArray[listIdx][0], 0);
ST32 (iMvdCache[listIdx][0], 0);
@@ -330,11 +330,11 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
ST64 (iMvArray[listIdx][1], LD64 (pCurLayer->pMv[listIdx][iTopXy][12]));
ST64 (iMvArray[listIdx][3], LD64 (pCurLayer->pMv[listIdx][iTopXy][14]));
ST64 (iMvdCache[listIdx][1], LD64 (pCurLayer->pMvd[listIdx][iTopXy][12]));
ST64 (iMvdCache[listIdx][3], LD64 (pCurLayer->pMvd[listIdx][iTopXy][14]));
ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurLayer->pRefIndex[listIdx][iTopXy][12]));
ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12]));
ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14]));
ST64 (iMvdCache[listIdx][1], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][12]));
ST64 (iMvdCache[listIdx][3], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][14]));
ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12]));
} else {
ST64 (iMvArray[listIdx][1], 0);
ST64 (iMvArray[listIdx][3], 0);
@@ -354,9 +354,9 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
ST32 (iMvArray[listIdx][5], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
ST32 (iMvdCache[listIdx][5], LD32 (pCurLayer->pMvd[listIdx][iRightTopXy][12]));
iRefIdxArray[listIdx][5] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12]));
ST32 (iMvdCache[listIdx][5], LD32 (pCurDqLayer->pMvd[listIdx][iRightTopXy][12]));
iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12];
} else {
ST32 (iMvArray[listIdx][5], 0);
if (0 == pNeighAvail->iRightTopAvail) { //not available
@@ -385,56 +385,56 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun
}
}
void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer) {
void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer) {
int32_t iCurXy = pCurLayer->iMbXyIndex;
int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
int32_t iLeftTopXy = 0;
int32_t iRightTopXy = 0;
if (pNeighAvail->iTopAvail) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
}
if (pNeighAvail->iLeftTopAvail) {
iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iRightTopAvail) {
iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth;
}
memset (iDirect, 0, 30);
if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
iDirect[6] = pCurLayer->pDirect[iLeftXy][3];
iDirect[12] = pCurLayer->pDirect[iLeftXy][7];
iDirect[18] = pCurLayer->pDirect[iLeftXy][11];
iDirect[24] = pCurLayer->pDirect[iLeftXy][15];
iDirect[6] = pCurDqLayer->pDirect[iLeftXy][3];
iDirect[12] = pCurDqLayer->pDirect[iLeftXy][7];
iDirect[18] = pCurDqLayer->pDirect[iLeftXy][11];
iDirect[24] = pCurDqLayer->pDirect[iLeftXy][15];
}
if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
iDirect[0] = pCurLayer->pDirect[iLeftTopXy][15];
iDirect[0] = pCurDqLayer->pDirect[iLeftTopXy][15];
}
if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
ST32 (&iDirect[1], LD32 (&pCurLayer->pDirect[iTopXy][12]));
ST32 (&iDirect[1], LD32 (&pCurDqLayer->pDirect[iTopXy][12]));
}
if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
iDirect[5] = pCurLayer->pDirect[iRightTopXy][12];
iDirect[5] = pCurDqLayer->pDirect[iRightTopXy][12];
}
//right-top 4*4 block unavailable
}
void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) {
int32_t iCurXy = pCurLayer->iMbXyIndex;
int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) {
int32_t iCurXy = pCurDqLayer->iMbXyIndex;
int32_t iTopXy = 0;
int32_t iLeftXy = 0;
int32_t iLeftTopXy = 0;
int32_t iRightTopXy = 0;
PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
int32_t listCount = 1;
if (pSliceHeader->eSliceType == B_SLICE) {
@@ -442,32 +442,32 @@ void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
}
//stuff non_zero_coeff_count from pNeighAvail(left and top)
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer);
if (pNeighAvail->iTopAvail) {
iTopXy = iCurXy - pCurLayer->iMbWidth;
iTopXy = iCurXy - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iLeftAvail) {
iLeftXy = iCurXy - 1;
}
if (pNeighAvail->iLeftTopAvail) {
iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth;
}
if (pNeighAvail->iRightTopAvail) {
iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth;
}
for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) {
//stuff mv_cache and iRefIdxArray from left and top (inter)
if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
ST32 (iMvArray[listIdx][6], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
ST32 (iMvArray[listIdx][12], LD32 (pCurLayer->pMv[listIdx][iLeftXy][7]));
ST32 (iMvArray[listIdx][18], LD32 (pCurLayer->pMv[listIdx][iLeftXy][11]));
ST32 (iMvArray[listIdx][24], LD32 (pCurLayer->pMv[listIdx][iLeftXy][15]));
iRefIdxArray[listIdx][6] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
iRefIdxArray[listIdx][12] = pCurLayer->pRefIndex[listIdx][iLeftXy][7];
iRefIdxArray[listIdx][18] = pCurLayer->pRefIndex[listIdx][iLeftXy][11];
iRefIdxArray[listIdx][24] = pCurLayer->pRefIndex[listIdx][iLeftXy][15];
ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3]));
ST32 (iMvArray[listIdx][12], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7]));
ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11]));
ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15]));
iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3];
iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7];
iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11];
iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15];
} else {
ST32 (iMvArray[listIdx][6], 0);
ST32 (iMvArray[listIdx][12], 0);
@@ -487,8 +487,8 @@ void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
}
}
if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
ST32 (iMvArray[listIdx][0], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
iRefIdxArray[listIdx][0] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15]));
iRefIdxArray[listIdx][0] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15];
} else {
ST32 (iMvArray[listIdx][0], 0);
if (0 == pNeighAvail->iLeftTopAvail) { //not available
@@ -498,9 +498,9 @@ void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
}
}
if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
ST64 (iMvArray[listIdx][1], LD64 (pCurLayer->pMv[listIdx][iTopXy][12]));
ST64 (iMvArray[listIdx][3], LD64 (pCurLayer->pMv[listIdx][iTopXy][14]));
ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurLayer->pRefIndex[listIdx][iTopXy][12]));
ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12]));
ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14]));
ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12]));
} else {
ST64 (iMvArray[listIdx][1], 0);
ST64 (iMvArray[listIdx][3], 0);
@@ -517,8 +517,8 @@ void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
}
}
if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
ST32 (iMvArray[listIdx][5], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
iRefIdxArray[listIdx][5] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12]));
iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12];
} else {
ST32 (iMvArray[listIdx][5], 0);
if (0 == pNeighAvail->iRightTopAvail) { //not available
@@ -1083,7 +1083,9 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
iRefCount[0] = pSliceHeader->uiRefCount[0];
iRefCount[1] = pSliceHeader->uiRefCount[1];
switch (pCurDqLayer->pMbType[iMbXy]) {
bool bIsPending = GetThreadCount (pCtx) > 1;
switch (pCurDqLayer->pDec->pMbType[iMbXy]) {
case MB_TYPE_16x16: {
int32_t iRefIdx = 0;
if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
@@ -1105,7 +1107,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx]
&& ppRefPic[iRefIdx]->bIsComplete);
&& (ppRefPic[iRefIdx]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1146,7 +1148,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
&& ppRefPic[iRefIdx[i]]->bIsComplete);
&& (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending));
}
for (i = 0; i < 2; i++) {
PredInter16x8Mv (iMvArray, iRefIdxArray, LIST_0, i << 3, iRefIdx[i], iMv);
@@ -1183,7 +1185,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
&& ppRefPic[iRefIdx[i]]->bIsComplete);
&& (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1207,7 +1209,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
int32_t iRefIdx[4] = {0}, iSubPartCount[4], iPartWidth[4];
uint32_t uiSubMbType;
if (MB_TYPE_8x8_REF0 == pCurDqLayer->pMbType[iMbXy]) {
if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) {
iRefCount[0] =
iRefCount[1] = 1;
}
@@ -1235,8 +1237,8 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
//iRefIdxArray
if (MB_TYPE_8x8_REF0 == pCurDqLayer->pMbType[iMbXy]) {
memset (pCurDqLayer->pRefIndex[0][iMbXy], 0, 16);
if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) {
memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, 16);
} else {
for (i = 0; i < 4; i++) {
int16_t iIndex8 = i << 2;
@@ -1255,10 +1257,11 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
&& ppRefPic[iRefIdx[i]]->bIsComplete);
&& (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending));
pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 1] =
pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 5] = iRefIdx[i];
pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 1] =
pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 5] =
iRefIdx[i];
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1290,26 +1293,26 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M
iMv[1] += iCode;
WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
if (SUB_MB_TYPE_8x8 == uiSubMbType) {
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 5], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 5], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 7], LD32 (iMv));
} else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv));
} else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv));
} else { //SUB_MB_TYPE_4x4 == uiSubMbType
ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv));
ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv));
}
}
@@ -1345,7 +1348,9 @@ int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][
iRefCount[0] = pSliceHeader->uiRefCount[0];
iRefCount[1] = pSliceHeader->uiRefCount[1];
MbType mbType = pCurDqLayer->pMbType[iMbXy];
bool bIsPending = GetThreadCount (pCtx) > 1;
MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy];
if (IS_DIRECT (mbType)) {
int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
@@ -1385,12 +1390,13 @@ int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
ref_idx_list[listIdx][0] = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(ppRefPic[listIdx][ref_idx_list[listIdx][0]]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][ref_idx_list[listIdx][0]]
&& ppRefPic[listIdx][ref_idx_list[listIdx][0]]->bIsComplete);
&& (ppRefPic[listIdx][ref_idx_list[listIdx][0]]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1434,13 +1440,14 @@ int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iRefIdx = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(ppRefPic[listIdx][iRefIdx]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
ref_idx_list[listIdx][i] = iRefIdx;
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx]
&& ppRefPic[listIdx][iRefIdx]->bIsComplete);
&& (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1493,13 +1500,14 @@ int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iRefIdx = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(ppRefPic[listIdx][iRefIdx]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
ref_idx_list[listIdx][i] = iRefIdx;
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx]
&& ppRefPic[listIdx][iRefIdx]->bIsComplete);
&& (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1639,12 +1647,13 @@ int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][
if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
iref = 0;
pCtx->iErrorCode |= dsBitstreamError;
RETURN_ERR_IF_NULL(ppRefPic[listIdx][iref]);
} else {
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
}
}
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iref]
&& ppRefPic[listIdx][iref]->bIsComplete);
&& (ppRefPic[listIdx][iref]->bIsComplete || bIsPending));
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1689,26 +1698,26 @@ int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][
* (uint32_t*)iMv = 0;
}
if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 5], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 5], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx + 7], LD32 (iMv));
} else if (IS_SUB_8x4 (subMbType)) {
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv));
} else if (IS_SUB_4x8 (subMbType)) {
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv));
} else { //SUB_MB_TYPE_4x4 == uiSubMbType
ST32 (pCurDqLayer->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv));
ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv));
}
}
+66 -7
View File
@@ -106,13 +106,15 @@ PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const
pPic->iWidthInPixel = kiPicWidth;
pPic->iHeightInPixel = kiPicHeight;
pPic->iFrameNum = -1;
pPic->bAvailableFlag = true;
pPic->iRefCount = 0;
uint32_t uiMbWidth = (kiPicWidth + 15) >> 4;
uint32_t uiMbHeight = (kiPicHeight + 15) >> 4;
uint32_t uiMbCount = uiMbWidth * uiMbHeight;
pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t),
"pPic->pMbType");
pPic->pMbCorrectlyDecodedFlag = (bool*)pMa->WelsMallocz (uiMbCount * sizeof (bool), "pPic->pMbCorrectlyDecodedFlag");
pPic->pNzc = GetThreadCount (pCtx) > 1 ? (int8_t (*)[24])pMa->WelsMallocz (uiMbCount * 24, "pPic->pNzc") : NULL;
pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), "pPic->pMbType");
pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]");
pPic->pMv[LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
@@ -121,6 +123,15 @@ PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const
int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]");
pPic->pRefIndex[LIST_1] = (int8_t (*)[16])pMa->WelsMallocz (uiMbCount * sizeof (
int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]");
if (pCtx->pThreadCtx != NULL) {
pPic->pReadyEvent = (SWelsDecEvent*)pMa->WelsMallocz (uiMbHeight * sizeof (SWelsDecEvent), "pPic->pReadyEvent");
for (uint32_t i = 0; i < uiMbHeight; ++i) {
CREATE_EVENT (&pPic->pReadyEvent[i], 1, 0, NULL);
}
} else {
pPic->pReadyEvent = NULL;
}
return pPic;
}
@@ -131,6 +142,16 @@ void FreePicture (PPicture pPic, CMemoryAlign* pMa) {
pPic->pBuffer[0] = NULL;
}
if (pPic->pMbCorrectlyDecodedFlag) {
pMa->WelsFree (pPic->pMbCorrectlyDecodedFlag, "pPic->pMbCorrectlyDecodedFlag");
pPic->pMbCorrectlyDecodedFlag = NULL;
}
if (pPic->pNzc) {
pMa->WelsFree (pPic->pNzc, "pPic->pNzc");
pPic->pNzc = NULL;
}
if (pPic->pMbType) {
pMa->WelsFree (pPic->pMbType, "pPic->pMbType");
pPic->pMbType = NULL;
@@ -147,6 +168,14 @@ void FreePicture (PPicture pPic, CMemoryAlign* pMa) {
pPic->pRefIndex[listIdx] = NULL;
}
}
if (pPic->pReadyEvent != NULL) {
uint32_t uiMbHeight = (pPic->iHeightInPixel + 15) >> 4;
for (uint32_t i = 0; i < uiMbHeight; ++i) {
CLOSE_EVENT (&pPic->pReadyEvent[i]);
}
pMa->WelsFree (pPic->pReadyEvent, "pPic->pReadyEvent");
pPic->pReadyEvent = NULL;
}
pMa->WelsFree (pPic, "pPic");
pPic = NULL;
}
@@ -160,25 +189,55 @@ PPicture PrefetchPic (PPicBuff pPicBuf) {
}
for (iPicIdx = pPicBuf->iCurrentIdx + 1; iPicIdx < pPicBuf->iCapacity ; ++iPicIdx) {
if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag
&& !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) {
if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef
&& pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) {
pPic = pPicBuf->ppPic[iPicIdx];
break;
}
}
if (pPic != NULL) {
pPicBuf->iCurrentIdx = iPicIdx;
pPic->iPicBuffIdx = iPicIdx;
return pPic;
}
for (iPicIdx = 0 ; iPicIdx <= pPicBuf->iCurrentIdx ; ++iPicIdx) {
if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag
&& !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) {
if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef
&& pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) {
pPic = pPicBuf->ppPic[iPicIdx];
break;
}
}
pPicBuf->iCurrentIdx = iPicIdx;
if (pPic != NULL) {
pPic->iPicBuffIdx = iPicIdx;
}
return pPic;
}
// Hand out the picture stored in the buffer's current slot and step the slot index forward by one,
// wrapping back to 0 once it reaches iCapacity.
//   pPicBuf - picture buffer; ppPic, iCurrentIdx and iCapacity are read, iCurrentIdx is updated.
// Returns the picture that occupied the slot on entry, with its iPicBuffIdx set to that slot.
// Returns NULL when the buffer has zero capacity or when the slot holds no picture.
PPicture PrefetchPicForThread (PPicBuff pPicBuf) {
  if (pPicBuf->iCapacity == 0) {
    return NULL;
  }

  PPicture pPic = pPicBuf->ppPic[pPicBuf->iCurrentIdx];
  // PrefetchPic treats ppPic[] entries as possibly NULL; do the same here rather than
  // dereferencing an empty slot. The caller receives NULL in that case.
  if (pPic != NULL) {
    pPic->iPicBuffIdx = pPicBuf->iCurrentIdx;
  }

  // Advance even for an empty slot so the next call inspects the following entry.
  if (++pPicBuf->iCurrentIdx >= pPicBuf->iCapacity) {
    pPicBuf->iCurrentIdx = 0;
  }
  return pPic;
}
// Look up the picture stored at a previously recorded buffer slot.
//   pPicBuf         - picture buffer; only iCapacity and ppPic are read, nothing is modified.
//   iLastPicBuffIdx - slot index to fetch.
// Returns ppPic[iLastPicBuffIdx] when the index lies in [0, iCapacity), otherwise NULL.
PPicture PrefetchLastPicForThread (PPicBuff pPicBuf, const int32_t& iLastPicBuffIdx) {
  // A zero-capacity buffer has no valid slot at all.
  if (pPicBuf->iCapacity == 0) {
    return NULL;
  }
  // Reject indices outside the buffer.
  if (iLastPicBuffIdx < 0 || iLastPicBuffIdx >= pPicBuf->iCapacity) {
    return NULL;
  }
  return pPicBuf->ppPic[iLastPicBuffIdx];
}
+200 -173
View File
@@ -44,20 +44,20 @@
namespace WelsDec {
// Copy the current picture's line strides onto the layer and, when bOutput is set, point the
// layer's three pPred entries at the current macroblock's position inside the picture planes.
//   pCtx    - decoder context; pCtx->pDec is the picture whose iLinesize[] and pData[] are read.
//   bOutput - when false only the strides are stored and pPred[] is left untouched.
//   layer   - supplies iMbX / iMbY and receives iLumaStride, iChromaStride and pPred[].
// NOTE(review): this span is a rendered diff, so the signature and each layer access appear twice,
// once with the parameter named pCurLayer and once as pCurDqLayer - confirm which set is live.
void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer) {
void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer) {
PPicture pCurPic = pCtx->pDec;
// iLinesize[0] is used as the luma stride, iLinesize[1] as the stride for both chroma planes.
int32_t iLumaStride = pCurPic->iLinesize[0];
int32_t iChromaStride = pCurPic->iLinesize[1];
int32_t iMbX = pCurLayer->iMbX;
int32_t iMbY = pCurLayer->iMbY;
int32_t iMbX = pCurDqLayer->iMbX;
int32_t iMbY = pCurDqLayer->iMbY;
pCurLayer->iLumaStride = iLumaStride;
pCurLayer->iChromaStride = iChromaStride;
pCurDqLayer->iLumaStride = iLumaStride;
pCurDqLayer->iChromaStride = iChromaStride;
if (bOutput) {
// Offsets are (iMbY * stride + iMbX) scaled by 16 for plane 0 (<< 4) and by 8 for planes 1 and 2 (<< 3).
pCurLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
pCurLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
pCurLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
pCurDqLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
pCurDqLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
pCurDqLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
}
}
@@ -214,11 +214,10 @@ int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLe
//according to current 8*8 block ref_index to gain reference picture
static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, int8_t* pRefIdxList,
int32_t iIndex, int32_t listIdx) {
static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, const int8_t& iRefIdx,
int32_t listIdx) {
PPicture pRefPic;
int8_t iRefIdx = pRefIdxList[iIndex];
if (iRefIdx >= 0) {
pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx];
@@ -242,7 +241,9 @@ static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pC
#ifndef MC_FLOW_SIMPLE_JUDGE
#define MC_FLOW_SIMPLE_JUDGE 1
#endif //MC_FLOW_SIMPLE_JUDGE
void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx,
int32_t iXOffset, int32_t iYOffset,
SMcFunc* pMCFunc,
int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]) {
int32_t iFullMVx = (iXOffset << 2) + iMVs[0]; //quarter pixel
int32_t iFullMVy = (iYOffset << 2) + iMVs[1];
@@ -251,6 +252,27 @@ void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFun
iFullMVy = WELS_CLIP3 (iFullMVy, ((-PADDING_LENGTH + 2) * (1 << 2)),
((pMCRefMem->iPicHeight + PADDING_LENGTH - 19) * (1 << 2)));
if (GetThreadCount (pCtx) > 1 && iRefIdx >= 0) {
// wait for the lines of reference macroblock (3 + 16).
PPicture pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx];
if (pCtx->bNewSeqBegin && (pCtx->iErrorCode & dsRefLost)) {
//set event if refpic is lost to prevent from infinite waiting.
if (!pRefPic->pReadyEvent[0].isSignaled) {
for (uint32_t ln = 0; ln < pCtx->sMb.iMbHeight; ++ln) {
SET_EVENT (&pRefPic->pReadyEvent[ln]);
}
}
}
int32_t offset = (iFullMVy >> 2) + iBlkHeight + 3 + 16;
if (offset > pCtx->lastReadyHeightOffset[listIdx][iRefIdx]) {
const int32_t down_line = WELS_MIN (offset >> 4, int32_t (pCtx->sMb.iMbHeight) - 1);
if (pRefPic->pReadyEvent[down_line].isSignaled != 1) {
WAIT_EVENT (&pRefPic->pReadyEvent[down_line], WELS_DEC_THREAD_WAIT_INFINITE);
}
pCtx->lastReadyHeightOffset[listIdx][iRefIdx] = offset;
}
}
int32_t iSrcPixOffsetLuma = (iFullMVx >> 2) + (iFullMVy >> 2) * pMCRefMem->iSrcLineLuma;
int32_t iSrcPixOffsetChroma = (iFullMVx >> 3) + (iFullMVy >> 3) * pMCRefMem->iSrcLineChroma;
@@ -446,7 +468,7 @@ int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWels
int16_t iMVs[2] = {0};
uint32_t iMBType = pCurDqLayer->pMbType[iMBXY];
uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY];
int32_t iMBOffsetX = pCurDqLayer->iMbX << 4;
int32_t iMBOffsetY = pCurDqLayer->iMbY << 4;
@@ -466,65 +488,66 @@ int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWels
pMCRefMem.iDstLineLuma = iDstLineLuma;
pMCRefMem.iDstLineChroma = iDstLineChroma;
int32_t iRefIndex = 0;
int8_t iRefIndex = 0;
switch (iMBType) {
case MB_TYPE_SKIP:
case MB_TYPE_16x16:
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0));
BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 16);
}
break;
case MB_TYPE_16x8:
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0));
BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8);
}
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][8][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][8][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 8, LIST_0));
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][8][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][8][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][8];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
pMCRefMem.pDstY = pPredY + (iDstLineLuma << 3);
pMCRefMem.pDstU = pPredCb + (iDstLineChroma << 2);
pMCRefMem.pDstV = pPredCr + (iDstLineChroma << 2);
BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs);
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][8];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8);
}
break;
case MB_TYPE_8x16:
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0));
BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16);
}
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][2][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][2][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 2, LIST_0));
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][2][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][2][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][2];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
pMCRefMem.pDstY = pPredY + 8;
pMCRefMem.pDstU = pPredCb + 4;
pMCRefMem.pDstV = pPredCr + 4;
BaseMC (&pMCRefMem, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs);
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][2];
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16);
}
break;
@@ -541,9 +564,8 @@ int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWels
iYOffset = iMBOffsetY + iBlk8Y;
iIIdx = ((i >> 1) << 3) + ((i & 1) << 1);
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], iIIdx, LIST_0));
iRefIndex = pCurDqLayer->bUseWeightPredictionFlag ? pCurDqLayer->pRefIndex[0][iMBXY][iIIdx] : 0;
iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][iIIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0));
pDstY = pPredY + iBlk8X + iBlk8Y * iDstLineLuma;
pDstU = pPredCb + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
pDstV = pPredCr + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
@@ -552,9 +574,9 @@ int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWels
pMCRefMem.pDstV = pDstV;
switch (iSubMBType) {
case SUB_MB_TYPE_8x8:
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 8);
@@ -562,21 +584,21 @@ int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWels
break;
case SUB_MB_TYPE_8x4:
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4);
}
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][1];
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 4][1];
pMCRefMem.pDstY += (iDstLineLuma << 2);
pMCRefMem.pDstU += (iDstLineChroma << 1);
pMCRefMem.pDstV += (iDstLineChroma << 1);
BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4);
@@ -584,21 +606,21 @@ int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWels
break;
case SUB_MB_TYPE_4x8:
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8);
}
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][1];
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][1];
pMCRefMem.pDstY += 4;
pMCRefMem.pDstU += 2;
pMCRefMem.pDstV += 2;
BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8);
@@ -618,9 +640,9 @@ int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWels
pMCRefMem.pDstU = pDstU + iUVLineStride;
pMCRefMem.pDstV = pDstV + iUVLineStride;
iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][1];
BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
if (pCurDqLayer->bUseWeightPredictionFlag) {
WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 4);
@@ -652,7 +674,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
int16_t iMVs[2] = { 0 };
uint32_t iMBType = pCurDqLayer->pMbType[iMBXY];
uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY];
int32_t iMBOffsetX = pCurDqLayer->iMbX << 4;
int32_t iMBOffsetY = pCurDqLayer->iMbY << 4;
@@ -677,37 +699,38 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pTempMCRefMem.pDstV = pTempPredYCbCr[2];
int32_t iRefIndex1 = 0;
int32_t iRefIndex2 = 0;
int8_t iRefIndex0 = 0;
int8_t iRefIndex1 = 0;
int8_t iRefIndex = 0;
bool bWeightedBipredIdcIs1 = pCurDqLayer->sLayerInfo.pPps->uiWeightedBipredIdc == 1;
if (IS_INTER_16x16 (iMBType)) {
if (IS_TYPE_L0 (iMBType) && IS_TYPE_L1 (iMBType)) {
iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][0][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], 0, LIST_0));
BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][1];
iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][0];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0));
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][0][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], 0, LIST_1));
BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][0];
iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][0];
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][1];
iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][0];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 16);
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 16, 16);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 16);
}
} else {
int32_t listIdx = (iMBType & MB_TYPE_P0L0) ? LIST_0 : LIST_1;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][0][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], 0, listIdx));
BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][0];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
if (bWeightedBipredIdcIs1) {
int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][0];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 16, 16);
}
}
@@ -719,29 +742,31 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (IS_DIR (iMBType, i, listIdx)) {
lastListIdx = listIdx;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iPartIdx, listIdx));
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iPartIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
if (i) {
pMCRefMem.pDstY += (iDstLineLuma << 3);
pMCRefMem.pDstU += (iDstLineChroma << 2);
pMCRefMem.pDstV += (iDstLineChroma << 2);
}
BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
if (++listCount == 2) {
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iPartIdx, LIST_1));
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][1];
iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
if (i) {
pTempMCRefMem.pDstY += (iDstLineLuma << 3);
pTempMCRefMem.pDstU += (iDstLineChroma << 2);
pTempMCRefMem.pDstV += (iDstLineChroma << 2);
}
BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iPartIdx];
iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iPartIdx];
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 8);
iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iPartIdx];
iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx];
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 16, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 8);
}
@@ -750,7 +775,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
}
if (listCount == 1) {
if (bWeightedBipredIdcIs1) {
int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][iPartIdx];
iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][iPartIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 16, 8);
}
}
@@ -762,29 +787,31 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
if (IS_DIR (iMBType, i, listIdx)) {
lastListIdx = listIdx;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], i << 1, listIdx));
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][i << 1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
if (i) {
pMCRefMem.pDstY += 8;
pMCRefMem.pDstU += 4;
pMCRefMem.pDstV += 4;
}
BaseMC (&pMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (++listCount == 2) {
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], i << 1, LIST_1));
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][1];
iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
if (i) {
pTempMCRefMem.pDstY += 8;
pTempMCRefMem.pDstU += 4;
pTempMCRefMem.pDstV += 4;
}
BaseMC (&pTempMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][i << 1];
iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][i << 1];
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 16);
iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][i << 1];
iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1];
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 16);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 16);
}
@@ -793,7 +820,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
}
if (listCount == 1) {
if (bWeightedBipredIdcIs1) {
int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][i << 1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][i << 1];
WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 8, 16);
}
}
@@ -830,53 +857,53 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pTempMCRefMem.pDstV = pDstV2;
if ((IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType))) {
iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iIIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], iIIdx, LIST_0));
iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iIIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0));
iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iIIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iIIdx, LIST_1));
iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iIIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1));
} else {
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
iRefIndex1 = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iIIdx, listIdx));
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx));
}
if (IS_SUB_8x8 (iSubMBType)) {
if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) {
iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1];
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 8);
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 8);
}
} else {
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
if (bWeightedBipredIdcIs1) {
int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 8);
}
}
} else if (IS_SUB_8x4 (iSubMBType)) {
if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_8x4
iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1];
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4);
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4);
}
@@ -884,49 +911,49 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstY += (iDstLineLuma << 2);
pMCRefMem.pDstU += (iDstLineChroma << 1);
pMCRefMem.pDstV += (iDstLineChroma << 1);
iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
pTempMCRefMem.pDstY += (iDstLineLuma << 2);
pTempMCRefMem.pDstU += (iDstLineChroma << 1);
pTempMCRefMem.pDstV += (iDstLineChroma << 1);
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][1];
BaseMC (&pTempMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][1];
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4);
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4);
}
} else { //B_L0_8x4 B_L1_8x4
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
pMCRefMem.pDstY += (iDstLineLuma << 2);
pMCRefMem.pDstU += (iDstLineChroma << 1);
pMCRefMem.pDstV += (iDstLineChroma << 1);
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][1];
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
if (bWeightedBipredIdcIs1) {
int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 4);
}
}
} else if (IS_SUB_4x8 (iSubMBType)) {
if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_4x8
iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1];
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8);
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8);
}
@@ -934,35 +961,35 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstY += 4;
pMCRefMem.pDstU += 2;
pMCRefMem.pDstV += 2;
iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][1];
BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
pTempMCRefMem.pDstY += 4;
pTempMCRefMem.pDstU += 2;
pTempMCRefMem.pDstV += 2;
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][1];
BaseMC (&pTempMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][1];
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8);
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8);
}
} else { //B_L0_4x8 B_L1_4x8
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1];
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
pMCRefMem.pDstY += 4;
pMCRefMem.pDstU += 2;
pMCRefMem.pDstV += 2;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][1];
BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][1];
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
if (bWeightedBipredIdcIs1) {
int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 8);
}
}
@@ -980,27 +1007,27 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstU = pDstU + iUVLineStride;
pMCRefMem.pDstV = pDstV + iUVLineStride;
iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1];
BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1];
BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
pTempMCRefMem.pDstY = pDstY2 + iBlk8X + iBlk8Y * iDstLineLuma;
pTempMCRefMem.pDstU = pDstU2 + iUVLineStride;
pTempMCRefMem.pDstV = pDstV2 + iUVLineStride;;
iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1];
BaseMC (&pTempMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1];
BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
if (pCurDqLayer->bUseWeightedBiPredIdc) {
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 4);
BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 4);
} else {
BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 4);
}
}
} else {
int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx];
for (int32_t j = 0; j < 4; j++) {
int32_t iUVLineStride;
iJIdx = ((j >> 1) << 2) + (j & 1);
@@ -1013,9 +1040,9 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels
pMCRefMem.pDstU = pDstU + iUVLineStride;
pMCRefMem.pDstV = pDstV + iUVLineStride;
iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][1];
BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][0];
iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][1];
BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
if (bWeightedBipredIdcIs1) {
WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 4);
}
@@ -0,0 +1,311 @@
/*!
* \copy
* Copyright (c) 2009-2019, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file wels_decoder_thread.cpp
*
* \brief Interfaces introduced in thread programming
*
* \date 08/06/2018 Created
*
*************************************************************************************
*/
#ifdef __linux__
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>
#elif !defined(_WIN32) && !defined(__CYGWIN__)
#include <sys/types.h>
#include <sys/param.h>
#include <unistd.h>
#ifndef __Fuchsia__
#include <sys/sysctl.h>
#endif
#ifdef __APPLE__
#define HW_NCPU_NAME "hw.logicalcpu"
#else
#define HW_NCPU_NAME "hw.ncpu"
#endif
#endif
#include "wels_decoder_thread.h"
#include <stdio.h>
#include <stdlib.h>
// Returns the logical processor count reported by WelsQueryLogicalProcessInfo.
// The count is preset to 1, so that value is returned if the query leaves the
// field untouched.
int32_t GetCPUCount() {
  WelsLogicalProcessInfo sProcessInfo;
  sProcessInfo.ProcessorCount = 1; // fallback value before querying
  WelsQueryLogicalProcessInfo (&sProcessInfo);
  return sProcessInfo.ProcessorCount;
}
// Starts routine `tf` with argument `ta` on a new thread and stores its handle
// in t->h. A zero-initialised thread attribute is passed through.
// Returns whatever WelsThreadCreate returns.
int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta) {
  WELS_THREAD_ATTR sDefaultAttr = 0;
  return WelsThreadCreate (&t->h, tf, ta, sDefaultAttr);
}
// Joins the thread whose handle is stored in t->h and forwards the result of
// WelsThreadJoin to the caller.
int ThreadWait (SWelsDecThread* t) {
  const int iJoinResult = WelsThreadJoin (t->h);
  return iJoinResult;
}
#if defined(_WIN32) || defined(__CYGWIN__)
// Win32: creates an unnamed event object with default security attributes.
//   manualReset  - forwarded to CreateEvent as the manual-reset flag
//   initialState - forwarded to CreateEvent and mirrored in e->isSignaled
// Returns 0 on success, 1 if CreateEvent returned NULL.
int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) {
  e->h = CreateEvent (NULL, manualReset, initialState, NULL);
  e->isSignaled = initialState;
  if (e->h == NULL) {
    return 1;
  }
  return 0;
}
// Win32: puts the event into the non-signaled state and clears the mirrored
// isSignaled flag.
void EventReset (SWelsDecEvent* e) {
  ResetEvent (e->h);
  e->isSignaled = 0; // keep the bookkeeping flag in step with the kernel object
}
// Win32: signals the event and records that in the mirrored isSignaled flag.
void EventPost (SWelsDecEvent* e) {
  SetEvent (e->h);
  e->isSignaled = 1; // keep the bookkeeping flag in step with the kernel object
}
// Win32: waits for the event to become signaled.
//   timeout - milliseconds to wait; WELS_DEC_THREAD_WAIT_INFINITE or any
//             negative value waits without a time limit.
// Returns WELS_DEC_THREAD_WAIT_SIGNALED when the wait was satisfied, otherwise
// WELS_DEC_THREAD_WAIT_TIMEDOUT.
// The non-signaled result used to be the raw Win32 WAIT_TIMEOUT code; it now
// uses the same WELS_DEC_THREAD_* constant as the POSIX EventWait and the
// Win32 SemWait so callers see one result vocabulary on every platform.
int EventWait (SWelsDecEvent* e, int32_t timeout) {
  const bool bWaitForever = ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE) || (timeout < 0);
  const DWORD result = WaitForSingleObject (e->h, bWaitForever ? INFINITE : (DWORD)timeout);
  if (result == WAIT_OBJECT_0)
    return WELS_DEC_THREAD_WAIT_SIGNALED;
  return WELS_DEC_THREAD_WAIT_TIMEDOUT;
}
// Win32: closes the event handle and clears it so a stale handle is not reused.
void EventDestroy (SWelsDecEvent* e) {
  CloseHandle (e->h);
  e->h = NULL;
}
// Win32: creates an unnamed semaphore with default security attributes.
//   value - initial count, max - maximum count (both forwarded to CreateSemaphore)
// Returns 0 on success, 1 if CreateSemaphore returned NULL.
int SemCreate (SWelsDecSemphore* s, long value, long max) {
  s->h = CreateSemaphore (NULL, value, max, NULL);
  if (s->h == NULL) {
    return 1;
  }
  return 0;
}
// Win32: waits on the semaphore.
//   timeout - milliseconds to wait; WELS_DEC_THREAD_WAIT_INFINITE or any
//             negative value waits without a time limit.
// Returns WELS_DEC_THREAD_WAIT_SIGNALED when the wait was satisfied, otherwise
// WELS_DEC_THREAD_WAIT_TIMEDOUT.
int SemWait (SWelsDecSemphore* s, int32_t timeout) {
  const bool bWaitForever = ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE) || (timeout < 0);
  const DWORD result = bWaitForever ? WaitForSingleObject (s->h, INFINITE)
                                    : WaitForSingleObject (s->h, timeout);
  return (result == WAIT_OBJECT_0) ? WELS_DEC_THREAD_WAIT_SIGNALED
                                   : WELS_DEC_THREAD_WAIT_TIMEDOUT;
}
// Win32: increases the semaphore count by one. `prevcount` is handed straight
// to ReleaseSemaphore as its previous-count output pointer.
void SemRelease (SWelsDecSemphore* s, long* prevcount) {
  const long kReleaseCount = 1;
  ReleaseSemaphore (s->h, kReleaseCount, prevcount);
}
// Win32: closes the semaphore handle and clears it so a stale handle is not reused.
void SemDestroy (SWelsDecSemphore* s) {
  CloseHandle (s->h);
  s->h = NULL;
}
#else /* _WIN32 */
// Converts a relative timeout in milliseconds into the absolute wall-clock
// deadline (now + timeout) expected by pthread_cond_timedwait / sem_timedwait.
//   ts      - receives the deadline; tv_nsec is normalised to [0, 1e9)
//   timeout - milliseconds from now; callers only pass non-negative values
//
// The timeout is split into whole seconds and a sub-second remainder before
// scaling. The earlier form computed `timeout * 1000000` in 32-bit int
// arithmetic, which overflows for any timeout above 2147 ms and produced a
// wrong deadline.
static void getTimespecFromTimeout (struct timespec* ts, int32_t timeout) {
  const long kNsecPerSec = 1000000000L;
  struct timeval tv;
  gettimeofday (&tv, 0);
  // Both terms stay below 1e9 in magnitude, so their sum fits a 32-bit long.
  long nsec = (long)tv.tv_usec * 1000L + (long) (timeout % 1000) * 1000000L;
  long carrySec = nsec / kNsecPerSec;
  nsec %= kNsecPerSec;
  if (nsec < 0) { // only reachable with a negative timeout; keep tv_nsec valid
    nsec += kNsecPerSec;
    carrySec -= 1;
  }
  ts->tv_sec = tv.tv_sec + timeout / 1000 + carrySec;
  ts->tv_nsec = nsec;
}
// POSIX: builds an event out of a mutex, a condition variable and a flag.
//   manualReset  - stored in e->manualReset; when zero, a successful blocking
//                  EventWait clears the signaled flag
//   initialState - initial value of e->isSignaled
// Returns 0 on success, 1 if the mutex could not be initialised, 2 if the
// condition variable could not be initialised.
int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) {
  if (pthread_mutex_init (& (e->m), NULL))
    return 1;
  if (pthread_cond_init (& (e->c), NULL)) {
    // do not leak the mutex that was already initialised above
    pthread_mutex_destroy (& (e->m));
    return 2;
  }
  e->isSignaled = initialState;
  e->manualReset = manualReset;
  return 0;
}
// POSIX: clears the signaled flag while holding the event mutex.
void EventReset (SWelsDecEvent* e) {
  pthread_mutex_lock (&e->m);
  e->isSignaled = 0;
  pthread_mutex_unlock (&e->m);
}
// POSIX: marks the event as signaled and wakes every thread blocked on its
// condition variable. Both steps happen under the event mutex, so waiters
// only observe the state after the mutex is released.
void EventPost (SWelsDecEvent* e) {
  pthread_mutex_lock (&e->m);
  e->isSignaled = 1;
  pthread_cond_broadcast (&e->c);
  pthread_mutex_unlock (&e->m);
}
// POSIX: waits until the event is signaled.
//   timeout == 0  - pure poll: reports the current state without blocking and
//                   without consuming the signal (unchanged behaviour)
//   timeout  < 0 or WELS_DEC_THREAD_WAIT_INFINITE - block until signaled
//   otherwise     - block for at most `timeout` milliseconds
// Returns WELS_DEC_THREAD_WAIT_SIGNALED or WELS_DEC_THREAD_WAIT_TIMEDOUT.
// For a non-manual-reset event a successful blocking wait clears the flag.
//
// The wait re-checks e->isSignaled in a loop because POSIX allows condition
// variable waits to return spuriously. The result is decided from the flag
// itself, not from the wait's return code, so:
//   - a spurious wakeup is no longer reported as "signaled";
//   - a signal posted just as the timed wait expires is not lost;
//   - a timed-out wait no longer clears the flag of an auto-reset event.
int EventWait (SWelsDecEvent* e, int32_t timeout) {
  pthread_mutex_lock (& (e->m));
  if (timeout == 0) {
    const int polled = e->isSignaled;
    pthread_mutex_unlock (& (e->m));
    return polled ? WELS_DEC_THREAD_WAIT_SIGNALED : WELS_DEC_THREAD_WAIT_TIMEDOUT;
  }
  int rc = 0;
  if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) {
    while (!e->isSignaled && rc == 0) {
      rc = pthread_cond_wait (& (e->c), & (e->m));
    }
  } else if (!e->isSignaled) {
    // absolute deadline is computed once so retries do not extend the wait
    struct timespec ts;
    getTimespecFromTimeout (&ts, timeout);
    while (!e->isSignaled && rc == 0) {
      rc = pthread_cond_timedwait (& (e->c), & (e->m), &ts);
    }
  }
  const int signaled = e->isSignaled;
  if (signaled && !e->manualReset) {
    e->isSignaled = 0; // auto-reset: this waiter consumes the signal
  }
  pthread_mutex_unlock (& (e->m));
  return signaled ? WELS_DEC_THREAD_WAIT_SIGNALED : WELS_DEC_THREAD_WAIT_TIMEDOUT;
}
// POSIX: releases the mutex and the condition variable owned by the event.
void EventDestroy (SWelsDecEvent* e) {
  pthread_mutex_destroy (&e->m);
  pthread_cond_destroy (&e->c);
}
// POSIX: initialises a counting semaphore made of a counter (s->v, capped at
// s->max), a mutex and an unnamed WELS event.
//   value - initial count, max - upper bound applied by SemRelease
// Returns 0 on success, 1 if the mutex could not be initialised, 2 if the
// event could not be opened.
int SemCreate (SWelsDecSemphore* s, long value, long max) {
  s->v = value;
  s->max = max;
  if (pthread_mutex_init (& (s->m), NULL))
    return 1;
  const char* event_name = "";
  if (WelsEventOpen (& (s->e), event_name)) {
    // do not leak the mutex that was already initialised above
    pthread_mutex_destroy (& (s->m));
    return 2;
  }
  return 0;
}
// POSIX: waits on the counting semaphore.
//   timeout == 0  - non-blocking: takes one count if s->v > 0, otherwise fails
//   timeout  < 0 or WELS_DEC_THREAD_WAIT_INFINITE - block until released
//   otherwise     - block for at most `timeout` milliseconds
// Returns WELS_DEC_THREAD_WAIT_SIGNALED when a count was taken (rc == 0),
// otherwise WELS_DEC_THREAD_WAIT_TIMEDOUT.
// On Apple the counter is protected by s->m and waiting uses s->e as a
// condition variable; on other platforms s->e is passed to sem_wait /
// sem_timedwait and no mutex is taken in this function.
// NOTE(review): on non-Apple builds s->v is read and written here without
// holding s->m - confirm that callers serialise access.
// NOTE(review): when s->v > 0 on entry the loop is skipped, so the underlying
// sem_t is not waited on while s->v is still decremented - the two counts
// look like they can drift apart; confirm this is intended.
int SemWait (SWelsDecSemphore* s, int32_t timeout) {
#if defined(__APPLE__)
pthread_mutex_lock (& (s->m));
#endif
int rc = 0;
if (timeout != 0) {
// blocking path: wait only while no count is available
while ((s->v) == 0) {
if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) {
// infinite wait until released
#if defined(__APPLE__)
rc = pthread_cond_wait (& (s->e), & (s->m));
#else
rc = sem_wait (s->e);
// sem_wait reports failure through errno; fold it into rc
if (rc != 0) rc = errno;
#endif
} else {
// timed wait: the absolute deadline is recomputed on every loop pass
struct timespec ts;
getTimespecFromTimeout (&ts, timeout);
#if defined(__APPLE__)
rc = pthread_cond_timedwait (& (s->e), & (s->m), &ts);
#else
rc = sem_timedwait (s->e, &ts);
// sem_timedwait reports failure through errno; fold it into rc
if (rc != 0) rc = errno;
#endif
// anything other than an interrupted wait (success included) ends the loop
if (rc != EINTR) {
// if timed out we return to the caller
break;
}
}
}
// only decrement counter if semaphore was signaled
if (rc == 0)
s->v -= 1;
} else {
// Special handling for timeout of 0
if (s->v > 0) {
s->v -= 1;
rc = 0;
} else {
rc = 1;
}
}
#if defined(__APPLE__)
pthread_mutex_unlock (& (s->m));
#endif
// set return value
if (rc == 0)
return WELS_DEC_THREAD_WAIT_SIGNALED;
else
return WELS_DEC_THREAD_WAIT_TIMEDOUT;
}
// POSIX: releases the semaphore once.
// The counter s->v is incremented unless it already equals s->max, and one
// waiter is woken (condition-variable signal on Apple, sem_post elsewhere).
//   o_pPrevCount - optional; receives the counter value seen before the release
void SemRelease (SWelsDecSemphore* s, long* o_pPrevCount) {
  long lCountBefore;
#ifdef __APPLE__
  pthread_mutex_lock (&s->m);
  lCountBefore = s->v;
  if (s->v < s->max) {
    s->v += 1;
  }
  pthread_cond_signal (&s->e);
  pthread_mutex_unlock (&s->m);
#else
  lCountBefore = s->v;
  if (s->v < s->max) {
    s->v += 1;
  }
  sem_post (s->e);
#endif
  if (o_pPrevCount != NULL)
    *o_pPrevCount = lCountBefore;
}
// POSIX: destroys the semaphore's mutex and closes its unnamed WELS event.
void SemDestroy (SWelsDecSemphore* s) {
  const char* kUnnamedEvent = "";
  pthread_mutex_destroy (&s->m);
  WelsEventClose (&s->e, kUnnamedEvent);
}
#endif /* !_WIN32 */
+32 -7
View File
@@ -19,15 +19,40 @@ cpp_sources = [
'core/src/pic_queue.cpp',
'core/src/rec_mb.cpp',
'plus/src/welsDecoderExt.cpp',
'core/src/wels_decoder_thread.cpp',
]
asm_sources = [
'core/x86/dct.asm',
'core/x86/intra_pred.asm',
]
objs_asm = asm_gen.process(asm_sources)
objs_asm = []
if cpu_family in ['x86', 'x86_64']
asm_sources = [
'core/x86/dct.asm',
'core/x86/intra_pred.asm',
]
objs_asm = asm_gen.process(asm_sources)
elif cpu_family == 'arm'
asm_sources = [
'core/arm/block_add_neon.S',
'core/arm/intra_pred_neon.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
elif cpu_family == 'aarch64'
asm_sources = [
'core/arm64/block_add_aarch64_neon.S',
'core/arm64/intra_pred_aarch64_neon.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
else
error('Unsupported cpu family @0@'.format(cpu_family))
endif
libdecoder = static_library('decoder', cpp_sources, objs_asm,
include_directories: [inc, decoder_inc],
include_directories: [inc, decoder_inc, casm_inc],
dependencies: deps)
+38 -17
View File
@@ -109,33 +109,54 @@ class CWelsDecoder : public ISVCDecoder {
virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption);
virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption);
// Per-picture record; the enclosing class declares an array of these as
// m_sPictInfoList[16].
// NOTE(review): the field notes below are inferred from names only - confirm
// against the picture reordering code before relying on them.
typedef struct tagPictInfo {
SBufferInfo sBufferInfo; // presumably the output buffer description of the picture
int32_t iPOC; // presumably the picture order count
int32_t iPicBuffIdx; // presumably an index into the picture buffer
bool bLastGOP; // presumably marks a picture belonging to the previous GOP
unsigned char* pData[3]; // three plane pointers (likely Y/U/V - TODO confirm)
} SPictInfo, *PPictInfo;
public:
DECODING_STATE DecodeFrame2WithCtx (PWelsDecoderContext pCtx, const unsigned char* kpSrc, const int kiSrcLen,
unsigned char** ppDst, SBufferInfo* pDstInfo);
DECODING_STATE ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx);
private:
PWelsDecoderContext m_pDecContext;
welsCodecTrace* m_pWelsTrace;
uint32_t m_uiDecodeTimeStamp;
bool m_bIsBaseline;
int32_t m_iCpuCount;
int32_t m_iThreadCount;
int32_t m_iCtxCount;
PPicBuff m_pPicBuff;
bool m_bParamSetsLostFlag;
bool m_bFreezeOutput;
int32_t m_DecCtxActiveCount;
PWelsDecoderThreadCTX m_pDecThrCtx;
PWelsDecoderThreadCTX m_pLastDecThrCtx;
int32_t m_iLastBufferedIdx;
WELS_MUTEX m_csDecoder;
SWelsDecEvent m_sBufferingEvent;
SWelsDecEvent m_sReleaseBufferEvent;
SWelsDecSemphore m_sIsBusy;
SPictInfo m_sPictInfoList[16];
int32_t m_iPictInfoIndex;
int32_t m_iMinPOC;
int32_t m_iNumOfPicts;
int32_t m_iLastGOPRemainPicts;
int32_t m_LastWrittenPOC;
int32_t m_iLargestBufferedPicIndex;
SPictReoderingStatus m_sReoderingStatus;
PWelsDecoderThreadCTX m_pDecThrCtxActive[WELS_DEC_MAX_NUM_CPU];
SVlcTable m_sVlcTable;
SWelsLastDecPicInfo m_sLastDecPicInfo;
SDecoderStatistics m_sDecoderStatistics;// For real time debugging
private:
int32_t InitDecoder (const SDecodingParam* pParam);
void UninitDecoder (void);
int32_t ResetDecoder();
void ResetReorderingPictureBuffers();
int32_t InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam);
void UninitDecoderCtx (PWelsDecoderContext& pCtx);
int32_t ResetDecoder (PWelsDecoderContext& pCtx);
int32_t ThreadResetDecoder (PWelsDecoderContext& pCtx);
void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics);
DECODING_STATE ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo);
DECODING_STATE ReorderPicturesInDisplay (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
int ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst,
SBufferInfo* pDstInfo);
void BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
void ReleaseBufferedReadyPictureReorder (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo, bool isFlush = false);
void ReleaseBufferedReadyPictureNoReorder (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
void OpenDecoderThreads();
void CloseDecoderThreads();
#ifdef OUTPUT_BIT_STREAM
WelsFileHandle* m_pFBS;
WelsFileHandle* m_pFBSSize;
File diff suppressed because it is too large Load Diff
+31 -4
View File
@@ -22,6 +22,7 @@ DECODER_CPP_SRCS=\
$(DECODER_SRCDIR)/core/src/parse_mb_syn_cavlc.cpp\
$(DECODER_SRCDIR)/core/src/pic_queue.cpp\
$(DECODER_SRCDIR)/core/src/rec_mb.cpp\
$(DECODER_SRCDIR)/core/src/wels_decoder_thread.cpp\
$(DECODER_SRCDIR)/plus/src/welsDecoderExt.cpp\
DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.$(OBJ))
@@ -56,14 +57,40 @@ DECODER_OBJS += $(DECODER_OBJSARM64)
endif
OBJS += $(DECODER_OBJSARM64)
DECODER_ASM_MIPS_SRCS=\
DECODER_ASM_MIPS_MMI_SRCS=\
$(DECODER_SRCDIR)/core/mips/dct_mmi.c\
DECODER_OBJSMIPS += $(DECODER_ASM_MIPS_SRCS:.c=.$(OBJ))
DECODER_OBJSMIPS_MMI += $(DECODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
DECODER_ASM_MIPS_MSA_SRCS=\
DECODER_OBJSMIPS_MSA += $(DECODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
DECODER_OBJS += $(DECODER_OBJSMIPS)
ifeq ($(ENABLE_MMI), Yes)
DECODER_OBJS += $(DECODER_OBJSMIPS_MMI)
endif
ifeq ($(ENABLE_MSA), Yes)
DECODER_OBJS += $(DECODER_OBJSMIPS_MSA)
endif
endif
OBJS += $(DECODER_OBJSMIPS_MMI)
OBJS += $(DECODER_OBJSMIPS_MSA)
DECODER_ASM_LOONGARCH_LSX_SRCS=\
DECODER_OBJSLOONGARCH_LSX += $(DECODER_ASM_LOONGARCH_LSX_SRCS:.c=.$(OBJ))
DECODER_ASM_LOONGARCH_LASX_SRCS=\
DECODER_OBJSLOONGARCH_LASX += $(DECODER_ASM_LOONGARCH_LASX_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), loongarch)
ifeq ($(ENABLE_LSX), Yes)
DECODER_OBJS += $(DECODER_OBJSLOONGARCH_LSX)
endif
ifeq ($(ENABLE_LASX), Yes)
DECODER_OBJS += $(DECODER_OBJSLOONGARCH_LASX)
endif
endif
OBJS += $(DECODER_OBJSMIPS)
OBJS += $(DECODER_OBJS)
+1
View File
@@ -83,6 +83,7 @@ loop_0_get_i16x16_luma_pred_dc_both:
WELS_ASM_FUNC_END
.align 3
//The table for SIMD instruction {(8,7,6,5,4,3,2,1) * 5}
CONST0_GET_I16X16_LUMA_PRED_PLANE: .long 0x191e2328, 0x050a0f14
+5
View File
@@ -174,6 +174,11 @@ void WelsQuant4x4Dc_mmi (int16_t* pDct, int16_t iFF, int16_t iMF);
void WelsQuantFour4x4_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuantFour4x4Max_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);
#endif//HAVE_MMI
#ifdef HAVE_LSX
void WelsQuantFour4x4Max_lsx (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);
#endif//HAVE_LSX
#if defined(__cplusplus)
}
#endif//__cplusplus
+1 -1
View File
@@ -365,7 +365,7 @@ typedef struct TagWelsSvcCodingParam: SEncParamExt {
uiIntraPeriod = ((uiIntraPeriod + uiGopSize - 1) / uiGopSize) * uiGopSize;
if (((pCodingParam.iNumRefFrame != AUTO_REF_PIC_COUNT)
&& ((pCodingParam.iNumRefFrame > MAX_REF_PIC_COUNT) || (pCodingParam.iNumRefFrame < MIN_REF_PIC_COUNT)))
&& !((pCodingParam.iNumRefFrame > MAX_REF_PIC_COUNT) || (pCodingParam.iNumRefFrame < MIN_REF_PIC_COUNT)))
|| ((iNumRefFrame != AUTO_REF_PIC_COUNT) && (pCodingParam.iNumRefFrame == AUTO_REF_PIC_COUNT))) {
iNumRefFrame = pCodingParam.iNumRefFrame;
}
+9
View File
@@ -132,6 +132,15 @@ int32_t WelsSampleSatd8x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSatd16x16_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSatd4x4_mmi (uint8_t*, int32_t, uint8_t*, int32_t);
#endif//HAVE_MMI
#if defined (HAVE_LASX)
int32_t WelsSampleSatd4x4_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSatd8x8_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSatd16x8_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSatd8x16_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSatd16x16_lasx (uint8_t*, int32_t, uint8_t*, int32_t);
#endif
#if defined(__cplusplus)
}
#endif//__cplusplus
+110
View File
@@ -0,0 +1,110 @@
/*!
**********************************************************************************
* Copyright (c) 2021 Loongson Technology Corporation Limited
* Contributed by Lu Wang <wanglu@loongson.cn>
*
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* \file quant_lsx.c
*
* \brief Loongson optimization
*
* \date 12/10/2021 Created
*
**********************************************************************************
*/
#include "stdint.h"
#include "loongson_intrinsics.h"
// LoongArch LSX implementation of quantisation for four consecutive blocks of
// 16 coefficients, also reporting the largest quantised magnitude per block.
//   pDct - 64 int16 coefficients, quantised in place (16 per loop pass)
//   pFF  - 8 int16 values loaded once and added to the coefficient magnitudes
//   pMF  - 8 int16 values loaded once and multiplied with the biased magnitudes
//   pMax - receives one value per block (pMax[0..3])
// Per coefficient the visible arithmetic is: sign * (((|x| + FF) * MF) >> 16).
// NOTE(review): step comments assume the usual LSX meanings of the intrinsics
// (vsrai = arithmetic shift right, vilvl/vilvh = interleave low/high halves,
// vpickev = pick even elements, vbsrl = byte shift right) - confirm against
// the LoongArch intrinsics reference.
void WelsQuantFour4x4Max_lsx (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax) {
int32_t k;
int16_t iMaxAbs;
__m128i vec_pDct1, vec_pDct2, vec_pDct3, vec_pDct4;
__m128i vec_pFF, vec_pMF, vec_iMaxAbs, tmp_iMaxAbs;
__m128i vec_pFF0, vec_pFF1, vec_pFF2, vec_pMF0, vec_pMF1, vec_pMF2;
__m128i vec_pDct10, vec_pDct11, vec_pDct12, vec_pDct20, vec_pDct21, vec_pDct22;
__m128i vec_iSign11, vec_iSign12, vec_iSign21, vec_iSign22;
__m128i vec_iSign31, vec_iSign32, vec_iSign41, vec_iSign42;
// Load the 8 FF and 8 MF values once and widen them to 32-bit lanes: the
// shift by 15 yields each value's sign word, which is interleaved with the
// value to form the low (..1) and high (..2) halves.
DUP2_ARG2(__lsx_vld, pFF, 0, pMF, 0, vec_pFF, vec_pMF);
DUP2_ARG2(__lsx_vsrai_h, vec_pFF, 15, vec_pMF, 15, vec_pFF0, vec_pMF0);
DUP2_ARG2(__lsx_vilvl_h, vec_pFF0, vec_pFF, vec_pMF0, vec_pMF, vec_pFF1, vec_pMF1);
DUP2_ARG2(__lsx_vilvh_h, vec_pFF0, vec_pFF, vec_pMF0, vec_pMF, vec_pFF2, vec_pMF2);
for (k = 0; k < 4; k++) {
// this scalar reset is overwritten by the lane extraction further down
iMaxAbs = 0;
vec_iMaxAbs = __lsx_vreplgr2vr_h(0);
// load the 16 coefficients of this block (two vectors of 8) and widen to 32 bit
DUP2_ARG2(__lsx_vld, pDct, 0, pDct + 8, 0, vec_pDct1, vec_pDct2);
DUP2_ARG2(__lsx_vsrai_h, vec_pDct1, 15, vec_pDct2, 15, vec_pDct10, vec_pDct20);
DUP2_ARG2(__lsx_vilvl_h, vec_pDct10, vec_pDct1, vec_pDct20, vec_pDct2, vec_pDct11,
vec_pDct21);
DUP2_ARG2(__lsx_vilvh_h, vec_pDct10, vec_pDct1, vec_pDct20, vec_pDct2, vec_pDct12,
vec_pDct22);
// per-lane sign masks (shift right by 31)
DUP4_ARG2(__lsx_vsrai_w, vec_pDct11, 31, vec_pDct12, 31, vec_pDct21, 31, vec_pDct22,
31, vec_iSign11, vec_iSign12, vec_iSign21, vec_iSign22);
// absolute value: (sign ^ x) - sign
vec_iSign31 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign11, vec_pDct11), vec_iSign11);
vec_iSign32 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign12, vec_pDct12), vec_iSign12);
vec_iSign41 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign21, vec_pDct21), vec_iSign21);
vec_iSign42 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign22, vec_pDct22), vec_iSign22);
// quantise the magnitudes: (|x| + FF) * MF, then >> 16
DUP4_ARG2(__lsx_vadd_w, vec_pFF1, vec_iSign31, vec_pFF2, vec_iSign32, vec_pFF1,
vec_iSign41, vec_pFF2, vec_iSign42, vec_iSign31, vec_iSign32, vec_iSign41,
vec_iSign42);
DUP4_ARG2(__lsx_vmul_w, vec_pMF1, vec_iSign31, vec_pMF2, vec_iSign32, vec_pMF1,
vec_iSign41, vec_pMF2, vec_iSign42, vec_pDct11, vec_pDct12, vec_pDct21,
vec_pDct22);
DUP4_ARG2(__lsx_vsrai_w, vec_pDct11, 16, vec_pDct12, 16, vec_pDct21, 16, vec_pDct22,
16, vec_pDct11, vec_pDct12, vec_pDct21, vec_pDct22);
// lane-wise running maximum over the four quantised vectors
DUP4_ARG2(__lsx_vmax_w, vec_iMaxAbs, vec_pDct11, vec_iMaxAbs, vec_pDct12, vec_iMaxAbs,
vec_pDct21, vec_iMaxAbs, vec_pDct22, vec_iMaxAbs, vec_iMaxAbs, vec_iMaxAbs,
vec_iMaxAbs);
// horizontal reduction: fold the upper 8 bytes, then the next 4, into lane 0
tmp_iMaxAbs = __lsx_vbsrl_v(vec_iMaxAbs, 8);
vec_iMaxAbs = __lsx_vmax_w(vec_iMaxAbs, tmp_iMaxAbs);
tmp_iMaxAbs = __lsx_vbsrl_v(vec_iMaxAbs, 4);
vec_iMaxAbs = __lsx_vmax_w(vec_iMaxAbs, tmp_iMaxAbs);
// halfword 0 is read from the 32-bit maximum; presumably the value always fits 16 bits
iMaxAbs = __lsx_vpickve2gr_h(vec_iMaxAbs, 0);
// re-apply the original signs: (sign ^ q) - sign
vec_pDct1 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign11, vec_pDct11), vec_iSign11);
vec_pDct2 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign12, vec_pDct12), vec_iSign12);
vec_pDct3 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign21, vec_pDct21), vec_iSign21);
vec_pDct4 = __lsx_vsub_w(__lsx_vxor_v(vec_iSign22, vec_pDct22), vec_iSign22);
// narrow back to 16-bit lanes and store over the input coefficients
DUP2_ARG2(__lsx_vpickev_h, vec_pDct2, vec_pDct1, vec_pDct4, vec_pDct3, vec_pDct1,
vec_pDct2);
__lsx_vst(vec_pDct1, pDct, 0);
__lsx_vst(vec_pDct2, pDct + 8, 0);
// advance to the next block of 16 coefficients
pDct += 16;
pMax[k] = iMaxAbs;
}
}
+6 -2
View File
@@ -134,7 +134,7 @@ static int32_t WelsCheckNumRefSetting (SLogContext* pLogCtx, SWelsSvcCodingParam
int32_t WelsCheckRefFrameLimitationNumRefFirst (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam) {
if (WelsCheckNumRefSetting (pLogCtx, pParam, true)) {
if (WelsCheckNumRefSetting (pLogCtx, pParam, false)) {
// we take num-ref as the honored setting but it conflicts with temporal and LTR
return ENC_RETURN_UNSUPPORTED_PARA;
}
@@ -304,7 +304,11 @@ int32_t WelsWriteSpsSyntax (SWelsSPS* pSps, SBitStringAux* pBitStringAux, int32_
BsWriteUE (pLocalBitStringAux, pSps->iMbHeight - 1); // pic_height_in_map_units_minus1
BsWriteOneBit (pLocalBitStringAux, true/*pSps->bFrameMbsOnlyFlag*/); // bFrameMbsOnlyFlag
BsWriteOneBit (pLocalBitStringAux, 0/*pSps->bDirect8x8InferenceFlag*/); // direct_8x8_inference_flag
uint8_t d8x8 = 0;
if (pSps->iLevelIdc >= 30)
d8x8 = 1;
BsWriteOneBit (pLocalBitStringAux, d8x8/*pSps->bDirect8x8InferenceFlag*/); // direct_8x8_inference_flag
BsWriteOneBit (pLocalBitStringAux, pSps->bFrameCroppingFlag); // bFrameCroppingFlag
if (pSps->bFrameCroppingFlag) {
BsWriteUE (pLocalBitStringAux, pSps->sFrameCrop.iCropLeft); // frame_crop_left_offset
+18
View File
@@ -783,6 +783,11 @@ void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu)
*pfSetNZCZero = WelsNonZeroCount_mmi;
}
#endif
#if defined(HAVE_MSA)
if (iCpu & WELS_CPU_MSA) {
*pfSetNZCZero = WelsNonZeroCount_msa;
}
#endif
}
void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
@@ -860,6 +865,19 @@ void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) {
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
}
#endif//HAVE_MMI
#if defined(HAVE_MSA)
if (iCpu & WELS_CPU_MSA) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa;
pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa;
pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
}
#endif//HAVE_MSA
}
+21 -1
View File
@@ -464,7 +464,7 @@ int32_t WelsHadamardQuant2x2Skip_AArch64_neon (int16_t* pRes, int16_t iFF, int1
void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
pFuncList->pfCopy8x8Aligned = WelsCopy8x8_c;
pFuncList->pfCopy16x16Aligned =
pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c;
pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c;
pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_c;
pFuncList->pfCopy8x16Aligned = WelsCopy8x16_c;
pFuncList->pfCopy4x4 = WelsCopy4x4_c;
@@ -612,5 +612,25 @@ void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
pFuncList->pfDctFourT4 = WelsDctFourT4_mmi;
}
#endif//HAVE_MMI
#if defined(HAVE_MSA)
if (uiCpuFlag & WELS_CPU_MSA) {
pFuncList->pfCopy8x8Aligned = WelsCopy8x8_msa;
pFuncList->pfCopy8x16Aligned = WelsCopy8x16_msa;
pFuncList->pfCopy16x16Aligned =
pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_msa;
pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_msa;
}
#endif
#if defined(HAVE_LSX)
if (uiCpuFlag & WELS_CPU_LSX) {
pFuncList->pfCopy8x8Aligned = WelsCopy8x8_lsx;
pFuncList->pfCopy16x16Aligned = WelsCopy16x16_lsx;
pFuncList->pfCopy16x16NotAligned = WelsCopy16x16NotAligned_lsx;
pFuncList->pfQuantizationFour4x4Max = WelsQuantFour4x4Max_lsx;
}
#endif
}
}
+4 -2
View File
@@ -374,12 +374,12 @@ int32_t ParamValidation (SLogContext* pLogCtx, SWelsSvcCodingParam* pCfg) {
pCfg->bEnableFrameSkip);
if ((pCfg->iMaxQp <= 0) || (pCfg->iMinQp <= 0)) {
if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) {
WelsLog (pLogCtx, WELS_LOG_WARNING, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, MIN_SCREEN_QP,
WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, MIN_SCREEN_QP,
MAX_SCREEN_QP);
pCfg->iMinQp = MIN_SCREEN_QP;
pCfg->iMaxQp = MAX_SCREEN_QP;
} else {
WelsLog (pLogCtx, WELS_LOG_WARNING, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp,
WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp,
GOM_MIN_QP_MODE, MAX_LOW_BR_QP);
pCfg->iMinQp = GOM_MIN_QP_MODE;
pCfg->iMaxQp = MAX_LOW_BR_QP;
@@ -3616,6 +3616,8 @@ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi, const SSour
eNalRefIdc = NRI_PRI_LOWEST;
else if (1 + iCurTid == iDecompositionStages)
eNalRefIdc = NRI_PRI_LOW;
else if (2 + iCurTid == iDecompositionStages)
eNalRefIdc = NRI_PRI_HIGH;
else // more details for other temporal layers?
eNalRefIdc = NRI_PRI_HIGHEST;
pCtx->eNalType = eNalType;
@@ -628,7 +628,6 @@ bool WelsBuildRefList (sWelsEncCtx* pCtx, const int32_t iPOC, int32_t iBestLtrRe
WelsLog (& (pCtx->sLogCtx), WELS_LOG_DETAIL,
"WelsBuildRefList pCtx->uiTemporalId = %d,pRef->iFrameNum = %d,pRef->uiTemporalId = %d",
pCtx->uiTemporalId, pRef->iFrameNum, pRef->uiTemporalId);
break;
}
}
}
+21
View File
@@ -490,6 +490,27 @@ void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_mmi;
}
#endif//HAVE_MMI
#if defined (HAVE_LASX)
if (uiCpuFlag & WELS_CPU_LASX) {
pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4] = WelsSampleSad4x4_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8] = WelsSampleSad16x8_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_lasx;
pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_lasx;
pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_lasx;
pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_lasx;
pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] = WelsSampleSatd4x4_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8] = WelsSampleSatd8x8_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16] = WelsSampleSatd8x16_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8] = WelsSampleSatd16x8_lasx;
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_lasx;
}
#endif
}
} // namespace WelsEnc
+1 -1
View File
@@ -101,7 +101,7 @@ void WelsSliceHeaderExtInit (sWelsEncCtx* pEncCtx, SDqLayer* pCurLayer, SSlice*
if (P_SLICE == pEncCtx->eSliceType) {
pCurSliceHeader->uiNumRefIdxL0Active = 1;
if (pCurSliceHeader->uiRefCount > 0 &&
pCurSliceHeader->uiRefCount < pCurLayer->sLayerInfo.pSpsP->iNumRefFrames) {
pCurSliceHeader->uiRefCount <= pCurLayer->sLayerInfo.pSpsP->iNumRefFrames) {
pCurSliceHeader->bNumRefIdxActiveOverrideFlag = true;
pCurSliceHeader->uiNumRefIdxL0Active = pCurSliceHeader->uiRefCount;
}
+45 -13
View File
@@ -33,19 +33,51 @@ cpp_sources = [
'plus/src/welsEncoderExt.cpp',
]
asm_sources = [
'core/x86/coeff.asm',
'core/x86/dct.asm',
'core/x86/intra_pred.asm',
'core/x86/matrix_transpose.asm',
'core/x86/memzero.asm',
'core/x86/quant.asm',
'core/x86/sample_sc.asm',
'core/x86/score.asm',
]
objs_asm = asm_gen.process(asm_sources)
objs_asm = []
if cpu_family in ['x86', 'x86_64']
asm_sources = [
'core/x86/coeff.asm',
'core/x86/dct.asm',
'core/x86/intra_pred.asm',
'core/x86/matrix_transpose.asm',
'core/x86/memzero.asm',
'core/x86/quant.asm',
'core/x86/sample_sc.asm',
'core/x86/score.asm',
]
objs_asm = asm_gen.process(asm_sources)
elif cpu_family == 'arm'
asm_sources = [
'core/arm/intra_pred_neon.S',
'core/arm/intra_pred_sad_3_opt_neon.S',
'core/arm/memory_neon.S',
'core/arm/pixel_neon.S',
'core/arm/reconstruct_neon.S',
'core/arm/svc_motion_estimation.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
elif cpu_family == 'aarch64'
asm_sources = [
'core/arm64/intra_pred_aarch64_neon.S',
'core/arm64/intra_pred_sad_3_opt_aarch64_neon.S',
'core/arm64/memory_aarch64_neon.S',
'core/arm64/pixel_aarch64_neon.S',
'core/arm64/reconstruct_aarch64_neon.S',
'core/arm64/svc_motion_estimation_aarch64_neon.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
else
error('Unsupported cpu family @0@'.format(cpu_family))
endif
libencoder = static_library('encoder', cpp_sources, objs_asm,
include_directories: [inc, processing_inc, encoder_inc],
include_directories: [inc, processing_inc, encoder_inc, casm_inc],
dependencies: deps)
+31 -4
View File
@@ -82,16 +82,43 @@ ENCODER_OBJS += $(ENCODER_OBJSARM64)
endif
OBJS += $(ENCODER_OBJSARM64)
ENCODER_ASM_MIPS_SRCS=\
ENCODER_ASM_MIPS_MMI_SRCS=\
$(ENCODER_SRCDIR)/core/mips/dct_mmi.c\
$(ENCODER_SRCDIR)/core/mips/quant_mmi.c\
$(ENCODER_SRCDIR)/core/mips/score_mmi.c\
ENCODER_OBJSMIPS += $(ENCODER_ASM_MIPS_SRCS:.c=.$(OBJ))
ENCODER_OBJSMIPS_MMI += $(ENCODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
ENCODER_ASM_MIPS_MSA_SRCS=\
ENCODER_OBJSMIPS_MSA += $(ENCODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
ENCODER_OBJS += $(ENCODER_OBJSMIPS)
ifeq ($(ENABLE_MMI), Yes)
ENCODER_OBJS += $(ENCODER_OBJSMIPS_MMI)
endif
ifeq ($(ENABLE_MSA), Yes)
ENCODER_OBJS += $(ENCODER_OBJSMIPS_MSA)
endif
endif
OBJS += $(ENCODER_OBJSMIPS_MMI)
OBJS += $(ENCODER_OBJSMIPS_MSA)
ENCODER_ASM_LOONGARCH_LSX_SRCS=\
$(ENCODER_SRCDIR)/core/loongarch/quant_lsx.c\
ENCODER_OBJSLOONGARCH_LSX += $(ENCODER_ASM_LOONGARCH_LSX_SRCS:.c=.$(OBJ))
ENCODER_ASM_LOONGARCH_LASX_SRCS=\
ENCODER_OBJSLOONGARCH_LASX += $(ENCODER_ASM_LOONGARCH_LASX_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), loongarch)
ifeq ($(ENABLE_LSX), Yes)
ENCODER_OBJS += $(ENCODER_OBJSLOONGARCH_LSX)
endif
ifeq ($(ENABLE_LASX), Yes)
ENCODER_OBJS += $(ENCODER_OBJSLOONGARCH_LASX)
endif
endif
OBJS += $(ENCODER_OBJSMIPS)
OBJS += $(ENCODER_OBJS)
+4 -1
View File
@@ -2,5 +2,8 @@ subdir('common')
subdir('decoder')
subdir('encoder')
subdir('processing')
subdir('console')
if not ['android', 'ios'].contains(system)
# also disabled in the Makefile for these platforms
subdir('console')
endif
subdir('api')
+36 -8
View File
@@ -18,14 +18,42 @@ cpp_sources = [
'src/vaacalc/vaacalculation.cpp',
]
asm_sources = [
'src/x86/denoisefilter.asm',
'src/x86/downsample_bilinear.asm',
'src/x86/vaa.asm',
]
objs_asm = asm_gen.process(asm_sources)
objs_asm = []
if cpu_family in ['x86', 'x86_64']
asm_sources = [
'src/x86/denoisefilter.asm',
'src/x86/downsample_bilinear.asm',
'src/x86/vaa.asm',
]
objs_asm = asm_gen.process(asm_sources)
elif cpu_family == 'arm'
asm_sources = [
'src/arm/adaptive_quantization.S',
'src/arm/down_sample_neon.S',
'src/arm/pixel_sad_neon.S',
'src/arm/vaa_calc_neon.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
elif cpu_family == 'aarch64'
asm_sources = [
'src/arm64/adaptive_quantization_aarch64_neon.S',
'src/arm64/down_sample_aarch64_neon.S',
'src/arm64/pixel_sad_aarch64_neon.S',
'src/arm64/vaa_calc_aarch64_neon.S',
]
if use_asm_gen
objs_asm = asm_gen.process(asm_sources)
else
cpp_sources += asm_sources
endif
else
error('Unsupported cpu family @0@'.format(cpu_family))
endif
libprocessing = static_library('processing', cpp_sources, objs_asm,
include_directories: [inc, processing_inc],
include_directories: [inc, processing_inc, casm_inc],
dependencies: deps)
+235
View File
@@ -0,0 +1,235 @@
/*!
**********************************************************************************
* Copyright (c) 2021 Loongson Technology Corporation Limited
* Contributed by Lu Wang <wanglu@loongson.cn>
*
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* \file vaa_lsx.c
*
* \brief Loongson optimization
*
* \date 12/10/2021 Created
*
**********************************************************************************
*/
#include "stdint.h"
#include "loongson_intrinsics.h"
/*!
 * \brief LSX version of the SAD / SD / MAD background-detection statistics.
 *
 * Walks the picture in 16x16 macroblocks. For each of the four 8x8 blocks of
 * a macroblock (top-left, top-right, bottom-left, bottom-right, in that order)
 * it computes, over cur - ref:
 *   - l_sad : sum of absolute differences  -> pSad8x8[4 * mb_index + blk]
 *   - l_sd  : signed sum of differences    -> pSd8x8 [4 * mb_index + blk]
 *   - l_mad : maximum absolute difference  -> pMad8x8[4 * mb_index + blk]
 * and accumulates every l_sad into *pFrameSad (reset to 0 on entry).
 *
 * \param pCurData   current picture samples
 * \param pRefData   reference picture samples
 * \param iPicWidth  picture width in samples  (only iPicWidth >> 4 macroblocks are processed)
 * \param iPicHeight picture height in samples (only iPicHeight >> 4 macroblock rows are processed)
 * \param iPicStride row pitch in bytes, used for both pCurData and pRefData
 * \param pFrameSad  out: total SAD over all processed 8x8 blocks
 * \param pSad8x8    out: per-8x8 SAD, 4 entries per macroblock
 * \param pSd8x8     out: per-8x8 signed difference sum, 4 entries per macroblock
 * \param pMad8x8    out: per-8x8 maximum absolute difference, 4 entries per macroblock
 *
 * NOTE(review): `step` below is (16 * stride - iPicWidth), which only lands on the
 * start of the next macroblock row when iPicWidth is a multiple of 16 -- confirm
 * callers guarantee that.
 * NOTE(review): __lsx_vld presumably loads a full 16-byte vector while only the low
 * 8 bytes survive the interleave-low step; for the right-hand 8x8 blocks that would
 * read 8 bytes past the macroblock -- confirm the picture buffers are padded.
 */
void VAACalcSadBgd_lsx (const uint8_t* pCurData, const uint8_t* pRefData,
int32_t iPicWidth, int32_t iPicHeight, int32_t iPicStride,
int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8,
uint8_t* pMad8x8) {
const uint8_t* tmp_ref = pRefData;
const uint8_t* tmp_cur = pCurData;
int32_t iMbWidth = (iPicWidth >> 4);
int32_t mb_height = (iPicHeight >> 4);
int32_t mb_index = 0;
// Byte offset from the top 8x8 blocks to the bottom 8x8 blocks of a macroblock.
int32_t pic_stride_x8 = iPicStride << 3;
// Added after each macroblock row: 16 rows down, minus the bytes already advanced.
int32_t step = (iPicStride << 4) - iPicWidth;
*pFrameSad = 0;
for (int32_t i = 0; i < mb_height; i++) {
for (int32_t j = 0; j < iMbWidth; j++) {
int32_t k;
int32_t l_sad, l_sd, l_mad;
const uint8_t* tmp_cur_row;
const uint8_t* tmp_ref_row;
// Output slots of the four 8x8 blocks belonging to this macroblock.
int32_t tmp_mb_index = mb_index << 2;
int32_t tmp_mb_index1 = tmp_mb_index + 1;
int32_t tmp_mb_index2 = tmp_mb_index + 2;
int32_t tmp_mb_index3 = tmp_mb_index + 3;
__m128i cur, ref;
__m128i vec_diff, vec_abs_diff, tmp_l_sd, tmp_l_sad, tmp_l_mad;
__m128i zero = __lsx_vreplgr2vr_b(0);
__m128i vec_l_sd = zero;
__m128i vec_l_sad = zero;
__m128i vec_l_mad = zero;
// ---- 8x8 block 0: top-left ----
l_mad = l_sd = l_sad = 0;
tmp_cur_row = tmp_cur;
tmp_ref_row = tmp_ref;
for (k = 0; k < 8; k ++) {
// Load one row of cur/ref, then widen the low 8 bytes to 16-bit lanes by
// interleaving with zero, so the per-lane sums below cannot overflow 8 bits.
DUP2_ARG2(__lsx_vld, tmp_cur_row, 0, tmp_ref_row, 0, cur, ref);
DUP2_ARG2(__lsx_vilvl_b, zero, cur, zero, ref, cur, ref);
vec_diff = __lsx_vsub_h(cur, ref);
vec_l_sd = __lsx_vadd_h(vec_l_sd, vec_diff);
vec_abs_diff = __lsx_vabsd_h(cur, ref);
vec_l_sad = __lsx_vadd_h(vec_l_sad, vec_abs_diff);
vec_l_mad = __lsx_vmax_h(vec_l_mad, vec_abs_diff);
tmp_cur_row += iPicStride;
tmp_ref_row += iPicStride;
}
// Horizontal reduction of the eight 16-bit lane sums: h -> w -> d -> q, then
// take the low 64-bit element as the scalar sum.
DUP2_ARG2(__lsx_vhaddw_w_h, vec_l_sd, vec_l_sd, vec_l_sad, vec_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_d_w, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_q_d, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vpickve2gr_d, tmp_l_sd, 0, tmp_l_sad, 0, l_sd, l_sad);
// Horizontal max: fold the vector onto itself by 8, 4 and 2 bytes so that
// lane 0 ends up holding the maximum of all eight 16-bit lanes.
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 8);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 4);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 2);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
l_mad = __lsx_vpickve2gr_h(vec_l_mad, 0);
*pFrameSad += l_sad;
pSad8x8[tmp_mb_index] = l_sad;
pSd8x8[tmp_mb_index] = l_sd;
pMad8x8[tmp_mb_index] = l_mad;
// ---- 8x8 block 1: top-right (same rows, 8 columns to the right) ----
l_mad = l_sd = l_sad = 0;
tmp_cur_row = tmp_cur + 8;
tmp_ref_row = tmp_ref + 8;
vec_l_sd = vec_l_sad = vec_l_mad = zero;
for (k = 0; k < 8; k ++) {
DUP2_ARG2(__lsx_vld, tmp_cur_row, 0, tmp_ref_row, 0, cur, ref);
DUP2_ARG2(__lsx_vilvl_b, zero, cur, zero, ref, cur, ref);
vec_diff = __lsx_vsub_h(cur, ref);
vec_l_sd = __lsx_vadd_h(vec_l_sd, vec_diff);
vec_abs_diff = __lsx_vabsd_h(cur, ref);
vec_l_sad = __lsx_vadd_h(vec_l_sad, vec_abs_diff);
vec_l_mad = __lsx_vmax_h(vec_l_mad, vec_abs_diff);
tmp_cur_row += iPicStride;
tmp_ref_row += iPicStride;
}
// Same sum and max reductions as for block 0.
DUP2_ARG2(__lsx_vhaddw_w_h, vec_l_sd, vec_l_sd, vec_l_sad, vec_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_d_w, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_q_d, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vpickve2gr_d, tmp_l_sd, 0, tmp_l_sad, 0, l_sd, l_sad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 8);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 4);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 2);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
l_mad = __lsx_vpickve2gr_h(vec_l_mad, 0);
*pFrameSad += l_sad;
pSad8x8[tmp_mb_index1] = l_sad;
pSd8x8[tmp_mb_index1] = l_sd;
pMad8x8[tmp_mb_index1] = l_mad;
// ---- 8x8 block 2: bottom-left (8 rows down) ----
l_mad = l_sd = l_sad = 0;
tmp_cur_row = tmp_cur + pic_stride_x8;
tmp_ref_row = tmp_ref + pic_stride_x8;
vec_l_sd = vec_l_sad = vec_l_mad = zero;
for (k = 0; k < 8; k ++) {
DUP2_ARG2(__lsx_vld, tmp_cur_row, 0, tmp_ref_row, 0, cur, ref);
DUP2_ARG2(__lsx_vilvl_b, zero, cur, zero, ref, cur, ref);
vec_diff = __lsx_vsub_h(cur, ref);
vec_l_sd = __lsx_vadd_h(vec_l_sd, vec_diff);
vec_abs_diff = __lsx_vabsd_h(cur, ref);
vec_l_sad = __lsx_vadd_h(vec_l_sad, vec_abs_diff);
vec_l_mad = __lsx_vmax_h(vec_l_mad, vec_abs_diff);
tmp_cur_row += iPicStride;
tmp_ref_row += iPicStride;
}
// Same sum and max reductions as for block 0.
DUP2_ARG2(__lsx_vhaddw_w_h, vec_l_sd, vec_l_sd, vec_l_sad, vec_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_d_w, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_q_d, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vpickve2gr_d, tmp_l_sd, 0, tmp_l_sad, 0, l_sd, l_sad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 8);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 4);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 2);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
l_mad = __lsx_vpickve2gr_h(vec_l_mad, 0);
*pFrameSad += l_sad;
pSad8x8[tmp_mb_index2] = l_sad;
pSd8x8[tmp_mb_index2] = l_sd;
pMad8x8[tmp_mb_index2] = l_mad;
// ---- 8x8 block 3: bottom-right (8 rows down, 8 columns right) ----
l_mad = l_sd = l_sad = 0;
tmp_cur_row = tmp_cur + pic_stride_x8 + 8;
tmp_ref_row = tmp_ref + pic_stride_x8 + 8;
vec_l_sd = vec_l_sad = vec_l_mad = zero;
for (k = 0; k < 8; k ++) {
DUP2_ARG2(__lsx_vld, tmp_cur_row, 0, tmp_ref_row, 0, cur, ref);
DUP2_ARG2(__lsx_vilvl_b, zero, cur, zero, ref, cur, ref);
vec_diff = __lsx_vsub_h(cur, ref);
vec_l_sd = __lsx_vadd_h(vec_l_sd, vec_diff);
vec_abs_diff = __lsx_vabsd_h(cur, ref);
vec_l_sad = __lsx_vadd_h(vec_l_sad, vec_abs_diff);
vec_l_mad = __lsx_vmax_h(vec_l_mad, vec_abs_diff);
tmp_cur_row += iPicStride;
tmp_ref_row += iPicStride;
}
// Same sum and max reductions as for block 0.
DUP2_ARG2(__lsx_vhaddw_w_h, vec_l_sd, vec_l_sd, vec_l_sad, vec_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_d_w, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vhaddw_q_d, tmp_l_sd, tmp_l_sd, tmp_l_sad, tmp_l_sad,
tmp_l_sd, tmp_l_sad);
DUP2_ARG2(__lsx_vpickve2gr_d, tmp_l_sd, 0, tmp_l_sad, 0, l_sd, l_sad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 8);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 4);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
tmp_l_mad = __lsx_vbsrl_v(vec_l_mad, 2);
vec_l_mad = __lsx_vmax_h(vec_l_mad, tmp_l_mad);
l_mad = __lsx_vpickve2gr_h(vec_l_mad, 0);
*pFrameSad += l_sad;
pSad8x8[tmp_mb_index3] = l_sad;
pSd8x8[tmp_mb_index3] = l_sd;
pMad8x8[tmp_mb_index3] = l_mad;
// Advance to the next macroblock in this row.
tmp_ref += 16;
tmp_cur += 16;
++mb_index;
}
// Advance to the first macroblock of the next macroblock row.
tmp_ref += step;
tmp_cur += step;
}
}
@@ -89,6 +89,18 @@ class CSceneChangeDetectorVideo {
}
#endif
#ifdef HAVE_MMI
if (iCpuFlag & WELS_CPU_MMI) {
m_pfSad = WelsSampleSad8x8_mmi;
}
#endif
#ifdef HAVE_LASX
if (iCpuFlag & WELS_CPU_LASX) {
m_pfSad = WelsSampleSad8x8_lasx;
}
#endif
m_fSceneChangeMotionRatioLarge = SCENE_CHANGE_MOTION_RATIO_LARGE_VIDEO;
m_fSceneChangeMotionRatioMedium = SCENE_CHANGE_MOTION_RATIO_MEDIUM;
}
@@ -103,6 +103,12 @@ void CVAACalculation::InitVaaFuncs (SVaaFuncs& sVaaFuncs, int32_t iCpuFlag) {
sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_mmi;
}
#endif//HAVE_MMI
#ifdef HAVE_LSX
if ((iCpuFlag & WELS_CPU_LSX) == WELS_CPU_LSX) {
sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_lsx;
}
#endif//HAVE_LSX
}
EResult CVAACalculation::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
@@ -142,6 +142,12 @@ VAACalcSadSsdFunc VAACalcSadSsd_mmi;
WELSVP_EXTERN_C_END
#endif
#ifdef HAVE_LSX
WELSVP_EXTERN_C_BEGIN
VAACalcSadBgdFunc VAACalcSadBgd_lsx;
WELSVP_EXTERN_C_END
#endif
class CVAACalculation : public IStrategy {
public:
CVAACalculation (int32_t iCpuFlag);
+31 -4
View File
@@ -58,14 +58,41 @@ PROCESSING_OBJS += $(PROCESSING_OBJSARM64)
endif
OBJS += $(PROCESSING_OBJSARM64)
PROCESSING_ASM_MIPS_SRCS=\
PROCESSING_ASM_MIPS_MMI_SRCS=\
$(PROCESSING_SRCDIR)/src/mips/vaa_mmi.c\
PROCESSING_OBJSMIPS += $(PROCESSING_ASM_MIPS_SRCS:.c=.$(OBJ))
PROCESSING_OBJSMIPS_MMI += $(PROCESSING_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
PROCESSING_ASM_MIPS_MSA_SRCS=\
PROCESSING_OBJSMIPS_MSA += $(PROCESSING_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), mips)
PROCESSING_OBJS += $(PROCESSING_OBJSMIPS)
ifeq ($(ENABLE_MMI), Yes)
PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MMI)
endif
ifeq ($(ENABLE_MSA), Yes)
PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MSA)
endif
endif
OBJS += $(PROCESSING_OBJSMIPS_MMI)
OBJS += $(PROCESSING_OBJSMIPS_MSA)
PROCESSING_ASM_LOONGARCH_LSX_SRCS=\
$(PROCESSING_SRCDIR)/src/loongarch/vaa_lsx.c\
PROCESSING_OBJSLOONGARCH_LSX += $(PROCESSING_ASM_LOONGARCH_LSX_SRCS:.c=.$(OBJ))
PROCESSING_ASM_LOONGARCH_LASX_SRCS=\
PROCESSING_OBJSLOONGARCH_LASX += $(PROCESSING_ASM_LOONGARCH_LASX_SRCS:.c=.$(OBJ))
ifeq ($(ASM_ARCH), loongarch)
ifeq ($(ENABLE_LSX), Yes)
PROCESSING_OBJS += $(PROCESSING_OBJSLOONGARCH_LSX)
endif
ifeq ($(ENABLE_LASX), Yes)
PROCESSING_OBJS += $(PROCESSING_OBJSLOONGARCH_LASX)
endif
endif
OBJS += $(PROCESSING_OBJSMIPS)
OBJS += $(PROCESSING_OBJS)
+1 -1
View File
@@ -4,5 +4,5 @@ foreach header : api_headers
api_header_deps += configure_file(
input : header[1],
output : header[0],
configuration : configuration_data())
copy : true)
endforeach
+58
View File
@@ -0,0 +1,58 @@
#ifndef __BASETHREADDECODERTEST_H__
#define __BASETHREADDECODERTEST_H__
#include "test_stdint.h"
#include <limits.h>
#include <fstream>
#include "codec_api.h"
#include "utils/BufferedData.h"
/**
 * Helper used by the multi-threaded decoder tests: owns an ISVCDecoder,
 * feeds it an H.264 file frame by frame and reports every decoded picture
 * to a caller-supplied Callback.
 */
class BaseThreadDecoderTest {
 public:
  /// One picture plane as handed to the callback (data is not owned).
  struct Plane {
    const uint8_t* data;
    int width;
    int height;
    int stride;
  };

  /// A decoded picture split into its y, u and v planes.
  struct Frame {
    Plane y;
    Plane u;
    Plane v;
  };

  /// State tracked by Open().
  typedef enum tagDecodeStatus {
    OpenFile,
    Decoding,
    EndOfStream,
    End
  } eDecodeStatus;

  /// Receiver for decoded pictures.
  struct Callback {
    // Polymorphic interface: a virtual destructor keeps deletion through a
    // Callback* well-defined.
    virtual ~Callback() {}
    virtual void onDecodeFrame (const Frame& frame) = 0;
  };

  BaseThreadDecoderTest();
  /// Creates and initializes decoder_; returns 0 on success.
  int32_t SetUp();
  /// Uninitializes and destroys decoder_ if one was created.
  void TearDown();
  /// Decodes the whole file, invoking cbk for each output picture.
  bool ThreadDecodeFile (const char* fileName, Callback* cbk);
  /// Opens fileName on file_ when the status is OpenFile.
  bool Open (const char* fileName);
  ISVCDecoder* decoder_;

 private:
  void DecodeFrame (const uint8_t* src, size_t sliceSize, Callback* cbk);
  void FlushFrame (Callback* cbk);

  std::ifstream file_;
  SBufferInfo sBufInfo;       // info of the most recent decode/flush call
  uint8_t* pData[3];          // output plane pointers filled by the decoder
  uint64_t uiTimeStamp;       // incremented for every DecodeFrame call
  FILE* pYuvFile;             // YUV dump target, used only when bEnableYuvDumpTest
  bool bEnableYuvDumpTest;
  eDecodeStatus decodeStatus_;
};
#endif //__BASETHREADDECODERTEST_H__
+319
View File
@@ -0,0 +1,319 @@
#include <fstream>
#include <gtest/gtest.h>
#include "codec_def.h"
#include "codec_app_def.h"
#include "utils/BufferedData.h"
#include "BaseThreadDecoderTest.h"
// Returns the bit at position curBit of pBufPtr (bit 0 is the most significant
// bit of byte 0) and advances curBit by one.
static int32_t readBit (uint8_t* pBufPtr, int32_t& curBit) {
  const int32_t bitPos = curBit++;
  const uint8_t byteVal = pBufPtr[bitPos / 8];
  const int shift = 7 - bitPos % 8;  // distance of the wanted bit from the LSB
  return (byteVal >> shift) & 0x01;
}
// Reads n consecutive bits starting at curBit, most significant bit first,
// and returns them as an integer. curBit is advanced by n.
static int32_t readBits (uint8_t* pBufPtr, int32_t& n, int32_t& curBit) {
  int value = 0;
  for (int shift = n - 1; shift >= 0; --shift) {
    value |= (readBit (pBufPtr, curBit) << shift);
  }
  return value;
}
// Decodes one unsigned Exp-Golomb code starting at bit position curBit:
// counts the leading zero bits (at most 32), reads that many suffix bits and
// returns (2^leadingZeros - 1) + suffix. curBit is advanced past the code.
static int32_t bsGetUe (uint8_t* pBufPtr, int32_t& curBit) {
  int32_t leadingZeros = 0;
  while ((readBit (pBufPtr, curBit) == 0) && (leadingZeros < 32)) {
    leadingZeros++;
  }
  const uint32_t suffix = static_cast<uint32_t> (readBits (pBufPtr, leadingZeros, curBit));
  // leadingZeros can reach 32 on malformed input; shifting a 32-bit int by 32
  // is undefined, so build the prefix in 64-bit unsigned arithmetic instead.
  const uint64_t prefix = (static_cast<uint64_t> (1) << leadingZeros) - 1;
  return static_cast<int32_t> (static_cast<uint32_t> (prefix + suffix));
}
// Returns the first Exp-Golomb value found one byte after pSliceNalPtr
// (presumably first_mb_in_slice following the one-byte NAL header -- confirm).
static int32_t readFirstMbInSlice (uint8_t* pSliceNalPtr) {
  int32_t bitCursor = 0;
  uint8_t* pPayload = pSliceNalPtr + 1;
  return bsGetUe (pPayload, bitCursor);
}
// Scans pBuf from bufPos for Annex-B start codes (00 00 01 / 00 00 00 01) and
// returns how many bytes, counted from bufPos, should be handed to the decoder
// as the next chunk. The chunk ends where the NAL unit counters below indicate
// that another picture starts; if no such boundary is found, all remaining
// bytes (iFileSize - bufPos) are returned.
static int32_t ReadFrame (uint8_t* pBuf, const int32_t& iFileSize, const int32_t& bufPos) {
int32_t bytes_available = iFileSize - bufPos;
// Too short to contain a start code plus a NAL header: return what is left.
if (bytes_available < 4) {
return bytes_available;
}
uint8_t* ptr = pBuf + bufPos;
int32_t read_bytes = 0;
// Number of NAL units of each kind seen so far in this chunk.
int32_t sps_count = 0;
int32_t pps_count = 0;
int32_t non_idr_pict_count = 0;
int32_t idr_pict_count = 0;
int32_t nal_deliminator = 0;
while (read_bytes < bytes_available - 4) {
bool has4ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 1;
bool has3ByteStartCode = false;
if (!has4ByteStartCode) {
has3ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 1;
}
if (has4ByteStartCode || has3ByteStartCode) {
int32_t byteOffset = has4ByteStartCode ? 4 : 3;
// nal_unit_type is the low 5 bits of the byte following the start code.
uint8_t nal_unit_type = has4ByteStartCode ? (ptr[4] & 0x1F) : (ptr[3] & 0x1F);
if (nal_unit_type == 1) {
// Type 1 is counted as a non-IDR picture slice. A slice whose first MB is 0
// ends the chunk once any earlier slice (IDR or non-IDR) has been seen.
int32_t firstMBInSlice = readFirstMbInSlice (ptr + byteOffset);
if (++non_idr_pict_count >= 1 && idr_pict_count >= 1 && firstMBInSlice == 0) {
return read_bytes;
}
if (non_idr_pict_count >= 2 && firstMBInSlice == 0) {
return read_bytes;
}
} else if (nal_unit_type == 5) {
// Type 5 is counted as an IDR picture slice; same rule as above.
int32_t firstMBInSlice = readFirstMbInSlice (ptr + byteOffset);
if (++idr_pict_count >= 1 && non_idr_pict_count >= 1 && firstMBInSlice == 0) {
return read_bytes;
}
if (idr_pict_count >= 2 && firstMBInSlice == 0) {
return read_bytes;
}
} else if (nal_unit_type == 7) {
// Type 7 is counted as an SPS: it ends the chunk if a slice was already
// seen, or if it is the second SPS.
if ((++sps_count >= 1) && (non_idr_pict_count >= 1 || idr_pict_count >= 1)) {
return read_bytes;
}
if (sps_count == 2) return read_bytes;
} else if (nal_unit_type == 8) {
// Type 8 is counted as a PPS: it ends the chunk if a slice was already seen.
if (++pps_count >= 1 && (non_idr_pict_count >= 1 || idr_pict_count >= 1)) return read_bytes;
} else if (nal_unit_type == 9) {
// Type 9 is counted as a delimiter: the second one ends the chunk.
if (++nal_deliminator == 2) {
return read_bytes;
}
}
// NOTE(review): this condition is the negation of the loop condition and
// read_bytes has not changed since it was tested, so this branch looks unreachable.
if (read_bytes >= bytes_available - 4) {
return bytes_available;
}
// Skip past the start code; always 4 bytes, even for a 3-byte start code.
read_bytes += 4;
ptr += 4;
} else {
++ptr;
++read_bytes;
}
}
return bytes_available;
}
// Writes a picture to pFp plane by plane: pData[0] as iHeight rows of iWidth
// bytes (row pitch iStride[0]), then pData[1] and pData[2] as iHeight / 2 rows
// of iWidth / 2 bytes each (row pitch iStride[1]).
static void Write2File (FILE* pFp, unsigned char* pData[3], int iStride[2], int iWidth, int iHeight) {
  for (int iPlane = 0; iPlane < 3; ++iPlane) {
    const int iStrideIdx = (iPlane == 0) ? 0 : 1;
    const int iRowBytes = (iPlane == 0) ? iWidth : iWidth / 2;
    const int iRows = (iPlane == 0) ? iHeight : iHeight / 2;
    unsigned char* pRow = pData[iPlane];
    for (int iRow = 0; iRow < iRows; ++iRow) {
      fwrite (pRow, 1, iRowBytes, pFp);
      pRow += iStride[iStrideIdx];
    }
  }
}
// Dumps the picture described by pInfo to pFp as raw planes.
// Does nothing when the file, the info struct or any of the three plane
// pointers is NULL.
static void Process (SBufferInfo* pInfo, FILE* pFp) {
  // pInfo must be checked before it is dereferenced for the plane pointers.
  if (pFp == NULL || pInfo == NULL) {
    return;
  }
  if (pInfo->pDst[0] == NULL || pInfo->pDst[1] == NULL || pInfo->pDst[2] == NULL) {
    return;
  }
  int iStride[2];
  int iWidth = pInfo->UsrData.sSystemBuffer.iWidth;
  int iHeight = pInfo->UsrData.sSystemBuffer.iHeight;
  iStride[0] = pInfo->UsrData.sSystemBuffer.iStride[0];
  iStride[1] = pInfo->UsrData.sSystemBuffer.iStride[1];
  Write2File (pFp, (unsigned char**)pInfo->pDst, iStride, iWidth, iHeight);
}
// Starts with no decoder, a zero timestamp counter, no YUV dump file, YUV
// dumping disabled and the status set to OpenFile. sBufInfo and pData are not
// initialized here; DecodeFrame and FlushFrame set them before reading them.
BaseThreadDecoderTest::BaseThreadDecoderTest()
: decoder_ (NULL), uiTimeStamp (0), pYuvFile (NULL), bEnableYuvDumpTest (false), decodeStatus_ (OpenFile) {
}
int32_t BaseThreadDecoderTest::SetUp() {
long rv = WelsCreateDecoder (&decoder_);
EXPECT_EQ (0, rv);
EXPECT_TRUE (decoder_ != NULL);
if (decoder_ == NULL) {
return rv;
}
SDecodingParam decParam;
memset (&decParam, 0, sizeof (SDecodingParam));
decParam.uiTargetDqLayer = UCHAR_MAX;
decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
int iThreadCount = (rand() % 2) + 2;
decoder_->SetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
rv = decoder_->Initialize (&decParam);
EXPECT_EQ (0, rv);
return (int32_t)rv;
}
// Uninitializes and destroys the decoder created by SetUp(). Safe to call
// when no decoder exists, and safe to call more than once.
void BaseThreadDecoderTest::TearDown() {
  if (decoder_ == NULL) {
    return;
  }
  decoder_->Uninitialize();
  WelsDestroyDecoder (decoder_);
  // Drop the now-dangling pointer so a repeated TearDown() cannot destroy it twice.
  decoder_ = NULL;
}
void BaseThreadDecoderTest::DecodeFrame (const uint8_t* src, size_t sliceSize, Callback* cbk) {
SBufferInfo bufInfo;
memset (pData, 0, sizeof (pData));
memset (&bufInfo, 0, sizeof (SBufferInfo));
bufInfo.uiInBsTimeStamp = ++uiTimeStamp;
DECODING_STATE rv = decoder_->DecodeFrameNoDelay (src, (int) sliceSize, pData, &bufInfo);
ASSERT_TRUE (rv == dsErrorFree);
sBufInfo = bufInfo;
if (sBufInfo.iBufferStatus == 1 && cbk != NULL) {
if (bEnableYuvDumpTest) {
Process (&sBufInfo, pYuvFile);
}
const Frame frame = {
{
// y plane
sBufInfo.pDst[0],
bufInfo.UsrData.sSystemBuffer.iWidth,
bufInfo.UsrData.sSystemBuffer.iHeight,
bufInfo.UsrData.sSystemBuffer.iStride[0]
},
{
// u plane
sBufInfo.pDst[1],
sBufInfo.UsrData.sSystemBuffer.iWidth / 2,
sBufInfo.UsrData.sSystemBuffer.iHeight / 2,
sBufInfo.UsrData.sSystemBuffer.iStride[1]
},
{
// v plane
sBufInfo.pDst[2],
sBufInfo.UsrData.sSystemBuffer.iWidth / 2,
sBufInfo.UsrData.sSystemBuffer.iHeight / 2,
sBufInfo.UsrData.sSystemBuffer.iStride[1]
},
};
cbk->onDecodeFrame (frame);
}
}
// Asks the decoder to flush one buffered picture. When a picture is returned
// (iBufferStatus == 1) and a callback is given, optionally dumps the picture
// to the YUV file and passes it to cbk.
void BaseThreadDecoderTest::FlushFrame (Callback* cbk) {
  SBufferInfo sDstInfo;
  memset (pData, 0, sizeof (pData));
  memset (&sDstInfo, 0, sizeof (SBufferInfo));

  DECODING_STATE eState = decoder_->FlushFrame (pData, &sDstInfo);
  ASSERT_TRUE (eState == dsErrorFree);

  // Remember the result of this call in the member copy.
  sBufInfo = sDstInfo;
  if (sBufInfo.iBufferStatus != 1 || cbk == NULL) {
    return;
  }

  if (bEnableYuvDumpTest) {
    Process (&sBufInfo, pYuvFile);
  }

  // Chroma planes are half the luma size in both directions and share iStride[1].
  const Frame frame = {
    { sBufInfo.pDst[0],
      sBufInfo.UsrData.sSystemBuffer.iWidth,
      sBufInfo.UsrData.sSystemBuffer.iHeight,
      sBufInfo.UsrData.sSystemBuffer.iStride[0] },  // y plane
    { sBufInfo.pDst[1],
      sBufInfo.UsrData.sSystemBuffer.iWidth / 2,
      sBufInfo.UsrData.sSystemBuffer.iHeight / 2,
      sBufInfo.UsrData.sSystemBuffer.iStride[1] },  // u plane
    { sBufInfo.pDst[2],
      sBufInfo.UsrData.sSystemBuffer.iWidth / 2,
      sBufInfo.UsrData.sSystemBuffer.iHeight / 2,
      sBufInfo.UsrData.sSystemBuffer.iStride[1] },  // v plane
  };
  cbk->onDecodeFrame (frame);
}
// Loads fileName into memory, splits it into chunks with ReadFrame(), decodes
// each chunk, then signals end of stream and flushes the pictures still held
// by the decoder. Every output picture is reported through cbk.
//
// Returns false if the file cannot be opened, memory runs out, or a fatal
// gtest failure is raised while reading or decoding; true otherwise.
// When bEnableYuvDumpTest is set, output is also written to "<fileName minus
// extension>.yuv"; that file is closed on every exit path taken after it was
// opened.
bool BaseThreadDecoderTest::ThreadDecodeFile (const char* fileName, Callback* cbk) {
  std::ifstream file (fileName, std::ios::in | std::ios::binary);
  if (!file.is_open())
    return false;

  // Read the whole bitstream into memory.
  BufferedData buf;
  char b;
  for (;;) {
    file.read (&b, 1);
    if (file.gcount() != 1) { // end of file
      break;
    }
    if (!buf.PushBack (b)) {
      std::cout << "unable to allocate memory" << std::endl;
      return false;
    }
  }

  if (bEnableYuvDumpTest) {
    std::string outFileName = std::string (fileName);
    size_t pos = outFileName.find_last_of (".");
    outFileName = outFileName.substr (0, pos) + std::string (".yuv");
    // May be NULL if the file cannot be created; Process() skips the dump then.
    pYuvFile = fopen (outFileName.c_str(), "wb");
  }

  uiTimeStamp = 0;
  memset (&sBufInfo, 0, sizeof (SBufferInfo));

  bool bSuccess = true;
  int32_t bufPos = 0;
  int32_t fileSize = (int32_t)buf.Length();
  while (bufPos < fileSize) {
    int32_t frameSize = ReadFrame (buf.data(), fileSize, bufPos);
    if (::testing::Test::HasFatalFailure()) {
      bSuccess = false;
      break;
    }
    uint8_t* frame_ptr = buf.data() + bufPos;
    DecodeFrame (frame_ptr, frameSize, cbk);
    if (::testing::Test::HasFatalFailure()) {
      bSuccess = false;
      break;
    }
    bufPos += frameSize;
  }

  if (bSuccess) {
    int32_t iEndOfStreamFlag = 1;
    decoder_->SetOption (DECODER_OPTION_END_OF_STREAM, &iEndOfStreamFlag);
    // Flush out last frames in decoder buffer
    int32_t num_of_frames_in_buffer = 0;
    decoder_->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
    for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
      FlushFrame (cbk);
    }
  }

  // Close the dump file on success and on failure alike. The NULL check covers
  // a failed fopen(); resetting the member avoids keeping a stale handle.
  if (pYuvFile != NULL) {
    fclose (pYuvFile);
    pYuvFile = NULL;
  }
  return bSuccess;
}
// Opens fileName for binary reading on file_. Only acts while the status is
// OpenFile; on success the status becomes Decoding and true is returned.
bool BaseThreadDecoderTest::Open (const char* fileName) {
  if (decodeStatus_ != OpenFile) {
    return false;
  }
  // file_ is an input stream: request read access. The previous ios_base::out
  // flag made the open in|out, which fails for read-only files.
  file_.open (fileName, std::ios_base::in | std::ios_base::binary);
  if (!file_.is_open()) {
    return false;
  }
  decodeStatus_ = Decoding;
  return true;
}
+3 -3
View File
@@ -781,9 +781,9 @@ const uint32_t kiFrameNum = 100; //DO NOT CHANGE!
const char* const pHashStr[][2] = { //DO NOT CHANGE!
// Allow for different output depending on whether averaging is done
// vertically or horizontally first when downsampling.
{ "caaaa3352ab8614e3a35836f5d7c9a528294e953", "326cc236e9ba5277aedc5cf0865dd4cbd2f89fe0" },
{ "2dc97661e94515d9947a344127062f82814afc2a", "72f36bb33d190979be88077c6166a09767dd2992" },
{ "106ec96a90412aabea5c0cfa6bfc654a0b5db33e", "998c2947bccf140bde1e43e29376614038eb7c71" }
{ "d5fb6d72f8cc0ea4b037e883598c162fd32b475d", "0fc7e06d0d766ac911730da2aa9e953bc858a161" },
{ "17203f07486e895aef7c1bf94133fd731caba572", "1d47de674c9c44d8292ee00fa053a42bb9383614" },
{ "86bf890aef2abe24abe40ebe3d9ec76a25ddebe7", "43eaac708413c109ca120c5d570176f1c9b4036c" }
};
class DecodeParseAPI : public ::testing::TestWithParam<EncodeDecodeFileParamBase>, public EncodeDecodeTestBase {
+2 -2
View File
@@ -130,8 +130,8 @@ TEST_P (DecodeEncodeTest, CompareOutput) {
}
}
static const DecodeEncodeFileParam kFileParamArray[] = {
{"res/test_vd_1d.264", "f25b59215ffd34b1c6509d821a7392eb2247a107", 320, 192, 12.0f},
{"res/test_vd_rc.264", "345d376f395225a4121285c3c4fca145c9fd5fbd", 320, 192, 12.0f},
{"res/test_vd_1d.264", "47cdeeb156991a61af309f4145b23643556d35a2", 320, 192, 12.0f},
{"res/test_vd_rc.264", "37f9f80c7330ab35f611c6cb6d009c2f1e7815ab", 320, 192, 12.0f},
};

Some files were not shown because too many files have changed in this diff Show More