Bug 583958 - Add NEON detection to SSE.h - r=jmuizelaar

--HG--
extra : rebase_source : 2ae9de7e5a914e7ea30c3ef432edfa100027d656
This commit is contained in:
Tim Terribe 2011-04-09 06:57:52 +12:00
parent 61d4db165f
commit 21090ba813
7 changed files with 392 additions and 70 deletions

View File

@ -38,9 +38,7 @@
#include "ycbcr_to_rgb565.h"
//The logic for have_ycbcr_to_rgb565 is taken from pixman-cpu.c
#if !defined (HAVE_ARM_NEON)
#if !defined (MOZILLA_MAY_SUPPORT_NEON)
int have_ycbcr_to_rgb565 ()
{
@ -49,72 +47,9 @@ int have_ycbcr_to_rgb565 ()
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <elf.h>
#ifdef ANDROID
// Android variant: reports whether the first bytes of /proc/cpuinfo contain
// the substring "neon". The probe runs on the first call only; the outcome
// is kept in function-local statics and returned by every later call.
int have_ycbcr_to_rgb565 ()
{
// Set to 1 as soon as the probe has been attempted (even if it fails).
static int have_ycbcr_to_rgb565_initialized = 0;
// Cached probe result: non-zero if "neon" was found.
static int arm_has_neon = 0;
if (!have_ycbcr_to_rgb565_initialized)
{
have_ycbcr_to_rgb565_initialized = 1;
char buf[1024];
// NOTE(review): ver_token is not referenced anywhere in the visible body.
const char* ver_token = "CPU architecture: ";
FILE* f = fopen("/proc/cpuinfo", "r");
if (!f) {
// The initialized flag is already set, so later calls skip the probe and
// return the cached arm_has_neon (still 0).
return 0;
}
// NOTE(review): the fread() result is ignored and buf is never
// NUL-terminated before being handed to strstr() -- confirm this is safe.
fread(buf, sizeof(char), 1024, f);
arm_has_neon = strstr(buf, "neon") != NULL;
fclose(f);
}
return arm_has_neon;
// NOTE(review): unreachable as rendered; this span appears to interleave the
// removed /proc/cpuinfo probe with the added mozilla::supports_neon() call.
return mozilla::supports_neon();
}
#else
// Non-Android variant: reports whether the AT_HWCAP entry read from
// /proc/self/auxv has bit 4096 set. The file is scanned on the first call
// only; the answer is cached in function-local statics afterwards.
int have_ycbcr_to_rgb565 ()
{
  static int neon_probe_done = 0;
  static int neon_available = 0;

  // Fast path: the probe has already been attempted.
  if (neon_probe_done)
    return neon_available;
  neon_probe_done = 1;

  int auxv_fd = open ("/proc/self/auxv", O_RDONLY);
  if (auxv_fd < 0)
    return neon_available;

  // Walk the auxiliary vector one record at a time and stop at the first
  // AT_HWCAP record or at the first short/failed read.
  Elf32_auxv_t entry;
  for (;;)
  {
    if (read (auxv_fd, &entry, sizeof(Elf32_auxv_t)) != sizeof(Elf32_auxv_t))
      break;
    if (entry.a_type != AT_HWCAP)
      continue;
    uint32_t caps = entry.a_un.a_val;
    neon_available = (caps & 4096) != 0;
    break;
  }
  close (auxv_fd);
  return neon_available;
}
#endif //ANDROID
#endif //_MSC_VER
#endif //MOZILLA_MAY_SUPPORT_NEON

View File

@ -1,5 +1,10 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef MEDIA_BASE_YCBCR_TO_RGB565_H_
#define MEDIA_BASE_YCBCR_TO_RGB565_H_
#include "mozilla/arm.h"
int have_ycbcr_to_rgb565();
#endif // MEDIA_BASE_YCBCR_TO_RGB565_H_

View File

@ -132,6 +132,7 @@ EXPORTS_mozilla = \
Monitor.h \
Mutex.h \
SSE.h \
arm.h \
unused.h \
$(NULL)

200
xpcom/glue/arm.cpp Normal file
View File

@ -0,0 +1,200 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is arm.cpp
*
* The Initial Developer of the Original Code is the Mozilla Foundation.
* Portions created by the Initial Developer are Copyright (C) 2011
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Timothy B. Terriberry <tterriberry@mozilla.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* compile-time and runtime tests for whether to use various ARM extensions */
#include "arm.h"
namespace {
// arm.h has parallel #ifs which declare MOZILLA_ARM_HAVE_CPUID_DETECTION.
// We don't check it here so that we get compile errors if it's defined, but
// we don't compile one of these detection methods. The detection code here is
// based on the CPU detection in libtheora.
#if defined(_MSC_VER)
//For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.
# define WIN32_LEAN_AND_MEAN
# define WIN32_EXTRA_LEAN
# include <windows.h>
// MSVC runtime probe for EDSP support.
// When MOZILLA_MAY_SUPPORT_EDSP is defined, executes a single emitted opcode
// inside a structured-exception block and returns true if it completes.
// Returns false if the instruction raises EXCEPTION_ILLEGAL_INSTRUCTION, or
// if MOZILLA_MAY_SUPPORT_EDSP is not defined (the probe is compiled out).
// The filter only accepts EXCEPTION_ILLEGAL_INSTRUCTION; any other exception
// code is not handled here.
static bool
check_edsp(void)
{
# if defined(MOZILLA_MAY_SUPPORT_EDSP)
__try
{
//PLD [r13]
__emit(0xF5DDF000);
return true;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION)
{
//Ignore exception; fall through to the "not supported" return below.
}
# endif
return false;
}
// MSVC runtime probe for ARMv6 support.
// When MOZILLA_MAY_SUPPORT_ARMV6 is defined, executes a single emitted opcode
// inside a structured-exception block and returns true if it completes.
// Returns false if the instruction raises EXCEPTION_ILLEGAL_INSTRUCTION, or
// if MOZILLA_MAY_SUPPORT_ARMV6 is not defined (the probe is compiled out).
// The filter only accepts EXCEPTION_ILLEGAL_INSTRUCTION; any other exception
// code is not handled here.
static bool
check_armv6(void)
{
# if defined(MOZILLA_MAY_SUPPORT_ARMV6)
__try
{
//SHADD8 r3,r3,r3
__emit(0xE6333F93);
return true;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION)
{
//Ignore exception; fall through to the "not supported" return below.
}
# endif
return false;
}
// MSVC runtime probe for NEON support.
// When MOZILLA_MAY_SUPPORT_NEON is defined, executes a single emitted opcode
// inside a structured-exception block and returns true if it completes.
// Returns false if the instruction raises EXCEPTION_ILLEGAL_INSTRUCTION, or
// if MOZILLA_MAY_SUPPORT_NEON is not defined (the probe is compiled out).
// The filter only accepts EXCEPTION_ILLEGAL_INSTRUCTION; any other exception
// code is not handled here.
static bool
check_neon(void)
{
# if defined(MOZILLA_MAY_SUPPORT_NEON)
__try
{
//VORR q0,q0,q0
__emit(0xF2200150);
return true;
}
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION)
{
//Ignore exception; fall through to the "not supported" return below.
}
# endif
return false;
}
#elif defined(__linux__) || defined(ANDROID)
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
// Bit flags returned by get_arm_cpu_flags(), one per optional ARM feature.
enum{
  MOZILLA_HAS_EDSP_FLAG=1,
  MOZILLA_HAS_ARMV6_FLAG=2,
  MOZILLA_HAS_NEON_FLAG=4
};

// Returns true if |token| occurs in |line| immediately followed by a space or
// a newline, i.e. as a complete feature name rather than as the prefix of a
// longer one. Callers pass the token with its leading space (" neon") so the
// match is anchored on both sides. Every occurrence is examined, so an
// earlier longer name sharing the prefix cannot hide a later exact match.
static bool
cpuinfo_line_has_token(const char *line, const char *token)
{
  const size_t token_len = strlen(token);
  const char *p = line;
  while ((p = strstr(p, token)) != NULL)
  {
    const char next = p[token_len];
    if (next == ' ' || next == '\n')
      return true;
    ++p;
  }
  return false;
}

// Parses /proc/cpuinfo and returns a bitmask of MOZILLA_HAS_*_FLAG values:
//  - EDSP / NEON are set when " edsp" / " neon" appear as whole words on a
//    line starting with "Features";
//  - ARMV6 is set when a line starting with "CPU architecture:" carries a
//    number >= 6.
// Returns 0 if the file cannot be opened.
static unsigned
get_arm_cpu_flags(void)
{
  static const char kFeaturesKey[] = "Features";
  static const char kArchKey[] = "CPU architecture:";
  unsigned flags = 0;

  /*Reading /proc/self/auxv would be easier, but that doesn't work reliably on
    Android. This also means that detection will fail in Scratchbox, which is
    desirable, as NEON does not work in the qemu shipped with the Maemo 5 SDK.
    I don't know if /proc/self/auxv would do any better in that case, anyway,
    or if it would return random flags from the host CPU.*/
  FILE *fin = fopen ("/proc/cpuinfo","r");
  if (fin == NULL)
    return flags;

  /*512 should be enough for anybody (it's even enough for all the flags that
    x86 has accumulated... so far).*/
  char buf[512];
  // True when the next fgets() chunk begins a new line of the file. A line
  // longer than the buffer arrives in several chunks; only the first one may
  // be matched against the line keys.
  bool at_line_start = true;
  while (fgets(buf, sizeof buf, fin) != NULL)
  {
    const size_t len = strlen(buf);
    const bool starts_line = at_line_start;
    at_line_start = len > 0 && buf[len - 1] == '\n';
    if (!starts_line)
      continue;

    // strncmp (not memcmp) so a line shorter than the key is never compared
    // past its terminating NUL.
    if (strncmp(buf, kFeaturesKey, sizeof kFeaturesKey - 1) == 0)
    {
      if (cpuinfo_line_has_token(buf, " edsp"))
        flags |= MOZILLA_HAS_EDSP_FLAG;
      if (cpuinfo_line_has_token(buf, " neon"))
        flags |= MOZILLA_HAS_NEON_FLAG;
    }
    if (strncmp(buf, kArchKey, sizeof kArchKey - 1) == 0)
    {
      // strtol yields 0 for non-numeric text, which correctly fails the test.
      const long version = strtol(buf + (sizeof kArchKey - 1), NULL, 10);
      if (version >= 6)
        flags |= MOZILLA_HAS_ARMV6_FLAG;
    }
  }
  fclose(fin);
  return flags;
}
// Bitmask of MOZILLA_HAS_*_FLAG values, filled in by get_arm_cpu_flags() when
// this file-scope static is initialized. Caching it here means /proc/cpuinfo
// is read only once; the check_*() functions below just test bits in it.
static unsigned arm_cpu_flags = get_arm_cpu_flags();
// Linux/Android: true when the cached /proc/cpuinfo flags include the EDSP bit.
static bool
check_edsp(void)
{
  const unsigned edsp_bit = arm_cpu_flags & MOZILLA_HAS_EDSP_FLAG;
  return edsp_bit != 0;
}
// Linux/Android: true when the cached /proc/cpuinfo flags include the ARMv6 bit.
static bool
check_armv6(void)
{
  const unsigned armv6_bit = arm_cpu_flags & MOZILLA_HAS_ARMV6_FLAG;
  return armv6_bit != 0;
}
// Linux/Android: true when the cached /proc/cpuinfo flags include the NEON bit.
static bool
check_neon(void)
{
  const unsigned neon_bit = arm_cpu_flags & MOZILLA_HAS_NEON_FLAG;
  return neon_bit != 0;
}
#endif
}
namespace mozilla {
namespace arm_private {

// Definitions of the runtime-detection results declared in arm.h. Each one
// exists only when runtime detection is available and the corresponding
// feature is not presumed at compile time; it is initialized from the
// matching check_*() probe defined earlier in this file.
#ifdef MOZILLA_ARM_HAVE_CPUID_DETECTION

#  ifndef MOZILLA_PRESUME_EDSP
bool edsp_enabled = check_edsp();
#  endif

#  ifndef MOZILLA_PRESUME_ARMV6
bool armv6_enabled = check_armv6();
#  endif

#  ifndef MOZILLA_PRESUME_NEON
bool neon_enabled = check_neon();
#  endif

#endif // MOZILLA_ARM_HAVE_CPUID_DETECTION

} // namespace arm_private
} // namespace mozilla

180
xpcom/glue/arm.h Normal file
View File

@ -0,0 +1,180 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is arm.h
*
* The Initial Developer of the Original Code is the Mozilla Foundation.
* Portions created by the Initial Developer are Copyright (C) 2011
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Timothy B. Terriberry <tterriberry@mozilla.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/* compile-time and runtime tests for whether to use various ARM extensions */
#ifndef mozilla_arm_h_
#define mozilla_arm_h_
// for definition of NS_COM_GLUE
#include "nscore.h"
/* This is patterned after SSE.h, but provides ARMv5E, ARMv6, and NEON
detection. For reasons similar to the SSE code, code using NEON (even just
in inline asm) needs to be in a separate compilation unit from the regular
code, because it requires an ".fpu neon" directive which can't be undone.
ARMv5E and ARMv6 code may also require an .arch directive, since by default
the assembler refuses to generate code for opcodes outside of its current
.arch setting.
TODO: Add Thumb, Thumb2, VFP, iwMMX, etc. detection, if we need it. */
// Compile-time configuration. This chain defines:
//   MOZILLA_ARM_ARCH                  - the ARM architecture version targeted
//                                       by the compiler (3 when unknown);
//   MOZILLA_MAY_SUPPORT_{EDSP,ARMV6,NEON}
//                                     - the toolchain/build can produce code
//                                       for that extension;
//   MOZILLA_ARM_HAVE_CPUID_DETECTION  - arm.cpp has a runtime probe for this
//                                       platform.
#if defined(__GNUC__) && defined(__arm__)

// Start from the oldest architecture and raise the value for every newer
// architecture macro that is defined.
#  define MOZILLA_ARM_ARCH 3

#  if defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) \
   || defined(_ARM_ARCH_4)
#    undef MOZILLA_ARM_ARCH
#    define MOZILLA_ARM_ARCH 4
#  endif

#  if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
   || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
   || defined(__ARM_ARCH_5TEJ__) || defined(_ARM_ARCH_5)
#    undef MOZILLA_ARM_ARCH
#    define MOZILLA_ARM_ARCH 5
#  endif

#  if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
   || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
   || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
   || defined(__ARM_ARCH_6M__) || defined(_ARM_ARCH_6)
#    undef MOZILLA_ARM_ARCH
#    define MOZILLA_ARM_ARCH 6
#  endif

#  if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
   || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
   || defined(__ARM_ARCH_7EM__) || defined(_ARM_ARCH_7)
#    undef MOZILLA_ARM_ARCH
#    define MOZILLA_ARM_ARCH 7
#  endif

// EDSP needs gcc >= 2.95.
#  if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95)
#    define MOZILLA_MAY_SUPPORT_EDSP 1
#  endif

// ARMv6 needs gcc >= 3.4 and a build configured with HAVE_ARM_SIMD.
#  if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
#    if defined(HAVE_ARM_SIMD)
#      define MOZILLA_MAY_SUPPORT_ARMV6 1
#    endif
#  endif

// Technically 4.2.x only works in the CodeSourcery releases, but I don't
// know how to detect those separately from mainline gcc (which got support
// in 4.3). The Maemo version 5 SDK shipped with the CodeSourcery 4.2.1
// release, which we need to work.
#  if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)
#    if defined(HAVE_ARM_NEON)
#      define MOZILLA_MAY_SUPPORT_NEON 1
#    endif
#  endif

// Currently we only have CPU detection for Linux via /proc/cpuinfo
#  if defined(__linux__) || defined(ANDROID)
#    define MOZILLA_ARM_HAVE_CPUID_DETECTION 1
#  endif

#elif defined(_MSC_VER) && defined(_M_ARM)

#  define MOZILLA_ARM_HAVE_CPUID_DETECTION 1
// I don't know how to do arch detection at compile time for MSVC, so assume
// the worst for now.
#  define MOZILLA_ARM_ARCH 3

// MSVC only allows external asm for ARM, so we don't have to rely on
// compiler support.
#  define MOZILLA_MAY_SUPPORT_EDSP 1
#  if defined(HAVE_ARM_SIMD)
#    define MOZILLA_MAY_SUPPORT_ARMV6 1
#  endif
// NEON is gated on HAVE_ARM_NEON, matching the GCC branch above; gating it
// on HAVE_ARM_SIMD would tie NEON support to the ARMv6 SIMD build option.
#  if defined(HAVE_ARM_NEON)
#    define MOZILLA_MAY_SUPPORT_NEON 1
#  endif

#endif
namespace mozilla {

// Storage for the runtime-detection results (defined in arm.cpp). A variable
// is declared only when runtime detection exists for this platform and the
// feature is not presumed at compile time.
namespace arm_private {
#ifdef MOZILLA_ARM_HAVE_CPUID_DETECTION
#  ifndef MOZILLA_PRESUME_EDSP
  extern bool NS_COM_GLUE edsp_enabled;
#  endif
#  ifndef MOZILLA_PRESUME_ARMV6
  extern bool NS_COM_GLUE armv6_enabled;
#  endif
#  ifndef MOZILLA_PRESUME_NEON
  extern bool NS_COM_GLUE neon_enabled;
#  endif
#endif // MOZILLA_ARM_HAVE_CPUID_DETECTION
} // namespace arm_private

// Each supports_*() query resolves in one of three ways:
//   - MOZILLA_PRESUME_<X> defined: always true (and MAY_SUPPORT is implied);
//   - MOZILLA_MAY_SUPPORT_<X> and runtime detection available: the value
//     detected at startup;
//   - otherwise: always false.

#if defined(MOZILLA_PRESUME_EDSP)
#  define MOZILLA_MAY_SUPPORT_EDSP 1
  inline bool supports_edsp()
  {
    return true;
  }
#elif defined(MOZILLA_MAY_SUPPORT_EDSP) \
   && defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
  inline bool supports_edsp()
  {
    return arm_private::edsp_enabled;
  }
#else
  inline bool supports_edsp()
  {
    return false;
  }
#endif

#if defined(MOZILLA_PRESUME_ARMV6)
#  define MOZILLA_MAY_SUPPORT_ARMV6 1
  inline bool supports_armv6()
  {
    return true;
  }
#elif defined(MOZILLA_MAY_SUPPORT_ARMV6) \
   && defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
  inline bool supports_armv6()
  {
    return arm_private::armv6_enabled;
  }
#else
  inline bool supports_armv6()
  {
    return false;
  }
#endif

#if defined(MOZILLA_PRESUME_NEON)
#  define MOZILLA_MAY_SUPPORT_NEON 1
  inline bool supports_neon()
  {
    return true;
  }
#elif defined(MOZILLA_MAY_SUPPORT_NEON) \
   && defined(MOZILLA_ARM_HAVE_CPUID_DETECTION)
  inline bool supports_neon()
  {
    return arm_private::neon_enabled;
  }
#else
  inline bool supports_neon()
  {
    return false;
  }
#endif

} // namespace mozilla
#endif /* !defined(mozilla_arm_h_) */

View File

@ -69,7 +69,7 @@ SDK_LIBRARY = \
$(LIB_PREFIX)xpcomglue_s_nomozalloc.$(LIB_SUFFIX) \
$(NULL)
GARBAGE += $(CSRCS) $(CPPSRCS) DeadlockDetector.h SSE.h
GARBAGE += $(CSRCS) $(CPPSRCS) DeadlockDetector.h SSE.h arm.h
# we don't want the shared lib, but we want to force the creation of a static lib.
FORCE_STATIC_LIB = 1
@ -94,7 +94,7 @@ OS_COMPILE_CFLAGS += -Zl
DEFINES += -D_USE_ANSI_CPP
endif
export:: $(XPCOM_GLUE_SRC_CSRCS) $(XPCOM_GLUE_SRC_CPPSRCS) $(XPCOM_GLUENS_SRC_CPPSRCS) $(topsrcdir)/xpcom/glue/nsStringAPI.cpp $(topsrcdir)/xpcom/glue/GenericModule.cpp $(topsrcdir)/xpcom/glue/DeadlockDetector.h $(topsrcdir)/xpcom/glue/SSE.h
export:: $(XPCOM_GLUE_SRC_CSRCS) $(XPCOM_GLUE_SRC_CPPSRCS) $(XPCOM_GLUENS_SRC_CPPSRCS) $(topsrcdir)/xpcom/glue/nsStringAPI.cpp $(topsrcdir)/xpcom/glue/GenericModule.cpp $(topsrcdir)/xpcom/glue/DeadlockDetector.h $(topsrcdir)/xpcom/glue/SSE.h $(topsrcdir)/xpcom/glue/arm.h
$(INSTALL) $^ .
ifdef TARGET_XPCOM_ABI

View File

@ -75,6 +75,7 @@ XPCOM_GLUENS_SRC_LCPPSRCS = \
BlockingResourceBase.cpp \
DeadlockDetector.cpp \
SSE.cpp \
arm.cpp \
unused.cpp \
nsProxyRelease.cpp \
nsTextFormatter.cpp \