mirror of
https://github.com/mozilla/gecko-dev.git
synced 2025-01-25 22:29:07 +00:00
Bug 586698 - Add SSE versions of LossyConvertEncoding; r=tterribe,jst
This commit is contained in:
parent
10ebb07512
commit
b774f4accc
@ -227,9 +227,7 @@ nsTextFragment::SetTo(const PRUnichar* aBuffer, PRInt32 aLength)
|
||||
}
|
||||
|
||||
// Copy data
|
||||
// Use the same copying code we use elsewhere; it's likely to be
|
||||
// carefully tuned.
|
||||
LossyConvertEncoding<PRUnichar, char> converter(buff);
|
||||
LossyConvertEncoding16to8 converter(buff);
|
||||
copy_string(aBuffer, aBuffer+aLength, converter);
|
||||
m1b = buff;
|
||||
}
|
||||
@ -260,9 +258,8 @@ nsTextFragment::CopyTo(PRUnichar *aDest, PRInt32 aOffset, PRInt32 aCount)
|
||||
} else {
|
||||
const char *cp = m1b + aOffset;
|
||||
const char *end = cp + aCount;
|
||||
while (cp < end) {
|
||||
*aDest++ = (unsigned char)(*cp++);
|
||||
}
|
||||
LossyConvertEncoding8to16 converter(aDest);
|
||||
copy_string(cp, end, converter);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -316,11 +313,10 @@ nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength)
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy data
|
||||
for (PRUint32 i = 0; i < mState.mLength; ++i) {
|
||||
buff[i] = (unsigned char)m1b[i];
|
||||
}
|
||||
|
||||
// Copy data into buff
|
||||
LossyConvertEncoding8to16 converter(buff);
|
||||
copy_string(m1b, m1b+mState.mLength, converter);
|
||||
|
||||
memcpy(buff + mState.mLength, aBuffer, aLength * sizeof(PRUnichar));
|
||||
|
||||
mState.mLength += aLength;
|
||||
@ -354,10 +350,10 @@ nsTextFragment::Append(const PRUnichar* aBuffer, PRUint32 aLength)
|
||||
memcpy(buff, m1b, mState.mLength);
|
||||
mState.mInHeap = PR_TRUE;
|
||||
}
|
||||
|
||||
for (PRUint32 i = 0; i < aLength; ++i) {
|
||||
buff[mState.mLength + i] = (char)aBuffer[i];
|
||||
}
|
||||
|
||||
// Copy aBuffer into buff.
|
||||
LossyConvertEncoding16to8 converter(buff + mState.mLength);
|
||||
copy_string(aBuffer, aBuffer + aLength, converter);
|
||||
|
||||
m1b = buff;
|
||||
mState.mLength += aLength;
|
||||
|
@ -43,6 +43,7 @@
|
||||
// use XPCOM assertion/debugging macros, etc.
|
||||
|
||||
#include "nscore.h"
|
||||
#include "mozilla/SSE.h"
|
||||
|
||||
#include "nsCharTraits.h"
|
||||
|
||||
@ -662,39 +663,89 @@ class CalculateUTF8Size
|
||||
|
||||
#ifdef MOZILLA_INTERNAL_API
|
||||
/**
|
||||
* A character sink that performs a |reinterpret_cast| style conversion
|
||||
* between character types.
|
||||
* A character sink that widens each char to a PRUnichar by zero-extending
* its byte value (a plain cast-style conversion; no charset decoding).
|
||||
*/
|
||||
template <class FromCharT, class ToCharT>
|
||||
class LossyConvertEncoding
|
||||
class LossyConvertEncoding8to16
|
||||
{
|
||||
public:
|
||||
typedef FromCharT value_type;
|
||||
|
||||
typedef FromCharT input_type;
|
||||
typedef ToCharT output_type;
|
||||
|
||||
typedef typename nsCharTraits<FromCharT>::unsigned_char_type unsigned_input_type;
|
||||
typedef char value_type;
|
||||
typedef char input_type;
|
||||
typedef PRUnichar output_type;
|
||||
|
||||
public:
|
||||
LossyConvertEncoding( output_type* aDestination ) : mDestination(aDestination) { }
|
||||
LossyConvertEncoding8to16( PRUnichar* aDestination ) :
|
||||
mDestination(aDestination) { }
|
||||
|
||||
void
|
||||
write( const input_type* aSource, PRUint32 aSourceLength )
|
||||
write( const char* aSource, PRUint32 aSourceLength )
|
||||
{
|
||||
const input_type* done_writing = aSource + aSourceLength;
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
if (mozilla::supports_sse2())
|
||||
{
|
||||
write_sse2(aSource, aSourceLength);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
const char* done_writing = aSource + aSourceLength;
|
||||
while ( aSource < done_writing )
|
||||
*mDestination++ = (output_type)(unsigned_input_type)(*aSource++); // use old-style cast to mimic old |ns[C]String| behavior
|
||||
*mDestination++ = (PRUnichar)(unsigned char)(*aSource++);
|
||||
}
|
||||
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
// SSE2 variant of write(): converts |aSourceLength| chars starting at
// |aSource|. Declared only when MOZILLA_MAY_SUPPORT_SSE2 is defined, which
// matches the guarded call site in write() and the guarded declaration in
// LossyConvertEncoding16to8; the out-of-line definition is only compiled
// when the build adds nsUTF8UtilsSSE2.cpp.
void
write_sse2( const char* aSource, PRUint32 aSourceLength );
#endif
|
||||
|
||||
void
|
||||
write_terminator()
|
||||
{
|
||||
*mDestination = output_type(0);
|
||||
*mDestination = (PRUnichar)(0);
|
||||
}
|
||||
|
||||
private:
|
||||
output_type* mDestination;
|
||||
PRUnichar* mDestination;
|
||||
};
|
||||
|
||||
/**
|
||||
* A character sink that performs a |reinterpret_cast|-style conversion
|
||||
* from PRUnichar to char.
|
||||
*/
|
||||
class LossyConvertEncoding16to8
|
||||
{
|
||||
public:
|
||||
typedef PRUnichar value_type;
|
||||
typedef PRUnichar input_type;
|
||||
typedef char output_type;
|
||||
|
||||
LossyConvertEncoding16to8( char* aDestination ) : mDestination(aDestination) { }
|
||||
|
||||
void
|
||||
write( const PRUnichar* aSource, PRUint32 aSourceLength)
|
||||
{
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
if (mozilla::supports_sse2())
|
||||
{
|
||||
write_sse2(aSource, aSourceLength);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
const PRUnichar* done_writing = aSource + aSourceLength;
|
||||
while ( aSource < done_writing )
|
||||
*mDestination++ = (char)(*aSource++);
|
||||
}
|
||||
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
void
|
||||
write_sse2( const PRUnichar* aSource, PRUint32 aSourceLength );
|
||||
#endif
|
||||
|
||||
void
|
||||
write_terminator()
|
||||
{
|
||||
*mDestination = '\0';
|
||||
}
|
||||
|
||||
private:
|
||||
char *mDestination;
|
||||
};
|
||||
#endif // MOZILLA_INTERNAL_API
|
||||
|
||||
|
@ -70,6 +70,18 @@ FORCE_STATIC_LIB = 1
|
||||
# Force use of PIC
|
||||
FORCE_USE_PIC = 1
|
||||
|
||||
# Are we targeting x86 or x86-64? If so, compile the SSE2 functions for
|
||||
# nsUTF8Utils.cpp.
|
||||
ifneq (,$(INTEL_ARCHITECTURE))
|
||||
CPPSRCS += nsUTF8UtilsSSE2.cpp
|
||||
|
||||
# gcc requires -msse2 on nsUTF8UtilsSSE2.cpp since it uses SSE2 intrinsics.
|
||||
# (See bug 585538 comment 12.)
|
||||
ifdef GNU_CC
|
||||
nsUTF8UtilsSSE2.$(OBJ_SUFFIX): CXXFLAGS+=-msse2
|
||||
endif
|
||||
endif
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
||||
DEFINES += -D_IMPL_NS_COM
|
||||
|
@ -145,9 +145,9 @@ LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest )
|
||||
|
||||
dest.advance(old_dest_length);
|
||||
|
||||
// right now, this won't work on multi-fragment destinations
|
||||
LossyConvertEncoding<PRUnichar, char> converter(dest.get());
|
||||
|
||||
// right now, this won't work on multi-fragment destinations
|
||||
LossyConvertEncoding16to8 converter(dest.get());
|
||||
|
||||
copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
|
||||
}
|
||||
|
||||
@ -167,7 +167,7 @@ AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest )
|
||||
dest.advance(old_dest_length);
|
||||
|
||||
// right now, this won't work on multi-fragment destinations
|
||||
LossyConvertEncoding<char, PRUnichar> converter(dest.get());
|
||||
LossyConvertEncoding8to16 converter(dest.get());
|
||||
|
||||
copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter);
|
||||
}
|
||||
@ -303,7 +303,7 @@ ToNewCString( const nsAString& aSource )
|
||||
return nsnull;
|
||||
|
||||
nsAString::const_iterator fromBegin, fromEnd;
|
||||
LossyConvertEncoding<PRUnichar, char> converter(result);
|
||||
LossyConvertEncoding16to8 converter(result);
|
||||
copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
|
||||
return result;
|
||||
}
|
||||
@ -374,7 +374,7 @@ ToNewUnicode( const nsACString& aSource )
|
||||
return nsnull;
|
||||
|
||||
nsACString::const_iterator fromBegin, fromEnd;
|
||||
LossyConvertEncoding<char, PRUnichar> converter(result);
|
||||
LossyConvertEncoding8to16 converter(result);
|
||||
copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
|
||||
return result;
|
||||
}
|
||||
|
96
xpcom/string/src/nsUTF8UtilsSSE2.cpp
Normal file
96
xpcom/string/src/nsUTF8UtilsSSE2.cpp
Normal file
@ -0,0 +1,96 @@
|
||||
#include "nscore.h"
|
||||
#include <emmintrin.h>
|
||||
#include <nsUTF8Utils.h>
|
||||
|
||||
void
|
||||
LossyConvertEncoding16to8::write_sse2(const PRUnichar* aSource,
|
||||
PRUint32 aSourceLength)
|
||||
{
|
||||
char* dest = mDestination;
|
||||
|
||||
// Align source to a 16-byte boundary.
|
||||
PRUint32 i = 0;
|
||||
PRUint32 alignLen =
|
||||
PR_MIN(aSourceLength, (-NS_PTR_TO_UINT32(aSource) & 0xf) / sizeof(PRUnichar));
|
||||
for (; i < alignLen; i++) {
|
||||
dest[i] = static_cast<unsigned char>(aSource[i]);
|
||||
}
|
||||
|
||||
// Walk 64 bytes (four XMM registers) at a time.
|
||||
__m128i vectmask = _mm_set1_epi16(0x00ff);
|
||||
for (; aSourceLength - i > 31; i += 32) {
|
||||
__m128i source1 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i));
|
||||
source1 = _mm_and_si128(source1, vectmask);
|
||||
|
||||
__m128i source2 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 8));
|
||||
source2 = _mm_and_si128(source2, vectmask);
|
||||
|
||||
__m128i source3 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 16));
|
||||
source3 = _mm_and_si128(source3, vectmask);
|
||||
|
||||
__m128i source4 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 24));
|
||||
source4 = _mm_and_si128(source4, vectmask);
|
||||
|
||||
|
||||
// Pack the source data. SSE2 views this as a saturating uint16 to
|
||||
// uint8 conversion, but since we masked off the high-order byte of every
|
||||
// uint16, we're really just grabbing the low-order bytes of source1 and
|
||||
// source2.
|
||||
__m128i packed1 = _mm_packus_epi16(source1, source2);
|
||||
__m128i packed2 = _mm_packus_epi16(source3, source4);
|
||||
|
||||
// This store needs to be unaligned since there's no guarantee that the
|
||||
// alignment we did above for the source will align the destination.
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i), packed1);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 16), packed2);
|
||||
}
|
||||
|
||||
// Finish up the rest.
|
||||
for (; i < aSourceLength; i++) {
|
||||
dest[i] = static_cast<unsigned char>(aSource[i]);
|
||||
}
|
||||
|
||||
mDestination += i;
|
||||
}
|
||||
|
||||
void
|
||||
LossyConvertEncoding8to16::write_sse2(const char* aSource,
|
||||
PRUint32 aSourceLength)
|
||||
{
|
||||
PRUnichar *dest = mDestination;
|
||||
|
||||
// Align source to a 16-byte boundary. We choose to align source rather than
|
||||
// dest because we'd rather have our loads than our stores be fast. You have
|
||||
// to wait for a load to complete, but you can keep on moving after issuing a
|
||||
// store.
|
||||
PRUint32 i = 0;
|
||||
PRUint32 alignLen = PR_MIN(aSourceLength, (-NS_PTR_TO_UINT32(aSource) & 0xf));
|
||||
for (; i < alignLen; i++) {
|
||||
dest[i] = static_cast<unsigned char>(aSource[i]);
|
||||
}
|
||||
|
||||
// Walk 32 bytes (two XMM registers) at a time.
|
||||
for (; aSourceLength - i > 31; i += 32) {
|
||||
__m128i source1 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i));
|
||||
__m128i source2 = _mm_load_si128(reinterpret_cast<const __m128i*>(aSource + i + 16));
|
||||
|
||||
// Interleave 0s in with the bytes of source to create lo and hi.
|
||||
__m128i lo1 = _mm_unpacklo_epi8(source1, _mm_setzero_si128());
|
||||
__m128i hi1 = _mm_unpackhi_epi8(source1, _mm_setzero_si128());
|
||||
__m128i lo2 = _mm_unpacklo_epi8(source2, _mm_setzero_si128());
|
||||
__m128i hi2 = _mm_unpackhi_epi8(source2, _mm_setzero_si128());
|
||||
|
||||
// store lo and hi into dest.
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i), lo1);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 8), hi1);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 16), lo2);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(dest + i + 24), hi2);
|
||||
}
|
||||
|
||||
// Finish up whatever's left.
|
||||
for (; i < aSourceLength; i++) {
|
||||
dest[i] = static_cast<unsigned char>(aSource[i]);
|
||||
}
|
||||
|
||||
mDestination += i;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user