mirror of
https://github.com/reactos/CMake.git
synced 2025-01-07 11:40:23 +00:00
e4beefeb6d
CTest filters the output from tools and tests to ensure that the XML build/test result documents it generates have valid characters. Previously we just converted all non-ASCII bytes into XML-escaped Unicode characters of the corresponding index. This does not preserve tool output encoded in UTF-8. We now assume UTF-8 output from tools and implement decoding as specified in RFC 3629. Valid characters are preserved, possibly with XML escaping. Invalid byte sequences and characters are converted to human-readable hex values with distinguishing tags. See issue #10003.
103 lines
3.0 KiB
C++
103 lines
3.0 KiB
C++
/*============================================================================
|
|
CMake - Cross Platform Makefile Generator
|
|
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
|
|
|
|
Distributed under the OSI-approved BSD License (the "License");
|
|
see accompanying file Copyright.txt for details.
|
|
|
|
This software is distributed WITHOUT ANY WARRANTY; without even the
|
|
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
See the License for more information.
|
|
============================================================================*/
|
|
#include "cmXMLSafe.h"
|
|
|
|
#include "cm_utf8.h"
|
|
|
|
#include <cmsys/ios/iostream>
|
|
#include <cmsys/ios/sstream>
|
|
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Wrap a NUL-terminated C string for XML-safe output.
//
// The characters are not copied here: Data keeps the caller's pointer, so
// the buffer must remain valid for as long as this object is streamed.
// A null pointer is treated as an empty string instead of being handed to
// strlen(), which would be undefined behavior.
cmXMLSafe::cmXMLSafe(const char* s):
  Data(s? s : ""),
  Size(static_cast<unsigned long>(s? strlen(s) : 0)),
  DoQuotes(true)
{
}
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Wrap the contents of a string object for XML-safe output.
//
// Only the pointer returned by c_str() and the character count are
// recorded, so embedded NUL bytes are included in the range that will be
// streamed.  Quote escaping starts out enabled.
cmXMLSafe::cmXMLSafe(cmsys_stl::string const& s)
  : Data(s.c_str())
  , Size(static_cast<unsigned long>(s.size()))
  , DoQuotes(true)
{
}
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Select whether single and double quote characters are escaped when this
// object is streamed.  Returns a reference to this object so the call can
// be chained directly into an output expression.
cmXMLSafe& cmXMLSafe::Quotes(bool b)
{
  cmXMLSafe& self = *this;
  self.DoQuotes = b;
  return self;
}
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Produce the escaped text as a string by streaming this object into an
// in-memory stream and returning what was collected.
cmsys_stl::string cmXMLSafe::str()
{
  cmsys_ios::ostringstream escaped;
  escaped << *this;
  return escaped.str();
}
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Write the wrapped text to 'os' in a form that is safe to embed in an XML
// document.
//
// The input range [Data, Data+Size) is decoded one UTF-8 character at a
// time with cm_utf8_decode_character:
//   - A decoded character allowed by the XML "Char" production is written
//     through unchanged, except that the markup characters & < > are
//     replaced by their predefined entities, " and ' are replaced by
//     entities only when DoQuotes is set, and carriage returns are dropped.
//   - A decoded character that XML does not allow is replaced by the
//     readable tag "[NON-XML-CHAR-0x<HEX>]".
//   - A byte at which decoding fails is consumed on its own and replaced by
//     "[NON-UTF-8-BYTE-0x<HEX>]".
//
// Returns 'os' to allow chaining.
cmsys_ios::ostream& operator<<(cmsys_ios::ostream& os, cmXMLSafe const& self)
{
  char const* first = self.Data;
  char const* last = self.Data + self.Size;
  while(first != last)
    {
    unsigned int ch;
    if(const char* next = cm_utf8_decode_character(first, last, &ch))
      {
      // http://www.w3.org/TR/REC-xml/#NT-Char
      if((ch >= 0x20 && ch <= 0xD7FF) ||
         (ch >= 0xE000 && ch <= 0xFFFD) ||
         (ch >= 0x10000 && ch <= 0x10FFFF) ||
         ch == 0x9 || ch == 0xA || ch == 0xD)
        {
        switch(ch)
          {
          // Escape XML control characters with the predefined entities.
          case '&': os << "&amp;"; break;
          case '<': os << "&lt;"; break;
          case '>': os << "&gt;"; break;
          // Quotes only need escaping inside attribute values, so the
          // caller may turn this off via Quotes(false).
          case '"': os << (self.DoQuotes? "&quot;" : "\""); break;
          case '\'': os << (self.DoQuotes? "&apos;" : "'"); break;
          case '\r': break; // Ignore CR
          // Print the UTF-8 character using its original byte sequence.
          default: os.write(first, next-first); break;
          }
        }
      else
        {
        // Use a human-readable hex value for this invalid character.
        // An unsigned int prints as at most 8 hex digits, so 16 bytes is
        // ample room for the digits and the terminator.
        char buf[16];
        sprintf(buf, "%X", ch);
        os << "[NON-XML-CHAR-0x" << buf << "]";
        }

      // Continue after the character that was just decoded.
      first = next;
      }
    else
      {
      // Decoding failed: consume exactly one byte so the loop always makes
      // progress, and report that byte.
      ch = static_cast<unsigned char>(*first++);
      // Use a human-readable hex value for this invalid byte.
      char buf[16];
      sprintf(buf, "%X", ch);
      os << "[NON-UTF-8-BYTE-0x" << buf << "]";
      }
    }
  return os;
}
|