1 files changed, 464 insertions, 0 deletions
diff --git a/source/string.cpp b/source/string.cpp
new file mode 100644
index 0000000..ae73f11
--- /dev/null
+++ b/source/string.cpp
@@ -0,0 +1,464 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <EASTL/internal/config.h>
+#include <EASTL/string.h>
+#include <EABase/eabase.h>
+#include <string.h>
+
+
+namespace eastl
+{
+	///////////////////////////////////////////////////////////////////////////////
+	// Converters for DecodePart
+	//
+	// For some decent documentation about conversions, see:
+	//     http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c
+	//     
+	///////////////////////////////////////////////////////////////////////////////
+
+	// Requires that pDest have a capacity of at least 6 chars.
+	// Sets pResult to '\1' in the case that c is an invalid UCS4 char.
+	inline bool UCS4ToUTF8(uint32_t c, char*& pResult)
+	{
+		if(c < 0x00000080)
+			*pResult++ = (char)(uint8_t)c;
+		else if(c < 0x0800)
+		{
+			*pResult++ = (char)(uint8_t)(0xC0 | (c >> 6));
+			*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
+		}
+		else if(c <= 0x0000FFFF)
+		{
+			*pResult++ = (char)(uint8_t)(0xE0 | (c >> 12));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
+		}
+		else if(c <= 0x001FFFFF)
+		{
+			*pResult++ = (char)(uint8_t)(0xF0 | (c >> 18));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
+		}
+		else if(c <= 0x003FFFFFF)
+		{
+			*pResult++ = (char)(uint8_t)(0xF8 | (c >> 24));
+			*pResult++ = (char)(uint8_t)(0x80 | (c >> 18));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
+		}
+		else if(c <= 0x7FFFFFFF)
+		{
+			*pResult++ = (char)(uint8_t)(0xFC | (c >> 30));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 24) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 18) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 12) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | ((c >> 6) & 0x3F));
+			*pResult++ = (char)(uint8_t)(0x80 | (c & 0x3F));
+		}
+		else
+		{
+			// values >= 0x80000000 can't be converted to UTF8.
+			*pResult++ = '\1';
+			return false;
+		}
+
+		return true;
+	}
+
+
+	// Requires that pResult have a capacity of at least 3 chars.
+	// Sets pResult to '\1' in the case that c is an invalid UCS4 char.
+	inline bool UCS2ToUTF8(uint16_t c, char*& pResult)
+	{
+		return UCS4ToUTF8(c, pResult);
+	}
+
+
+	// Sets result to 0xffff in the case that the input UTF8 sequence is bad.
+	// 32 bit 0xffffffff is an invalid UCS4 code point, so we can't use that as an error return value.
+	inline bool UTF8ToUCS4(const char*& p, const char* pEnd, uint32_t& result)
+	{
+		// This could likely be implemented in a faster-executing way that uses tables.
+
+		bool           success = true;
+		uint32_t       c = 0xffff;
+		const char* pNext = NULL;
+
+		if(p < pEnd)
+		{
+			uint8_t cChar0((uint8_t)*p), cChar1, cChar2, cChar3;
+
+			// Asserts are disabled because we don't necessarily want to interrupt runtime execution due to this.
+			// EASTL_ASSERT((cChar0 != 0xFE) && (cChar0 != 0xFF));     //  No byte can be 0xFE or 0xFF
+			// Code below will effectively catch this error as it goes.
+
+			if(cChar0 < 0x80)
+			{
+				pNext = p + 1;
+				c     = cChar0;
+			}
+			else
+			{
+				//EASTL_ASSERT((cChar0 & 0xC0) == 0xC0);              //  The top two bits need to be equal to 1
+				if((cChar0 & 0xC0) != 0xC0)
+				{
+					success = false;
+					goto Failure;
+				}
+
+				if((cChar0 & 0xE0) == 0xC0)
+				{
+					pNext = p + 2;
+
+					if(pNext <= pEnd)
+					{
+						c      = (uint32_t)((cChar0 & 0x1F) << 6);
+						cChar1 = static_cast<uint8_t>(p[1]);
+						c     |= cChar1 & 0x3F;
+
+						//EASTL_ASSERT((cChar1 & 0xC0) == 0x80);          //  All subsequent code should be b10xxxxxx
+						//EASTL_ASSERT(c >= 0x0080 && c < 0x0800);        //  Check that we have the smallest coding
+						if(!((cChar1 & 0xC0) == 0x80) ||
+						   !(c >= 0x0080 && c < 0x0800))
+						{
+							success = false;
+							goto Failure;
+						}
+					}
+					else
+					{
+						success = false;
+						goto Failure;
+					}
+				}
+				else if((cChar0 & 0xF0) == 0xE0)
+				{
+					pNext = p + 3;
+
+					if(pNext <= pEnd)
+					{
+						c      = (uint32_t)((cChar0 & 0xF) << 12);
+						cChar1 = static_cast<uint8_t>(p[1]);
+						c     |= (cChar1 & 0x3F) << 6;
+						cChar2 = static_cast<uint8_t>(p[2]);
+						c     |= cChar2 & 0x3F;
+
+						//EASTL_ASSERT((cChar1 & 0xC0) == 0x80);            //  All subsequent code should be b10xxxxxx
+						//EASTL_ASSERT((cChar2 & 0xC0) == 0x80);            //  All subsequent code should be b10xxxxxx
+						//EASTL_ASSERT(c >= 0x00000800 && c <  0x00010000); //  Check that we have the smallest coding
+						if(!((cChar1 & 0xC0) == 0x80) ||
+						   !((cChar2 & 0xC0) == 0x80) ||
+						   !(c >= 0x00000800 && c <  0x00010000))
+						{
+							success = false;
+							goto Failure;
+						}
+					}
+					else
+					{
+						success = false;
+						goto Failure;
+					}
+				}
+				else if((cChar0 & 0xF8) == 0xF0)
+				{
+					pNext = p + 4;
+
+					if(pNext <= pEnd)
+					{
+						c      = (uint32_t)((cChar0 & 0x7) << 18);
+						cChar1 = static_cast<uint8_t>(p[1]);
+						c     |= (uint32_t)((cChar1 & 0x3F) << 12);
+						cChar2 = static_cast<uint8_t>(p[2]);
+						c     |= (cChar2 & 0x3F) << 6;
+						cChar3 = static_cast<uint8_t>(p[3]);
+						c     |= cChar3 & 0x3F;
+
+						//EASTL_ASSERT((cChar0 & 0xf8) == 0xf0);            //  We handle the unicode but not UCS-4
+						//EASTL_ASSERT((cChar1 & 0xC0) == 0x80);            //  All subsequent code should be b10xxxxxx
+						//EASTL_ASSERT((cChar2 & 0xC0) == 0x80);            //  All subsequent code should be b10xxxxxx
+						//EASTL_ASSERT((cChar3 & 0xC0) == 0x80);            //  All subsequent code should be b10xxxxxx
+						//EASTL_ASSERT(c >= 0x00010000 && c <= 0x0010FFFF); //  Check that we have the smallest coding, Unicode and not ucs-4
+						if(!((cChar0 & 0xf8) == 0xf0) ||
+						   !((cChar1 & 0xC0) == 0x80) ||
+						   !((cChar2 & 0xC0) == 0x80) ||
+						   !(c >= 0x00010000 && c <= 0x0010FFFF))
+						{
+							success = false;
+							goto Failure;
+						}
+					}
+					else
+					{
+						success = false;
+						goto Failure;
+					}
+				}
+				else if((cChar0 & 0xFC) == 0xF8)
+				{
+					pNext = p + 4;
+
+					if(pNext <= pEnd)
+					{
+						// To do. We don't currently support extended UCS4 characters.
+					}
+					else
+					{
+						success = false;
+						goto Failure;
+					}
+				}
+				else if((cChar0 & 0xFE) == 0xFC)
+				{
+					pNext = p + 5;
+
+					if(pNext <= pEnd)
+					{
+						// To do. We don't currently support extended UCS4 characters.
+					}
+					else
+					{
+						success = false;
+						goto Failure;
+					}
+				}
+				else
+				{
+					success = false;
+					goto Failure;
+				}
+			}
+		}
+		else
+			success = false;
+
+		Failure:
+		if(success)
+		{
+			p = pNext;
+			result = c;
+		}
+		else
+		{
+			p = p + 1;
+			result = 0xffff;
+		}
+
+		return success;
+	}
+
+	// Sets result to 0xffff in the case that the input UTF8 sequence is bad.
+	// The effect of converting UTF8 codepoints > 0xffff to UCS2 (char16_t) is to set all
+	// such codepoints to 0xffff. EASTL doesn't have a concept of setting or maintaining 
+	// error state for string conversions, though it does have a policy of converting 
+	// impossible values to something without generating invalid strings or throwing exceptions.
+	inline bool UTF8ToUCS2(const char*& p, const char* pEnd, uint16_t& result)
+	{
+		uint32_t u32;
+
+		if(UTF8ToUCS4(p, pEnd, u32))
+		{
+			if(u32 <= 0xffff)
+			{
+				result = (uint16_t)u32;
+				return true;
+			}
+		}
+
+		result = 0xffff;
+		return false;
+	}
+
+
+
+	///////////////////////////////////////////////////////////////////////////
+	// DecodePart
+	///////////////////////////////////////////////////////////////////////////
+
+	EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char*& pDest, char* pDestEnd)
+	{
+		size_t sourceSize = (size_t)(pSrcEnd - pSrc);
+		size_t destSize   = (size_t)(pDestEnd - pDest);
+
+		if(sourceSize > destSize)
+		   sourceSize = destSize;
+
+		memmove(pDest, pSrc, sourceSize * sizeof(*pSrcEnd));
+
+		pSrc  += sourceSize;
+		pDest += sourceSize; // Intentionally add sourceSize here.
+
+		return true;
+	}
+
+	EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
+	{
+		bool success = true;
+
+		while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
+			success = UTF8ToUCS2(pSrc, pSrcEnd, (uint16_t&)*pDest++);
+
+		return success;
+	}
+
+	EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
+	{
+		bool success = true;
+
+		while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
+			success = UTF8ToUCS4(pSrc, pSrcEnd, (uint32_t&)*pDest++);
+
+		return success;
+	}
+
+
+	EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char*& pDest, char* pDestEnd)
+	{
+		bool success = true;
+
+		EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.
+		pDestEnd -= 6; // Do this so that we can avoid dest buffer size checking in the loop below and the function it calls.
+
+		while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
+			success = UCS2ToUTF8(*pSrc++, pDest);
+
+		return success;
+	}
+
+	EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
+	{
+		size_t sourceSize = (size_t)(pSrcEnd - pSrc);
+		size_t destSize   = (size_t)(pDestEnd - pDest);
+
+		if(sourceSize > destSize)
+		   sourceSize = destSize;
+
+		memmove(pDest, pSrc, sourceSize * sizeof(*pSrcEnd));
+
+		pSrc  += sourceSize;
+		pDest += sourceSize; // Intentionally add sourceSize here.
+
+		return true;
+	}
+
+	EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
+	{
+		size_t sourceSize = (size_t)(pSrcEnd - pSrc);
+		size_t destSize   = (size_t)(pDestEnd - pDest);
+
+		if(sourceSize > destSize)
+		   pSrcEnd = pSrc + destSize;
+
+		while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
+			*pDest++ = (char32_t)*pSrc++;
+
+		return true;
+	}
+
+
+	EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char*& pDest, char* pDestEnd)
+	{
+		bool success = true;
+
+		EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.
+		pDestEnd -= 6; // Do this so that we can avoid dest buffer size checking in the loop below and the function it calls.
+
+		while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
+			success = UCS4ToUTF8(*pSrc++, pDest);
+
+		return success;
+	}
+
+	EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
+	{
+		size_t sourceSize = (size_t)(pSrcEnd - pSrc);
+		size_t destSize   = (size_t)(pDestEnd - pDest);
+
+		if(sourceSize > destSize)
+		   pSrcEnd = pSrc + destSize;
+
+		while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
+			*pDest++ = (char16_t)*pSrc++;   // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
+
+		return true;
+	}
+
+	EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
+	{
+		size_t sourceSize = (size_t)(pSrcEnd - pSrc);
+		size_t destSize   = (size_t)(pDestEnd - pDest);
+
+		if(sourceSize > destSize)
+		   sourceSize = destSize;
+
+		memmove(pDest, pSrc, sourceSize * sizeof(*pSrcEnd));
+
+		pSrc  += sourceSize;
+		pDest += sourceSize; // Intentionally add sourceSize here.
+
+		return true;
+	}
+
+	EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char*&  pDest, char* pDestEnd)
+	{
+		bool success = true;
+
+		EASTL_ASSERT((pDest + 6) < pDestEnd); // The user must provide ample buffer space, preferably 256 chars or more.
+		pDestEnd -= 6; // Do this so that we can avoid dest buffer size checking in the loop below and the function it calls.
+
+		while(success && (pSrc < pSrcEnd) && (pDest < pDestEnd))
+			success = UCS4ToUTF8((uint32_t)(unsigned)*pSrc++, pDest);
+
+		return success;
+	}
+
+	EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd)
+	{
+		size_t sourceSize = (size_t)(pSrcEnd - pSrc);
+		size_t destSize   = (size_t)(pDestEnd - pDest);
+
+		if(sourceSize > destSize)
+		   pSrcEnd = pSrc + destSize;
+
+		while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
+			*pDest++ = (char16_t)*pSrc++;   // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
+
+		return true;
+	}
+
+	EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd)
+	{
+		size_t sourceSize = (size_t)(pSrcEnd - pSrc);
+		size_t destSize   = (size_t)(pDestEnd - pDest);
+
+		if(sourceSize > destSize)
+		   pSrcEnd = pSrc + destSize;
+
+		while(pSrc != pSrcEnd) // To consider: Improve this by unrolling this loop. Other tricks can improve its speed as well.
+			*pDest++ = (char32_t)*pSrc++;   // This is potentially losing data. We are not converting to UTF16; we are converting to UCS2.
+
+		return true;
+	}
+
+
+
+} // namespace eastl
+
+
+
+
+
+
+
+
+
+
+
+
+
+