diff --git a/tsk3/base/tsk_unicode.c b/tsk3/base/tsk_unicode.c index 4c04a1662240fb3a9d30bbcb0a110c733044068e..391078ed4caa1d74a09ad53498f828a1c434d92d 100644 --- a/tsk3/base/tsk_unicode.c +++ b/tsk3/base/tsk_unicode.c @@ -170,10 +170,16 @@ tsk_UTF16toUTF8(TSK_ENDIAN_ENUM endian, const UTF16 ** sourceStart, + (ch2 - UNI_SUR_LOW_START) + halfBase; ++source; } - else if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = TSKsourceIllegal; - break; + else { + if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = TSKsourceIllegal; + break; + } + // replace with another character + else { + ch = '^'; + } } } else { /* We don't have the 16 bits following the high surrogate. */ @@ -182,14 +188,19 @@ tsk_UTF16toUTF8(TSK_ENDIAN_ENUM endian, const UTF16 ** sourceStart, break; } } - else if (flags == TSKstrictConversion) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + if (flags == TSKstrictConversion) { --source; /* return to the illegal value itself */ result = TSKsourceIllegal; break; } + // replace with another character + else { + ch = '^'; + } } + /* Figure out how many bytes the result will require */ if (ch < (UTF32) 0x80) { bytesToWrite = 1; @@ -273,10 +284,16 @@ tsk_UTF16toUTF8_lclorder(const UTF16 ** sourceStart, + (ch2 - UNI_SUR_LOW_START) + halfBase; ++source; } - else if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = TSKsourceIllegal; - break; + else { + if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = TSKsourceIllegal; + break; + } + // replace with another character + else { + ch = '^'; + } } } else { /* We don't have the 16 bits following the high surrogate. */ @@ -285,15 +302,20 @@ tsk_UTF16toUTF8_lclorder(const UTF16 ** sourceStart, break; } } - else if (flags == TSKstrictConversion) { - /* UTF-16 surrogate values are illegal in UTF-32 */ - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + if (flags == TSKstrictConversion) { --source; /* return to the illegal value itself */ result = TSKsourceIllegal; break; } + // replace with another character + else { + ch = '^'; + } } -/* Figure out how many bytes the result will require */ + + /* Figure out how many bytes the result will require */ if (ch < (UTF32) 0x80) { bytesToWrite = 1; }