diff --git a/NEWS.txt b/NEWS.txt index a11a1a77b9ca23f48d131c0d91b2534796ba4ec8..b973f59e8688b4c5c4a8ed71fdead56a7c53f852 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -17,6 +17,7 @@ New Features: - new TskAuto::findFilesInFs(TSK_FS_INFO *) method - Need to only specify first E01 file and the rest are found - Changed docs license to non-commercial +- Unicode conversion routines fix invalid UTF-16 text during conversion Bug Fixes: diff --git a/tsk3/base/tsk_unicode.c b/tsk3/base/tsk_unicode.c index 391078ed4caa1d74a09ad53498f828a1c434d92d..8f3831b393d386dbd8cdb0b6abdeaa1c422ae127 100644 --- a/tsk3/base/tsk_unicode.c +++ b/tsk3/base/tsk_unicode.c @@ -164,22 +164,20 @@ tsk_UTF16toUTF8(TSK_ENDIAN_ENUM endian, const UTF16 ** sourceStart, /* If the 16 bits following the high surrogate are in the source buffer... */ if (source < sourceEnd) { UTF32 ch2 = tsk_getu16(endian, (uint8_t *) source); + ++source; + /* If it's a low surrogate, convert to UTF32. */ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase; - ++source; } + else if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ + result = TSKsourceIllegal; + break; + } + // replace with another character else { - if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = TSKsourceIllegal; - break; - } - // replace with another character - else { - ch = '^'; - } + ch = '^'; } } else { /* We don't have the 16 bits following the high surrogate. */ @@ -278,22 +276,19 @@ tsk_UTF16toUTF8_lclorder(const UTF16 ** sourceStart, /* If the 16 bits following the high surrogate are in the source buffer... */ if (source < sourceEnd) { UTF32 ch2 = *source; + source++; /* If it's a low surrogate, convert to UTF32. */ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase; - ++source; } + else if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ + result = TSKsourceIllegal; + break; + } + // replace with another character else { - if (flags == TSKstrictConversion) { /* it's an unpaired high surrogate */ - --source; /* return to the illegal value itself */ - result = TSKsourceIllegal; - break; - } - // replace with another character - else { - ch = '^'; - } + ch = '^'; } } else { /* We don't have the 16 bits following the high surrogate. */