Skip to content
Snippets Groups Projects
Commit 78522fd5 authored by Brian Carrier's avatar Brian Carrier
Browse files

Fix for 2725799 re: wrong endian ordering of UTF16 text in ifind on win32. ...

Fix for 2725799 re: wrong endian ordering of UTF16 text in ifind on win32.  Applied use of new Unicode convert function to other places too.
parent 7a91f8c0
No related branches found
No related tags found
No related merge requests found
...@@ -62,6 +62,12 @@ about the ext2 and ext3 types (it knew about ext and the older ...@@ -62,6 +62,12 @@ about the ext2 and ext3 types (it knew about ext and the older
forms). Also added support for ufs1, ufs2, iso, and hfs. reported forms). Also added support for ufs1, ufs2, iso, and hfs. reported
by Vinogratzky. by Vinogratzky.
4/11/09: Bug Fix: Fixed issue 2725799 regarding ifind not converting
UTF16 names properly on Windows because it was using the endian ordering
of the file system and not that of the local system. Created a new
conversion function and changed a couple of other spots that had a
similar bug. Reported by Rob Joyce.
---------------- VERSION 3.0.0 -------------- ---------------- VERSION 3.0.0 --------------
0/00/00: Update: Many, many, many API changes. 0/00/00: Update: Many, many, many API changes.
......
...@@ -39,14 +39,15 @@ extern "C" { ...@@ -39,14 +39,15 @@ extern "C" {
(roundup((x),(y)) - (y))) (roundup((x),(y)) - (y)))
#endif #endif
extern void *tsk_malloc(size_t); extern void *tsk_malloc(size_t);
extern void *tsk_realloc(void *, size_t); extern void *tsk_realloc(void *, size_t);
// getopt for windows // getopt for windows
#ifdef TSK_WIN32 #ifdef TSK_WIN32
extern int tsk_optind; extern int tsk_optind;
extern TSK_TCHAR *tsk_optarg; extern TSK_TCHAR *tsk_optarg;
extern int tsk_getopt(int argc, TSK_TCHAR * const argv[], const TSK_TCHAR * optstring); extern int tsk_getopt(int argc, TSK_TCHAR * const argv[],
const TSK_TCHAR * optstring);
#endif #endif
...@@ -55,9 +56,9 @@ extern void *tsk_realloc(void *, size_t); ...@@ -55,9 +56,9 @@ extern void *tsk_realloc(void *, size_t);
#define TSK_ERRSTR_L 512 #define TSK_ERRSTR_L 512
#define TSK_ERRSTR_PR_L ((TSK_ERRSTR_L << 2) + 64) #define TSK_ERRSTR_PR_L ((TSK_ERRSTR_L << 2) + 64)
extern char tsk_errstr[TSK_ERRSTR_L]; extern char tsk_errstr[TSK_ERRSTR_L];
extern char tsk_errstr2[TSK_ERRSTR_L]; extern char tsk_errstr2[TSK_ERRSTR_L];
extern char tsk_errstr_print[TSK_ERRSTR_PR_L]; extern char tsk_errstr_print[TSK_ERRSTR_PR_L];
...@@ -65,8 +66,10 @@ extern char tsk_errstr_print[TSK_ERRSTR_PR_L]; ...@@ -65,8 +66,10 @@ extern char tsk_errstr_print[TSK_ERRSTR_PR_L];
/* macros to read in multi-byte fields /* macros to read in multi-byte fields
* file system is an array of 8-bit values, not 32-bit values * file system is an array of 8-bit values, not 32-bit values
*/ */
extern uint8_t tsk_guess_end_u16(TSK_ENDIAN_ENUM *, uint8_t *, uint16_t); extern uint8_t tsk_guess_end_u16(TSK_ENDIAN_ENUM *, uint8_t *,
extern uint8_t tsk_guess_end_u32(TSK_ENDIAN_ENUM *, uint8_t *, uint32_t); uint16_t);
extern uint8_t tsk_guess_end_u32(TSK_ENDIAN_ENUM *, uint8_t *,
uint32_t);
/** \internal /** \internal
* Read a 16-bit unsigned value. * Read a 16-bit unsigned value.
...@@ -280,33 +283,39 @@ extern uint8_t tsk_guess_end_u32(TSK_ENDIAN_ENUM *, uint8_t *, uint32_t); ...@@ -280,33 +283,39 @@ extern uint8_t tsk_guess_end_u32(TSK_ENDIAN_ENUM *, uint8_t *, uint32_t);
------------------------------------------------------------------------ */ ------------------------------------------------------------------------ */
typedef unsigned short UTF16; /* at least 16 bits */ typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */ typedef unsigned char UTF8; /* typically 8 bits */
typedef unsigned char Boolean; /* 0 or 1 */ typedef unsigned char Boolean; /* 0 or 1 */
typedef enum { typedef enum {
TSKconversionOK, ///< conversion successful TSKconversionOK, ///< conversion successful
TSKsourceExhausted, ///< partial character in source, but hit end TSKsourceExhausted, ///< partial character in source, but hit end
TSKtargetExhausted, ///< insuff. room in target for conversion TSKtargetExhausted, ///< insuff. room in target for conversion
TSKsourceIllegal ///< source sequence is illegal/malformed TSKsourceIllegal ///< source sequence is illegal/malformed
} TSKConversionResult; } TSKConversionResult;
typedef enum { typedef enum {
TSKstrictConversion = 0, ///< Error if invalid surrogate pairs are found TSKstrictConversion = 0, ///< Error if invalid surrogate pairs are found
TSKlenientConversion ///< Ignore invalid surrogate pairs TSKlenientConversion ///< Ignore invalid surrogate pairs
} TSKConversionFlags; } TSKConversionFlags;
TSKConversionResult tsk_UTF8toUTF16(const UTF8 ** sourceStart, extern TSKConversionResult tsk_UTF8toUTF16(const UTF8 ** sourceStart,
const UTF8 * sourceEnd, const UTF8 * sourceEnd,
UTF16 ** targetStart, UTF16 * targetEnd, TSKConversionFlags flags); UTF16 ** targetStart, UTF16 * targetEnd, TSKConversionFlags flags);
TSKConversionResult tsk_UTF16toUTF8(TSK_ENDIAN_ENUM, extern TSKConversionResult tsk_UTF16toUTF8(TSK_ENDIAN_ENUM,
const UTF16 ** sourceStart, const UTF16 * sourceEnd, const UTF16 ** sourceStart, const UTF16 * sourceEnd,
UTF8 ** targetStart, UTF8 * targetEnd, TSKConversionFlags flags); UTF8 ** targetStart, UTF8 * targetEnd, TSKConversionFlags flags);
Boolean tsk_isLegalUTF8Sequence(const UTF8 * source, extern TSKConversionResult
const UTF8 * sourceEnd); tsk_UTF16toUTF8_lclorder(const UTF16 ** sourceStart,
const UTF16 * sourceEnd, UTF8 ** targetStart,
UTF8 * targetEnd, TSKConversionFlags flags);
extern Boolean tsk_isLegalUTF8Sequence(const UTF8 * source,
const UTF8 * sourceEnd);
#endif #endif
//@} //@}
......
...@@ -235,6 +235,109 @@ tsk_UTF16toUTF8(TSK_ENDIAN_ENUM endian, const UTF16 ** sourceStart, ...@@ -235,6 +235,109 @@ tsk_UTF16toUTF8(TSK_ENDIAN_ENUM endian, const UTF16 ** sourceStart,
return result; return result;
} }
/**
 * \ingroup baselib
 * Convert a UTF-16 string in local endian ordering to UTF-8.
 * The 16-bit units are read directly from memory with no byte swapping.
 *
 * @param sourceStart Pointer to pointer to start of UTF-16 string.  On return
 * it points to the first UTF-16 unit that was not consumed (on error, the
 * unit at which the conversion stopped).
 * @param sourceEnd Pointer to one entry past end of UTF-16 string
 * @param targetStart Pointer to pointer to place where UTF-8 string should be
 * written.  Will be updated to next place to write to.
 * @param targetEnd Pointer to one byte past the end of the UTF-8 buffer
 * @param flags Flags used during conversion.  With TSKstrictConversion an
 * unpaired surrogate stops the conversion; otherwise it is not rejected and
 * is encoded like any other 16-bit value.
 * @returns TSKconversionOK if the whole source was converted,
 * TSKsourceIllegal if an unpaired surrogate was found in strict mode,
 * TSKsourceExhausted if the source ends after a high surrogate, or
 * TSKtargetExhausted if the next character does not fit in the target.
 */
TSKConversionResult
tsk_UTF16toUTF8_lclorder(const UTF16 ** sourceStart,
    const UTF16 * sourceEnd, UTF8 ** targetStart,
    UTF8 * targetEnd, TSKConversionFlags flags)
{
    TSKConversionResult result = TSKconversionOK;
    const UTF16 *source = *sourceStart;
    UTF8 *target = *targetStart;

    while (source < sourceEnd) {
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
        const UTF32 byteMark = 0x80;
        const UTF16 *oldSource = source;        /* In case we have to back up because of target overflow. */

        ch = *source++;

        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source buffer... */
            if (source < sourceEnd) {
                UTF32 ch2 = *source;

                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++source;
                }
                else if (flags == TSKstrictConversion) {        /* it's an unpaired high surrogate */
                    --source;   /* return to the illegal value itself */
                    result = TSKsourceIllegal;
                    break;
                }
            }
            else {              /* We don't have the 16 bits following the high surrogate. */
                --source;       /* return to the high surrogate */
                result = TSKsourceExhausted;
                break;
            }
        }
        else if (flags == TSKstrictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                --source;       /* return to the illegal value itself */
                result = TSKsourceIllegal;
                break;
            }
        }

        /* Figure out how many bytes the result will require */
        if (ch < (UTF32) 0x80) {
            bytesToWrite = 1;
        }
        else if (ch < (UTF32) 0x800) {
            bytesToWrite = 2;
        }
        else if (ch < (UTF32) 0x10000) {
            bytesToWrite = 3;
        }
        else if (ch < (UTF32) 0x110000) {
            bytesToWrite = 4;
        }
        else {
            bytesToWrite = 3;
            ch = TSK_UNI_REPLACEMENT_CHAR;
        }

        /* Check the remaining room BEFORE advancing the pointer: forming a
         * pointer past one-past-the-end of the buffer is undefined behavior,
         * so compare the distance instead of the advanced pointer. */
        if ((targetEnd - target) < bytesToWrite) {
            source = oldSource; /* Back up source pointer! */
            result = TSKtargetExhausted;
            break;
        }

        /* Jump to the end of this character's slot and fill it backwards:
         * each continuation byte takes the low 6 bits, the lead byte gets
         * whatever remains plus the length marker. */
        target += bytesToWrite;
        switch (bytesToWrite) {
        case 4:
            *--target = (UTF8) ((ch | byteMark) & byteMask);
            ch >>= 6;
            /* fallthrough */
        case 3:
            *--target = (UTF8) ((ch | byteMark) & byteMask);
            ch >>= 6;
            /* fallthrough */
        case 2:
            *--target = (UTF8) ((ch | byteMark) & byteMask);
            ch >>= 6;
            /* fallthrough */
        case 1:
            *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
            break;
        default:
            /* not reachable: bytesToWrite is always set to 1..4 above */
            break;
        }
        /* target is back at the start of the character; step past it. */
        target += bytesToWrite;
    }

    *sourceStart = source;
    *targetStart = target;
    return result;
}
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
/* /*
......
...@@ -220,8 +220,7 @@ tsk_fs_fls(TSK_FS_INFO * fs, TSK_FS_FLS_FLAG_ENUM lclflags, ...@@ -220,8 +220,7 @@ tsk_fs_fls(TSK_FS_INFO * fs, TSK_FS_FLS_FLAG_ENUM lclflags,
ptr16 = (UTF16 *) tpre; ptr16 = (UTF16 *) tpre;
retval = retval =
tsk_UTF16toUTF8(fs->endian, tsk_UTF16toUTF8_lclorder((const UTF16 **) &ptr16, (UTF16 *)
(const UTF16 **) &ptr16, (UTF16 *)
& ptr16[TSTRLEN(tpre) + 1], &ptr8, & ptr16[TSTRLEN(tpre) + 1], &ptr8,
(UTF8 *) ((uintptr_t) ptr8 + clen), TSKlenientConversion); (UTF8 *) ((uintptr_t) ptr8 + clen), TSKlenientConversion);
if (retval != TSKconversionOK) { if (retval != TSKconversionOK) {
......
...@@ -458,7 +458,7 @@ tsk_fs_ifind_path(TSK_FS_INFO * fs, TSK_TCHAR * tpath, TSK_INUM_T * result) ...@@ -458,7 +458,7 @@ tsk_fs_ifind_path(TSK_FS_INFO * fs, TSK_TCHAR * tpath, TSK_INUM_T * result)
ptr16 = (UTF16 *) tpath; ptr16 = (UTF16 *) tpath;
retval = retval =
tsk_UTF16toUTF8(fs->endian, (const UTF16 **) &ptr16, (UTF16 *) tsk_UTF16toUTF8_lclorder((const UTF16 **) &ptr16, (UTF16 *)
& ptr16[TSTRLEN(tpath) + 1], &ptr8, & ptr16[TSTRLEN(tpath) + 1], &ptr8,
(UTF8 *) ((uintptr_t) ptr8 + clen), TSKlenientConversion); (UTF8 *) ((uintptr_t) ptr8 + clen), TSKlenientConversion);
if (retval != TSKconversionOK) { if (retval != TSKconversionOK) {
......
...@@ -314,8 +314,8 @@ tsk_img_open_utf8(int num_img, const char **images, TSK_IMG_TYPE_ENUM type) ...@@ -314,8 +314,8 @@ tsk_img_open_utf8(int num_img, const char **images, TSK_IMG_TYPE_ENUM type)
// we allocate the buffer with the same number of chars as the UTF-8 length // we allocate the buffer with the same number of chars as the UTF-8 length
ilen = strlen(images[i]); ilen = strlen(images[i]);
if ((images16[i] = if ((images16[i] =
(wchar_t *) tsk_malloc((ilen+1) * sizeof(wchar_t))) == (wchar_t *) tsk_malloc((ilen +
NULL) { 1) * sizeof(wchar_t))) == NULL) {
goto tsk_utf8_cleanup; goto tsk_utf8_cleanup;
} }
...@@ -324,8 +324,7 @@ tsk_img_open_utf8(int num_img, const char **images, TSK_IMG_TYPE_ENUM type) ...@@ -324,8 +324,7 @@ tsk_img_open_utf8(int num_img, const char **images, TSK_IMG_TYPE_ENUM type)
retval2 = retval2 =
tsk_UTF8toUTF16((const UTF8 **) &utf8, &utf8[ilen], tsk_UTF8toUTF16((const UTF8 **) &utf8, &utf8[ilen],
&utf16, &utf16[ilen], &utf16, &utf16[ilen], TSKlenientConversion);
TSKlenientConversion);
if (retval2 != TSKconversionOK) { if (retval2 != TSKconversionOK) {
tsk_errno = TSK_ERR_IMG_CONVERT; tsk_errno = TSK_ERR_IMG_CONVERT;
snprintf(tsk_errstr, TSK_ERRSTR_L, snprintf(tsk_errstr, TSK_ERRSTR_L,
...@@ -411,7 +410,7 @@ tsk_img_open_utf16(int num_img, ...@@ -411,7 +410,7 @@ tsk_img_open_utf16(int num_img,
utf8 = (UTF8 *) images8[i]; utf8 = (UTF8 *) images8[i];
retval2 = retval2 =
tsk_UTF16toUTF8(endian, (const UTF16 **) &utf16, tsk_UTF16toUTF8_lclorder((const UTF16 **) &utf16,
&utf16[wcslen(images[i]) + 1], &utf8, &utf16[wcslen(images[i]) + 1], &utf8,
&utf8[ilen + 1], TSKlenientConversion); &utf8[ilen + 1], TSKlenientConversion);
if (retval2 != TSKconversionOK) { if (retval2 != TSKconversionOK) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment