Robustify UTF translation

This commit is contained in:
Michael Theall 2015-11-19 13:49:31 -06:00
parent 3714fa3810
commit c0f75ae42b
10 changed files with 143 additions and 74 deletions

View File

@ -11,6 +11,13 @@
/// The maximum value of a u64. /// The maximum value of a u64.
#define U64_MAX UINT64_MAX #define U64_MAX UINT64_MAX
/// Fallback for C libraries that leave SSIZE_MAX undefined (newlib would
/// ideally provide it): when SIZE_MAX is available, use SIZE_MAX halved.
#if !defined(SSIZE_MAX) && defined(SIZE_MAX)
#define SSIZE_MAX ((SIZE_MAX) >> 1)
#endif
typedef uint8_t u8; ///< 8-bit unsigned integer typedef uint8_t u8; ///< 8-bit unsigned integer
typedef uint16_t u16; ///< 16-bit unsigned integer typedef uint16_t u16; ///< 16-bit unsigned integer
typedef uint32_t u32; ///< 32-bit unsigned integer typedef uint32_t u32; ///< 32-bit unsigned integer

View File

@ -52,61 +52,104 @@ ssize_t encode_utf8 (uint8_t *out, uint32_t in);
ssize_t encode_utf16(uint16_t *out, uint32_t in); ssize_t encode_utf16(uint16_t *out, uint32_t in);
/** Convert a UTF-8 sequence into a UTF-16 sequence /** Convert a UTF-8 sequence into a UTF-16 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
* *
* @param[out] out Output sequence * @param[out] out Output sequence
* @param[in] in Input sequence * @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
* *
* @returns number of output code units produced * @returns number of output code units produced
* @returns -1 for error * @returns -1 for error
*
* @note \a out is not null-terminated
*/ */
size_t utf8_to_utf16(uint16_t *out, const uint8_t *in, size_t len); ssize_t utf8_to_utf16(uint16_t *out, const uint8_t *in, size_t len);
/** Convert a UTF-8 sequence into a UTF-32 sequence /** Convert a UTF-8 sequence into a UTF-32 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
* *
* @param[out] out Output sequence * @param[out] out Output sequence
* @param[in] in Input sequence * @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
* *
* @returns number of output code units produced * @returns number of output code units produced
* @returns -1 for error * @returns -1 for error
*
* @note \a out is not null-terminated
*/ */
size_t utf8_to_utf32(uint32_t *out, const uint8_t *in, size_t len); ssize_t utf8_to_utf32(uint32_t *out, const uint8_t *in, size_t len);
/** Convert a UTF-16 sequence into a UTF-8 sequence /** Convert a UTF-16 sequence into a UTF-8 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
* *
* @param[out] out Output sequence * @param[out] out Output sequence
* @param[in] in Input sequence * @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
* *
* @returns number of output code units produced * @returns number of output code units produced
* @returns -1 for error * @returns -1 for error
*
* @note \a out is not null-terminated
*/ */
size_t utf16_to_utf8(uint8_t *out, const uint16_t *in, size_t len); ssize_t utf16_to_utf8(uint8_t *out, const uint16_t *in, size_t len);
/** Convert a UTF-16 sequence into a UTF-32 sequence /** Convert a UTF-16 sequence into a UTF-32 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
* *
* @param[out] out Output sequence * @param[out] out Output sequence
* @param[in] in Input sequence * @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
* *
* @returns number of output code units produced * @returns number of output code units produced
* @returns -1 for error * @returns -1 for error
*
* @note \a out is not null-terminated
*/ */
size_t utf16_to_utf32(uint32_t *out, const uint16_t *in, size_t len); ssize_t utf16_to_utf32(uint32_t *out, const uint16_t *in, size_t len);
/** Convert a UTF-32 sequence into a UTF-8 sequence /** Convert a UTF-32 sequence into a UTF-8 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
* *
* @param[out] out Output sequence * @param[out] out Output sequence
* @param[in] in Input sequence * @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
* *
* @returns number of output code units produced * @returns number of output code units produced
* @returns -1 for error * @returns -1 for error
*
* @note \a out is not null-terminated
*/ */
size_t utf32_to_utf8(uint8_t *out, const uint32_t *in, size_t len); ssize_t utf32_to_utf8(uint8_t *out, const uint32_t *in, size_t len);
/** Convert a UTF-32 sequence into a UTF-16 sequence /** Convert a UTF-32 sequence into a UTF-16 sequence
* *
* @param[out] out Output sequence * @param[out] out Output sequence
* @param[in] in Input sequence * @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
* *
* @returns number of output code units produced * @returns number of output code units produced
* @returns -1 for error * @returns -1 for error
*
* @note \a out is not null-terminated
*/ */
size_t utf32_to_utf16(uint16_t *out, const uint32_t *in, size_t len); ssize_t utf32_to_utf16(uint16_t *out, const uint32_t *in, size_t len);

View File

@ -133,8 +133,9 @@ Result romfsInit(void)
} else } else
return 2; return 2;
size_t units = utf8_to_utf16(__utf16path, (const uint8_t*)filename, PATH_MAX+1); ssize_t units = utf8_to_utf16(__utf16path, (const uint8_t*)filename, PATH_MAX);
if (units == (size_t)-1) return 3; if (units < 0) return 3;
if (units >= PATH_MAX) return 4;
__utf16path[units] = 0; __utf16path[units] = 0;
FS_Archive arch = { ARCHIVE_SDMC, { PATH_EMPTY, 1, (u8*)"" }, 0 }; FS_Archive arch = { ARCHIVE_SDMC, { PATH_EMPTY, 1, (u8*)"" }, 0 };
@ -284,7 +285,7 @@ static romfs_file* searchForFile(romfs_dir* parent, u16* name, u32 namelen)
static int navigateToDir(romfs_dir** ppDir, const char** pPath, bool isDir) static int navigateToDir(romfs_dir** ppDir, const char** pPath, bool isDir)
{ {
size_t units; ssize_t units;
char* colonPos = strchr(*pPath, ':'); char* colonPos = strchr(*pPath, ':');
if (colonPos) *pPath = colonPos+1; if (colonPos) *pPath = colonPos+1;
@ -331,9 +332,11 @@ static int navigateToDir(romfs_dir** ppDir, const char** pPath, bool isDir)
} }
} }
units = utf8_to_utf16(__utf16path, (const uint8_t*)component, PATH_MAX+1); units = utf8_to_utf16(__utf16path, (const uint8_t*)component, PATH_MAX);
if (units == (size_t)-1) if (units < 0)
return EILSEQ; return EILSEQ;
if (units >= PATH_MAX)
return ENAMETOOLONG;
*ppDir = searchForDir(*ppDir, __utf16path, units); *ppDir = searchForDir(*ppDir, __utf16path, units);
if (!*ppDir) if (!*ppDir)
@ -363,12 +366,17 @@ int romfs_open(struct _reent *r, void *fileStruct, const char *path, int flags,
if (r->_errno != 0) if (r->_errno != 0)
return -1; return -1;
size_t units = utf8_to_utf16(__utf16path, (const uint8_t*)path, PATH_MAX+1); ssize_t units = utf8_to_utf16(__utf16path, (const uint8_t*)path, PATH_MAX);
if (!units || units == (size_t)-1) if (units <= 0)
{ {
r->_errno = EILSEQ; r->_errno = EILSEQ;
return -1; return -1;
} }
if (units >= PATH_MAX)
{
r->_errno = ENAMETOOLONG;
return -1;
}
romfs_file* file = searchForFile(curDir, __utf16path, units); romfs_file* file = searchForFile(curDir, __utf16path, units);
if (!file) if (!file)

View File

@ -118,7 +118,7 @@ static const char*
sdmc_fixpath(struct _reent *r, sdmc_fixpath(struct _reent *r,
const char *path) const char *path)
{ {
size_t units; ssize_t units;
uint32_t code; uint32_t code;
const uint8_t *p = (const uint8_t*)path; const uint8_t *p = (const uint8_t*)path;
@ -126,7 +126,7 @@ sdmc_fixpath(struct _reent *r,
do do
{ {
units = decode_utf8(&code, p); units = decode_utf8(&code, p);
if(units == (size_t)-1) if(units < 0)
{ {
r->_errno = EILSEQ; r->_errno = EILSEQ;
return NULL; return NULL;
@ -145,7 +145,7 @@ sdmc_fixpath(struct _reent *r,
do do
{ {
units = decode_utf8(&code, p); units = decode_utf8(&code, p);
if(units == (size_t)-1) if(units < 0)
{ {
r->_errno = EILSEQ; r->_errno = EILSEQ;
return NULL; return NULL;
@ -182,7 +182,7 @@ static const FS_Path
sdmc_utf16path(struct _reent *r, sdmc_utf16path(struct _reent *r,
const char *path) const char *path)
{ {
size_t units; ssize_t units;
FS_Path fspath; FS_Path fspath;
fspath.data = NULL; fspath.data = NULL;
@ -190,14 +190,13 @@ sdmc_utf16path(struct _reent *r,
if(sdmc_fixpath(r, path) == NULL) if(sdmc_fixpath(r, path) == NULL)
return fspath; return fspath;
units = utf8_to_utf16(__utf16path, (const uint8_t*)__fixedpath, PATH_MAX+1); units = utf8_to_utf16(__utf16path, (const uint8_t*)__fixedpath, PATH_MAX);
if(units == (size_t)-1) if(units < 0)
{ {
r->_errno = EILSEQ; r->_errno = EILSEQ;
return fspath; return fspath;
} }
if(units >= PATH_MAX)
if(__utf16path[PATH_MAX] != 0)
{ {
r->_errno = ENAMETOOLONG; r->_errno = ENAMETOOLONG;
return fspath; return fspath;
@ -220,7 +219,7 @@ static bool sdmcInitialised = false;
/*! Initialize SDMC device */ /*! Initialize SDMC device */
Result sdmcInit(void) Result sdmcInit(void)
{ {
size_t units; ssize_t units;
uint32_t code; uint32_t code;
char *p; char *p;
Result rc = 0; Result rc = 0;
@ -253,7 +252,7 @@ Result sdmcInit(void)
do do
{ {
units = decode_utf8(&code, (const uint8_t*)p); units = decode_utf8(&code, (const uint8_t*)p);
if(units == (size_t)-1) if(units < 0)
{ {
last_slash = NULL; last_slash = NULL;
break; break;
@ -909,9 +908,9 @@ sdmc_dirnext(struct _reent *r,
char *filename, char *filename,
struct stat *filestat) struct stat *filestat)
{ {
Result rc; Result rc;
u32 entries; u32 entries;
size_t units; ssize_t units;
/* get pointer to our data */ /* get pointer to our data */
sdmc_dir_t *dir = (sdmc_dir_t*)(dirState->dirStruct); sdmc_dir_t *dir = (sdmc_dir_t*)(dirState->dirStruct);
@ -938,13 +937,13 @@ sdmc_dirnext(struct _reent *r,
/* convert name from UTF-16 to UTF-8 */ /* convert name from UTF-16 to UTF-8 */
memset(filename, 0, NAME_MAX); memset(filename, 0, NAME_MAX);
units = utf16_to_utf8((uint8_t*)filename, dir->entry_data.name, NAME_MAX); units = utf16_to_utf8((uint8_t*)filename, dir->entry_data.name, NAME_MAX);
if(units == (size_t)-1) if(units < 0)
{ {
r->_errno = EILSEQ; r->_errno = EILSEQ;
return -1; return -1;
} }
if(filename[NAME_MAX-1] != 0) if(units >= NAME_MAX)
{ {
r->_errno = ENAMETOOLONG; r->_errno = ENAMETOOLONG;
return -1; return -1;

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h" #include "3ds/util/utf.h"
size_t ssize_t
utf16_to_utf32(uint32_t *out, utf16_to_utf32(uint32_t *out,
const uint16_t *in, const uint16_t *in,
size_t len) size_t len)
{ {
size_t rc = 0; ssize_t rc = 0;
ssize_t units; ssize_t units;
uint32_t code; uint32_t code;
@ -13,7 +14,7 @@ utf16_to_utf32(uint32_t *out,
{ {
units = decode_utf16(&code, in); units = decode_utf16(&code, in);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(code > 0) if(code > 0)
{ {
@ -23,11 +24,12 @@ utf16_to_utf32(uint32_t *out,
{ {
if(rc < len) if(rc < len)
*out++ = code; *out++ = code;
else
return rc;
} }
++rc; if(SSIZE_MAX - 1 >= rc)
++rc;
else
return -1;
} }
} while(code > 0); } while(code > 0);

View File

@ -1,20 +1,21 @@
#include "3ds/types.h"
#include "3ds/util/utf.h" #include "3ds/util/utf.h"
size_t ssize_t
utf16_to_utf8(uint8_t *out, utf16_to_utf8(uint8_t *out,
const uint16_t *in, const uint16_t *in,
size_t len) size_t len)
{ {
size_t rc = 0; ssize_t rc = 0;
ssize_t units; ssize_t units;
uint32_t code; uint32_t code;
uint8_t encoded[4]; uint8_t encoded[4];
do do
{ {
units = decode_utf16(&code, in); units = decode_utf16(&code, in);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(code > 0) if(code > 0)
{ {
@ -22,7 +23,7 @@ utf16_to_utf8(uint8_t *out,
units = encode_utf8(encoded, code); units = encode_utf8(encoded, code);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(out != NULL) if(out != NULL)
{ {
@ -36,11 +37,12 @@ utf16_to_utf8(uint8_t *out,
if(units > 3) if(units > 3)
*out++ = encoded[3]; *out++ = encoded[3];
} }
else
return rc;
} }
rc += units; if(SSIZE_MAX - units >= rc)
rc += units;
else
return -1;
} }
} while(code > 0); } while(code > 0);

View File

@ -1,19 +1,20 @@
#include "3ds/types.h"
#include "3ds/util/utf.h" #include "3ds/util/utf.h"
size_t ssize_t
utf32_to_utf16(uint16_t *out, utf32_to_utf16(uint16_t *out,
const uint32_t *in, const uint32_t *in,
size_t len) size_t len)
{ {
size_t rc = 0; ssize_t rc = 0;
ssize_t units; ssize_t units;
uint16_t encoded[2]; uint16_t encoded[2];
while(*in > 0) while(*in > 0)
{ {
units = encode_utf16(encoded, *in++); units = encode_utf16(encoded, *in++);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(out != NULL) if(out != NULL)
{ {
@ -23,11 +24,12 @@ utf32_to_utf16(uint16_t *out,
if(units > 1) if(units > 1)
*out++ = encoded[1]; *out++ = encoded[1];
} }
else
return rc;
} }
rc += units; if(SSIZE_MAX - units >= rc)
rc += units;
else
return -1;
} }
return rc; return rc;

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h" #include "3ds/util/utf.h"
size_t ssize_t
utf32_to_utf8(uint8_t *out, utf32_to_utf8(uint8_t *out,
const uint32_t *in, const uint32_t *in,
size_t len) size_t len)
{ {
size_t rc = 0; ssize_t rc = 0;
ssize_t units; ssize_t units;
uint8_t encoded[4]; uint8_t encoded[4];
@ -13,7 +14,7 @@ utf32_to_utf8(uint8_t *out,
{ {
units = encode_utf8(encoded, *in++); units = encode_utf8(encoded, *in++);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(out != NULL) if(out != NULL)
{ {
@ -27,11 +28,12 @@ utf32_to_utf8(uint8_t *out,
if(units > 3) if(units > 3)
*out++ = encoded[3]; *out++ = encoded[3];
} }
else
return rc;
} }
rc += units; if(SSIZE_MAX - units >= rc)
rc += units;
else
return -1;
} }
return rc; return rc;

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h" #include "3ds/util/utf.h"
size_t ssize_t
utf8_to_utf16(uint16_t *out, utf8_to_utf16(uint16_t *out,
const uint8_t *in, const uint8_t *in,
size_t len) size_t len)
{ {
size_t rc = 0; ssize_t rc = 0;
ssize_t units; ssize_t units;
uint32_t code; uint32_t code;
uint16_t encoded[2]; uint16_t encoded[2];
@ -14,7 +15,7 @@ utf8_to_utf16(uint16_t *out,
{ {
units = decode_utf8(&code, in); units = decode_utf8(&code, in);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(code > 0) if(code > 0)
{ {
@ -22,7 +23,7 @@ utf8_to_utf16(uint16_t *out,
units = encode_utf16(encoded, code); units = encode_utf16(encoded, code);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(out != NULL) if(out != NULL)
{ {
@ -32,11 +33,12 @@ utf8_to_utf16(uint16_t *out,
if(units > 1) if(units > 1)
*out++ = encoded[1]; *out++ = encoded[1];
} }
else
return rc;
} }
rc += units; if(SSIZE_MAX - units >= rc)
rc += units;
else
return -1;
} }
} while(code > 0); } while(code > 0);

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h" #include "3ds/util/utf.h"
size_t ssize_t
utf8_to_utf32(uint32_t *out, utf8_to_utf32(uint32_t *out,
const uint8_t *in, const uint8_t *in,
size_t len) size_t len)
{ {
size_t rc = 0; ssize_t rc = 0;
ssize_t units; ssize_t units;
uint32_t code; uint32_t code;
@ -13,7 +14,7 @@ utf8_to_utf32(uint32_t *out,
{ {
units = decode_utf8(&code, in); units = decode_utf8(&code, in);
if(units == -1) if(units == -1)
return (size_t)-1; return -1;
if(code > 0) if(code > 0)
{ {
@ -23,11 +24,12 @@ utf8_to_utf32(uint32_t *out,
{ {
if(rc < len) if(rc < len)
*out++ = code; *out++ = code;
else
return rc;
} }
++rc; if(SSIZE_MAX - 1 >= rc)
++rc;
else
return -1;
} }
} while(code > 0); } while(code > 0);