Robustify UTF translation

This commit is contained in:
Michael Theall 2015-11-19 13:49:31 -06:00
parent 3714fa3810
commit c0f75ae42b
10 changed files with 143 additions and 74 deletions

View File

@ -11,6 +11,13 @@
/// The maximum value of a u64.
#define U64_MAX UINT64_MAX
/// Fallback SSIZE_MAX (half of SIZE_MAX), used only when the C library (e.g. newlib) has not already defined it.
#if !defined(SSIZE_MAX) && defined(SIZE_MAX)
#define SSIZE_MAX ((SIZE_MAX) >> 1)
#endif
typedef uint8_t u8; ///< 8-bit unsigned integer
typedef uint16_t u16; ///< 16-bit unsigned integer
typedef uint32_t u32; ///< 32-bit unsigned integer

View File

@ -52,61 +52,104 @@ ssize_t encode_utf8 (uint8_t *out, uint32_t in);
ssize_t encode_utf16(uint16_t *out, uint32_t in);
/** Convert a UTF-8 sequence into a UTF-16 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
*
* @param[out] out Output sequence
* @param[in] in Input sequence
* @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
*
* @returns number of code units the whole input converts to (may exceed \a len, in which case \a out was truncated)
* @returns -1 for error
*
* @note \a out is not null-terminated
*/
size_t utf8_to_utf16(uint16_t *out, const uint8_t *in, size_t len);
ssize_t utf8_to_utf16(uint16_t *out, const uint8_t *in, size_t len);
/** Convert a UTF-8 sequence into a UTF-32 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
*
* @param[out] out Output sequence
* @param[in] in Input sequence
* @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
*
* @returns number of code units the whole input converts to (may exceed \a len, in which case \a out was truncated)
* @returns -1 for error
*
* @note \a out is not null-terminated
*/
size_t utf8_to_utf32(uint32_t *out, const uint8_t *in, size_t len);
ssize_t utf8_to_utf32(uint32_t *out, const uint8_t *in, size_t len);
/** Convert a UTF-16 sequence into a UTF-8 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
*
* @param[out] out Output sequence
* @param[in] in Input sequence
* @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
*
* @returns number of code units the whole input converts to (may exceed \a len, in which case \a out was truncated)
* @returns -1 for error
*
* @note \a out is not null-terminated
*/
size_t utf16_to_utf8(uint8_t *out, const uint16_t *in, size_t len);
ssize_t utf16_to_utf8(uint8_t *out, const uint16_t *in, size_t len);
/** Convert a UTF-16 sequence into a UTF-32 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
*
* @param[out] out Output sequence
* @param[in] in Input sequence
* @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
*
* @returns number of code units the whole input converts to (may exceed \a len, in which case \a out was truncated)
* @returns -1 for error
*
* @note \a out is not null-terminated
*/
size_t utf16_to_utf32(uint32_t *out, const uint16_t *in, size_t len);
ssize_t utf16_to_utf32(uint32_t *out, const uint16_t *in, size_t len);
/** Convert a UTF-32 sequence into a UTF-8 sequence
*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
*
* @param[out] out Output sequence
* @param[in] in Input sequence
* @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
*
* @returns number of code units the whole input converts to (may exceed \a len, in which case \a out was truncated)
* @returns -1 for error
*
* @note \a out is not null-terminated
*/
size_t utf32_to_utf8(uint8_t *out, const uint32_t *in, size_t len);
ssize_t utf32_to_utf8(uint8_t *out, const uint32_t *in, size_t len);
/** Convert a UTF-32 sequence into a UTF-16 sequence
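*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
*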
* @param[out] out Output sequence
* @param[in] in Input sequence
* @param[in] in Input sequence (null-terminated)
* @param[in] len Output length
*
* @returns number of code units the whole input converts to (may exceed \a len, in which case \a out was truncated)
* @returns -1 for error
*
* @note \a out is not null-terminated
*/
size_t utf32_to_utf16(uint16_t *out, const uint32_t *in, size_t len);
ssize_t utf32_to_utf16(uint16_t *out, const uint32_t *in, size_t len);

View File

@ -133,8 +133,9 @@ Result romfsInit(void)
} else
return 2;
size_t units = utf8_to_utf16(__utf16path, (const uint8_t*)filename, PATH_MAX+1);
if (units == (size_t)-1) return 3;
ssize_t units = utf8_to_utf16(__utf16path, (const uint8_t*)filename, PATH_MAX);
if (units < 0) return 3;
if (units >= PATH_MAX) return 4;
__utf16path[units] = 0;
FS_Archive arch = { ARCHIVE_SDMC, { PATH_EMPTY, 1, (u8*)"" }, 0 };
@ -284,7 +285,7 @@ static romfs_file* searchForFile(romfs_dir* parent, u16* name, u32 namelen)
static int navigateToDir(romfs_dir** ppDir, const char** pPath, bool isDir)
{
size_t units;
ssize_t units;
char* colonPos = strchr(*pPath, ':');
if (colonPos) *pPath = colonPos+1;
@ -331,9 +332,11 @@ static int navigateToDir(romfs_dir** ppDir, const char** pPath, bool isDir)
}
}
units = utf8_to_utf16(__utf16path, (const uint8_t*)component, PATH_MAX+1);
if (units == (size_t)-1)
units = utf8_to_utf16(__utf16path, (const uint8_t*)component, PATH_MAX);
if (units < 0)
return EILSEQ;
if (units >= PATH_MAX)
return ENAMETOOLONG;
*ppDir = searchForDir(*ppDir, __utf16path, units);
if (!*ppDir)
@ -363,12 +366,17 @@ int romfs_open(struct _reent *r, void *fileStruct, const char *path, int flags,
if (r->_errno != 0)
return -1;
size_t units = utf8_to_utf16(__utf16path, (const uint8_t*)path, PATH_MAX+1);
if (!units || units == (size_t)-1)
ssize_t units = utf8_to_utf16(__utf16path, (const uint8_t*)path, PATH_MAX);
if (units <= 0)
{
r->_errno = EILSEQ;
return -1;
}
if (units >= PATH_MAX)
{
r->_errno = ENAMETOOLONG;
return -1;
}
romfs_file* file = searchForFile(curDir, __utf16path, units);
if (!file)

View File

@ -118,7 +118,7 @@ static const char*
sdmc_fixpath(struct _reent *r,
const char *path)
{
size_t units;
ssize_t units;
uint32_t code;
const uint8_t *p = (const uint8_t*)path;
@ -126,7 +126,7 @@ sdmc_fixpath(struct _reent *r,
do
{
units = decode_utf8(&code, p);
if(units == (size_t)-1)
if(units < 0)
{
r->_errno = EILSEQ;
return NULL;
@ -145,7 +145,7 @@ sdmc_fixpath(struct _reent *r,
do
{
units = decode_utf8(&code, p);
if(units == (size_t)-1)
if(units < 0)
{
r->_errno = EILSEQ;
return NULL;
@ -182,7 +182,7 @@ static const FS_Path
sdmc_utf16path(struct _reent *r,
const char *path)
{
size_t units;
ssize_t units;
FS_Path fspath;
fspath.data = NULL;
@ -190,14 +190,13 @@ sdmc_utf16path(struct _reent *r,
if(sdmc_fixpath(r, path) == NULL)
return fspath;
units = utf8_to_utf16(__utf16path, (const uint8_t*)__fixedpath, PATH_MAX+1);
if(units == (size_t)-1)
units = utf8_to_utf16(__utf16path, (const uint8_t*)__fixedpath, PATH_MAX);
if(units < 0)
{
r->_errno = EILSEQ;
return fspath;
}
if(__utf16path[PATH_MAX] != 0)
if(units >= PATH_MAX)
{
r->_errno = ENAMETOOLONG;
return fspath;
@ -220,7 +219,7 @@ static bool sdmcInitialised = false;
/*! Initialize SDMC device */
Result sdmcInit(void)
{
size_t units;
ssize_t units;
uint32_t code;
char *p;
Result rc = 0;
@ -253,7 +252,7 @@ Result sdmcInit(void)
do
{
units = decode_utf8(&code, (const uint8_t*)p);
if(units == (size_t)-1)
if(units < 0)
{
last_slash = NULL;
break;
@ -909,9 +908,9 @@ sdmc_dirnext(struct _reent *r,
char *filename,
struct stat *filestat)
{
Result rc;
u32 entries;
size_t units;
Result rc;
u32 entries;
ssize_t units;
/* get pointer to our data */
sdmc_dir_t *dir = (sdmc_dir_t*)(dirState->dirStruct);
@ -938,13 +937,13 @@ sdmc_dirnext(struct _reent *r,
/* convert name from UTF-16 to UTF-8 */
memset(filename, 0, NAME_MAX);
units = utf16_to_utf8((uint8_t*)filename, dir->entry_data.name, NAME_MAX);
if(units == (size_t)-1)
if(units < 0)
{
r->_errno = EILSEQ;
return -1;
}
if(filename[NAME_MAX-1] != 0)
if(units >= NAME_MAX)
{
r->_errno = ENAMETOOLONG;
return -1;

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h"
size_t
ssize_t
utf16_to_utf32(uint32_t *out,
const uint16_t *in,
size_t len)
{
size_t rc = 0;
ssize_t rc = 0;
ssize_t units;
uint32_t code;
@ -13,7 +14,7 @@ utf16_to_utf32(uint32_t *out,
{
units = decode_utf16(&code, in);
if(units == -1)
return (size_t)-1;
return -1;
if(code > 0)
{
@ -23,11 +24,12 @@ utf16_to_utf32(uint32_t *out,
{
if(rc < len)
*out++ = code;
else
return rc;
}
++rc;
/* Count this code point only while the total still fits in ssize_t;
 * a count that would pass SSIZE_MAX is reported as an error. */
if(rc <= SSIZE_MAX - 1)
  ++rc;
else
  return -1;
}
} while(code > 0);

View File

@ -1,20 +1,21 @@
#include "3ds/types.h"
#include "3ds/util/utf.h"
size_t
ssize_t
utf16_to_utf8(uint8_t *out,
const uint16_t *in,
size_t len)
{
size_t rc = 0;
ssize_t rc = 0;
ssize_t units;
uint32_t code;
uint8_t encoded[4];
uint8_t encoded[4];
do
{
units = decode_utf16(&code, in);
if(units == -1)
return (size_t)-1;
return -1;
if(code > 0)
{
@ -22,7 +23,7 @@ utf16_to_utf8(uint8_t *out,
units = encode_utf8(encoded, code);
if(units == -1)
return (size_t)-1;
return -1;
if(out != NULL)
{
@ -36,11 +37,12 @@ utf16_to_utf8(uint8_t *out,
if(units > 3)
*out++ = encoded[3];
}
else
return rc;
}
rc += units;
/* Add the encoded length only while the total still fits in ssize_t;
 * a count that would pass SSIZE_MAX is reported as an error. */
if(rc <= SSIZE_MAX - units)
  rc += units;
else
  return -1;
}
} while(code > 0);

View File

@ -1,19 +1,20 @@
#include "3ds/types.h"
#include "3ds/util/utf.h"
size_t
ssize_t
utf32_to_utf16(uint16_t *out,
const uint32_t *in,
size_t len)
{
size_t rc = 0;
ssize_t rc = 0;
ssize_t units;
uint16_t encoded[2];
uint16_t encoded[2];
while(*in > 0)
{
units = encode_utf16(encoded, *in++);
if(units == -1)
return (size_t)-1;
return -1;
if(out != NULL)
{
@ -23,11 +24,12 @@ utf32_to_utf16(uint16_t *out,
if(units > 1)
*out++ = encoded[1];
}
else
return rc;
}
rc += units;
/* Add the encoded length only while the total still fits in ssize_t;
 * a count that would pass SSIZE_MAX is reported as an error. */
if(rc <= SSIZE_MAX - units)
  rc += units;
else
  return -1;
}
return rc;

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h"
size_t
ssize_t
utf32_to_utf8(uint8_t *out,
const uint32_t *in,
size_t len)
{
size_t rc = 0;
ssize_t rc = 0;
ssize_t units;
uint8_t encoded[4];
@ -13,7 +14,7 @@ utf32_to_utf8(uint8_t *out,
{
units = encode_utf8(encoded, *in++);
if(units == -1)
return (size_t)-1;
return -1;
if(out != NULL)
{
@ -27,11 +28,12 @@ utf32_to_utf8(uint8_t *out,
if(units > 3)
*out++ = encoded[3];
}
else
return rc;
}
rc += units;
/* Add the encoded length only while the total still fits in ssize_t;
 * a count that would pass SSIZE_MAX is reported as an error. */
if(rc <= SSIZE_MAX - units)
  rc += units;
else
  return -1;
}
return rc;

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h"
size_t
ssize_t
utf8_to_utf16(uint16_t *out,
const uint8_t *in,
size_t len)
{
size_t rc = 0;
ssize_t rc = 0;
ssize_t units;
uint32_t code;
uint16_t encoded[2];
@ -14,7 +15,7 @@ utf8_to_utf16(uint16_t *out,
{
units = decode_utf8(&code, in);
if(units == -1)
return (size_t)-1;
return -1;
if(code > 0)
{
@ -22,7 +23,7 @@ utf8_to_utf16(uint16_t *out,
units = encode_utf16(encoded, code);
if(units == -1)
return (size_t)-1;
return -1;
if(out != NULL)
{
@ -32,11 +33,12 @@ utf8_to_utf16(uint16_t *out,
if(units > 1)
*out++ = encoded[1];
}
else
return rc;
}
rc += units;
/* Add the encoded length only while the total still fits in ssize_t;
 * a count that would pass SSIZE_MAX is reported as an error. */
if(rc <= SSIZE_MAX - units)
  rc += units;
else
  return -1;
}
} while(code > 0);

View File

@ -1,11 +1,12 @@
#include "3ds/types.h"
#include "3ds/util/utf.h"
size_t
ssize_t
utf8_to_utf32(uint32_t *out,
const uint8_t *in,
size_t len)
{
size_t rc = 0;
ssize_t rc = 0;
ssize_t units;
uint32_t code;
@ -13,7 +14,7 @@ utf8_to_utf32(uint32_t *out,
{
units = decode_utf8(&code, in);
if(units == -1)
return (size_t)-1;
return -1;
if(code > 0)
{
@ -23,11 +24,12 @@ utf8_to_utf32(uint32_t *out,
{
if(rc < len)
*out++ = code;
else
return rc;
}
++rc;
/* Count this code point only while the total still fits in ssize_t;
 * a count that would pass SSIZE_MAX is reported as an error. */
if(rc <= SSIZE_MAX - 1)
  ++rc;
else
  return -1;
}
} while(code > 0);