Robustify UTF translation
This commit is contained in:
parent
3714fa3810
commit
c0f75ae42b
@ -11,6 +11,13 @@
|
||||
/// The maximum value of a u64.
|
||||
#define U64_MAX UINT64_MAX
|
||||
|
||||
/// Fallback SSIZE_MAX for C libraries (such as newlib) that do not define it
|
||||
#ifndef SSIZE_MAX
|
||||
#ifdef SIZE_MAX
|
||||
#define SSIZE_MAX ((SIZE_MAX) >> 1)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef uint8_t u8; ///< 8-bit unsigned integer
|
||||
typedef uint16_t u16; ///< 16-bit unsigned integer
|
||||
typedef uint32_t u32; ///< 32-bit unsigned integer
|
||||
|
@ -52,61 +52,104 @@ ssize_t encode_utf8 (uint8_t *out, uint32_t in);
|
||||
ssize_t encode_utf16(uint16_t *out, uint32_t in);
|
||||
|
||||
/** Convert a UTF-8 sequence into a UTF-16 sequence
|
||||
*
|
||||
* Fills the output buffer up to \a len code units.
|
||||
* Returns the number of code units that the input would produce;
|
||||
* if it returns greater than \a len, the output has been
|
||||
* truncated.
|
||||
*
|
||||
* @param[out] out Output sequence
|
||||
* @param[in] in Input sequence
|
||||
* @param[in] in Input sequence (null-terminated)
|
||||
* @param[in] len Output length
|
||||
*
|
||||
* @returns number of output code units produced
|
||||
* @returns -1 for error
|
||||
*
|
||||
* @note \a out is not null-terminated
|
||||
*/
|
||||
size_t utf8_to_utf16(uint16_t *out, const uint8_t *in, size_t len);
|
||||
ssize_t utf8_to_utf16(uint16_t *out, const uint8_t *in, size_t len);
|
||||
|
||||
/** Convert a UTF-8 sequence into a UTF-32 sequence
|
||||
*
|
||||
* Fills the output buffer up to \a len code units.
|
||||
* Returns the number of code units that the input would produce;
|
||||
* if it returns greater than \a len, the output has been
|
||||
* truncated.
|
||||
*
|
||||
* @param[out] out Output sequence
|
||||
* @param[in] in Input sequence
|
||||
* @param[in] in Input sequence (null-terminated)
|
||||
* @param[in] len Output length
|
||||
*
|
||||
* @returns number of output code units produced
|
||||
* @returns -1 for error
|
||||
*
|
||||
* @note \a out is not null-terminated
|
||||
*/
|
||||
size_t utf8_to_utf32(uint32_t *out, const uint8_t *in, size_t len);
|
||||
ssize_t utf8_to_utf32(uint32_t *out, const uint8_t *in, size_t len);
|
||||
|
||||
/** Convert a UTF-16 sequence into a UTF-8 sequence
|
||||
*
|
||||
* Fills the output buffer up to \a len code units.
|
||||
* Returns the number of code units that the input would produce;
|
||||
* if it returns greater than \a len, the output has been
|
||||
* truncated.
|
||||
*
|
||||
* @param[out] out Output sequence
|
||||
* @param[in] in Input sequence
|
||||
* @param[in] in Input sequence (null-terminated)
|
||||
* @param[in] len Output length
|
||||
*
|
||||
* @returns number of output code units produced
|
||||
* @returns -1 for error
|
||||
*
|
||||
* @note \a out is not null-terminated
|
||||
*/
|
||||
size_t utf16_to_utf8(uint8_t *out, const uint16_t *in, size_t len);
|
||||
ssize_t utf16_to_utf8(uint8_t *out, const uint16_t *in, size_t len);
|
||||
|
||||
/** Convert a UTF-16 sequence into a UTF-32 sequence
|
||||
*
|
||||
* Fills the output buffer up to \a len code units.
|
||||
* Returns the number of code units that the input would produce;
|
||||
* if it returns greater than \a len, the output has been
|
||||
* truncated.
|
||||
*
|
||||
* @param[out] out Output sequence
|
||||
* @param[in] in Input sequence
|
||||
* @param[in] in Input sequence (null-terminated)
|
||||
* @param[in] len Output length
|
||||
*
|
||||
* @returns number of output code units produced
|
||||
* @returns -1 for error
|
||||
*
|
||||
* @note \a out is not null-terminated
|
||||
*/
|
||||
size_t utf16_to_utf32(uint32_t *out, const uint16_t *in, size_t len);
|
||||
ssize_t utf16_to_utf32(uint32_t *out, const uint16_t *in, size_t len);
|
||||
|
||||
/** Convert a UTF-32 sequence into a UTF-8 sequence
|
||||
*
|
||||
* Fills the output buffer up to \a len code units.
|
||||
* Returns the number of code units that the input would produce;
|
||||
* if it returns greater than \a len, the output has been
|
||||
* truncated.
|
||||
*
|
||||
* @param[out] out Output sequence
|
||||
* @param[in] in Input sequence
|
||||
* @param[in] in Input sequence (null-terminated)
|
||||
* @param[in] len Output length
|
||||
*
|
||||
* @returns number of output code units produced
|
||||
* @returns -1 for error
|
||||
*
|
||||
* @note \a out is not null-terminated
|
||||
*/
|
||||
size_t utf32_to_utf8(uint8_t *out, const uint32_t *in, size_t len);
|
||||
ssize_t utf32_to_utf8(uint8_t *out, const uint32_t *in, size_t len);
|
||||
|
||||
/** Convert a UTF-32 sequence into a UTF-16 sequence
|
||||
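*
* Fills the output buffer up to \a len code units.
* Returns the number of code units that the input would produce;
* if it returns greater than \a len, the output has been
* truncated.
*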
|
||||
* @param[out] out Output sequence
|
||||
* @param[in] in Input sequence
|
||||
* @param[in] in Input sequence (null-terminated)
|
||||
* @param[in] len Output length
|
||||
*
|
||||
* @returns number of output code units produced
|
||||
* @returns -1 for error
|
||||
*
|
||||
* @note \a out is not null-terminated
|
||||
*/
|
||||
size_t utf32_to_utf16(uint16_t *out, const uint32_t *in, size_t len);
|
||||
ssize_t utf32_to_utf16(uint16_t *out, const uint32_t *in, size_t len);
|
||||
|
@ -133,8 +133,9 @@ Result romfsInit(void)
|
||||
} else
|
||||
return 2;
|
||||
|
||||
size_t units = utf8_to_utf16(__utf16path, (const uint8_t*)filename, PATH_MAX+1);
|
||||
if (units == (size_t)-1) return 3;
|
||||
ssize_t units = utf8_to_utf16(__utf16path, (const uint8_t*)filename, PATH_MAX);
|
||||
if (units < 0) return 3;
|
||||
if (units >= PATH_MAX) return 4;
|
||||
__utf16path[units] = 0;
|
||||
|
||||
FS_Archive arch = { ARCHIVE_SDMC, { PATH_EMPTY, 1, (u8*)"" }, 0 };
|
||||
@ -284,7 +285,7 @@ static romfs_file* searchForFile(romfs_dir* parent, u16* name, u32 namelen)
|
||||
|
||||
static int navigateToDir(romfs_dir** ppDir, const char** pPath, bool isDir)
|
||||
{
|
||||
size_t units;
|
||||
ssize_t units;
|
||||
|
||||
char* colonPos = strchr(*pPath, ':');
|
||||
if (colonPos) *pPath = colonPos+1;
|
||||
@ -331,9 +332,11 @@ static int navigateToDir(romfs_dir** ppDir, const char** pPath, bool isDir)
|
||||
}
|
||||
}
|
||||
|
||||
units = utf8_to_utf16(__utf16path, (const uint8_t*)component, PATH_MAX+1);
|
||||
if (units == (size_t)-1)
|
||||
units = utf8_to_utf16(__utf16path, (const uint8_t*)component, PATH_MAX);
|
||||
if (units < 0)
|
||||
return EILSEQ;
|
||||
if (units >= PATH_MAX)
|
||||
return ENAMETOOLONG;
|
||||
|
||||
*ppDir = searchForDir(*ppDir, __utf16path, units);
|
||||
if (!*ppDir)
|
||||
@ -363,12 +366,17 @@ int romfs_open(struct _reent *r, void *fileStruct, const char *path, int flags,
|
||||
if (r->_errno != 0)
|
||||
return -1;
|
||||
|
||||
size_t units = utf8_to_utf16(__utf16path, (const uint8_t*)path, PATH_MAX+1);
|
||||
if (!units || units == (size_t)-1)
|
||||
ssize_t units = utf8_to_utf16(__utf16path, (const uint8_t*)path, PATH_MAX);
|
||||
if (units <= 0)
|
||||
{
|
||||
r->_errno = EILSEQ;
|
||||
return -1;
|
||||
}
|
||||
if (units >= PATH_MAX)
|
||||
{
|
||||
r->_errno = ENAMETOOLONG;
|
||||
return -1;
|
||||
}
|
||||
|
||||
romfs_file* file = searchForFile(curDir, __utf16path, units);
|
||||
if (!file)
|
||||
|
@ -118,7 +118,7 @@ static const char*
|
||||
sdmc_fixpath(struct _reent *r,
|
||||
const char *path)
|
||||
{
|
||||
size_t units;
|
||||
ssize_t units;
|
||||
uint32_t code;
|
||||
const uint8_t *p = (const uint8_t*)path;
|
||||
|
||||
@ -126,7 +126,7 @@ sdmc_fixpath(struct _reent *r,
|
||||
do
|
||||
{
|
||||
units = decode_utf8(&code, p);
|
||||
if(units == (size_t)-1)
|
||||
if(units < 0)
|
||||
{
|
||||
r->_errno = EILSEQ;
|
||||
return NULL;
|
||||
@ -145,7 +145,7 @@ sdmc_fixpath(struct _reent *r,
|
||||
do
|
||||
{
|
||||
units = decode_utf8(&code, p);
|
||||
if(units == (size_t)-1)
|
||||
if(units < 0)
|
||||
{
|
||||
r->_errno = EILSEQ;
|
||||
return NULL;
|
||||
@ -182,7 +182,7 @@ static const FS_Path
|
||||
sdmc_utf16path(struct _reent *r,
|
||||
const char *path)
|
||||
{
|
||||
size_t units;
|
||||
ssize_t units;
|
||||
FS_Path fspath;
|
||||
|
||||
fspath.data = NULL;
|
||||
@ -190,14 +190,13 @@ sdmc_utf16path(struct _reent *r,
|
||||
if(sdmc_fixpath(r, path) == NULL)
|
||||
return fspath;
|
||||
|
||||
units = utf8_to_utf16(__utf16path, (const uint8_t*)__fixedpath, PATH_MAX+1);
|
||||
if(units == (size_t)-1)
|
||||
units = utf8_to_utf16(__utf16path, (const uint8_t*)__fixedpath, PATH_MAX);
|
||||
if(units < 0)
|
||||
{
|
||||
r->_errno = EILSEQ;
|
||||
return fspath;
|
||||
}
|
||||
|
||||
if(__utf16path[PATH_MAX] != 0)
|
||||
if(units >= PATH_MAX)
|
||||
{
|
||||
r->_errno = ENAMETOOLONG;
|
||||
return fspath;
|
||||
@ -220,7 +219,7 @@ static bool sdmcInitialised = false;
|
||||
/*! Initialize SDMC device */
|
||||
Result sdmcInit(void)
|
||||
{
|
||||
size_t units;
|
||||
ssize_t units;
|
||||
uint32_t code;
|
||||
char *p;
|
||||
Result rc = 0;
|
||||
@ -253,7 +252,7 @@ Result sdmcInit(void)
|
||||
do
|
||||
{
|
||||
units = decode_utf8(&code, (const uint8_t*)p);
|
||||
if(units == (size_t)-1)
|
||||
if(units < 0)
|
||||
{
|
||||
last_slash = NULL;
|
||||
break;
|
||||
@ -911,7 +910,7 @@ sdmc_dirnext(struct _reent *r,
|
||||
{
|
||||
Result rc;
|
||||
u32 entries;
|
||||
size_t units;
|
||||
ssize_t units;
|
||||
|
||||
/* get pointer to our data */
|
||||
sdmc_dir_t *dir = (sdmc_dir_t*)(dirState->dirStruct);
|
||||
@ -938,13 +937,13 @@ sdmc_dirnext(struct _reent *r,
|
||||
/* convert name from UTF-16 to UTF-8 */
|
||||
memset(filename, 0, NAME_MAX);
|
||||
units = utf16_to_utf8((uint8_t*)filename, dir->entry_data.name, NAME_MAX);
|
||||
if(units == (size_t)-1)
|
||||
if(units < 0)
|
||||
{
|
||||
r->_errno = EILSEQ;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(filename[NAME_MAX-1] != 0)
|
||||
if(units >= NAME_MAX)
|
||||
{
|
||||
r->_errno = ENAMETOOLONG;
|
||||
return -1;
|
||||
|
@ -1,11 +1,12 @@
|
||||
#include "3ds/types.h"
|
||||
#include "3ds/util/utf.h"
|
||||
|
||||
size_t
|
||||
ssize_t
|
||||
utf16_to_utf32(uint32_t *out,
|
||||
const uint16_t *in,
|
||||
size_t len)
|
||||
{
|
||||
size_t rc = 0;
|
||||
ssize_t rc = 0;
|
||||
ssize_t units;
|
||||
uint32_t code;
|
||||
|
||||
@ -13,7 +14,7 @@ utf16_to_utf32(uint32_t *out,
|
||||
{
|
||||
units = decode_utf16(&code, in);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(code > 0)
|
||||
{
|
||||
@ -23,11 +24,12 @@ utf16_to_utf32(uint32_t *out,
|
||||
{
|
||||
if(rc < len)
|
||||
*out++ = code;
|
||||
else
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Count this code unit only while rc + 1 still fits in ssize_t; otherwise fall through to the error return. */
if(SSIZE_MAX - 1 >= rc)
|
||||
++rc;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
} while(code > 0);
|
||||
|
||||
|
@ -1,11 +1,12 @@
|
||||
#include "3ds/types.h"
|
||||
#include "3ds/util/utf.h"
|
||||
|
||||
size_t
|
||||
ssize_t
|
||||
utf16_to_utf8(uint8_t *out,
|
||||
const uint16_t *in,
|
||||
size_t len)
|
||||
{
|
||||
size_t rc = 0;
|
||||
ssize_t rc = 0;
|
||||
ssize_t units;
|
||||
uint32_t code;
|
||||
uint8_t encoded[4];
|
||||
@ -14,7 +15,7 @@ utf16_to_utf8(uint8_t *out,
|
||||
{
|
||||
units = decode_utf16(&code, in);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(code > 0)
|
||||
{
|
||||
@ -22,7 +23,7 @@ utf16_to_utf8(uint8_t *out,
|
||||
|
||||
units = encode_utf8(encoded, code);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(out != NULL)
|
||||
{
|
||||
@ -36,11 +37,12 @@ utf16_to_utf8(uint8_t *out,
|
||||
if(units > 3)
|
||||
*out++ = encoded[3];
|
||||
}
|
||||
else
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Add the encoded units only while rc + units still fits in ssize_t; otherwise fall through to the error return. */
if(SSIZE_MAX - units >= rc)
|
||||
rc += units;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
} while(code > 0);
|
||||
|
||||
|
@ -1,11 +1,12 @@
|
||||
#include "3ds/types.h"
|
||||
#include "3ds/util/utf.h"
|
||||
|
||||
size_t
|
||||
ssize_t
|
||||
utf32_to_utf16(uint16_t *out,
|
||||
const uint32_t *in,
|
||||
size_t len)
|
||||
{
|
||||
size_t rc = 0;
|
||||
ssize_t rc = 0;
|
||||
ssize_t units;
|
||||
uint16_t encoded[2];
|
||||
|
||||
@ -13,7 +14,7 @@ utf32_to_utf16(uint16_t *out,
|
||||
{
|
||||
units = encode_utf16(encoded, *in++);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(out != NULL)
|
||||
{
|
||||
@ -23,11 +24,12 @@ utf32_to_utf16(uint16_t *out,
|
||||
if(units > 1)
|
||||
*out++ = encoded[1];
|
||||
}
|
||||
else
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Add the encoded units only while rc + units still fits in ssize_t; otherwise fall through to the error return. */
if(SSIZE_MAX - units >= rc)
|
||||
rc += units;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
@ -1,11 +1,12 @@
|
||||
#include "3ds/types.h"
|
||||
#include "3ds/util/utf.h"
|
||||
|
||||
size_t
|
||||
ssize_t
|
||||
utf32_to_utf8(uint8_t *out,
|
||||
const uint32_t *in,
|
||||
size_t len)
|
||||
{
|
||||
size_t rc = 0;
|
||||
ssize_t rc = 0;
|
||||
ssize_t units;
|
||||
uint8_t encoded[4];
|
||||
|
||||
@ -13,7 +14,7 @@ utf32_to_utf8(uint8_t *out,
|
||||
{
|
||||
units = encode_utf8(encoded, *in++);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(out != NULL)
|
||||
{
|
||||
@ -27,11 +28,12 @@ utf32_to_utf8(uint8_t *out,
|
||||
if(units > 3)
|
||||
*out++ = encoded[3];
|
||||
}
|
||||
else
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Add the encoded units only while rc + units still fits in ssize_t; otherwise fall through to the error return. */
if(SSIZE_MAX - units >= rc)
|
||||
rc += units;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
@ -1,11 +1,12 @@
|
||||
#include "3ds/types.h"
|
||||
#include "3ds/util/utf.h"
|
||||
|
||||
size_t
|
||||
ssize_t
|
||||
utf8_to_utf16(uint16_t *out,
|
||||
const uint8_t *in,
|
||||
size_t len)
|
||||
{
|
||||
size_t rc = 0;
|
||||
ssize_t rc = 0;
|
||||
ssize_t units;
|
||||
uint32_t code;
|
||||
uint16_t encoded[2];
|
||||
@ -14,7 +15,7 @@ utf8_to_utf16(uint16_t *out,
|
||||
{
|
||||
units = decode_utf8(&code, in);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(code > 0)
|
||||
{
|
||||
@ -22,7 +23,7 @@ utf8_to_utf16(uint16_t *out,
|
||||
|
||||
units = encode_utf16(encoded, code);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(out != NULL)
|
||||
{
|
||||
@ -32,11 +33,12 @@ utf8_to_utf16(uint16_t *out,
|
||||
if(units > 1)
|
||||
*out++ = encoded[1];
|
||||
}
|
||||
else
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Add the encoded units only while rc + units still fits in ssize_t; otherwise fall through to the error return. */
if(SSIZE_MAX - units >= rc)
|
||||
rc += units;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
} while(code > 0);
|
||||
|
||||
|
@ -1,11 +1,12 @@
|
||||
#include "3ds/types.h"
|
||||
#include "3ds/util/utf.h"
|
||||
|
||||
size_t
|
||||
ssize_t
|
||||
utf8_to_utf32(uint32_t *out,
|
||||
const uint8_t *in,
|
||||
size_t len)
|
||||
{
|
||||
size_t rc = 0;
|
||||
ssize_t rc = 0;
|
||||
ssize_t units;
|
||||
uint32_t code;
|
||||
|
||||
@ -13,7 +14,7 @@ utf8_to_utf32(uint32_t *out,
|
||||
{
|
||||
units = decode_utf8(&code, in);
|
||||
if(units == -1)
|
||||
return (size_t)-1;
|
||||
return -1;
|
||||
|
||||
if(code > 0)
|
||||
{
|
||||
@ -23,11 +24,12 @@ utf8_to_utf32(uint32_t *out,
|
||||
{
|
||||
if(rc < len)
|
||||
*out++ = code;
|
||||
else
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Count this code unit only while rc + 1 still fits in ssize_t; otherwise fall through to the error return. */
if(SSIZE_MAX - 1 >= rc)
|
||||
++rc;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
} while(code > 0);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user