mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 14:59:39 +02:00
cobol: Fix FUNCTION TRIM.
The FUNCTION TRIM now works properly with UTF16 inputs. According to the ISO specification, the return type of a number of intrinsic functions is defined by the variable type of their first parameter. A number of changes here cause more functions to honor that requirement. gcc/cobol/ChangeLog: * parse.y: BASECONVERT and TRIM take their type from their first parameter. * parse_util.h (intrinsic_return_field): The function_descrs[] is adjusted so that a number of functions take their return type from their first calling parameter. intrinsic_return_field() has been refined. * symbols.cc (new_alphanumeric): Use set_explicit() instead of set() in support of refined intrinsic function return type. libgcobol/ChangeLog: * intrinsic.cc (__gg__trim): Rewritten to work properly, and avoid unnecessary variable codeset encoding translation.
This commit is contained in:
@@ -10892,7 +10892,7 @@ intrinsic: function_udf
|
||||
|
||||
| BASECONVERT '(' varg[r1] varg[r2] varg[r3] ')' {
|
||||
location_set(@1);
|
||||
$$ = new_alphanumeric("BASECONVERT");
|
||||
$$ = new_alphanumeric("BASECONVERT", $r1->field->codeset.encoding);
|
||||
cbl_unimplemented("BASECONVERT");
|
||||
if( ! intrinsic_call_3($$, BASECONVERT, $r1, $r2, $r3 )) YYERROR;
|
||||
}
|
||||
@@ -11223,7 +11223,7 @@ intrinsic: function_udf
|
||||
YYERROR;
|
||||
break;
|
||||
}
|
||||
$$ = new_alphanumeric("TRIM");
|
||||
$$ = new_alphanumeric("TRIM", $r1->field->codeset.encoding);
|
||||
cbl_refer_t * how = new_reference($trim_trailing);
|
||||
if( ! intrinsic_call_2($$, TRIM, $r1, how) ) YYERROR;
|
||||
}
|
||||
|
||||
@@ -46,15 +46,22 @@
|
||||
* n variadic
|
||||
* We use just A, I, N, or X, choosing the most general for each parameter.
|
||||
*
|
||||
* When FldInvalid is shown as the return type, it indicates that the type
|
||||
* of the function is determined by the type of the first parameter.
|
||||
* When FldInvalid is shown as the return type, it indicates that the Integer
|
||||
* vs. Numeric type of the function is determined by the type of the first
|
||||
* parameter.
|
||||
*
|
||||
* FldGroup is used when the first argument determines the encoding of the
|
||||
* temporary. This is for functions that can be Alphanumeric or National.
|
||||
*
|
||||
* We use FldNumericBin5 for functions of type "Integer", and FldFloat for
|
||||
* functions of type "Numeric",
|
||||
*/
|
||||
#define IntOrNum FldInvalid
|
||||
#define AnumOrNat FldGroup
|
||||
|
||||
static const function_descr_t function_descrs[] = {
|
||||
{ ABS, "ABS",
|
||||
"__gg__abs", "N", {}, FldInvalid },
|
||||
"__gg__abs", "N", {}, IntOrNum },
|
||||
{ ACOS, "ACOS",
|
||||
"__gg__acos", "N", {}, FldFloat },
|
||||
{ ANNUITY, "ANNUITY",
|
||||
@@ -63,7 +70,7 @@ static const function_descr_t function_descrs[] = {
|
||||
"__gg__asin", "N", {}, FldFloat },
|
||||
{ ATAN, "ATAN",
|
||||
"__gg__atan", "N", {}, FldFloat },
|
||||
{ BASECONVERT, "BASECONVERT",
|
||||
{ BASECONVERT, "BASECONVERT", // See parse.y
|
||||
"__gg__baseconvert", "XII", {}, FldAlphanumeric },
|
||||
{ BIT_OF, "BIT-OF",
|
||||
"__gg__bit_of", "X", {}, FldAlphanumeric },
|
||||
@@ -81,9 +88,9 @@ static const function_descr_t function_descrs[] = {
|
||||
{ COMBINED_DATETIME, "COMBINED-DATETIME",
|
||||
"__gg__combined_datetime", "IN", {}, FldFloat },
|
||||
{ CONCAT, "CONCAT",
|
||||
"__gg__concat", "n", {}, FldAlphanumeric },
|
||||
"__gg__concat", "n", {}, AnumOrNat },
|
||||
{ CONVERT, "CONVERT",
|
||||
"__gg__convert", "XII", {}, FldAlphanumeric },
|
||||
"__gg__convert", "XII", {}, AnumOrNat },
|
||||
{ COS, "COS",
|
||||
"__gg__cos", "N", {}, FldFloat },
|
||||
{ CURRENT_DATE, "CURRENT-DATE",
|
||||
@@ -121,13 +128,13 @@ static const function_descr_t function_descrs[] = {
|
||||
{ FIND_STRING, "FIND-STRING",
|
||||
"__gg__find_string", "AXI", {}, FldNumericBin5 },
|
||||
{ FORMATTED_CURRENT_DATE, "FORMATTED-CURRENT-DATE",
|
||||
"__gg__formatted_current_date", "X", {}, FldAlphanumeric },
|
||||
"__gg__formatted_current_date", "X", {}, AnumOrNat },
|
||||
{ FORMATTED_DATE, "FORMATTED-DATE",
|
||||
"__gg__formatted_date", "XX", {}, FldAlphanumeric },
|
||||
"__gg__formatted_date", "XX", {}, AnumOrNat },
|
||||
{ FORMATTED_DATETIME, "FORMATTED-DATETIME",
|
||||
"__gg__formatted_datetime", "XINI", {}, FldAlphanumeric },
|
||||
"__gg__formatted_datetime", "XINI", {}, AnumOrNat },
|
||||
{ FORMATTED_TIME, "FORMATTED-TIME",
|
||||
"__gg__formatted_time", "INI", {}, FldAlphanumeric },
|
||||
"__gg__formatted_time", "INI", {}, AnumOrNat },
|
||||
{ FRACTION_PART, "FRACTION-PART",
|
||||
"__gg__fraction_part", "N", {}, FldFloat },
|
||||
{ HEX_OF, "HEX-OF",
|
||||
@@ -135,7 +142,7 @@ static const function_descr_t function_descrs[] = {
|
||||
{ HEX_TO_CHAR, "HEX-TO-CHAR",
|
||||
"__gg__hex_to_char", "X", {}, FldAlphanumeric },
|
||||
{ HIGHEST_ALGEBRAIC, "HIGHEST-ALGEBRAIC",
|
||||
"__gg__highest_algebraic", "N", {}, FldInvalid },
|
||||
"__gg__highest_algebraic", "N", {}, IntOrNum },
|
||||
{ INTEGER, "INTEGER",
|
||||
"__gg__integer", "N", {}, FldNumericBin5 },
|
||||
// requires FldBoolean
|
||||
@@ -164,11 +171,11 @@ static const function_descr_t function_descrs[] = {
|
||||
{ LOG10, "LOG10",
|
||||
"__gg__log10", "N", {}, FldFloat },
|
||||
{ LOWER_CASE, "LOWER-CASE",
|
||||
"__gg__lower_case", "X", {}, FldAlphanumeric },
|
||||
"__gg__lower_case", "X", {}, AnumOrNat },
|
||||
{ LOWEST_ALGEBRAIC, "LOWEST-ALGEBRAIC",
|
||||
"__gg__lowest_algebraic", "N", {}, FldInvalid },
|
||||
"__gg__lowest_algebraic", "N", {}, IntOrNum },
|
||||
{ MAXX, "MAX",
|
||||
"__gg__max", "n", {}, FldInvalid },
|
||||
"__gg__max", "n", {}, IntOrNum },
|
||||
{ MEAN, "MEAN",
|
||||
"__gg__mean", "n", {}, FldFloat },
|
||||
{ MEDIAN, "MEDIAN",
|
||||
@@ -176,7 +183,7 @@ static const function_descr_t function_descrs[] = {
|
||||
{ MIDRANGE, "MIDRANGE",
|
||||
"__gg__midrange", "n", {}, FldFloat },
|
||||
{ MINN, "MIN",
|
||||
"__gg__min", "n", {}, FldInvalid },
|
||||
"__gg__min", "n", {}, IntOrNum },
|
||||
{ MOD, "MOD",
|
||||
"__gg__mod", "IN", {}, FldNumericBin5 },
|
||||
{ MODULE_NAME, "MODULE-NAME",
|
||||
@@ -202,11 +209,11 @@ static const function_descr_t function_descrs[] = {
|
||||
{ RANDOM, "RANDOM",
|
||||
"__gg__random", "I", {}, FldFloat },
|
||||
{ RANGE, "RANGE",
|
||||
"__gg__range", "n", {}, FldInvalid },
|
||||
"__gg__range", "n", {}, IntOrNum },
|
||||
{ REM, "REM",
|
||||
"__gg__rem", "NN", {}, FldFloat },
|
||||
{ REVERSE, "REVERSE",
|
||||
"__gg__reverse", "X", {}, FldAlphanumeric },
|
||||
"__gg__reverse", "X", {}, AnumOrNat },
|
||||
{ SECONDS_FROM_FORMATTED_TIME, "SECONDS-FROM-FORMATTED-TIME",
|
||||
"__gg__seconds_from_formatted_time", "XX", {}, FldFloat },
|
||||
{ SECONDS_PAST_MIDNIGHT, "SECONDS_PAST_MIDNIGHT",
|
||||
@@ -216,7 +223,7 @@ static const function_descr_t function_descrs[] = {
|
||||
{ SIN, "SIN",
|
||||
"__gg__sin", "N", {}, FldFloat },
|
||||
{ SMALLEST_ALGEBRAIC, "SMALLEST-ALGEBRAIC",
|
||||
"__gg__smallest_algebraic", "N", {}, FldInvalid },
|
||||
"__gg__smallest_algebraic", "N", {}, IntOrNum },
|
||||
{ SQRT, "SQRT",
|
||||
"__gg__sqrt", "N", {}, FldFloat },
|
||||
{ STANDARD_COMPARE, "STANDARD-COMPARE",
|
||||
@@ -224,9 +231,9 @@ static const function_descr_t function_descrs[] = {
|
||||
{ STANDARD_DEVIATION, "STANDARD-DEVIATION",
|
||||
"__gg__standard_deviation", "n", {}, FldFloat },
|
||||
{ SUBSTITUTE, "SUBSTITUTE",
|
||||
"__gg__substitute", "XXX", {}, FldAlphanumeric },
|
||||
"__gg__substitute", "XXX", {}, AnumOrNat },
|
||||
{ SUM, "SUM",
|
||||
"__gg__sum", "n", {}, FldInvalid },
|
||||
"__gg__sum", "n", {}, IntOrNum },
|
||||
{ TAN, "TAN",
|
||||
"__gg__tan", "N", {}, FldFloat },
|
||||
{ TEST_DATE_YYYYMMDD, "TEST-DATE-YYYYMMDD",
|
||||
@@ -241,8 +248,8 @@ static const function_descr_t function_descrs[] = {
|
||||
"__gg__test_numval_c", "XXU", {}, FldNumericBin5 },
|
||||
{ TEST_NUMVAL_F, "TEST-NUMVAL-F",
|
||||
"__gg__test_numval_f", "X", {}, FldNumericBin5 },
|
||||
{ TRIM, "TRIM",
|
||||
"__gg__trim", "XI", {}, FldNumericBin5 },
|
||||
{ TRIM, "TRIM", // See parse.y
|
||||
"__gg__trim", "XI", {}, FldAlphanumeric },
|
||||
{ ULENGTH, "ULENGTH",
|
||||
"__gg__ulength", "X", {}, FldAlphanumeric },
|
||||
{ UPOS, "UPOS",
|
||||
@@ -342,7 +349,8 @@ intrinsic_return_field(int token, std::vector<cbl_refer_t> args)
|
||||
retval = new_tempnumeric_float();
|
||||
break;
|
||||
case FldInvalid:
|
||||
// This is a flag that a function takes the type of its first input
|
||||
// This is a flag that a function takes the Numeric vs Int type of its
|
||||
// first argument
|
||||
assert( args.size() );
|
||||
switch(args[0].field->type)
|
||||
{
|
||||
@@ -350,7 +358,7 @@ intrinsic_return_field(int token, std::vector<cbl_refer_t> args)
|
||||
case FldAlphanumeric:
|
||||
case FldAlphaEdited:
|
||||
case FldLiteralA:
|
||||
retval = new_alphanumeric();
|
||||
retval = new_alphanumeric(NULL, args[0].field->codeset.encoding);
|
||||
break;
|
||||
case FldNumericBinary:
|
||||
case FldPacked:
|
||||
@@ -370,6 +378,36 @@ intrinsic_return_field(int token, std::vector<cbl_refer_t> args)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case FldGroup:
|
||||
// This is a flag that an alphanumeric function takes the encoding of the
|
||||
// first argument
|
||||
assert( args.size() );
|
||||
switch(args[0].field->type)
|
||||
{
|
||||
case FldGroup:
|
||||
case FldAlphanumeric:
|
||||
case FldAlphaEdited:
|
||||
case FldLiteralA:
|
||||
case FldNumericBinary:
|
||||
case FldPacked:
|
||||
case FldNumericDisplay:
|
||||
case FldNumericBin5:
|
||||
case FldLiteralN:
|
||||
case FldIndex:
|
||||
case FldPointer:
|
||||
retval = new_alphanumeric(NULL, args[0].field->codeset.encoding);
|
||||
break;
|
||||
case FldFloat:
|
||||
retval = new_tempnumeric_float();
|
||||
break;
|
||||
default:
|
||||
retval = NULL;
|
||||
gcc_unreachable();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
retval = NULL;
|
||||
gcc_unreachable();
|
||||
|
||||
@@ -3783,14 +3783,22 @@ symbol_temporaries_free() {
|
||||
cbl_field_t *
|
||||
new_alphanumeric( const cbl_name_t name, cbl_encoding_t encoding ) {
|
||||
cbl_field_t * field = new_temporary_impl(FldAlphanumeric, name);
|
||||
//// if( encoding != no_encoding_e ) {
|
||||
//// field->codeset.set(encoding);
|
||||
//// }
|
||||
//// //// Dubner hacking away: If name is non-null, then assume this is a
|
||||
//// //// function definition, and force the codeset, which otherwise will have
|
||||
//// //// defaulted to current_encoding('A'), and the valid() test in codeset.set
|
||||
//// //// will have prevented it from being changed.
|
||||
//// if( name && encoding != no_encoding_e ) {
|
||||
//// field->codeset.set_explicit(encoding);
|
||||
//// }
|
||||
/* Jim's original code was hedged with protections apparently intended to
|
||||
prevent encodings from changing. This proved unsatisfactor, especially
|
||||
when I started implementing setting the temporary return type of functions
|
||||
that take on the characteristics of their first parameter. So, I went
|
||||
from codeset.set_encoding() to codeset.set_explicit(). */
|
||||
if( encoding != no_encoding_e ) {
|
||||
field->codeset.set(encoding);
|
||||
}
|
||||
//// Dubner hacking away: If name is non-null, then assume this is a
|
||||
//// function definition, and force the codeset, which otherwise will have
|
||||
//// defaulted to current_encoding('A'), and the valid() test in codeset.set
|
||||
//// will have prevented it from being changed.
|
||||
if( name && encoding != no_encoding_e ) {
|
||||
field->codeset.set_explicit(encoding);
|
||||
}
|
||||
temporaries.add(field);
|
||||
|
||||
@@ -3527,9 +3527,11 @@ __gg__trim( cblc_field_t *dest,
|
||||
size_t arg2_offset,
|
||||
size_t arg2_size)
|
||||
{
|
||||
cbl_encoding_t from = arg1->encoding;
|
||||
cbl_encoding_t to = dest->encoding;
|
||||
charmap_t *charmap = __gg__get_charmap(to);
|
||||
// We assume that dest is an intermediate_e with the same encoding as arg1.
|
||||
assert( dest->type == FldAlphanumeric
|
||||
&& (dest->attr & intermediate_e)
|
||||
&& dest->encoding == arg1->encoding );
|
||||
charmap_t *charmap = __gg__get_charmap(arg1->encoding);
|
||||
int stride = charmap->stride();
|
||||
cbl_char_t mapped_space = charmap->mapped_character(ascii_space);
|
||||
|
||||
@@ -3539,80 +3541,61 @@ __gg__trim( cblc_field_t *dest,
|
||||
arg2_offset,
|
||||
arg2_size);
|
||||
//static const int BOTH = 0;
|
||||
static const int LEADING = 1; // Remove leading spaces
|
||||
static const int TRAILING = 2; // Remove trailing spaces
|
||||
#define LEADING 1 // Remove leading spaces
|
||||
#define TRAILING 2 // Remove trailing spaces
|
||||
|
||||
if( dest->type != FldAlphanumeric ||
|
||||
!(dest->attr & intermediate_e) )
|
||||
char *left = reinterpret_cast<char *>(arg1->data) + arg1_offset;
|
||||
char *right = left + arg1_size-stride; // Points AT the character, not beyond
|
||||
switch(type)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"We expect the target of a FUNCTION TRIM to "
|
||||
"be an intermediate alphanumeric\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
// What is this all about?
|
||||
dest->capacity = dest->offset;
|
||||
|
||||
// Make a copy of the input:
|
||||
char *copy = static_cast<char *>(malloc(arg1_size));
|
||||
massert(copy);
|
||||
memcpy(copy, arg1->data+arg1_offset, arg1_size);
|
||||
|
||||
// Convert it to the destination encoding
|
||||
__gg__convert_encoding_length(copy, arg1_size, from, to);
|
||||
|
||||
// No matter what, we want to find the leftmost non-space and the
|
||||
// rightmost non-space:
|
||||
|
||||
char *left = copy;
|
||||
char *right = left + arg1_size-stride;
|
||||
|
||||
// Find left and right: the first and last non-spaces
|
||||
while( left <= right )
|
||||
{
|
||||
cbl_char_t cleft = charmap->getch(left, (size_t)0);
|
||||
cbl_char_t cright = charmap->getch(right, (size_t)0);
|
||||
|
||||
if( cleft != mapped_space && cright != mapped_space )
|
||||
case 0: // Strip off leading and trailing spaces
|
||||
while(left <= right)
|
||||
{
|
||||
if( charmap->getch(left, (size_t)0) != mapped_space )
|
||||
{
|
||||
break;
|
||||
}
|
||||
left += stride;
|
||||
}
|
||||
while(left <= right)
|
||||
{
|
||||
if( charmap->getch(right, (size_t)0) != mapped_space )
|
||||
{
|
||||
break;
|
||||
}
|
||||
right -= stride;
|
||||
}
|
||||
break;
|
||||
|
||||
case LEADING: // Just leading
|
||||
{
|
||||
while(left <= right)
|
||||
{
|
||||
if( charmap->getch(left, (size_t)0) != mapped_space )
|
||||
{
|
||||
break;
|
||||
}
|
||||
left += stride;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if( cleft == mapped_space )
|
||||
|
||||
case TRAILING: // Just trailing
|
||||
{
|
||||
left += stride;
|
||||
}
|
||||
if( cright == mapped_space )
|
||||
{
|
||||
right -= stride;
|
||||
while(left <= right)
|
||||
{
|
||||
if( charmap->getch(right, (size_t)0) != mapped_space )
|
||||
{
|
||||
break;
|
||||
}
|
||||
right -= stride;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( type == LEADING )
|
||||
{
|
||||
// We want to leave any trailing spaces, so we return 'right' to its
|
||||
// original value:
|
||||
right = copy + arg1_size-1;
|
||||
}
|
||||
else if( type == TRAILING )
|
||||
{
|
||||
// We want to leave any leading spaces, so we return 'left' to its
|
||||
// original value:
|
||||
left = copy;
|
||||
}
|
||||
|
||||
if( left > right )
|
||||
{
|
||||
// When the arg1 input string was empty, we want left to be right+1.
|
||||
// The left/right loop can sometimes end up with left equal to right+2.
|
||||
// That needs to be fixed:
|
||||
left = right+stride;
|
||||
}
|
||||
|
||||
size_t ncount = right+stride - left;
|
||||
__gg__adjust_dest_size(dest, ncount);
|
||||
|
||||
memmove(dest->data, left, ncount);
|
||||
free(copy);
|
||||
}
|
||||
|
||||
#if HAVE_INITSTATE_R && HAVE_SRANDOM_R && HAVE_RANDOM_R
|
||||
|
||||
Reference in New Issue
Block a user