cobol: Improved efficiency of code generated for MOVE "A" TO VAR(1:1). [119456]

This PR rightly noted that COBOL source code which obviously could
result in simple machine language did not do so.  These changes take advantage
of the compiler knowing, at compile time, the values of literal offsets
and lengths, and uses that knowledge to generate much more efficient
GENERIC for those cases.

gcc/cobol/ChangeLog:

	PR cobol/119456

	* genapi.cc (mh_source_is_literalA): Don't set refmod_e attribute
	unless it is necessary.
	(have_common_parent): Helper routine that determines whether two
	COBOL variables are members of the same data description.
	(mh_alpha_to_alpha): Modified for greater efficiency when table
	subscripts and reference modification parameters are numeric
	literals.
	* genutil.cc (get_data_offset): Recognizes when table subscripts
	and refmod offsets are numeric literals.
	(refer_size): Recognizes when refmod offsets are numeric literals.
	(refer_size_source): Recognizes when table subscripts are numeric
	literals.
This commit is contained in:
Robert Dubner
2026-03-02 15:36:40 -05:00
parent 29094a3840
commit 435346eafa
2 changed files with 316 additions and 61 deletions

View File

@@ -15859,13 +15859,6 @@ mh_source_is_literalA(const cbl_refer_t &destref,
cbl_encoding_t encoding_dest = destref.field->codeset.encoding;
charmap_t *charmap_dest = __gg__get_charmap(encoding_dest);
if( destref.refmod.from
|| destref.refmod.len )
{
// Let the move routine know to treat the destination as alphanumeric
gg_attribute_bit_set(destref.field, refmod_e);
}
static char *buffer = NULL;
static size_t buffer_size = 0;
size_t source_length;
@@ -16001,6 +15994,7 @@ mh_source_is_literalA(const cbl_refer_t &destref,
}
else
{
// The refer has some information in it.
gg_memcpy(gg_add(member(destref.field->var_decl_node, "data"),
refer_offset(destref)),
build_string_literal(dest_bytes, src),
@@ -16011,7 +16005,12 @@ mh_source_is_literalA(const cbl_refer_t &destref,
else
{
// This is more complicated than a simple alpha-to-alpha move
if( destref.refmod.from
|| destref.refmod.len )
{
// Let the move routine know to treat the destination as alphanumeric
gg_attribute_bit_set(destref.field, refmod_e);
}
// If the source is flagged ALL, or if we are setting the destination to
// a figurative constant, pass along the ALL bit:
int rounded_parameter = rounded
@@ -16044,19 +16043,82 @@ mh_source_is_literalA(const cbl_refer_t &destref,
build_int_cst_type( SIZE_T, outlength),
NULL_TREE);
}
if( destref.refmod.from
|| destref.refmod.len )
{
// Return that value to its original form
gg_attribute_bit_clear(destref.field, refmod_e);
}
}
if( destref.refmod.from
|| destref.refmod.len )
{
// Return that value to its original form
gg_attribute_bit_clear(destref.field, refmod_e);
}
moved = true;
}
return moved;
}
static bool
have_common_parent(const cbl_refer_t &destref,
const cbl_refer_t &sourceref)
{
/* We are trying to lay down fast code when possible. But sometimes we have
to go slower in order to be accurate. The COBOL specification explicitly
says that when the storage areas of sending and receiving operands
overlap:
1) When the data items are not described by the same data description
entry, the result of the statement is undefined.
2) When the data items are described by the same data description entry,
the result of the statement is the same as if the data items shared
no part of their respective storage areas.
There is an additional paragraph:
In the case of reference modification, the unique data item produced by
reference modification is not considered to be the same data description
entry as any other data description entry. Therefore, if an overlapping
situation exists, the results of the operation are undefined.
This routine will return TRUE when neither reference is a refmod, and
both operands ultimately have the same parent (indicating that they are
part of the same data description.
The point is that when we return True, then the two are not refmods, and
they have a common parent, so we have to use a memmove. When we return
False, then we can use a faster memcpy.
*/
bool retval = true;
if( destref.is_refmod_reference() )
{
retval = false;
}
else if( sourceref.is_refmod_reference() )
{
retval = false;
}
else
{
// Neither is a refmod. Check for common parentage:
const cbl_field_t *poppa = destref.field;
const cbl_field_t *momma = sourceref.field;
while( parent_of(poppa) )
{
// Follow the first family_tree up as far as we can.
poppa = parent_of(poppa);
}
while( parent_of(momma) )
{
// Follow the second family_tree up as far as we can.
momma = parent_of(momma);
}
if( poppa != momma )
{
/* Okay, so the analogy breaks down. Think of momma and poppa as
bacteria, or something. */
retval = false;
}
}
return retval;
}
static bool
mh_alpha_to_alpha(const cbl_refer_t &destref,
const cbl_refer_t &sourceref,
@@ -16070,8 +16132,6 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
&& destref.field->type == FldAlphanumeric
&& !size_error
&& sourceref.field->codeset.encoding == destref.field->codeset.encoding
&& !destref.refmod.from
&& !destref.refmod.len
&& !(destref.field->attr & rjust_e)
&& !(sourceref.field->attr & any_length_e)
&& !(destref.field->attr & any_length_e)
@@ -16079,6 +16139,9 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
&& !sourceref.all
)
{
void (*mover)(tree, tree, tree); // dest, source, count
mover = have_common_parent(destref, sourceref) ? gg_memmove : gg_memcpy;
// We are in a position to simply move bytes from the source to the dest.
if( refer_is_clean(sourceref) && refer_is_clean(destref) )
{
@@ -16086,7 +16149,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
if( destref.field->data.capacity() <= sourceref.field->data.capacity() )
{
// This is the simplest case of all
gg_memcpy(member( destref.field->var_decl_node, "data"),
mover(member( destref.field->var_decl_node, "data"),
member(sourceref.field->var_decl_node, "data"),
build_int_cst_type(SIZE_T, destref.field->data.capacity()));
moved = true;
@@ -16095,7 +16158,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
{
// This is a tad more complicated. The source is too short, so we need
// to copy over what we can...
gg_memcpy(member( destref.field->var_decl_node, "data"),
mover(member( destref.field->var_decl_node, "data"),
member(sourceref.field->var_decl_node, "data"),
build_int_cst_type(SIZE_T, sourceref.field->data.capacity()));
// And then space-fill the rest:
@@ -16109,7 +16172,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
charmap->mapped_character(ascii_space),
fill_bytes);
// ...and then copy those spaces into place.
gg_memcpy(
mover(
gg_add(member(destref.field->var_decl_node, "data"),
build_int_cst_type(SIZE_T, sourceref.field->data.capacity())),
build_string_literal(fill_bytes, spaces),
@@ -16118,10 +16181,96 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
moved = true;
}
}
else
if( !refer_is_clean(sourceref) && refer_is_clean(destref) )
{
// Either the source or the dest is a table or refmod, so we need to do
// more work.
// The source is dirty, but the destination is clean:
tree source_data;
tree source_len;
tree dest_data;
tree dest_len;
source_data = gg_add(member(sourceref.field->var_decl_node, "data"),
refer_offset(sourceref));
source_len = refer_size_source(sourceref);
dest_data = member(destref.field->var_decl_node, "data");
dest_len = build_int_cst_type(SIZE_T, destref.field->data.capacity());
IF( source_len, ge_op, dest_len )
{
// The source has enough (or more) bytes to fill the destination:
mover(dest_data, source_data, dest_len);
}
ELSE
{
// The source data is too short. We need to copy over what we have...
mover(dest_data, source_data, source_len);
// And then right-fill the remainder with spaces. Create a buffer with
// more than enough spaces for our purposes:
size_t fill_bytes = destref.field->data.capacity();
char *spaces = static_cast<char *>(xmalloc(fill_bytes));
charmap_t *charmap =__gg__get_charmap(destref.field->codeset.encoding);
charmap->memset(spaces,
charmap->mapped_character(ascii_space),
fill_bytes);
// And then copy enough of those spaces into place.
mover(gg_add(dest_data, source_len),
build_string_literal(fill_bytes, spaces),
gg_subtract(dest_len, source_len));
free(spaces);
}
ENDIF
moved = true;
}
if( refer_is_clean(sourceref) && !refer_is_clean(destref) )
{
// The source is clean but the destination is dirty:
tree source_data;
tree source_len;
tree dest_data;
tree dest_len ;
source_data = member(sourceref.field->var_decl_node, "data");
source_len = build_int_cst_type(SIZE_T,
sourceref.field->data.capacity());
dest_data = gg_add(member(destref.field->var_decl_node, "data"),
refer_offset(destref));
dest_len = refer_size_dest(destref);
IF( source_len, ge_op, dest_len )
{
// The source has enough (or more) bytes to fill the destination:
mover(dest_data, source_data, dest_len);
}
ELSE
{
// The source data is too short. We need to copy over what we have...
mover(dest_data, source_data, source_len);
// And then right-fill the remainder with spaces. Create a buffer with
// more than enough spaces for our purposes:
size_t fill_bytes = destref.field->data.capacity();
char *spaces = static_cast<char *>(xmalloc(fill_bytes));
charmap_t *charmap =__gg__get_charmap(destref.field->codeset.encoding);
charmap->memset(spaces,
charmap->mapped_character(ascii_space),
fill_bytes);
// And then copy enough of those spaces into place.
mover(gg_add(dest_data, source_len),
build_string_literal(fill_bytes, spaces),
gg_subtract(dest_len, source_len));
free(spaces);
}
ENDIF
moved = true;
}
if( !refer_is_clean(sourceref) && !refer_is_clean(destref) )
{
// Both the source and the dest are "dirty"
tree source_data = gg_define_variable(UCHAR_P);
tree source_len = gg_define_variable(SIZE_T);
@@ -16140,12 +16289,12 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
IF( source_len, ge_op, dest_len )
{
// The source has enough (or more) bytes to fill the destination:
gg_memcpy(dest_data, source_data, dest_len);
mover(dest_data, source_data, dest_len);
}
ELSE
{
// The source data is too short. We need to copy over what we have...
gg_memcpy(dest_data, source_data, source_len);
mover(dest_data, source_data, source_len);
// And then right-fill the remainder with spaces. Create a buffer with
// more than enough spaces for our purposes:
@@ -16156,7 +16305,7 @@ mh_alpha_to_alpha(const cbl_refer_t &destref,
charmap->mapped_character(ascii_space),
fill_bytes);
// And then copy enough of those spaces into place.
gg_memcpy(gg_add(dest_data, source_len),
mover(gg_add(dest_data, source_len),
build_string_literal(fill_bytes, spaces),
gg_subtract(dest_len, source_len));
free(spaces);

View File

@@ -570,10 +570,99 @@ tree
get_data_offset(const cbl_refer_t &refer,
int *pflags = NULL)
{
Analyze();
// This routine returns a tree which is the size_t offset to the data in the
// refer/field
/* Let's first attempt to handle commonly-occurring situations that can
be handled efficiently. */
const cbl_enabled_exceptions_t &enabled_exceptions(cdf_enabled_exceptions());
if( !enabled_exceptions.match(ec_bound_subscript_e)
&& !enabled_exceptions.match(ec_bound_odo_e)
&& !enabled_exceptions.match(ec_bound_ref_mod_e) )
{
// There is no subscript bounds checking
bool all_literals = true;
for( size_t i=0; i<refer.nsubscript(); i++ )
{
if( (refer.subscripts[i].field->attr & FIGCONST_MASK) == zero_value_e )
{
// This refer is a figconst ZERO; we treat it as an ALL ZERO
// This is our internal representation for ALL, as in TABLE(ALL)
all_literals = false;
break;
}
if( !is_literal(refer.subscripts[i].field) )
{
// A subscript is not a literal. Too bad.
all_literals = false;
break;
}
}
if( refer.refmod.from && !is_literal(refer.refmod.from->field) )
{
all_literals = false;
}
if( all_literals )
{
// We are dealing with foo(x)(y:z) where x and y are integer constants.
size_t offset = 0;
if( refer.nsubscript() )
{
// We have at least one subscript:
// Figure we have three subscripts, so nsubscript is 3
// Figure that the subscripts are {5, 4, 3}
// We expect that starting from refer.field, that three of our ancestors --
// call them A1, A2, and A3 -- have occurs clauses.
// We need to start with the rightmost subscript, and work our way up through
// our parents. As we find each parent with an OCCURS, we increment qual_data
// by (subscript-1)*An->data.capacity()
// Establish the field_t pointer for walking up through our ancestors:
cbl_field_t *parent = refer.field;
// Note the backwards test, because refer->nsubscript is an unsigned value
for(size_t i=refer.nsubscript()-1; i<refer.nsubscript(); i-- )
{
// We need to search upward for an ancestor with occurs_max:
while(parent)
{
if( parent->occurs.ntimes() )
{
break;
}
parent = parent_of(parent);
}
// we might have an error condition at this point:
if( !parent )
{
cbl_internal_error("Too many subscripts");
}
// Pick up the integer value of the subscript.
long subscript = atol(refer.subscripts[i].field->data.original());
// Subscript is one-based integer
// Make it zero-based:
subscript = subscript - 1;
offset += subscript * parent->data.capacity();
parent = parent_of(parent);
}
}
if( refer.refmod.from )
{
// We know the refmod is a literal
offset += (atol(refer.refmod.from->field->data.original()) - 1)
* refer.field->codeset.stride();
return build_int_cst_type(SIZE_T, offset);
}
}
}
// Because this is for source / sending variables, checks are made for
// OCCURS DEPENDING ON violations (when those exceptions are enabled)
@@ -636,8 +725,6 @@ get_data_offset(const cbl_refer_t &refer,
}
else
{
const cbl_enabled_exceptions_t&
enabled_exceptions( cdf_enabled_exceptions() );
if( !enabled_exceptions.match(ec_bound_subscript_e) )
{
// With no exception testing, just pick up the value
@@ -698,9 +785,6 @@ get_data_offset(const cbl_refer_t &refer,
// Although we strictly don't need to look at the ODO value at this
// point, we do want it checked for the purposes of ec-bound-odo
const cbl_enabled_exceptions_t&
enabled_exceptions( cdf_enabled_exceptions() );
if( enabled_exceptions.match(ec_bound_odo_e) )
{
if( parent->occurs.depending_on )
@@ -1933,41 +2017,50 @@ tree // size_t
refer_size(const cbl_refer_t &refer, refer_type_t refer_type)
{
Analyze();
static tree retval = gg_define_variable(SIZE_T, "..rs_retval", vs_file_static);
if( !refer.field )
if( refer.refmod.len && refer.refmod.len->field->type == FldLiteralN )
{
return size_t_zero_node;
}
if( refer_is_clean(refer) )
{
return get_any_capacity(refer.field);
}
// Step the first: Get the actual full length:
if( refer_has_depends(refer, refer_type) )
{
// Because there is a depends, we might have to change the length:
gg_assign(retval, refer_fill_depends(refer));
return build_int_cst_type(SIZE_T,
atol( refer.refmod.len->field->data.original())
* refer.field->codeset.stride());
}
else
{
gg_assign(retval, get_any_capacity(refer.field));
}
static tree retval = gg_define_variable(SIZE_T, "..rs_retval", vs_file_static);
if( refer.refmod.from || refer.refmod.len )
{
tree refmod = refer_refmod_length(refer);
// retval is the ODO based total length.
// refmod is the length resulting from refmod(from:len)
// We have to reduce retval by the effect of refmod:
tree diff = gg_subtract(get_any_capacity(refer.field),
refmod);
gg_assign(retval, gg_subtract(retval, diff));
if( !refer.field )
{
return size_t_zero_node;
}
if( refer_is_clean(refer) )
{
return get_any_capacity(refer.field);
}
// Step the first: Get the actual full length:
if( refer_has_depends(refer, refer_type) )
{
// Because there is a depends, we might have to change the length:
gg_assign(retval, refer_fill_depends(refer));
}
else
{
gg_assign(retval, get_any_capacity(refer.field));
}
if( refer.refmod.from || refer.refmod.len )
{
tree refmod = refer_refmod_length(refer);
// retval is the ODO based total length.
// refmod is the length resulting from refmod(from:len)
// We have to reduce retval by the effect of refmod:
tree diff = gg_subtract(get_any_capacity(refer.field),
refmod);
gg_assign(retval, gg_subtract(retval, diff));
}
return retval;
}
return retval;
}
tree // size_t
@@ -1989,7 +2082,6 @@ refer_size_source(const cbl_refer_t &refer)
other. But there conceivably might be others.
You have been warned.
*/
if( !refer.field )
@@ -2004,6 +2096,20 @@ refer_size_source(const cbl_refer_t &refer)
return get_any_capacity(refer.field);
}
// We are dealing with a refer
const cbl_enabled_exceptions_t&
enabled_exceptions( cdf_enabled_exceptions() );
if( !enabled_exceptions.match(ec_bound_ref_mod_e) )
{
// ref_mod bounds checking is off
if( refer.refmod.len && refer.refmod.len->field->type == FldLiteralN )
{
// And the refmod.len is a literal.
return build_int_cst_type(SIZE_T,
atol( refer.refmod.len->field->data.original())
* refer.field->codeset.stride()); }
}
// This assignment has to be here. Simply returning refer_size() results
// in regression testing errors.
static tree retval = gg_define_variable(SIZE_T, "..rss_retval", vs_file_static);