Files
gcc/libgcobol/xmlparse.cc
Robert Dubner 4cd4ec9bd3 cobol: Increase PIC X(MAX) from 8192 to 2^31.
Up until these changes, temporary intermediate alphanumeric string
variables have been allocated on the stack.  With the design change to
a larger limit, that's no longer practical.  Such variables are now
placed on the heap, and we now have to take pains to free() that memory
when we are done with it.

gcc/cobol/ChangeLog:

	* gcobc: Adjust how -fPIC is applied, and other refinements.
	* gcobol.1: Documentation.
	* genapi.cc (parser_statement_end): New function.  Deallocates
	temp char strings from the heap.
	(initialize_variable_internal): Ignore temp char strings.
	(compare_binary_binary): Formatting.
	(parser_end_program): Formatting.
	(parser_init_list): Formatting.
	(parser_exit_program): Formatting.
	(program_end_stuff): Formatting.
	(parser_exit): Formatting.
	(parser_perform_conditional): Formatting.
	(perform_outofline_before_until): Formatting.
	(parser_file_add): Formatting.
	(mh_source_is_literalA): Formatting.
	(psa_new_var_decl): Make cblc_field_t for intermediate_e
	alphanumerics program-static.
	(parser_symbol_add): Eliminate unnecessary code when
	type!=FldConditional; change handling of intermediate_e for
	FldAlphanumerics.
	* genapi.h (parser_end_program): New declaration.
	(parser_exit): Formatting.
	(parser_exit_program): Formatting.
	(parser_statement_end): New declaration.
	* lexio.cc (parse_replace_pairs): Change CDF handling.
	(cdftext::lex_open): Likewise.
	(cdftext::process_file): Likewise.
	* parse.y: Changes to MAXIMUM_ALPHA_LENGTH; refine return value
	types for various intrinsic functions; some CDF handling.
	* parse_ante.h (MAXLENGTH_FORMATTED_DATE): Eliminate constant.
	(MAXLENGTH_FORMATTED_TIME): Likewise.
	(MAXLENGTH_CALENDAR_DATE): Likewise.
	(MAXLENGTH_FORMATTED_DATETIME): Likewise.
	(new_alphanumeric): No longer takes a capacity.
	(intrinsic_return_field): New declaration.
	(struct ffi_args_t): Changed debug message.
	(is_among): New declaration.
	* parse_util.h (intrinsic_return_field): New function. Works with
	the modified function_descrs[] table.
	* scan.l: Modified scanning.
	* scan_ante.h (class input_file_status_t): Likewise.
	(verify_ws):  Likewise.
	(is_refmod): Likewise.
	* symbols.cc (symbols_update): Improved comment about a debug
	message.
	(symbol_temporaries): New function for temporaries on the heap that
	will have to be deallocated.
	(symbol_temporary_alphanumerics): Likewise.
	(new_temporary_impl): Eliminate MAXIMUM_ALPHA_LENGTH from template.
	(new_alphanumeric): Eliminate capacity as a parameter.
	* symbols.h (cbl_dialect_str): Formatting.
	(MAXIMUM_ALPHA_LENGTH): Change comment and value.
	(IBM_MAXIMUM_ALPHA_LENGTH): Put parentheses around "size_t(1)<<31".
	(symbol_temporaries): New declaration.
	(symbol_temporary_alphanumerics): New declaration.
	(struct function_descr_t): New comment on ret_type.
	(new_alphanumeric): New declaration.
	* util.cc (class cdf_directives_t): CDF processing.
	(cobol_set_indicator_column): Likewise.
	(cdf_push_source_format): Likewise.
	(cdf_pop_source_format): Likewise.
	(parent_names): Likewise.
	(cobol_filename): Likewise.
	(cobol_lineno): Likewise.
	(cobol_filename_restore): Likewise.

libgcobol/ChangeLog:

	* intrinsic.cc (string_to_dest): Move call to
	__gg__adjust_dest_size().
	(__gg__char): Likewise.
	(__gg__current_date): Likewise.
	(__gg__formatted_current_date): Likewise.
	(__gg__formatted_date): Likewise.
	(__gg__formatted_datetime): Likewise.
	(__gg__formatted_time): Likewise.
	(change_case): Likewise.
	(__gg__trim): Likewise; fix memory leak.
	(__gg__reverse): Move call to __gg__adjust_dest_size().
	(__gg__locale_compare): Likewise
	(__gg__locale_date): Likewise
	(__gg__locale_time): Likewise
	(__gg__locale_time_from_seconds): Likewise
	* libgcobol.cc (format_for_display_internal): Make the results of
	intermediate FldNumericBin5 look nice to a human.
	(init_var_both): Move call to __gg__adjust_dest_size().
	(__gg__get_argc): Move call to __gg__adjust_dest_size().
	(__gg__get_argv): Move call to __gg__adjust_dest_size().
	(__gg__get_command_line): Move call to __gg__adjust_dest_size().
	(__gg__adjust_dest_size): Properly handle intermediate_e
	allocations.
	(__gg__adjust_encoding): Move call to __gg__adjust_dest_size().
	(__gg__module_name): Move call to __gg__adjust_dest_size().
	(__gg__refer_from_string): Move call to __gg__adjust_dest_size().
	(__gg__refer_from_psz): Move call to __gg__adjust_dest_size().
	(__gg__convert): Move call to __gg__adjust_dest_size().
	* posix/shim/lseek.cc: Changes to extended functions.
	* posix/shim/open.cc (posix_opent): Likewise.
	(posix_open): Likewise.
	* posix/udf/posix-open.cbl: Likewise.
	* posix/udf/posix-read.cbl: Likewise.
	* posix/udf/posix-write.cbl: Likewise.
	* xmlparse.cc (sayso): Change to debug message.
	* posix/udf/posix-ftruncate.cbl: New file.
2026-02-24 10:48:38 -05:00

787 lines
28 KiB
C++

/*
* Copyright (c) 2021-2026 Symas Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fcntl.h>
#include <unistd.h>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cmath>
#include <cfenv>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <algorithm>
#include <vector>
#include <libxml/SAX2.h>
#include <libxml/parser.h>
#include <syslog.h>
#include "config.h"
#include "libgcobol-fp.h"
#include "ec.h"
#include "common-defs.h"
#include "io.h"
#include "gcobolio.h"
#include "libgcobol.h"
#define COUNT_OF(X) (sizeof(X) / sizeof(X[0]))
/*
 * Debug trace, written to stderr only when the XMLPARSE environment
 * variable is set.
 *
 *   len ==  0  prints "<func>:<line> was here"
 *   len == -1  prints data as a NUL-terminated string
 *   otherwise  prints at most len bytes of data
 *
 * A null data pointer (the default, and what the handlers pass for absent
 * optional strings) is printed as "(null)" rather than being handed to
 * printf's %s, which would be undefined behavior.
 */
void sayso( const char func[], int line,
            int len = 0 , const unsigned char data[] = nullptr ) {
  if( ! getenv("XMLPARSE") ) return;

  const char *text = reinterpret_cast<const char *>(data);
  if( text == nullptr && len != 0 ) {
    // Nothing to print: substitute a placeholder and print all of it.
    text = "(null)";
    len = -1;
  }

  switch(len) {
  case 0:
    fprintf(stderr, "%s:%d was here\n", func, line);
    break;
  case -1:
    fprintf(stderr, "%s:%d: '%s'\n", func, line, text);
    break;
  default:
    fprintf(stderr, "%s:%d: '%.*s'\n", func, line, len, text);
    break;
  }
}
#define SAYSO() sayso(__func__, __LINE__)
#define SAYSO_DATAZ(S) sayso(__func__, __LINE__, -1, S)
#define SAYSO_DATA(N, S) sayso(__func__, __LINE__, N, S)
#define CTX ctx __attribute__ ((unused))
/*
 * Table of XML PARSE exception codes and their descriptions.
 *
 * Each entry pairs a numeric code (ibm_code) with a fixed-size message
 * buffer; a message and its terminating NUL must fit in 80 bytes.
 * eoxml_ec_values points one past the last entry, so the pair
 * [xml_ec_values, eoxml_ec_values) can be used as an iterator range.
 *
 * NOTE(review): a few messages contain apparent typos ("afer", "datat",
 * "name name", "in/afterprocessing", "before after").  They are runtime
 * strings and are left as-is here.
 */
struct xml_ec_value_t {
int ibm_code;
const char msg[80];
} xml_ec_values[] = {
// Table 73. XML PARSE exceptions that allow continuation
{ 1, "invalid character between elements" },
{ 2, "invalid start before element content" },
{ 3, "duplicate attribute" },
{ 4, "markup character '<' in an attribute value" },
{ 5, "start/end tag mismatch" },
{ 6, "invalid character in element" },
{ 7, "invalid start in element content. " },
{ 8, "CDATA closing character sequence ']]>' not opened" },
{ 10, "comment the character sequence '--' without '>'" },
{ 11, "invalid character in a processing instruction" },
{ 12, "XML declaration was not start of document" },
{ 13, "invalid digit in a hexadecimal character reference" },
{ 14, "invalid digit in a decimal character reference" },
{ 15, "encoding declaration value name must start with [a-zA-Z] character" },
{ 16, "character reference did not refer to a legal XML character" },
{ 17, "invalid character in an entity reference name" },
{ 70, "EBCDIC document, supported EBCDIC page, unsupported declaration" },
{ 71, "EBCDIC document, unsupported EBCDIC page " },
{ 72, "EBCDIC document, unsupported EBCDIC page, unsupported declaration" },
{ 73, "EBCDIC document, unsupported EBCDIC page and declaration " },
{ 80, "ASCII document, supported ASCII page, unsupported declaration" },
{ 81, "ASCII document, unsupported ASCII page " },
{ 82, "ASCII document, unsupported ASCII page, unsupported declaration" },
{ 83, "ASCII document, unsupported ASCII page and declaration " },
{ 84, "ASCII document, invalid UTF-8, external UTF-8, no declaration. " },
{ 85, "ASCII document, invalid UTF-8, external UTF-8, invalid declaration" },
{ 86, "ASCII document, invalid UTF-8, external ASCII" },
{ 87, "ASCII document, invalid UTF-8, external and document UTF-8" },
{ 88, "ASCII document, invalid UTF-8, unsupported ASCII/UTF-8, UTF-8 declaration" },
{ 89, "ASCII document, invalid UTF-8, external UTF-8, ASCII declaration" },
{ 92, "alphanumeric document expected, document is UTF-16. " },
// XML PARSE exceptions that allow continuation (continued)
//// 100,001 - 165,535 EBCDIC document encoding does not match code page
//// 200,001 - 265,535 ASCII document encoding does not match code page
// XML PARSE exceptions that do not allow continuation
{ 100, "end of document before start of XML declaration" },
{ 101, "end of document before end of XML declaration" },
{ 102, "end of document before root element" },
{ 103, "end of document before version information in XML declaration" },
{ 104, "end of document before version information value in XML declaration" },
{ 106, "end of document before encoding declaration value in XML declaration" },
{ 108, "end of document before standalone declaration value in XML declaration" },
{ 109, "end of document before attribute name" },
{ 110, "end of document before attribute value" },
{ 111, "end of document before character/entity reference in attribute value" },
{ 112, "end of document before empty element tag" },
{ 113, "end of document before root element name" },
{ 114, "end of document before element name" },
{ 115, "end of document before character data in element content" },
{ 116, "end of document before processing instruction in element content" },
{ 117, "end of document before comment or CDATA section in element content" },
{ 118, "end of document before comment in element content" },
{ 119, "end of document before CDATA section in element content" },
{ 120, "end of document before character/entity reference in element content" },
{ 121, "end of document before after close of root element" },
{ 122, "possible invalid start of a document type" },
{ 123, "duplicate document type" },
{ 124, "root element name must start with [A-Za-z_:]" },
{ 125, "first attribute name must start with [A-Za-z_:]" },
{ 126, "invalid character in or after element name" },
{ 127, "attribute name not followed by '=' " },
{ 128, "invalid attribute value delimiter" },
{ 130, "attribute name must start with [A-Za-z_:]" },
{ 131, "invalid character in or after attribute name" },
{ 132, "empty element tag not terminated with '/>'" },
{ 133, "element end tag name name must start with [A-Za-z_:]" },
{ 134, "element end tag not terminated with '>'" },
{ 135, "element name must start with [A-Za-z_:]" },
{ 136, "invalid start of comment/CDATA in element" },
{ 137, "invalid start of comment" },
{ 138, "processing instruction target name must start with [A-Za-z_:]" },
{ 139, "invalid character in/afterprocessing instruction target name" },
{ 140, "processing instruction not terminated with '?>'" },
{ 141, "invalid character following '&' in a character/entity reference" },
{ 142, "missing version information in XML declaration" },
{ 143, "missing '=' after 'version' in XML declaration " },
{ 144, "missing XML version declaration " },
{ 145, "invalid character in XML version information" },
{ 146, "invalid character following XML version information value " },
{ 147, "invalid attribute in XML declaration" },
{ 148, "missing '=' after 'encoding' in XML declaration" },
{ 149, "missing XML encoding declaration value" },
{ 150, "invalid XML encoding declaration value" },
{ 151, "invalid character afer XML declaration" },
{ 152, "invalid attribute XML declaration" },
{ 153, "missing '=' after standalone XML declaration" },
{ 154, "missing standalone XML declaration value" },
{ 155, "standalone declaration must be 'yes' or 'no'" },
{ 156, "invalid standalone XML declaration value" },
{ 157, "invalid character following XML standalone declaration value" },
{ 158, "unterminated XML declaration " },
{ 159, "start of document type declaration after end of root element" },
{ 160, "start of element after end of root element" },
{ 161, "invalid UTF-8 byte sequence" },
{ 162, "UTF-8 character that has a Unicode code point above x'FFFF'" },
{ 315, "UTF-16 document little-endian unsupported" },
{ 316, "UCS4 document unsupported" },
{ 317, "unrecognized document encoding" },
{ 318, "UTF-8 document unsupported " },
{ 320, "mismatched national document data item to document encoding EBCDIC" },
{ 321, "mismatched national document data item to document encoding ASCII" },
{ 322, "mismatched native alphanumeric document data item to document encoding EBCDIC" },
{ 323, "mismatched host alphanumeric document data item to document encoding ASCII" },
{ 324, "mismatched national document data item to document encoding UTF-8" },
{ 325, "mismatched host alphanumeric document datat to document encoding UTF-8" },
{ 500, "internal error" },
// One-past-the-end pointer for the table above.
}, *eoxml_ec_values = xml_ec_values + COUNT_OF(xml_ec_values);
/*
 * Look up the table entry for an XML PARSE exception code.
 *
 * Codes strictly between 100000 and 200000, and strictly between 200000
 * and 300000, are not in the table: they are answered from a function-local
 * static entry whose ibm_code is overwritten with the requested code on
 * each call.  Any other code is searched for in xml_ec_values.
 *
 * Returns a pointer to the entry, or nullptr if the code is unknown.
 */
static const xml_ec_value_t *
xml_ec_value_of( int ibm_code ) {
  const bool ebcdic_mismatch = ibm_code > 100000 && ibm_code < 200000;
  const bool ascii_mismatch  = ibm_code > 200000 && ibm_code < 300000;

  if( ebcdic_mismatch ) {
    static xml_ec_value_t not_ebcdic{ 0, "EBCDIC document encoding "
                                         "does not match code page" };
    not_ebcdic.ibm_code = ibm_code;
    return &not_ebcdic;
  }

  if( ascii_mismatch ) {
    static xml_ec_value_t not_ascii{ 0, "ASCII document encoding "
                                        "does not match code page" };
    not_ascii.ibm_code = ibm_code;
    return &not_ascii;
  }

  // Plain linear scan of the table; the first matching code wins.
  for( const xml_ec_value_t *entry = xml_ec_values;
       entry != eoxml_ec_values;
       ++entry ) {
    if( entry->ibm_code == ibm_code ) return entry;
  }
  return nullptr;
}
const char *
xml_ec_value_str( int ibm_code ) {
auto p = xml_ec_value_of(ibm_code);
return p? p->msg : nullptr;
}
#if NEEDED
/*
 * Report whether an exception code is in the "does not allow continuation"
 * group: true for codes from 100 up to 100000 (asserted to be below 1000),
 * false for codes below 100 and above 100000.
 * Compiled only when NEEDED is defined.
 */
static bool
xml_fatal( int ibm_code ) {
  const bool continuable = ibm_code < 100 || ibm_code > 100000;
  if( continuable ) {
    return false;
  }
  assert(ibm_code < 1000);
  return true;
}
#endif
static callback_t *cobol_callback;
/*
* Internal handler functions
*/
///////////////
/*
ATTRIBUTE-CHARACTER The single character that corresponds with the predefined entity reference in the attribute value
ATTRIBUTE-CHARACTERS The value within quotation marks or apostrophes. This can be a substring of the attribute value if the value includes an entity reference.
ATTRIBUTE-NAME The attribute name; the string to the left of the equal sign
ATTRIBUTE-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference.
CONTENT-CHARACTER The single character that corresponds with the predefined entity reference in the element content
CONTENT-NATIONAL-CHARACTER Regardless of the type of the XML document specified by identifier-1 in the XML PARSE statement, XML-TEXT is empty with length zero and XML-NTEXT contains the single national character that corresponds with the numeric character reference.
DOCUMENT-TYPE-DECLARATION The entire document type declaration, including the opening and closing character sequences "<!DOCTYPE" and ">"
ENCODING-DECLARATION The value, between quotes or apostrophes, of the encoding declaration in the XML declaration
END-OF-CDATA-SECTION The string "]]>"
END-OF-DOCUMENT Empty with length zero
EXCEPTION The part of the document that was successfully scanned, up to and including the point at which the exception was detected. Special register XML-CODE contains the unique error code that identifies the exception.
PROCESSING-INSTRUCTION-TARGET The processing instruction target name, which occurs immediately after the processing instruction opening sequence, "<?"
STANDALONE-DECLARATION The value, between quotation marks or apostrophes ("yes" or "no"), of the stand-alone declaration in the XML declaration
START-OF-CDATA-SECTION The string "<![CDATA["
START-OF-DOCUMENT The entire document
UNKNOWN-REFERENCE-IN-CONTENT The entity reference name, not including the "&" and ";" delimiters
UNKNOWN-REFERENCE-IN-ATTRIBUTE The entity reference name, not including the "&" and ";" delimiters
VERSION-INFORMATION The value, between quotation marks or apostrophes, of the version information in the XML declaration
*/
///////////////
extern cblc_field_t __ggsr__xml_event;
extern cblc_field_t __ggsr__xml_code;
extern cblc_field_t __ggsr__xml_text;
extern cblc_field_t __ggsr__xml_ntext;
/*
 * Publish one XML event to the COBOL program.
 *
 * Copies event_name into the XML-EVENT register and pads the rest of its
 * allocated storage with 0x20 bytes, points the XML-TEXT register at text
 * (len bytes, not copied), clears the XML-CODE register's data pointer, and
 * then invokes cobol_callback.
 *
 * NOTE(review): __ggsr__xml_code.data is set to a null pointer rather than
 * having a zero value stored through it -- confirm that is intended.
 */
static void
xml_event( const char event_name[], size_t len, char text[] ) {
  assert(strlen(event_name) < __ggsr__xml_event.allocated);

  // Event name, left-justified and blank-filled.
  const size_t name_len = strlen(event_name);
  memcpy(__ggsr__xml_event.data, event_name, name_len);
  memset(__ggsr__xml_event.data + name_len, 0x20,
         __ggsr__xml_event.allocated - name_len);

  // XML-TEXT aliases the caller's buffer.
  __ggsr__xml_text.data = reinterpret_cast<unsigned char*>(text);
  __ggsr__xml_text.allocated = len;
  __ggsr__xml_text.capacity = __ggsr__xml_text.allocated;

  __ggsr__xml_code.data = nullptr;

  cobol_callback();
}
/*
 * Convenience overload for NUL-terminated text: the event length is
 * strlen(text).
 */
static inline void
xml_event( const char event_name[], char text[] ) {
  const size_t text_len = strlen(text);
  xml_event(event_name, text_len, text);
}
static inline void
xml_event( const char event_name[], size_t len, const xmlChar * value ) {
char *text = reinterpret_cast<char*>(const_cast<xmlChar*>(value));
xml_event(event_name, len, text);
}
namespace XML {
static inline void
xml_event( const char event_name[], const xmlChar * value ) {
char *text = reinterpret_cast<char*>(const_cast<xmlChar*>(value));
::xml_event(event_name, strlen(text), text);
}
/*
* Many static handler functions are defined but not used while we learn what
* is needed.
*/
#pragma GCC diagnostic ignored "-Wunused-function"
static void attributeDecl(void * CTX,
const xmlChar * elem,
const xmlChar * fullname,
int type __attribute__ ((unused)),
int def __attribute__ ((unused)),
const xmlChar * defaultValue,
xmlEnumerationPtr tree __attribute__ ((unused)) )
{
fprintf(stderr, "%s:%d: elem=%s, name=%s, default=%s\n",
__func__, __LINE__, elem, fullname, defaultValue);
}
/*
 * CDATA handler: traces the block, then reports its len bytes to the COBOL
 * program as a CONTENT-CHARACTERS event.
 */
static void
cdataBlock( void * CTX, const xmlChar * data, int len )
{
  SAYSO_DATA(len, data);
  ::xml_event("CONTENT-CHARACTERS", len, data);
}
/*
 * Character-data handler: traces the text, then reports its len bytes to
 * the COBOL program as a CONTENT-CHARACTERS event.
 */
static void
characters( void * CTX, const xmlChar * data, int len )
{
  SAYSO_DATA(len, data);
  ::xml_event("CONTENT-CHARACTERS", len, data);
}
/*
 * Comment handler: traces the comment text and raises a COMMENT event
 * carrying it.
 */
static void
comment( void * CTX, const xmlChar * value )
{
  SAYSO_DATAZ(value);
  xml_event("COMMENT", value);
}
/*
 * Element-declaration handler: only traces the element name; the type and
 * content model are ignored and no event is raised.
 */
static void
elementDecl( void * CTX,
             const xmlChar * name,
             int type __attribute__ ((unused)),
             xmlElementContentPtr content __attribute__ ((unused)) )
{
  SAYSO_DATAZ(name);
}
/*
 * End-of-document handler: leaves a trace line and raises no event.
 */
static void
endDocument( void * CTX )
{
  SAYSO();
}
/*
 * Namespace-aware end-tag handler: traces the prefix and local name, then
 * raises END-OF-ELEMENT with the local name.  The namespace URI is ignored.
 */
static void
endElementNs( void * CTX,
              const xmlChar * localname,
              const xmlChar * prefix,
              const xmlChar * URI __attribute__ ((unused)) )
{
  SAYSO_DATAZ(prefix);
  SAYSO_DATAZ(localname);

  xml_event("END-OF-ELEMENT", localname);
}
/*
 * SAX1 end-tag handler (not registered): only traces the element name.
 */
static void
endElement( void * CTX, const xmlChar * name )
{
  SAYSO_DATAZ(name);
}
/*
 * Entity-declaration handler (not registered): traces the entity name and
 * its content.  The type and the public/system identifiers are ignored.
 */
static void
entityDecl( void * CTX,
            const xmlChar * name,
            int type __attribute__ ((unused)),
            const xmlChar * publicId __attribute__ ((unused)),
            const xmlChar * systemId __attribute__ ((unused)),
            xmlChar * content )
{
  SAYSO_DATAZ(name);
  SAYSO_DATAZ(content);
}
/*
 * Error handler: writes "error: ", the printf-style formatted message, and
 * a newline to stderr.
 */
static void
error( void * CTX, const char * msg, ... )
{
  va_list args;

  fputs("error: ", stderr);
  va_start(args, msg);
  vfprintf(stderr, msg, args);
  va_end(args);
  fputc('\n', stderr);
}
/*
 * External-subset handler (not registered): traces the name and the two
 * identifiers; raises no event.
 */
static void
externalSubset( void * CTX,
                const xmlChar * name,
                const xmlChar * ExternalID,
                const xmlChar * SystemID )
{
  SAYSO_DATAZ(name);
  SAYSO_DATAZ(ExternalID);
  SAYSO_DATAZ(SystemID);
}
/*
 * Fatal-error handler: writes "fatal: ", the printf-style formatted
 * message, and a newline to stderr.
 */
static void
fatalError( void * CTX, const char * msg, ... )
{
  va_list args;

  fputs("fatal: ", stderr);
  va_start(args, msg);
  vfprintf(stderr, msg, args);
  va_end(args);
  fputc('\n', stderr);
}
// Disabled entity-lookup handlers, kept for reference.
// NOTE(review): both are declared to return xmlEntityPtr but have no return
// statement; they need one before this section can be enabled.
#if 0
static xmlEntityPtr getEntity(void * CTX,
const xmlChar * name)
{ SAYSO_DATAZ(name); }
static xmlEntityPtr getParameterEntity(void * CTX,
const xmlChar * name)
{ SAYSO_DATAZ(name); }
#endif
/*
 * External-subset query (not registered): leaves a trace line and always
 * answers 0.
 */
static int
hasExternalSubset( void * CTX )
{
  SAYSO();
  return 0;
}
/*
 * Internal-subset query (not registered): leaves a trace line and always
 * answers 0.
 */
static int
hasInternalSubset( void * CTX )
{
  SAYSO();
  return 0;
}
/*
 * Ignorable-whitespace handler: traces the len bytes at ch and raises no
 * event.
 */
static void
ignorableWhitespace( void * CTX, const xmlChar * ch, int len )
{
  SAYSO_DATA(len, ch);
}
/*
 * Internal-subset handler (not registered): traces the name and the two
 * identifiers; raises no event.
 */
static void
internalSubset( void * CTX,
                const xmlChar * name,
                const xmlChar * ExternalID,
                const xmlChar * SystemID )
{
  SAYSO_DATAZ(name);
  SAYSO_DATAZ(ExternalID);
  SAYSO_DATAZ(SystemID);
}
// Disabled stand-alone query, kept for reference.
// NOTE(review): declared to return int but has no return statement; it
// needs one before this section can be enabled.
#if 0
static int isStandalone (void * CTX)
{ SAYSO(); }
#endif
/*
 * Notation-declaration handler (not registered): traces the notation name
 * and its public and system identifiers; raises no event.
 */
static void
notationDecl( void * CTX,
              const xmlChar * name,
              const xmlChar * publicId,
              const xmlChar * systemId )
{
  SAYSO_DATAZ(name);
  SAYSO_DATAZ(publicId);
  SAYSO_DATAZ(systemId);
}
/*
 * Processing-instruction handler.  Raises two events in order, each
 * preceded by a trace of its text: PROCESSING-INSTRUCTION-TARGET with the
 * target name, then PROCESSING-INSTRUCTION-DATA with the data.
 */
static void
processingInstruction( void * CTX,
                       const xmlChar * target,
                       const xmlChar * data )
{
  // Target first ...
  SAYSO_DATAZ(target);
  xml_event("PROCESSING-INSTRUCTION-TARGET", target);

  // ... then the instruction's data.
  SAYSO_DATAZ(data);
  xml_event("PROCESSING-INSTRUCTION-DATA", data);
}
/*
 * Entity-reference handler: traces the referenced name and raises no event.
 */
static void
reference( void * CTX, const xmlChar * name )
{
  SAYSO_DATAZ(name);
}
// Disabled external-entity resolver, kept for reference.
// NOTE(review): declared to return xmlParserInputPtr but has no return
// statement; it needs one before this section can be enabled.
#if 0
static xmlParserInputPtr resolveEntity( void * CTX,
const xmlChar * publicId,
const xmlChar * systemId)
{ SAYSO(); }
#endif
/*
 * Document-locator handler (not registered): leaves a trace line and
 * ignores the locator.
 */
static void
setDocumentLocator( void * CTX,
                    xmlSAXLocatorPtr loc __attribute__ ((unused)) )
{
  SAYSO();
}
/*
 * Start-of-document handler, invoked once the XML declaration has been
 * parsed.  The declaration's contents are available through
 * xmlCtxtGetVersion(), xmlCtxtGetDeclaredEncoding() and
 * xmlCtxtGetStandalone().  Currently this only leaves a trace line.
 */
static void
startDocument( void * CTX )
{
  SAYSO();
}
/*
 * Namespace-aware start-tag handler: traces the prefix, namespace URI and
 * local name, then raises START-OF-ELEMENT with the local name.  The
 * namespace and attribute arguments are not examined.
 */
static void
startElementNs( void * CTX,
                const xmlChar * localname,
                const xmlChar * prefix,
                const xmlChar * URI,
                int nb_namespaces __attribute__ ((unused)),
                const xmlChar ** namespaces __attribute__ ((unused)),
                int nb_attributes __attribute__ ((unused)),
                int nb_defaulted __attribute__ ((unused)),
                const xmlChar ** attributes __attribute__ ((unused)) )
{
  SAYSO_DATAZ(prefix);
  SAYSO_DATAZ(URI);
  SAYSO_DATAZ(localname);

  xml_event("START-OF-ELEMENT", localname);
}
/*
 * SAX1 start-tag handler (not registered): traces the element name and
 * then every string in the null-terminated atts array.
 *
 * atts itself is a null pointer when the element has no attributes, so it
 * is checked before being indexed.
 */
static void
startElement( void * CTX,
              const xmlChar * name,
              const xmlChar ** atts )
{
  SAYSO_DATAZ(name);

  if( atts == nullptr ) return;

  for( int i=0; atts[i]; i++ ) {
    SAYSO_DATAZ(atts[i]);
  }
}
/*
 * Unparsed-entity declaration handler (not registered): traces the entity
 * name, its public and system identifiers, and the notation name.
 */
static void
unparsedEntityDecl( void * CTX,
                    const xmlChar * name,
                    const xmlChar * publicId,
                    const xmlChar * systemId,
                    const xmlChar * notationName )
{
  SAYSO_DATAZ(name);
  SAYSO_DATAZ(publicId);
  SAYSO_DATAZ(systemId);
  SAYSO_DATAZ(notationName);
}
/*
 * Warning handler: writes "warning: ", the printf-style formatted message,
 * and a newline to stderr.
 */
static void
warning( void * CTX, const char * msg, ... )
{
  va_list args;

  fputs("warning: ", stderr);
  va_start(args, msg);
  vfprintf(stderr, msg, args);
  va_end(args);
  fputc('\n', stderr);
}
/*
* xmlSAXHandler is a structure of function pointers that the SAX parser calls
* as it encounters XML elements in the input. Each pointer is a callback
* function, locally defined in this file. These we term "handlers".
*
* Each handler sets the XML registers per IBM, and then calls
* cobol_callback(), which is a function pointer supplied by the COBOL program
* to be the processing procedure for XML PARSE.
*
* There is no obvious way to abort parsing at the C level. See:
* http://veillard.com/XML/messages/0540.html
*
* > The simplest to implement this would not be to add a new SAX
* > callback but rather modify the xmlParserCtxtPtr passed to the
* > callbacks. The best seems to be:
* > - set ctxt->instate to XML_PARSER_EOF
* > - hack xmlCurrentChar() to return 0
* > if (ctxt->instate == XML_PARSER_EOF)
* > Doing both should led to a quick termination of parsing
* > (but endElement(s)/endDocument will certainly be called anyway).
*
* Another hack might be to set the input to all blanks in cobol_callback.
*/
static xmlSAXHandler handlers;
/*
 * Reset the file-level SAX handler table and install the handlers this
 * library uses, and record callback as the COBOL processing procedure that
 * the event helpers invoke.
 *
 * Handlers listed in the disabled section are defined in this file but are
 * deliberately not installed.
 */
static void
initialize_handlers( callback_t *callback ) {
  cobol_callback = callback;

  // Start from a value-initialized table and mark it with the SAX2 magic.
  handlers = xmlSAXHandler {};
  handlers.initialized = XML_SAX2_MAGIC;

#if 0
  //// Should typically not be modified
  handlers.attributeDecl = attributeDecl;
  handlers.elementDecl = elementDecl;
  handlers.entityDecl = entityDecl;
  handlers.externalSubset = externalSubset;
  handlers.getEntity = getEntity;
  handlers.getParameterEntity = getParameterEntity;
  handlers.internalSubset = internalSubset;
  handlers.notationDecl = notationDecl;
  handlers.resolveEntity = resolveEntity;
  handlers.unparsedEntityDecl = unparsedEntityDecl;

  //// Not supposed to be changed by applications
  handlers.hasExternalSubset = hasExternalSubset;
  handlers.hasInternalSubset = hasInternalSubset;
  handlers.isStandalone = isStandalone;

  //// SAX 1 only
  handlers.startElement = startElement;
  handlers.endElement = endElement;

  //// Everything is available on the context, so this is useless in our case
  handlers.setDocumentLocator = setDocumentLocator;
#endif

  // Document and element boundaries.
  handlers.startDocument = startDocument;
  handlers.endDocument = endDocument;
  handlers.startElementNs = startElementNs;
  handlers.endElementNs = endElementNs;

  // Content.
  handlers.characters = characters;
  handlers.cdataBlock = cdataBlock;
  handlers.ignorableWhitespace = ignorableWhitespace;
  handlers.comment = comment;
  handlers.processingInstruction = processingInstruction;
  handlers.reference = reference;

  // Diagnostics.
  handlers.warning = warning;
  handlers.error = error;
  handlers.fatalError = fatalError;
}
/*
 * View a C string as a mutable xmlChar pointer.  No copy is made: the
 * bytes are reinterpreted and constness is cast away.
 */
static xmlChar *
xmlchar_of( const char input[] ) {
  const xmlChar *bytes = reinterpret_cast<const xmlChar*>(input);
  return const_cast<xmlChar*>(bytes);
}
/*
 * Describe a libxml2 parser error code.
 *
 * erc   the error code
 * name  a label for the code; the xmlerror_str() macro passes the
 *       stringified argument expression
 *
 * Returns a string the caller must not free.  Known codes map to string
 * literals.  For any other code the result is "<name> (libxml2 error code
 * <n>)", formatted into a per-thread static buffer that is overwritten by
 * the next such call on the same thread.  Previously this case returned a
 * strdup() copy of name that nothing released, and because name is only
 * the spelling of the caller's expression the numeric code was lost.
 */
static const char *
xmlParserErrors_str( xmlParserErrors erc, const char name[] ) {
  switch( erc ) {
  case XML_ERR_OK:
    return "Success";
  case XML_ERR_INTERNAL_ERROR:
    return "Internal assertion failure";
  case XML_ERR_NO_MEMORY:
    return "Out of memory";
  case XML_ERR_UNSUPPORTED_ENCODING:
    return "Unsupported character encoding";
#if LIBXML_VERSION >= 21400
  case XML_ERR_RESOURCE_LIMIT:
    return "Internal resource limit like maximum amplification factor exceeded";
  case XML_ERR_ARGUMENT:
    return "Invalid argument";
  case XML_ERR_SYSTEM:
    return "Unexpected error from the OS or an external library";
#endif
  case XML_IO_ENOENT:
    return "File not found";
  default:
    break;
  }

  static thread_local char unknown[128];
  snprintf(unknown, sizeof(unknown), "%s (libxml2 error code %d)",
           name ? name : "unknown XML error", static_cast<int>(erc));
  return unknown;
}
#define xmlerror_str(E) xmlParserErrors_str( (E), #E )
/*
* The global context is NULL if XML PARSE is not in progress.
*/
static class context_t {
const int priority;
public:
xmlParserCtxt * ctxt;
context_t() : priority(LOG_INFO), ctxt(nullptr) {
const int option = LOG_PERROR, facility = LOG_USER;
#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
/* Declared in errno.h, when available. */
static const char * const ident = program_invocation_short_name;
#elif defined (HAVE_GETPROGNAME)
/* Declared in stdlib.h. */
static const char * const ident = getprogname();
#else
/* Avoid a NULL entry. */
static const char * const ident = "unnamed_COBOL_program";
#endif
// TODO: Program to set option in library via command-line and/or
// environment.
// Library listens to program, not to the environment.
openlog(ident, option, facility);
initialize_handlers(nullptr);
}
void
push( const cblc_field_t *input_field,
size_t input_offset,
size_t len, bool done ) {
if( ! ctxt ) {
init();
}
assert(cobol_callback); // caller must set
if( input_offset < len ) {
int size = len - input_offset;
const char *chunk = PTRCAST(char, input_field->data + input_offset);
int terminate = done? 1 : 0;
auto erc = (xmlParserErrors )xmlParseChunk( ctxt, chunk, size, terminate );
if( erc != 0 ) {
auto msg = xmlerror_str(erc);
syslog(priority, "XML PARSE: XML error: %s", msg);
}
if( done ) this->done();
}
}
void done() {
if( ctxt ) {
xmlFreeParserCtxt( ctxt );
ctxt = nullptr;
}
}
protected:
void init() {
const char *external_entities = nullptr;
void * const user_data = nullptr;
ctxt = xmlCreatePushParserCtxt( &handlers, user_data,
nullptr, 0, external_entities);
}
} context;
/*
 * Push-parser entry point, not yet in service.
 *
 * Records callback as the COBOL processing procedure, feeds the input to
 * the global context without marking it as the final chunk, and fetches
 * the document's declared version (a placeholder string when built against
 * libxml2 older than 2.14).  The last assert compares nullptr with a
 * string literal and therefore always fails: it is a deliberate trap that
 * stops any caller when assertions are enabled.
 */
static int
xml_push_parse( const cblc_field_t *input_field,
size_t input_offset,
size_t len,
cblc_field_t *encoding __attribute__ ((unused)),
cblc_field_t *validating __attribute__ ((unused)),
int returns_national __attribute__ ((unused)),
void (*callback)(void) )
{
::cobol_callback = callback;
context.push( input_field, input_offset, len, false);
#if LIBXML_VERSION >= 21400
const xmlChar * version = xmlCtxtGetVersion( context.ctxt );
#else
const xmlChar * version = xmlchar_of("requires version 2.14");
#endif
assert(version);
// Intentional: always fails, because this function is not ready for use.
assert(nullptr == "function not ready and not called");
return 0;
}
/*
 * Release the global push-parser context, if one exists.  Always returns 0.
 * The parser probably calls this via parser_xml_parse_end.
 */
extern "C"
int
__gg__xml_parse_done() {
  context.done();
  return 0;
}
/*
 * Parse an XML document held in a COBOL field, in one pass.
 *
 * input_field/input_offset  locate the first byte of the document
 * len                       number of bytes to parse
 * callback                  COBOL processing procedure invoked per event
 * encoding, validating, returns_national are accepted but not used.
 *
 * Returns 0 on success, the nonzero value reported by libxml2 on a parse
 * error, or -1 if len does not fit the int size libxml2 accepts.
 *
 * The error details are fetched with xmlGetLastError() and checked for
 * null.  The previous code called xmlCtxtGetLastError(nullptr), which
 * yields a null pointer for a null context, and then dereferenced it.
 */
extern "C"
int
__gg__xml_parse( const cblc_field_t *input_field,
                 size_t input_offset,
                 size_t len,
                 cblc_field_t *encoding __attribute__ ((unused)),
                 cblc_field_t *validating __attribute__ ((unused)),
                 int returns_national __attribute__ ((unused)),
                 void (*callback)(void) )
{
  initialize_handlers(callback);

  // xmlSAXUserParseMemory() takes an int size; refuse rather than truncate.
  if( len > static_cast<size_t>(INT_MAX) ) {
    fprintf(stderr, "XML PARSE: error: document of %zu bytes exceeds "
                    "the %d-byte limit\n", len, INT_MAX);
    return -1;
  }

  const char *input = PTRCAST(char, input_field->data + input_offset);

  int erc = xmlSAXUserParseMemory(&handlers, nullptr, input,
                                  static_cast<int>(len));

  if( erc ) {
    const xmlError *msg = xmlGetLastError();
    if( msg ) {
      fprintf(stderr, "XML PARSE: error: line %d: %s (%d: %d.%d.%d)\n",
              msg->line, msg->message ? msg->message : "",
              erc, msg->domain, msg->level, msg->code);
    } else {
      fprintf(stderr, "XML PARSE: error: (%d)\n", erc);
    }
  }
  return erc;
}
} // end XML namespace