#!/usr/bin/env python3
#
# Script to generate tables for libstdc++ std::text_encoding.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

# To update the Libstdc++ static data in <bits/text_encoding-data.h> download
# the latest:
# https://www.iana.org/assignments/character-sets/character-sets-1.csv
# Then run this script and save the output to
# include/bits/text_encoding-data.h

import csv
import sys

# MIB enum values of "NATS-DANO" and "NATS-DANO-ADD", which are left out of
# the generated table.
_EXCLUDED_MIBS = (33, 34)


def _read_charsets(rows):
    """Collect the names of every character set found in *rows*.

    Each non-empty row is indexed like a row of the IANA CSV file: column 1
    holds the primary name, column 2 the MIB enum value and column 5 the
    whitespace-separated aliases.  Empty rows (what ``csv.reader`` yields
    for a blank line) are skipped instead of failing with ``IndexError``.

    Returns a dict mapping each MIB value (int) to a list of names whose
    first element is the primary name.

    Raises ValueError if two rows share a MIB value (or if a MIB field is
    not an integer).
    """
    charsets = {}
    for row in rows:
        if not row:
            continue
        mib = int(row[2])
        if mib in charsets:
            raise ValueError("Multiple rows for mibEnum={}".format(mib))
        name = row[1]
        aliases = row[5].split()
        # Ensure primary name comes first (and is not listed twice).
        if name in aliases:
            aliases.remove(name)
        charsets[mib] = [name] + aliases
    return charsets


def _table_lines(charsets):
    """Return the output lines for the table, ordered by ascending MIB value.

    One ``{ mib, "name" },`` line is produced per name.  Immediately before
    the entries of the character set whose primary name is "UTF-8", a
    ``#define`` is emitted recording how many entries precede it.
    """
    lines = []
    count = 0
    for mib in sorted(charsets):
        names = charsets[mib]
        if names[0] == "UTF-8":
            lines.append(
                "#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
        for name in names:
            lines.append(' {{ {:4}, "{}" }},'.format(mib, name))
        count += len(names)
    return lines


def main(argv=None):
    """Generate the table from the CSV file named by ``argv[1]``.

    The result is written to standard output.  Returns the process exit
    status: 1 (after printing a usage message to standard error) when the
    argument count is wrong, 0 otherwise.  Errors while reading or parsing
    the CSV file propagate as exceptions.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        print("Usage: %s <character-sets-1.csv>" % argv[0], file=sys.stderr)
        return 1

    print("// Generated by gen_text_encoding_data.py, do not edit.\n")
    print("#ifndef _GLIBCXX_GET_ENCODING_DATA")
    print('# error "This is not a public header, do not include it directly"')
    print("#endif\n")

    with open(argv[1], newline='') as f:
        reader = csv.reader(f)
        next(reader)  # skip header row
        charsets = _read_charsets(reader)

    # Remove "NATS-DANO" and "NATS-DANO-ADD"
    for mib in _EXCLUDED_MIBS:
        charsets.pop(mib, None)

    for line in _table_lines(charsets):
        print(line)

    # The header that includes the generated data (presumably
    # <text_encoding>) gives an error if this macro is left defined.
    # Do this last, so that the generated output is not usable unless we
    # reach here.
    print("\n#undef _GLIBCXX_GET_ENCODING_DATA")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))