Cross Compiling Header Generation Rework [Patch 1/3]

Fri Aug 23 16:12:19 UTC 2013

I am attempting to cross compile audit 2.3.2 and I ran into issues
generating the header files in the lib and auparse directories.  Compiling
an executable and running it to build the header files does not work when
they are compiled for my target.  To work around this, I wrote a python
script to generate the header files based on the pre-processor output from
the cross compiler.

A couple of caveats are introduced by this script.  Firstly, the build now
requires Python 2.7 (or greater, but I have not tested this) and the
Pyparsing library.  I chose to use Python because it is fairly standard and
easily accessible.  Pyparsing was chosen out of necessity to parse the enum
values the pre-processor dumps out.  There might be a way around using this
but I am not sure what could replace it.

This is the first of three patches to add the functionality.  The first one
adds in the python script.  The second changes the Makefiles to use it and
modifies the gen_tables.c file to remove extra functionality.  The final
patch adds checks into the configure script for Python and Pyparsing.

Any feedback on this approach is greatly appreciated.

Thanks,
Clayton Shotwell

diff -urN /dev/null b/lib/gen_tables.py

--- /dev/null    2013-06-19 11:25:31.230442052 -0500
+++ b/lib/gen_tables.py    2013-08-19 14:27:55.639872141 -0500
@@ -0,0 +1,458 @@
+#!/usr/bin/python
+################################################################################
+# Copyright 2013, Rockwell Collins.  All rights reserved.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# Authors:
+#      Clayton Shotwell <clshotwe at rockwellcollins.com>
+#
+# Description:
+#      Generator of lookup tables to replace the gen_tables.c method
+#      developed by Miloslav Trmac <mitr at redhat.com>, to make the audit
+#      package cross compilable.  The logic in this script mimics the logic
+#      in gen_tables.c before the last modification.
+#
+# Usage: gen_tables.py [-h] [--i2s] [--i2s-transtab] [--s2i]
+#                             [--uppercase | --lowercase] [--duplicate-ints]
+#                             prefix header source output
+#
+#        Generate tables header files.
+#
+#        positional arguments:
+#          prefix            The prefix of the output file to use
+#          header            The header file to parse table values from
+#          source            The source of the preprocessor from the compiler
+#          output            The output header file
+#
+#        optional arguments:
+#          -h, --help        show this help message and exit
+#          --i2s             Generate i2s tables
+#          --i2s-transtab    Generate transtab tables
+#          --s2i             Generate s2i tables
+#          --uppercase       All characters are uppercase
+#          --lowercase       All characters are lowercase
+#          --duplicate-ints  Allow duplicate integers
+
+import argparse
+import ctypes
+import os
+import re
+import sys
+from operator import attrgetter
+from pyparsing import Group, Word, Suppress, alphas, alphanums, nums,
cppStyleComment, \
+        Optional, ZeroOrMore
+
+# Number of entries to print per line of the generated C arrays.
+# (The misspelt name is kept because the output_* functions reference it.)
+NUM_ENTIRES_IN_LINE = 10
+
+# Global table entries variable that is used everywhere: main() fills and
+# re-sorts it, and the output_* functions read it.
+ENTRIES = []
+
+# The ratio of table size to number of non-empty elements allowed for a
+# "direct" i2s table; if the ratio would be bigger, bsearch tables are used
+# instead.
+#
+# 2 looks like a lot at a first glance, but the bsearch tables need twice as
+# much space per element, so with the ratio equal to 2 the direct table uses
+# no more memory and is faster.
+DIRECT_THRESHOLD = 2
+
+# Set to True to enable some debug output
+DEBUG = False
+
+class Entry:
+    """A single lookup-table row pairing a name with its integer value.
+
+    Besides the name (``st``) and the value (``val``) each row carries two
+    bookkeeping fields that start at zero: ``offset``, assigned through
+    set_position(), and ``orig_index``, assigned through set_orig_index().
+    """
+
+    def __init__(self, new_s, val):
+        self.st, self.val = new_s, val
+        self.offset = self.orig_index = 0
+
+    def set_position(self, offset):
+        """Store ``offset`` as this row's ``offset`` field."""
+        self.offset = offset
+
+    def set_orig_index(self, orig_index):
+        """Store ``orig_index`` as this row's ``orig_index`` field."""
+        self.orig_index = orig_index
+
+    def get_str(self):
+        """Return the row's name string."""
+        return self.st
+
+    def __repr__(self):
+        fields = (self.st, self.val)
+        return "<Entry st=%s val=%s>" % fields
+
+    def __str__(self):
+        fields = (self.st, self.val, self.offset, self.orig_index)
+        return "Entry of st=%s, val=%s, offset=%d, orig_index=%d" % fields
+
+def output_strings(prefix, outfile):
+    """Write the ``<prefix>_strings`` C array holding every entry name.
+
+    The names of ENTRIES are emitted, in their current order, as one C
+    string literal with the names separated by ``\\0``.  As a side effect
+    each entry's offset is set to the position its name starts at inside
+    that literal.
+
+    Any exception is reported on stdout and re-raised to the caller.
+    """
+    try:
+        # First pass: give every entry its starting offset.  Each name takes
+        # its own length plus one byte for the terminating null character.
+        cursor = 0
+        for entry in ENTRIES:
+            entry.set_position(cursor)
+            cursor += len(entry.get_str()) + 1
+
+        # Second pass: emit the literal, starting a new source line every
+        # NUM_ENTIRES_IN_LINE names.
+        outfile.write("static const char %s_strings[] = \"" % prefix)
+        final = len(ENTRIES) - 1
+        for position, entry in enumerate(ENTRIES):
+            if position and not position % NUM_ENTIRES_IN_LINE:
+                outfile.write('"\n\t"')
+            outfile.write(entry.get_str())
+            # The compiler supplies the null after the last name itself.
+            if position != final:
+                outfile.write('\\0')
+        outfile.write('";\n')
+    except:
+        # Report where the failure happened, then hand the exception on so
+        # the main function can close and delete the outfile.
+        failure_type, _, trace = sys.exc_info()
+        fname = os.path.basename(trace.tb_frame.f_code.co_filename)
+        print("Unexpected error in output_strings:", failure_type, fname, trace.tb_lineno)
+        raise
+
+def output_s2i(prefix, outfile, uppercase, lowercase):
+    """Write the string-to-integer tables and the ``<prefix>_s2i()`` C function.
+
+    ENTRIES must already be sorted by name and carry the string offsets
+    assigned by output_strings().  Two parallel arrays are written,
+    ``<prefix>_s2i_s`` (string offsets) and ``<prefix>_s2i_i`` (values),
+    followed by a ``<prefix>_s2i()`` wrapper around ``s2i__()``.  When
+    ``uppercase`` or ``lowercase`` is set the wrapper first folds its
+    argument to that case.
+
+    Raises:
+        ValueError: if the entries are not sorted by name, if two entries
+            share a name, or if a name violates the requested case.  All
+            checks run before anything is written and use explicit raises
+            so they are not stripped by ``python -O``.
+    """
+    try:
+        # Names must be strictly increasing for the generated bsearch.
+        for cur, nxt in zip(ENTRIES, ENTRIES[1:]):
+            if cur.get_str() > nxt.get_str():
+                raise ValueError("Entries not in the correct order")
+            if cur.get_str() == nxt.get_str():
+                raise ValueError("Duplicate value %s: %d, %d" %
+                                 (cur.get_str(), cur.val, nxt.val))
+
+        # Verify the strings are all uppercase or lowercase (and pure ASCII)
+        # depending on the arguments passed in.
+        for entry in ENTRIES:
+            name = entry.get_str()
+            is_ascii = all(ord(c) < 128 for c in name)
+            if uppercase and not (is_ascii and name.isupper()):
+                raise ValueError("String %s is not uppercase" % name)
+            if lowercase and not (is_ascii and name.islower()):
+                raise ValueError("String %s is not lowercase" % name)
+
+        # Write out the offset of each name inside <prefix>_strings
+        outfile.write("static const unsigned %s_s2i_s[] = {" % prefix)
+        for i, entry in enumerate(ENTRIES):
+            if i % NUM_ENTIRES_IN_LINE == 0:
+                outfile.write('\n\t')
+            outfile.write("%i," % entry.offset)
+        outfile.write('\n};\n')
+
+        # Write out the value belonging to each name, in the same order
+        outfile.write("static const int %s_s2i_i[] = {" % prefix)
+        for i, entry in enumerate(ENTRIES):
+            if i % NUM_ENTIRES_IN_LINE == 0:
+                outfile.write('\n\t')
+            outfile.write("%i," % entry.val)
+        outfile.write('\n};\n')
+
+        if uppercase or lowercase:
+            # The lookup function copies its argument, folding the case, so
+            # the search is case-insensitive.
+            outfile.write("static int %s_s2i(const char *s, int *value) {\n"
+                          "\tsize_t len, i;\n"
+                          "\tlen = strlen(s);\n"
+                          "\t{ char copy[len + 1];\n"
+                          "\tfor (i = 0; i < len; i++) {\n"
+                          "\t\tchar c = s[i];\n" % prefix)
+            if uppercase:
+                outfile.write("\t\tcopy[i] = GT_ISLOWER(c) ? c - 'a' + 'A' : c;\n")
+            else:
+                outfile.write("\t\tcopy[i] = GT_ISUPPER(c) ? c - 'A' + 'a' : c;\n")
+            outfile.write("\t}\n"
+                          "\tcopy[i] = 0;\n"
+                          "\treturn s2i__(%s_strings, %s_s2i_s, %s_s2i_i, %d, copy, value);\n"
+                          "\t}\n"
+                          "}\n" % (prefix, prefix, prefix, len(ENTRIES)))
+        else:
+            outfile.write("static int %s_s2i(const char *s, int *value) {\n"
+                          "\treturn s2i__(%s_strings, %s_s2i_s, %s_s2i_i, %d, s, value);\n"
+                          "}\n" % (prefix, prefix, prefix, prefix, len(ENTRIES)))
+    except Exception:
+        # Report the failure, then re-raise so the main function can close
+        # and delete the outfile.  A single formatted string is printed so
+        # the message reads the same on Python 2 and Python 3.
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+        print("Unexpected error in output_s2i: %s: %s (%s:%d)" %
+              (exc_type.__name__, exc_obj, fname, exc_tb.tb_lineno))
+        raise
+
+def output_i2s(prefix, outfile, allow_duplicate_ints):
+    """Write the integer-to-string table and the ``<prefix>_i2s()`` C function.
+
+    ENTRIES must already be sorted by value and carry the string offsets
+    assigned by output_strings().  When several entries share a value only
+    the first one is emitted.
+
+    Two layouts exist.  If the value range divided by the number of unique
+    values does not exceed DIRECT_THRESHOLD, a ``<prefix>_i2s_direct`` array
+    indexed by ``value - min`` is written, with ``-1u`` marking holes.
+    Otherwise the parallel arrays ``<prefix>_i2s_i`` / ``<prefix>_i2s_s``
+    are written for ``i2s_bsearch__()``.
+
+    Raises:
+        ValueError: if ENTRIES is empty or not sorted by value, if a value
+            is duplicated while ``allow_duplicate_ints`` is false, or if a
+            string offset exceeds the platform integer limit.  Explicit
+            raises are used so the checks survive ``python -O``.
+    """
+    try:
+        if not ENTRIES:
+            raise ValueError("No entries to generate an i2s table from")
+
+        # Check ordering and duplicate values
+        for cur, nxt in zip(ENTRIES, ENTRIES[1:]):
+            if cur.val > nxt.val:
+                raise ValueError("Entries not in the correct order")
+            if (not allow_duplicate_ints) and (cur.val == nxt.val):
+                raise ValueError("Duplicate value %d: %s, %s" %
+                                 (cur.val, cur.get_str(), nxt.get_str()))
+
+        # Keep the first entry of every run of equal values
+        unique_entries = []
+        for entry in ENTRIES:
+            if (not unique_entries) or (unique_entries[-1].val != entry.val):
+                unique_entries.append(entry)
+
+        # sys.maxint only exists on Python 2; fall back to sys.maxsize.
+        max_offset = getattr(sys, "maxint", sys.maxsize)
+        for entry in unique_entries:
+            if entry.offset > max_offset:
+                raise ValueError("String offset %d of %s is too large" %
+                                 (entry.offset, entry.get_str()))
+
+        # Determine which mapping to use based on the threshold
+        max_val = unique_entries[-1].val
+        min_val = unique_entries[0].val
+        if (float(max_val - min_val) / len(unique_entries)) <= DIRECT_THRESHOLD:
+            offsets = dict((entry.val, entry.offset) for entry in unique_entries)
+            outfile.write("static const unsigned %s_i2s_direct[] = {" % prefix)
+            # One slot per integer in [min_val, max_val]
+            for column, number in enumerate(range(min_val, max_val + 1)):
+                if column % NUM_ENTIRES_IN_LINE == 0:
+                    outfile.write("\n\t")
+                if number in offsets:
+                    outfile.write("%i," % offsets[number])
+                else:
+                    outfile.write("-1u,")
+            outfile.write("\n};\nstatic const char *%s_i2s(int v) {\n"
+                          "\treturn i2s_direct__(%s_strings, %s_i2s_direct, %d, %d, v);\n"
+                          "}\n" % (prefix, prefix, prefix, min_val, max_val))
+        else:
+            outfile.write("static const int %s_i2s_i[] = {" % prefix)
+            for i, entry in enumerate(unique_entries):
+                if i % NUM_ENTIRES_IN_LINE == 0:
+                    outfile.write("\n\t")
+                outfile.write("%i," % entry.val)
+            outfile.write("\n};\nstatic const unsigned %s_i2s_s[] = {" % prefix)
+            for i, entry in enumerate(unique_entries):
+                if i % NUM_ENTIRES_IN_LINE == 0:
+                    outfile.write("\n\t")
+                outfile.write("%i," % entry.offset)
+            outfile.write("\n };\n static const char *%s_i2s(int v) {\n"
+                          "\treturn i2s_bsearch__(%s_strings, %s_i2s_i, %s_i2s_s, %u, v);\n"
+                          "}\n" % (prefix, prefix, prefix, prefix, len(unique_entries)))
+    except Exception:
+        # Report the failure, then re-raise so the main function can close
+        # and delete the outfile.  A single formatted string is printed so
+        # the message reads the same on Python 2 and Python 3.
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+        print("Unexpected error in output_i2s: %s: %s (%s:%d)" %
+              (exc_type.__name__, exc_obj, fname, exc_tb.tb_lineno))
+        raise
+
+def output_i2s_transtab(prefix, outfile):
+    """
+        Emit every entry as a ``{value,string offset}`` element of a
+        ``struct transtab <prefix>_table[]`` array, in the current order of
+        ENTRIES, followed by a ``<PREFIX>_NUM_ENTRIES`` element-count macro.
+        Any exception is reported on stdout and re-raised to the caller.
+    """
+    try:
+        outfile.write("static const struct transtab %s_table[] = {" % prefix)
+        for position, entry in enumerate(ENTRIES):
+            # Start a new source line every NUM_ENTIRES_IN_LINE elements
+            if not position % NUM_ENTIRES_IN_LINE:
+                outfile.write('\n\t')
+            outfile.write("{%i,%u}," % (entry.val, entry.offset))
+        macro_args = (prefix.upper(), prefix, prefix)
+        outfile.write("\n};\n#define %s_NUM_ENTRIES (sizeof(%s_table) / sizeof(*%s_table))\n" % macro_args)
+    except:
+        # Report where the failure happened, then hand the exception on so
+        # the main function can close and delete the outfile.
+        failure_type, _, trace = sys.exc_info()
+        fname = os.path.basename(trace.tb_frame.f_code.co_filename)
+        print("Unexpected error in output_i2s_transtab:", failure_type, fname, trace.tb_lineno)
+        raise
+
+def lookup_enum(look_str, buf):
+    """Return the integer value of enumerator ``look_str``, or None.
+
+    The bodies of all ``enum { ... }`` definitions are extracted from
+    ``buf`` (the preprocessor output) on the first call and cached on the
+    function object, so later calls reuse that first buffer's enums.  Each
+    body containing ``look_str`` is parsed with pyparsing and walked with
+    the C rule for enumerator values: an explicit ``= value`` sets the
+    value, otherwise it is the previous enumerator's value plus one,
+    starting at zero.
+
+    Failures are reported on stdout and yield None rather than raising.
+    """
+    try:
+        # Pull all of the enums out of the preprocessor output only once
+        # to help speed up all of the lookups
+        if not hasattr(lookup_enum, "enums"):
+            if DEBUG:
+                print("Pulling out the enums from the preprocessor output")
+            # Regex pattern to parse out the enums from the preprocessor output
+            enum_regex = "enum.*?{(?P<s>.*?)}"
+            lookup_enum.enums = re.findall(enum_regex, buf, flags=(re.M | re.S))
+
+        # The grammar never changes, so build it once as well.
+        if not hasattr(lookup_enum, "grammar"):
+            identifier = Word(alphas, alphanums+'_')
+            # Must start with a digit; hex digits are accepted so that
+            # values such as 0xFF parse.
+            opt_value = Word(nums, nums+'abcdefABCDEFxX+<>/*')
+            enum_value = Group(identifier('name') + Optional(Suppress('=') + opt_value('value')))
+            enum_list = Group(enum_value + ZeroOrMore(Suppress(',') + enum_value))
+            enum = Suppress('enum') + identifier('enum') + Suppress('{') + enum_list('list') + \
+                    Suppress('}')
+            enum.ignore(cppStyleComment)
+            lookup_enum.grammar = enum
+
+        # find which enum contains the string we are looking for
+        for body in lookup_enum.enums:
+            if look_str not in body:
+                continue
+            # Re-wrap the body under a dummy name and drop all whitespace
+            enum_string = "".join(("enum preproc { " + body + " }").split())
+            if DEBUG:
+                print("Found %s in %s" % (look_str, enum_string))
+
+            for item, start, stop in lookup_enum.grammar.scanString(enum_string):
+                # Value an enumerator without "= value" would take; None
+                # once an explicit value could not be evaluated.
+                next_value = 0
+                for entry in item.list:
+                    if DEBUG:
+                        print("Checking %s against %s" % (look_str, entry.name))
+                    if entry.value != '':
+                        # Need to call eval because some enums have math in
+                        # them.  NOTE(review): eval() on file content; the
+                        # grammar limits it to digits, hex letters and
+                        # "x+<>/*", and builtins are disabled.  "/" becomes
+                        # "//" so division stays integral on Python 3.
+                        try:
+                            current = int(eval(entry.value.replace('/', '//'),
+                                               {"__builtins__": {}}, {}))
+                        except Exception:
+                            print("Found invalid value %s" % entry.value)
+                            current = None
+                    else:
+                        current = next_value
+                    if entry.name == look_str:
+                        if current is None:
+                            print("Unable to find enum value")
+                            return None
+                        if DEBUG:
+                            print("Matched the enum name to value %d" % current)
+                        return current
+                    next_value = None if current is None else current + 1
+    except Exception:
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+        print("Unexpected error in lookup_enum: %s: %s (%s:%d)" %
+              (exc_type.__name__, exc_obj, fname, exc_tb.tb_lineno))
+    print("Unable to find enum value")
+    return None
+
+# A C integer literal (decimal, octal or hexadecimal) with optional U/L suffixes
+_C_INT_LITERAL = re.compile(r"^(0[xX][0-9a-fA-F]+|[0-9]+)[uUlL]*$")
+
+
+def _c_int_literal(token):
+    """Return the value of the C integer literal ``token``, or None if it is not one."""
+    found = _C_INT_LITERAL.match(token)
+    if found is None:
+        return None
+    digits = found.group(1)
+    try:
+        if digits[:2].lower() == "0x":
+            return int(digits, 16)
+        # A leading zero means octal in C
+        if len(digits) > 1 and digits[0] == "0":
+            return int(digits, 8)
+        return int(digits, 10)
+    except ValueError:
+        # e.g. "08": digits that are invalid for the implied base
+        return None
+
+
+def evaluate_string(eval_str, buf):
+    """Evaluate the C constant expression ``eval_str`` to a signed 32 bit int.
+
+    Every word in the expression is replaced by a decimal number: integer
+    literals are converted directly (U/L suffixes dropped), anything else is
+    looked up as an enumerator in ``buf`` through lookup_enum().  Whole
+    tokens are substituted, so one name being a substring of another cannot
+    corrupt the expression.  The resulting arithmetic is then evaluated.
+
+    Raises:
+        ValueError: if the expression cannot be reduced to a number, for
+            example because an enumerator could not be found.
+    """
+    if DEBUG:
+        print("Evaluating string %s" % eval_str)
+
+    # Regex expression for pulling apart the values in the preprocessor output
+    eval_regex = r"(?P<val>\w+)"
+    if not re.search(eval_regex, eval_str):
+        print("Could not find any matches")
+
+    def resolve(found):
+        token = found.group(0)
+        number = _c_int_literal(token)
+        if number is None:
+            # Need to do an enum look up for anything that is not a number
+            number = lookup_enum(token, buf)
+        if number is None:
+            # Left as is; the evaluation below then fails on it
+            return token
+        # Parenthesised so a negative value cannot fuse with an operator
+        return "(%d)" % number
+
+    local_s = re.sub(eval_regex, resolve, eval_str)
+
+    try:
+        # NOTE(review): eval() on text taken from the preprocessor output.
+        # Builtins and names are disabled so an unresolved identifier fails
+        # instead of picking up a Python object.  "/" becomes "//" so C
+        # integer division stays integral on Python 3.
+        # Also, need to convert to a signed 32 bit int for the output value
+        value = ctypes.c_int32(eval(local_s.replace("/", "//"),
+                                    {"__builtins__": {}}, {})).value
+    except Exception:
+        print("Could not parse string %s" % local_s)
+        print("Failed to find value for %s" % eval_str)
+        raise ValueError("Failed to find value for %s" % eval_str)
+
+    if DEBUG:
+        print("Found value %d for %s" % (value, eval_str))
+    return value
+
+def remove_output(outfile):
+    """Close ``outfile``, delete it from disk and exit with status 1."""
+    outfile.close()
+    # The name attribute remains available after the file has been closed
+    os.remove(outfile.name)
+    sys.exit(1)
+
+def main():
+    """Parse the command line and generate the requested table header.
+
+    Positional arguments are ``prefix``, ``source`` (the preprocessor output
+    to read) and ``output`` (the header to write).  Every
+    ``{ (value), ("name"), 0, 0 }`` initializer found in the source becomes
+    one Entry; the string table is always written, followed by the tables
+    selected with --s2i, --i2s and --i2s-transtab.
+
+    On any failure the partially written output file is removed and the
+    process exits with status 1.
+    """
+    global ENTRIES
+
+    # Setup the argument parser and parse the arguments given
+    parser = argparse.ArgumentParser(description='Generate tables header files.')
+    parser.add_argument('--i2s', dest='gen_i2s', action='store_true',
+            help='Generate i2s tables')
+    parser.add_argument('--i2s-transtab', dest='gen_i2s_transtab', action='store_true',
+            help='Generate transtab tables')
+    parser.add_argument('--s2i', dest='gen_s2i', action='store_true',
+            help='Generate s2i tables')
+    # Make sure uppercase and lowercase are mutually exclusive
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('--uppercase', dest='uppercase', action='store_true',
+            help='All characters are uppercase')
+    group.add_argument('--lowercase', dest='lowercase', action='store_true',
+            help='All characters are lowercase')
+    parser.add_argument('--duplicate-ints', dest='allow_duplicate_ints', action='store_true',
+            help='Allow duplicate integers')
+    parser.add_argument('prefix', help='The prefix of the output file to use')
+    parser.add_argument('source', type=argparse.FileType('r'),
+            help='The source of the preprocessor from the compiler')
+    parser.add_argument('output', type=argparse.FileType('w'),
+            help='The output header file')
+    args = parser.parse_args()
+
+    # Regex pattern to parse out the macro and string from the _S calls.
+    # A raw string keeps the backslashes for the regex engine.
+    source_regex = r'{ \((?P<val>.*?)\), \("(?P<s>\S+)"\), 0, 0 }'
+
+    # Read the preprocessor output and find all of the values that need to
+    # be looked up
+    buf = args.source.read()
+    args.source.close()
+    matches = re.findall(source_regex, buf, flags=re.MULTILINE)
+
+    # Check to make sure we have matches
+    if not matches:
+        print("Failed to find valid source")
+        # remove_output() exits with status 1
+        remove_output(args.output)
+
+    try:
+        # Create all of the entry structures, remembering the source order
+        for index, (expression, name) in enumerate(matches):
+            entry = Entry(name, evaluate_string(expression, buf))
+            entry.set_orig_index(index)
+            ENTRIES.append(entry)
+            if DEBUG:
+                print(entry)
+
+        # Sort the entries alphabetically
+        ENTRIES = sorted(ENTRIES, key=attrgetter('st'))
+        # Print out the output header
+        args.output.write("/* This is a generated file, see Makefile.am for its inputs. */\n")
+        output_strings(args.prefix, args.output)
+        if args.gen_s2i:
+            output_s2i(args.prefix, args.output, args.uppercase, args.lowercase)
+        if args.gen_i2s:
+            ENTRIES = sorted(ENTRIES, key=attrgetter('val'))
+            output_i2s(args.prefix, args.output, args.allow_duplicate_ints)
+        if args.gen_i2s_transtab:
+            ENTRIES = sorted(ENTRIES, key=attrgetter('orig_index'))
+            output_i2s_transtab(args.prefix, args.output)
+        # Flush and close explicitly so write errors are caught here
+        args.output.close()
+    except BaseException:
+        # Catch everything, including an interrupt, so that a truncated
+        # header is never left behind for the build to pick up.
+        print("Failed to write the output file correctly")
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+        print("Unexpected error: %s: %s (%s:%d)" %
+              (exc_type.__name__, exc_obj, fname, exc_tb.tb_lineno))
+        # remove_output() exits with status 1
+        remove_output(args.output)
+
+if __name__ == '__main__':
+    main()
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/linux-audit/attachments/20130823/5575757b/attachment.htm>