[PATCH 06/16] rpcgen: add an XDR protocol parser

Daniel P. Berrangé berrange at redhat.com
Wed Mar 8 16:39:03 UTC 2023


This adds a parser capable of handling the XDR protocol files.

The parsing grammar requirements are detailed in

  https://www.rfc-editor.org/rfc/rfc4506#section-6.3

Signed-off-by: Daniel P. Berrangé <berrange at redhat.com>
---
 scripts/rpcgen/rpcgen/parser.py     | 497 ++++++++++++++++++++++++++++
 scripts/rpcgen/tests/meson.build    |   1 +
 scripts/rpcgen/tests/test_parser.py |  91 +++++
 3 files changed, 589 insertions(+)
 create mode 100644 scripts/rpcgen/rpcgen/parser.py
 create mode 100644 scripts/rpcgen/tests/test_parser.py

diff --git a/scripts/rpcgen/rpcgen/parser.py b/scripts/rpcgen/rpcgen/parser.py
new file mode 100644
index 0000000000..7efbe5468e
--- /dev/null
+++ b/scripts/rpcgen/rpcgen/parser.py
@@ -0,0 +1,497 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+from .lexer import (
+    XDRLexer,
+    XDRTokenPunctuation,
+    XDRTokenIdentifier,
+    XDRTokenCEscape,
+    XDRTokenConstant,
+)
+from .ast import (
+    XDRSpecification,
+    XDRDefinitionConstant,
+    XDRDefinitionTypedef,
+    XDRDefinitionEnum,
+    XDRDefinitionStruct,
+    XDRDefinitionUnion,
+    XDRDefinitionCEscape,
+    XDRDeclarationScalar,
+    XDRDeclarationPointer,
+    XDRDeclarationFixedArray,
+    XDRDeclarationVariableArray,
+    XDRTypeVoid,
+    XDRTypeChar,
+    XDRTypeUnsignedChar,
+    XDRTypeShort,
+    XDRTypeUnsignedShort,
+    XDRTypeInt,
+    XDRTypeUnsignedInt,
+    XDRTypeHyper,
+    XDRTypeUnsignedHyper,
+    XDRTypeFloat,
+    XDRTypeDouble,
+    XDRTypeBool,
+    XDRTypeOpaque,
+    XDRTypeString,
+    XDRTypeCustom,
+    XDREnumValue,
+    XDREnumBody,
+    XDRTypeEnum,
+    XDRStructBody,
+    XDRTypeStruct,
+    XDRUnionCase,
+    XDRUnionBody,
+    XDRTypeUnion,
+)
+
+
+# We are parsing (approximately) the following grammar
+# from RFC 4506 #6.3:
+#
+#    declaration:
+#         type-specifier identifier
+#       | type-specifier identifier "[" value "]"
+#       | type-specifier identifier "<" [ value ] ">"
+#       | "opaque" identifier "[" value "]"
+#       | "opaque" identifier "<" [ value ] ">"
+#       | "string" identifier "<" [ value ] ">"
+#       | type-specifier "*" identifier
+#       | "void"
+#
+#    value:
+#         constant
+#       | identifier
+#
+#    constant:
+#       decimal-constant | hexadecimal-constant | octal-constant
+#
+#    type-specifier:
+#         [ "unsigned" ] "int"
+#       | [ "unsigned" ] "hyper"
+#       | "float"
+#       | "double"
+#       | "quadruple"     /* We're skipping this one */
+#       | "bool"
+#       | enum-type-spec
+#       | struct-type-spec
+#       | union-type-spec
+#       | identifier
+#
+#    enum-type-spec:
+#       "enum" enum-body
+#
+#    enum-body:
+#       "{"
+#          ( identifier "=" value )
+#          ( "," identifier "=" value )*
+#       "}"
+#
+#    struct-type-spec:
+#       "struct" struct-body
+#
+#    struct-body:
+#       "{"
+#          ( declaration ";" )
+#          ( declaration ";" )*
+#       "}"
+#
+#    union-type-spec:
+#       "union" union-body
+#
+#    union-body:
+#       "switch" "(" declaration ")" "{"
+#          case-spec
+#          case-spec *
+#          [ "default" ":" declaration ";" ]
+#       "}"
+#
+#    case-spec:
+#      ( "case" value ":")
+#      ( "case" value ":") *
+#      declaration ";"
+#
+#    constant-def:
+#       "const" identifier "=" constant ";"
+#
+#    type-def:
+#         "typedef" declaration ";"
+#       | "enum" identifier enum-body ";"
+#       | "struct" identifier struct-body ";"
+#       | "union" identifier union-body ";"
+#
+#    definition:
+#         type-def
+#       | constant-def
+#
+#    specification:
+#         definition *
+#
+# Notable divergence:
+#
+#   - In 'type-specifier' we allow 'char' and 'short'
+#     in signed and unsigned variants
+#
+#   - In 'definition' we allow '%...' as escape C code
+#     to passthrough to the header output
+#
+#   - In 'enum-type-spec' we allow a bare enum name
+#     instead of enum body
+#
+#   - In 'struct-type-spec' we allow a bare struct name
+#     instead of struct body
+#
+#   - In 'union-type-spec' we allow a bare union name
+#     instead of union body
+#
+class XDRParser:
+    def __init__(self, fp):
+        self.lexer = XDRLexer(fp)  # token source built from the input 'fp'
+        self.typedefs = {}  # type name -> definition, filled in as types are parsed
+
+    def parse(self):
+        """Parse definitions until input ends; return the XDRSpecification."""
+        spec = XDRSpecification()
+        definition = self.parse_definition()
+        while definition is not None:
+            spec.definitions.append(definition)
+            definition = self.parse_definition()
+        return spec
+
+    def parse_definition(self):
+        """Parse one top-level definition, or return None at end of input.
+
+        A '%...' C escape token becomes an XDRDefinitionCEscape holding the
+        text after the leading character. Any other definition must start
+        with a known keyword and must be terminated by ';'.
+        """
+        first = self.lexer.next()
+        if first is None:
+            return None
+        if type(first) == XDRTokenCEscape:
+            return XDRDefinitionCEscape(first.value[1:])
+        if type(first) != XDRTokenIdentifier:
+            raise Exception("Expected identifier, but got %s" % first)
+
+        # Keyword -> method parsing everything between keyword and ';'
+        handlers = {
+            "const": self.parse_definition_const,
+            "typedef": self.parse_definition_typedef,
+            "enum": self.parse_definition_enum,
+            "struct": self.parse_definition_struct,
+            "union": self.parse_definition_union,
+        }
+        handler = handlers.get(first.value)
+        if handler is None:
+            raise Exception("Unexpected identifier %s" % first)
+        definition = handler()
+
+        terminator = self.lexer.next()
+        if type(terminator) != XDRTokenPunctuation or terminator.value != ";":
+            raise Exception("Expected ';', but got %s" % terminator)
+
+        return definition
+
+    def parse_definition_const(self):
+        """Parse 'identifier "=" value' following the 'const' keyword."""
+        name = self.lexer.next()
+        if type(name) != XDRTokenIdentifier:
+            raise Exception("Expected identifier, but got %s" % name)
+        equals = self.lexer.next()
+        if type(equals) != XDRTokenPunctuation or equals.value != "=":
+            raise Exception("Expected '=', but got %s" % equals)
+
+        # The value may be a literal constant or a symbolic identifier
+        value = self.lexer.next()
+        if type(value) not in (XDRTokenConstant, XDRTokenIdentifier):
+            raise Exception("Expected constant, but got %s" % value)
+        return XDRDefinitionConstant(name.value, value.value)
+
+    def parse_definition_typedef(self):
+        """Parse the declaration after 'typedef'; register it by its name."""
+        declaration = self.parse_declaration()
+        alias = declaration.identifier
+        if alias in self.typedefs:
+            raise Exception("Type '%s' already defined" % alias)
+        definition = self.typedefs[alias] = XDRDefinitionTypedef(declaration)
+        return definition
+
+    def parse_definition_enum(self):
+        """Parse 'identifier enum-body' and register the named enum type."""
+        token = self.lexer.next()
+        if type(token) != XDRTokenIdentifier:
+            raise Exception("Expected identifier, but got %s" % token)
+        body = self.parse_enum_body()
+
+        # The body has been consumed by the time a duplicate is reported
+        ident = token.value
+        if ident in self.typedefs:
+            raise Exception("Type '%s' already defined" % ident)
+        definition = self.typedefs[ident] = XDRDefinitionEnum(ident, body)
+        return definition
+
+    def parse_definition_struct(self):
+        """Parse 'identifier struct-body' and register the named struct type."""
+        token = self.lexer.next()
+        if type(token) != XDRTokenIdentifier:
+            raise Exception("Expected identifier, but got %s" % token)
+        body = self.parse_struct_body()
+
+        # The body has been consumed by the time a duplicate is reported
+        ident = token.value
+        if ident in self.typedefs:
+            raise Exception("Type '%s' already defined" % ident)
+        definition = self.typedefs[ident] = XDRDefinitionStruct(ident, body)
+        return definition
+
+    def parse_definition_union(self):
+        """Parse 'identifier union-body' and register the named union type."""
+        token = self.lexer.next()
+        if type(token) != XDRTokenIdentifier:
+            raise Exception("Expected identifier, but got %s" % token)
+        body = self.parse_union_body()
+
+        # The body has been consumed by the time a duplicate is reported
+        ident = token.value
+        if ident in self.typedefs:
+            raise Exception("Type '%s' already defined" % ident)
+        definition = self.typedefs[ident] = XDRDefinitionUnion(ident, body)
+        return definition
+
+    def parse_declaration(self):
+        """Parse one 'declaration' and return its XDRDeclaration* node.
+
+        Raises Exception when the tokens do not form a valid declaration.
+        """
+        typ = self.parse_type()
+        if type(typ) == XDRTypeVoid:
+            return XDRDeclarationScalar(typ, None)
+
+        ident = self.lexer.next()
+        pointer = False
+        if type(ident) == XDRTokenPunctuation:
+            if ident.value != "*":
+                raise Exception("Expected '*' or identifer, but got %s" % ident)
+            if type(typ) == XDRTypeString or type(typ) == XDRTypeOpaque:
+                raise Exception("Pointer invalid for 'string' and 'opaque' types")
+            pointer = True
+            ident = self.lexer.next()
+
+        # The name must be an identifier token, not a constant or punctuation
+        if type(ident) != XDRTokenIdentifier:
+            raise Exception("Expected identifier, but got %s" % ident)
+
+        bracket = self.lexer.peek()
+        if type(bracket) == XDRTokenPunctuation:
+            if bracket.value == "[":
+                _ = self.lexer.next()
+                value = self.lexer.next()
+                if type(value) not in [XDRTokenConstant, XDRTokenIdentifier]:
+                    raise Exception("Expected constant, but got %s" % value)
+                close = self.lexer.next()
+                if type(close) != XDRTokenPunctuation or close.value != "]":
+                    raise Exception("Expected ']', but got %s" % close)
+                if type(typ) == XDRTypeString:
+                    raise Exception("Fixed array invalid for 'string' type")
+                return XDRDeclarationFixedArray(typ, ident.value, value.value)
+            elif bracket.value == "<":
+                _ = self.lexer.next()
+                maybe_value = self.lexer.peek()
+                value = None
+                if type(maybe_value) in [XDRTokenConstant, XDRTokenIdentifier]:
+                    value = self.lexer.next().value
+                close = self.lexer.next()
+                if type(close) != XDRTokenPunctuation or close.value != ">":
+                    raise Exception("Expected '>', but got %s" % close)
+                return XDRDeclarationVariableArray(typ, ident.value, value)
+
+        if pointer:
+            return XDRDeclarationPointer(typ, ident.value)
+        return XDRDeclarationScalar(typ, ident.value)
+
+    def parse_type(self):
+        """Parse a type keyword or type name and return the XDRType* object."""
+        typ = self.lexer.next()
+        if type(typ) != XDRTokenIdentifier:
+            raise Exception("Expected identifier, but got %s" % typ)
+        # The token after 'unsigned' is consumed only if it is a type keyword
+        if typ.value == "unsigned":
+            typ = self.lexer.peek()
+            if type(typ) != XDRTokenIdentifier:
+                raise Exception("Expected identifier, but got %s" % typ)
+            if typ.value == "char":
+                _ = self.lexer.next()
+                return XDRTypeUnsignedChar()
+            elif typ.value == "short":
+                _ = self.lexer.next()
+                return XDRTypeUnsignedShort()
+            elif typ.value == "int":
+                _ = self.lexer.next()
+                return XDRTypeUnsignedInt()
+            elif typ.value == "hyper":
+                _ = self.lexer.next()
+                return XDRTypeUnsignedHyper()
+            else:
+                # Bare 'unsigned' isn't allowed by the 'type-specifier'
+                # grammar in RFC 4506, but rpcgen allows it
+                return XDRTypeUnsignedInt()
+
+        if typ.value == "void":
+            return XDRTypeVoid()
+        elif typ.value == "char":
+            return XDRTypeChar()
+        elif typ.value == "short":
+            return XDRTypeShort()
+        elif typ.value == "int":
+            return XDRTypeInt()
+        elif typ.value == "hyper":
+            return XDRTypeHyper()
+        elif typ.value == "float":
+            return XDRTypeFloat()
+        elif typ.value == "double":
+            return XDRTypeDouble()
+        elif typ.value == "bool":
+            return XDRTypeBool()
+        elif typ.value == "enum":
+            return self.parse_type_enum()
+        elif typ.value == "struct":
+            return self.parse_type_struct()
+        elif typ.value == "union":
+            return self.parse_type_union()
+        elif typ.value == "opaque":
+            return XDRTypeOpaque()
+        elif typ.value == "string":
+            return XDRTypeString()
+        else:
+            return XDRTypeCustom(typ.value, self.typedefs.get(typ.value, None))
+
+    def parse_enum_body(self):
+        """Parse '{ NAME = constant, ... }' and return an XDREnumBody.
+
+        Raises Exception on the first token that does not fit.
+        """
+        body = self.lexer.next()
+        if type(body) != XDRTokenPunctuation or body.value != "{":
+            raise Exception("Expected '{', but got %s" % body)
+
+        values = []
+        while True:
+            ident = self.lexer.next()
+            if type(ident) != XDRTokenIdentifier:
+                raise Exception("Expected identifier, but got %s" % ident)
+
+            equal = self.lexer.next()
+            if type(equal) != XDRTokenPunctuation or equal.value != "=":
+                raise Exception("Expected '=', but got %s" % equal)
+
+            value = self.lexer.next()
+            if type(value) != XDRTokenConstant:
+                raise Exception("Expected constant, but got %s" % value)
+
+            # Both the token type and its text must match, hence 'or'
+            sep = self.lexer.next()
+            if type(sep) != XDRTokenPunctuation or sep.value not in ["}", ","]:
+                raise Exception("Expected '}' or ',', but got %s" % sep)
+            values.append(XDREnumValue(ident.value, value.value))
+            if sep.value == "}":
+                break
+
+        return XDREnumBody(values)
+
+    def parse_type_enum(self):
+        """Parse the enum body that follows an 'enum' type keyword."""
+        return XDRTypeEnum(self.parse_enum_body())
+
+    def parse_struct_body(self):
+        """Parse '{ declaration; ... }' (one or more fields) into XDRStructBody."""
+        body = self.lexer.next()
+        if type(body) != XDRTokenPunctuation or body.value != "{":
+            raise Exception("Expected '{', but got %s" % body)
+
+        fields = []
+        while True:
+            fields.append(self.parse_declaration())
+
+            separator = self.lexer.next()
+            if type(separator) != XDRTokenPunctuation or separator.value != ";":
+                raise Exception("Expected ';', but got %s" % separator)
+
+            end = self.lexer.peek()
+            if type(end) == XDRTokenPunctuation and end.value == "}":
+                break
+
+        # discard the '}' we peeked at to end the loop
+        _ = self.lexer.next()
+        return XDRStructBody(fields)
+
+    def parse_type_struct(self):
+        """Parse the struct body that follows a 'struct' type keyword."""
+        return XDRTypeStruct(self.parse_struct_body())
+
+    def parse_union_body(self):
+        """Parse 'switch (declaration) { arms }' and return an XDRUnionBody.
+
+        Each arm is one 'case value:' or 'default:' label followed by a
+        declaration and ';'. Only one 'default' arm is accepted.
+        """
+        ident = self.lexer.next()
+        if type(ident) != XDRTokenIdentifier or ident.value != "switch":
+            raise Exception("Expected 'switch', but got %s" % ident)
+
+        bracket = self.lexer.next()
+        if type(bracket) != XDRTokenPunctuation or bracket.value != "(":
+            raise Exception("Expected '(', but got %s" % bracket)
+
+        discriminator = self.parse_declaration()
+
+        bracket = self.lexer.next()
+        if type(bracket) != XDRTokenPunctuation or bracket.value != ")":
+            raise Exception("Expected ')', but got %s" % bracket)
+
+        bracket = self.lexer.next()
+        if type(bracket) != XDRTokenPunctuation or bracket.value != "{":
+            raise Exception("Expected '{', but got %s" % bracket)
+
+        default = None
+        cases = []
+        while True:
+            ident = self.lexer.next()
+            if type(ident) != XDRTokenIdentifier or ident.value not in [
+                "default",
+                "case",
+            ]:
+                raise Exception("Expected 'default' or 'case', but got %s" % ident)
+
+            value = None
+            if ident.value == "case":
+                value = self.lexer.next()
+                if type(value) not in [XDRTokenConstant, XDRTokenIdentifier]:
+                    raise Exception("Expected constant, but got %s" % value)
+            elif default is not None:
+                raise Exception("Duplicate 'default' clause")
+
+            # Both kinds of arm continue with ':' and then a declaration
+            sep = self.lexer.next()
+            if type(sep) != XDRTokenPunctuation or sep.value != ":":
+                raise Exception("Expected ':', but got %s" % sep)
+
+            decl = self.parse_declaration()
+            if value is not None:
+                cases.append(XDRUnionCase(value.value, decl))
+            else:
+                default = decl
+
+            separator = self.lexer.next()
+            if type(separator) != XDRTokenPunctuation or separator.value != ";":
+                raise Exception("Expected ';', but got %s" % separator)
+
+            end = self.lexer.peek()
+            if type(end) == XDRTokenPunctuation and end.value == "}":
+                break
+
+        # discard the '}' we peeked at to end the loop
+        _ = self.lexer.next()
+        return XDRUnionBody(discriminator, cases, default)
+
+    def parse_type_union(self):
+        """Parse the union body that follows a 'union' type keyword."""
+        return XDRTypeUnion(self.parse_union_body())
diff --git a/scripts/rpcgen/tests/meson.build b/scripts/rpcgen/tests/meson.build
index 9162412d31..4b1ea308ce 100644
--- a/scripts/rpcgen/tests/meson.build
+++ b/scripts/rpcgen/tests/meson.build
@@ -1,3 +1,4 @@
 rpcgen_tests = files([
     'test_lexer.py',
+    'test_parser.py',
 ])
diff --git a/scripts/rpcgen/tests/test_parser.py b/scripts/rpcgen/tests/test_parser.py
new file mode 100644
index 0000000000..8527b8d6e2
--- /dev/null
+++ b/scripts/rpcgen/tests/test_parser.py
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+from pathlib import Path
+
+from rpcgen.ast import (
+    XDRSpecification,
+    XDRDefinitionConstant,
+    XDRDefinitionEnum,
+    XDRDefinitionUnion,
+    XDRDefinitionStruct,
+    XDRDeclarationScalar,
+    XDRDeclarationVariableArray,
+    XDREnumValue,
+    XDREnumBody,
+    XDRStructBody,
+    XDRUnionCase,
+    XDRUnionBody,
+    XDRTypeCustom,
+    XDRTypeVoid,
+    XDRTypeString,
+    XDRTypeOpaque,
+)
+from rpcgen.parser import XDRParser
+
+
+def test_parser():
+    """Parse 'simple.x' beside this file and compare with a hand-built AST."""
+    p = Path(Path(__file__).parent, "simple.x")
+    with p.open("r") as fp:
+        parser = XDRParser(fp)
+        got = parser.parse()
+
+    enum = XDRDefinitionEnum(
+        "filekind",
+        XDREnumBody(
+            [
+                XDREnumValue("TEXT", "0"),
+                XDREnumValue("DATA", "1"),
+                XDREnumValue("EXEC", "2"),
+            ],
+        ),
+    )
+
+    union = XDRDefinitionUnion(
+        "filetype",
+        XDRUnionBody(
+            XDRDeclarationScalar(XDRTypeCustom("filekind", enum), "kind"),
+            [
+                XDRUnionCase("TEXT", XDRDeclarationScalar(XDRTypeVoid(), None)),
+                XDRUnionCase(
+                    "DATA",
+                    XDRDeclarationVariableArray(
+                        XDRTypeString(), "creator", "MAXNAMELEN"
+                    ),
+                ),
+                XDRUnionCase(
+                    "EXEC",
+                    XDRDeclarationVariableArray(
+                        XDRTypeString(), "interpretor", "MAXNAMELEN"
+                    ),
+                ),
+            ],
+            None,  # no 'default' arm
+        ),
+    )
+
+    struct = XDRDefinitionStruct(
+        "file",
+        XDRStructBody(
+            [
+                XDRDeclarationVariableArray(XDRTypeString(), "filename", "MAXNAMELEN"),
+                XDRDeclarationScalar(XDRTypeCustom("filetype", union), "type"),
+                XDRDeclarationVariableArray(XDRTypeString(), "owner", "MAXUSERNAME"),
+                XDRDeclarationVariableArray(XDRTypeOpaque(), "data", "MAXFILELEN"),
+            ]
+        ),
+    )
+
+    want = XDRSpecification()
+    want.definitions.extend(
+        [
+            XDRDefinitionConstant("MAXUSERNAME", "32"),
+            XDRDefinitionConstant("MAXFILELEN", "65535"),
+            XDRDefinitionConstant("MAXNAMELEN", "255"),
+            enum,
+            union,
+            struct,
+        ]
+    )
+
+    assert str(got) == str(want)  # compared through str() of both objects
-- 
2.39.1



More information about the libvir-list mailing list