[libvirt PATCH] [RFC] scripts: Check spelling

Tim Wiederhake twiederh at redhat.com
Fri Oct 1 14:00:18 UTC 2021


This is a wrapper for codespell [1], a spell checker for source code.
Codespell does not compare words to a dictionary, but rather works by
checking words against a list of common typos, making it produce fewer
false positives than other solutions.

The script in this patch works around the lack of per-directory ignore
lists and some oddities regarding capitalization in ignore lists.

[1] (https://github.com/codespell-project/codespell/)

RFC:
Is there interest in having something like this in CI?
Examples of spelling mistakes that were found using codespell:
4ad3c95f4bef5c7c9657de470fb74a4d14c8a331,
785a11cec8693de7df024aae68975dd1799b646a,
1452317b5c727eb17178942012f57f0c37631ae4.

Signed-off-by: Tim Wiederhake <twiederh at redhat.com>
---
 scripts/check-spelling.py | 115 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100755 scripts/check-spelling.py

diff --git a/scripts/check-spelling.py b/scripts/check-spelling.py
new file mode 100755
index 0000000000..01371c0d1e
--- /dev/null
+++ b/scripts/check-spelling.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+import subprocess
+import os
+
+
+IGNORE_LIST = [
+    # ignore all translation files
+    ("/po/", []),
+
+    # ignore this script
+    ("/scripts/check-spelling.py", []),
+
+    # 3rd-party: keycodemapdb
+    ("/src/keycodemapdb/", []),
+
+    # 3rd-party: VirtualBox SDK
+    ("/src/vbox/vbox_CAPI", [
+        "aAdd",
+        "aCount",
+        "aLocation",
+        "aNumber",
+        "aParent",
+        "progess"]),
+
+    # 3rd-party: qemu
+    ("/tests/qemucapabilitiesdata/caps_", "encyption"),
+
+    # other
+    ("/", ["msdos", "MSDOS", "wan", "WAN", "hda", "HDA", "inout"]),
+    ("/NEWS.rst", ["crashers"]),
+    ("/docs/gitdm/companies/others", "Archiv"),
+    ("/docs/glib-adoption.rst", ["preferrable"]),
+    ("/docs/js/main.js", "whats"),
+    ("/examples/polkit/libvirt-acl.rules", ["userA", "userB", "userC"]),
+    ("/src/libvirt-domain.c", "PTD"),
+    ("/src/libxl/libxl_logger.c", ["purposedly"]),
+    ("/src/nwfilter/nwfilter_dhcpsnoop.c", "ether"),
+    ("/src/nwfilter/nwfilter_ebiptables_driver.c", "parm"),
+    ("/src/nwfilter/nwfilter_learnipaddr.c", "ether"),
+    ("/src/qemu/qemu_agent.c", "crypted"),
+    ("/src/qemu/qemu_agent.h", "crypted"),
+    ("/src/security/apparmor/libvirt-lxc", "devic"),
+    ("/src/security/apparmor/libvirt-qemu", "readby"),
+    ("/src/storage_file/storage_file_probe.c", "conectix"),
+    ("/src/util/virnetdevmacvlan.c", "calld"),
+    ("/src/util/virtpm.c", "parm"),
+    ("/tests/qemuagenttest.c", "IST"),
+    ("/tests/storagepoolxml2xml", "cant"),
+    ("/tests/sysinfodata/", ["sie"]),
+    ("/tests/testutils.c", ["nIn"]),
+    ("/tests/vircgroupdata/ovirt-node-6.6.mounts", "hald"),
+    ("/tests/virhostcpudata/", ["sie"]),
+    ("/tools/virt-host-validate-common.c", ["sie"]),
+]
+
+
+def check_spelling(directory):
+    """Returns list of tuple(filename, line number, word, suggestion)."""
+    process = subprocess.run(
+        ["codespell", directory],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        universal_newlines=True)
+
+    if process.returncode not in (0, 65):
+        exit("error: unexpected returncode %s" % process.returncode)
+
+    if process.stderr:
+        exit("error: unexpected output to stderr: \"%s\"" % process.stderr)
+
+    line_pattern = re.compile("^(.*):(.*): (.*) ==> (.*)$")
+    for line in process.stdout.split("\n"):
+        line = line.strip().replace(directory, "")
+        if not line:
+            continue
+        match = line_pattern.match(line)
+        if not match:
+            exit("error: unexpected line: \"%s\"" % line)
+        yield match.groups()
+
+
+def ignore(filename, linenumber, word, suggestion):
+    # Ignore abbreviations and ad-hoc variable names
+    if len(word) <= 2:
+        return True
+
+    for f, w in IGNORE_LIST:
+        if not filename.startswith(f):
+            continue
+        if word in w or not w:
+            return True
+    return False
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Check spelling")
+    parser.add_argument(
+        "dir",
+        help="Path to source directory",
+        type=os.path.realpath)
+    args = parser.parse_args()
+
+    findings = [f for f in check_spelling(args.dir) if not ignore(*f)]
+    if findings:
+        template = "(\"{0}\", \"{2}\"),\t# line {1}, \"{3}\"?"
+        for finding in findings:
+            print(template.format(*finding))
+        exit("error: %s spelling errors" % len(findings))
+
+
+if __name__ == "__main__":  # only when run as a script, not on import
+    main()
-- 
2.31.1




More information about the libvir-list mailing list