[libvirt PATCH v3 1/3] scripts: Check spelling

Tim Wiederhake twiederh at redhat.com
Fri Jan 21 09:41:48 UTC 2022


This is a wrapper for codespell [1], a spell checker for source code.
Codespell does not compare words to a dictionary, but rather works by
checking words against a list of common typos, making it produce fewer
false positives than other solutions.

The script in this patch works around the lack of per-directory ignore
lists and some oddities regarding capitalization in ignore lists.
The ".codespellrc" file is used to coarsely filter out translation and
git files, as scanning those accounts for roughly 50% of the run time
otherwise.

[1] (https://github.com/codespell-project/codespell/)

Signed-off-by: Tim Wiederhake <twiederh at redhat.com>
---
 .codespellrc              |   2 +
 scripts/check-spelling.py | 135 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 .codespellrc
 create mode 100755 scripts/check-spelling.py

diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 0000000000..0c45be445b
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,2 @@
+[codespell]
+skip = .git/,*.po
diff --git a/scripts/check-spelling.py b/scripts/check-spelling.py
new file mode 100755
index 0000000000..ce3e7d89f0
--- /dev/null
+++ b/scripts/check-spelling.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+import subprocess
+import os
+
+
+# Table of findings that must not be reported. Each entry is a tuple
+# (path prefix, words):
+#   - the prefix is compared with str.startswith() against the file name
+#     of a finding, so "" applies to every file and a directory prefix
+#     applies to everything below it;
+#   - words holds the flagged words to ignore under that prefix (some
+#     entries give a single word as a bare string); an empty list ignores
+#     every finding under the prefix.
+IGNORE_LIST = [
+    # ignore this script
+    ("scripts/check-spelling.py", []),
+
+    # 3rd-party: keycodemapdb
+    ("src/keycodemapdb/", []),
+
+    # 3rd-party: VirtualBox SDK
+    ("src/vbox/vbox_CAPI", []),
+
+    # 3rd-party: qemu
+    ("tests/qemucapabilitiesdata/caps_", []),
+
+    # other
+    ("", ["msdos", "MSDOS", "wan", "WAN", "hda", "HDA", "inout"]),
+    ("NEWS.rst", "crashers"),
+    ("docs/gitdm/companies/others", "Archiv"),
+    ("docs/glib-adoption.rst", "preferrable"),
+    ("docs/js/main.js", "whats"),
+    ("examples/polkit/libvirt-acl.rules", ["userA", "userB", "userC"]),
+    ("src/libvirt-domain.c", "PTD"),
+    ("src/libxl/libxl_logger.c", "purposedly"),
+    ("src/nwfilter/nwfilter_dhcpsnoop.c", "ether"),
+    ("src/nwfilter/nwfilter_ebiptables_driver.c", "parm"),
+    ("src/nwfilter/nwfilter_learnipaddr.c", "ether"),
+    ("src/qemu/qemu_agent.c", "crypted"),
+    ("src/qemu/qemu_agent.h", "crypted"),
+    ("src/qemu/qemu_process.c", "wee"),
+    ("src/security/apparmor/libvirt-lxc", "devic"),
+    ("src/security/apparmor/libvirt-qemu", "readby"),
+    ("src/storage_file/storage_file_probe.c", "conectix"),
+    ("src/util/virnetdevmacvlan.c", "calld"),
+    ("src/util/virtpm.c", "parm"),
+    ("tests/qemuagenttest.c", "IST"),
+    ("tests/storagepoolxml2xml", "cant"),
+    ("tests/sysinfodata/", "sie"),
+    ("tests/testutils.c", "nIn"),
+    ("tests/vircgroupdata/ovirt-node-6.6.mounts", "hald"),
+    ("tests/virhostcpudata/", "sie"),
+    ("tools/virt-host-validate-common.c", "sie"),
+]
+
+
+def ignore(filename, linenumber, word, suggestion):
+    if len(word) <= 2:
+        return True
+
+    for f, w in IGNORE_LIST:
+        if not filename.startswith(f):
+            continue
+        if word in w or not w:
+            return True
+    return False
+
+
+def main():
+    line_pattern = re.compile("^(.*):(.*): (.*) ==> (.*)$")
+    output_template = "(\"{0}\", \"{2}\"),\t# line {1}, \"{3}\"?"
+
+    parser = argparse.ArgumentParser(description="Check spelling")
+    parser.add_argument(
+        "dir",
+        help="Path to source directory. "
+        "Defaults to parent directory of this script",
+        type=os.path.realpath,
+        nargs='?')
+    parser.add_argument(
+        "-i",
+        "--ignore",
+        help="File to ignore. Can be specified more than once",
+        metavar="FILE",
+        default=list(),
+        action="append")
+    parser.add_argument(
+        "--ignore-untracked",
+        help="Ignore all files not tracked by git",
+        action="store_true")
+    args = parser.parse_args()
+
+    if not args.dir:
+        args.dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+
+    if args.ignore_untracked:
+        args.ignore.extend(subprocess.check_output(
+            ["git", "-C", args.dir, "ls-files", "--others"],
+            universal_newlines=True).split("\n"))
+
+    try:
+        process = subprocess.run(
+            [
+                "codespell",
+                args.dir,
+                "--config",
+                os.path.join(args.dir, ".codespellrc")],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True)
+    except FileNotFoundError:
+        exit("error: codespell not found")
+    if process.returncode not in (0, 65):
+        exit("error: unexpected returncode %s" % process.returncode)
+
+    if process.stderr:
+        exit("error: unexpected output to stderr: \"%s\"" % process.stderr)
+
+    findings = 0
+    for line in process.stdout.split("\n"):
+        line = line.strip().replace(args.dir, "").lstrip("/")
+        if not line:
+            continue
+
+        match = line_pattern.match(line)
+        if not match:
+            exit("error: unexpected line: \"%s\"" % line)
+
+        if match.group(1) in args.ignore or ignore(*match.groups()):
+            continue
+
+        print(output_template.format(*match.groups()))
+        findings += 1
+
+    if findings:
+        exit("error: %s spelling errors" % findings)
+
+
+if __name__ == "__main__":
+    main()
-- 
2.31.1




More information about the libvir-list mailing list