[Cluster-devel] [PATCH dlm-tool 13/14] dlm_controld: plock log lock state

Andreas Gruenbacher agruenba at redhat.com
Fri Mar 3 15:52:55 UTC 2023


Now, let me get to the core of the matter.  We've been talking about
using user-space (SDT) trace points for collecting the data, and I still
think that that's what we should do instead of introducing a new
dlm_controld log file.  In the dlm_controld code, this would look like
the below patch.

Note that <sys/sdt.h> is part of the systemtap-sdt-devel package, so a
"BuildRequires: systemtap-sdt-devel" dependency will be needed in
dlm.spec.

With that, we can use standard tools like perf, bpftrace, etc. for
collecting all the relevant information without any further
modifications to dlm_controld.  We can also collect additional kernel
and user-space trace point data at the same time with very little
additional effort.

For example, here is how to register the four plock dlm_controld trace
points in perf:

  for ev in \
      sdt_dlm_controld:plock_lock_begin \
      sdt_dlm_controld:plock_lock_end \
      sdt_dlm_controld:plock_wait_begin \
      sdt_dlm_controld:plock_wait_end; do \
    perf probe -x /usr/sbin/dlm_controld $ev; \
  done

The events can then be recorded with "perf record":

  perf record \
    -e sdt_dlm_controld:plock_lock_begin \
    -e sdt_dlm_controld:plock_lock_end \
    -e sdt_dlm_controld:plock_wait_begin \
    -e sdt_dlm_controld:plock_wait_end \
    [...]

We've already gone through how the resulting log can be processed with
"perf script".  One possible result would be the log file format that
lockdb_plot expects, but there are countless other possibilities.

Other useful "tricks":

  $ bpftrace -l 'usdt:/usr/sbin/dlm_controld:*'

  $ readelf -n /usr/sbin/dlm_controld | sed -ne '/\.note\.stapsdt/,/^$/p'


Thanks,
Andreas

-- 
 dlm_controld/plock.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/dlm_controld/plock.c b/dlm_controld/plock.c
index 39bdd1f6..588bcaaa 100644
--- a/dlm_controld/plock.c
+++ b/dlm_controld/plock.c
@@ -8,6 +8,7 @@
 
 #include "dlm_daemon.h"
 #include <linux/dlm_plock.h>
+#include <sys/sdt.h>
 
 /* FIXME: remove this once everyone is using the version of
  * dlm_plock.h which defines it */
@@ -211,6 +212,11 @@ static uint64_t dt_usec(const struct timeval *start, const struct timeval *stop)
 static void plock_print_start_waiter(const struct lockspace *ls,
 				     struct lock_waiter *w)
 {
+	const struct dlm_plock_info *info = &w->info;
+
+	DTRACE_PROBE7(dlm_controld, plock_wait_begin, info->number, w, info->start,
+		      info->end, info->nodeid, info->pid, info->owner);
+
 	log_plock(ls, "state waiter start %llx %p %llx-%llx %d/%u/%llx",
 		  (unsigned long long)w->info.number,
 		  w,
@@ -223,6 +229,10 @@ static void plock_print_start_waiter(const struct lockspace *ls,
 static void plock_print_end_waiter(const struct lockspace *ls,
 				   const struct lock_waiter *w)
 {
+	const struct dlm_plock_info *info = &w->info;
+
+	DTRACE_PROBE2(dlm_controld, plock_wait_end, info->number, w);
+
 	log_plock(ls, "state waiter end %llx %p",
 		  (unsigned long long)w->info.number, w);
 }
@@ -230,6 +240,9 @@ static void plock_print_end_waiter(const struct lockspace *ls,
 static void plock_print_start_plock(const struct lockspace *ls, uint64_t number,
 				    const struct posix_lock *po)
 {
+	DTRACE_PROBE8(dlm_controld, plock_lock_begin, number, po, po->ex, po->start,
+		      po->end, po->nodeid, po->pid, po->owner);
+
 	log_plock(ls, "state plock start %llx %p %s %llx-%llx %d/%u/%llx",
 		  (unsigned long long)number,
 		  po,
@@ -243,6 +256,8 @@ static void plock_print_start_plock(const struct lockspace *ls, uint64_t number,
 static void plock_print_end_plock(const struct lockspace *ls, uint64_t number,
 				  const struct posix_lock *po)
 {
+	DTRACE_PROBE1(dlm_controld, pock_lock_end, po);
+
 	log_plock(ls, "state plock end %llx %p",
 		  (unsigned long long)number, po);
 }
-- 
2.39.0



More information about the Cluster-devel mailing list