[Linux-cluster] clurgmgrd refuses to die

Jie Gao J.Gao at isu.usyd.edu.au
Mon Jul 24 01:45:04 UTC 2006


In addition, the rgmanager stop script is still running:

ps -eaf |grep rg
root     26354 26349  0 11:08 ?        00:00:00 /bin/sh /etc/rc6.d/K01rgmanager stop

> strace -p 26354
Process 26354 attached - interrupt to quit
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 30924
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
wait4(-1, 0x7fbfffd654, WNOHANG, NULL)  = -1 ECHILD (No child processes)
rt_sigreturn(0xffffffffffffffff)        = 0
rt_sigaction(SIGINT, {SIG_DFL}, {0x432ba0, [], SA_RESTORER, 0x3e1012e380}, 8) = 0
pipe([3, 4])                            = 0
rt_sigprocmask(SIG_BLOCK, [INT CHLD], [], 8) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2a95574470) = 30926
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigaction(SIGCHLD, {0x433e50, [], SA_RESTORER, 0x3e1012e380}, {0x433e50, [], SA_RESTORER, 0x3e1012e380}, 8) = 0
close(4)                                = 0
read(3, "25824\n", 128)                 = 6
read(3, "", 128)                        = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], WNOHANG, NULL) = 30926
wait4(-1, 0x7fbfffd404, WNOHANG, NULL)  = -1 ECHILD (No child processes)
rt_sigreturn(0xffffffffffffffff)        = 0
close(3)                                = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGINT, {0x432ba0, [], SA_RESTORER, 0x3e1012e380}, {SIG_DFL}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigaction(SIGINT, {SIG_DFL}, {0x432ba0, [], SA_RESTORER, 0x3e1012e380}, 8) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8)  = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
stat("/bin/sleep", {st_mode=S_IFREG|0755, st_size=22040, ...}) = 0
access("/bin/sleep", X_OK)              = 0
rt_sigprocmask(SIG_BLOCK, [INT CHLD], [], 8) = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2a95574470) = 30927
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGINT, {0x432ba0, [], SA_RESTORER, 0x3e1012e380}, {SIG_DFL}, 8) = 0
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 30927
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
...



On Mon, 24 Jul 2006, Jie Gao wrote:

> Date: Mon, 24 Jul 2006 11:29:13 +1000 (EST)
> From: Jie Gao <J.Gao at isu.usyd.edu.au>
> Reply-To: linux clustering <linux-cluster at redhat.com>
> To: linux clustering <linux-cluster at redhat.com>
> Subject: [Linux-cluster] clurgmgrd refuses to die
>
> Hi All
>
> I have been having a persistent problem with shutting down a cluster node.
>
> I have a two-node cluster. If Node A starts first, there is no problem
> rebooting Node B at any time. But if I try to reboot Node A, it hangs
> while trying to kill "clurgmgrd":
>
> > ps -eaf |grep clurgmgrd
> 116:root     25824     1  0 10:45 ?        00:00:00 clurgmgrd
>
> > strace -f kill -TERM 25824
> execve("/bin/kill", ["kill", "-TERM", "25824"], [/* 28 vars */]) = 0
> uname({sys="Linux", node="mix", ...})   = 0
> brk(0)                                  = 0x503000
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95556000
> access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
> open("/etc/ld.so.cache", O_RDONLY)      = 3
> fstat(3, {st_mode=S_IFREG|0644, st_size=114663, ...}) = 0
> mmap(NULL, 114663, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2a95557000
> close(3)                                = 0
> open("/lib64/tls/libc.so.6", O_RDONLY)  = 3
> read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`\305\21"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=1493186, ...}) = 0
> mmap(0x3e10100000, 2310056, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3e10100000
> mprotect(0x3e1022b000, 1085352, PROT_NONE) = 0
> mmap(0x3e1032a000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12a000) = 0x3e1032a000
> mmap(0x3e10330000, 16296, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3e10330000
> close(3)                                = 0
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95573000
> mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95574000
> mprotect(0x3e1032a000, 12288, PROT_READ) = 0
> arch_prctl(ARCH_SET_FS, 0x2a95573b00)   = 0
> munmap(0x2a95557000, 114663)            = 0
> open("/usr/lib/locale/locale-archive", O_RDONLY) = 3
> fstat(3, {st_mode=S_IFREG|0644, st_size=48516832, ...}) = 0
> mmap(NULL, 48516832, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2a95575000
> close(3)                                = 0
> brk(0)                                  = 0x503000
> brk(0x524000)                           = 0x524000
> kill(25824, SIGTERM)                    = 0
> exit_group(0)                           = ?
> Process 28578 detached
>
> > ps -eaf |grep clurgmgrd
> 116:root     25824     1  0 10:45 ?        00:00:00 clurgmgrd
>
> > strace -p 25824
> Process 25824 attached - interrupt to quit
> select(7, [4 5 6], NULL, NULL, {7, 735000}) = 0 (Timeout)
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
> read(9, "\1\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\0\0\0\0", 20) = 20
> close(9)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\3\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\31\0\0\0/cluster/@"..., 45) = 45
> read(9, "\3\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\3\0\0\0", 20) = 20
> read(9, "30\0", 3)                      = 3
> close(9)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\2\0\0\0\0\0\0\0\210\35\0\0\0\0\0\0\0\0\0\0", 20) = 20
> read(9, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20
> close(9)                                = 0
> select(7, [6], [6], NULL, {0, 0})       = 0 (Timeout)
> select(6, [5], [5], NULL, {0, 0})       = 0 (Timeout)
> select(7, [4 5 6], NULL, NULL, {10, 0}) = 0 (Timeout)
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
> read(9, "\1\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\0\0\0\0", 20) = 20
> close(9)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\3\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\31\0\0\0/cluster/@"..., 45) = 45
> read(9, "\3\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\3\0\0\0", 20) = 20
> read(9, "30\0", 3)                      = 3
> close(9)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\2\0\0\0\0\0\0\0\246\35\0\0\0\0\0\0\0\0\0\0", 20) = 20
> read(9, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20
> close(9)                                = 0
> select(7, [6], [6], NULL, {0, 0})       = 0 (Timeout)
> select(6, [5], [5], NULL, {0, 0})       = 0 (Timeout)
> select(7, [4 5 6], NULL, NULL, {10, 0}) = 0 (Timeout)
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
> read(9, "\1\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\0\0\0\0", 20) = 20
> close(9)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\3\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\31\0\0\0/cluster/@"..., 45) = 45
> read(9, "\3\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\3\0\0\0", 20) = 20
> read(9, "30\0", 3)                      = 3
> close(9)                                = 0
> socket(PF_FILE, SOCK_STREAM, 0)         = 9
> connect(9, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
> write(9, "\2\0\0\0\0\0\0\0\304\35\0\0\0\0\0\0\0\0\0\0", 20) = 20
> read(9, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20
> close(9)                                = 0
> ...
>
>
> What exactly is clurgmgrd trying to do?
>
> Regards,
>
>
> Jie
>
> --
> Linux-cluster mailing list
> Linux-cluster at redhat.com
> https://www.redhat.com/mailman/listinfo/linux-cluster
>




More information about the Linux-cluster mailing list