[Linux-cluster] Fencing Daemon Issues

Tristram Cheer tristram at ubernet.co.nz
Tue Jan 31 08:11:30 UTC 2006


Hi All,

Once again I seem to be having a blonde moment with our dev cluster. 
After a full nuke of it, I'm getting it going again and I've come across 
an issue when trying to mount a GFS volume:

root at asimov:~# mount -t gfs /dev/gnbd/shared /shared/
Lock_Harness <CVS> (built Jan 25 2006 17:22:25) installed
GFS <CVS> (built Jan 25 2006 17:22:54) installed
GFS: Trying to join cluster "lock_dlm", "ubernet:shared"
Lock_DLM (built Jan 25 2006 17:22:29) installed
lock_dlm: fence domain not found; check fenced
GFS: can't mount proto = lock_dlm, table = ubernet:shared, hostdata =
mount: permission denied

When I try to run fence from init.d, I get this:

fence_tool: waiting for fence domain run state

Here is the output of cat /proc/cluster/services:

root at asimov:~# cat /proc/cluster/services
Service          Name                              GID LID State     Code
Fence Domain:    "default"                           0   3 join      
S-1,80,2
[]

User:            "usrm::manager"                     1   1 run       -
[1 2]


Here's an strace of fence_tool join -D:

execve("/usr/sbin/fence_tool", ["fence_tool", "join"], [/* 16 vars */]) = 0
uname({sys="Linux", node="asimov", ...}) = 0
brk(0)                                  = 0x804d000
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 
-1, 0) = 0xb7faa000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
old_mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 
-1, 0) = 0xb7fa8000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or 
directory)
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=9831, ...}) = 0
old_mmap(NULL, 9831, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7fa5000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/lib/libc.so.6", O_RDONLY)        = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\306S\1"..., 
512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=1131932, ...}) = 0
old_mmap(NULL, 1141908, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 
3, 0) = 0xb7e8e000
old_mmap(0xb7f9f000, 16384, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x110000) = 0xb7f9f000
old_mmap(0xb7fa3000, 7316, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7fa3000
close(3)                                = 0
munmap(0xb7fa5000, 9831)                = 0
socket(0x1e /* PF_??? */, SOCK_DGRAM, 3) = 3
ioctl(3, 0x780b, 0)                     = 1
ioctl(3, 0x7805, 0)                     = 1
ioctl(3, 0xc1187890, 0xbfebeec0)        = 0
close(3)                                = 0
brk(0)                                  = 0x804d000
brk(0x806e000)                          = 0x806e000
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 3
connect(3, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(3, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(3, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
close(3)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 3
connect(3, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(3, "\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(3, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20
close(3)                                = 0
execve("/usr/local/sbin/fenced", ["fenced"], [/* 16 vars */]) = -1 
ENOENT (No such file or directory)
execve("/usr/local/bin/fenced", ["fenced"], [/* 16 vars */]) = -1 ENOENT 
(No such file or directory)
execve("/usr/sbin/fenced", ["fenced"], [/* 16 vars */]) = 0
uname({sys="Linux", node="asimov", ...}) = 0
brk(0)                                  = 0x8050000
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 
-1, 0) = 0xb7f76000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
old_mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 
-1, 0) = 0xb7f74000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or 
directory)
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=9831, ...}) = 0
old_mmap(NULL, 9831, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7f71000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/lib/libpthread.so.0", O_RDONLY)  = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\0@\0\000"..., 
512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=84199, ...}) = 0
old_mmap(NULL, 331364, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 
3, 0) = 0xb7f20000
old_mmap(0xb7f2e000, 4096, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe000) = 0xb7f2e000
old_mmap(0xb7f2f000, 269924, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7f2f000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or 
directory)
open("/lib/libc.so.6", O_RDONLY)        = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\306S\1"..., 
512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=1131932, ...}) = 0
old_mmap(NULL, 1141908, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 
3, 0) = 0xb7e09000
old_mmap(0xb7f1a000, 16384, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x110000) = 0xb7f1a000
old_mmap(0xb7f1e000, 7316, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7f1e000
close(3)                                = 0
old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 
-1, 0) = 0xb7e08000
munmap(0xb7f71000, 9831)                = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0
setrlimit(RLIMIT_STACK, {rlim_cur=2044*1024, rlim_max=RLIM_INFINITY}) = 0
getpid()                                = 5019
rt_sigaction(SIGRTMIN, {0xb7f27f85, [], 0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0xb7f2800e, [RTMIN], 0}, NULL, 8) = 0
rt_sigaction(SIGRT_2, {0xb7f28120, [], 0}, NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [RTMIN], NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RT_1], NULL, 8) = 0
_sysctl({{CTL_KERN, KERN_VERSION, 0, 20c61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 2, 0xbfb8ab88, 
32, (nil), 0}) = 0
socket(PF_FILE, SOCK_DGRAM, 0)          = 3
brk(0)                                  = 0x8050000
brk(0x8071000)                          = 0x8071000
socket(0x1e /* PF_??? */, SOCK_DGRAM, 3) = 4
ioctl(4, 0x780b, 0)                     = 1
ioctl(4, 0xc1187890, 0xbfb8ad90)        = 0
time(NULL)                              = 1138694819
sendto(3, "1138694819 our name from cman \"a"..., 39, 0, 
{sa_family=AF_FILE, path=@fenced_socket}, 16) = -1 ECONNREFUSED 
(Connection refused)
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(4, "\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0008\0\0\0/cluster/clu"..., 76) 
= 76
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\7\0\0\0", 20) = 20
read(4, "asimov\0", 7)                  = 7
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0#\0\0\0/cluster/fen"..., 55) = 55
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0", 20) = 20
read(4, "0\0", 2)                       = 2
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\'\0\0\0/cluster/fen"..., 59) = 59
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0", 20) = 20
read(4, "3\0", 2)                       = 2
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\'\0\0\0/cluster/fen"..., 59) = 59
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0\0\0", 20) = 20
read(4, "0\0", 2)                       = 2
close(4)                                = 0
time(NULL)                              = 1138694819
sendto(3, "1138694819 delay post_join 3s po"..., 43, 0, 
{sa_family=AF_FILE, path=@fenced_socket}, 16) = -1 ECONNREFUSED 
(Connection refused)
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\7\0\0\0", 20) = 20
read(4, "asimov\0", 7)                  = 7
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\6\0\0\0", 20) = 20
read(4, "abdul\0", 6)                   = 6
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0", 20) = 20
read(4, "faye\0", 5)                    = 5
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\6\0\0\0", 20) = 20
read(4, "maria\0", 6)                   = 6
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0", 20) = 20
read(4, "mao\0", 4)                     = 4
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\10\0\0\0", 20) = 20
read(4, "vincent\0", 8)                 = 8
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0", 20) = 20
read(4, "roco\0", 5)                    = 5
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\t\0\0\0", 20) = 20
read(4, "piccarro\0", 9)                = 9
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0+\0\0\0/cluster/clu"..., 63) = 63
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\7\0\0\0", 20) = 20
read(4, "edward\0", 7)                  = 7
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0,\0\0\0/cluster/clu"..., 64) = 64
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\6\0\0\0", 20) = 20
read(4, "baker\0", 6)                   = 6
close(4)                                = 0
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\3\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0,\0\0\0/cluster/clu"..., 64) = 64
read(4, "\3\0\0\0\0\0\0\0\0\0\0\0\303\377\377\377\0\0\0\0", 20) = 20
close(4)                                = 0
time(NULL)                              = 1138694819
sendto(3, "1138694819 added 10 nodes from c"..., 35, 0, 
{sa_family=AF_FILE, path=@fenced_socket}, 16) = -1 ECONNREFUSED 
(Connection refused)
getpid()                                = 5019
socket(PF_FILE, SOCK_STREAM, 0)         = 4
connect(4, {sa_family=AF_FILE, path="/var/run/cluster/ccsd.sock"}, 110) = 0
write(4, "\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 20) = 20
read(4, "\2\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0\0\0\0\0", 20) = 20
close(4)                                = 0
fork()                                  = 5020
--- SIGCHLD (Child exited) @ 0 (0) ---
exit_group(0)                           = ?

FYI, this is running in a Xen 3.0 environment.

Does anyone have any pointers on what this could be?

Cheers

Tristram




More information about the Linux-cluster mailing list