[Ovirt-devel] oVirt Nodes become unavailable
Василец Дмитрий
pronix.service at gmail.com
Fri Jan 30 20:07:24 UTC 2009
This happens because the DB does not contain up-to-date data.
Apply this patch and restart db-omatic.
diff --git a/src/db-omatic/db_omatic.rb b/src/db-omatic/db_omatic.rb
index 4afffb1..c499610 100755
--- a/src/db-omatic/db_omatic.rb
+++ b/src/db-omatic/db_omatic.rb
@@ -74,6 +74,64 @@ class DbOmatic < Qpid::Qmf::Console
domain[:synced] = true
end
+ #find hostname from values['node'] where values['class_type'] ==
'domain'
+ def get_host_id(abank,bbank)
+ begin
+ @cached_objects.keys.each do |objkey|
+ if @cached_objects[objkey][:agent_bank].to_s == abank and
@cached_objects[objkey][:broker_bank].to_s == bbank and
@cached_objects[objkey][:class_type].to_s == 'node'
+ return Host.find(:first, :conditions => ['hostname =
?', at cached_objects[objkey]["hostname"].to_s]).id
+ break
+ end
+ end
+ rescue => ex
+ log("error in get_host_id")
+ log(ex)
+ end
+ end
+
+ def set_host(values,digit)
+ begin
+ vm = Vm.find(:first, :conditions => ['description =
?',values["name"].to_s])
+ if vm and digit
+ vm.host_id = digit
+ vm.save!
+ else
+ log("this vm not exist #{values["name"]}")
+ end
+ rescue => ex
+ puts "error when set_host for #{values["name"]}"
+ puts ex
+ end
+ end
+
+ def start_crashed_vm(vm)
+ task = VmTask.new( :user => 'db-omatic', :task_target => vm, :action
=> 'start_vm', :state => 'queued')
+ task.save!
+ log("set task for start crashed vm #{vm.id}")
+ end
+
+ def set_domain_stopped(domain)
+ begin
+ vm = Vm.find(:first, :conditions => ['uuid = ?', domain['uuid']])
+ if vm != nil
+ curstate = vm.state
+ vm.state = Vm::STATE_STOPPED
+ vm.host_id = nil
+ vm.save
+            domain['state'] = 'crashed' # and now i will use ipmi for reboot
unavailable host - simple fencing
+ # if curstate == Vm::STATE_RUNNING and vm.ha # vm.ha true or false
+ # start_crashed_vm(vm)
+ # end
+ else
+ log('vm == nil ')
+ end
+ log("domain #{domain['id']} already stopped")
+ rescue => ex
+ log("can\'t set domain #{domain['id']} stopped")
+ log(ex)
+ end
+ end
+
def update_host_state(host_info, state)
db_host = Host.find(:first, :conditions => [ "hostname = ?",
host_info['hostname'] ])
if db_host
@@ -131,6 +189,7 @@ class DbOmatic < Qpid::Qmf::Console
domain_state_change = false
+ change_node = false
obj.properties.each do |key, newval|
if values[key.to_s] != newval
values[key.to_s] = newval
@@ -138,12 +197,30 @@ class DbOmatic < Qpid::Qmf::Console
if type == "domain" and key.to_s == "state"
domain_state_change = true
end
+ if type == "domain" and key.to_s == "node"
+ change_node = true
+ end
+
+
end
end
if domain_state_change
update_domain_state(values)
end
+ if change_node
+ values.each do |key,val|
+ if key == 'state' and val == 'running'
+ abank = values['node'].to_s.split('-')[3]
+ bbank = values['node'].to_s.split('-')[4]
+ @@host_id = get_host_id(abank,bbank)
+ set_host(values,@@host_id)
+ log("update node data for #{values['name']}")
+ break
+ end
+ end
+ end
+
if new_object
if type == "node"
@@ -187,11 +264,6 @@ class DbOmatic < Qpid::Qmf::Console
end
end
-
- def del_agent(agent)
- agent_disconnected(agent)
- end
-
# This method marks objects associated with the given agent as timed
out/invalid. Called either
# when the agent heartbeats out, or we get a del_agent callback.
def agent_disconnected(agent)
@@ -205,8 +277,10 @@ class DbOmatic < Qpid::Qmf::Console
if values[:class_type] == 'node'
update_host_state(values, Host::STATE_UNAVAILABLE)
elsif values[:class_type] == 'domain'
- update_domain_state(values, Vm::STATE_UNREACHABLE)
- end
+ set_domain_stopped(values)
+ values[:timed_out] = true
+ @cached_objects.delete(objkey)
+ end
end
values[:timed_out] = true
end
@@ -248,6 +322,7 @@ class DbOmatic < Qpid::Qmf::Console
db_vm = Vm.find(:all)
db_vm.each do |vm|
log "Marking vm #{vm.description} as stopped."
+ vm.host_id = nil
vm.state = Vm::STATE_STOPPED
vm.save
end
2009/1/30 Hugh O. Brock <hbrock at redhat.com>
> On Fri, Jan 30, 2009 at 01:24:36PM -0600, Carb, Brian A wrote:
> > date --utc on all nodes and the appliance shows that they are within a
> few seconds of each other.
> >
> > brian carb
> > unisys corporation - malvern, pa
> > brian.carb at unisys.com
> >
> > -----Original Message-----
> > From: Perry Myers [mailto:pmyers at redhat.com]
> > Sent: Friday, January 30, 2009 2:19 PM
> > To: Carb, Brian A
> > Cc: ovirt-devel at redhat.com; Ian Main
> > Subject: Re: [Ovirt-devel] oVirt Nodes become unavailable
> >
> > Carb, Brian A wrote:
> > > running oVirt 0.96 on fedora10...
> > >
> > > I noticed that after some time has elapsed (machines sitting idle
> overnight), the oVirt dashboard shows 2 of my 4 nodes as
> "unavailable(enabled)". I can access them via their consoles though, so the
> machines are up. Restarting the browser session does not change this. If I
> shutdown and then restart the oVirt server appliance, all 4 show as
> unavailable. Do I have to do something to make the nodes available (short of
> restarting them)?
> > >
> > > Any ideas? Thanks.
> >
> > Really quick thing.... check timestamps on each Node and the appliance
> (date --utc)
> >
> > If they're not within a few seconds of each other then kerberos gets
> unhappy... If that is the case then our NTP setup is not working which
> needs to be fixed.
> >
> > If they are in sync then Ian should be able to help
> >
> > Perry
>
> Hmm... sounds like it could be a qpid or host-browser problem. Ian?
>
> --Hugh
>
> _______________________________________________
> Ovirt-devel mailing list
> Ovirt-devel at redhat.com
> https://www.redhat.com/mailman/listinfo/ovirt-devel
>
--
С уважением, Дмитрий.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/ovirt-devel/attachments/20090130/db45980c/attachment.htm>
More information about the ovirt-devel
mailing list