[Ovirt-devel] [PATCH] Fix dbomatic state changes.
Ian Main
imain at redhat.com
Thu Jun 25 20:46:35 UTC 2009
This patch fixes state changes in dbomatic so that VMs no longer pass
through an intermediate 'stopped' state after being unreachable or when
dbomatic starts up. It also fixes a number of smaller bugs around host
associations and state changes.
Signed-off-by: Ian Main <imain at redhat.com>
---
src/db-omatic/db_omatic.rb | 85 +++++++++++++++++++++++++++++++-------------
1 files changed, 60 insertions(+), 25 deletions(-)
diff --git a/src/db-omatic/db_omatic.rb b/src/db-omatic/db_omatic.rb
index b3d5e73..1091333 100755
--- a/src/db-omatic/db_omatic.rb
+++ b/src/db-omatic/db_omatic.rb
@@ -113,6 +113,15 @@ class DbOmatic < Qpid::Qmf::Console
end
end
+ def set_vm_stopped(db_vm)
+ db_vm.host_id = nil
+ db_vm.memory_used = nil
+ db_vm.num_vcpus_used = nil
+ db_vm.needs_restart = nil
+ db_vm.vnc_port = nil
+ db_vm.state = Vm::STATE_STOPPED
+ end
+
def update_domain_state(domain, state_override = nil)
vm = Vm.find(:first, :conditions => [ "uuid = ?", domain['uuid'] ])
if vm == nil
@@ -190,12 +199,7 @@ class DbOmatic < Qpid::Qmf::Console
result = qmf_vm.undefine
if result.status == 0
@logger.info "Delete of VM #{vm.description} successful, syncing DB."
- vm.host_id = nil
- vm.memory_used = nil
- vm.num_vcpus_used = nil
- vm.state = Vm::STATE_STOPPED
- vm.needs_restart = nil
- vm.vnc_port = nil
+ set_vm_stopped(vm)
end
end
end
@@ -223,20 +227,37 @@ class DbOmatic < Qpid::Qmf::Console
host_info[:synced] = true
if state == Host::STATE_AVAILABLE
- # At this point we want to set all domains that are
- # unreachable to stopped. If a domain is indeed running
- # then dbomatic will see that and set it either before
- # or after. If the node was rebooted, the VMs will all
- # be gone and dbomatic won't see them so we need to set
- # them to stopped.
- db_vm = Vm.find(:all, :conditions => ["host_id = ? AND state = ?", db_host.id, Vm::STATE_UNREACHABLE])
- db_vm.each do |vm|
- @logger.info "Moving vm #{vm.description} in state #{vm.state} to state stopped."
- vm.state = Vm::STATE_STOPPED
- vm.save!
+ Thread.new do
+ @logger.info "#{host_info['hostname']} has moved to available, sleeping for updates to vms."
+ sleep(20)
+
+ # At this point we want to set all domains that are
+ # unreachable to stopped. We're using a thread here to
+ # sleep for 20 seconds outside of the main dbomatic loop.
+ # If after 20 seconds with this host up there are still
+ # domains set to 'unreachable', then we're going to guess
+ # the node rebooted and so the domains should be set to
+ # stopped.
+ @logger.info "Checking for dead VMs on newly available host #{host_info['hostname']}."
+
+ # Double check to make sure this host is still up.
+ begin
+ qmf_host = @session.object(:class => 'node', 'hostname' => host_info['hostname'])
+ if !qmf_host
+ @logger.info "Host #{host_info['hostname']} is not up after waiting 20 seconds, skipping dead VM check."
+ else
+ db_vm = Vm.find(:all, :conditions => ["host_id = ? AND state = ?", db_host.id, Vm::STATE_UNREACHABLE])
+ db_vm.each do |vm|
+ @logger.info "Moving vm #{vm.description} in state #{vm.state} to state stopped."
+ set_vm_stopped(vm)
+ vm.save!
+ end
+ end
+ rescue Exception => e # just log any errors here
+ @logger.info "Exception checking for dead VMs (could be normal): #{e.message}"
+ end
end
end
-
else
# FIXME: This would be a newly registered host. We could put it in the database.
@logger.info "Unknown host #{host_info['hostname']}, probably not registered yet??"
@@ -344,6 +365,7 @@ class DbOmatic < Qpid::Qmf::Console
end
def heartbeat(agent, timestamp)
+ puts "heartbeat from agent #{agent}"
return if agent == nil
synchronize do
bank_key = "#{agent.agent_bank}.#{agent.broker.broker_bank}"
@@ -376,6 +398,8 @@ class DbOmatic < Qpid::Qmf::Console
values[:timed_out] = true
end
end
+ bank_key = "#{agent.agent_bank}.#{agent.broker.broker_bank}"
+ @heartbeats.delete(bank_key)
end
# The opposite of above, this is called when an agent is alive and well and makes sure
@@ -415,11 +439,21 @@ class DbOmatic < Qpid::Qmf::Console
@logger.error "Error with closing all VM VNCs operation: #{e.message}"
end
- db_vm = Vm.find(:all)
- db_vm.each do |vm|
- @logger.info "Marking vm #{vm.description} as stopped."
- vm.state = Vm::STATE_STOPPED
- vm.save!
+ Thread.new do
+ sleep(20)
+
+ # At this point we want to set all domains that are
+ # unreachable to stopped. We're using a thread here to
+ # sleep for 20 seconds outside of the main dbomatic loop.
+ # After 20 seconds all domains should be synced and so
+ # anything that's still unreachable is marked as stopped because we
+ # haven't seen any instances of it.
+ db_vm = Vm.find(:all, :conditions => ["state = ?", Vm::STATE_UNREACHABLE])
+ db_vm.each do |vm|
+ @logger.info "After startup delay, VM #{vm.description} is still unreachable, setting to stopped."
+ set_vm_stopped(vm)
+ vm.save!
+ end
end
end
@@ -444,6 +478,7 @@ class DbOmatic < Qpid::Qmf::Console
# Get seconds from the epoch
t = Time.new.to_i
+ puts "going through heartbeats.."
@heartbeats.keys.each do | key |
agent, timestamp = @heartbeats[key]
@@ -451,11 +486,11 @@ class DbOmatic < Qpid::Qmf::Console
s = timestamp / 1000000000
delta = t - s
+ puts "Checking time delta for agent #{agent} - #{delta}"
+
if delta > 30
# No heartbeat for 30 seconds.. deal with dead/disconnected agent.
agent_disconnected(agent)
-
- @heartbeats.delete(key)
else
agent_connected(agent)
end
--
1.6.0.6
More information about the ovirt-devel
mailing list