[Ovirt-devel] [PATCH] Fix dbomatic state changes.
Ian Main
imain at redhat.com
Fri Jun 26 19:30:46 UTC 2009
This patch fixes state changes in dbomatic so that there is no
intermediate 'stopped' state after being unreachable or on startup
of dbomatic etc. Also fixes a number of smaller bugs around host
associations, state changes, etc.
Signed-off-by: Ian Main <imain at redhat.com>
---
src/db-omatic/db_omatic.rb | 90 ++++++++++++++++++++++++++++++++-----------
1 files changed, 67 insertions(+), 23 deletions(-)
diff --git a/src/db-omatic/db_omatic.rb b/src/db-omatic/db_omatic.rb
index b3d5e73..155ff5e 100755
--- a/src/db-omatic/db_omatic.rb
+++ b/src/db-omatic/db_omatic.rb
@@ -113,6 +113,15 @@ class DbOmatic < Qpid::Qmf::Console
end
end
+ def set_vm_stopped(db_vm)
+ db_vm.host_id = nil
+ db_vm.memory_used = nil
+ db_vm.num_vcpus_used = nil
+ db_vm.needs_restart = nil
+ db_vm.vnc_port = nil
+ db_vm.state = Vm::STATE_STOPPED
+ end
+
def update_domain_state(domain, state_override = nil)
vm = Vm.find(:first, :conditions => [ "uuid = ?", domain['uuid'] ])
if vm == nil
@@ -190,12 +199,7 @@ class DbOmatic < Qpid::Qmf::Console
result = qmf_vm.undefine
if result.status == 0
@logger.info "Delete of VM #{vm.description} successful, syncing DB."
- vm.host_id = nil
- vm.memory_used = nil
- vm.num_vcpus_used = nil
- vm.state = Vm::STATE_STOPPED
- vm.needs_restart = nil
- vm.vnc_port = nil
+ set_vm_stopped(vm)
end
end
end
@@ -223,20 +227,37 @@ class DbOmatic < Qpid::Qmf::Console
host_info[:synced] = true
if state == Host::STATE_AVAILABLE
- # At this point we want to set all domains that are
- # unreachable to stopped. If a domain is indeed running
- # then dbomatic will see that and set it either before
- # or after. If the node was rebooted, the VMs will all
- # be gone and dbomatic won't see them so we need to set
- # them to stopped.
- db_vm = Vm.find(:all, :conditions => ["host_id = ? AND state = ?", db_host.id, Vm::STATE_UNREACHABLE])
- db_vm.each do |vm|
- @logger.info "Moving vm #{vm.description} in state #{vm.state} to state stopped."
- vm.state = Vm::STATE_STOPPED
- vm.save!
+ Thread.new do
+ @logger.info "#{host_info['hostname']} has moved to available, sleeping for updates to vms."
+ sleep(20)
+
+ # At this point we want to set all domains that are
+ # unreachable to stopped. We're using a thread here to
+ # sleep for 20 seconds outside of the main dbomatic loop.
+ # If after 20 seconds with this host up there are still
+ # domains set to 'unreachable', then we're going to guess
+ # the node rebooted and so the domains should be set to
+ # stopped.
+ @logger.info "Checking for dead VMs on newly available host #{host_info['hostname']}."
+
+ # Double check to make sure this host is still up.
+ begin
+ qmf_host = @session.object(:class => 'node', 'hostname' => host_info['hostname'])
+ if !qmf_host
+ @logger.info "Host #{host_info['hostname']} is not up after waiting 20 seconds, skipping dead VM check."
+ else
+ db_vm = Vm.find(:all, :conditions => ["host_id = ? AND state = ?", db_host.id, Vm::STATE_UNREACHABLE])
+ db_vm.each do |vm|
+ @logger.info "Moving vm #{vm.description} in state #{vm.state} to state stopped."
+ set_vm_stopped(vm)
+ vm.save!
+ end
+ end
+ rescue Exception => e # just log any errors here
+ @logger.info "Exception checking for dead VMs (could be normal): #{e.message}"
+ end
end
end
-
else
# FIXME: This would be a newly registered host. We could put it in the database.
@logger.info "Unknown host #{host_info['hostname']}, probably not registered yet??"
@@ -344,6 +365,7 @@ class DbOmatic < Qpid::Qmf::Console
end
def heartbeat(agent, timestamp)
+ puts "heartbeat from agent #{agent}"
return if agent == nil
synchronize do
bank_key = "#{agent.agent_bank}.#{agent.broker.broker_bank}"
@@ -376,6 +398,8 @@ class DbOmatic < Qpid::Qmf::Console
values[:timed_out] = true
end
end
+ bank_key = "#{agent.agent_bank}.#{agent.broker.broker_bank}"
+ @heartbeats.delete(bank_key)
end
# The opposite of above, this is called when an agent is alive and well and makes sure
@@ -415,11 +439,30 @@ class DbOmatic < Qpid::Qmf::Console
@logger.error "Error with closing all VM VNCs operation: #{e.message}"
end
+ # On startup, since we don't know the previous states of anything, we basically
+ # do a big sync up with the states of all VMs. We don't worry about hosts since
+ # they are very simple and are either up or down, but it's possible we left
+ # VMs in various states that are no longer applicable to this moment.
db_vm = Vm.find(:all)
db_vm.each do |vm|
- @logger.info "Marking vm #{vm.description} as stopped."
- vm.state = Vm::STATE_STOPPED
- vm.save!
+ set_stopped = false
+ # Basically here we are looking for VMs which are not up in some form or another and setting
+ # them to stopped. VMs that exist as QMF objects will get set appropriately when the objects
+ # appear on the bus.
+ begin
+ qmf_vm = @session.object(:class => 'domain', 'uuid' => db_vm.uuid)
+ if qmf_vm == nil
+ set_stopped = true
+ end
+ rescue Exception => ex
+ set_stopped = true
+ end
+
+ if set_stopped
+ @logger.info "On startup, VM #{vm.description} is not found, setting to stopped."
+ set_vm_stopped(vm)
+ vm.save!
+ end
end
end
@@ -444,6 +487,7 @@ class DbOmatic < Qpid::Qmf::Console
# Get seconds from the epoch
t = Time.new.to_i
+ puts "going through heartbeats.."
@heartbeats.keys.each do | key |
agent, timestamp = @heartbeats[key]
@@ -451,11 +495,11 @@ class DbOmatic < Qpid::Qmf::Console
s = timestamp / 1000000000
delta = t - s
+ puts "Checking time delta for agent #{agent} - #{delta}"
+
if delta > 30
# No heartbeat for 30 seconds.. deal with dead/disconnected agent.
agent_disconnected(agent)
-
- @heartbeats.delete(key)
else
agent_connected(agent)
end
--
1.6.0.6
More information about the ovirt-devel
mailing list