[Ovirt-devel] [PATCH] Fix dbomatic state changes.

Ian Main imain at redhat.com
Fri Jun 26 19:30:46 UTC 2009


This patch fixes state changes in dbomatic so that VMs no longer pass
through an intermediate 'stopped' state after being unreachable or when
dbomatic starts up.  It also fixes a number of smaller bugs around host
associations and state changes.

Signed-off-by: Ian Main <imain at redhat.com>
---
 src/db-omatic/db_omatic.rb |   90 ++++++++++++++++++++++++++++++++-----------
 1 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/src/db-omatic/db_omatic.rb b/src/db-omatic/db_omatic.rb
index b3d5e73..155ff5e 100755
--- a/src/db-omatic/db_omatic.rb
+++ b/src/db-omatic/db_omatic.rb
@@ -113,6 +113,15 @@ class DbOmatic < Qpid::Qmf::Console
         end
     end
 
+    def set_vm_stopped(db_vm)
+        db_vm.host_id = nil
+        db_vm.memory_used = nil
+        db_vm.num_vcpus_used = nil
+        db_vm.needs_restart = nil
+        db_vm.vnc_port = nil
+        db_vm.state = Vm::STATE_STOPPED
+    end
+
     def update_domain_state(domain, state_override = nil)
         vm = Vm.find(:first, :conditions => [ "uuid = ?", domain['uuid'] ])
         if vm == nil
@@ -190,12 +199,7 @@ class DbOmatic < Qpid::Qmf::Console
                 result = qmf_vm.undefine
                 if result.status == 0
                     @logger.info "Delete of VM #{vm.description} successful, syncing DB."
-                    vm.host_id = nil
-                    vm.memory_used = nil
-                    vm.num_vcpus_used = nil
-                    vm.state = Vm::STATE_STOPPED
-                    vm.needs_restart = nil
-                    vm.vnc_port = nil
+                    set_vm_stopped(vm)
                 end
             end
         end
@@ -223,20 +227,37 @@ class DbOmatic < Qpid::Qmf::Console
             host_info[:synced] = true
 
             if state == Host::STATE_AVAILABLE
-                # At this point we want to set all domains that are
-                # unreachable to stopped.  If a domain is indeed running
-                # then dbomatic will see that and set it either before
-                # or after.  If the node was rebooted, the VMs will all
-                # be gone and dbomatic won't see them so we need to set
-                # them to stopped.
-                db_vm = Vm.find(:all, :conditions => ["host_id = ? AND state = ?", db_host.id, Vm::STATE_UNREACHABLE])
-                db_vm.each do |vm|
-                    @logger.info "Moving vm #{vm.description} in state #{vm.state} to state stopped."
-                    vm.state = Vm::STATE_STOPPED
-                    vm.save!
+                Thread.new do
+                    @logger.info "#{host_info['hostname']} has moved to available, sleeping for updates to vms."
+                    sleep(20)
+
+                    # At this point we want to set all domains that are
+                    # unreachable to stopped.  We're using a thread here to
+                    # sleep for 20 seconds outside of the main dbomatic loop.
+                    # If after 20 seconds with this host up there are still
+                    # domains set to 'unreachable', then we're going to guess
+                    # the node rebooted and so the domains should be set to
+                    # stopped.
+                    @logger.info "Checking for dead VMs on newly available host #{host_info['hostname']}."
+
+                    # Double check to make sure this host is still up.
+                    begin
+                        qmf_host = @session.object(:class => 'node', 'hostname' => host_info['hostname'])
+                        if !qmf_host
+                            @logger.info "Host #{host_info['hostname']} is not up after waiting 20 seconds, skipping dead VM check."
+                        else
+                            db_vm = Vm.find(:all, :conditions => ["host_id = ? AND state = ?", db_host.id, Vm::STATE_UNREACHABLE])
+                            db_vm.each do |vm|
+                                @logger.info "Moving vm #{vm.description} in state #{vm.state} to state stopped."
+                                set_vm_stopped(vm)
+                                vm.save!
+                            end
+                        end
+                    rescue Exception => e # just log any errors here
+                        @logger.info "Exception checking for dead VMs (could be normal): #{e.message}"
+                    end
                 end
             end
-
         else
             # FIXME: This would be a newly registered host.  We could put it in the database.
             @logger.info "Unknown host #{host_info['hostname']}, probably not registered yet??"
@@ -344,6 +365,7 @@ class DbOmatic < Qpid::Qmf::Console
     end
 
     def heartbeat(agent, timestamp)
+        puts "heartbeat from agent #{agent}"
         return if agent == nil
         synchronize do
             bank_key = "#{agent.agent_bank}.#{agent.broker.broker_bank}"
@@ -376,6 +398,8 @@ class DbOmatic < Qpid::Qmf::Console
             values[:timed_out] = true
             end
         end
+        bank_key = "#{agent.agent_bank}.#{agent.broker.broker_bank}"
+        @heartbeats.delete(bank_key)
     end
 
     # The opposite of above, this is called when an agent is alive and well and makes sure
@@ -415,11 +439,30 @@ class DbOmatic < Qpid::Qmf::Console
             @logger.error "Error with closing all VM VNCs operation: #{e.message}"
          end
 
+        # On startup, since we don't know the previous states of anything, we basically
+        # do a big sync up with the states of all VMs.  We don't worry about hosts since
+        # they are very simple and are either up or down, but it's possible we left
+        # VMs in various states that are no longer applicable to this moment.
         db_vm = Vm.find(:all)
         db_vm.each do |vm|
-            @logger.info "Marking vm #{vm.description} as stopped."
-            vm.state = Vm::STATE_STOPPED
-            vm.save!
+            set_stopped = false
+            # Basically here we are looking for VMs which are not up in some form or another and setting
+            # them to stopped.  VMs that exist as QMF objects will get set appropriately when the objects
+            # appear on the bus.
+            begin
+                qmf_vm = @session.object(:class => 'domain', 'uuid' => db_vm.uuid)
+                if qmf_vm == nil
+                    set_stopped = true
+                end
+            rescue Exception => ex
+                set_stopped = true
+            end
+
+            if set_stopped
+                @logger.info "On startup, VM #{vm.description} is not found, setting to stopped."
+                set_vm_stopped(vm)
+                vm.save!
+            end
         end
     end
 
@@ -444,6 +487,7 @@ class DbOmatic < Qpid::Qmf::Console
                     # Get seconds from the epoch
                     t = Time.new.to_i
 
+                    puts "going through heartbeats.."
                     @heartbeats.keys.each do | key |
                         agent, timestamp = @heartbeats[key]
 
@@ -451,11 +495,11 @@ class DbOmatic < Qpid::Qmf::Console
                         s = timestamp / 1000000000
                         delta = t - s
 
+                        puts "Checking time delta for agent #{agent} - #{delta}"
+
                         if delta > 30
                             # No heartbeat for 30 seconds.. deal with dead/disconnected agent.
                             agent_disconnected(agent)
-
-                            @heartbeats.delete(key)
                         else
                             agent_connected(agent)
                         end
-- 
1.6.0.6




More information about the ovirt-devel mailing list