[Ovirt-devel] [PATCH] Host Status Patches

Ian Main imain at redhat.com
Fri May 9 04:31:23 UTC 2008


A few comments first:

There will likely be some fallout from this for the UI, and some VM task modifications may be needed.  This has had some light testing but may still have issues.  I just want to get it out there so we can work together to wrap this up.

This was developed and tested on 0.4 and then moved to tip, but I was having problems getting it running there.  I'll test it more on tip in the morning.

---

This patch gets host-status working properly and adds a new state, STATE_UNREACHABLE, to the VM states.  This state is set when a managed host with VMs is no longer reachable and we're not sure whether it died or fell off the network.  Once the host is back up, the actual state of the VMs can be inferred and taskomatic will be informed.

Signed-off-by: Ian Main <imain at stemwinder.org>


diff --git a/wui/src/app/models/vm.rb b/wui/src/app/models/vm.rb
index 197ca9c..d5866cc 100644
--- a/wui/src/app/models/vm.rb
+++ b/wui/src/app/models/vm.rb
@@ -41,6 +41,8 @@ class Vm < ActiveRecord::Base
   STATE_CREATING       = "creating"
   STATE_RUNNING        = "running"
 
+  STATE_UNREACHABLE    = "unreachable"
+
   STATE_STOPPING       = "stopping"
   STATE_STOPPED        = "stopped"
   STATE_STARTING       = "starting"
@@ -65,6 +67,7 @@ class Vm < ActiveRecord::Base
                           STATE_RESTORING]
 
   EFFECTIVE_STATE = {  STATE_PENDING       => STATE_PENDING,
+                       STATE_UNREACHABLE   => STATE_UNREACHABLE,
                        STATE_CREATING      => STATE_STOPPED, 
                        STATE_RUNNING       => STATE_RUNNING,
                        STATE_STOPPING      => STATE_STOPPED,
diff --git a/wui/src/host-status/host-status.rb b/wui/src/host-status/host-status.rb
index 5063197..e77087e 100755
--- a/wui/src/host-status/host-status.rb
+++ b/wui/src/host-status/host-status.rb
@@ -58,19 +58,34 @@ end
 # connects to the db in here
 require 'dutils'
 
-
-def findHost(vm)
-  host = Host.find(:first, :conditions => [ "id = ?", vm.host_id])
-
-  if host == nil
-    # Hm, we didn't find the host_id.  Seems odd.  Return a failure
-    raise
+def check_state(vm, dom_info)
+  case dom_info.state
+
+  when Libvirt::Domain::NOSTATE, Libvirt::Domain::SHUTDOWN,
+    Libvirt::Domain::SHUTOFF, Libvirt::Domain::CRASHED then
+    if Vm::RUNNING_STATES.include?(vm.state)
+      # OK, the host thinks this VM is off, while the database thinks it
+      # is running; we have to kick taskomatic
+      kick_taskomatic(Vm::STATE_STOPPED, vm)
+    end
+  when Libvirt::Domain::RUNNING, Libvirt::Domain::BLOCKED then
+    if not Vm::RUNNING_STATES.include?(vm.state)
+      # OK, the host thinks this VM is running, but it's not marked as running
+      # in the database; kick taskomatic
+      kick_taskomatic(Vm::STATE_RUNNING, vm)
+    end
+  when Libvirt::Domain::PAUSED then
+    if vm.state != Vm::STATE_SUSPENDING and vm.state != Vm::STATE_SUSPENDED
+      kick_taskomatic(Vm::STATE_SUSPENDED, vm)
+    end
+  else
+    puts "Unknown vm state...skipping"
   end
-
-  return host
 end
 
+
 def kick_taskomatic(msg, vm)
+  print "Kicking taskomatic, state is %s\n" % msg
   task = VmTask.new
   task.user = "host-status"
   task.action = VmTask::ACTION_UPDATE_STATE_VM
@@ -86,59 +101,89 @@ loop do
   puts "Waking up to check host status"
   get_credentials
 
-  # FIXME: this only monitors hosts that have VMs running that *we* started.
-  # We might want to enhance this to look at all hosts that we are capable
-  # of contacting, just to check that rogue guests didn't get started.
-  vms = Vm.find(:all, :conditions => [ "host_id is NOT NULL" ])
-  vms.each do |vm|
-    host = findHost(vm)
+  hosts = Host.find(:all)
+  hosts.each do |host|
+    
+    puts "checking host" + host.hostname
 
     begin
       conn = Libvirt::open("qemu+tcp://" + host.hostname + "/system")
     rescue
-      # we couldn't contact the host for whatever reason; we'll try again
-      # on the next iteration
-      puts "Failed to contact host " + host.hostname + "; skipping for now"
+      # we couldn't contact the host for whatever reason.  Since we can't get to this
+      # host, we have to mark all vms on it as disconnected or stopped or such.
+      puts "Failed to contact host " + host.hostname + "; skipping for now", $!
+      vms = Vm.find(:all, :conditions => [ "host_id = ?", host.id ])
+      vms.each do |vm|
+        # Since we can't reach the host on which the vms reside, we mark these as
+        # STATE_UNREACHABLE.  If they come back up we can mark them as running again,
+        # else they'll be stopped.  At least for now the user will know what's going on.
+        kick_taskomatic(Vm::STATE_UNREACHABLE, vm)
+      end
+
       next
     end
 
     begin
-      dom = conn.lookup_domain_by_uuid(vm.uuid)
+      vm_ids = conn.list_domains
     rescue
-      # OK.  We couldn't find the UUID that we thought was there.  The only
-      # explanation is that the domain is no longer there.  Kick taskomatic
-      # and tell it
-      puts "Failed to find domain " + vm.description
-      kick_taskomatic(Vm::STATE_STOPPED, vm)
+      puts "Failed to request domain list on host " + host.hostname
       conn.close
       next
     end
-    info = dom.info
-    conn.close
-
-    case info.state
-    when Libvirt::Domain::NOSTATE, Libvirt::Domain::SHUTDOWN,
-      Libvirt::Domain::SHUTOFF, Libvirt::Domain::CRASHED then
-      if Vm::RUNNING_STATES.include?(vm.state)
-        # OK, the host thinks this VM is off, while the database thinks it
-        # is running; we have to kick taskomatic
-        kick_taskomatic(Vm::STATE_STOPPED, vm)
+
+    puts vm_ids.length
+
+    # Here we're going through every vm listed through libvirt.  This
+    # really only lets us find ones that are started that shouldn't be.
+    vm_ids.each do |vm_id|
+      puts "VM ID: %d" % [vm_id]
+      begin
+        dom = conn.lookup_domain_by_id(vm_id)
+      rescue
+        puts "Failed to find domain " + vm.description
+        next
       end
-    when Libvirt::Domain::RUNNING, Libvirt::Domain::BLOCKED then
-      if not Vm::RUNNING_STATES.include?(vm.state)
-        # OK, the host thinks this VM is running, but it's not marked as running
-        # in the database; kick taskomatic
-        kick_taskomatic(Vm::STATE_RUNNING, vm)
+      
+      vm_uuid = dom.uuid
+      info = dom.info
+
+      puts "VM UUID: %s" % [vm_uuid]
+      info = dom.info
+      puts info.to_s
+ 
+      vm = Vm.find(:first, :conditions => [ "uuid = ?", vm_uuid ])
+      if vm == nil
+        puts "VM Not found in database, must be created by user.  giving up."
+        next
       end
-    when Libvirt::Domain::PAUSED then
-      if vm.state != Vm::STATE_SUSPENDING and vm.state != Vm::STATE_SUSPENDED
-        kick_taskomatic(Vm::STATE_SUSPENDED, vm)
+
+      check_state(vm, info)
+    end
+
+    # Now we get a list of all vms that should be on this system and see if
+    # they are all running.
+    vms = Vm.find(:all, :conditions => [ "host_id = ?", host.id ])
+    vms.each do |vm|
+    
+      begin
+        dom = conn.lookup_domain_by_uuid(vm.uuid)
+      rescue
+        # OK.  We couldn't find the UUID that we thought was there.  The only
+        # explanation is that the domain is dead.
+        puts "Failed to find domain " + vm.description
+        kick_taskomatic(Vm::STATE_STOPPED, vm)
+        next
       end
-    else
-      puts "Unknown vm state...skipping"
+      info = dom.info
+      check_state(vm, info)
+
+      conn.close
+
     end
   end
 
   STDOUT.flush
   sleep sleeptime
 end
+
+
diff --git a/wui/src/task-omatic/task_vm.rb b/wui/src/task-omatic/task_vm.rb
index 6c63f48..29bcb1f 100644
--- a/wui/src/task-omatic/task_vm.rb
+++ b/wui/src/task-omatic/task_vm.rb
@@ -633,6 +633,10 @@ end
 def update_state_vm(task)
   puts "update_state_vm"
 
+  # NOTE: findVM() will only return a vm if all the host information is filled
+  # in.  So if a vm that we thought was stopped is running, this returns nil
+  # and we don't update any information about it.  The tricky part
+  # is that we're still not sure what to do in this case :).  - Ian
   begin
     vm = findVM(task)
   rescue




More information about the ovirt-devel mailing list