[Ovirt-devel] [PATCH] Add virtual machine restart if host crashed. Use fencing by ssh and id_rsa key.

pronix.service at gmail.com pronix.service at gmail.com
Tue Feb 3 16:45:01 UTC 2009


From: root <root at voip.adenin.ru>

---
 src/app/controllers/vm_controller.rb |   13 +++-
 src/app/views/vm/show.rhtml          |   16 ++++
 src/db-omatic/db_omatic.rb           |  126 ++++++++++++++++++++++++++++++---
 src/db/migrate/006_create_vms.rb     |    1 +
 4 files changed, 143 insertions(+), 13 deletions(-)

diff --git a/src/app/controllers/vm_controller.rb b/src/app/controllers/vm_controller.rb
index 56501fd..0f43680 100644
--- a/src/app/controllers/vm_controller.rb
+++ b/src/app/controllers/vm_controller.rb
@@ -20,11 +20,22 @@ require 'socket'
 
 class VmController < ApplicationController
   # GETs should be safe (see http://www.w3.org/2001/tag/doc/whenToUseGet.html)
-  verify :method => :post, :only => [ :destroy, :create, :update ],
+  verify :method => :post, :only => [ :destroy, :create, :update , :change_ha_vm],
          :redirect_to => { :controller => 'dashboard' }
 
   before_filter :pre_vm_action, :only => [:vm_action, :cancel_queued_tasks, :console]
 
+   def change_ha_vm
+     vm = Vm.find_by_id(params[:id])
+     if vm.ha 
+       then vm.ha = false
+       else vm.ha = true
+     end
+     vm.save!	 
+     alert = "vm was change ha successfully "
+     render :json => { :object => "vm", :success => true, :alert => alert  }
+   end
+
   def index
       roles = "('" +
           Permission::roles_for_privilege(Permission::PRIV_VIEW).join("', '") +
diff --git a/src/app/views/vm/show.rhtml b/src/app/views/vm/show.rhtml
index f361131..b0479a6 100644
--- a/src/app/views/vm/show.rhtml
+++ b/src/app/views/vm/show.rhtml
@@ -29,6 +29,9 @@
         </a>
       <% end -%>
     <% end %>
+    <a href="#" onClick="change_ha_vm()" rel="facebox[.bolder]">
+      <%= image_tag "icon_x.png" %> <% if @vm.ha %> disable_ha (cur. enabled)<% else %> enable_ha (cur. disabled) <% end %>
+    </a>
     <a href="#confirm_cancel"  rel="facebox[.bolder]">
       <%= image_tag "icon_x.png" %> Cancel queued tasks
     </a> 
@@ -40,6 +43,19 @@
 <%= confirmation_dialog("confirm_cancel", "Are you sure?", "cancel_queued_tasks()") %>
 <%= confirmation_dialog("confirm_delete", "Are you sure?", "delete_vm()") %>
 <script type="text/javascript">
+   function change_ha_vm()
+   {
+     $(document).trigger('close.facebox');
+     $.post('<%= url_for :controller => "vm", :action => "change_ha_vm", :id => @vm %>',
+            {x: 1},
+             function(data,status){
+               $("#vms_grid").flexReload();
+               if (data.alert) {
+                 $.jGrowl(data.alert);
+               }
+               empty_summary('vms_selection', 'Virtual Machine');
+             }, 'json');
+   }
   function cancel_queued_tasks()
   {
     $(document).trigger('close.facebox');
diff --git a/src/db-omatic/db_omatic.rb b/src/db-omatic/db_omatic.rb
index 4afffb1..94d93fc 100755
--- a/src/db-omatic/db_omatic.rb
+++ b/src/db-omatic/db_omatic.rb
@@ -52,6 +52,8 @@ class DbOmatic < Qpid::Qmf::Console
                     state = Vm::STATE_PENDING
                 when "running"
                     state = Vm::STATE_RUNNING
+		    # must find host for current vm and set host_id
+		    # domain.broker 
                 when "blocked"
                     state = Vm::STATE_SUSPENDED #?
                 when "paused"
@@ -74,6 +76,91 @@ class DbOmatic < Qpid::Qmf::Console
         domain[:synced] = true
     end
 
+    #find hostname from values['node'] where values['class_type'] == 'domain'
+    def get_host_id(abank,bbank) #возвращает значение не то что надо - числовой идентификатор ноды возвращает
+     begin
+	@cached_objects.keys.each do |objkey|
+  	  if @cached_objects[objkey][:agent_bank].to_s == abank and @cached_objects[objkey][:broker_bank].to_s == bbank and @cached_objects[objkey][:class_type].to_s == 'node'
+		return Host.find(:first, :conditions => ['hostname = ?', at cached_objects[objkey]["hostname"].to_s]).id
+		    break
+	  end
+	end
+      rescue => ex
+	log("error in get_host_id")
+	log(ex)
+     end
+    end
+
+    def set_host(values,digit)
+      begin 
+	vm = Vm.find(:first, :conditions => ['description = ?',values["name"].to_s]) 
+	if vm and digit 
+	vm.host_id = digit 
+	vm.save!
+	else 
+		log("this vm not exist #{values["name"]}")
+	end
+      rescue => ex
+	puts "error when set_host for #{values["name"]}"
+	puts ex
+      end
+    end
+
+    def start_crashed_vm(vm)
+	task = VmTask.new( :user => 'db-omatic', :task_target => vm, :action => 'start_vm', :state => 'queued')   
+	task.save!
+	log("set task for start crashed vm #{vm.id}")
+    end
+
+    def fencing_by_ssh_reboot(hname)
+	begin
+ 		# Fix need - very insecure, and need manual configure keys, may be kerberos ticket is better ?  
+	Process.fork do
+ 	 `ssh #{hname} -i /root/.ssh/id_rsa  -o StrictHostKeyChecking=no "/sbin/init 6" & ` 
+	end
+		# also can use ipmi - `ipmitool -I lan -H #{"ipmi."+hname} -U admin -P password chassis power reset `
+	  log("success fencing#{hname}")
+		# execute in shell, must be identify key for login as root to storage server
+		# after restart crashed host need manualy remove this rule from iptables
+	
+	 return true
+	rescue => ex
+		log("error in fencing #{hname}")
+		log(ex)
+	end 
+    end
+
+    def set_domain_stopped(domain)
+      begin
+        vm = Vm.find(:first, :conditions => ['uuid = ?', domain['uuid']])
+        if vm != nil
+	
+	 if vm.state  == Vm::STATE_RUNNING and vm.ha
+		fencing_and_restart = true
+		fencing_by_ssh_reboot(vm.host.hostname)
+	 end
+	  puts fencing_and_restart
+          vm.state = Vm::STATE_STOPPED
+          vm.host_id = nil
+          vm.save!
+	  domain['state'] = 'crashed'
+	  if fencing_and_restart
+	    log("must restart vm ")
+            start_crashed_vm(vm)
+	  else
+		log("not running and not restart")
+	  end
+        else
+          log('vm == nil ')
+        end
+        log("domain  #{domain['id']} already stopped")
+      rescue => ex
+        log("can\'t set domain #{domain['id']} stopped")
+	log(ex)
+      end
+    end
+
+
     def update_host_state(host_info, state)
         db_host = Host.find(:first, :conditions => [ "hostname = ?", host_info['hostname'] ])
         if db_host
@@ -116,7 +203,6 @@ class DbOmatic < Qpid::Qmf::Console
             if values == nil
                 values = {}
                 @cached_objects[obj.object_id.to_s] = values
-
                 # Save the agent and broker bank so that we can tell what objects
                 # are expired when the heartbeat for them stops.
                 values[:broker_bank] = obj.object_id.broker_bank
@@ -130,20 +216,38 @@ class DbOmatic < Qpid::Qmf::Console
             end
 
             domain_state_change = false
-
+	    change_node = false
             obj.properties.each do |key, newval|
                 if values[key.to_s] != newval
                     values[key.to_s] = newval
                     #log "new value for property #{key} : #{newval}"
                     if type == "domain" and key.to_s == "state"
-                        domain_state_change = true
+			domain_state_change = true
                     end
+		    if type == "domain" and key.to_s == "node"
+			log("DEBUG change node = true")
+			change_node = true
+		    end
+
                 end
             end
 
             if domain_state_change
+		log("DEBUG update_domain_state")
                 update_domain_state(values)
             end
+	if change_node		
+	values.each do |key,val|
+	  if key == 'state' and val == 'running'
+		abank = values['node'].to_s.split('-')[3]
+		bbank = values['node'].to_s.split('-')[4]
+		@@host_id = get_host_id(abank,bbank)
+		set_host(values,@@host_id)
+		log("update node data for #{values['name']}")
+		break
+	  end
+	end
+	end
 
             if new_object
                 if type == "node"
@@ -187,11 +291,6 @@ class DbOmatic < Qpid::Qmf::Console
         end
     end
 
-
-    def del_agent(agent)
-        agent_disconnected(agent)
-    end
-
     # This method marks objects associated with the given agent as timed out/invalid.  Called either
     # when the agent heartbeats out, or we get a del_agent callback.
     def agent_disconnected(agent)
@@ -204,11 +303,14 @@ class DbOmatic < Qpid::Qmf::Console
                 if values[:timed_out] == false
                     if values[:class_type] == 'node'
                         update_host_state(values, Host::STATE_UNAVAILABLE)
+			values[:timed_out] = true
                     elsif values[:class_type] == 'domain'
-                        update_domain_state(values, Vm::STATE_UNREACHABLE)
-                    end
+			log("set_domain_stopped")
+			set_domain_stopped(values)
+			values[:timed_out] = true 
+		@cached_objects.delete(objkey)
+		    end
                 end
-            values[:timed_out] = true
             end
         end
     end
@@ -216,7 +318,6 @@ class DbOmatic < Qpid::Qmf::Console
     # The opposite of above, this is called when an agent is alive and well and makes sure
     # all of the objects associated with it are marked as valid.
     def agent_connected(agent)
-
         @cached_objects.keys.each do |objkey|
             if @cached_objects[objkey][:broker_bank] == agent.broker.broker_bank and
                @cached_objects[objkey][:agent_bank] == agent.agent_bank
@@ -248,6 +349,7 @@ class DbOmatic < Qpid::Qmf::Console
         db_vm = Vm.find(:all)
         db_vm.each do |vm|
             log "Marking vm #{vm.description} as stopped."
+	    vm.host_id = nil
             vm.state = Vm::STATE_STOPPED
             vm.save
         end
diff --git a/src/db/migrate/006_create_vms.rb b/src/db/migrate/006_create_vms.rb
index 74794b6..a5460f0 100644
--- a/src/db/migrate/006_create_vms.rb
+++ b/src/db/migrate/006_create_vms.rb
@@ -34,6 +34,7 @@ class CreateVms < ActiveRecord::Migration
       t.string  :boot_device,    :null => false
       t.integer :vnc_port
       t.integer :lock_version,   :default => 0
+      t.boolean :ha , :default => true 
     end
     execute "alter table vms add constraint fk_vms_hosts
              foreign key (host_id) references hosts(id)"
-- 
1.5.5.6




More information about the ovirt-devel mailing list