[Ovirt-devel] [PATCH] Add virtual machine restart if host crashed. Use fencing by ssh and id_rsa key.
pronix.service at gmail.com
pronix.service at gmail.com
Tue Feb 3 16:45:01 UTC 2009
From: root <root at voip.adenin.ru>
---
src/app/controllers/vm_controller.rb | 13 +++-
src/app/views/vm/show.rhtml | 16 ++++
src/db-omatic/db_omatic.rb | 126 ++++++++++++++++++++++++++++++---
src/db/migrate/006_create_vms.rb | 1 +
4 files changed, 143 insertions(+), 13 deletions(-)
diff --git a/src/app/controllers/vm_controller.rb b/src/app/controllers/vm_controller.rb
index 56501fd..0f43680 100644
--- a/src/app/controllers/vm_controller.rb
+++ b/src/app/controllers/vm_controller.rb
@@ -20,11 +20,22 @@ require 'socket'
class VmController < ApplicationController
# GETs should be safe (see http://www.w3.org/2001/tag/doc/whenToUseGet.html)
- verify :method => :post, :only => [ :destroy, :create, :update ],
+ verify :method => :post, :only => [ :destroy, :create, :update , :change_ha_vm],
:redirect_to => { :controller => 'dashboard' }
before_filter :pre_vm_action, :only => [:vm_action, :cancel_queued_tasks, :console]
+ def change_ha_vm
+ vm = Vm.find_by_id(params[:id])
+ if vm.ha
+ then vm.ha = false
+ else vm.ha = true
+ end
+ vm.save!
+ alert = "vm was change ha successfully "
+ render :json => { :object => "vm", :success => true, :alert => alert }
+ end
+
def index
roles = "('" +
Permission::roles_for_privilege(Permission::PRIV_VIEW).join("', '") +
diff --git a/src/app/views/vm/show.rhtml b/src/app/views/vm/show.rhtml
index f361131..b0479a6 100644
--- a/src/app/views/vm/show.rhtml
+++ b/src/app/views/vm/show.rhtml
@@ -29,6 +29,9 @@
</a>
<% end -%>
<% end %>
+ <a href="#" onClick="change_ha_vm()" rel="facebox[.bolder]">
+ <%= image_tag "icon_x.png" %> <% if @vm.ha %> disable_ha (cur. enabled)<% else %> enable_ha (cur. disabled) <% end %>
+ </a>
<a href="#confirm_cancel" rel="facebox[.bolder]">
<%= image_tag "icon_x.png" %> Cancel queued tasks
</a>
@@ -40,6 +43,19 @@
<%= confirmation_dialog("confirm_cancel", "Are you sure?", "cancel_queued_tasks()") %>
<%= confirmation_dialog("confirm_delete", "Are you sure?", "delete_vm()") %>
<script type="text/javascript">
+ function change_ha_vm()
+ {
+ $(document).trigger('close.facebox');
+ $.post('<%= url_for :controller => "vm", :action => "change_ha_vm", :id => @vm %>',
+ {x: 1},
+ function(data,status){
+ $("#vms_grid").flexReload();
+ if (data.alert) {
+ $.jGrowl(data.alert);
+ }
+ empty_summary('vms_selection', 'Virtual Machine');
+ }, 'json');
+ }
function cancel_queued_tasks()
{
$(document).trigger('close.facebox');
diff --git a/src/db-omatic/db_omatic.rb b/src/db-omatic/db_omatic.rb
index 4afffb1..94d93fc 100755
--- a/src/db-omatic/db_omatic.rb
+++ b/src/db-omatic/db_omatic.rb
@@ -52,6 +52,8 @@ class DbOmatic < Qpid::Qmf::Console
state = Vm::STATE_PENDING
when "running"
state = Vm::STATE_RUNNING
+ # must find host for current vm and set host_id
+ # domain.broker
when "blocked"
state = Vm::STATE_SUSPENDED #?
when "paused"
@@ -74,6 +76,91 @@ class DbOmatic < Qpid::Qmf::Console
domain[:synced] = true
end
+  # Look up the database id of the Host whose cached 'node' object was
+  # reported by the given agent/broker bank pair.
+  #
+  # abank, bbank - agent bank and broker bank, compared as strings against the
+  #                :agent_bank / :broker_bank stored with each cached object
+  #                (the caller extracts them from a domain's 'node' value).
+  #
+  # Returns the Host id, or nil when no cached node matches, when the matching
+  # hostname is not in the database, or when the lookup raises.
+  #
+  # NOTE(review): the original author's comment here (in Russian) said that
+  # this returns the wrong value -- the numeric identifier of the node.
+  # TODO confirm that callers really want Host#id.
+  def get_host_id(abank,bbank)
+    begin
+      @cached_objects.each_value do |cached|
+        next unless cached[:class_type].to_s == 'node' &&
+                    cached[:agent_bank].to_s == abank &&
+                    cached[:broker_bank].to_s == bbank
+        host = Host.find(:first, :conditions => ['hostname = ?', cached["hostname"].to_s])
+        # Only the first matching node is considered; an unknown hostname
+        # yields nil rather than a NoMethodError on nil.
+        return host ? host.id : nil
+      end
+    rescue => ex
+      log("error in get_host_id")
+      log(ex)
+    end
+    # Explicit nil: without it the method would return whatever the iterator
+    # or log() returned, which callers would mistake for a host id.
+    nil
+  end
+
+ def set_host(values,digit)
+ begin
+ vm = Vm.find(:first, :conditions => ['description = ?',values["name"].to_s])
+ if vm and digit
+ vm.host_id = digit
+ vm.save!
+ else
+ log("this vm not exist #{values["name"]}")
+ end
+ rescue => ex
+ puts "error when set_host for #{values["name"]}"
+ puts ex
+ end
+ end
+
+ def start_crashed_vm(vm)
+ task = VmTask.new( :user => 'db-omatic', :task_target => vm, :action => 'start_vm', :state => 'queued')
+ task.save!
+ log("set task for start crashed vm #{vm.id}")
+ end
+
+  # Fence a presumed-crashed host by asking it, over ssh, to reboot
+  # ("/sbin/init 6").
+  #
+  # hname - hostname of the host to fence.
+  #
+  # Returns true once the ssh child process has been started (this does NOT
+  # prove that the host rebooted), false when hname is not a plain hostname or
+  # the child could not be spawned.
+  #
+  # FIXME (from the original patch): very insecure -- it needs manually
+  # configured root keys (/root/.ssh/id_rsa) and disables host key checking;
+  # a Kerberos ticket may be better. IPMI is an alternative, e.g.
+  #   ipmitool -I lan -H ipmi.<hname> -U admin -P password chassis power reset
+  # Author's notes: the identity key must allow root login on the target, and
+  # after the crashed host restarts a rule has to be removed manually from
+  # iptables (which rule is not visible in this patch).
+  def fencing_by_ssh_reboot(hname)
+    begin
+      host = hname.to_s
+      # The name ends up on an ssh command line: refuse anything that is not a
+      # plain hostname (in particular a leading '-' or shell metacharacters).
+      unless host =~ /\A[A-Za-z0-9][A-Za-z0-9._\-]*\z/
+        log("error in fencing #{hname}")
+        return false
+      end
+      pid = Process.fork do
+        begin
+          # exec with an argument list bypasses the shell entirely, so the
+          # hostname cannot inject extra commands.
+          exec('ssh', '-i', '/root/.ssh/id_rsa', '-o', 'StrictHostKeyChecking=no',
+               host, '/sbin/init 6')
+        rescue SystemCallError
+          # ssh could not be started; leave the child without running the
+          # parent's at_exit handlers.
+          exit!(127)
+        end
+      end
+      # Reap the child in the background so it does not linger as a zombie.
+      Process.detach(pid)
+      log("fencing requested for #{host}")
+      return true
+    rescue => ex
+      log("error in fencing #{hname}")
+      log(ex)
+      return false
+    end
+  end
+
+ def set_domain_stopped(domain)
+ begin
+ vm = Vm.find(:first, :conditions => ['uuid = ?', domain['uuid']])
+ if vm != nil
+
+ if vm.state == Vm::STATE_RUNNING and vm.ha
+ fencing_and_restart = true
+ fencing_by_ssh_reboot(vm.host.hostname)
+ end
+ puts fencing_and_restart
+ vm.state = Vm::STATE_STOPPED
+ vm.host_id = nil
+ vm.save!
+ domain['state'] = 'crashed'
+ if fencing_and_restart
+ log("must restart vm ")
+ start_crashed_vm(vm)
+ else
+ log("not running and not restart")
+ end
+ else
+ log('vm == nil ')
+ end
+ log("domain #{domain['id']} already stopped")
+ rescue => ex
+ log("can\'t set domain #{domain['id']} stopped")
+ log(ex)
+ end
+ end
+
+
def update_host_state(host_info, state)
db_host = Host.find(:first, :conditions => [ "hostname = ?", host_info['hostname'] ])
if db_host
@@ -116,7 +203,6 @@ class DbOmatic < Qpid::Qmf::Console
if values == nil
values = {}
@cached_objects[obj.object_id.to_s] = values
-
# Save the agent and broker bank so that we can tell what objects
# are expired when the heartbeat for them stops.
values[:broker_bank] = obj.object_id.broker_bank
@@ -130,20 +216,38 @@ class DbOmatic < Qpid::Qmf::Console
end
domain_state_change = false
-
+ change_node = false
obj.properties.each do |key, newval|
if values[key.to_s] != newval
values[key.to_s] = newval
#log "new value for property #{key} : #{newval}"
if type == "domain" and key.to_s == "state"
- domain_state_change = true
+ domain_state_change = true
end
+ if type == "domain" and key.to_s == "node"
+ log("DEBUG change node = true")
+ change_node = true
+ end
+
end
end
if domain_state_change
+ log("DEBUG update_domain_state")
update_domain_state(values)
end
+ if change_node
+ values.each do |key,val|
+ if key == 'state' and val == 'running'
+ abank = values['node'].to_s.split('-')[3]
+ bbank = values['node'].to_s.split('-')[4]
+ @@host_id = get_host_id(abank,bbank)
+ set_host(values,@@host_id)
+ log("update node data for #{values['name']}")
+ break
+ end
+ end
+ end
if new_object
if type == "node"
@@ -187,11 +291,6 @@ class DbOmatic < Qpid::Qmf::Console
end
end
-
- def del_agent(agent)
- agent_disconnected(agent)
- end
-
# This method marks objects associated with the given agent as timed out/invalid. Called either
# when the agent heartbeats out, or we get a del_agent callback.
def agent_disconnected(agent)
@@ -204,11 +303,14 @@ class DbOmatic < Qpid::Qmf::Console
if values[:timed_out] == false
if values[:class_type] == 'node'
update_host_state(values, Host::STATE_UNAVAILABLE)
+ values[:timed_out] = true
elsif values[:class_type] == 'domain'
- update_domain_state(values, Vm::STATE_UNREACHABLE)
- end
+ log("set_domain_stopped")
+ set_domain_stopped(values)
+ values[:timed_out] = true
+ @cached_objects.delete(objkey)
+ end
end
- values[:timed_out] = true
end
end
end
@@ -216,7 +318,6 @@ class DbOmatic < Qpid::Qmf::Console
# The opposite of above, this is called when an agent is alive and well and makes sure
# all of the objects associated with it are marked as valid.
def agent_connected(agent)
-
@cached_objects.keys.each do |objkey|
if @cached_objects[objkey][:broker_bank] == agent.broker.broker_bank and
@cached_objects[objkey][:agent_bank] == agent.agent_bank
@@ -248,6 +349,7 @@ class DbOmatic < Qpid::Qmf::Console
db_vm = Vm.find(:all)
db_vm.each do |vm|
log "Marking vm #{vm.description} as stopped."
+ vm.host_id = nil
vm.state = Vm::STATE_STOPPED
vm.save
end
diff --git a/src/db/migrate/006_create_vms.rb b/src/db/migrate/006_create_vms.rb
index 74794b6..a5460f0 100644
--- a/src/db/migrate/006_create_vms.rb
+++ b/src/db/migrate/006_create_vms.rb
@@ -34,6 +34,7 @@ class CreateVms < ActiveRecord::Migration
t.string :boot_device, :null => false
t.integer :vnc_port
t.integer :lock_version, :default => 0
+ t.boolean :ha , :default => true
end
execute "alter table vms add constraint fk_vms_hosts
foreign key (host_id) references hosts(id)"
--
1.5.5.6
More information about the ovirt-devel
mailing list