Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle services in FAILED_DEPLOYING #31

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion share/swagger/api.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
openapi: "3.0.0"
info:
version: "0.9.12"
version: "0.9.13"
title: "Provisioning Engine REST API"
description: Provides FaaS capabilities by leveraging features from OpenNebula. Allows to manage Serverless Runtime instances based on a group of Functions defined on request.

Expand Down
37 changes: 36 additions & 1 deletion src/server/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@ module ProvisionEngine
#
class CloudClient

# Names of the service states.
#
# Order matters: service_fail? indexes this list with the state value read
# from the service body, so an entry's position is the state it names.
# Frozen so the lookup table cannot be mutated at runtime.
SERVICE_STATES = %w[
    PENDING
    DEPLOYING
    RUNNING
    UNDEPLOYING
    WARNING
    DONE
    FAILED_UNDEPLOYING
    FAILED_DEPLOYING
    SCALING
    FAILED_SCALING
    COOLDOWN
    DEPLOYING_NETS
    UNDEPLOYING_NETS
    FAILED_DEPLOYING_NETS
    FAILED_UNDEPLOYING_NETS
    HOLD
].freeze


def self.map_error_oned(xmlrpc_errno)
# ESUCCESS = 0x0000
# EAUTHENTICATION = 0x0100
Expand Down Expand Up @@ -107,6 +127,13 @@ def service_delete(id)
return_http_response(response)
end

#
# Issues the 'recover' action on a service
#
# @param [Integer] id Service ID
# @param [Hash] options Parameters forwarded with the action. Callers pass
#   { 'delete' => true } to force the deletion of the service.
#
# @return whatever return_http_response builds from the action response
#   (callers read it as a [response_code, body] pair)
#
def service_recover(id, options = {})
    # Only announce a forced deletion when it was actually requested
    if options['delete']
        @logger.debug("Forcing service #{id} deletion")
    else
        @logger.debug("Recovering service #{id}")
    end

    response = service_action(id, 'recover', options)
    return_http_response(response)
end

def service_template_get(id)
response = @client_oneflow.get("/service_template/#{id}")
return_http_response(response)
Expand All @@ -124,6 +151,14 @@ def service_template_instantiate(id, options = {})
return_http_response(response)
end

#
# Tells whether a service is in one of the FAILED states
#
# @param [Hash] service Service document, as consumed by service_state
#
# @return [Boolean] true when the state name contains 'FAILED'; false
#   otherwise, including when the state is missing, not an Integer or
#   outside the SERVICE_STATES table
#
def service_fail?(service)
    state = service_state(service)

    # A negative index would silently wrap around the table, and nil or
    # unknown values would raise; none of them identify a FAILED state
    return false unless state.is_a?(Integer) && state >= 0

    SERVICE_STATES[state].to_s.include?('FAILED')
end

#
# Reads the state value stored in a service document
#
# @param [Hash] service Service document
#
# @return the value found under DOCUMENT > TEMPLATE > BODY > state
#
def service_state(service)
    ['DOCUMENT', 'TEMPLATE', 'BODY', 'state'].reduce(service) do |node, key|
        node[key]
    end
end

private

def return_http_response(response)
Expand All @@ -146,7 +181,7 @@ def service_template_action(id, action, options = {})
end

#
# Performs an action on a service
#
# @param [Integer] id Service ID
# @param [String] action Name of the action to perform
# @param [Hash] options Parameters forwarded to flow_element_action
#
# @return the result of flow_element_action for the service action URL
#
def service_action(id, action, options = {})
    # The URL must be a plain String; the former assignment that also
    # referenced an undefined `body` raised NameError and was dropped
    url = "/service/#{id}/action"

    flow_element_action(url, action, options)
end
Expand Down
172 changes: 125 additions & 47 deletions src/server/runtime.rb
Original file line number Diff line number Diff line change
Expand Up @@ -178,31 +178,41 @@ def self.create(client, specification)
rc = response[0]
rb = response[1]

return [rc, rb] if rc != 201

service_id = rb['DOCUMENT']['ID']
case rc
when 200
service_id = rb['DOCUMENT']['ID'].to_i
specification['SERVICE_ID'] = service_id
client.logger.info("#{SR} Service #{service_id} created")

response = ServerlessRuntime.service_sync(client, specification,
service_id)
rc = response[0]

client.logger.info("#{SR} Service #{service_id} created")
return [rc, response[1]] if rc != 200

ServerlessRuntime.service_sync(client, specification, service_id)
client.logger.info("Allocating #{SR} Document")
client.logger.debug(specification)

client.logger.info("Allocating #{SR} Document")
client.logger.debug(specification)
xml = ServerlessRuntime.build_xml
runtime = ServerlessRuntime.new(xml, client.client_oned)
response = runtime.allocate(specification)

xml = ServerlessRuntime.build_xml
runtime = ServerlessRuntime.new(xml, client.client_oned)
response = runtime.allocate(specification)
if OpenNebula.is_error?(response)
return [ProvisionEngine::CloudClient.map_error_oned(response.errno),
response.message]
end

if OpenNebula.is_error?(response)
return [ProvisionEngine::CloudClient.map_error_oned(response.errno),
response.message]
end
client.logger.info("Created #{SR} Document")

client.logger.info("Created #{SR} Document")
runtime.info

runtime.info
return [201, runtime]
when 204
return [500, rb]

[201, runtime]
else
return response
end
end

def self.get(client, id)
Expand All @@ -212,12 +222,15 @@ def self.get(client, id)
runtime.cclient = client

# DocumentJSON.info doesn't have error code
return [404, 'Document not found'] if runtime.name.nil?
return [404, "#{SR} document not found"] if runtime.name.nil?

runtime.load_body
service_id = runtime.body['SERVICE_ID']

ServerlessRuntime.service_sync(client, runtime.body, service_id)
response = ServerlessRuntime.service_sync(client, runtime.body, service_id)
rc = response[0]
return [rc, response[1]] if rc != 200

runtime.update

[200, runtime]
Expand Down Expand Up @@ -256,6 +269,40 @@ def delete
[204, '']
end

#
# Recovers a service or, when requested, forces its deletion
#
# @param [CloudClient] client OpenNebula interface
# @param [Integer] service_id Service to recover
# @param [Hash] options Pass :delete => true to force the service deletion
#
# @return [Array] [response_code, body]
#   - deletion: the recover response when it answers 204, otherwise
#     [response_code, error message]
#   - recovery: [200, service] when the service left the failed state,
#     [500, error message] when it is still failed, otherwise the failing
#     response of the recover or get call
#
def self.recover_service(client, service_id, options = {})
    if options[:delete]
        response = client.service_recover(service_id, { 'delete' => true })
        rc = response[0]

        return response if rc == 204

        client.logger.error(response[1])
        return [rc, "Could not force service #{service_id} deletion"]
    end

    response = client.service_recover(service_id)

    # The deletion path above treats 204 as the success code of the very
    # same recover call, so 204 must not be reported as a failure here.
    # NOTE(review): confirm which code the service action endpoint returns
    unless [201, 204].include?(response[0])
        client.logger.error("Could not recover service #{service_id}")
        return response
    end

    # Re-read the service to verify the recover action took effect
    response = client.service_get(service_id)
    return response unless response[0] == 200

    service = response[1]

    if client.service_fail?(service)
        client.logger.error(service)
        return [500, "Cannot recover #{service_id} from failure"]
    end

    [200, service]
end

#
# Validates the Serverless Runtime specification using the distributed schema
#
Expand Down Expand Up @@ -316,48 +363,48 @@ def to_sr
end

#
# Updates Serverless Runtime definition based on the underlying elements state
#
# Queries the service up to `timeout` times, pausing one second after every
# attempt in which some role still has fewer nodes than its cardinality.
# Once every role is complete, the information returned by xaas_template is
# merged into the runtime entry named after each role.
#
# @param [CloudClient] client OpenNebula interface
# @param [Hash] runtime Serverless Runtime definition to be updated
# @param [Integer] service_id OneFlow service ID mapped to the Serverless Runtime
# @param [Integer] timeout How long to wait for Role VMs to be created
#
# @return [Array] [200, ''] when the runtime was updated, [504, message]
#   when the roles were not complete after `timeout` attempts, or the
#   [response_code, body] of a failed service query
#
def self.service_sync(client, runtime, service_id, timeout = 30)
    1.upto(timeout) do |t|
        response = client.service_get(service_id)
        rc = response[0]
        rb = response[1]

        return [rc, rb] if rc != 200

        service = rb
        roles = service['DOCUMENT']['TEMPLATE']['BODY']['roles']

        # First role that does not have all of its nodes yet, if any
        pending = roles.find {|role| role['nodes'].size < role['cardinality'] }

        if pending
            msg = "Waiting #{t} seconds for service role #{pending['name']} VMs"
            client.logger.debug(msg)
            sleep 1

            next
        end

        client.logger.debug(service)

        roles.each do |role|
            # Create the entry when the definition lacks this role
            (runtime[role['name']] ||= {}).merge!(xaas_template(client, role))
        end

        return [200, '']
    end

    # Reached only when every attempt found an incomplete role. Honors the
    # timeout argument instead of a hard-coded 30 attempts.
    msg = "OpenNebula did not create VMs for the #{SR} service after #{timeout} seconds"
    [504, msg]
end

Expand Down Expand Up @@ -436,7 +483,38 @@ def self.to_service(client, specification)
end
end

return client.service_template_instantiate(service_template['ID'], merge_template)
response = client.service_template_instantiate(service_template['ID'],
merge_template)
rc = response[0]
rb = response[1]

return response if rc != 201

service_id = rb['DOCUMENT']['ID'].to_i

response = client.service_get(service_id)
rc = response[0]
rb = response[1]

return response if rc != 200

service = rb

if client.service_fail?(service)
error = "#{SR} service #{service_id} entered FAILED state"

client.logger.error(error)
client.logger.debug(service)

response = recover_service(client, service_id, { :delete => true })

if response[0] == 204
service_log = service['DOCUMENT']['TEMPLATE']['BODY']['log']
response[1] = { 'error' => error, 'message' => service_log }
end
end

return response
end

msg = "Cannot find a valid service template for the specified flavours: #{tuple}\n"
Expand Down
14 changes: 7 additions & 7 deletions src/server/server.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
require 'client'
require 'runtime'

VERSION = '0.9.12'
VERSION = '0.9.13'

############################################################################
# Define API Helpers
Expand Down Expand Up @@ -90,8 +90,8 @@ def log_response(level, code, data, message)
end

settings.logger.info("#{RC}: #{code}")
settings.logger.debug("Response Body: #{body}")
settings.logger.send(level, message)
settings.logger.debug("Response Body: #{body}")
end

############################################################################
Expand Down Expand Up @@ -158,7 +158,7 @@ def log_response(level, code, data, message)
log_response('error', rc, rb, "Timeout when creating #{SR}")
halt rc, json_response(rc, rb)
else
log_response('error', rc, rb, "Failed to create #{SR}")
log_response('error', 500, rb, "Failed to create #{SR}")
halt 500, json_response(500, rb)
end
end
Expand All @@ -177,7 +177,7 @@ def log_response(level, code, data, message)

case rc
when 200
log_response('info', rc, rb, SR)
log_response('info', rc, rb, "#{SR} retrieved")
json_response(rc, rb.to_sr)
when 401
log_response('error', rc, rb, NO_AUTH)
Expand All @@ -189,7 +189,7 @@ def log_response(level, code, data, message)
log_response('error', rc, rb, SR_NOT_FOUND)
halt rc, json_response(rc, rb)
else
log_response('error', rc, rb, "Failed to get #{SR}")
log_response('error', 500, rb, "Failed to retrieve #{SR}")
halt 500, json_response(500, rb)
end
end
Expand Down Expand Up @@ -244,7 +244,7 @@ def log_response(level, code, data, message)
log_response('error', rc, rb, NO_DELETE)
halt rc, json_response(rc, rb)
else
log_response('error', rc, rb, NO_DELETE)
log_response('error', 500, rb, NO_DELETE)
halt 500, json_response(500, rb)
end
when 401
Expand All @@ -257,7 +257,7 @@ def log_response(level, code, data, message)
log_response('error', rc, rb, SR_NOT_FOUND)
halt rc, json_response(rc, rb)
else
log_response('error', rc, rb, NO_DELETE)
log_response('error', 500, rb, NO_DELETE)
halt 500, json_response(500, rb)
end
end
Expand Down
5 changes: 5 additions & 0 deletions tests/init.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,9 @@
end

examples?('inspect logs', rspec_conf[:conf])

# cleanup possible leftover services for the test user ENV['TESTS_AUTH'][0]
after(:all) do
# TODO: Skip if oneadmin
end
end
Loading