Skip to content

Commit

Permalink
Set vmSettings as primary channel for extensions goal state (Azure#2475)
Browse files Browse the repository at this point in the history
* Remove goal state comparison

* Remove fallback to extensionsConfig

* pylint warnings

* Remove double-logging

* Update comments

Co-authored-by: narrieta <narrieta>
  • Loading branch information
narrieta authored Jan 20, 2022
1 parent 034c868 commit ea00436
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 300 deletions.
82 changes: 0 additions & 82 deletions azurelinuxagent/common/protocol/extensions_goal_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,9 @@

import azurelinuxagent.common.logger as logger
from azurelinuxagent.common.AgentGlobals import AgentGlobals
from azurelinuxagent.common.exception import AgentError
from azurelinuxagent.common.utils import textutil


class GoalStateMismatchError(AgentError):
def __init__(self, message, attribute):
super(GoalStateMismatchError, self).__init__(message)
self.attribute = attribute


class ExtensionsGoalState(object):
"""
ExtensionsGoalState represents the extensions information in the goal state; that information can originate from
Expand Down Expand Up @@ -90,81 +83,6 @@ def get_redacted_text(self):
"""
raise NotImplementedError()

@staticmethod
def compare(from_extensions_config, from_vm_settings):
"""
Compares the two instances given as argument and logs a GoalStateMismatch message if they are different.
NOTE: The order of the two instances is important for the debug info to be logged correctly (ExtensionsConfig first, vmSettings second)
"""
context = [] # used to keep track of the attribute that is being compared

def compare_goal_states(first, second):
# A mismatch on the timestamp or the activity ID (and maybe also on the correlation ID) most likely indicate that we are comparing two
# different goal states so we check them first (we raise an exception as soon as a mismatch is detected). A mismatch on the other
# attributes likely indicates an actual issue on vmSettings or extensionsConfig).
compare_attributes(first, second, "created_on_timestamp")
compare_attributes(first, second, "activity_id")
compare_attributes(first, second, "correlation_id")
compare_attributes(first, second, "status_upload_blob")
compare_attributes(first, second, "status_upload_blob_type")
compare_attributes(first, second, "required_features")
compare_attributes(first, second, "on_hold")
compare_array(first.agent_manifests, second.agent_manifests, compare_agent_manifests, "agent_manifests")
compare_array(first.extensions, second.extensions, compare_extensions, "extensions")

def compare_agent_manifests(first, second):
compare_attributes(first, second, "family")
compare_attributes(first, second, "requested_version_string")
compare_attributes(first, second, "uris", ignore_order=True)

def compare_extensions(first, second):
compare_attributes(first, second, "name")
compare_attributes(first, second, "version")
compare_attributes(first, second, "state")
compare_attributes(first, second, "supports_multi_config")
compare_attributes(first, second, "manifest_uris", ignore_order=True)
compare_array(first.settings, second.settings, compare_settings, "settings")

def compare_settings(first, second):
compare_attributes(first, second, "name")
compare_attributes(first, second, "sequenceNumber")
compare_attributes(first, second, "publicSettings")
compare_attributes(first, second, "protectedSettings")
compare_attributes(first, second, "certificateThumbprint")
compare_attributes(first, second, "dependencyLevel")
compare_attributes(first, second, "state")

def compare_array(first, second, comparer, name):
if len(first) != len(second):
raise Exception("Number of items in {0} mismatch: {1} != {2}".format(name, len(first), len(second)))
for i in range(len(first)):
context.append("{0}[{1}]".format(name, i))
try:
comparer(first[i], second[i])
finally:
context.pop()

def compare_attributes(first, second, attribute, ignore_order=False):
context.append(attribute)
try:
first_value = getattr(first, attribute)
second_value = getattr(second, attribute)
if ignore_order:
first_value = first_value[:]
first_value.sort()
second_value = second_value[:]
second_value.sort()

if first_value != second_value:
mistmatch = "[{0}] != [{1}] (Attribute: {2})".format(first_value, second_value, ".".join(context))
message = "Mismatch in Goal States [Incarnation {0}] != [Etag: {1}]: {2}".format(from_extensions_config.id, from_vm_settings.id, mistmatch)
raise GoalStateMismatchError(message, attribute)
finally:
context.pop()

compare_goal_states(from_extensions_config, from_vm_settings)

def _do_common_validations(self):
"""
Does validations common to vmSettings and ExtensionsConfig
Expand Down
85 changes: 34 additions & 51 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from azurelinuxagent.common.exception import ProtocolNotFoundError, \
ResourceGoneError, ExtensionDownloadError, InvalidContainerError, ProtocolError, HttpError
from azurelinuxagent.common.future import httpclient, bytebuffer, ustr
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateMismatchError
from azurelinuxagent.common.protocol.extensions_goal_state_factory import ExtensionsGoalStateFactory
from azurelinuxagent.common.protocol.goal_state import GoalState, TRANSPORT_CERT_FILE_NAME, TRANSPORT_PRV_FILE_NAME
from azurelinuxagent.common.protocol.hostplugin import HostPluginProtocol
Expand All @@ -47,7 +46,7 @@
from azurelinuxagent.common.utils.cryptutil import CryptUtil
from azurelinuxagent.common.utils.flexible_version import FlexibleVersion
from azurelinuxagent.common.utils.textutil import parse_doc, findall, find, \
findtext, gettext, remove_bom, get_bytes_from_pem, parse_json, format_exception
findtext, gettext, remove_bom, get_bytes_from_pem, parse_json
from azurelinuxagent.common.version import AGENT_NAME, CURRENT_VERSION

VERSION_INFO_URI = "http://{0}/?comp=versions"
Expand Down Expand Up @@ -780,45 +779,42 @@ def update_host_plugin_from_goal_state(self):
goal_state = GoalState(self)
self._update_host_plugin(goal_state.container_id, goal_state.role_config_name)

def update_goal_state(self, force_update=False, is_retry=False):
def update_goal_state(self, force_update=False):
"""
Updates the goal state if the incarnation or etag changed or if 'force_update' is True
"""
try:
#
# The entire goal state needs to be retrieved from the WireServer (via the GoalState class), and the HostGAPlugin
# The goal state needs to be retrieved using both the WireServer (via the GoalState class) and the HostGAPlugin
# (via the self._fetch_vm_settings_goal_state method).
#
# We fetch it in 3 parts:
# We always need at least 2 queries: one to the WireServer (to check for incarnation changes) and one to the HostGAPlugin
# (to check for extension updates). Note that vmSettings are not a full goal state; they include only the extension information
# (minus certificates). The check on incarnation (which is also not included in the vmSettings) is needed to check for changes
# in, for example, the remote users for JIT access.
#
# 1) The "main" goal state from the WireServer, which includes the incarnation, container ID, role config, and URLs
# to the rest of the goal state (certificates, remote users, extensions config, etc). We do this first because
# we need to initialize the HostGAPlugin with the container ID and role config.
# We start by fetching the goal state from the WireServer. The response to this initial query will include the incarnation,
# container ID, role config, and URLs to the rest of the goal state (certificates, remote users, extensions config, etc). We
# do this first because we need to initialize the HostGAPlugin with the container ID and role config.
#
goal_state = GoalState(self)

self._update_host_plugin(goal_state.container_id, goal_state.role_config_name)

#
# 2) Then we fetch the vmSettings from the HostGAPlugin. We do this before fetching the rest of the goal state from the
# WireServer to minimize the time between the initial call to the WireServer and the call to the HostGAPlugin (and hence
# reduce the window in which a new goal state may arrive in-between the 2 calls)
# Then we fetch the vmSettings from the HostGAPlugin; the response will include the goal state for extensions.
#
vm_settings_goal_state, vm_settings_goal_state_updated = (None, False)

if conf.get_enable_fast_track():
try:
vm_settings_goal_state, vm_settings_goal_state_updated = self._fetch_vm_settings_goal_state(force_update=force_update)

except Exception as error:
# _fetch_vm_settings_goal_state() does its own detailed error reporting and raises ProtocolError; do not report those
if not isinstance(error, ProtocolError):
self._vm_settings_error_reporter.report_error(format_exception(error))
self._vm_settings_error_reporter.report_summary()
except VmSettingsNotSupported:
pass # if vmSettings are not supported we use extensionsConfig below

#
# 3) Lastly we, fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or
# a change in the incarnation). Note that if we fetch the full goal state we also update self._goal_state.
# Now we fetch the rest of the goal state from the WireServer (but ony if needed: initialization, a "forced" update, or
# a change in the incarnation). Note that if we fetch the full goal state we also update self._goal_state.
#
if force_update:
logger.info("Forcing an update of the goal state..")
Expand All @@ -833,28 +829,9 @@ def update_goal_state(self, force_update=False, is_retry=False):
goal_state_updated = True

#
# If we fetched the vmSettings then compare them against extensionsConfig and use them for the extensions goal state if
# everything matches, otherwise use extensionsConfig.
# And, lastly, we use extensionsConfig if we don't have the vmSettings (Fast Track may be disabled or not supported).
#
use_vm_settings = False
if vm_settings_goal_state is not None:
if not goal_state_updated and not vm_settings_goal_state_updated: # no need to compare them, just use vmSettings
use_vm_settings = True
else:
try:
ExtensionsGoalState.compare(self._goal_state.extensions_config, vm_settings_goal_state)
use_vm_settings = True
except GoalStateMismatchError as mismatch:
if not is_retry and mismatch.attribute in ("created_on_timestamp", "activity_id"):
# this may be OK; a new goal state may have arrived in-between the calls to the HostGAPlugin and the WireServer;
# retry one time after a delay and then report the error if it happens again.
time.sleep(conf.get_goal_state_period())
self.update_goal_state(is_retry=True)
return
self._vm_settings_error_reporter.report_error(ustr(mismatch))
self._vm_settings_error_reporter.report_summary()

if use_vm_settings:
self._extensions_goal_state = vm_settings_goal_state
else:
self._extensions_goal_state = self._goal_state.extensions_config
Expand All @@ -875,7 +852,8 @@ def _fetch_vm_settings_goal_state(self, force_update):
Queries the vmSettings from the HostGAPlugin and returns an (ExtensionsGoalStateFromVmSettings, bool) tuple with the vmSettings and
a boolean indicating if they are an updated (True) or a cached value (False).
Raises ProtocolError if the request fails for any reason (e.g. not supported, time out, server error)
Raises VmSettingsNotSupported if the HostGAPlugin does not support the vmSettings API, or ProtocolError if the request fails for any other reason
(e.g. not supported, time out, server error).
"""
def raise_not_supported(reset_state=False):
if reset_state:
Expand All @@ -884,19 +862,19 @@ def raise_not_supported(reset_state=False):
# "Not supported" is not considered an error, so don't use self._vm_settings_error_reporter to report it
logger.info("vmSettings is not supported")
add_event(op=WALAEventOperation.HostPlugin, message="vmSettings is not supported", is_success=True)
raise ProtocolError("VmSettings not supported")
raise VmSettingsNotSupported()

# Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check
if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now():
raise_not_supported()
try:
# Raise if VmSettings are not supported but check for periodically since the HostGAPlugin could have been updated since the last check
if not self._host_plugin_supports_vm_settings and self._host_plugin_supports_vm_settings_next_check > datetime.now():
raise_not_supported()

etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag
correlation_id = str(uuid.uuid4())
etag = None if force_update or self._cached_vm_settings is None else self._cached_vm_settings.etag
correlation_id = str(uuid.uuid4())

def format_message(msg):
return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg)
def format_message(msg):
return "GET vmSettings [correlation ID: {0} eTag: {1}]: {2}".format(correlation_id, etag, msg)

try:
def get_vm_settings():
url, headers = self.get_host_plugin().get_vm_settings_request(correlation_id)
if etag is not None:
Expand Down Expand Up @@ -963,7 +941,7 @@ def get_vm_settings():
self._cached_vm_settings = vm_settings
return vm_settings, True

except ProtocolError:
except (ProtocolError, VmSettingsNotSupported):
raise
except Exception as exception:
if isinstance(exception, IOError) and "timed out" in ustr(exception):
Expand All @@ -973,6 +951,8 @@ def get_vm_settings():
message = format_message("Request failed: {0}".format(textutil.format_exception(exception)))
self._vm_settings_error_reporter.report_error(message, _VmSettingsError.RequestFailed)
raise ProtocolError(message)
finally:
self._vm_settings_error_reporter.report_summary()

def _update_host_plugin(self, container_id, role_config_name):
if self._host_plugin is not None:
Expand Down Expand Up @@ -1559,7 +1539,6 @@ def report_error(self, error, category=None):
self._error_count += 1

if self._error_count <= _VmSettingsErrorReporter._MaxErrors:
logger.info("[VmSettings] [Informational only, the Agent will continue normal operation] {0}", error)
add_event(op=WALAEventOperation.VmSettings, message=error, is_success=False, log_event=False)

if category == _VmSettingsError.ServerError:
Expand Down Expand Up @@ -1588,3 +1567,7 @@ def report_summary(self):
logger.info("[VmSettingsSummary] {0}", message)

self._reset()


class VmSettingsNotSupported(TypeError):
pass
2 changes: 1 addition & 1 deletion tests/protocol/mockwiredata.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def reload(self):
self.in_vm_artifacts_profile = load_data(in_vm_artifacts_profile_file)

def mock_http_get(self, url, *_, **kwargs):
content = None
content = ''
response_headers = []

resp = MagicMock()
Expand Down
Loading

0 comments on commit ea00436

Please sign in to comment.