Add support for network_cli connection retry (#61103)
* Add support for network_cli connection retry * Add network_cli connection configuration option to allow retrying the connection initialization with remote host. * Add docs * Fix test failures * Fix review comments
This commit is contained in:
parent
ee3e8704e8
commit
768dbe5490
3 changed files with 63 additions and 2 deletions
|
@ -846,3 +846,20 @@ Modify the error regex for individual task.
|
|||
The terminal plugin regex options ``ansible_terminal_stderr_re`` and ``ansible_terminal_stdout_re`` have
|
||||
``pattern`` and ``flags`` as keys. The value of the ``flags`` key should be a value that is accepted by
|
||||
the ``re.compile`` python method.
|
||||
|
||||
|
||||
Intermittent failure while using ``network_cli`` connection type due to slower network or remote target host
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
In Ansible 2.9 and later, the ``network_cli`` connection plugin configuration option is added to control
|
||||
the number of attempts to connect to a remote host. The default number of attempts is three.
|
||||
After every retry attempt, the delay between retries increases by a power of 2 in seconds until either
|
||||
the maximum attempts are exhausted or either the ``persistent_command_timeout`` or ``persistent_connect_timeout``
|
||||
timers are triggered.
|
||||
|
||||
To make this a global setting, add the following to your ``ansible.cfg`` file:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
[persistent_connection]
|
||||
network_cli_retries = 5
|
||||
|
|
|
@ -139,6 +139,7 @@ class ConnectionProcess(object):
|
|||
display.display("jsonrpc request: %s" % data, log_only=True)
|
||||
|
||||
signal.alarm(self.connection.get_option('persistent_command_timeout'))
|
||||
|
||||
resp = self.srv.handle_request(data)
|
||||
signal.alarm(0)
|
||||
|
||||
|
|
|
@ -194,6 +194,7 @@ options:
|
|||
terminal_stdout_re:
|
||||
type: list
|
||||
elements: dict
|
||||
version_added: '2.9'
|
||||
description:
|
||||
- A single regex pattern or a sequence of patterns along with optional flags
|
||||
to match the command prompt from the received response chunk. This option
|
||||
|
@ -206,6 +207,7 @@ options:
|
|||
terminal_stderr_re:
|
||||
type: list
|
||||
elements: dict
|
||||
version_added: '2.9'
|
||||
description:
|
||||
- This option provides the regex pattern and optional flags to match the
|
||||
error string from the received response chunk. This option
|
||||
|
@ -217,6 +219,7 @@ options:
|
|||
- name: ansible_terminal_stderr_re
|
||||
terminal_initial_prompt:
|
||||
type: list
|
||||
version_added: '2.9'
|
||||
description:
|
||||
- A single regex pattern or a sequence of patterns to evaluate the expected
|
||||
prompt at the time of initial login to the remote host.
|
||||
|
@ -224,6 +227,7 @@ options:
|
|||
- name: ansible_terminal_initial_prompt
|
||||
terminal_initial_answer:
|
||||
type: list
|
||||
version_added: '2.9'
|
||||
description:
|
||||
- The answer to reply with if the C(terminal_initial_prompt) is matched. The value can be a single answer
|
||||
or a list of answers for multiple terminal_initial_prompt. In case the login menu has
|
||||
|
@ -234,6 +238,7 @@ options:
|
|||
- name: ansible_terminal_initial_answer
|
||||
terminal_initial_prompt_checkall:
|
||||
type: boolean
|
||||
version_added: '2.9'
|
||||
description:
|
||||
- By default the value is set to I(False) and any one of the prompts mentioned in C(terminal_initial_prompt)
|
||||
option is matched it won't check for other prompts. When set to I(True) it will check for all the prompts
|
||||
|
@ -244,12 +249,28 @@ options:
|
|||
- name: ansible_terminal_initial_prompt_checkall
|
||||
terminal_inital_prompt_newline:
|
||||
type: boolean
|
||||
version_added: '2.9'
|
||||
description:
|
||||
- When set to I(True), this boolean flag sends a newline in the response if any of the values
|
||||
in I(terminal_initial_prompt) is matched.
|
||||
default: True
|
||||
vars:
|
||||
- name: ansible_terminal_initial_prompt_newline
|
||||
network_cli_retries:
|
||||
description:
|
||||
- Number of attempts to connect to remote host. The delay time between the retries increases after
|
||||
every attempt by power of 2 in seconds till either the maximum attempts are exhausted or any of the
|
||||
C(persistent_command_timeout) or C(persistent_connect_timeout) timers are triggered.
|
||||
default: 3
|
||||
version_added: '2.9'
|
||||
type: integer
|
||||
env:
|
||||
- name: ANSIBLE_NETWORK_CLI_RETRIES
|
||||
ini:
|
||||
- section: persistent_connection
|
||||
key: network_cli_retries
|
||||
vars:
|
||||
- name: ansible_network_cli_retries
|
||||
"""
|
||||
|
||||
import getpass
|
||||
|
@ -259,6 +280,7 @@ import re
|
|||
import os
|
||||
import signal
|
||||
import socket
|
||||
import time
|
||||
import traceback
|
||||
from io import BytesIO
|
||||
|
||||
|
@ -386,13 +408,34 @@ class Connection(NetworkConnectionBase):
|
|||
self.paramiko_conn._set_log_channel(self._get_log_channel())
|
||||
self.paramiko_conn.set_options(direct={'look_for_keys': not bool(self._play_context.password and not self._play_context.private_key_file)})
|
||||
self.paramiko_conn.force_persistence = self.force_persistence
|
||||
|
||||
command_timeout = self.get_option('persistent_command_timeout')
|
||||
max_pause = min([self.get_option('persistent_connect_timeout'), command_timeout])
|
||||
retries = self.get_option('network_cli_retries')
|
||||
total_pause = 0
|
||||
|
||||
for attempt in range(retries + 1):
|
||||
try:
|
||||
ssh = self.paramiko_conn._connect()
|
||||
break
|
||||
except Exception as e:
|
||||
pause = 2 ** (attempt + 1)
|
||||
if attempt == retries or total_pause >= max_pause:
|
||||
raise AnsibleConnectionFailure(to_text(e, errors='surrogate_or_strict'))
|
||||
else:
|
||||
msg = (u"network_cli_retry: attempt: %d, caught exception(%s), "
|
||||
u"pausing for %d seconds" % (attempt + 1, to_text(e, errors='surrogate_or_strict'), pause))
|
||||
|
||||
self.queue_message('vv', msg)
|
||||
time.sleep(pause)
|
||||
total_pause += pause
|
||||
continue
|
||||
|
||||
self.queue_message('vvvv', 'ssh connection done, setting terminal')
|
||||
self._connected = True
|
||||
|
||||
self._ssh_shell = ssh.ssh.invoke_shell()
|
||||
self._ssh_shell.settimeout(self.get_option('persistent_command_timeout'))
|
||||
self._ssh_shell.settimeout(command_timeout)
|
||||
|
||||
self.queue_message('vvvv', 'loaded terminal plugin for network_os %s' % self._network_os)
|
||||
|
||||
|
|
Loading…
Reference in a new issue