---
# ansible/test/integration/targets/aws_ses_rule_set/tasks/obtain-lock.yaml

# ============================================================
# This file attempts to obtain a global lock (for a given
# region / account combination).
#
# This makes one attempt to get the lock and will set the
# won_lock variable to True or False to indicate whether
# or not we got the lock.
#
# It's expected that this will be executed in a retry loop
# so that if we don't get the lock we delay then try again.
#
# This should only be used in a block with cleanup-lock.yaml
# included in the always clause to ensure the lock is released.
#
# There are several variables that control the locking behaviour:
# * lock_timeout_seconds
# How old a lock must be before it's assumed to be an expired
# lock that was not cleaned up by the owner. Any locks older
# than this will not prevent a lock being obtained and will
# be deleted when a new process obtains the lock.
# * lock_log_group_prefix
# The log_group prefix that represents the lock being obtained.
# This must be the same across all processes trying to obtain
# the lock.
# * lock_process_id
# A unique identifier of this process. Each process that might
# attempt to obtain the lock must have a different identifier.
# This defaults to the resource_prefix which is generally
# appropriate.
# * max_obtain_lock_attempts
# How many attempts to make to get the lock before giving up
# NB: This is actually done in main.yaml
# * obtain_lock_delay_seconds
# How long to delay after failing to get the lock before
# trying again.
# NB: This is actually done in obtain-lock-wrapper.yaml
#
# The locking here is based around creating cloudwatch log groups.
# This resource was chosen because:
# A) it's free
# B) we have a built in grouping concept because of the hierarchy
# that allows us to easily group attempts for the same lock
# C) the creation time is tracked and returned which gives us
# a mechanism for deterministically picking a winner
#
# Each lock is represented by a log group prefix. Each attempt
# to obtain the lock is a log group of the lock_process_id below
# that prefix.
#
# The winning lock is the one with the earliest creation time.
#
# To prevent a hanging lock from permanently hanging the build,
# lock attempts older than the lock timeout are ignored and
# cleaned up by the next process to win the lock.
# ============================================================
# Build the shared AWS credential/region arguments once. The YAML anchor
# declared here is what the module tasks in this file pull in through their
# merge keys; no_log keeps the credential values out of the task output.
- name: set up aws connection info
  no_log: true
  set_fact:
    aws_connection_info: &aws_connection_info
      region: '{{ aws_region }}'
      aws_access_key: '{{ aws_access_key }}'
      aws_secret_key: '{{ aws_secret_key }}'
      security_token: '{{ security_token }}'
# Name of this process's own lock attempt: the shared lock prefix followed
# by the process id, which falls back to resource_prefix when
# lock_process_id is not defined.
- name: Set lock_attempt_log_group_name
  set_fact:
    lock_attempt_log_group_name: >-
      {{ lock_log_group_prefix }}/{{ lock_process_id | default(resource_prefix) }}
# Note the overwrite below to ensure that the creation time
# is updated. This is important as we calculate expiry relative
# to the attempt creation.
#
# Because of this it's important that we delete the attempt
# if we don't get the lock. Otherwise we can get a deadlock
# where the stale attempt from one process wins, but then
# because that process updates the creation date it doesn't
# consider itself to have won.
- name: Create Lock Attempt Log Group
  # The registered result is kept because its creation_time is read by the
  # expiry calculation later in this file.
  register: lock_attempt_log_group_result
  cloudwatchlogs_log_group:
    <<: *aws_connection_info
    state: present
    overwrite: true
    log_group_name: '{{ lock_attempt_log_group_name }}'
# Look up the log groups under the lock prefix, i.e. the attempts made at
# this lock, and keep the result for the expiry/winner calculations.
# NOTE(review): presumably the facts module treats log_group_name as a
# prefix match - confirm against the module documentation.
- name: Get Lock Attempt Lock Groups
  register: lock_attempt_log_groups
  cloudwatchlogs_log_group_facts:
    <<: *aws_connection_info
    log_group_name: '{{ lock_log_group_prefix }}/'
# Work out the cut-off before which a lock attempt counts as expired.
# The creation time of our own freshly created attempt is used as "now";
# lock_timeout_seconds is multiplied by 1000 to convert it to the
# millisecond units that subtraction against creation_time implies.
#
# lock_timeout_seconds is coerced with |int so that a timeout supplied as
# a string (for example through key=value extra vars) is treated as a
# number instead of being string-repeated by the multiplication.
- name: Calculate Expired Lock Attempt Timestamp
  set_fact:
    expired_lock_timestamp: "{{ lock_attempt_log_group_result.creation_time - ((lock_timeout_seconds | int) * 1000) }}"
# Partition the listed attempts around the expiry cut-off: anything created
# strictly before it is expired, everything else is still active.
- name: Get Expired and Active Lock Attempts
  set_fact:
    active_lock_attempts: >-
      {{ lock_attempt_log_groups.log_groups
      | selectattr('creation_time', 'ge', expired_lock_timestamp | int)
      | list }}
    expired_lock_attempts: >-
      {{ lock_attempt_log_groups.log_groups
      | selectattr('creation_time', 'lt', expired_lock_timestamp | int)
      | list }}
# The active attempt with the earliest creation time wins the lock.
# NOTE(review): assumes active_lock_attempts is never empty because our own
# attempt was created just before the listing - confirm.
- name: Pick Winning Lock Attempt
  set_fact:
    winning_lock_attempt: >-
      {{ active_lock_attempts | sort(attribute='creation_time') | first }}
# We hold the lock only when the winning attempt is the log group this
# process created.
- name: Determine if Won Lock
  set_fact:
    won_lock: '{{ lock_attempt_log_group_name == winning_lock_attempt.log_group_name }}'
# Remove the lock attempt if we didn't get the lock. This prevents
# our stale lock attempt blocking another process from getting the lock.
# See more detailed comment above Create Lock Attempt Log Group
- name: Remove Failed Lock Attempt Log Group
  # Runs only when this process lost; deletes the log group it created.
  when: not (won_lock | bool)
  cloudwatchlogs_log_group:
    <<: *aws_connection_info
    state: absent
    log_group_name: '{{ lock_attempt_log_group_name }}'
# Housekeeping performed by the lock winner only: remove every attempt that
# was classified as expired.
- name: Delete Expired Lock Attempts
  when: won_lock | bool
  loop: '{{ expired_lock_attempts }}'
  cloudwatchlogs_log_group:
    <<: *aws_connection_info
    state: absent
    log_group_name: '{{ item.log_group_name }}'