chore: add updates to force redeployment on Vercel (#348)

## Changes made
- Updated `check.sh` script to add support for automatic re-deploying in
the event that the registry has a partial/full outage.

---------

Co-authored-by: Cian Johnston <cian@coder.com>
pull/345/head^2
Michael Smith 5 months ago committed by GitHub
parent 5101c27c83
commit 6597a2d547
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -2,11 +2,17 @@
set -o pipefail set -o pipefail
set -u set -u
VERBOSE="${VERBOSE:-0}"
if [[ "${VERBOSE}" -ne "0" ]]; then
set -x
fi
# List of required environment variables # List of required environment variables
required_vars=( required_vars=(
"INSTATUS_API_KEY" "INSTATUS_API_KEY"
"INSTATUS_PAGE_ID" "INSTATUS_PAGE_ID"
"INSTATUS_COMPONENT_ID" "INSTATUS_COMPONENT_ID"
"VERCEL_API_KEY"
) )
# Check if each required variable is set # Check if each required variable is set
@ -24,7 +30,7 @@ declare -a modules=()
declare -a failures=() declare -a failures=()
# Collect all module directories containing a main.tf file # Collect all module directories containing a main.tf file
for path in $(find . -not -path '*/.*' -type f -name main.tf -maxdepth 2 | cut -d '/' -f 2 | sort -u); do for path in $(find . -maxdepth 2 -not -path '*/.*' -type f -name main.tf | cut -d '/' -f 2 | sort -u); do
modules+=("${path}") modules+=("${path}")
done done
@ -45,7 +51,7 @@ create_incident() {
local incident_name="Testing Instatus" local incident_name="Testing Instatus"
local message="The following modules are experiencing issues:\n" local message="The following modules are experiencing issues:\n"
for i in "${!failures[@]}"; do for i in "${!failures[@]}"; do
message+="$(($i + 1)). ${failures[$i]}\n" message+="$((i + 1)). ${failures[$i]}\n"
done done
component_status="PARTIALOUTAGE" component_status="PARTIALOUTAGE"
@ -74,6 +80,70 @@ create_incident() {
echo "$incident_id" echo "$incident_id"
} }
#######################################
# Force a fresh production deployment of the registry on Vercel by
# re-deploying the most recent production deployment.
#
# Exits the whole script with status 1 (without redeploying) when:
#   - the deployment list cannot be fetched or contains no deployment,
#   - the latest deployment has no usable creation timestamp,
#   - the latest deployment is less than 2 hours old,
#   - the latest deployment is not in the READY state,
#   - the redeploy request itself fails.
#
# Globals:   VERCEL_API_KEY (read)
# Arguments: none
# Outputs:   progress and failure messages on stdout
# Returns:   0 once the redeploy request succeeded; otherwise exits 1
#######################################
force_redeploy_registry () {
  # These are not secret values; safe to just expose directly in script
  local VERCEL_TEAM_SLUG="codercom"
  local VERCEL_TEAM_ID="team_tGkWfhEGGelkkqUUm9nXq17r"
  local VERCEL_APP="registry"
  local dashboard_url="https://vercel.com/${VERCEL_TEAM_SLUG}/${VERCEL_APP}/deployments"

  # NOTE(review): when shell tracing (set -x) is enabled, the Authorization
  # header below is printed in the trace output - confirm that is acceptable.
  local latest_res
  if ! latest_res=$(curl "https://api.vercel.com/v6/deployments?app=$VERCEL_APP&limit=1&slug=$VERCEL_TEAM_SLUG&teamId=$VERCEL_TEAM_ID&target=production&state=BUILDING,INITIALIZING,QUEUED,READY" \
    --fail \
    --silent \
    --header "Authorization: Bearer $VERCEL_API_KEY" \
    --header "Content-Type: application/json"
  ); then
    # Without a response none of the safety checks below can be trusted,
    # so stop here instead of carrying on with empty values.
    echo "Request to the Vercel deployments API failed."
    echo "Unable to pull any previous deployments for redeployment"
    echo "Please redeploy the latest deployment manually in Vercel."
    echo "${dashboard_url}"
    exit 1
  fi

  # If we have zero deployments, something is VERY wrong. Make the whole
  # script exit with a non-zero status code. `// empty` maps a missing uid
  # (jq null) to an empty string, which also covers an unparsable response.
  local latest_id
  latest_id=$(jq -r '.deployments[0].uid // empty' <<<"${latest_res}") || latest_id=""
  if [[ -z "${latest_id}" ]]; then
    echo "Unable to pull any previous deployments for redeployment"
    echo "Please redeploy the latest deployment manually in Vercel."
    echo "${dashboard_url}"
    exit 1
  fi

  # createdAt is divided by 1000 to compare against `date +%s` (seconds).
  local latest_date_ts_seconds
  latest_date_ts_seconds=$(jq -r '.deployments[0].createdAt / 1000 | floor' <<<"${latest_res}") \
    || latest_date_ts_seconds=""
  # An empty value would be treated as 0 by (( )) and silently bypass the
  # rate limit below, so insist on a plain non-negative integer.
  if [[ ! "${latest_date_ts_seconds}" =~ ^[0-9]+$ ]]; then
    echo "Unable to determine when the registry was last deployed."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi

  local current_date_ts_seconds
  current_date_ts_seconds="$(date +%s)"
  local max_redeploy_interval_seconds=7200 # 2 hours
  if (( current_date_ts_seconds - latest_date_ts_seconds < max_redeploy_interval_seconds )); then
    echo "The registry was deployed less than 2 hours ago."
    echo "Not automatically re-deploying the registry."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi

  local latest_deployment_state
  latest_deployment_state=$(jq -r '.deployments[0].state' <<<"${latest_res}") \
    || latest_deployment_state=""
  if [[ "${latest_deployment_state}" != "READY" ]]; then
    echo "Last deployment was not in READY state. Skipping redeployment."
    echo "A human reading this message should decide if a redeployment is necessary."
    echo "Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi

  echo "============================================================="
  echo "!!! Redeploying registry with deployment ID: ${latest_id} !!!"
  echo "============================================================="

  # Let jq build the request body so the values are always JSON-escaped.
  local payload
  payload=$(jq -n -c \
    --arg id "${latest_id}" \
    --arg name "${VERCEL_APP}" \
    '{deploymentId: $id, name: $name, target: "production"}') || payload=""

  if [[ -z "${payload}" ]] || ! curl -X POST "https://api.vercel.com/v13/deployments?forceNew=1&skipAutoDetectionConfirmation=1&slug=$VERCEL_TEAM_SLUG&teamId=$VERCEL_TEAM_ID" \
    --fail \
    --header "Authorization: Bearer $VERCEL_API_KEY" \
    --header "Content-Type: application/json" \
    --data-raw "${payload}"; then
    echo "DEPLOYMENT FAILED! Please check the Vercel dashboard for more information."
    echo "${dashboard_url}"
    exit 1
  fi

  return 0
}
# Check each module's accessibility # Check each module's accessibility
for module in "${modules[@]}"; do for module in "${modules[@]}"; do
# Trim leading/trailing whitespace from module name # Trim leading/trailing whitespace from module name
@ -81,7 +151,6 @@ for module in "${modules[@]}"; do
url="${REGISTRY_BASE_URL}/modules/${module}" url="${REGISTRY_BASE_URL}/modules/${module}"
printf "=== Checking module %s at %s\n" "${module}" "${url}" printf "=== Checking module %s at %s\n" "${module}" "${url}"
status_code=$(curl --output /dev/null --head --silent --fail --location "${url}" --retry 3 --write-out "%{http_code}") status_code=$(curl --output /dev/null --head --silent --fail --location "${url}" --retry 3 --write-out "%{http_code}")
# shellcheck disable=SC2181
if (( status_code != 200 )); then if (( status_code != 200 )); then
printf "==> FAIL(%s)\n" "${status_code}" printf "==> FAIL(%s)\n" "${status_code}"
status=1 status=1
@ -94,11 +163,11 @@ done
# Determine overall status and update Instatus component # Determine overall status and update Instatus component
if (( status == 0 )); then if (( status == 0 )); then
echo "All modules are operational." echo "All modules are operational."
# set to # set to
update_component_status "OPERATIONAL" update_component_status "OPERATIONAL"
else else
echo "The following modules have issues: ${failures[*]}" echo "The following modules have issues: ${failures[*]}"
# check if all modules are down # check if all modules are down
if (( ${#failures[@]} == ${#modules[@]} )); then if (( ${#failures[@]} == ${#modules[@]} )); then
update_component_status "MAJOROUTAGE" update_component_status "MAJOROUTAGE"
else else
@ -108,6 +177,10 @@ else
# Create a new incident # Create a new incident
incident_id=$(create_incident) incident_id=$(create_incident)
echo "Created incident with ID: $incident_id" echo "Created incident with ID: $incident_id"
# If a module is down, force a redeployment to try getting things back online
# ASAP
force_redeploy_registry
fi fi
exit "${status}" exit "${status}"

@ -2,7 +2,7 @@ name: Health
# Check modules health on registry.coder.com # Check modules health on registry.coder.com
on: on:
schedule: schedule:
- cron: "*/13 * * * *" # Runs every 13th minute - cron: "0,15,30,45 * * * *" # Runs every 15 minutes
workflow_dispatch: # Allows manual triggering of the workflow if needed workflow_dispatch: # Allows manual triggering of the workflow if needed
jobs: jobs:
@ -20,3 +20,4 @@ jobs:
INSTATUS_API_KEY: ${{ secrets.INSTATUS_API_KEY }} INSTATUS_API_KEY: ${{ secrets.INSTATUS_API_KEY }}
INSTATUS_PAGE_ID: ${{ secrets.INSTATUS_PAGE_ID }} INSTATUS_PAGE_ID: ${{ secrets.INSTATUS_PAGE_ID }}
INSTATUS_COMPONENT_ID: ${{ secrets.INSTATUS_COMPONENT_ID }} INSTATUS_COMPONENT_ID: ${{ secrets.INSTATUS_COMPONENT_ID }}
VERCEL_API_KEY: ${{ secrets.VERCEL_API_KEY }}

Loading…
Cancel
Save