@ -2,11 +2,17 @@
set -o pipefail
set -u
VERBOSE = " ${ VERBOSE :- 0 } "
if [ [ " ${ VERBOSE } " -ne "0" ] ] ; then
set -x
fi
# List of required environment variables
required_vars = (
"INSTATUS_API_KEY"
"INSTATUS_PAGE_ID"
"INSTATUS_COMPONENT_ID"
"VERCEL_API_KEY"
)
# Check if each required variable is set
@ -24,7 +30,7 @@ declare -a modules=()
declare -a failures = ( )
# Collect all module directories containing a main.tf file
for path in $( find . -not -path '*/.*' -type f -name main.tf -maxdepth 2 | cut -d '/' -f 2 | sort -u) ; do
for path in $( find . -maxdepth 2 - not -path '*/.*' -type f -name main.tf | cut -d '/' -f 2 | sort -u) ; do
modules += ( " ${ path } " )
done
@ -45,7 +51,7 @@ create_incident() {
local incident_name = "Testing Instatus"
local message = "The following modules are experiencing issues:\n"
for i in " ${ !failures[@] } " ; do
message += " $(( $ i + 1 )) . ${ failures [ $i ] } \n "
message += " $(( i + 1 )) . ${ failures [ $i ] } \n "
done
component_status = "PARTIALOUTAGE"
@ -74,6 +80,70 @@ create_incident() {
echo " $incident_id "
}
force_redeploy_registry ( ) {
# These are not secret values; safe to just expose directly in script
local VERCEL_TEAM_SLUG = "codercom"
local VERCEL_TEAM_ID = "team_tGkWfhEGGelkkqUUm9nXq17r"
local VERCEL_APP = "registry"
local latest_res
latest_res = $( curl " https://api.vercel.com/v6/deployments?app= $VERCEL_APP &limit=1&slug= $VERCEL_TEAM_SLUG &teamId= $VERCEL_TEAM_ID &target=production&state=BUILDING,INITIALIZING,QUEUED,READY " \
--fail \
--silent \
--header " Authorization: Bearer $VERCEL_API_KEY " \
--header "Content-Type: application/json"
)
# If we have zero deployments, something is VERY wrong. Make the whole
# script exit with a non-zero status code
local latest_id
latest_id = $( echo " ${ latest_res } " | jq -r '.deployments[0].uid' )
if [ [ " ${ latest_id } " = "null" ] ] ; then
echo "Unable to pull any previous deployments for redeployment"
echo "Please redeploy the latest deployment manually in Vercel."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi
local latest_date_ts_seconds
latest_date_ts_seconds = $( echo " ${ latest_res } " | jq -r '.deployments[0].createdAt/1000|floor' )
local current_date_ts_seconds
current_date_ts_seconds = " $( date +%s) "
local max_redeploy_interval_seconds = 7200 # 2 hours
if ( ( current_date_ts_seconds - latest_date_ts_seconds < max_redeploy_interval_seconds ) ) ; then
echo "The registry was deployed less than 2 hours ago."
echo "Not automatically re-deploying the regitstry."
echo "A human reading this message should decide if a redeployment is necessary."
echo "Please check the Vercel dashboard for more information."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi
local latest_deployment_state
latest_deployment_state = " $( echo " ${ latest_res } " | jq -r '.deployments[0].state' ) "
if [ [ " ${ latest_deployment_state } " != "READY" ] ] ; then
echo "Last deployment was not in READY state. Skipping redeployment."
echo "A human reading this message should decide if a redeployment is necessary."
echo "Please check the Vercel dashboard for more information."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi
echo "============================================================="
echo " !!! Redeploying registry with deployment ID: ${ latest_id } !!! "
echo "============================================================="
if ! curl -X POST " https://api.vercel.com/v13/deployments?forceNew=1&skipAutoDetectionConfirmation=1&slug= $VERCEL_TEAM_SLUG &teamId= $VERCEL_TEAM_ID " \
--fail \
--header " Authorization: Bearer $VERCEL_API_KEY " \
--header "Content-Type: application/json" \
--data-raw " { \"deploymentId\": \" ${ latest_id } \", \"name\": \" ${ VERCEL_APP } \", \"target\": \"production\" } " ; then
echo "DEPLOYMENT FAILED! Please check the Vercel dashboard for more information."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi
}
# Check each module's accessibility
for module in " ${ modules [@] } " ; do
# Trim leading/trailing whitespace from module name
@ -81,7 +151,6 @@ for module in "${modules[@]}"; do
url = " ${ REGISTRY_BASE_URL } /modules/ ${ module } "
printf "=== Checking module %s at %s\n" " ${ module } " " ${ url } "
status_code = $( curl --output /dev/null --head --silent --fail --location " ${ url } " --retry 3 --write-out "%{http_code}" )
# shellcheck disable=SC2181
if ( ( status_code != 200 ) ) ; then
printf "==> FAIL(%s)\n" " ${ status_code } "
status = 1
@ -94,11 +163,11 @@ done
# Determine overall status and update Instatus component
if ( ( status = = 0 ) ) ; then
echo "All modules are operational."
# set to
# set to
update_component_status "OPERATIONAL"
else
echo " The following modules have issues: ${ failures [*] } "
# check if all modules are down
# check if all modules are down
if ( ( ${# failures [@] } = = ${# modules [@] } ) ) ; then
update_component_status "MAJOROUTAGE"
else
@ -108,6 +177,10 @@ else
# Create a new incident
incident_id = $( create_incident)
echo " Created incident with ID: $incident_id "
# If a module is down, force a reployment to try getting things back online
# ASAP
force_redeploy_registry
fi
exit " ${ status } "