Compare commits
6 Commits
v1.0.25
...
coder-moni
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
66aa7bf80a | ||
|
|
1a101ecc10 | ||
|
|
9c049bd555 | ||
|
|
84bf63705f | ||
|
|
e4af2a9887 | ||
|
|
cbd06b1135 |
32
.github/scripts/check.sh
vendored
32
.github/scripts/check.sh
vendored
@@ -48,7 +48,7 @@ update_component_status() {
|
||||
|
||||
# Function to create an incident
|
||||
create_incident() {
|
||||
local incident_name="Testing Instatus"
|
||||
local incident_name="Degraded Service"
|
||||
local message="The following modules are experiencing issues:\n"
|
||||
for i in "${!failures[@]}"; do
|
||||
message+="$((i + 1)). ${failures[$i]}\n"
|
||||
@@ -59,7 +59,7 @@ create_incident() {
|
||||
component_status="MAJOROUTAGE"
|
||||
fi
|
||||
# see https://instatus.com/help/api/incidents
|
||||
response=$(curl -s -X POST "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
|
||||
incident_id=$(curl -s -X POST "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
|
||||
-H "Authorization: Bearer $INSTATUS_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
@@ -74,10 +74,25 @@ create_incident() {
|
||||
\"status\": \"PARTIALOUTAGE\"
|
||||
}
|
||||
]
|
||||
}")
|
||||
}" | jq -r '.id')
|
||||
|
||||
incident_id=$(echo "$response" | jq -r '.id')
|
||||
echo "$incident_id"
|
||||
echo "Created incident with ID: $incident_id"
|
||||
}
|
||||
|
||||
# Function to check for existing unresolved incidents
|
||||
# Check Instatus for incidents that have not been resolved yet.
#
# Reads INSTATUS_PAGE_ID and INSTATUS_API_KEY from the environment.
#
# Returns:
#   0 - at least one incident has a status other than "RESOLVED"
#       (the matching incident IDs are echoed).
#   1 - no unresolved incident was found. A failed API call also returns 1,
#       so the caller's behavior on failure is the same as before, but the
#       failure is now reported on stderr instead of being silently ignored.
check_existing_incident() {
  # Declared separately from the assignments on purpose: `local var=$(cmd)`
  # always yields the exit status of `local`, which hides a failing command.
  local response unresolved_incidents

  if ! response=$(curl -s -X GET "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
    -H "Authorization: Bearer $INSTATUS_API_KEY" \
    -H "Content-Type: application/json"); then
    echo "Warning: could not query Instatus for existing incidents." >&2
    return 1
  fi

  # Keep only the IDs of incidents whose status is not "RESOLVED".
  # A jq failure is reported but does not abort: whatever IDs were emitted
  # before the error are still evaluated below.
  unresolved_incidents=$(jq -r '.incidents[] | select(.status != "RESOLVED") | .id' <<< "$response") \
    || echo "Warning: could not parse the Instatus incidents response." >&2

  if [[ -n "$unresolved_incidents" ]]; then
    echo "Unresolved incidents found: $unresolved_incidents"
    return 0 # Indicate that there are unresolved incidents
  else
    echo "No unresolved incidents found."
    return 1 # Indicate that no unresolved incidents exist
  fi
}
|
||||
|
||||
force_redeploy_registry () {
|
||||
@@ -174,9 +189,10 @@ else
|
||||
update_component_status "PARTIALOUTAGE"
|
||||
fi
|
||||
|
||||
# Create a new incident
|
||||
incident_id=$(create_incident)
|
||||
echo "Created incident with ID: $incident_id"
|
||||
# Check if there is an existing incident before creating a new one
|
||||
if ! check_existing_incident; then
|
||||
create_incident
|
||||
fi
|
||||
|
||||
# If a module is down, force a redeployment to try getting things back online
|
||||
# ASAP
|
||||
|
||||
72
monitoring/README.md
Normal file
72
monitoring/README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
---
|
||||
display_name: Monitoring
|
||||
description: Monitoring of workspace resources
|
||||
maintainer_github: coder
|
||||
verified: true
|
||||
tags: [monitoring]
|
||||
---
|
||||
|
||||
# Monitoring
|
||||
|
||||
This module adds monitoring of workspace resources.
|
||||
|
||||
```tf
|
||||
module "monitoring" {
|
||||
source = "registry.coder.com/modules/monitoring/coder"
|
||||
version = "1.0.0"
|
||||
agent_id = coder_agent.dev.id
|
||||
}
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
```tf
|
||||
module "monitoring" {
|
||||
source = "registry.coder.com/modules/monitoring/coder"
|
||||
version = "1.0.0"
|
||||
agent_id = coder_agent.dev.id
|
||||
}
|
||||
```
|
||||
|
||||
### Enable/Disable
|
||||
|
||||
You can customize the monitoring by setting the `enabled`, `memory_enabled`, and `disk_enabled` variables.
|
||||
|
||||
```tf
|
||||
module "monitoring" {
|
||||
source = "registry.coder.com/modules/monitoring/coder"
|
||||
version = "1.0.0"
|
||||
agent_id = coder_agent.dev.id
|
||||
enabled = false
|
||||
memory_enabled = true
|
||||
disk_enabled = false
|
||||
}
|
||||
```
|
||||
|
||||
### Customize Thresholds
|
||||
|
||||
You can customize the thresholds by setting the `threshold`, `memory_threshold`, and `disk_threshold` variables.
|
||||
|
||||
```tf
|
||||
module "monitoring" {
|
||||
source = "registry.coder.com/modules/monitoring/coder"
|
||||
version = "1.0.0"
|
||||
agent_id = coder_agent.dev.id
|
||||
threshold = 90
|
||||
memory_threshold = 95
|
||||
disk_threshold = 90
|
||||
}
|
||||
```
|
||||
|
||||
### Customize Disks
|
||||
|
||||
You can customize the disks by setting the `disks` variable.
|
||||
|
||||
```tf
|
||||
module "monitoring" {
|
||||
source = "registry.coder.com/modules/monitoring/coder"
|
||||
version = "1.0.0"
|
||||
agent_id = coder_agent.dev.id
|
||||
disks = ["/"]
|
||||
}
|
||||
```
|
||||
92
monitoring/main.tf
Normal file
92
monitoring/main.tf
Normal file
@@ -0,0 +1,92 @@
|
||||
terraform {
  # Minimum Terraform CLI version supported by this module.
  # The previous constraint ">= 1.0.25" named a release that does not exist
  # in the Terraform 1.0.x series, so it silently excluded every 1.0.x CLI.
  # ">= 1.0" accepts a superset of what the old constraint accepted.
  required_version = ">= 1.0"

  required_providers {
    # Coder provider that supplies the data source used by this module.
    coder = {
      source  = "coder/coder"
      version = ">= 2.0.2"
    }
  }
}
|
||||
|
||||
variable "threshold" {
|
||||
type = number
|
||||
description = "The threshold for the monitoring, used for all resources unless overridden by *_threshold - expressed as a percentage."
|
||||
default = 90
|
||||
validation {
|
||||
condition = var.threshold >= 0 && var.threshold <= 100
|
||||
error_message = "The threshold must be between 0 and 100."
|
||||
}
|
||||
}
|
||||
|
||||
variable "memory_threshold" {
|
||||
type = number
|
||||
description = "The threshold for the memory monitoring - expressed as a percentage."
|
||||
default = 90
|
||||
validation {
|
||||
condition = var.memory_threshold >= 0 && var.memory_threshold <= 100
|
||||
error_message = "The memory_threshold must be between 0 and 100."
|
||||
}
|
||||
}
|
||||
|
||||
variable "disk_threshold" {
|
||||
type = number
|
||||
description = "The threshold for the disk monitoring - expressed as a percentage."
|
||||
default = 90
|
||||
validation {
|
||||
condition = var.disk_threshold >= 0 && var.disk_threshold <= 100
|
||||
error_message = "The disk_threshold must be between 0 and 100."
|
||||
}
|
||||
}
|
||||
|
||||
variable "disks" {
|
||||
type = list(string)
|
||||
description = "The disks to monitor. e.g. ['/', '/home']"
|
||||
default = ["/"]
|
||||
}
|
||||
|
||||
variable "enabled" {
|
||||
type = bool
|
||||
description = "Whether the monitoring is enabled."
|
||||
default = true
|
||||
validation {
|
||||
condition = var.enabled == true || var.enabled == false
|
||||
error_message = "The enabled must be true or false."
|
||||
}
|
||||
}
|
||||
|
||||
variable "memory_enabled" {
|
||||
type = bool
|
||||
description = "Whether the memory monitoring is enabled."
|
||||
default = true
|
||||
validation {
|
||||
condition = var.memory_enabled == true || var.memory_enabled == false
|
||||
error_message = "The memory_enabled must be true or false."
|
||||
}
|
||||
}
|
||||
|
||||
variable "disk_enabled" {
|
||||
type = bool
|
||||
description = "Whether the disk monitoring is enabled."
|
||||
default = true
|
||||
validation {
|
||||
condition = var.disk_enabled == true || var.disk_enabled == false
|
||||
error_message = "The disk_enabled must be true or false."
|
||||
}
|
||||
}
|
||||
|
||||
variable "agent_id" {
|
||||
type = string
|
||||
description = "The ID of the agent to monitor."
|
||||
}
|
||||
|
||||
# Passes every input variable of this module straight through to the
# `coder_monitoring` data source, one argument per variable of the same name.
# NOTE(review): confirm that the coder provider version required by this
# module actually exposes a `coder_monitoring` data source with these
# arguments — TODO verify against the provider schema.
data "coder_monitoring" "monitoring" {
|
||||
threshold = var.threshold
|
||||
memory_threshold = var.memory_threshold
|
||||
disk_threshold = var.disk_threshold
|
||||
disks = var.disks
|
||||
enabled = var.enabled
|
||||
memory_enabled = var.memory_enabled
|
||||
disk_enabled = var.disk_enabled
|
||||
agent_id = var.agent_id
|
||||
}
|
||||
Reference in New Issue
Block a user