Compare commits
6 Commits
v1.0.25
...
coder-moni
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
66aa7bf80a | ||
|
|
1a101ecc10 | ||
|
|
9c049bd555 | ||
|
|
84bf63705f | ||
|
|
e4af2a9887 | ||
|
|
cbd06b1135 |
32
.github/scripts/check.sh
vendored
32
.github/scripts/check.sh
vendored
@@ -48,7 +48,7 @@ update_component_status() {
|
|||||||
|
|
||||||
# Function to create an incident
|
# Function to create an incident
|
||||||
create_incident() {
|
create_incident() {
|
||||||
local incident_name="Testing Instatus"
|
local incident_name="Degraded Service"
|
||||||
local message="The following modules are experiencing issues:\n"
|
local message="The following modules are experiencing issues:\n"
|
||||||
for i in "${!failures[@]}"; do
|
for i in "${!failures[@]}"; do
|
||||||
message+="$((i + 1)). ${failures[$i]}\n"
|
message+="$((i + 1)). ${failures[$i]}\n"
|
||||||
@@ -59,7 +59,7 @@ create_incident() {
|
|||||||
component_status="MAJOROUTAGE"
|
component_status="MAJOROUTAGE"
|
||||||
fi
|
fi
|
||||||
# see https://instatus.com/help/api/incidents
|
# see https://instatus.com/help/api/incidents
|
||||||
response=$(curl -s -X POST "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
|
incident_id=$(curl -s -X POST "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
|
||||||
-H "Authorization: Bearer $INSTATUS_API_KEY" \
|
-H "Authorization: Bearer $INSTATUS_API_KEY" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d "{
|
-d "{
|
||||||
@@ -74,10 +74,25 @@ create_incident() {
|
|||||||
\"status\": \"PARTIALOUTAGE\"
|
\"status\": \"PARTIALOUTAGE\"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}")
|
}" | jq -r '.id')
|
||||||
|
|
||||||
incident_id=$(echo "$response" | jq -r '.id')
|
echo "Created incident with ID: $incident_id"
|
||||||
echo "$incident_id"
|
}
|
||||||
|
|
||||||
|
# Function to check for existing unresolved incidents
|
||||||
|
check_existing_incident() {
|
||||||
|
# Fetch the latest incidents with status not equal to "RESOLVED"
|
||||||
|
local unresolved_incidents=$(curl -s -X GET "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
|
||||||
|
-H "Authorization: Bearer $INSTATUS_API_KEY" \
|
||||||
|
-H "Content-Type: application/json" | jq -r '.incidents[] | select(.status != "RESOLVED") | .id')
|
||||||
|
|
||||||
|
if [[ -n "$unresolved_incidents" ]]; then
|
||||||
|
echo "Unresolved incidents found: $unresolved_incidents"
|
||||||
|
return 0 # Indicate that there are unresolved incidents
|
||||||
|
else
|
||||||
|
echo "No unresolved incidents found."
|
||||||
|
return 1 # Indicate that no unresolved incidents exist
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
force_redeploy_registry () {
|
force_redeploy_registry () {
|
||||||
@@ -174,9 +189,10 @@ else
|
|||||||
update_component_status "PARTIALOUTAGE"
|
update_component_status "PARTIALOUTAGE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Create a new incident
|
# Check if there is an existing incident before creating a new one
|
||||||
incident_id=$(create_incident)
|
if ! check_existing_incident; then
|
||||||
echo "Created incident with ID: $incident_id"
|
create_incident
|
||||||
|
fi
|
||||||
|
|
||||||
# If a module is down, force a redeployment to try getting things back online
|
# If a module is down, force a redeployment to try getting things back online
|
||||||
# ASAP
|
# ASAP
|
||||||
|
|||||||
72
monitoring/README.md
Normal file
72
monitoring/README.md
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
---
|
||||||
|
display_name: Monitoring
|
||||||
|
description: Monitoring of workspace resources
|
||||||
|
maintainer_github: coder
|
||||||
|
verified: true
|
||||||
|
tags: [monitoring]
|
||||||
|
---
|
||||||
|
|
||||||
|
# Monitoring
|
||||||
|
|
||||||
|
This module adds monitoring of workspace resources.
|
||||||
|
|
||||||
|
```tf
|
||||||
|
module "monitoring" {
|
||||||
|
source = "registry.coder.com/modules/monitoring/coder"
|
||||||
|
version = "1.0.0"
|
||||||
|
agent_id = coder_agent.dev.id
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
```tf
|
||||||
|
module "monitoring" {
|
||||||
|
source = "registry.coder.com/modules/monitoring/coder"
|
||||||
|
version = "1.0.0"
|
||||||
|
agent_id = coder_agent.dev.id
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Enable/Disable
|
||||||
|
|
||||||
|
You can enable or disable monitoring as a whole, or memory and disk monitoring individually, by setting the `enabled`, `memory_enabled`, and `disk_enabled` variables.
|
||||||
|
|
||||||
|
```tf
|
||||||
|
module "monitoring" {
|
||||||
|
source = "registry.coder.com/modules/monitoring/coder"
|
||||||
|
version = "1.0.0"
|
||||||
|
agent_id = coder_agent.dev.id
|
||||||
|
enabled = false
|
||||||
|
memory_enabled = true
|
||||||
|
disk_enabled = false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Customize Thresholds
|
||||||
|
|
||||||
|
You can customize the thresholds, expressed as percentages between 0 and 100, by setting the `threshold`, `memory_threshold`, and `disk_threshold` variables.
|
||||||
|
|
||||||
|
```tf
|
||||||
|
module "monitoring" {
|
||||||
|
source = "registry.coder.com/modules/monitoring/coder"
|
||||||
|
version = "1.0.0"
|
||||||
|
agent_id = coder_agent.dev.id
|
||||||
|
threshold = 90
|
||||||
|
memory_threshold = 95
|
||||||
|
disk_threshold = 90
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Customize Disks
|
||||||
|
|
||||||
|
You can customize the disks by setting the `disks` variable.
|
||||||
|
|
||||||
|
```tf
|
||||||
|
module "monitoring" {
|
||||||
|
source = "registry.coder.com/modules/monitoring/coder"
|
||||||
|
version = "1.0.0"
|
||||||
|
agent_id = coder_agent.dev.id
|
||||||
|
disks = ["/"]
|
||||||
|
}
|
||||||
|
```
|
||||||
92
monitoring/main.tf
Normal file
92
monitoring/main.tf
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
terraform {
|
||||||
|
required_version = ">= 1.0.25"
|
||||||
|
|
||||||
|
required_providers {
|
||||||
|
coder = {
|
||||||
|
source = "coder/coder"
|
||||||
|
version = ">= 2.0.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "threshold" {
|
||||||
|
type = number
|
||||||
|
description = "The threshold for the monitoring, used for all resources unless overridden by *_threshold - expressed as a percentage."
|
||||||
|
default = 90
|
||||||
|
validation {
|
||||||
|
condition = var.threshold >= 0 && var.threshold <= 100
|
||||||
|
error_message = "The threshold must be between 0 and 100."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "memory_threshold" {
|
||||||
|
type = number
|
||||||
|
description = "The threshold for the memory monitoring - expressed as a percentage."
|
||||||
|
default = 90
|
||||||
|
validation {
|
||||||
|
condition = var.memory_threshold >= 0 && var.memory_threshold <= 100
|
||||||
|
error_message = "The memory_threshold must be between 0 and 100."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_threshold" {
|
||||||
|
type = number
|
||||||
|
description = "The threshold for the disk monitoring - expressed as a percentage."
|
||||||
|
default = 90
|
||||||
|
validation {
|
||||||
|
condition = var.disk_threshold >= 0 && var.disk_threshold <= 100
|
||||||
|
error_message = "The disk_threshold must be between 0 and 100."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disks" {
|
||||||
|
type = list(string)
|
||||||
|
description = "The disks to monitor. e.g. ['/', '/home']"
|
||||||
|
default = ["/"]
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "enabled" {
|
||||||
|
type = bool
|
||||||
|
description = "Whether the monitoring is enabled."
|
||||||
|
default = true
|
||||||
|
validation {
|
||||||
|
condition = var.enabled == true || var.enabled == false
|
||||||
|
error_message = "The enabled must be true or false."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "memory_enabled" {
|
||||||
|
type = bool
|
||||||
|
description = "Whether the memory monitoring is enabled."
|
||||||
|
default = true
|
||||||
|
validation {
|
||||||
|
condition = var.memory_enabled == true || var.memory_enabled == false
|
||||||
|
error_message = "The memory_enabled must be true or false."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_enabled" {
|
||||||
|
type = bool
|
||||||
|
description = "Whether the disk monitoring is enabled."
|
||||||
|
default = true
|
||||||
|
validation {
|
||||||
|
condition = var.disk_enabled == true || var.disk_enabled == false
|
||||||
|
error_message = "The disk_enabled must be true or false."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "agent_id" {
|
||||||
|
type = string
|
||||||
|
description = "The ID of the agent to monitor."
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_monitoring" "monitoring" {
|
||||||
|
threshold = var.threshold
|
||||||
|
memory_threshold = var.memory_threshold
|
||||||
|
disk_threshold = var.disk_threshold
|
||||||
|
disks = var.disks
|
||||||
|
enabled = var.enabled
|
||||||
|
memory_enabled = var.memory_enabled
|
||||||
|
disk_enabled = var.disk_enabled
|
||||||
|
agent_id = var.agent_id
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user