Alerts

/etc/alerting.rules > alert.rules
goroutines_backend (1 active)
# Fires once go_goroutines for job "backend" has stayed at or above 1000
# for 30 minutes.
alert: goroutines_backend
expr: go_goroutines{job="backend"} >= 1000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
Labels State Active Since Value
alertname="goroutines_backend" cluster="stage" environment="staging" instance="12.0.1.52:9001" ip="12.0.1.52" job="backend" region="eu-central-1" firing 2025-11-21 11:44:41.991701887 +0000 UTC 92716
client_service_no_events (0 active)
disk (0 active)
# Fires once usage of /dev/xvda1 (container_fs_usage_bytes as a percentage of
# container_fs_limit_bytes) has stayed above 85 for 1 minute.
alert: disk
expr: >-
  100 * container_fs_usage_bytes{device="/dev/xvda1"}
  / container_fs_limit_bytes{device="/dev/xvda1"}
  > 85
for: 1m
annotations:
  # $value is a float64 in alert templates, so the %d verb would render as
  # "%!d(float64=...)"; %.0f prints the percentage rounded to a whole number.
  summary: >-
    {{ $labels.environment }} {{ $labels.device }} is {{ printf "%.0f" $value }}% full
goroutines_alarm (0 active)
# Fires once go_goroutines for job "alarm-service" has stayed at or above 1000
# for 5 minutes.
alert: goroutines_alarm
expr: go_goroutines{job="alarm-service"} >= 1000
for: 5m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_cadvisor (0 active)
# Fires once go_goroutines for job "cadvisor" has stayed at or above 1000
# for 2 minutes.
alert: goroutines_cadvisor
expr: go_goroutines{job="cadvisor"} >= 1000
for: 2m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_client_svc (0 active)
# Fires once go_goroutines for job "client-service" has stayed at or above
# 10000 for 30 minutes. Series with environment="demo" are excluded by the
# selector.
alert: goroutines_client_svc
expr: go_goroutines{environment!="demo",job="client-service"} >= 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_client_svc_demo (0 active)
# Fires once go_goroutines for job "client-service" in environment "demo" has
# stayed at or above 35000 for 30 minutes.
alert: goroutines_client_svc_demo
expr: go_goroutines{environment="demo",job="client-service"} >= 35000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_dataproc (0 active)
# Fires once go_goroutines for job "data-processor" has stayed at or above
# 1000 for 30 minutes.
alert: goroutines_dataproc
expr: go_goroutines{job="data-processor"} >= 1000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_gateway_svc (0 active)
# Fires once go_goroutines for job "gateway-service" has stayed at or above
# 1000 for 30 minutes.
alert: goroutines_gateway_svc
expr: go_goroutines{job="gateway-service"} >= 1000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_monitor (0 active)
# Fires once go_goroutines for job "monitor" has stayed at or above 1000
# for 5 minutes.
alert: goroutines_monitor
expr: go_goroutines{job="monitor"} >= 1000
for: 5m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_prometheus (0 active)
# Fires once go_goroutines for job "prometheus" has stayed at or above 1000
# for 2 minutes.
alert: goroutines_prometheus
expr: go_goroutines{job="prometheus"} >= 1000
for: 2m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
goroutines_router (0 active)
# Fires once go_goroutines for job "router" has stayed at or above 3500
# for 30 minutes.
alert: goroutines_router
expr: go_goroutines{job="router"} >= 3500
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.job }} goroutines high ({{ $value }})
queue_size_interval_facts_ready (0 active)
# Fires once redis_key_size for the [interval-facts] "ready" queue key has
# stayed above 10000 for 30 minutes.
alert: queue_size_interval_facts_ready
expr: >-
  redis_key_size{key="rmq::queue::[interval-facts]::ready"} > 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
queue_size_interval_facts_rejected (0 active)
# Fires once redis_key_size for the [interval-facts] "rejected" queue key has
# stayed above 10000 for 30 minutes.
alert: queue_size_interval_facts_rejected
expr: >-
  redis_key_size{key="rmq::queue::[interval-facts]::rejected"} > 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
queue_size_occ_sensor_ready (0 active)
# Fires once redis_key_size for the [occupancy-sensor-data] "ready" queue key
# has stayed above 10000 for 30 minutes.
alert: queue_size_occ_sensor_ready
expr: >-
  redis_key_size{key="rmq::queue::[occupancy-sensor-data]::ready"} > 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
queue_size_occ_sensor_rejected (0 active)
# Fires once redis_key_size for the [occupancy-sensor-data] "rejected" queue
# key has stayed above 10000 for 30 minutes.
alert: queue_size_occ_sensor_rejected
expr: >-
  redis_key_size{key="rmq::queue::[occupancy-sensor-data]::rejected"} > 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
queue_size_sensor_data_ready (0 active)
# Fires once redis_key_size for the [sensor-data] "ready" queue key has stayed
# above 10000 for 45 minutes.
alert: queue_size_sensor_data_ready
expr: >-
  redis_key_size{key="rmq::queue::[sensor-data]::ready"} > 10000
for: 45m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
queue_size_sensor_data_rejected (0 active)
# Fires once redis_key_size for the [sensor-data] "rejected" queue key has
# stayed above 10000 for 30 minutes.
alert: queue_size_sensor_data_rejected
expr: >-
  redis_key_size{key="rmq::queue::[sensor-data]::rejected"} > 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
queue_size_usage_facts_ready (0 active)
# Fires once redis_key_size for the [usage-facts] "ready" queue key has stayed
# above 10000 for 30 minutes.
alert: queue_size_usage_facts_ready
expr: >-
  redis_key_size{key="rmq::queue::[usage-facts]::ready"} > 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
queue_size_usage_facts_rejected (0 active)
# Fires once redis_key_size for the [usage-facts] "rejected" queue key has
# stayed above 10000 for 30 minutes.
alert: queue_size_usage_facts_rejected
expr: >-
  redis_key_size{key="rmq::queue::[usage-facts]::rejected"} > 10000
for: 30m
annotations:
  summary: >-
    {{ $labels.environment }} {{ $labels.key }} queue size at {{ $value }}
service_last_seen (0 active)
# Fires once time() minus service_last_seen has stayed above 300 for 2 minutes.
# The "demo-asset-tracking-service" series is excluded by the selector.
# NOTE(review): presumably service_last_seen is a Unix timestamp in seconds,
# making the threshold 5 minutes since the last heartbeat - confirm.
alert: service_last_seen
expr: >-
  time() - service_last_seen{service!="demo-asset-tracking-service"} > 300
for: 2m
annotations:
  summary: >-
    {{ $labels.environment }} service {{ $labels.service }} heartbeat