Skip to content

Commit

Permalink
Add alerting rule for GitLab webservice error rate
Browse files Browse the repository at this point in the history
  • Loading branch information
kotfic committed Sep 1, 2022
1 parent a0769c7 commit ca2a193
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 0 deletions.
9 changes: 9 additions & 0 deletions k8s/prometheus/alertmanagerconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,12 @@ spec:
{{.Annotations.description}}
{{ end }}
# Inhibition: while an alert with severity=critical is firing, notifications
# for severity=warning alerts are suppressed, provided both alerts carry the
# same value for every label listed under `equal`.
inhibitRules:
  - sourceMatch:
      # Matchers for the alert that does the muting.
      - name: severity
        value: critical
    targetMatch:
      # Matchers for the alert that gets muted.
      - name: severity
        value: warning
    equal:
      # Source and target must agree on this label for the rule to apply.
      - group
28 changes: 28 additions & 0 deletions k8s/prometheus/custom/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,34 @@ spec:
expr: node_namespace_pod_name:pipeline_stuck_pods_info == 1
for: 5m
labels:
group: gitlab_webservice_error_rate
severity: warning
namespace: monitoring
source_namespace: "{{ $labels.namespace }}"

# Warning tier: fires once the recorded percentage has stayed at or below 95
# for five consecutive minutes.
- alert: GitLabWebServiceErrorRate5Percent
  # The recording rule behind this series counts responses whose status does
  # NOT start with 4 or 5, so a low value means many errors; hence `<=`.
  expr: ingress_error_rate:gitlab_webservice_default <= 95.0
  for: 5m
  labels:
    # Same `group` value as the 10% alert, so the two can be correlated.
    group: gitlab_webservice_error_rate
    severity: warning
    namespace: monitoring
    # NOTE(review): the recording rule aggregates `by (ingress)`, so the series
    # may carry no `namespace` label and this would render empty; confirm.
    source_namespace: '{{ $labels.namespace }}'
  annotations:
    summary: "The GitLab web service has seen 4XX/5XX responses for at least 5% of requests over the last 5 minutes."
    description: "GitLab Web Service has been seeing a 5% error rate for the last 5 minutes"
    # NOTE(review): placeholder; replace with the real runbook link.
    runbook_url: TODO


# Critical tier: fires once the recorded percentage has stayed at or below 90
# for two consecutive minutes.
- alert: GitLabWebServiceErrorRate10Percent
  # The recording rule behind this series counts responses whose status does
  # NOT start with 4 or 5, so a low value means many errors; hence `<=`.
  expr: ingress_error_rate:gitlab_webservice_default <= 90.0
  for: 2m
  labels:
    # Same `group` value as the 5% alert, so the two can be correlated.
    group: gitlab_webservice_error_rate
    severity: critical
    namespace: monitoring
    # NOTE(review): the recording rule aggregates `by (ingress)`, so the series
    # may carry no `namespace` label and this would render empty; confirm.
    source_namespace: '{{ $labels.namespace }}'
  annotations:
    summary: "The GitLab web service has seen 4XX/5XX responses for at least 10% of requests over the last 2 minutes."
    description: "GitLab Web Service has been seeing a 10% error rate for the last 2 minutes"
    # NOTE(review): placeholder; replace with the real runbook link.
    runbook_url: TODO
5 changes: 5 additions & 0 deletions k8s/prometheus/custom/rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@ spec:
irate(container_network_transmit_bytes_total{namespace="pipeline"}[3m]) == 0
)
record: node_namespace_pod_name:pipeline_stuck_pods_info
# Per-ingress percentage (0-100) of requests to the gitlab-webservice-default
# backend whose status does NOT start with 4 or 5. Despite the
# `ingress_error_rate` prefix, this is the share of non-error responses:
# 100 means no 4XX/5XX at all, and consumers compare it with `<=` thresholds.
#
# `rate` is used rather than `irate` because this series feeds alerts with a
# `for:` clause. `irate` only looks at the last two samples in the window, so a
# brief spike or dip can flip the result and reset the pending timer; `rate`
# averages over the whole 2m window.
# NOTE(review): a 2m window assumes a scrape interval of 30s or less; confirm.
- expr: |-
    sum(rate(nginx_ingress_controller_requests{exported_service="gitlab-webservice-default", status!~"[4-5].*"}[2m])) by (ingress) /
    sum(rate(nginx_ingress_controller_requests{exported_service="gitlab-webservice-default"}[2m])) by (ingress) * 100
  record: ingress_error_rate:gitlab_webservice_default

0 comments on commit ca2a193

Please sign in to comment.