Advertisement
thiagofaioli

alert.rules.yaml

Dec 28th, 2018
240
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
YAML 2.11 KB | None | 0 0
  # Alerting rules in the Prometheus rule-group layout (groups -> rules -> alert).
  # Thresholds are evaluated against netdata_* metrics and the `up` metric.
  groups:
    - name: "Load System"
      rules:
        # Fires when the load-average series (every dimension except load5 and
        # load15) stays above 2.0 for 10 minutes.
        - alert: high_treta_load
          expr: netdata_system_load_load_average{dimension!="load5",dimension!="load15"} > 2.0
          for: 10m
          labels:
            # NOTE(review): "atention" looks like a misspelling of "attention".
            # Left unchanged because alert routing may match on this exact
            # value -- confirm before renaming.
            severity: atention
          annotations:
            summary: "High load system CPU"
            # Alternative wording kept from an earlier draft:
            # description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
            description: "{{ $labels.instance }} is under high load ({{ $value }})"

    - name: "Disponibility"
      rules:
        # Fires when a scrape target has reported up == 0 for 30 seconds.
        - alert: service_down
          expr: up == 0
          for: 30s
          labels:
            severity: page
          annotations:
            summary: "Instance {{ $labels.instance }} down"
            # {{ $value }} is the value of `up`, which is always 0 when this rule
            # matches, so it cannot be used as a duration. The text states the
            # `for` window instead; keep the two in sync.
            description: "{{ $labels.instance }} lost connection with API for more than 30s."

    - name: "File System Usage"
      rules:
        # Fires when the "avail" dimension of the "/" disk-space metric stays
        # below 5 for 3 minutes (the metric name suggests the unit is GB).
        # NOTE(review): unlike the other rules, this one sets no severity
        # label -- confirm whether that is intentional.
        - alert: usage_treta_fs
          expr: netdata_disk_space_GB_average{dimension="avail",family="/"} < 5
          for: 3m
          annotations:
            summary: "Disk space usage for  /"
            description: "{{ $labels.instance }} current space avail is: ({{ $value }}g)"

        # Disabled draft of a percentage-based rule, kept for reference only.
        # It cannot be enabled by uncommenting: both expressions start with
        # `if`, the second one has an unclosed parenthesis and no threshold,
        # the severity is empty, and the summary line ends with a stray comma.
        #
        # - alert: LowRootFilesytem
        #   expr: if ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
        #   expr: if ((netdata_disk_space_GB_average{dimension="used",family="/"} * 100) / (netdata_disk_space_GB_average{dimension="avail",family="/"} + netdata_disk_space_GB_average{dimension="used",family="/"})
        #   for: 2m
        #   labels:
        #     severity:
        #   annotations:
        #     summary: "{{$labels.instance}}: Low root disk space",
        #     description: "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value  }})"

        # Extrapolates the last 1h of "avail" samples 3 * 3600 seconds ahead and
        # fires when the predicted value stays below 0 for 5 minutes.
        - alert: Disk_Will_Fill_In_3Hours
          expr: predict_linear (netdata_disk_space_GB_average{dimension="avail",family="/"}[1h], 3 * 3600) < 0
          for: 5m
          labels:
            severity: page
          annotations:
            summary: "Filesystem / will be exhausted in 3 hours"
            description: "We recovered 1 hour of history to predict 3 hours ahead"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement