INFRA-2999 | Saqib | Script to update labels of Elastic Stack Prometheus rules
This commit is contained in:
@@ -38,7 +38,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "The heap usage is over 80% for 15m\n VALUE = `{{ $value }}`\n NAME: `{{ $labels.node }}`",
|
||||
"summary": "Elasticsearch Heap Usage warning (node `{{ $labels.node }}`)"
|
||||
},
|
||||
"expr": f"(es_jvm_mem_heap_used_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}} / es_jvm_mem_heap_max_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}) * 100 \u003e 80",
|
||||
"expr": f"(es_jvm_mem_heap_used_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}} / es_jvm_mem_heap_max_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}) * 100 > 80",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
@@ -52,7 +52,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "The disk usage is over 85%\n VALUE = `{{ $value }}`",
|
||||
"summary": "Elasticsearch average disk out of space (node - `{{ $labels.node }}`). No new shards will be allocated at this node"
|
||||
},
|
||||
"expr": f"(es_fs_total_free_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}/es_fs_total_total_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}) * 100 \u003c 15",
|
||||
"expr": f"(es_fs_total_free_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}/es_fs_total_total_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}) * 100 < 15",
|
||||
"for": "20m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
@@ -66,7 +66,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "The disk usage is over 90%\n VALUE = `{{ $value }}`\n NAME: `{{ $labels.node }}`",
|
||||
"summary": "Elasticsearch disk out of space (node `{{ $labels.node }}`). No new shards will be allocated at this node"
|
||||
},
|
||||
"expr": f"(es_fs_total_free_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}/es_fs_total_total_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}) * 100 \u003c 10",
|
||||
"expr": f"(es_fs_total_free_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}/es_fs_total_total_bytes{{job=~\".*http\",es_cluster=\"{app_name}\"}}) * 100 < 10",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
@@ -108,7 +108,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "Elastic Cluster Index Replica less than 1 for 15 minutes\n VALUE = `{{ $value }}`",
|
||||
"summary": "Elasticsearch Cluster Index Replica less than 1 (cluster - `{{ $labels.es_cluster }}`)"
|
||||
},
|
||||
"expr": f"min(es_index_replicas_number{{job=~\".*http\",es_cluster=\"{app_name}\",index!~\"^[.].*\"}}) by (es_cluster,index) \u003c 1",
|
||||
"expr": f"min(es_index_replicas_number{{job=~\".*http\",es_cluster=\"{app_name}\",index!~\"^[.].*\"}}) by (es_cluster,index) < 1",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
@@ -122,7 +122,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "Number of initializing shards for 10 min\n VALUE = `{{ $value }}`",
|
||||
"summary": "Elasticsearch initializing shards (cluster `{{ $labels.es_cluster }}`)"
|
||||
},
|
||||
"expr": f"max(es_cluster_shards_number{{type=\"initializing\",job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) \u003e 0",
|
||||
"expr": f"max(es_cluster_shards_number{{type=\"initializing\",job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) > 0",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
@@ -136,7 +136,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "Number of unassigned shards for 30 min\n VALUE = `{{ $value }}`",
|
||||
"summary": "Elasticsearch unassigned shards (cluster `{{ $labels.es_cluster }}`)"
|
||||
},
|
||||
"expr": f"max(es_cluster_shards_number{{type=\"unassigned\",job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) \u003e 0",
|
||||
"expr": f"max(es_cluster_shards_number{{type=\"unassigned\",job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) > 0",
|
||||
"for": "30m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
@@ -150,7 +150,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "Number of unassigned shards for 15 min\n VALUE = `{{ $value }}`",
|
||||
"summary": "Elasticsearch unassigned shards (cluster `{{ $labels.es_cluster }}`)"
|
||||
},
|
||||
"expr": f"max(es_cluster_shards_number{{type=\"unassigned\",job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) \u003e 0",
|
||||
"expr": f"max(es_cluster_shards_number{{type=\"unassigned\",job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) > 0",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
@@ -164,7 +164,7 @@ def update_es_rule(api_instance, rule, namespace):
|
||||
"description": "Number of pending tasks for 15 min. Cluster works slowly.\n VALUE = `{{ $value }}`",
|
||||
"summary": "Elasticsearch pending tasks (cluster `{{ $labels.es_cluster }}`)"
|
||||
},
|
||||
"expr": f"max(es_cluster_pending_tasks_number{{job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) \u003e 0",
|
||||
"expr": f"max(es_cluster_pending_tasks_number{{job=~\".*http\",es_cluster=\"{app_name}\"}}) by (es_cluster) > 0",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"alertTeam": app_team,
|
||||
|
||||
Reference in New Issue
Block a user