INFRA-3438 | Saurabh | Added Client TLS Negotiation Error in json schema and migration script (#1085)

* INFRA-3438 | Saurabh | Added Client TLS Negotiation Error in json schema and script

* INFRA-3438 | Saurabh | Added Client TLS Negotiation Error in json schema and script
This commit is contained in:
Saurabh Bhagwan Sathe
2024-07-15 17:20:12 +05:30
committed by GitHub
parent 6e92718100
commit 5d17f96135
2 changed files with 102 additions and 0 deletions

View File

@@ -0,0 +1,98 @@
from kubernetes import client, config
# Namespaces that are skipped entirely; PrometheusRules in them are left untouched.
_SKIPPED_NAMESPACES = frozenset({
    "monitoring", "logging", "kube-system", "istio-system", "kube-node-lease",
    "kube-public", "portieris", "telepresence", "twistlock", "velero", "wiz",
})

# A PrometheusRule is only migrated when its first group already has this alert.
_TRIGGER_ALERT = "HighELB5xx"
# Name of the alert this migration appends.
_NEW_ALERT = "HighTLSNegotiationErrors"


def _build_tls_negotiation_alert(namespace, app_name, app_team):
    """Return the HighTLSNegotiationErrors alert rule dict for one application."""
    return {
        "alert": _NEW_ALERT,
        "annotations": {
            "description": f'Namespace: {namespace}, AppName: {app_name}; has more client TLS negotiation errors in the last 1m',
            "runbook": 'https://navihq.atlassian.net/wiki/spaces/IN/pages/509936863/Runbook',
            "summary": 'Service is facing a lot of TLS negotiation errors',
        },
        "expr": f"sum by (tag_Ingress) (aws_alb_client_tlsnegotiation_error_count_sum{{tag_Ingress=\"{app_name}\"}}) > 1",
        "for": "1m",
        "labels": {
            "alertTeam": f'{app_team}',
            "appName": f'{app_name}',
            "severity": "warning",
        },
    }


def modify_prometheus_rules(api_instance, namespaces):
    """Append a HighTLSNegotiationErrors alert to eligible PrometheusRules.

    For every namespace in ``namespaces`` that is not in the skip list, the
    ``prometheusrules`` custom objects (group ``monitoring.coreos.com``,
    version ``v1``) are listed. A rule is updated only when all of these hold:

    * it has at least one rule group,
    * its metadata carries a ``Team`` label,
    * its first group contains a ``HighELB5xx`` alert,
    * its first group does not already contain ``HighTLSNegotiationErrors``
      (so re-running the migration does not create duplicates).

    Only the first group (``spec.groups[0]``) is inspected and modified.

    Args:
        api_instance: Object exposing ``list_namespaced_custom_object`` and
            ``replace_namespaced_custom_object`` (called with keyword
            arguments), e.g. a ``kubernetes.client.CustomObjectsApi``.
        namespaces: Iterable of namespace names to process.

    Raises:
        Any exception raised by ``api_instance`` propagates to the caller.
    """
    for namespace in namespaces:
        if namespace in _SKIPPED_NAMESPACES:
            print(f"Skipping namespace: {namespace}")
            continue

        # Retrieve the existing PrometheusRule resources in this namespace.
        rules = api_instance.list_namespaced_custom_object(
            group="monitoring.coreos.com",
            version="v1",
            plural="prometheusrules",
            namespace=namespace,
        )

        for rule in rules.get("items", []):
            metadata = rule["metadata"]
            rule_name = metadata["name"]
            print(f"Modifying PrometheusRule: {namespace} {rule_name}")

            # Tolerate objects with a missing/null spec or groups list.
            groups = (rule.get("spec") or {}).get("groups") or []
            if not groups:
                print(f"PrometheusRule: {namespace} {rule_name} has no rules")
                continue

            labels = metadata.get("labels") or {}
            if "Team" not in labels:
                print(f"PrometheusRule: {namespace} {rule_name} has no Team label")
                continue

            group_rules = groups[0].get("rules") or []
            # Recording rules carry "record" instead of "alert"; .get() keeps
            # them from raising KeyError and aborting the whole run.
            alert_names = {r.get("alert") for r in group_rules}
            if _TRIGGER_ALERT not in alert_names:
                continue
            if _NEW_ALERT in alert_names:
                # Already migrated; appending again would duplicate the alert.
                print(f"PrometheusRule: {namespace} {rule_name} already has {_NEW_ALERT} alert")
                continue

            # Add the HighTLSNegotiationErrors alert to the PrometheusRule.
            group_rules.append(
                _build_tls_negotiation_alert(namespace, rule_name, labels["Team"])
            )

            # Update the PrometheusRule resource with the modified rules.
            api_instance.replace_namespaced_custom_object(
                group="monitoring.coreos.com",
                version="v1",
                plural="prometheusrules",
                namespace=namespace,
                name=rule_name,
                body=rule,
            )
            print(f"Added HighTLSNegotiationErrors alert to PrometheusRule: {namespace} {rule_name}")
if __name__ == "__main__":
    # Kube-config contexts to migrate, processed in this order. A failure in
    # one context is reported and the remaining contexts are still attempted.
    target_contexts = (
        "nonprod.np.navi-tech.in",
        "aps1.prod.navi-tech.in",
        "aps1.np.navi-gi.in",
        "aps1.prod.navi-gi.in",
        "aps1.np.navi-sa.in",
        "aps1.prod.navi-sa.in",
        "aps1.np.navi-pay.in",
        "aps1.prod.navi-pay.in",
    )

    for ctx_name in target_contexts:
        print(f"Modifying PrometheusRule resources for Kubernetes context: {ctx_name}")
        try:
            # Point the client library at this context's cluster.
            config.load_kube_config(context=ctx_name)

            # Custom-objects client used for the PrometheusRule resources.
            custom_objects_api = client.CustomObjectsApi()

            # Collect the name of every namespace in the cluster.
            core_api = client.CoreV1Api()
            namespace_names = []
            for namespace_obj in core_api.list_namespace().items:
                namespace_names.append(namespace_obj.metadata.name)

            modify_prometheus_rules(custom_objects_api, namespace_names)

            print(f"PrometheusRule modifications completed for Kubernetes context: {ctx_name}")
            print("--------------------------------------------------------------")
        except Exception as err:
            print(f"Error occurred for Kubernetes context {ctx_name}: {err!s}")

View File

@@ -45,6 +45,10 @@
{
"const": "latency",
"title": "Latency (in ms)"
},
{
"const": "clientTLSNegotiationError",
"title": "ClientTLSNegotiationError (in error count)"
}
]
},