TP-55035 | Added alerts for conference event creation, fetching and deletion failures (#357)
* TP-38709 | Merging the changes to master on the logfix * TP-55035 | Added alerts for failures while creating, fetching and deleting conference events * TP-55035 | Fixed conflicts * TP-55035 | trying gauge metric to reset counter * TP-55035 | Reverting gauge metrics
This commit is contained in:
@@ -26,7 +26,7 @@ func main() {
|
||||
logger.InitLogger()
|
||||
appcontext.InitiateContext()
|
||||
appcontext.InitializeServices()
|
||||
prometheus.MustRegister(metrics.SlackChannelCreationFailureCounter, metrics.RCAGenerationFailureCounter)
|
||||
prometheus.MustRegister(metrics.SlackChannelCreationFailureCounter, metrics.RCAGenerationFailureCounter, metrics.ConferenceFailureCounter)
|
||||
|
||||
command := &cobra.Command{
|
||||
Use: "houston",
|
||||
|
||||
@@ -41,6 +41,14 @@ func (amp *PublisherImpl) PublishMetrics(metricAttributes ingester.MetricAttribu
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
case ingester.ConferenceFailureMetrics:
|
||||
{
|
||||
if err := publishConferenceFailureMetric(metricAttributes.ConferenceFailureMetric); err != nil {
|
||||
logger.Error("error while publishing conference failure metrics", zap.Error(err))
|
||||
}
|
||||
return
|
||||
}
|
||||
default:
|
||||
{
|
||||
return
|
||||
@@ -88,3 +96,16 @@ func publishRCAGenerationFailureMetric(rcaGenerationFailureMetrics ingester.RCAG
|
||||
).Inc()
|
||||
return
|
||||
}
|
||||
|
||||
func publishConferenceFailureMetric(conferenceFailureMetrics ingester.ConferenceFailureMetric) (err error) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
err = r.(error)
|
||||
}
|
||||
}()
|
||||
ConferenceFailureCounter.WithLabelValues(
|
||||
conferenceFailureMetrics.Event,
|
||||
conferenceFailureMetrics.ConferenceError,
|
||||
).Inc()
|
||||
return
|
||||
}
|
||||
|
||||
@@ -59,4 +59,12 @@ var (
|
||||
},
|
||||
[]string{"incident_id", "rca_generation_error"},
|
||||
)
|
||||
|
||||
ConferenceFailureCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "conference_failure",
|
||||
Help: "Conference failure",
|
||||
},
|
||||
[]string{"event", "conference_error"},
|
||||
)
|
||||
)
|
||||
|
||||
@@ -6,6 +6,7 @@ const (
|
||||
ApiMetrics MetricType = "API_METRICS"
|
||||
SlackMetrics MetricType = "SLACK_METRICS"
|
||||
RCAGenerationFailureMetrics MetricType = "RCA_GENERATION_FAILURE_METRICS"
|
||||
ConferenceFailureMetrics MetricType = "CONFERENCE_FAILURE_METRICS"
|
||||
)
|
||||
|
||||
type ApiMetric struct {
|
||||
@@ -31,8 +32,14 @@ type RCAGenerationFailureMetric struct {
|
||||
RCAGenerationError string `json:"rca_generation_error,omitempty"`
|
||||
}
|
||||
|
||||
// ConferenceFailureMetric holds the two values recorded when a conference
// operation fails. Both fields are plain strings and are omitted from JSON
// output when empty.
type ConferenceFailureMetric struct {
	Event           string `json:"event,omitempty"`            // identifier of the conference event the failure relates to
	ConferenceError string `json:"conference_error,omitempty"` // text of the error that caused the failure
}
|
||||
|
||||
type MetricAttributes struct {
|
||||
ApiMetric
|
||||
SlackMetric
|
||||
RCAGenerationFailureMetric
|
||||
ConferenceFailureMetric
|
||||
}
|
||||
|
||||
@@ -2,7 +2,9 @@ package conference
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"houston/internal/metrics"
|
||||
"houston/logger"
|
||||
"houston/model/ingester"
|
||||
"houston/pkg/conference"
|
||||
)
|
||||
|
||||
@@ -21,6 +23,7 @@ func (calendarService *CalendarService) CreateEvent(eventName string) (conferenc
|
||||
eventData, err := calendarService.calendarActions.CreateEvent(eventData)
|
||||
if err != nil {
|
||||
logger.Error(fmt.Sprintf("Unable to create conference event due to error: %s", err))
|
||||
publishConferenceFailureMetrics(eventName, err)
|
||||
}
|
||||
return eventData, err
|
||||
}
|
||||
@@ -28,6 +31,7 @@ func (calendarService *CalendarService) CreateEvent(eventName string) (conferenc
|
||||
func (calendarService *CalendarService) DeleteEvent(eventId string) error {
|
||||
err := calendarService.calendarActions.DeleteEvent(eventId)
|
||||
if err != nil {
|
||||
publishConferenceFailureMetrics(eventId, err)
|
||||
logger.Error(fmt.Sprintf("Unable to delete conference event due to error: %s", err))
|
||||
} else {
|
||||
logger.Info("Successfully deleted conference event")
|
||||
@@ -38,6 +42,7 @@ func (calendarService *CalendarService) DeleteEvent(eventId string) error {
|
||||
func (calendarService *CalendarService) GetEvent(eventId string) (conference.EventData, error) {
|
||||
event, err := calendarService.calendarActions.GetEvent(eventId)
|
||||
if err != nil {
|
||||
publishConferenceFailureMetrics(eventId, err)
|
||||
logger.Error(fmt.Sprintf("Unable to get conference event due to error: %s", err))
|
||||
} else {
|
||||
logger.Info("Successfully retrieved conference event details")
|
||||
@@ -48,3 +53,13 @@ func (calendarService *CalendarService) GetEvent(eventId string) (conference.Eve
|
||||
func (calendarService *CalendarService) GetConferenceTitle() string {
|
||||
return calendarService.calendarActions.GetConferenceTitle()
|
||||
}
|
||||
|
||||
func publishConferenceFailureMetrics(incidentName string, err error) {
|
||||
conferenceFailureMetrics := ingester.MetricAttributes{
|
||||
ConferenceFailureMetric: ingester.ConferenceFailureMetric{
|
||||
Event: incidentName,
|
||||
ConferenceError: err.Error(),
|
||||
},
|
||||
}
|
||||
metrics.NewMetricPublisher().PublishMetrics(conferenceFailureMetrics, ingester.ConferenceFailureMetrics)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user