// Package metrics defines the Prometheus histograms shared by the base
// service and small helpers for recording observations on them.
package metrics

import (
	"fmt"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// Metrics holds all metrics for the base service.
//
// Every histogram observes a duration in seconds and carries a constant
// "service" label set to the service name given at construction time.
type Metrics struct {
	// HTTPRequest is labelled by method, endpoint and status_code.
	HTTPRequest *prometheus.HistogramVec

	// DatabaseQuery is labelled by operation, table and error.
	DatabaseQuery *prometheus.HistogramVec

	// RabbitMQMessages is labelled by exchange, routing_key, action and error.
	RabbitMQMessages *prometheus.HistogramVec

	// BusinessOperations is labelled by operation_type and error.
	BusinessOperations *prometheus.HistogramVec

	// Cache is labelled by cache_type, key_pattern, action, hit and error.
	Cache *prometheus.HistogramVec

	// ExternalServiceCall is labelled by service_name, endpoint and error.
	ExternalServiceCall *prometheus.HistogramVec

	// Configuration the histograms were created with.
	namespace   string
	subsystem   string
	serviceName string
}

var (
	metricsInstance *Metrics
	metricsOnce     = &sync.Once{}

	// startTime is captured when the package is initialised and is the
	// reference point for the uptime reported by GetMetricsSummary.
	startTime = time.Now()
)

// Placeholder patterns used by NormalizePath. Each one is anchored on the "/"
// that opens a path segment. They are compiled once at package scope rather
// than on every call.
var (
	pathUUIDPattern   = regexp.MustCompile(`/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)
	pathHashPattern   = regexp.MustCompile(`/[a-zA-Z0-9]{20,}`)
	pathLongIDPattern = regexp.MustCompile(`/\d{10,}`)
	pathIDPattern     = regexp.MustCompile(`/\d+`)
)

// Placeholder patterns used by NormalizeExternalServiceEndpoint. Unlike the
// path patterns they are not tied to a "/" and match anywhere in the input.
var (
	endpointUUIDPattern   = regexp.MustCompile(`[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`)
	endpointHashPattern   = regexp.MustCompile(`[a-zA-Z0-9]{20,}`)
	endpointLongIDPattern = regexp.MustCompile(`\d{10,}`)
	endpointIDPattern     = regexp.MustCompile(`\d+`)
)

// GetMetrics returns the process-wide singleton instance of Metrics.
//
// The instance is built on the first call only. The namespace, subsystem and
// serviceName passed to any later call are ignored and the instance created
// by the first call is returned.
func GetMetrics(namespace, subsystem, serviceName string) *Metrics {
	metricsOnce.Do(func() {
		metricsInstance = newMetrics(namespace, subsystem, serviceName)
	})
	return metricsInstance
}

// newMetrics creates a new instance of Metrics and its six histograms.
//
// The histograms are created through promauto, which registers them with the
// default Prometheus registerer and panics if registration fails (for example
// when the same metric name is registered twice).
func newMetrics(namespace, subsystem, serviceName string) *Metrics {
	// newHistogram applies the options common to every histogram. A fresh
	// ConstLabels map is built per call so the histograms do not share one.
	newHistogram := func(name, help string, labels ...string) *prometheus.HistogramVec {
		return promauto.NewHistogramVec(
			prometheus.HistogramOpts{
				Namespace:   namespace,
				Subsystem:   subsystem,
				Name:        name,
				Help:        help,
				Buckets:     prometheus.DefBuckets,
				ConstLabels: prometheus.Labels{"service": serviceName},
			},
			labels,
		)
	}

	return &Metrics{
		namespace:   namespace,
		subsystem:   subsystem,
		serviceName: serviceName,

		HTTPRequest: newHistogram(
			"http_request_duration_seconds",
			"HTTP request duration in seconds",
			"method", "endpoint", "status_code",
		),
		DatabaseQuery: newHistogram(
			"database_query_duration_seconds",
			"Database query duration in seconds",
			"operation", "table", "error",
		),
		RabbitMQMessages: newHistogram(
			"rabbitmq_messages_duration_seconds",
			"Duration of RabbitMQ message operations (publish/consume) in seconds",
			"exchange", "routing_key", "action", "error",
		),
		BusinessOperations: newHistogram(
			"business_operations_duration_seconds",
			"Duration of business operations in seconds",
			"operation_type", "error",
		),
		Cache: newHistogram(
			"cache_operations_duration_seconds",
			"Duration of store operations in seconds",
			"cache_type", "key_pattern", "action", "hit", "error",
		),
		ExternalServiceCall: newHistogram(
			"external_service_duration_seconds",
			"External service call duration in seconds",
			"service_name", "endpoint", "error",
		),
	}
}

// GetNamespace returns the metrics namespace.
func (m *Metrics) GetNamespace() string {
	return m.namespace
}

// GetSubsystem returns the metrics subsystem.
func (m *Metrics) GetSubsystem() string {
	return m.subsystem
}

// GetServiceName returns the service name.
func (m *Metrics) GetServiceName() string {
	return m.serviceName
}

// GetFullMetricName returns metricName prefixed with the namespace and
// subsystem, joined by underscores.
//
// An empty namespace or subsystem is skipped instead of leaving a doubled or
// leading underscore, and an empty metricName yields "". This is the same
// rule prometheus.BuildFQName applies, so the result matches the name the
// histogram is registered under.
func (m *Metrics) GetFullMetricName(metricName string) string {
	switch {
	case metricName == "":
		return ""
	case m.namespace != "" && m.subsystem != "":
		return fmt.Sprintf("%s_%s_%s", m.namespace, m.subsystem, metricName)
	case m.namespace != "":
		return fmt.Sprintf("%s_%s", m.namespace, metricName)
	case m.subsystem != "":
		return fmt.Sprintf("%s_%s", m.subsystem, metricName)
	default:
		return metricName
	}
}

// RecordHTTPRequest observes the duration of one HTTP request, in seconds,
// under the given method, endpoint and statusCode labels.
func (m *Metrics) RecordHTTPRequest(method, endpoint, statusCode string, duration time.Duration) {
	m.HTTPRequest.WithLabelValues(method, endpoint, statusCode).Observe(duration.Seconds())
}

// NormalizePath normalizes HTTP paths by replacing IDs and similar dynamic
// segments with placeholders. This prevents metric cardinality explosion
// while maintaining meaningful endpoint grouping.
//
// Replacements are applied from most to least specific so that a broad
// pattern cannot consume part of a value a narrower one should claim:
//
//	lower-case hex UUID        -> /:uuid
//	20+ alphanumeric chars     -> /:hash
//	10+ digits                 -> /:long_id
//	any remaining digit run    -> /:id
//
// The patterns match from the "/" that opens a segment but are not anchored
// at its end, so a segment that merely begins with digits has only that
// digit prefix replaced.
func (m *Metrics) NormalizePath(path string) string {
	path = pathUUIDPattern.ReplaceAllString(path, "/:uuid")
	path = pathHashPattern.ReplaceAllString(path, "/:hash")
	path = pathLongIDPattern.ReplaceAllString(path, "/:long_id")
	path = pathIDPattern.ReplaceAllString(path, "/:id")
	return path
}

// NormalizeExternalServiceEndpoint normalizes external service endpoint
// names. Use this when you have dynamic endpoint names that could cause
// cardinality issues.
//
// It applies the same placeholders and the same most-specific-first order as
// NormalizePath (:uuid, :hash, :long_id, :id), but the patterns match
// anywhere in the string rather than only after a "/".
func (m *Metrics) NormalizeExternalServiceEndpoint(endpoint string) string {
	endpoint = endpointUUIDPattern.ReplaceAllString(endpoint, ":uuid")
	endpoint = endpointHashPattern.ReplaceAllString(endpoint, ":hash")
	endpoint = endpointLongIDPattern.ReplaceAllString(endpoint, ":long_id")
	endpoint = endpointIDPattern.ReplaceAllString(endpoint, ":id")
	return endpoint
}

// RecordDatabaseQuery observes the duration of one database query, in
// seconds. The error label is derived from err by classifyError.
func (m *Metrics) RecordDatabaseQuery(operation, table string, duration time.Duration, err error) {
	m.DatabaseQuery.WithLabelValues(operation, table, m.classifyError(err)).Observe(duration.Seconds())
}

// RecordRabbitMQMessage observes the duration of one RabbitMQ message
// operation, in seconds. The error label is derived from err by
// classifyError.
func (m *Metrics) RecordRabbitMQMessage(exchange, routingKey, action string, duration time.Duration, err error) {
	m.RabbitMQMessages.WithLabelValues(exchange, routingKey, action, m.classifyError(err)).Observe(duration.Seconds())
}

// RecordBusinessOperation observes the duration of one business operation,
// in seconds. The error label is derived from err by classifyError.
func (m *Metrics) RecordBusinessOperation(operationType string, err error, duration time.Duration) {
	m.BusinessOperations.WithLabelValues(operationType, m.classifyError(err)).Observe(duration.Seconds())
}

// RecordCacheHit observes the duration of one cache operation, in seconds.
// The hit label is "true" or "false" and the error label is derived from err
// by classifyError.
func (m *Metrics) RecordCacheHit(cacheType, keyPattern, action string, hit bool, err error, duration time.Duration) {
	m.Cache.WithLabelValues(cacheType, keyPattern, action, strconv.FormatBool(hit), m.classifyError(err)).Observe(duration.Seconds())
}

// RecordExternalServiceCall observes the duration of one call to an external
// service, in seconds. The error label is derived from err by classifyError.
func (m *Metrics) RecordExternalServiceCall(serviceName, endpoint string, err error, duration time.Duration) {
	m.ExternalServiceCall.WithLabelValues(serviceName, endpoint, m.classifyError(err)).Observe(duration.Seconds())
}

// classifyError maps err onto a fixed set of label values so the "error"
// label stays low-cardinality. A nil error yields "none" and an unrecognised
// one "unknown_error".
//
// Classification is by substring of err.Error(); the first matching case
// wins. Matching is case-sensitive except for "deadlock".
func (m *Metrics) classifyError(err error) string {
	if err == nil {
		return "none"
	}

	errStr := err.Error()
	switch {
	// The specific connection failures must be tested before the generic
	// "connection" substring, which would otherwise match them first.
	case strings.Contains(errStr, "connection lost"):
		return "connection_lost"
	case strings.Contains(errStr, "connection reset by peer"):
		return "connection_reset_by_peer"
	case strings.Contains(errStr, "connection"):
		return "connection_error"
	case strings.Contains(errStr, "timeout"):
		return "timeout_error"
	case strings.Contains(strings.ToLower(errStr), "deadlock"):
		return "deadlock_error"
	case strings.Contains(errStr, "not found") || strings.Contains(errStr, "NotFound"):
		return "not_found_error"
	case strings.Contains(errStr, "Duplicate"):
		return "duplicate_error"
	case strings.Contains(errStr, "permission"):
		return "permission_error"
	case strings.Contains(errStr, "validation"):
		return "validation_error"
	case strings.Contains(errStr, "failed to publish") || strings.Contains(errStr, "publish error"):
		return "publish_error"
	case strings.Contains(errStr, "failed to marshal"):
		return "marshal_error"
	case strings.Contains(errStr, "failed to save"):
		return "save_error"
	case strings.Contains(errStr, "too many open files"):
		return "too_many_open_files"
	case strings.Contains(errStr, "no such file or directory"):
		return "no_such_file"
	case strings.Contains(errStr, "failed to parse CSV"):
		return "parse_csv_error"
	case strings.Contains(errStr, "Internal Server Error"):
		return "internal_server_error"
	default:
		return "unknown_error"
	}
}

// RecordCacheMetrics records one cache operation. It forwards all arguments
// unchanged to RecordCacheHit.
func (m *Metrics) RecordCacheMetrics(cacheType, keyPattern, action string, hit bool, err error, duration time.Duration) {
	m.RecordCacheHit(cacheType, keyPattern, action, hit, err, duration)
}

// RecordDatabaseOperation records one database operation. It forwards all
// arguments unchanged to RecordDatabaseQuery.
func (m *Metrics) RecordDatabaseOperation(operation, table string, duration time.Duration, err error) {
	m.RecordDatabaseQuery(operation, table, duration, err)
}

// GetMetricsSummary returns a small snapshot of process-level information:
//
//	"uptime_seconds": seconds elapsed since package initialisation
//	"goroutines":     current number of goroutines
//	"start_time":     package initialisation time formatted as RFC 3339
func (m *Metrics) GetMetricsSummary() map[string]interface{} {
	return map[string]interface{}{
		"uptime_seconds": time.Since(startTime).Seconds(),
		"goroutines":     runtime.NumGoroutine(),
		"start_time":     startTime.Format(time.RFC3339),
	}
}