监控与日志

生产环境必须有完善的监控和日志系统，这是保障服务稳定的基础。

结构化日志

package main

import (
    "time"
    
    "github.com/gin-gonic/gin"
    "go.uber.org/zap"
)

// main builds a zap production logger, attaches a request-logging middleware
// to a bare Gin engine, and serves on :8080.
//
// The middleware logs one "request" entry per request after the handler chain
// has run, with method, path, status, latency and client IP as structured
// fields.
func main() {
    logger, err := zap.NewProduction()
    if err != nil {
        // Fail loudly here instead of dereferencing a nil logger later.
        panic(err)
    }
    // Best-effort flush of buffered entries on exit; the Sync error is
    // intentionally discarded.
    defer func() { _ = logger.Sync() }()

    r := gin.New()

    r.Use(func(c *gin.Context) {
        start := time.Now()
        // Read the path before the handlers run so the logged value is the
        // path as it was when the request entered this middleware.
        path := c.Request.URL.Path

        c.Next()

        logger.Info("request",
            zap.String("method", c.Request.Method),
            zap.String("path", path),
            zap.Int("status", c.Writer.Status()),
            zap.Duration("latency", time.Since(start)),
            zap.String("client_ip", c.ClientIP()),
        )
    })

    // Run only returns on failure; report it instead of dropping it, and
    // return normally so the deferred Sync still runs.
    if err := r.Run(":8080"); err != nil {
        logger.Error("server stopped", zap.Error(err))
    }
}

Prometheus 指标

import (
    "strconv"
    "time"

    "github.com/gin-gonic/gin"
    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// httpRequestsTotal counts HTTP requests, partitioned by the "method",
// "path" and "status" labels.
var httpRequestsTotal = prometheus.NewCounterVec(
    prometheus.CounterOpts{
        Name: "http_requests_total",
        Help: "Total number of HTTP requests",
    },
    []string{"method", "path", "status"},
)

// httpRequestDuration observes request durations in seconds, partitioned by
// the "method" and "path" labels, using explicit buckets from 0.1 to 5.
var httpRequestDuration = prometheus.NewHistogramVec(
    prometheus.HistogramOpts{
        Name:    "http_request_duration_seconds",
        Help:    "HTTP request duration in seconds",
        Buckets: []float64{0.1, 0.5, 1, 2, 5},
    },
    []string{"method", "path"},
)

// init registers both HTTP metric collectors with the default Prometheus
// registry. MustRegister panics if a registration fails.
func init() {
    prometheus.MustRegister(httpRequestsTotal, httpRequestDuration)
}

// PrometheusMiddleware returns a Gin handler that records, for every request,
// one increment of httpRequestsTotal and one observation of
// httpRequestDuration after the rest of the handler chain has run.
//
// The "path" label is the matched route pattern (c.FullPath()), not the raw
// URL. Requests that match no route have an empty FullPath; they are grouped
// under the fixed label value "unmatched" so the series never carries an
// empty label.
func PrometheusMiddleware() gin.HandlerFunc {
    return func(c *gin.Context) {
        start := time.Now()

        c.Next()

        elapsed := time.Since(start).Seconds()

        method := c.Request.Method
        route := c.FullPath()
        if route == "" {
            // No route matched (e.g. a 404).
            route = "unmatched"
        }
        status := strconv.Itoa(c.Writer.Status())

        httpRequestsTotal.WithLabelValues(method, route, status).Inc()
        httpRequestDuration.WithLabelValues(method, route).Observe(elapsed)
    }
}

// main starts a Gin engine on :8080 with the Prometheus middleware installed,
// exposing the collected metrics at /metrics and a sample endpoint at /api.
func main() {
    r := gin.New()

    r.Use(PrometheusMiddleware())

    // Expose the default registry through the standard promhttp handler.
    r.GET("/metrics", gin.WrapH(promhttp.Handler()))

    r.GET("/api", func(c *gin.Context) {
        c.String(200, "OK")
    })

    // Run only returns on failure (for example, the port is already in use);
    // surface that instead of exiting silently.
    if err := r.Run(":8080"); err != nil {
        panic(err)
    }
}

健康检查

// isHealthy is the health flag read by HealthCheck via atomic.LoadInt32;
// 1 means healthy. It starts out healthy.
var isHealthy int32 = 1

// HealthCheck responds 200 {"status": "healthy"} while the isHealthy flag is
// 1, and 503 {"status": "unhealthy"} for any other value.
func HealthCheck(c *gin.Context) {
    if atomic.LoadInt32(&isHealthy) != 1 {
        c.JSON(503, gin.H{"status": "unhealthy"})
        return
    }
    c.JSON(200, gin.H{"status": "healthy"})
}

// ReadinessCheck responds 200 {"status": "ready"} when checkDependencies
// reports true, and 503 {"status": "not ready"} otherwise.
func ReadinessCheck(c *gin.Context) {
    if !checkDependencies() {
        c.JSON(503, gin.H{"status": "not ready"})
        return
    }
    c.JSON(200, gin.H{"status": "ready"})
}

// checkDependencies reports whether both db.Ping() and redis.Ping() return
// nil. redis is only pinged when the db ping has already returned nil.
// NOTE(review): db and redis are not defined in this snippet — confirm that
// their Ping methods return an error value.
func checkDependencies() bool {
    if db.Ping() != nil {
        return false
    }
    return redis.Ping() == nil
}

// Register the health and readiness handlers as GET endpoints.
// NOTE(review): r is not defined in this snippet; presumably the Gin engine
// created in main — confirm.
r.GET("/health", HealthCheck)
r.GET("/ready", ReadinessCheck)

日志中间件

// LoggerMiddleware returns a Gin handler that writes one structured log entry
// per request to logger after the handler chain has run.
//
// Each entry carries status, method, path, query, client IP, latency and
// user agent; if the context collected errors, they are added as "errors".
// The level follows the response status: Error for >= 500, Warn for >= 400,
// Info otherwise.
func LoggerMiddleware(logger *zap.Logger) gin.HandlerFunc {
    return func(c *gin.Context) {
        began := time.Now()
        // Snapshot path and query before the handlers run.
        reqPath, rawQuery := c.Request.URL.Path, c.Request.URL.RawQuery

        c.Next()

        elapsed := time.Since(began)
        status := c.Writer.Status()

        fields := []zap.Field{
            zap.Int("status", status),
            zap.String("method", c.Request.Method),
            zap.String("path", reqPath),
            zap.String("query", rawQuery),
            zap.String("ip", c.ClientIP()),
            zap.Duration("latency", elapsed),
            zap.String("user-agent", c.Request.UserAgent()),
        }

        if len(c.Errors) > 0 {
            fields = append(fields, zap.String("errors", c.Errors.String()))
        }

        // Pick the log level from the response status class.
        if status >= 500 {
            logger.Error("server error", fields...)
        } else if status >= 400 {
            logger.Warn("client error", fields...)
        } else {
            logger.Info("request", fields...)
        }
    }
}

错误追踪

集成 Sentry:

import "github.com/getsentry/sentry-go"

// main initialises the Sentry SDK, installs a panic-recovery middleware that
// reports panics to Sentry and answers with a 500 JSON body, and serves on
// :8080.
func main() {
    if err := sentry.Init(sentry.ClientOptions{
        Dsn: "https://xxx@sentry.io/xxx",
    }); err != nil {
        // A misconfigured SDK would otherwise drop every report silently.
        panic(err)
    }
    // Drain queued events once at shutdown. Flushing inside the request path
    // would hold every panicking request for up to the flush timeout before
    // the client receives its 500.
    defer sentry.Flush(time.Second * 5)

    r := gin.Default()

    r.Use(func(c *gin.Context) {
        defer func() {
            if err := recover(); err != nil {
                sentry.CurrentHub().Recover(err)

                c.AbortWithStatusJSON(500, gin.H{"error": "internal error"})
            }
        }()
        c.Next()
    })

    // Run only returns on failure; panic so the error is visible and the
    // deferred Flush still runs.
    if err := r.Run(":8080"); err != nil {
        panic(err)
    }
}

日志轮转

使用 lumberjack:

import "gopkg.in/natefinch/lumberjack.v2"

// setupLogger builds a zap logger that writes JSON entries at Info level and
// above to /var/log/myapp/app.log through a lumberjack rotating writer.
//
// NOTE(review): per lumberjack's documentation MaxSize is in megabytes and
// MaxAge in days — confirm against the version in use.
func setupLogger() *zap.Logger {
    encoder := zapcore.NewJSONEncoder(zap.NewProductionEncoderConfig())

    // lumberjack handles rotation; AddSync adapts it to zap's WriteSyncer.
    sink := zapcore.AddSync(&lumberjack.Logger{
        Filename:   "/var/log/myapp/app.log",
        MaxSize:    100,
        MaxBackups: 10,
        MaxAge:     30,
        Compress:   true,
    })

    return zap.New(zapcore.NewCore(encoder, sink, zap.InfoLevel))
}

指标告警

Prometheus 告警规则:

# Alerting rules for the "myapp" group. Each alert must hold for 5m before it
# fires.
groups:
  - name: myapp
    rules:
      # Fires when the 5m per-second rate of requests with a 5xx status label
      # exceeds 0.1.
      # NOTE(review): the expression has no aggregation, so it is evaluated per
      # label combination, and the threshold is requests per second rather
      # than a fraction of all traffic — confirm that is intended.
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          
      # Fires when the 0.99 quantile of request duration, estimated from the
      # histogram buckets over 5m, exceeds 1 (seconds, per the metric name).
      - alert: HighLatency
        expr: histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m])) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High latency detected"

小结

监控和日志是生产环境的眼睛和耳朵。结构化日志便于查询分析，Prometheus 指标便于监控告警，健康检查让负载均衡器正确管理节点。集成 Sentry 可以追踪错误，日志轮转防止磁盘爆满。