基础入门

核心技能

附录

环境配置

docker-compose.yml

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
version: '3.8'

services:
  # Prometheus - metrics collection and storage
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      # The scrape configuration is bind-mounted from the host; the TSDB data
      # lives in a named volume so it survives container re-creation.
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # Enables the HTTP lifecycle endpoints (e.g. POST /-/reload).
      - '--web.enable-lifecycle'
    restart: unless-stopped

  # Grafana - visualisation and dashboards
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Initial admin credentials for the learning environment; change the
      # password after the first login.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin123
      # Anonymous access stays disabled, so every user has to log in.
      - GF_AUTH_ANONYMOUS_ENABLED=false
      # Default timezone. Grafana env overrides follow GF_<SECTION>_<KEY>, and
      # default_timezone belongs to the [date_formats] section.
      - GF_DATE_FORMATS_DEFAULT_TIMEZONE=Asia/Singapore
    depends_on:
      - prometheus
    restart: unless-stopped

volumes:
  prometheus_data:
  grafana_data:

prometheus.yml

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# Prometheus configuration
# Learning environment - Day 2

global:
  # Scrape every target once per 15 seconds.
  scrape_interval: 15s
  # Evaluate rules once per 15 seconds.
  evaluation_interval: 15s

# Scrape jobs
scrape_configs:
  # Prometheus scrapes its own runtime metrics, which it serves on /metrics.
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "prometheus:9090"

用 Go 构建可观测的 HTTP 服务

main.go

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
package main

import (
	"fmt"
	"log"
	"math/rand"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// Step 1: define the metrics.
var (
	// httpRequestsTotal is a Counter of HTTP requests, partitioned by the
	// method, endpoint and status labels.
	httpRequestsTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "myapp_http_requests_total",
			Help: "Total number of HTTP requests",
		},
		[]string{"method", "endpoint", "status"},
	)

	// httpRequestDuration is a Histogram of request latency in seconds,
	// partitioned by method and endpoint. The bucket upper bounds are
	// 10ms, 20ms, 50ms, 100ms, 250ms, 500ms, 1s and 2.5s.
	httpRequestDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "myapp_http_request_duration_seconds",
			Help:    "HTTP request duration in seconds",
			Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.25, 0.5, 1, 2.5},
		},
		[]string{"method", "endpoint"},
	)

	// httpRequestsInFlight is a Gauge of requests currently being handled.
	httpRequestsInFlight = prometheus.NewGauge(
		prometheus.GaugeOpts{
			Name: "myapp_http_requests_in_flight",
			Help: "Number of HTTP requests currently being processed",
		},
	)

	// ordersCreatedTotal is a Counter simulating a business metric: the
	// number of orders created, partitioned by product_type.
	ordersCreatedTotal = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "myapp_orders_created_total",
			Help: "Total number of orders created",
		},
		[]string{"product_type"},
	)

	// orderQueueSize is a Gauge simulating a business metric: the depth of
	// the order processing queue.
	orderQueueSize = prometheus.NewGauge(
		prometheus.GaugeOpts{
			Name: "myapp_order_queue_depth",
			Help: "Current number of orders in the processing queue",
		},
	)
)

// Step 2: register the metrics.

// init registers every metric declared in this file with the default
// Prometheus registry. MustRegister panics if a registration fails.
func init() {
	prometheus.MustRegister(
		httpRequestsTotal,
		httpRequestDuration,
		httpRequestsInFlight,
		ordersCreatedTotal,
		orderQueueSize,
	)
}

// Step 3: business logic with metric instrumentation.

// instrumentHandler wraps next in middleware that tracks the number of
// in-flight requests and records how long next took, labelled with the
// request method and the given endpoint.
func instrumentHandler(endpoint string, next http.HandlerFunc) http.HandlerFunc {
	instrumented := func(w http.ResponseWriter, r *http.Request) {
		// The gauge goes up on entry and back down when this function returns.
		httpRequestsInFlight.Inc()
		defer httpRequestsInFlight.Dec()

		// Time the wrapped handler.
		began := time.Now()
		next(w, r)
		elapsed := time.Since(began)

		// The histogram is in seconds, so convert before observing.
		latency := httpRequestDuration.WithLabelValues(r.Method, endpoint)
		latency.Observe(elapsed.Seconds())
	}
	return instrumented
}

// handleGetOrders returns a fixed list of orders as JSON.
//
// It sleeps for a random 20-199ms to simulate processing time. About 10% of
// calls answer with a 500 and a JSON error body; the rest answer with a 200.
// Each outcome increments httpRequestsTotal with the matching status label.
func handleGetOrders(w http.ResponseWriter, r *http.Request) {
	// rand.Intn(180) yields 0..179, so the delay is 20..199 milliseconds.
	delay := time.Duration(20+rand.Intn(180)) * time.Millisecond
	time.Sleep(delay)

	// Both outcomes send a JSON body, so declare the content type up front.
	// Headers must be set before WriteHeader is called.
	w.Header().Set("Content-Type", "application/json")

	// 90% of requests succeed, 10% fail with a 500.
	if rand.Float64() < 0.1 {
		httpRequestsTotal.WithLabelValues(r.Method, "/api/orders", "500").Inc()
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprint(w, `{"error": "database timeout"}`)
		return
	}

	httpRequestsTotal.WithLabelValues(r.Method, "/api/orders", "200").Inc()
	fmt.Fprint(w, `{"orders": [{"id": 1, "product": "Laptop"}, {"id": 2, "product": "Phone"}]}`)
}

// handleCreateOrder simulates creating an order and answers with JSON.
//
// It sleeps for a random 50-499ms, picks a random product type, and fails
// about 5% of calls with a 400 and a JSON error body. Successful calls
// increment ordersCreatedTotal for the chosen product type and answer with a
// 201. Each outcome increments httpRequestsTotal with the matching status
// label.
func handleCreateOrder(w http.ResponseWriter, r *http.Request) {
	// rand.Intn(450) yields 0..449, so the delay is 50..499 milliseconds.
	delay := time.Duration(50+rand.Intn(450)) * time.Millisecond
	time.Sleep(delay)

	// Pick a random product type.
	products := []string{"electronics", "clothing", "books", "food"}
	product := products[rand.Intn(len(products))]

	// Both outcomes send a JSON body, so declare the content type up front.
	// Headers must be set before WriteHeader is called.
	w.Header().Set("Content-Type", "application/json")

	// 95% of requests succeed, 5% fail with a 400.
	if rand.Float64() < 0.05 {
		httpRequestsTotal.WithLabelValues(r.Method, "/api/orders", "400").Inc()
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprint(w, `{"error": "invalid order data"}`)
		return
	}

	// Record the business metric alongside the request counter.
	ordersCreatedTotal.WithLabelValues(product).Inc()
	httpRequestsTotal.WithLabelValues(r.Method, "/api/orders", "201").Inc()

	w.WriteHeader(http.StatusCreated)
	fmt.Fprintf(w, `{"order_id": %d, "product_type": "%s", "status": "created"}`, rand.Intn(10000), product)
}

// handleHealth is the health-check endpoint. It counts the request in
// httpRequestsTotal with status "200" and answers with a fixed JSON body.
func handleHealth(w http.ResponseWriter, r *http.Request) {
	httpRequestsTotal.WithLabelValues(r.Method, "/health", "200").Inc()
	// The body is JSON, so declare it like the other handlers do.
	w.Header().Set("Content-Type", "application/json")
	fmt.Fprint(w, `{"status": "healthy"}`)
}

// Step 4: simulate background traffic.

// simulateTraffic loops forever, sending one request at a time to a randomly
// chosen local endpoint, setting the order-queue gauge to a random value, and
// sleeping 200-1999ms between iterations. It never returns, so it is meant to
// run in its own goroutine.
//
// Request failures are logged and otherwise ignored: the simulation is
// best-effort and must not stop the loop.
func simulateTraffic() {
	client := &http.Client{Timeout: 5 * time.Second}

	// GET /api/orders is listed three times so that it is picked more often
	// than the other entries.
	endpoints := []struct {
		method string
		url    string
	}{
		{"GET", "http://localhost:8080/api/orders"},
		{"GET", "http://localhost:8080/api/orders"},
		{"GET", "http://localhost:8080/api/orders"},
		{"POST", "http://localhost:8080/api/orders"},
		{"GET", "http://localhost:8080/health"},
	}

	// Give the HTTP server time to start listening.
	time.Sleep(2 * time.Second)
	log.Println("Starting traffic simulation...")

	for {
		// Pick a random endpoint.
		ep := endpoints[rand.Intn(len(endpoints))]

		req, err := http.NewRequest(ep.method, ep.url, nil)
		if err != nil {
			log.Printf("traffic simulation: building %s %s: %v", ep.method, ep.url, err)
		} else if resp, err := client.Do(req); err != nil {
			log.Printf("traffic simulation: %s %s: %v", ep.method, ep.url, err)
		} else {
			// The response body must be closed, otherwise every iteration
			// leaks the underlying connection.
			resp.Body.Close()
		}

		// Simulate queue-depth fluctuation: rand.Intn(50) yields 0..49.
		orderQueueSize.Set(float64(rand.Intn(50)))

		// Pause between requests: 200 + 0..1799 milliseconds.
		time.Sleep(time.Duration(200+rand.Intn(1800)) * time.Millisecond)
	}
}

// Step 5: start the service.

// main wires up the routes, starts the background traffic simulation and
// serves HTTP on :8080 until ListenAndServe fails.
func main() {
	// Build the instrumented /api/orders handler once instead of on every
	// request. It dispatches on the HTTP method.
	ordersHandler := instrumentHandler("/api/orders", func(w http.ResponseWriter, r *http.Request) {
		switch r.Method {
		case http.MethodGet:
			handleGetOrders(w, r)
		case http.MethodPost:
			handleCreateOrder(w, r)
		default:
			httpRequestsTotal.WithLabelValues(r.Method, "/api/orders", "405").Inc()
			// A 405 response has to tell the client which methods are allowed.
			w.Header().Set("Allow", "GET, POST")
			w.WriteHeader(http.StatusMethodNotAllowed)
		}
	})
	http.HandleFunc("/api/orders", ordersHandler)

	http.HandleFunc("/health", instrumentHandler("/health", handleHealth))

	// Expose /metrics for Prometheus to scrape.
	http.Handle("/metrics", promhttp.Handler())

	// Generate simulated traffic from a background goroutine.
	go simulateTraffic()

	log.Println("Server starting on :8080")
	log.Println("Metrics available at :8080/metrics")
	log.Fatal(http.ListenAndServe(":8080", nil))
}
1
2
3
4
5
6
7
8
grafana-learning/
├── docker-compose.yml          (已更新)
├── prometheus/
│   └── prometheus.yml          (已更新)
└── myapp/
    ├── main.go
    ├── go.mod
    └── Dockerfile

Dockerfile

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
# Build stage: compile the binary with the Go toolchain.
FROM golang:1.23-alpine AS builder

WORKDIR /app
# Copy the module files first so the dependency-download layer stays cached
# until they change. The go.sum* glob lets the COPY succeed without a go.sum.
COPY go.mod go.sum* ./
RUN go mod download
COPY . .
# CGO is disabled so the binary does not depend on the builder's C library.
RUN CGO_ENABLED=0 go build -o /myapp main.go

# Runtime stage: only the compiled binary is carried over.
FROM alpine:3.19
COPY --from=builder /myapp /myapp
# Run as an unprivileged user; listening on 8080 does not require root.
USER nobody
EXPOSE 8080
CMD ["/myapp"]

prometheus.yml

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# Prometheus configuration
# Learning environment - Day 9 update

global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # Prometheus scrapes its own metrics.
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "prometheus:9090"

  # Scrape the metrics exposed by the Go application.
  - job_name: "myapp"
    static_configs:
      - targets:
          - "myapp:8080"

docker-compose.yml

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
version: '3.8'

services:
  # The Go application service, built from ./myapp
  myapp:
    build: ./myapp
    container_name: myapp
    ports:
      - "8080:8080"
    restart: unless-stopped

  # Prometheus - metrics collection and storage
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      # The scrape configuration is bind-mounted from the host; the TSDB data
      # lives in a named volume so it survives container re-creation.
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # Enables the HTTP lifecycle endpoints (e.g. POST /-/reload).
      - '--web.enable-lifecycle'
    restart: unless-stopped

  # Grafana - visualisation and dashboards
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Initial admin credentials for the learning environment; change the
      # password after the first login.
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin123
      # Anonymous access stays disabled, so every user has to log in.
      - GF_AUTH_ANONYMOUS_ENABLED=false
      # Default timezone. Grafana env overrides follow GF_<SECTION>_<KEY>, and
      # default_timezone belongs to the [date_formats] section.
      - GF_DATE_FORMATS_DEFAULT_TIMEZONE=Asia/Singapore
    depends_on:
      - prometheus
    restart: unless-stopped

volumes:
  prometheus_data:
  grafana_data: