mirror of
				https://github.com/hibiken/asynqmon.git
				synced 2025-10-26 16:26:12 +08:00 
			
		
		
		
	Add support for Prometheus integration
This commit is contained in:
		
							
								
								
									
										230
									
								
								metrics_handler.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										230
									
								
								metrics_handler.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,230 @@ | ||||
| package asynqmon | ||||
|  | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"net/http" | ||||
| 	"net/url" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"time" | ||||
| ) | ||||
|  | ||||
| type getMetricsResponse struct { | ||||
| 	QueueSize            *json.RawMessage `json:"queue_size"` | ||||
| 	QueueLatency         *json.RawMessage `json:"queue_latency_seconds"` | ||||
| 	QueueMemUsgApprox    *json.RawMessage `json:"queue_memory_usage_approx_bytes"` | ||||
| 	ProcessedPerSecond   *json.RawMessage `json:"tasks_processed_per_second"` | ||||
| 	FailedPerSecond      *json.RawMessage `json:"tasks_failed_per_second"` | ||||
| 	ErrorRate            *json.RawMessage `json:"error_rate"` | ||||
| 	PendingTasksByQueue  *json.RawMessage `json:"pending_tasks_by_queue"` | ||||
| 	RetryTasksByQueue    *json.RawMessage `json:"retry_tasks_by_queue"` | ||||
| 	ArchivedTasksByQueue *json.RawMessage `json:"archived_tasks_by_queue"` | ||||
| } | ||||
|  | ||||
| type metricsFetchOptions struct { | ||||
| 	// Specifies the number of seconds to scan for metrics. | ||||
| 	duration time.Duration | ||||
|  | ||||
| 	// Specifies the end time when fetching metrics. | ||||
| 	endTime time.Time | ||||
|  | ||||
| 	// Optional filter to speicify a list of queues to get metrics for. | ||||
| 	// Empty list indicates no filter (i.e. get metrics for all queues). | ||||
| 	queues []string | ||||
| } | ||||
|  | ||||
| func newGetMetricsHandlerFunc(client *http.Client, prometheusAddr string) http.HandlerFunc { | ||||
| 	// res is the result of calling a JSON API endpoint. | ||||
| 	type res struct { | ||||
| 		query string | ||||
| 		msg   *json.RawMessage | ||||
| 		err   error | ||||
| 	} | ||||
|  | ||||
| 	// List of PromQLs. | ||||
| 	// Strings are used as template to optionally insert queue filter specified by QUEUE_FILTER. | ||||
| 	const ( | ||||
| 		promQLQueueSize      = "asynq_queue_size{QUEUE_FILTER}" | ||||
| 		promQLQueueLatency   = "asynq_queue_latency_seconds{QUEUE_FILTER}" | ||||
| 		promQLMemUsage       = "asynq_queue_memory_usage_approx_bytes{QUEUE_FILTER}" | ||||
| 		promQLProcessedTasks = "rate(asynq_tasks_processed_total{QUEUE_FILTER}[5m])" | ||||
| 		promQLFailedTasks    = "rate(asynq_tasks_failed_total{QUEUE_FILTER}[5m])" | ||||
| 		promQLErrorRate      = "rate(asynq_tasks_failed_total{QUEUE_FILTER}[5m]) / rate(asynq_tasks_processed_total{QUEUE_FILTER}[5m])" | ||||
| 		promQLPendingTasks   = "asynq_tasks_enqueued_total{state=\"pending\",QUEUE_FILTER}" | ||||
| 		promQLRetryTasks     = "asynq_tasks_enqueued_total{state=\"retry\",QUEUE_FILTER}" | ||||
| 		promQLArchivedTasks  = "asynq_tasks_enqueued_total{state=\"archived\",QUEUE_FILTER}" | ||||
| 	) | ||||
|  | ||||
| 	// Optional query params: | ||||
| 	// `duration_sec`: specifies the number of seconds to scan | ||||
| 	// `end_time`:     specifies the end_time in Unix time seconds | ||||
| 	return func(w http.ResponseWriter, r *http.Request) { | ||||
| 		opts, err := extractMetricsFetchOptions(r) | ||||
| 		if err != nil { | ||||
| 			http.Error(w, fmt.Sprintf("invalid query parameter: %v", err), http.StatusBadRequest) | ||||
| 			return | ||||
| 		} | ||||
| 		// List of queries (i.e. promQL) to send to prometheus server. | ||||
| 		queries := []string{ | ||||
| 			promQLQueueSize, | ||||
| 			promQLQueueLatency, | ||||
| 			promQLMemUsage, | ||||
| 			promQLProcessedTasks, | ||||
| 			promQLFailedTasks, | ||||
| 			promQLErrorRate, | ||||
| 			promQLPendingTasks, | ||||
| 			promQLRetryTasks, | ||||
| 			promQLArchivedTasks, | ||||
| 		} | ||||
| 		resp := getMetricsResponse{} | ||||
| 		// Make multiple API calls concurrently | ||||
| 		n := len(queries) | ||||
| 		ch := make(chan res, len(queries)) | ||||
| 		for _, q := range queries { | ||||
| 			go func(q string) { | ||||
| 				url := buildPrometheusURL(prometheusAddr, q, opts) | ||||
| 				msg, err := fetchPrometheusMetrics(client, url) | ||||
| 				ch <- res{q, msg, err} | ||||
| 			}(q) | ||||
| 		} | ||||
| 		for r := range ch { | ||||
| 			n-- | ||||
| 			if r.err != nil { | ||||
| 				http.Error(w, fmt.Sprintf("failed to fetch %q: %v", r.query, err), http.StatusInternalServerError) | ||||
| 				return | ||||
| 			} | ||||
| 			switch r.query { | ||||
| 			case promQLQueueSize: | ||||
| 				resp.QueueSize = r.msg | ||||
| 			case promQLQueueLatency: | ||||
| 				resp.QueueLatency = r.msg | ||||
| 			case promQLMemUsage: | ||||
| 				resp.QueueMemUsgApprox = r.msg | ||||
| 			case promQLProcessedTasks: | ||||
| 				resp.ProcessedPerSecond = r.msg | ||||
| 			case promQLFailedTasks: | ||||
| 				resp.FailedPerSecond = r.msg | ||||
| 			case promQLErrorRate: | ||||
| 				resp.ErrorRate = r.msg | ||||
| 			case promQLPendingTasks: | ||||
| 				resp.PendingTasksByQueue = r.msg | ||||
| 			case promQLRetryTasks: | ||||
| 				resp.RetryTasksByQueue = r.msg | ||||
| 			case promQLArchivedTasks: | ||||
| 				resp.ArchivedTasksByQueue = r.msg | ||||
| 			} | ||||
| 			if n == 0 { | ||||
| 				break // fetched all metrics | ||||
| 			} | ||||
| 		} | ||||
| 		bytes, err := json.Marshal(resp) | ||||
| 		if err != nil { | ||||
| 			http.Error(w, fmt.Sprintf("failed to marshal response into JSON: %v", err), http.StatusInternalServerError) | ||||
| 			return | ||||
| 		} | ||||
| 		if _, err := w.Write(bytes); err != nil { | ||||
| 			http.Error(w, fmt.Sprintf("failed to write to response: %v", err), http.StatusInternalServerError) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| const prometheusAPIPath = "/api/v1/query_range" | ||||
|  | ||||
| func extractMetricsFetchOptions(r *http.Request) (*metricsFetchOptions, error) { | ||||
| 	opts := &metricsFetchOptions{ | ||||
| 		duration: 60 * time.Minute, | ||||
| 		endTime:  time.Now(), | ||||
| 	} | ||||
| 	q := r.URL.Query() | ||||
| 	if d := q.Get("duration"); d != "" { | ||||
| 		val, err := strconv.Atoi(d) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("invalid value provided for duration: %q", d) | ||||
| 		} | ||||
| 		opts.duration = time.Duration(val) * time.Second | ||||
| 	} | ||||
| 	if t := q.Get("endtime"); t != "" { | ||||
| 		val, err := strconv.Atoi(t) | ||||
| 		if err != nil { | ||||
| 			return nil, fmt.Errorf("invalid value provided for end_time: %q", t) | ||||
| 		} | ||||
| 		opts.endTime = time.Unix(int64(val), 0) | ||||
| 	} | ||||
| 	if qs := q.Get("queues"); qs != "" { | ||||
| 		opts.queues = strings.Split(qs, ",") | ||||
| 	} | ||||
| 	return opts, nil | ||||
| } | ||||
|  | ||||
| func buildPrometheusURL(baseAddr, promQL string, opts *metricsFetchOptions) string { | ||||
| 	var b strings.Builder | ||||
| 	b.WriteString(strings.TrimSuffix(baseAddr, "/")) | ||||
| 	b.WriteString(prometheusAPIPath) | ||||
| 	v := url.Values{} | ||||
| 	v.Add("query", applyQueueFilter(promQL, opts.queues)) | ||||
| 	v.Add("start", unixTimeString(opts.endTime.Add(-opts.duration))) | ||||
| 	v.Add("end", unixTimeString(opts.endTime)) | ||||
| 	v.Add("step", strconv.Itoa(int(step(opts).Seconds()))) | ||||
| 	b.WriteString("?") | ||||
| 	b.WriteString(v.Encode()) | ||||
| 	return b.String() | ||||
| } | ||||
|  | ||||
| func applyQueueFilter(promQL string, qnames []string) string { | ||||
| 	if len(qnames) == 0 { | ||||
| 		return strings.ReplaceAll(promQL, "QUEUE_FILTER", "") | ||||
| 	} | ||||
| 	var b strings.Builder | ||||
| 	b.WriteString(`queue=~"`) | ||||
| 	for i, q := range qnames { | ||||
| 		if i != 0 { | ||||
| 			b.WriteString("|") | ||||
| 		} | ||||
| 		b.WriteString(q) | ||||
| 	} | ||||
| 	b.WriteByte('"') | ||||
| 	return strings.ReplaceAll(promQL, "QUEUE_FILTER", b.String()) | ||||
| } | ||||
|  | ||||
| func fetchPrometheusMetrics(client *http.Client, url string) (*json.RawMessage, error) { | ||||
| 	resp, err := client.Get(url) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	defer resp.Body.Close() | ||||
| 	bytes, err := io.ReadAll(resp.Body) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	msg := json.RawMessage(bytes) | ||||
| 	return &msg, err | ||||
| } | ||||
|  | ||||
| // Returns step to use given the fetch options. | ||||
| // In general, the longer the duration, longer the each step. | ||||
| func step(opts *metricsFetchOptions) time.Duration { | ||||
| 	if opts.duration <= 6*time.Hour { | ||||
| 		// maximum number of data points to return: 6h / 10s = 2160 | ||||
| 		return 10 * time.Second | ||||
| 	} | ||||
| 	if opts.duration <= 24*time.Hour { | ||||
| 		// maximum number of data points to return: 24h / 1m = 1440 | ||||
| 		return 1 * time.Minute | ||||
| 	} | ||||
| 	if opts.duration <= 8*24*time.Hour { | ||||
| 		// maximum number of data points to return: (8*24)h / 3m = 3840 | ||||
| 		return 3 * time.Minute | ||||
| 	} | ||||
| 	if opts.duration <= 30*24*time.Hour { | ||||
| 		// maximum number of data points to return: (30*24)h / 10m = 4320 | ||||
| 		return 10 * time.Minute | ||||
| 	} | ||||
| 	return opts.duration / 3000 | ||||
| } | ||||
|  | ||||
| func unixTimeString(t time.Time) string { | ||||
| 	return strconv.Itoa(int(t.Unix())) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user