mirror of https://github.com/hibiken/asynq.git synced 2024-09-20 11:05:58 +08:00

Add recoverer

Ken Hibino 2020-06-21 07:05:57 -07:00
parent 7657f560ec
commit feee87adda
7 changed files with 292 additions and 149 deletions


@@ -97,6 +97,20 @@ func NewTaskMessageWithQueue(taskType string, payload map[string]interface{}, qn
    }
}
// TaskMessageAfterRetry returns an updated copy of t after retry.
// It increments retry count and sets the error message.
func TaskMessageAfterRetry(t base.TaskMessage, errMsg string) *base.TaskMessage {
    t.Retried = t.Retried + 1
    t.ErrorMsg = errMsg
    return &t
}

// TaskMessageWithError returns an updated copy of t with the given error message.
func TaskMessageWithError(t base.TaskMessage, errMsg string) *base.TaskMessage {
    t.ErrorMsg = errMsg
    return &t
}

// MustMarshal marshals given task message and returns a json string.
// Calling test will fail if marshaling errors out.
func MustMarshal(tb testing.TB, msg *base.TaskMessage) string {
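Both helpers take the message by value, so a test keeps its fixture untouched and gets back a modified copy describing the expected post-recovery state. A small illustrative sketch from a test's point of view, where h is the asynqtest import alias used throughout this commit (the task type and error string are arbitrary):

    // Sketch: deriving expected messages from a fixture without mutating it.
    orig := h.NewTaskMessage("task1", nil) // Retried == 0, ErrorMsg == ""

    afterRetry := h.TaskMessageAfterRetry(*orig, "deadline exceeded")
    // afterRetry.Retried == orig.Retried+1 and afterRetry.ErrorMsg == "deadline exceeded";
    // orig itself is unchanged.

    dead := h.TaskMessageWithError(*orig, "deadline exceeded")
    // dead.Retried stays at orig.Retried; only the error message is set.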


@@ -264,6 +264,7 @@ type Broker interface {
    Retry(msg *TaskMessage, processAt time.Time, errMsg string) error
    Kill(msg *TaskMessage, errMsg string) error
    CheckAndEnqueue() error
    ListDeadlineExceeded(deadline time.Time) ([]*TaskMessage, error)
    WriteServerState(info *ServerInfo, workers []*WorkerInfo, ttl time.Duration) error
    ClearServerState(host string, pid int, serverID string) error
    CancelationPubSub() (*redis.PubSub, error) // TODO: Need to decouple from redis to support other brokers
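ListDeadlineExceeded is the only broker-side addition the recoverer needs: given a cutoff time, return every in-progress task whose deadline falls at or before it. A minimal in-memory sketch of that contract, purely for illustration (this is not the Redis implementation in this commit; fakeBroker and its deadlines map are made-up names, and a real Broker would have to implement the rest of the interface too):

    import (
        "sync"
        "time"

        "github.com/hibiken/asynq/internal/base"
    )

    // fakeBroker illustrates just the new method with an in-memory map.
    type fakeBroker struct {
        mu        sync.Mutex
        deadlines map[*base.TaskMessage]time.Time // in-progress task -> its deadline
    }

    // ListDeadlineExceeded returns every task whose deadline is at or before the cutoff.
    func (b *fakeBroker) ListDeadlineExceeded(deadline time.Time) ([]*base.TaskMessage, error) {
        b.mu.Lock()
        defer b.mu.Unlock()
        var msgs []*base.TaskMessage
        for msg, d := range b.deadlines {
            if !d.After(deadline) {
                msgs = append(msgs, msg)
            }
        }
        return msgs, nil
    }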


@@ -1192,155 +1192,6 @@ func TestListDeadlineExceeded(t *testing.T) {
    }
}
/*
func TestRequeueDeadlineExceeded(t *testing.T) {
    t1 := h.NewTaskMessage("task1", nil)
    t2 := h.NewTaskMessage("task2", nil)
    t3 := h.NewTaskMessageWithQueue("task3", nil, "critical")

    now := time.Now()
    oneHourFromNow := now.Add(1 * time.Hour)
    fiveMinutesFromNow := now.Add(5 * time.Minute)
    fiveMinutesAgo := now.Add(-5 * time.Minute)
    oneHourAgo := now.Add(-1 * time.Hour)

    tests := []struct {
        desc           string
        inProgress     []*base.TaskMessage
        deadlines      []h.ZSetEntry
        enqueued       map[string][]*base.TaskMessage
        wantN          int
        wantInProgress []*base.TaskMessage
        wantDeadlines  []h.ZSetEntry
        wantEnqueued   map[string][]*base.TaskMessage
    }{
        {
            desc:       "with one task in-progress",
            inProgress: []*base.TaskMessage{t1},
            deadlines: []h.ZSetEntry{
                {Msg: t1, Score: float64(fiveMinutesAgo.Unix())},
            },
            enqueued: map[string][]*base.TaskMessage{
                "default": {},
            },
            wantN:          1,
            wantInProgress: []*base.TaskMessage{},
            wantDeadlines:  []h.ZSetEntry{},
            wantEnqueued: map[string][]*base.TaskMessage{
                "default": {t1},
            },
        },
        {
            desc:       "with multiple tasks in-progress, and one expired",
            inProgress: []*base.TaskMessage{t1, t2, t3},
            deadlines: []h.ZSetEntry{
                {Msg: t1, Score: float64(oneHourAgo.Unix())},
                {Msg: t2, Score: float64(fiveMinutesFromNow.Unix())},
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            enqueued: map[string][]*base.TaskMessage{
                "default": {},
            },
            wantN:          1,
            wantInProgress: []*base.TaskMessage{t2, t3},
            wantDeadlines: []h.ZSetEntry{
                {Msg: t2, Score: float64(fiveMinutesFromNow.Unix())},
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            wantEnqueued: map[string][]*base.TaskMessage{
                "default": {t1},
            },
        },
        {
            desc:       "with multiple expired tasks in-progress",
            inProgress: []*base.TaskMessage{t1, t2, t3},
            deadlines: []h.ZSetEntry{
                {Msg: t1, Score: float64(oneHourAgo.Unix())},
                {Msg: t2, Score: float64(fiveMinutesAgo.Unix())},
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            enqueued: map[string][]*base.TaskMessage{
                "default": {},
            },
            wantN:          2,
            wantInProgress: []*base.TaskMessage{t3},
            wantDeadlines: []h.ZSetEntry{
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            wantEnqueued: map[string][]*base.TaskMessage{
                "default": {t1, t2},
            },
        },
        {
            desc:       "with empty in-progress queue",
            inProgress: []*base.TaskMessage{},
            deadlines:  []h.ZSetEntry{},
            enqueued: map[string][]*base.TaskMessage{
                "default": {},
            },
            wantN:          0,
            wantInProgress: []*base.TaskMessage{},
            wantDeadlines:  []h.ZSetEntry{},
            wantEnqueued: map[string][]*base.TaskMessage{
                "default": {},
            },
        },
        {
            desc:       "push back task to appropriate queue",
            inProgress: []*base.TaskMessage{t3},
            deadlines: []h.ZSetEntry{
                {Msg: t3, Score: float64(fiveMinutesAgo.Unix())},
            },
            enqueued: map[string][]*base.TaskMessage{
                "default":  {},
                "critical": {},
            },
            wantN:          1,
            wantInProgress: []*base.TaskMessage{},
            wantDeadlines:  []h.ZSetEntry{},
            wantEnqueued: map[string][]*base.TaskMessage{
                "default":  {},
                "critical": {t3},
            },
        },
    }

    r := setup(t)
    for _, tc := range tests {
        h.FlushDB(t, r.client)
        h.SeedInProgressQueue(t, r.client, tc.inProgress)
        h.SeedDeadlines(t, r.client, tc.deadlines)
        for queue, msgs := range tc.enqueued {
            h.SeedEnqueuedQueue(t, r.client, msgs, queue)
        }

        gotN, err := r.RequeueDeadlineExceeded()
        if err != nil {
            t.Errorf("%s; RequeueDeadlineExceeded() returned error: %v", tc.desc, err)
            continue
        }
        if gotN != tc.wantN {
            t.Errorf("%s; RequeueDeadlineExeeded() == %d want %d", tc.desc, gotN, tc.wantN)
        }

        gotInProgress := h.GetInProgressMessages(t, r.client)
        if diff := cmp.Diff(tc.wantInProgress, gotInProgress, h.SortMsgOpt); diff != "" {
            t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.InProgressQueue, diff)
        }
        gotDeadlines := h.GetDeadlinesEntries(t, r.client)
        if diff := cmp.Diff(tc.wantDeadlines, gotDeadlines, h.SortZSetEntryOpt); diff != "" {
            t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.KeyDeadlines, diff)
        }
        for queue, want := range tc.wantEnqueued {
            gotEnqueued := h.GetEnqueuedMessages(t, r.client, queue)
            if diff := cmp.Diff(want, gotEnqueued, h.SortMsgOpt); diff != "" {
                t.Errorf("%s; mismatch found in %q: (-want,+got):\n%s", tc.desc, base.QueueKey(queue), diff)
            }
        }
    }
}
*/
func TestWriteServerState(t *testing.T) {
    r := setup(t)


@@ -132,6 +132,15 @@ func (tb *TestBroker) CheckAndEnqueue() error {
    return tb.real.CheckAndEnqueue()
}

func (tb *TestBroker) ListDeadlineExceeded(deadline time.Time) ([]*base.TaskMessage, error) {
    tb.mu.Lock()
    defer tb.mu.Unlock()
    if tb.sleeping {
        return nil, errRedisDown
    }
    return tb.real.ListDeadlineExceeded(deadline)
}

func (tb *TestBroker) WriteServerState(info *base.ServerInfo, workers []*base.WorkerInfo, ttl time.Duration) error {
    tb.mu.Lock()
    defer tb.mu.Unlock()
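TestBroker forwards each call to a real broker unless its sleeping flag is set, in which case the call fails with errRedisDown; that is what lets tests exercise the recoverer's warning path without taking Redis down. A rough sketch of that usage follows. The Sleep and Wakeup toggles are only implied by the sleeping flag and are not shown in this diff, so treat those method names as assumptions:

    // Sketch: feed the recoverer a broker that can simulate a Redis outage.
    // rdbClient is a real *rdb.RDB as constructed in the tests below.
    testBroker := testbroker.NewTestBroker(rdbClient)
    recoverer := newRecoverer(recovererParams{
        logger:         testLogger,
        broker:         testBroker,
        interval:       1 * time.Second,
        retryDelayFunc: func(n int, err error, task *Task) time.Duration { return 30 * time.Second },
    })

    var wg sync.WaitGroup
    recoverer.start(&wg)
    testBroker.Sleep()  // assumed toggle: ListDeadlineExceeded now returns errRedisDown
    time.Sleep(2 * time.Second)
    testBroker.Wakeup() // assumed toggle: calls reach the real broker again
    recoverer.terminate()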

recoverer.go (new file, 96 lines)

@@ -0,0 +1,96 @@
// Copyright 2020 Kentaro Hibino. All rights reserved.
// Use of this source code is governed by a MIT license
// that can be found in the LICENSE file.

package asynq

import (
    "fmt"
    "sync"
    "time"

    "github.com/hibiken/asynq/internal/base"
    "github.com/hibiken/asynq/internal/log"
)

type recoverer struct {
    logger         *log.Logger
    broker         base.Broker
    retryDelayFunc retryDelayFunc

    // channel to communicate back to the long running "recoverer" goroutine.
    done chan struct{}

    // poll interval.
    interval time.Duration
}

type recovererParams struct {
    logger         *log.Logger
    broker         base.Broker
    interval       time.Duration
    retryDelayFunc retryDelayFunc
}

func newRecoverer(params recovererParams) *recoverer {
    return &recoverer{
        logger:         params.logger,
        broker:         params.broker,
        done:           make(chan struct{}),
        interval:       params.interval,
        retryDelayFunc: params.retryDelayFunc,
    }
}

func (r *recoverer) terminate() {
    r.logger.Debug("Recoverer shutting down...")
    // Signal the recoverer goroutine to stop polling.
    r.done <- struct{}{}
}

func (r *recoverer) start(wg *sync.WaitGroup) {
    wg.Add(1)
    go func() {
        defer wg.Done()
        timer := time.NewTimer(r.interval)
        for {
            select {
            case <-r.done:
                r.logger.Debug("Recoverer done")
                timer.Stop()
                return
            case <-timer.C:
                // Get all tasks which have expired 30 seconds ago or earlier.
                deadline := time.Now().Add(-30 * time.Second)
                msgs, err := r.broker.ListDeadlineExceeded(deadline)
                if err != nil {
                    r.logger.Warn("recoverer: could not list deadline exceeded tasks")
                    continue
                }
                const errMsg = "deadline exceeded" // TODO: better error message
                for _, msg := range msgs {
                    if msg.Retried >= msg.Retry {
                        r.kill(msg, errMsg)
                    } else {
                        r.retry(msg, errMsg)
                    }
                }
            }
        }
    }()
}

func (r *recoverer) retry(msg *base.TaskMessage, errMsg string) {
    delay := r.retryDelayFunc(msg.Retried, fmt.Errorf(errMsg), NewTask(msg.Type, msg.Payload))
    retryAt := time.Now().Add(delay)
    if err := r.broker.Retry(msg, retryAt, errMsg); err != nil {
        r.logger.Warnf("recoverer: could not retry deadline exceeded task: %v", err)
    }
}

func (r *recoverer) kill(msg *base.TaskMessage, errMsg string) {
    if err := r.broker.Kill(msg, errMsg); err != nil {
        r.logger.Warnf("recoverer: could not move task to dead queue: %v", err)
    }
}
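Taken together, the loop above fires on the poll interval, asks the broker for every in-progress task whose deadline passed at least 30 seconds ago, and then either re-schedules it through Retry, with the delay computed by retryDelayFunc, or moves it to the dead queue with Kill once msg.Retried has reached msg.Retry. A minimal sketch of driving the component by hand, outside the Server; redisClient stands for any connected go-redis client, and the inline delay function mirrors the one used in the tests:

    // Sketch: wiring the recoverer up manually (normally the Server does this).
    r := newRecoverer(recovererParams{
        logger:         testLogger,              // any *log.Logger works here
        broker:         rdb.NewRDB(redisClient), // redisClient: a connected go-redis client (assumed)
        interval:       1 * time.Minute,
        retryDelayFunc: func(n int, err error, task *Task) time.Duration { return 30 * time.Second },
    })

    var wg sync.WaitGroup
    r.start(&wg)  // spawns the polling goroutine
    // ... let it run alongside the processor ...
    r.terminate() // sends on done; the goroutine stops polling and returns
    wg.Wait()     // wait for the goroutine to finish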

recoverer_test.go (new file, 162 lines)

@@ -0,0 +1,162 @@
// Copyright 2020 Kentaro Hibino. All rights reserved.
// Use of this source code is governed by a MIT license
// that can be found in the LICENSE file.

package asynq

import (
    "sync"
    "testing"
    "time"

    "github.com/google/go-cmp/cmp"
    h "github.com/hibiken/asynq/internal/asynqtest"
    "github.com/hibiken/asynq/internal/base"
    "github.com/hibiken/asynq/internal/rdb"
)

func TestRecoverer(t *testing.T) {
    r := setup(t)
    rdbClient := rdb.NewRDB(r)

    t1 := h.NewTaskMessage("task1", nil)
    t2 := h.NewTaskMessage("task2", nil)
    t3 := h.NewTaskMessageWithQueue("task3", nil, "critical")
    t4 := h.NewTaskMessage("task4", nil)
    t4.Retried = t4.Retry // t4 has reached its max retry count

    now := time.Now()
    oneHourFromNow := now.Add(1 * time.Hour)
    fiveMinutesFromNow := now.Add(5 * time.Minute)
    fiveMinutesAgo := now.Add(-5 * time.Minute)
    oneHourAgo := now.Add(-1 * time.Hour)

    tests := []struct {
        desc           string
        inProgress     []*base.TaskMessage
        deadlines      []h.ZSetEntry
        retry          []h.ZSetEntry
        dead           []h.ZSetEntry
        wantInProgress []*base.TaskMessage
        wantDeadlines  []h.ZSetEntry
        wantRetry      []*base.TaskMessage
        wantDead       []*base.TaskMessage
    }{
        {
            desc:       "with one task in-progress",
            inProgress: []*base.TaskMessage{t1},
            deadlines: []h.ZSetEntry{
                {Msg: t1, Score: float64(fiveMinutesAgo.Unix())},
            },
            retry:          []h.ZSetEntry{},
            dead:           []h.ZSetEntry{},
            wantInProgress: []*base.TaskMessage{},
            wantDeadlines:  []h.ZSetEntry{},
            wantRetry: []*base.TaskMessage{
                h.TaskMessageAfterRetry(*t1, "deadline exceeded"),
            },
            wantDead: []*base.TaskMessage{},
        },
        {
            desc:       "with a task with max-retry reached",
            inProgress: []*base.TaskMessage{t4},
            deadlines: []h.ZSetEntry{
                {Msg: t4, Score: float64(fiveMinutesAgo.Unix())},
            },
            retry:          []h.ZSetEntry{},
            dead:           []h.ZSetEntry{},
            wantInProgress: []*base.TaskMessage{},
            wantDeadlines:  []h.ZSetEntry{},
            wantRetry:      []*base.TaskMessage{},
            wantDead:       []*base.TaskMessage{h.TaskMessageWithError(*t4, "deadline exceeded")},
        },
        {
            desc:       "with multiple tasks in-progress, and one expired",
            inProgress: []*base.TaskMessage{t1, t2, t3},
            deadlines: []h.ZSetEntry{
                {Msg: t1, Score: float64(oneHourAgo.Unix())},
                {Msg: t2, Score: float64(fiveMinutesFromNow.Unix())},
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            retry:          []h.ZSetEntry{},
            dead:           []h.ZSetEntry{},
            wantInProgress: []*base.TaskMessage{t2, t3},
            wantDeadlines: []h.ZSetEntry{
                {Msg: t2, Score: float64(fiveMinutesFromNow.Unix())},
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            wantRetry: []*base.TaskMessage{
                h.TaskMessageAfterRetry(*t1, "deadline exceeded"),
            },
            wantDead: []*base.TaskMessage{},
        },
        {
            desc:       "with multiple expired tasks in-progress",
            inProgress: []*base.TaskMessage{t1, t2, t3},
            deadlines: []h.ZSetEntry{
                {Msg: t1, Score: float64(oneHourAgo.Unix())},
                {Msg: t2, Score: float64(fiveMinutesAgo.Unix())},
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            retry:          []h.ZSetEntry{},
            dead:           []h.ZSetEntry{},
            wantInProgress: []*base.TaskMessage{t3},
            wantDeadlines: []h.ZSetEntry{
                {Msg: t3, Score: float64(oneHourFromNow.Unix())},
            },
            wantRetry: []*base.TaskMessage{
                h.TaskMessageAfterRetry(*t1, "deadline exceeded"),
                h.TaskMessageAfterRetry(*t2, "deadline exceeded"),
            },
            wantDead: []*base.TaskMessage{},
        },
        {
            desc:           "with empty in-progress queue",
            inProgress:     []*base.TaskMessage{},
            deadlines:      []h.ZSetEntry{},
            retry:          []h.ZSetEntry{},
            dead:           []h.ZSetEntry{},
            wantInProgress: []*base.TaskMessage{},
            wantDeadlines:  []h.ZSetEntry{},
            wantRetry:      []*base.TaskMessage{},
            wantDead:       []*base.TaskMessage{},
        },
    }

    for _, tc := range tests {
        h.FlushDB(t, r)
        h.SeedInProgressQueue(t, r, tc.inProgress)
        h.SeedDeadlines(t, r, tc.deadlines)
        h.SeedRetryQueue(t, r, tc.retry)
        h.SeedDeadQueue(t, r, tc.dead)

        recoverer := newRecoverer(recovererParams{
            logger:         testLogger,
            broker:         rdbClient,
            interval:       1 * time.Second,
            retryDelayFunc: func(n int, err error, task *Task) time.Duration { return 30 * time.Second },
        })

        var wg sync.WaitGroup
        recoverer.start(&wg)
        time.Sleep(2 * time.Second)
        recoverer.terminate()

        gotInProgress := h.GetInProgressMessages(t, r)
        if diff := cmp.Diff(tc.wantInProgress, gotInProgress, h.SortMsgOpt); diff != "" {
            t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.InProgressQueue, diff)
        }
        gotDeadlines := h.GetDeadlinesEntries(t, r)
        if diff := cmp.Diff(tc.wantDeadlines, gotDeadlines, h.SortZSetEntryOpt); diff != "" {
            t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.KeyDeadlines, diff)
        }
        gotRetry := h.GetRetryMessages(t, r)
        if diff := cmp.Diff(tc.wantRetry, gotRetry, h.SortMsgOpt); diff != "" {
            t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.RetryQueue, diff)
        }
        gotDead := h.GetDeadMessages(t, r)
        if diff := cmp.Diff(tc.wantDead, gotDead, h.SortMsgOpt); diff != "" {
            t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.DeadQueue, diff)
        }
    }
}


@@ -46,6 +46,7 @@ type Server struct {
    syncer      *syncer
    heartbeater *heartbeater
    subscriber  *subscriber
    recoverer   *recoverer
}
// Config specifies the server's background-task processing behavior.
@@ -329,6 +330,12 @@ func NewServer(r RedisConnOpt, cfg Config) *Server {
        starting: starting,
        finished: finished,
    })
    recoverer := newRecoverer(recovererParams{
        logger:         logger,
        broker:         rdb,
        retryDelayFunc: delayFunc,
        interval:       1 * time.Minute,
    })
    return &Server{
        logger: logger,
        broker: rdb,
@@ -338,6 +345,7 @@ func NewServer(r RedisConnOpt, cfg Config) *Server {
        syncer:      syncer,
        heartbeater: heartbeater,
        subscriber:  subscriber,
        recoverer:   recoverer,
    }
}
@@ -407,6 +415,7 @@ func (srv *Server) Start(handler Handler) error {
    srv.heartbeater.start(&srv.wg)
    srv.subscriber.start(&srv.wg)
    srv.syncer.start(&srv.wg)
    srv.recoverer.start(&srv.wg)
    srv.scheduler.start(&srv.wg)
    srv.processor.start(&srv.wg)
    return nil
@@ -430,6 +439,7 @@ func (srv *Server) Stop() {
    // processor -> heartbeater (via starting, finished channels)
    srv.scheduler.terminate()
    srv.processor.terminate()
    srv.recoverer.terminate()
    srv.syncer.terminate()
    srv.subscriber.terminate()
    srv.heartbeater.terminate()
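With the recoverer registered alongside the other components, Start launches it with the server's shared WaitGroup and Stop tears it down in the ordered shutdown shown above. No new configuration is exposed: it polls on a fixed one-minute interval and reuses the server's retry delay function (delayFunc). A minimal user-side sketch of a server that now gets the recovery loop automatically; the Redis address, concurrency value, and handler body are illustrative, not part of this commit:

    // Sketch: nothing extra to configure; a server started after this commit
    // also runs the recoverer in the background.
    srv := asynq.NewServer(
        asynq.RedisClientOpt{Addr: "localhost:6379"}, // illustrative address
        asynq.Config{Concurrency: 10},
    )

    handler := asynq.HandlerFunc(func(ctx context.Context, task *asynq.Task) error {
        // If a worker holding this task dies past its deadline, the recoverer
        // eventually retries the task or moves it to the dead queue.
        return nil
    })

    if err := srv.Start(handler); err != nil {
        log.Fatal(err)
    }
    defer srv.Stop()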