2
0
mirror of https://github.com/hibiken/asynq.git synced 2024-09-20 11:05:58 +08:00

Update recoverer to use ListLeaseExpired

This commit is contained in:
Ken Hibino 2022-02-11 06:18:27 -08:00
parent bc2f1986d7
commit dabcb120d5
5 changed files with 62 additions and 58 deletions

View File

@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `BaseContext` is introduced in `Config` to specify callback hook to provide a base `context` from which `Handler` `context` is derived - `BaseContext` is introduced in `Config` to specify callback hook to provide a base `context` from which `Handler` `context` is derived
### Changed
- `Server` now recovers tasks with an expired lease. Recovered tasks are retried/archived with `ErrLeaseExpired` error.
## [0.21.0] - 2022-01-22 ## [0.21.0] - 2022-01-22
### Added ### Added

View File

@ -625,7 +625,7 @@ type Broker interface {
Archive(msg *TaskMessage, errMsg string) error Archive(msg *TaskMessage, errMsg string) error
ForwardIfReady(qnames ...string) error ForwardIfReady(qnames ...string) error
DeleteExpiredCompletedTasks(qname string) error DeleteExpiredCompletedTasks(qname string) error
ListDeadlineExceeded(deadline time.Time, qnames ...string) ([]*TaskMessage, error) ListLeaseExpired(cutoff time.Time, qnames ...string) ([]*TaskMessage, error)
WriteServerState(info *ServerInfo, workers []*WorkerInfo, ttl time.Duration) error WriteServerState(info *ServerInfo, workers []*WorkerInfo, ttl time.Duration) error
ClearServerState(host string, pid int, serverID string) error ClearServerState(host string, pid int, serverID string) error
CancelationPubSub() (*redis.PubSub, error) // TODO: Need to decouple from redis to support other brokers CancelationPubSub() (*redis.PubSub, error) // TODO: Need to decouple from redis to support other brokers

View File

@ -154,13 +154,13 @@ func (tb *TestBroker) DeleteExpiredCompletedTasks(qname string) error {
return tb.real.DeleteExpiredCompletedTasks(qname) return tb.real.DeleteExpiredCompletedTasks(qname)
} }
func (tb *TestBroker) ListDeadlineExceeded(deadline time.Time, qnames ...string) ([]*base.TaskMessage, error) { func (tb *TestBroker) ListLeaseExpired(cutoff time.Time, qnames ...string) ([]*base.TaskMessage, error) {
tb.mu.Lock() tb.mu.Lock()
defer tb.mu.Unlock() defer tb.mu.Unlock()
if tb.sleeping { if tb.sleeping {
return nil, errRedisDown return nil, errRedisDown
} }
return tb.real.ListDeadlineExceeded(deadline, qnames...) return tb.real.ListLeaseExpired(cutoff, qnames...)
} }
func (tb *TestBroker) WriteServerState(info *base.ServerInfo, workers []*base.WorkerInfo, ttl time.Duration) error { func (tb *TestBroker) WriteServerState(info *base.ServerInfo, workers []*base.WorkerInfo, ttl time.Duration) error {

View File

@ -5,11 +5,11 @@
package asynq package asynq
import ( import (
"context"
"sync" "sync"
"time" "time"
"github.com/hibiken/asynq/internal/base" "github.com/hibiken/asynq/internal/base"
"github.com/hibiken/asynq/internal/errors"
"github.com/hibiken/asynq/internal/log" "github.com/hibiken/asynq/internal/log"
) )
@ -76,19 +76,23 @@ func (r *recoverer) start(wg *sync.WaitGroup) {
}() }()
} }
// ErrLeaseExpired error indicates that the task failed because the worker working on the task
// could not extend its lease due to missing heartbeats. The worker may have crashed or got cutoff from the network.
var ErrLeaseExpired = errors.New("asynq: task lease expired")
func (r *recoverer) recover() { func (r *recoverer) recover() {
// Get all tasks which have expired 30 seconds ago or earlier. // Get all tasks which have expired 30 seconds ago or earlier to accomodate certain amount of clock skew.
deadline := time.Now().Add(-30 * time.Second) cutoff := time.Now().Add(-30 * time.Second)
msgs, err := r.broker.ListDeadlineExceeded(deadline, r.queues...) msgs, err := r.broker.ListLeaseExpired(cutoff, r.queues...)
if err != nil { if err != nil {
r.logger.Warn("recoverer: could not list deadline exceeded tasks") r.logger.Warn("recoverer: could not list lease expired tasks")
return return
} }
for _, msg := range msgs { for _, msg := range msgs {
if msg.Retried >= msg.Retry { if msg.Retried >= msg.Retry {
r.archive(msg, context.DeadlineExceeded) r.archive(msg, ErrLeaseExpired)
} else { } else {
r.retry(msg, context.DeadlineExceeded) r.retry(msg, ErrLeaseExpired)
} }
} }
} }
@ -97,7 +101,7 @@ func (r *recoverer) retry(msg *base.TaskMessage, err error) {
delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload)) delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload))
retryAt := time.Now().Add(delay) retryAt := time.Now().Add(delay)
if err := r.broker.Retry(msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil { if err := r.broker.Retry(msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil {
r.logger.Warnf("recoverer: could not retry deadline exceeded task: %v", err) r.logger.Warnf("recoverer: could not retry lease expired task: %v", err)
} }
} }

View File

@ -27,29 +27,25 @@ func TestRecoverer(t *testing.T) {
t4.Retried = t4.Retry // t4 has reached its max retry count t4.Retried = t4.Retry // t4 has reached its max retry count
now := time.Now() now := time.Now()
oneHourFromNow := now.Add(1 * time.Hour)
fiveMinutesFromNow := now.Add(5 * time.Minute)
fiveMinutesAgo := now.Add(-5 * time.Minute)
oneHourAgo := now.Add(-1 * time.Hour)
tests := []struct { tests := []struct {
desc string desc string
inProgress map[string][]*base.TaskMessage active map[string][]*base.TaskMessage
deadlines map[string][]base.Z lease map[string][]base.Z
retry map[string][]base.Z retry map[string][]base.Z
archived map[string][]base.Z archived map[string][]base.Z
wantActive map[string][]*base.TaskMessage wantActive map[string][]*base.TaskMessage
wantDeadlines map[string][]base.Z wantLease map[string][]base.Z
wantRetry map[string][]*base.TaskMessage wantRetry map[string][]*base.TaskMessage
wantArchived map[string][]*base.TaskMessage wantArchived map[string][]*base.TaskMessage
}{ }{
{ {
desc: "with one active task", desc: "with one active task",
inProgress: map[string][]*base.TaskMessage{ active: map[string][]*base.TaskMessage{
"default": {t1}, "default": {t1},
}, },
deadlines: map[string][]base.Z{ lease: map[string][]base.Z{
"default": {{Message: t1, Score: fiveMinutesAgo.Unix()}}, "default": {{Message: t1, Score: now.Add(-1 * time.Minute).Unix()}},
}, },
retry: map[string][]base.Z{ retry: map[string][]base.Z{
"default": {}, "default": {},
@ -60,7 +56,7 @@ func TestRecoverer(t *testing.T) {
wantActive: map[string][]*base.TaskMessage{ wantActive: map[string][]*base.TaskMessage{
"default": {}, "default": {},
}, },
wantDeadlines: map[string][]base.Z{ wantLease: map[string][]base.Z{
"default": {}, "default": {},
}, },
wantRetry: map[string][]*base.TaskMessage{ wantRetry: map[string][]*base.TaskMessage{
@ -72,12 +68,12 @@ func TestRecoverer(t *testing.T) {
}, },
{ {
desc: "with a task with max-retry reached", desc: "with a task with max-retry reached",
inProgress: map[string][]*base.TaskMessage{ active: map[string][]*base.TaskMessage{
"default": {t4}, "default": {t4},
"critical": {}, "critical": {},
}, },
deadlines: map[string][]base.Z{ lease: map[string][]base.Z{
"default": {{Message: t4, Score: fiveMinutesAgo.Unix()}}, "default": {{Message: t4, Score: now.Add(-40 * time.Second).Unix()}},
"critical": {}, "critical": {},
}, },
retry: map[string][]base.Z{ retry: map[string][]base.Z{
@ -92,7 +88,7 @@ func TestRecoverer(t *testing.T) {
"default": {}, "default": {},
"critical": {}, "critical": {},
}, },
wantDeadlines: map[string][]base.Z{ wantLease: map[string][]base.Z{
"default": {}, "default": {},
"critical": {}, "critical": {},
}, },
@ -107,17 +103,17 @@ func TestRecoverer(t *testing.T) {
}, },
{ {
desc: "with multiple active tasks, and one expired", desc: "with multiple active tasks, and one expired",
inProgress: map[string][]*base.TaskMessage{ active: map[string][]*base.TaskMessage{
"default": {t1, t2}, "default": {t1, t2},
"critical": {t3}, "critical": {t3},
}, },
deadlines: map[string][]base.Z{ lease: map[string][]base.Z{
"default": { "default": {
{Message: t1, Score: oneHourAgo.Unix()}, {Message: t1, Score: now.Add(-2 * time.Minute).Unix()},
{Message: t2, Score: fiveMinutesFromNow.Unix()}, {Message: t2, Score: now.Add(20 * time.Second).Unix()},
}, },
"critical": { "critical": {
{Message: t3, Score: oneHourFromNow.Unix()}, {Message: t3, Score: now.Add(20 * time.Second).Unix()},
}, },
}, },
retry: map[string][]base.Z{ retry: map[string][]base.Z{
@ -132,9 +128,9 @@ func TestRecoverer(t *testing.T) {
"default": {t2}, "default": {t2},
"critical": {t3}, "critical": {t3},
}, },
wantDeadlines: map[string][]base.Z{ wantLease: map[string][]base.Z{
"default": {{Message: t2, Score: fiveMinutesFromNow.Unix()}}, "default": {{Message: t2, Score: now.Add(20 * time.Second).Unix()}},
"critical": {{Message: t3, Score: oneHourFromNow.Unix()}}, "critical": {{Message: t3, Score: now.Add(20 * time.Second).Unix()}},
}, },
wantRetry: map[string][]*base.TaskMessage{ wantRetry: map[string][]*base.TaskMessage{
"default": {t1}, "default": {t1},
@ -147,17 +143,17 @@ func TestRecoverer(t *testing.T) {
}, },
{ {
desc: "with multiple expired active tasks", desc: "with multiple expired active tasks",
inProgress: map[string][]*base.TaskMessage{ active: map[string][]*base.TaskMessage{
"default": {t1, t2}, "default": {t1, t2},
"critical": {t3}, "critical": {t3},
}, },
deadlines: map[string][]base.Z{ lease: map[string][]base.Z{
"default": { "default": {
{Message: t1, Score: oneHourAgo.Unix()}, {Message: t1, Score: now.Add(-1 * time.Minute).Unix()},
{Message: t2, Score: oneHourFromNow.Unix()}, {Message: t2, Score: now.Add(10 * time.Second).Unix()},
}, },
"critical": { "critical": {
{Message: t3, Score: fiveMinutesAgo.Unix()}, {Message: t3, Score: now.Add(-1 * time.Minute).Unix()},
}, },
}, },
retry: map[string][]base.Z{ retry: map[string][]base.Z{
@ -172,8 +168,8 @@ func TestRecoverer(t *testing.T) {
"default": {t2}, "default": {t2},
"critical": {}, "critical": {},
}, },
wantDeadlines: map[string][]base.Z{ wantLease: map[string][]base.Z{
"default": {{Message: t2, Score: oneHourFromNow.Unix()}}, "default": {{Message: t2, Score: now.Add(10 * time.Second).Unix()}},
}, },
wantRetry: map[string][]*base.TaskMessage{ wantRetry: map[string][]*base.TaskMessage{
"default": {t1}, "default": {t1},
@ -186,11 +182,11 @@ func TestRecoverer(t *testing.T) {
}, },
{ {
desc: "with empty active queue", desc: "with empty active queue",
inProgress: map[string][]*base.TaskMessage{ active: map[string][]*base.TaskMessage{
"default": {}, "default": {},
"critical": {}, "critical": {},
}, },
deadlines: map[string][]base.Z{ lease: map[string][]base.Z{
"default": {}, "default": {},
"critical": {}, "critical": {},
}, },
@ -206,7 +202,7 @@ func TestRecoverer(t *testing.T) {
"default": {}, "default": {},
"critical": {}, "critical": {},
}, },
wantDeadlines: map[string][]base.Z{ wantLease: map[string][]base.Z{
"default": {}, "default": {},
"critical": {}, "critical": {},
}, },
@ -223,8 +219,8 @@ func TestRecoverer(t *testing.T) {
for _, tc := range tests { for _, tc := range tests {
h.FlushDB(t, r) h.FlushDB(t, r)
h.SeedAllActiveQueues(t, r, tc.inProgress) h.SeedAllActiveQueues(t, r, tc.active)
h.SeedAllDeadlines(t, r, tc.deadlines) h.SeedAllLease(t, r, tc.lease)
h.SeedAllRetryQueues(t, r, tc.retry) h.SeedAllRetryQueues(t, r, tc.retry)
h.SeedAllArchivedQueues(t, r, tc.archived) h.SeedAllArchivedQueues(t, r, tc.archived)
@ -249,10 +245,10 @@ func TestRecoverer(t *testing.T) {
t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.ActiveKey(qname), diff) t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.ActiveKey(qname), diff)
} }
} }
for qname, want := range tc.wantDeadlines { for qname, want := range tc.wantLease {
gotDeadlines := h.GetDeadlinesEntries(t, r, qname) gotLease := h.GetLeaseEntries(t, r, qname)
if diff := cmp.Diff(want, gotDeadlines, h.SortZSetEntryOpt); diff != "" { if diff := cmp.Diff(want, gotLease, h.SortZSetEntryOpt); diff != "" {
t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.DeadlinesKey(qname), diff) t.Errorf("%s; mismatch found in %q; (-want,+got)\n%s", tc.desc, base.LeaseKey(qname), diff)
} }
} }
cmpOpt := h.EquateInt64Approx(2) // allow up to two-second difference in `LastFailedAt` cmpOpt := h.EquateInt64Approx(2) // allow up to two-second difference in `LastFailedAt`
@ -260,7 +256,7 @@ func TestRecoverer(t *testing.T) {
gotRetry := h.GetRetryMessages(t, r, qname) gotRetry := h.GetRetryMessages(t, r, qname)
var wantRetry []*base.TaskMessage // Note: construct message here since `LastFailedAt` is relative to each test run var wantRetry []*base.TaskMessage // Note: construct message here since `LastFailedAt` is relative to each test run
for _, msg := range msgs { for _, msg := range msgs {
wantRetry = append(wantRetry, h.TaskMessageAfterRetry(*msg, "context deadline exceeded", runTime)) wantRetry = append(wantRetry, h.TaskMessageAfterRetry(*msg, ErrLeaseExpired.Error(), runTime))
} }
if diff := cmp.Diff(wantRetry, gotRetry, h.SortMsgOpt, cmpOpt); diff != "" { if diff := cmp.Diff(wantRetry, gotRetry, h.SortMsgOpt, cmpOpt); diff != "" {
t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.RetryKey(qname), diff) t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.RetryKey(qname), diff)
@ -270,7 +266,7 @@ func TestRecoverer(t *testing.T) {
gotArchived := h.GetArchivedMessages(t, r, qname) gotArchived := h.GetArchivedMessages(t, r, qname)
var wantArchived []*base.TaskMessage var wantArchived []*base.TaskMessage
for _, msg := range msgs { for _, msg := range msgs {
wantArchived = append(wantArchived, h.TaskMessageWithError(*msg, "context deadline exceeded", runTime)) wantArchived = append(wantArchived, h.TaskMessageWithError(*msg, ErrLeaseExpired.Error(), runTime))
} }
if diff := cmp.Diff(wantArchived, gotArchived, h.SortMsgOpt, cmpOpt); diff != "" { if diff := cmp.Diff(wantArchived, gotArchived, h.SortMsgOpt, cmpOpt); diff != "" {
t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.ArchivedKey(qname), diff) t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.ArchivedKey(qname), diff)