2
0
mirror of https://github.com/hibiken/asynq.git synced 2024-12-25 23:32:17 +08:00

Add IsFailure to Config

With this IsFailure config, users can provide a predicate function to 
determine whether the error returned from Handler counts as a failure.
This commit is contained in:
Ken Hibino 2021-09-01 06:00:54 -07:00 committed by GitHub
parent 3ae0e7f528
commit f0db219f6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 246 additions and 35 deletions

View File

@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- `IsFailure` config option is added to determine whether error returned from Handler counts as a failure.
## [0.18.4] - 2020-08-17
### Fixed

View File

@ -645,7 +645,7 @@ type Broker interface {
Requeue(msg *TaskMessage) error
Schedule(msg *TaskMessage, processAt time.Time) error
ScheduleUnique(msg *TaskMessage, processAt time.Time, ttl time.Duration) error
Retry(msg *TaskMessage, processAt time.Time, errMsg string) error
Retry(msg *TaskMessage, processAt time.Time, errMsg string, isFailure bool) error
Archive(msg *TaskMessage, errMsg string) error
ForwardIfReady(qnames ...string) error
ListDeadlineExceeded(deadline time.Time, qnames ...string) ([]*TaskMessage, error)

View File

@ -184,7 +184,7 @@ func BenchmarkRetry(b *testing.B) {
asynqtest.SeedDeadlines(b, r.client, zs, base.DefaultQueueName)
b.StartTimer()
if err := r.Retry(msgs[0], time.Now().Add(1*time.Minute), "error"); err != nil {
if err := r.Retry(msgs[0], time.Now().Add(1*time.Minute), "error", true /*isFailure*/); err != nil {
b.Fatalf("Retry failed: %v", err)
}
}

View File

@ -467,6 +467,7 @@ func (r *RDB) ScheduleUnique(msg *base.TaskMessage, processAt time.Time, ttl tim
// ARGV[2] -> updated base.TaskMessage value
// ARGV[3] -> retry_at UNIX timestamp
// ARGV[4] -> stats expiration timestamp
// ARGV[5] -> is_failure (bool)
var retryCmd = redis.NewScript(`
if redis.call("LREM", KEYS[2], 0, ARGV[1]) == 0 then
return redis.error_reply("NOT FOUND")
@ -476,23 +477,28 @@ if redis.call("ZREM", KEYS[3], ARGV[1]) == 0 then
end
redis.call("ZADD", KEYS[4], ARGV[3], ARGV[1])
redis.call("HSET", KEYS[1], "msg", ARGV[2], "state", "retry")
local n = redis.call("INCR", KEYS[5])
if tonumber(n) == 1 then
redis.call("EXPIREAT", KEYS[5], ARGV[4])
end
local m = redis.call("INCR", KEYS[6])
if tonumber(m) == 1 then
redis.call("EXPIREAT", KEYS[6], ARGV[4])
if tonumber(ARGV[5]) == 1 then
local n = redis.call("INCR", KEYS[5])
if tonumber(n) == 1 then
redis.call("EXPIREAT", KEYS[5], ARGV[4])
end
local m = redis.call("INCR", KEYS[6])
if tonumber(m) == 1 then
redis.call("EXPIREAT", KEYS[6], ARGV[4])
end
end
return redis.status_reply("OK")`)
// Retry moves the task from active to retry queue, incrementing retry count
// and assigning error message to the task message.
func (r *RDB) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string) error {
// Retry moves the task from active to retry queue.
// It also annotates the message with the given error message and
// if isFailure is true increments the retried counter.
func (r *RDB) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error {
var op errors.Op = "rdb.Retry"
now := time.Now()
modified := *msg
modified.Retried++
if isFailure {
modified.Retried++
}
modified.ErrorMsg = errMsg
modified.LastFailedAt = now.Unix()
encoded, err := base.EncodeMessage(&modified)
@ -513,6 +519,7 @@ func (r *RDB) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string) e
encoded,
processAt.Unix(),
expireAt.Unix(),
isFailure,
}
return r.runScript(op, retryCmd, keys, argv...)
}

View File

@ -1158,7 +1158,7 @@ func TestRetry(t *testing.T) {
h.SeedAllRetryQueues(t, r.client, tc.retry)
callTime := time.Now() // time when method was called
err := r.Retry(tc.msg, tc.processAt, tc.errMsg)
err := r.Retry(tc.msg, tc.processAt, tc.errMsg, true /*isFailure*/)
if err != nil {
t.Errorf("(*RDB).Retry = %v, want nil", err)
continue
@ -1210,6 +1210,173 @@ func TestRetry(t *testing.T) {
}
}
func TestRetryWithNonFailureError(t *testing.T) {
r := setup(t)
defer r.Close()
now := time.Now()
t1 := &base.TaskMessage{
ID: uuid.New(),
Type: "send_email",
Payload: h.JSON(map[string]interface{}{"subject": "Hola!"}),
Retried: 10,
Timeout: 1800,
Queue: "default",
}
t2 := &base.TaskMessage{
ID: uuid.New(),
Type: "gen_thumbnail",
Payload: h.JSON(map[string]interface{}{"path": "some/path/to/image.jpg"}),
Timeout: 3000,
Queue: "default",
}
t3 := &base.TaskMessage{
ID: uuid.New(),
Type: "reindex",
Payload: nil,
Timeout: 60,
Queue: "default",
}
t4 := &base.TaskMessage{
ID: uuid.New(),
Type: "send_notification",
Payload: nil,
Timeout: 1800,
Queue: "custom",
}
t1Deadline := now.Unix() + t1.Timeout
t2Deadline := now.Unix() + t2.Timeout
t4Deadline := now.Unix() + t4.Timeout
errMsg := "SMTP server is not responding"
tests := []struct {
active map[string][]*base.TaskMessage
deadlines map[string][]base.Z
retry map[string][]base.Z
msg *base.TaskMessage
processAt time.Time
errMsg string
wantActive map[string][]*base.TaskMessage
wantDeadlines map[string][]base.Z
getWantRetry func(failedAt time.Time) map[string][]base.Z
}{
{
active: map[string][]*base.TaskMessage{
"default": {t1, t2},
},
deadlines: map[string][]base.Z{
"default": {{Message: t1, Score: t1Deadline}, {Message: t2, Score: t2Deadline}},
},
retry: map[string][]base.Z{
"default": {{Message: t3, Score: now.Add(time.Minute).Unix()}},
},
msg: t1,
processAt: now.Add(5 * time.Minute),
errMsg: errMsg,
wantActive: map[string][]*base.TaskMessage{
"default": {t2},
},
wantDeadlines: map[string][]base.Z{
"default": {{Message: t2, Score: t2Deadline}},
},
getWantRetry: func(failedAt time.Time) map[string][]base.Z {
return map[string][]base.Z{
"default": {
// Task message should include the error message but without incrementing the retry count.
{Message: h.TaskMessageWithError(*t1, errMsg, failedAt), Score: now.Add(5 * time.Minute).Unix()},
{Message: t3, Score: now.Add(time.Minute).Unix()},
},
}
},
},
{
active: map[string][]*base.TaskMessage{
"default": {t1, t2},
"custom": {t4},
},
deadlines: map[string][]base.Z{
"default": {{Message: t1, Score: t1Deadline}, {Message: t2, Score: t2Deadline}},
"custom": {{Message: t4, Score: t4Deadline}},
},
retry: map[string][]base.Z{
"default": {},
"custom": {},
},
msg: t4,
processAt: now.Add(5 * time.Minute),
errMsg: errMsg,
wantActive: map[string][]*base.TaskMessage{
"default": {t1, t2},
"custom": {},
},
wantDeadlines: map[string][]base.Z{
"default": {{Message: t1, Score: t1Deadline}, {Message: t2, Score: t2Deadline}},
"custom": {},
},
getWantRetry: func(failedAt time.Time) map[string][]base.Z {
return map[string][]base.Z{
"default": {},
"custom": {
// Task message should include the error message but without incrementing the retry count.
{Message: h.TaskMessageWithError(*t4, errMsg, failedAt), Score: now.Add(5 * time.Minute).Unix()},
},
}
},
},
}
for _, tc := range tests {
h.FlushDB(t, r.client)
h.SeedAllActiveQueues(t, r.client, tc.active)
h.SeedAllDeadlines(t, r.client, tc.deadlines)
h.SeedAllRetryQueues(t, r.client, tc.retry)
callTime := time.Now() // time when method was called
err := r.Retry(tc.msg, tc.processAt, tc.errMsg, false /*isFailure*/)
if err != nil {
t.Errorf("(*RDB).Retry = %v, want nil", err)
continue
}
for queue, want := range tc.wantActive {
gotActive := h.GetActiveMessages(t, r.client, queue)
if diff := cmp.Diff(want, gotActive, h.SortMsgOpt); diff != "" {
t.Errorf("mismatch found in %q; (-want, +got)\n%s", base.ActiveKey(queue), diff)
}
}
for queue, want := range tc.wantDeadlines {
gotDeadlines := h.GetDeadlinesEntries(t, r.client, queue)
if diff := cmp.Diff(want, gotDeadlines, h.SortZSetEntryOpt); diff != "" {
t.Errorf("mismatch found in %q; (-want, +got)\n%s", base.DeadlinesKey(queue), diff)
}
}
cmpOpts := []cmp.Option{
h.SortZSetEntryOpt,
cmpopts.EquateApproxTime(5 * time.Second), // for LastFailedAt field
}
wantRetry := tc.getWantRetry(callTime)
for queue, want := range wantRetry {
gotRetry := h.GetRetryEntries(t, r.client, queue)
if diff := cmp.Diff(want, gotRetry, cmpOpts...); diff != "" {
t.Errorf("mismatch found in %q; (-want, +got)\n%s", base.RetryKey(queue), diff)
}
}
// If isFailure is set to false, no stats should be recorded to avoid skewing the error rate.
processedKey := base.ProcessedKey(tc.msg.Queue, time.Now())
gotProcessed := r.client.Get(processedKey).Val()
if gotProcessed != "" {
t.Errorf("GET %q = %q, want empty", processedKey, gotProcessed)
}
// If isFailure is set to false, no stats should be recorded to avoid skewing the error rate.
failedKey := base.FailedKey(tc.msg.Queue, time.Now())
gotFailed := r.client.Get(failedKey).Val()
if gotFailed != "" {
t.Errorf("GET %q = %q, want empty", failedKey, gotFailed)
}
}
}
func TestArchive(t *testing.T) {
r := setup(t)
defer r.Close()

View File

@ -108,13 +108,13 @@ func (tb *TestBroker) ScheduleUnique(msg *base.TaskMessage, processAt time.Time,
return tb.real.ScheduleUnique(msg, processAt, ttl)
}
func (tb *TestBroker) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string) error {
func (tb *TestBroker) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error {
tb.mu.Lock()
defer tb.mu.Unlock()
if tb.sleeping {
return errRedisDown
}
return tb.real.Retry(msg, processAt, errMsg)
return tb.real.Retry(msg, processAt, errMsg, isFailure)
}
func (tb *TestBroker) Archive(msg *base.TaskMessage, errMsg string) error {

View File

@ -33,6 +33,7 @@ type processor struct {
orderedQueues []string
retryDelayFunc RetryDelayFunc
isFailureFunc func(error) bool
errHandler ErrorHandler
@ -70,6 +71,7 @@ type processorParams struct {
logger *log.Logger
broker base.Broker
retryDelayFunc RetryDelayFunc
isFailureFunc func(error) bool
syncCh chan<- *syncRequest
cancelations *base.Cancelations
concurrency int
@ -94,6 +96,7 @@ func newProcessor(params processorParams) *processor {
queueConfig: queues,
orderedQueues: orderedQueues,
retryDelayFunc: params.retryDelayFunc,
isFailureFunc: params.isFailureFunc,
syncRequestCh: params.syncCh,
cancelations: params.cancelations,
errLogLimiter: rate.NewLimiter(rate.Every(3*time.Second), 1),
@ -197,7 +200,7 @@ func (p *processor) exec() {
select {
case <-ctx.Done():
// already canceled (e.g. deadline exceeded).
p.retryOrKill(ctx, msg, ctx.Err())
p.retryOrArchive(ctx, msg, ctx.Err())
return
default:
}
@ -214,7 +217,7 @@ func (p *processor) exec() {
p.requeue(msg)
return
case <-ctx.Done():
p.retryOrKill(ctx, msg, ctx.Err())
p.retryOrArchive(ctx, msg, ctx.Err())
return
case resErr := <-resCh:
// Note: One of three things should happen.
@ -222,7 +225,7 @@ func (p *processor) exec() {
// 2) Retry -> Removes the message from Active & Adds the message to Retry
// 3) Archive -> Removes the message from Active & Adds the message to archive
if resErr != nil {
p.retryOrKill(ctx, msg, resErr)
p.retryOrArchive(ctx, msg, resErr)
return
}
p.markAsDone(ctx, msg)
@ -263,22 +266,27 @@ func (p *processor) markAsDone(ctx context.Context, msg *base.TaskMessage) {
// the task should not be retried and should be archived instead.
var SkipRetry = errors.New("skip retry for the task")
func (p *processor) retryOrKill(ctx context.Context, msg *base.TaskMessage, err error) {
func (p *processor) retryOrArchive(ctx context.Context, msg *base.TaskMessage, err error) {
if p.errHandler != nil {
p.errHandler.HandleError(ctx, NewTask(msg.Type, msg.Payload), err)
}
if !p.isFailureFunc(err) {
// retry the task without marking it as failed
p.retry(ctx, msg, err, false /*isFailure*/)
return
}
if msg.Retried >= msg.Retry || errors.Is(err, SkipRetry) {
p.logger.Warnf("Retry exhausted for task id=%s", msg.ID)
p.archive(ctx, msg, err)
} else {
p.retry(ctx, msg, err)
p.retry(ctx, msg, err, true /*isFailure*/)
}
}
func (p *processor) retry(ctx context.Context, msg *base.TaskMessage, e error) {
func (p *processor) retry(ctx context.Context, msg *base.TaskMessage, e error, isFailure bool) {
d := p.retryDelayFunc(msg.Retried, e, NewTask(msg.Type, msg.Payload))
retryAt := time.Now().Add(d)
err := p.broker.Retry(msg, retryAt, e.Error())
err := p.broker.Retry(msg, retryAt, e.Error(), isFailure)
if err != nil {
errMsg := fmt.Sprintf("Could not move task id=%s from %q to %q", msg.ID, base.ActiveKey(msg.Queue), base.RetryKey(msg.Queue))
deadline, ok := ctx.Deadline()
@ -288,7 +296,7 @@ func (p *processor) retry(ctx context.Context, msg *base.TaskMessage, e error) {
p.logger.Warnf("%s; Will retry syncing", errMsg)
p.syncRequestCh <- &syncRequest{
fn: func() error {
return p.broker.Retry(msg, retryAt, e.Error())
return p.broker.Retry(msg, retryAt, e.Error(), isFailure)
},
errMsg: errMsg,
deadline: deadline,

View File

@ -98,6 +98,7 @@ func TestProcessorSuccessWithSingleQueue(t *testing.T) {
logger: testLogger,
broker: rdbClient,
retryDelayFunc: DefaultRetryDelayFunc,
isFailureFunc: defaultIsFailureFunc,
syncCh: syncCh,
cancelations: base.NewCancelations(),
concurrency: 10,
@ -190,6 +191,7 @@ func TestProcessorSuccessWithMultipleQueues(t *testing.T) {
logger: testLogger,
broker: rdbClient,
retryDelayFunc: DefaultRetryDelayFunc,
isFailureFunc: defaultIsFailureFunc,
syncCh: syncCh,
cancelations: base.NewCancelations(),
concurrency: 10,
@ -276,6 +278,7 @@ func TestProcessTasksWithLargeNumberInPayload(t *testing.T) {
logger: testLogger,
broker: rdbClient,
retryDelayFunc: DefaultRetryDelayFunc,
isFailureFunc: defaultIsFailureFunc,
syncCh: syncCh,
cancelations: base.NewCancelations(),
concurrency: 10,
@ -395,6 +398,7 @@ func TestProcessorRetry(t *testing.T) {
logger: testLogger,
broker: rdbClient,
retryDelayFunc: delayFunc,
isFailureFunc: defaultIsFailureFunc,
syncCh: nil,
cancelations: base.NewCancelations(),
concurrency: 10,
@ -486,6 +490,7 @@ func TestProcessorQueues(t *testing.T) {
logger: testLogger,
broker: nil,
retryDelayFunc: DefaultRetryDelayFunc,
isFailureFunc: defaultIsFailureFunc,
syncCh: nil,
cancelations: base.NewCancelations(),
concurrency: 10,
@ -577,6 +582,7 @@ func TestProcessorWithStrictPriority(t *testing.T) {
logger: testLogger,
broker: rdbClient,
retryDelayFunc: DefaultRetryDelayFunc,
isFailureFunc: defaultIsFailureFunc,
syncCh: syncCh,
cancelations: base.NewCancelations(),
concurrency: 1, // Set concurrency to 1 to make sure tasks are processed one at a time.

View File

@ -5,7 +5,7 @@
package asynq
import (
"fmt"
"context"
"sync"
"time"
@ -17,6 +17,7 @@ type recoverer struct {
logger *log.Logger
broker base.Broker
retryDelayFunc RetryDelayFunc
isFailureFunc func(error) bool
// channel to communicate back to the long running "recoverer" goroutine.
done chan struct{}
@ -34,6 +35,7 @@ type recovererParams struct {
queues []string
interval time.Duration
retryDelayFunc RetryDelayFunc
isFailureFunc func(error) bool
}
func newRecoverer(params recovererParams) *recoverer {
@ -44,6 +46,7 @@ func newRecoverer(params recovererParams) *recoverer {
queues: params.queues,
interval: params.interval,
retryDelayFunc: params.retryDelayFunc,
isFailureFunc: params.isFailureFunc,
}
}
@ -81,26 +84,25 @@ func (r *recoverer) recover() {
r.logger.Warn("recoverer: could not list deadline exceeded tasks")
return
}
const errMsg = "deadline exceeded"
for _, msg := range msgs {
if msg.Retried >= msg.Retry {
r.archive(msg, errMsg)
r.archive(msg, context.DeadlineExceeded)
} else {
r.retry(msg, errMsg)
r.retry(msg, context.DeadlineExceeded)
}
}
}
func (r *recoverer) retry(msg *base.TaskMessage, errMsg string) {
delay := r.retryDelayFunc(msg.Retried, fmt.Errorf(errMsg), NewTask(msg.Type, msg.Payload))
func (r *recoverer) retry(msg *base.TaskMessage, err error) {
delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload))
retryAt := time.Now().Add(delay)
if err := r.broker.Retry(msg, retryAt, errMsg); err != nil {
if err := r.broker.Retry(msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil {
r.logger.Warnf("recoverer: could not retry deadline exceeded task: %v", err)
}
}
func (r *recoverer) archive(msg *base.TaskMessage, errMsg string) {
if err := r.broker.Archive(msg, errMsg); err != nil {
func (r *recoverer) archive(msg *base.TaskMessage, err error) {
if err := r.broker.Archive(msg, err.Error()); err != nil {
r.logger.Warnf("recoverer: could not move task to archive: %v", err)
}
}

View File

@ -234,6 +234,7 @@ func TestRecoverer(t *testing.T) {
queues: []string{"default", "critical"},
interval: 1 * time.Second,
retryDelayFunc: func(n int, err error, task *Task) time.Duration { return 30 * time.Second },
isFailureFunc: defaultIsFailureFunc,
})
var wg sync.WaitGroup
@ -259,7 +260,7 @@ func TestRecoverer(t *testing.T) {
gotRetry := h.GetRetryMessages(t, r, qname)
var wantRetry []*base.TaskMessage // Note: construct message here since `LastFailedAt` is relative to each test run
for _, msg := range msgs {
wantRetry = append(wantRetry, h.TaskMessageAfterRetry(*msg, "deadline exceeded", runTime))
wantRetry = append(wantRetry, h.TaskMessageAfterRetry(*msg, "context deadline exceeded", runTime))
}
if diff := cmp.Diff(wantRetry, gotRetry, h.SortMsgOpt, cmpOpt); diff != "" {
t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.RetryKey(qname), diff)
@ -269,7 +270,7 @@ func TestRecoverer(t *testing.T) {
gotArchived := h.GetArchivedMessages(t, r, qname)
var wantArchived []*base.TaskMessage
for _, msg := range msgs {
wantArchived = append(wantArchived, h.TaskMessageWithError(*msg, "deadline exceeded", runTime))
wantArchived = append(wantArchived, h.TaskMessageWithError(*msg, "context deadline exceeded", runTime))
}
if diff := cmp.Diff(wantArchived, gotArchived, h.SortMsgOpt, cmpOpt); diff != "" {
t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.ArchivedKey(qname), diff)

View File

@ -64,6 +64,14 @@ type Config struct {
// By default, it uses exponential backoff algorithm to calculate the delay.
RetryDelayFunc RetryDelayFunc
// Predicate function to determine whether the error returned from Handler is a failure.
// If the function returns false, Server will not increment the retried counter for the task,
// and Server won't record the queue stats (processed and failed stats) to avoid skewing the error
// rate of the queue.
//
// By default, if the given error is non-nil the function returns true.
IsFailure func(error) bool
// List of queues to process with given priority value. Keys are the names of the
// queues and values are associated priority value.
//
@ -268,6 +276,8 @@ func DefaultRetryDelayFunc(n int, e error, t *Task) time.Duration {
return time.Duration(s) * time.Second
}
func defaultIsFailureFunc(err error) bool { return err != nil }
var defaultQueueConfig = map[string]int{
base.DefaultQueueName: 1,
}
@ -293,6 +303,10 @@ func NewServer(r RedisConnOpt, cfg Config) *Server {
if delayFunc == nil {
delayFunc = DefaultRetryDelayFunc
}
isFailureFunc := cfg.IsFailure
if isFailureFunc == nil {
isFailureFunc = defaultIsFailureFunc
}
queues := make(map[string]int)
for qname, p := range cfg.Queues {
if err := base.ValidateQueueName(qname); err != nil {
@ -362,6 +376,7 @@ func NewServer(r RedisConnOpt, cfg Config) *Server {
logger: logger,
broker: rdb,
retryDelayFunc: delayFunc,
isFailureFunc: isFailureFunc,
syncCh: syncCh,
cancelations: cancels,
concurrency: n,
@ -376,6 +391,7 @@ func NewServer(r RedisConnOpt, cfg Config) *Server {
logger: logger,
broker: rdb,
retryDelayFunc: delayFunc,
isFailureFunc: isFailureFunc,
queues: qnames,
interval: 1 * time.Minute,
})