diff --git a/CHANGELOG.md b/CHANGELOG.md index 97b3de4..13f7bcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,34 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.18.5] - 2020-09-01 + +### Added + +- `IsFailure` config option is added to determine whether error returned from Handler counts as a failure. + +## [0.18.4] - 2020-08-17 + +### Fixed + +- Scheduler methods are now thread-safe. It's now safe to call `Register` and `Unregister` concurrently. + +## [0.18.3] - 2020-08-09 + +### Changed + +- `Client.Enqueue` no longer enqueues tasks with empty typename; Error message is returned. + ## [0.18.2] - 2020-07-15 ### Changed - Changed `Queue` function to not to convert the provided queue name to lowercase. Queue names are now case-sensitive. +- `QueueInfo.MemoryUsage` is now an approximate usage value. + +### Fixed + +- Fixed latency issue around memory usage (see https://github.com/hibiken/asynq/issues/309). ## [0.18.1] - 2020-07-04 diff --git a/README.md b/README.md index afcf4f0..e1140e9 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,8 @@ Task queues are used as a mechanism to distribute work across multiple machines. - Scheduling of tasks - [Retries](https://github.com/hibiken/asynq/wiki/Task-Retry) of failed tasks - Automatic recovery of tasks in the event of a worker crash -- [Weighted priority queues](https://github.com/hibiken/asynq/wiki/Priority-Queues#weighted-priority-queues) -- [Strict priority queues](https://github.com/hibiken/asynq/wiki/Priority-Queues#strict-priority-queues) +- [Weighted priority queues](https://github.com/hibiken/asynq/wiki/Queue-Priority#weighted-priority) +- [Strict priority queues](https://github.com/hibiken/asynq/wiki/Queue-Priority#strict-priority) - Low latency to add a task since writes are fast in Redis - De-duplication of tasks using [unique option](https://github.com/hibiken/asynq/wiki/Unique-Tasks) - Allow [timeout and deadline per task](https://github.com/hibiken/asynq/wiki/Task-Timeout-and-Cancelation) @@ -91,7 +91,7 @@ type ImageResizePayload struct { //---------------------------------------------- func NewEmailDeliveryTask(userID int, tmplID string) (*asynq.Task, error) { - payload, err := json.Marshal(EmailDeliveryPayload{UserID: userID, TemplateID: templID}) + payload, err := json.Marshal(EmailDeliveryPayload{UserID: userID, TemplateID: tmplID}) if err != nil { return nil, err } @@ -129,7 +129,7 @@ type ImageProcessor struct { // ... fields for struct } -func (p *ImageProcessor) ProcessTask(ctx context.Context, t *asynq.Task) error { +func (processor *ImageProcessor) ProcessTask(ctx context.Context, t *asynq.Task) error { var p ImageResizePayload if err := json.Unmarshal(t.Payload(), &p); err != nil { return fmt.Errorf("json.Unmarshal failed: %v: %w", err, asynq.SkipRetry) @@ -140,7 +140,7 @@ func (p *ImageProcessor) ProcessTask(ctx context.Context, t *asynq.Task) error { } func NewImageProcessor() *ImageProcessor { - // ... return an instance + return &ImageProcessor{} } ``` @@ -215,7 +215,7 @@ func main() { info, err = client.Enqueue(task, asynq.Queue("critical"), asynq.Timeout(30*time.Second)) if err != nil { - log.Fatal("could not enqueue task: %v", err) + log.Fatalf("could not enqueue task: %v", err) } log.Printf("enqueued task: id=%s queue=%s", info.ID, info.Queue) } @@ -239,7 +239,7 @@ const redisAddr = "127.0.0.1:6379" func main() { srv := asynq.NewServer( - asynq.RedisClientOpt{Addr: redisAddr} + asynq.RedisClientOpt{Addr: redisAddr}, asynq.Config{ // Specify how many concurrent workers to use Concurrency: 10, diff --git a/client.go b/client.go index 2865210..f7c9e79 100644 --- a/client.go +++ b/client.go @@ -6,6 +6,7 @@ package asynq import ( "fmt" + "strings" "sync" "time" @@ -266,6 +267,9 @@ func (c *Client) Close() error { // // If no ProcessAt or ProcessIn options are provided, the task will be pending immediately. func (c *Client) Enqueue(task *Task, opts ...Option) (*TaskInfo, error) { + if strings.TrimSpace(task.Type()) == "" { + return nil, fmt.Errorf("task typename cannot be empty") + } c.mu.Lock() if defaults, ok := c.opts[task.Type()]; ok { opts = append(defaults, opts...) diff --git a/client_test.go b/client_test.go index b15fd81..0ef3211 100644 --- a/client_test.go +++ b/client_test.go @@ -586,6 +586,16 @@ func TestClientEnqueueError(t *testing.T) { Queue(""), }, }, + { + desc: "With empty task typename", + task: NewTask("", h.JSON(map[string]interface{}{})), + opts: []Option{}, + }, + { + desc: "With blank task typename", + task: NewTask(" ", h.JSON(map[string]interface{}{})), + opts: []Option{}, + }, } for _, tc := range tests { diff --git a/inspector.go b/inspector.go index 5dec648..70826bc 100644 --- a/inspector.go +++ b/inspector.go @@ -50,6 +50,7 @@ type QueueInfo struct { Queue string // Total number of bytes that the queue and its tasks require to be stored in redis. + // It is an approximate memory usage value in bytes since the value is computed by sampling. MemoryUsage int64 // Size is the total number of tasks in the queue. diff --git a/internal/base/base.go b/internal/base/base.go index ca0d323..5e2c916 100644 --- a/internal/base/base.go +++ b/internal/base/base.go @@ -23,7 +23,7 @@ import ( ) // Version of asynq library and CLI. -const Version = "0.18.2" +const Version = "0.18.5" // DefaultQueueName is the queue name used if none are specified by user. const DefaultQueueName = "default" @@ -645,7 +645,7 @@ type Broker interface { Requeue(msg *TaskMessage) error Schedule(msg *TaskMessage, processAt time.Time) error ScheduleUnique(msg *TaskMessage, processAt time.Time, ttl time.Duration) error - Retry(msg *TaskMessage, processAt time.Time, errMsg string) error + Retry(msg *TaskMessage, processAt time.Time, errMsg string, isFailure bool) error Archive(msg *TaskMessage, errMsg string) error ForwardIfReady(qnames ...string) error ListDeadlineExceeded(deadline time.Time, qnames ...string) ([]*TaskMessage, error) diff --git a/internal/rdb/benchmark_test.go b/internal/rdb/benchmark_test.go index 576b111..8880b3b 100644 --- a/internal/rdb/benchmark_test.go +++ b/internal/rdb/benchmark_test.go @@ -184,7 +184,7 @@ func BenchmarkRetry(b *testing.B) { asynqtest.SeedDeadlines(b, r.client, zs, base.DefaultQueueName) b.StartTimer() - if err := r.Retry(msgs[0], time.Now().Add(1*time.Minute), "error"); err != nil { + if err := r.Retry(msgs[0], time.Now().Add(1*time.Minute), "error", true /*isFailure*/); err != nil { b.Fatalf("Retry failed: %v", err) } } diff --git a/internal/rdb/inspect.go b/internal/rdb/inspect.go index 78bd6e9..b53a155 100644 --- a/internal/rdb/inspect.go +++ b/internal/rdb/inspect.go @@ -27,7 +27,8 @@ type Stats struct { // Name of the queue (e.g. "default", "critical"). Queue string // MemoryUsage is the total number of bytes the queue and its tasks require - // to be stored in redis. + // to be stored in redis. It is an approximate memory usage value in bytes + // since the value is computed by sampling. MemoryUsage int64 // Paused indicates whether the queue is paused. // If true, tasks in the queue should not be processed. @@ -173,31 +174,82 @@ func (r *RDB) CurrentStats(qname string) (*Stats, error) { return stats, nil } +// Computes memory usage for the given queue by sampling tasks +// from each redis list/zset. Returns approximate memory usage value +// in bytes. +// +// KEYS[1] -> asynq:{qname}:active +// KEYS[2] -> asynq:{qname}:pending +// KEYS[3] -> asynq:{qname}:scheduled +// KEYS[4] -> asynq:{qname}:retry +// KEYS[5] -> asynq:{qname}:archived +// +// ARGV[1] -> asynq:{qname}:t: +// ARGV[2] -> sample_size (e.g 20) +var memoryUsageCmd = redis.NewScript(` +local sample_size = tonumber(ARGV[2]) +if sample_size <= 0 then + return redis.error_reply("sample size must be a positive number") +end +local memusg = 0 +for i=1,2 do + local ids = redis.call("LRANGE", KEYS[i], 0, sample_size - 1) + local sample_total = 0 + if (table.getn(ids) > 0) then + for _, id in ipairs(ids) do + local bytes = redis.call("MEMORY", "USAGE", ARGV[1] .. id) + sample_total = sample_total + bytes + end + local n = redis.call("LLEN", KEYS[i]) + local avg = sample_total / table.getn(ids) + memusg = memusg + (avg * n) + end + local m = redis.call("MEMORY", "USAGE", KEYS[i]) + if (m) then + memusg = memusg + m + end +end +for i=3,5 do + local ids = redis.call("ZRANGE", KEYS[i], 0, sample_size - 1) + local sample_total = 0 + if (table.getn(ids) > 0) then + for _, id in ipairs(ids) do + local bytes = redis.call("MEMORY", "USAGE", ARGV[1] .. id) + sample_total = sample_total + bytes + end + local n = redis.call("ZCARD", KEYS[i]) + local avg = sample_total / table.getn(ids) + memusg = memusg + (avg * n) + end + local m = redis.call("MEMORY", "USAGE", KEYS[i]) + if (m) then + memusg = memusg + m + end +end +return memusg +`) + func (r *RDB) memoryUsage(qname string) (int64, error) { var op errors.Op = "rdb.memoryUsage" - var ( - keys []string - data []string - cursor uint64 - err error - ) - for { - data, cursor, err = r.client.Scan(context.Background(), cursor, fmt.Sprintf("asynq:{%s}*", qname), 100).Result() - if err != nil { - return 0, errors.E(op, errors.Unknown, &errors.RedisCommandError{Command: "scan", Err: err}) - } - keys = append(keys, data...) - if cursor == 0 { - break - } + const sampleSize = 20 + keys := []string{ + base.ActiveKey(qname), + base.PendingKey(qname), + base.ScheduledKey(qname), + base.RetryKey(qname), + base.ArchivedKey(qname), } - var usg int64 - for _, k := range keys { - n, err := r.client.MemoryUsage(context.Background(), k).Result() - if err != nil { - return 0, errors.E(op, errors.Unknown, &errors.RedisCommandError{Command: "memory usage", Err: err}) - } - usg += n + argv := []interface{}{ + base.TaskKeyPrefix(qname), + sampleSize, + } + res, err := memoryUsageCmd.Run(context.Background(), r.client, keys, argv...).Result() + if err != nil { + return 0, errors.E(op, errors.Unknown, fmt.Sprintf("redis eval error: %v", err)) + } + usg, err := cast.ToInt64E(res) + if err != nil { + return 0, errors.E(op, errors.Internal, fmt.Sprintf("could not cast script return value to int64")) } return usg, nil } diff --git a/internal/rdb/rdb.go b/internal/rdb/rdb.go index 1b8410f..9dd3513 100644 --- a/internal/rdb/rdb.go +++ b/internal/rdb/rdb.go @@ -468,6 +468,7 @@ func (r *RDB) ScheduleUnique(msg *base.TaskMessage, processAt time.Time, ttl tim // ARGV[2] -> updated base.TaskMessage value // ARGV[3] -> retry_at UNIX timestamp // ARGV[4] -> stats expiration timestamp +// ARGV[5] -> is_failure (bool) var retryCmd = redis.NewScript(` if redis.call("LREM", KEYS[2], 0, ARGV[1]) == 0 then return redis.error_reply("NOT FOUND") @@ -477,23 +478,28 @@ if redis.call("ZREM", KEYS[3], ARGV[1]) == 0 then end redis.call("ZADD", KEYS[4], ARGV[3], ARGV[1]) redis.call("HSET", KEYS[1], "msg", ARGV[2], "state", "retry") -local n = redis.call("INCR", KEYS[5]) -if tonumber(n) == 1 then - redis.call("EXPIREAT", KEYS[5], ARGV[4]) -end -local m = redis.call("INCR", KEYS[6]) -if tonumber(m) == 1 then - redis.call("EXPIREAT", KEYS[6], ARGV[4]) +if tonumber(ARGV[5]) == 1 then + local n = redis.call("INCR", KEYS[5]) + if tonumber(n) == 1 then + redis.call("EXPIREAT", KEYS[5], ARGV[4]) + end + local m = redis.call("INCR", KEYS[6]) + if tonumber(m) == 1 then + redis.call("EXPIREAT", KEYS[6], ARGV[4]) + end end return redis.status_reply("OK")`) -// Retry moves the task from active to retry queue, incrementing retry count -// and assigning error message to the task message. -func (r *RDB) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string) error { +// Retry moves the task from active to retry queue. +// It also annotates the message with the given error message and +// if isFailure is true increments the retried counter. +func (r *RDB) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error { var op errors.Op = "rdb.Retry" now := time.Now() modified := *msg - modified.Retried++ + if isFailure { + modified.Retried++ + } modified.ErrorMsg = errMsg modified.LastFailedAt = now.Unix() encoded, err := base.EncodeMessage(&modified) @@ -514,6 +520,7 @@ func (r *RDB) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string) e encoded, processAt.Unix(), expireAt.Unix(), + isFailure, } return r.runScript(op, retryCmd, keys, argv...) } diff --git a/internal/rdb/rdb_test.go b/internal/rdb/rdb_test.go index 907aeff..c566722 100644 --- a/internal/rdb/rdb_test.go +++ b/internal/rdb/rdb_test.go @@ -1159,7 +1159,7 @@ func TestRetry(t *testing.T) { h.SeedAllRetryQueues(t, r.client, tc.retry) callTime := time.Now() // time when method was called - err := r.Retry(tc.msg, tc.processAt, tc.errMsg) + err := r.Retry(tc.msg, tc.processAt, tc.errMsg, true /*isFailure*/) if err != nil { t.Errorf("(*RDB).Retry = %v, want nil", err) continue @@ -1211,6 +1211,173 @@ func TestRetry(t *testing.T) { } } +func TestRetryWithNonFailureError(t *testing.T) { + r := setup(t) + defer r.Close() + now := time.Now() + t1 := &base.TaskMessage{ + ID: uuid.New(), + Type: "send_email", + Payload: h.JSON(map[string]interface{}{"subject": "Hola!"}), + Retried: 10, + Timeout: 1800, + Queue: "default", + } + t2 := &base.TaskMessage{ + ID: uuid.New(), + Type: "gen_thumbnail", + Payload: h.JSON(map[string]interface{}{"path": "some/path/to/image.jpg"}), + Timeout: 3000, + Queue: "default", + } + t3 := &base.TaskMessage{ + ID: uuid.New(), + Type: "reindex", + Payload: nil, + Timeout: 60, + Queue: "default", + } + t4 := &base.TaskMessage{ + ID: uuid.New(), + Type: "send_notification", + Payload: nil, + Timeout: 1800, + Queue: "custom", + } + t1Deadline := now.Unix() + t1.Timeout + t2Deadline := now.Unix() + t2.Timeout + t4Deadline := now.Unix() + t4.Timeout + errMsg := "SMTP server is not responding" + + tests := []struct { + active map[string][]*base.TaskMessage + deadlines map[string][]base.Z + retry map[string][]base.Z + msg *base.TaskMessage + processAt time.Time + errMsg string + wantActive map[string][]*base.TaskMessage + wantDeadlines map[string][]base.Z + getWantRetry func(failedAt time.Time) map[string][]base.Z + }{ + { + active: map[string][]*base.TaskMessage{ + "default": {t1, t2}, + }, + deadlines: map[string][]base.Z{ + "default": {{Message: t1, Score: t1Deadline}, {Message: t2, Score: t2Deadline}}, + }, + retry: map[string][]base.Z{ + "default": {{Message: t3, Score: now.Add(time.Minute).Unix()}}, + }, + msg: t1, + processAt: now.Add(5 * time.Minute), + errMsg: errMsg, + wantActive: map[string][]*base.TaskMessage{ + "default": {t2}, + }, + wantDeadlines: map[string][]base.Z{ + "default": {{Message: t2, Score: t2Deadline}}, + }, + getWantRetry: func(failedAt time.Time) map[string][]base.Z { + return map[string][]base.Z{ + "default": { + // Task message should include the error message but without incrementing the retry count. + {Message: h.TaskMessageWithError(*t1, errMsg, failedAt), Score: now.Add(5 * time.Minute).Unix()}, + {Message: t3, Score: now.Add(time.Minute).Unix()}, + }, + } + }, + }, + { + active: map[string][]*base.TaskMessage{ + "default": {t1, t2}, + "custom": {t4}, + }, + deadlines: map[string][]base.Z{ + "default": {{Message: t1, Score: t1Deadline}, {Message: t2, Score: t2Deadline}}, + "custom": {{Message: t4, Score: t4Deadline}}, + }, + retry: map[string][]base.Z{ + "default": {}, + "custom": {}, + }, + msg: t4, + processAt: now.Add(5 * time.Minute), + errMsg: errMsg, + wantActive: map[string][]*base.TaskMessage{ + "default": {t1, t2}, + "custom": {}, + }, + wantDeadlines: map[string][]base.Z{ + "default": {{Message: t1, Score: t1Deadline}, {Message: t2, Score: t2Deadline}}, + "custom": {}, + }, + getWantRetry: func(failedAt time.Time) map[string][]base.Z { + return map[string][]base.Z{ + "default": {}, + "custom": { + // Task message should include the error message but without incrementing the retry count. + {Message: h.TaskMessageWithError(*t4, errMsg, failedAt), Score: now.Add(5 * time.Minute).Unix()}, + }, + } + }, + }, + } + + for _, tc := range tests { + h.FlushDB(t, r.client) + h.SeedAllActiveQueues(t, r.client, tc.active) + h.SeedAllDeadlines(t, r.client, tc.deadlines) + h.SeedAllRetryQueues(t, r.client, tc.retry) + + callTime := time.Now() // time when method was called + err := r.Retry(tc.msg, tc.processAt, tc.errMsg, false /*isFailure*/) + if err != nil { + t.Errorf("(*RDB).Retry = %v, want nil", err) + continue + } + + for queue, want := range tc.wantActive { + gotActive := h.GetActiveMessages(t, r.client, queue) + if diff := cmp.Diff(want, gotActive, h.SortMsgOpt); diff != "" { + t.Errorf("mismatch found in %q; (-want, +got)\n%s", base.ActiveKey(queue), diff) + } + } + for queue, want := range tc.wantDeadlines { + gotDeadlines := h.GetDeadlinesEntries(t, r.client, queue) + if diff := cmp.Diff(want, gotDeadlines, h.SortZSetEntryOpt); diff != "" { + t.Errorf("mismatch found in %q; (-want, +got)\n%s", base.DeadlinesKey(queue), diff) + } + } + cmpOpts := []cmp.Option{ + h.SortZSetEntryOpt, + cmpopts.EquateApproxTime(5 * time.Second), // for LastFailedAt field + } + wantRetry := tc.getWantRetry(callTime) + for queue, want := range wantRetry { + gotRetry := h.GetRetryEntries(t, r.client, queue) + if diff := cmp.Diff(want, gotRetry, cmpOpts...); diff != "" { + t.Errorf("mismatch found in %q; (-want, +got)\n%s", base.RetryKey(queue), diff) + } + } + + // If isFailure is set to false, no stats should be recorded to avoid skewing the error rate. + processedKey := base.ProcessedKey(tc.msg.Queue, time.Now()) + gotProcessed := r.client.Get(context.Background(), processedKey).Val() + if gotProcessed != "" { + t.Errorf("GET %q = %q, want empty", processedKey, gotProcessed) + } + + // If isFailure is set to false, no stats should be recorded to avoid skewing the error rate. + failedKey := base.FailedKey(tc.msg.Queue, time.Now()) + gotFailed := r.client.Get(context.Background(), failedKey).Val() + if gotFailed != "" { + t.Errorf("GET %q = %q, want empty", failedKey, gotFailed) + } + } +} + func TestArchive(t *testing.T) { r := setup(t) defer r.Close() diff --git a/internal/testbroker/testbroker.go b/internal/testbroker/testbroker.go index abdfd44..735c08c 100644 --- a/internal/testbroker/testbroker.go +++ b/internal/testbroker/testbroker.go @@ -108,13 +108,13 @@ func (tb *TestBroker) ScheduleUnique(msg *base.TaskMessage, processAt time.Time, return tb.real.ScheduleUnique(msg, processAt, ttl) } -func (tb *TestBroker) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string) error { +func (tb *TestBroker) Retry(msg *base.TaskMessage, processAt time.Time, errMsg string, isFailure bool) error { tb.mu.Lock() defer tb.mu.Unlock() if tb.sleeping { return errRedisDown } - return tb.real.Retry(msg, processAt, errMsg) + return tb.real.Retry(msg, processAt, errMsg, isFailure) } func (tb *TestBroker) Archive(msg *base.TaskMessage, errMsg string) error { diff --git a/processor.go b/processor.go index c1857bf..5ede2d2 100644 --- a/processor.go +++ b/processor.go @@ -33,6 +33,7 @@ type processor struct { orderedQueues []string retryDelayFunc RetryDelayFunc + isFailureFunc func(error) bool errHandler ErrorHandler @@ -70,6 +71,7 @@ type processorParams struct { logger *log.Logger broker base.Broker retryDelayFunc RetryDelayFunc + isFailureFunc func(error) bool syncCh chan<- *syncRequest cancelations *base.Cancelations concurrency int @@ -94,6 +96,7 @@ func newProcessor(params processorParams) *processor { queueConfig: queues, orderedQueues: orderedQueues, retryDelayFunc: params.retryDelayFunc, + isFailureFunc: params.isFailureFunc, syncRequestCh: params.syncCh, cancelations: params.cancelations, errLogLimiter: rate.NewLimiter(rate.Every(3*time.Second), 1), @@ -197,7 +200,7 @@ func (p *processor) exec() { select { case <-ctx.Done(): // already canceled (e.g. deadline exceeded). - p.retryOrKill(ctx, msg, ctx.Err()) + p.retryOrArchive(ctx, msg, ctx.Err()) return default: } @@ -214,7 +217,7 @@ func (p *processor) exec() { p.requeue(msg) return case <-ctx.Done(): - p.retryOrKill(ctx, msg, ctx.Err()) + p.retryOrArchive(ctx, msg, ctx.Err()) return case resErr := <-resCh: // Note: One of three things should happen. @@ -222,7 +225,7 @@ func (p *processor) exec() { // 2) Retry -> Removes the message from Active & Adds the message to Retry // 3) Archive -> Removes the message from Active & Adds the message to archive if resErr != nil { - p.retryOrKill(ctx, msg, resErr) + p.retryOrArchive(ctx, msg, resErr) return } p.markAsDone(ctx, msg) @@ -263,22 +266,27 @@ func (p *processor) markAsDone(ctx context.Context, msg *base.TaskMessage) { // the task should not be retried and should be archived instead. var SkipRetry = errors.New("skip retry for the task") -func (p *processor) retryOrKill(ctx context.Context, msg *base.TaskMessage, err error) { +func (p *processor) retryOrArchive(ctx context.Context, msg *base.TaskMessage, err error) { if p.errHandler != nil { p.errHandler.HandleError(ctx, NewTask(msg.Type, msg.Payload), err) } + if !p.isFailureFunc(err) { + // retry the task without marking it as failed + p.retry(ctx, msg, err, false /*isFailure*/) + return + } if msg.Retried >= msg.Retry || errors.Is(err, SkipRetry) { p.logger.Warnf("Retry exhausted for task id=%s", msg.ID) p.archive(ctx, msg, err) } else { - p.retry(ctx, msg, err) + p.retry(ctx, msg, err, true /*isFailure*/) } } -func (p *processor) retry(ctx context.Context, msg *base.TaskMessage, e error) { +func (p *processor) retry(ctx context.Context, msg *base.TaskMessage, e error, isFailure bool) { d := p.retryDelayFunc(msg.Retried, e, NewTask(msg.Type, msg.Payload)) retryAt := time.Now().Add(d) - err := p.broker.Retry(msg, retryAt, e.Error()) + err := p.broker.Retry(msg, retryAt, e.Error(), isFailure) if err != nil { errMsg := fmt.Sprintf("Could not move task id=%s from %q to %q", msg.ID, base.ActiveKey(msg.Queue), base.RetryKey(msg.Queue)) deadline, ok := ctx.Deadline() @@ -288,7 +296,7 @@ func (p *processor) retry(ctx context.Context, msg *base.TaskMessage, e error) { p.logger.Warnf("%s; Will retry syncing", errMsg) p.syncRequestCh <- &syncRequest{ fn: func() error { - return p.broker.Retry(msg, retryAt, e.Error()) + return p.broker.Retry(msg, retryAt, e.Error(), isFailure) }, errMsg: errMsg, deadline: deadline, diff --git a/processor_test.go b/processor_test.go index 32f7820..3708ecf 100644 --- a/processor_test.go +++ b/processor_test.go @@ -98,6 +98,7 @@ func TestProcessorSuccessWithSingleQueue(t *testing.T) { logger: testLogger, broker: rdbClient, retryDelayFunc: DefaultRetryDelayFunc, + isFailureFunc: defaultIsFailureFunc, syncCh: syncCh, cancelations: base.NewCancelations(), concurrency: 10, @@ -190,6 +191,7 @@ func TestProcessorSuccessWithMultipleQueues(t *testing.T) { logger: testLogger, broker: rdbClient, retryDelayFunc: DefaultRetryDelayFunc, + isFailureFunc: defaultIsFailureFunc, syncCh: syncCh, cancelations: base.NewCancelations(), concurrency: 10, @@ -276,6 +278,7 @@ func TestProcessTasksWithLargeNumberInPayload(t *testing.T) { logger: testLogger, broker: rdbClient, retryDelayFunc: DefaultRetryDelayFunc, + isFailureFunc: defaultIsFailureFunc, syncCh: syncCh, cancelations: base.NewCancelations(), concurrency: 10, @@ -395,6 +398,7 @@ func TestProcessorRetry(t *testing.T) { logger: testLogger, broker: rdbClient, retryDelayFunc: delayFunc, + isFailureFunc: defaultIsFailureFunc, syncCh: nil, cancelations: base.NewCancelations(), concurrency: 10, @@ -486,6 +490,7 @@ func TestProcessorQueues(t *testing.T) { logger: testLogger, broker: nil, retryDelayFunc: DefaultRetryDelayFunc, + isFailureFunc: defaultIsFailureFunc, syncCh: nil, cancelations: base.NewCancelations(), concurrency: 10, @@ -577,6 +582,7 @@ func TestProcessorWithStrictPriority(t *testing.T) { logger: testLogger, broker: rdbClient, retryDelayFunc: DefaultRetryDelayFunc, + isFailureFunc: defaultIsFailureFunc, syncCh: syncCh, cancelations: base.NewCancelations(), concurrency: 1, // Set concurrency to 1 to make sure tasks are processed one at a time. diff --git a/recoverer.go b/recoverer.go index 7aae69c..4165e43 100644 --- a/recoverer.go +++ b/recoverer.go @@ -5,7 +5,7 @@ package asynq import ( - "fmt" + "context" "sync" "time" @@ -17,6 +17,7 @@ type recoverer struct { logger *log.Logger broker base.Broker retryDelayFunc RetryDelayFunc + isFailureFunc func(error) bool // channel to communicate back to the long running "recoverer" goroutine. done chan struct{} @@ -34,6 +35,7 @@ type recovererParams struct { queues []string interval time.Duration retryDelayFunc RetryDelayFunc + isFailureFunc func(error) bool } func newRecoverer(params recovererParams) *recoverer { @@ -44,6 +46,7 @@ func newRecoverer(params recovererParams) *recoverer { queues: params.queues, interval: params.interval, retryDelayFunc: params.retryDelayFunc, + isFailureFunc: params.isFailureFunc, } } @@ -81,26 +84,25 @@ func (r *recoverer) recover() { r.logger.Warn("recoverer: could not list deadline exceeded tasks") return } - const errMsg = "deadline exceeded" for _, msg := range msgs { if msg.Retried >= msg.Retry { - r.archive(msg, errMsg) + r.archive(msg, context.DeadlineExceeded) } else { - r.retry(msg, errMsg) + r.retry(msg, context.DeadlineExceeded) } } } -func (r *recoverer) retry(msg *base.TaskMessage, errMsg string) { - delay := r.retryDelayFunc(msg.Retried, fmt.Errorf(errMsg), NewTask(msg.Type, msg.Payload)) +func (r *recoverer) retry(msg *base.TaskMessage, err error) { + delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload)) retryAt := time.Now().Add(delay) - if err := r.broker.Retry(msg, retryAt, errMsg); err != nil { + if err := r.broker.Retry(msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil { r.logger.Warnf("recoverer: could not retry deadline exceeded task: %v", err) } } -func (r *recoverer) archive(msg *base.TaskMessage, errMsg string) { - if err := r.broker.Archive(msg, errMsg); err != nil { +func (r *recoverer) archive(msg *base.TaskMessage, err error) { + if err := r.broker.Archive(msg, err.Error()); err != nil { r.logger.Warnf("recoverer: could not move task to archive: %v", err) } } diff --git a/recoverer_test.go b/recoverer_test.go index 0a9fdb6..a62f7b7 100644 --- a/recoverer_test.go +++ b/recoverer_test.go @@ -234,6 +234,7 @@ func TestRecoverer(t *testing.T) { queues: []string{"default", "critical"}, interval: 1 * time.Second, retryDelayFunc: func(n int, err error, task *Task) time.Duration { return 30 * time.Second }, + isFailureFunc: defaultIsFailureFunc, }) var wg sync.WaitGroup @@ -259,7 +260,7 @@ func TestRecoverer(t *testing.T) { gotRetry := h.GetRetryMessages(t, r, qname) var wantRetry []*base.TaskMessage // Note: construct message here since `LastFailedAt` is relative to each test run for _, msg := range msgs { - wantRetry = append(wantRetry, h.TaskMessageAfterRetry(*msg, "deadline exceeded", runTime)) + wantRetry = append(wantRetry, h.TaskMessageAfterRetry(*msg, "context deadline exceeded", runTime)) } if diff := cmp.Diff(wantRetry, gotRetry, h.SortMsgOpt, cmpOpt); diff != "" { t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.RetryKey(qname), diff) @@ -269,7 +270,7 @@ func TestRecoverer(t *testing.T) { gotArchived := h.GetArchivedMessages(t, r, qname) var wantArchived []*base.TaskMessage for _, msg := range msgs { - wantArchived = append(wantArchived, h.TaskMessageWithError(*msg, "deadline exceeded", runTime)) + wantArchived = append(wantArchived, h.TaskMessageWithError(*msg, "context deadline exceeded", runTime)) } if diff := cmp.Diff(wantArchived, gotArchived, h.SortMsgOpt, cmpOpt); diff != "" { t.Errorf("%s; mismatch found in %q: (-want, +got)\n%s", tc.desc, base.ArchivedKey(qname), diff) diff --git a/scheduler.go b/scheduler.go index f49f87a..8e2d264 100644 --- a/scheduler.go +++ b/scheduler.go @@ -19,6 +19,8 @@ import ( ) // A Scheduler kicks off tasks at regular intervals based on the user defined schedule. +// +// Schedulers are safe for concurrent use by multiple goroutines. type Scheduler struct { id string state *base.ServerState @@ -30,6 +32,9 @@ type Scheduler struct { done chan struct{} wg sync.WaitGroup errHandler func(task *Task, opts []Option, err error) + + // guards idmap + mu sync.Mutex // idmap maps Scheduler's entry ID to cron.EntryID // to avoid using cron.EntryID as the public API of // the Scheduler. @@ -154,17 +159,22 @@ func (s *Scheduler) Register(cronspec string, task *Task, opts ...Option) (entry if err != nil { return "", err } + s.mu.Lock() s.idmap[job.id.String()] = cronID + s.mu.Unlock() return job.id.String(), nil } // Unregister removes a registered entry by entry ID. // Unregister returns a non-nil error if no entries were found for the given entryID. func (s *Scheduler) Unregister(entryID string) error { + s.mu.Lock() + defer s.mu.Unlock() cronID, ok := s.idmap[entryID] if !ok { return fmt.Errorf("asynq: no scheduler entry found") } + delete(s.idmap, entryID) s.cron.Remove(cronID) return nil } diff --git a/servemux.go b/servemux.go index 6dc670d..c11fbce 100644 --- a/servemux.go +++ b/servemux.go @@ -98,7 +98,7 @@ func (mux *ServeMux) Handle(pattern string, handler Handler) { mux.mu.Lock() defer mux.mu.Unlock() - if pattern == "" { + if strings.TrimSpace(pattern) == "" { panic("asynq: invalid pattern") } if handler == nil { diff --git a/server.go b/server.go index 33712ba..98f8b3f 100644 --- a/server.go +++ b/server.go @@ -64,6 +64,14 @@ type Config struct { // By default, it uses exponential backoff algorithm to calculate the delay. RetryDelayFunc RetryDelayFunc + // Predicate function to determine whether the error returned from Handler is a failure. + // If the function returns false, Server will not increment the retried counter for the task, + // and Server won't record the queue stats (processed and failed stats) to avoid skewing the error + // rate of the queue. + // + // By default, if the given error is non-nil the function returns true. + IsFailure func(error) bool + // List of queues to process with given priority value. Keys are the names of the // queues and values are associated priority value. // @@ -268,6 +276,8 @@ func DefaultRetryDelayFunc(n int, e error, t *Task) time.Duration { return time.Duration(s) * time.Second } +func defaultIsFailureFunc(err error) bool { return err != nil } + var defaultQueueConfig = map[string]int{ base.DefaultQueueName: 1, } @@ -293,6 +303,10 @@ func NewServer(r RedisConnOpt, cfg Config) *Server { if delayFunc == nil { delayFunc = DefaultRetryDelayFunc } + isFailureFunc := cfg.IsFailure + if isFailureFunc == nil { + isFailureFunc = defaultIsFailureFunc + } queues := make(map[string]int) for qname, p := range cfg.Queues { if err := base.ValidateQueueName(qname); err != nil { @@ -362,6 +376,7 @@ func NewServer(r RedisConnOpt, cfg Config) *Server { logger: logger, broker: rdb, retryDelayFunc: delayFunc, + isFailureFunc: isFailureFunc, syncCh: syncCh, cancelations: cancels, concurrency: n, @@ -376,6 +391,7 @@ func NewServer(r RedisConnOpt, cfg Config) *Server { logger: logger, broker: rdb, retryDelayFunc: delayFunc, + isFailureFunc: isFailureFunc, queues: qnames, interval: 1 * time.Minute, })