2
0
mirror of https://github.com/hibiken/asynq.git synced 2024-12-26 07:42:17 +08:00
asynq/processor.go

424 lines
11 KiB
Go
Raw Normal View History

2020-01-03 10:13:16 +08:00
// Copyright 2020 Kentaro Hibino. All rights reserved.
// Use of this source code is governed by a MIT license
// that can be found in the LICENSE file.
2019-11-19 22:48:54 +08:00
package asynq
import (
"context"
2019-11-19 22:48:54 +08:00
"fmt"
"math/rand"
2020-01-12 23:46:51 +08:00
"sort"
"sync"
2019-11-19 22:48:54 +08:00
"time"
2019-12-04 13:01:26 +08:00
2019-12-22 23:15:45 +08:00
"github.com/hibiken/asynq/internal/base"
2020-05-06 13:10:11 +08:00
"github.com/hibiken/asynq/internal/log"
2019-12-04 13:01:26 +08:00
"github.com/hibiken/asynq/internal/rdb"
2020-01-22 22:28:47 +08:00
"golang.org/x/time/rate"
2019-11-19 22:48:54 +08:00
)
2019-11-21 12:08:03 +08:00
type processor struct {
2020-05-06 13:10:11 +08:00
logger *log.Logger
2020-04-18 22:55:10 +08:00
broker base.Broker
2019-11-19 22:48:54 +08:00
2020-04-13 02:41:50 +08:00
ss *base.ServerState
2020-02-18 22:57:39 +08:00
handler Handler
2019-11-19 22:48:54 +08:00
queueConfig map[string]int
2020-01-06 23:15:59 +08:00
2020-01-12 23:46:51 +08:00
// orderedQueues is set only in strict-priority mode.
orderedQueues []string
retryDelayFunc retryDelayFunc
errHandler ErrorHandler
shutdownTimeout time.Duration
// channel via which to send sync requests to syncer.
syncRequestCh chan<- *syncRequest
2020-01-22 22:28:47 +08:00
// rate limiter to prevent spamming logs with a bunch of errors.
errLogLimiter *rate.Limiter
2019-11-19 22:48:54 +08:00
// sema is a counting semaphore to ensure the number of active workers
2019-11-30 04:48:54 +08:00
// does not exceed the limit.
2019-11-19 22:48:54 +08:00
sema chan struct{}
2019-11-21 12:08:03 +08:00
// channel to communicate back to the long running "processor" goroutine.
2019-12-18 12:34:56 +08:00
// once is used to send value to the channel only once.
2019-11-19 22:48:54 +08:00
done chan struct{}
2019-12-18 12:34:56 +08:00
once sync.Once
2019-12-19 10:57:48 +08:00
// abort channel is closed when the shutdown of the "processor" goroutine starts.
abort chan struct{}
// quit channel communicates to the in-flight worker goroutines to stop.
quit chan struct{}
// cancelations is a set of cancel functions for all in-progress tasks.
cancelations *base.Cancelations
2019-11-19 22:48:54 +08:00
}
type retryDelayFunc func(n int, err error, task *Task) time.Duration
type newProcessorParams struct {
2020-05-06 13:10:11 +08:00
logger *log.Logger
2020-04-18 22:55:10 +08:00
broker base.Broker
ss *base.ServerState
retryDelayFunc retryDelayFunc
syncCh chan<- *syncRequest
cancelations *base.Cancelations
errHandler ErrorHandler
shutdownTimeout time.Duration
}
2020-01-12 23:46:51 +08:00
// newProcessor constructs a new processor.
func newProcessor(params newProcessorParams) *processor {
info := params.ss.GetInfo()
2020-02-18 22:57:39 +08:00
qcfg := normalizeQueueCfg(info.Queues)
2020-01-12 23:46:51 +08:00
orderedQueues := []string(nil)
2020-02-18 22:57:39 +08:00
if info.StrictPriority {
2020-01-12 23:46:51 +08:00
orderedQueues = sortByPriority(qcfg)
}
2019-11-21 12:08:03 +08:00
return &processor{
logger: params.logger,
2020-04-17 21:56:44 +08:00
broker: params.broker,
ss: params.ss,
2020-01-06 23:15:59 +08:00
queueConfig: qcfg,
2020-01-12 23:46:51 +08:00
orderedQueues: orderedQueues,
retryDelayFunc: params.retryDelayFunc,
syncRequestCh: params.syncCh,
cancelations: params.cancelations,
2020-01-22 22:28:47 +08:00
errLogLimiter: rate.NewLimiter(rate.Every(3*time.Second), 1),
2020-02-18 22:57:39 +08:00
sema: make(chan struct{}, info.Concurrency),
2019-11-30 04:48:54 +08:00
done: make(chan struct{}),
2019-12-19 10:57:48 +08:00
abort: make(chan struct{}),
quit: make(chan struct{}),
errHandler: params.errHandler,
handler: HandlerFunc(func(ctx context.Context, t *Task) error { return fmt.Errorf("handler not set") }),
2019-11-19 22:48:54 +08:00
}
}
// Note: stops only the "processor" goroutine, does not stop workers.
// It's safe to call this method multiple times.
func (p *processor) stop() {
2019-12-18 12:34:56 +08:00
p.once.Do(func() {
2020-03-09 22:11:16 +08:00
p.logger.Info("Processor shutting down...")
2019-12-18 12:34:56 +08:00
// Unblock if processor is waiting for sema token.
2019-12-19 10:57:48 +08:00
close(p.abort)
2019-12-18 12:34:56 +08:00
// Signal the processor goroutine to stop processing tasks
// from the queue.
p.done <- struct{}{}
})
}
2019-11-30 04:48:54 +08:00
// NOTE: once terminated, processor cannot be re-started.
2019-11-21 12:08:03 +08:00
func (p *processor) terminate() {
p.stop()
time.AfterFunc(p.shutdownTimeout, func() { close(p.quit) })
2020-03-09 22:11:16 +08:00
p.logger.Info("Waiting for all workers to finish...")
// send cancellation signal to all in-progress task handlers
for _, cancel := range p.cancelations.GetAll() {
cancel()
}
2019-11-28 11:36:56 +08:00
// block until all workers have released the token
2019-11-21 12:08:03 +08:00
for i := 0; i < cap(p.sema); i++ {
p.sema <- struct{}{}
}
2020-03-09 22:11:16 +08:00
p.logger.Info("All workers have finished")
p.restore() // move any unfinished tasks back to the queue.
2019-11-19 22:48:54 +08:00
}
2020-02-16 15:14:30 +08:00
func (p *processor) start(wg *sync.WaitGroup) {
// NOTE: The call to "restore" needs to complete before starting
// the processor goroutine.
p.restore()
2020-02-16 15:14:30 +08:00
wg.Add(1)
2019-11-19 22:48:54 +08:00
go func() {
2020-02-16 15:14:30 +08:00
defer wg.Done()
2019-11-19 22:48:54 +08:00
for {
select {
2019-11-21 12:08:03 +08:00
case <-p.done:
2020-03-09 22:11:16 +08:00
p.logger.Info("Processor done")
return
2019-11-19 22:48:54 +08:00
default:
2019-11-21 12:08:03 +08:00
p.exec()
2019-11-19 22:48:54 +08:00
}
}
}()
}
2019-11-22 13:45:27 +08:00
// exec pulls a task out of the queue and starts a worker goroutine to
// process the task.
2019-11-21 12:08:03 +08:00
func (p *processor) exec() {
qnames := p.queues()
2020-04-17 21:56:44 +08:00
msg, err := p.broker.Dequeue(qnames...)
if err == rdb.ErrNoProcessableTask { // TODO: Need to decouple this error from rdb to support other brokers
// queues are empty, this is a normal behavior.
if len(p.queueConfig) > 1 {
// sleep to avoid slamming redis and let scheduler move tasks into queues.
// Note: With multiple queues, we are not using blocking pop operation and
// polling queues instead. This adds significant load to redis.
time.Sleep(time.Second)
}
2019-11-28 11:36:56 +08:00
return
}
2019-11-19 22:48:54 +08:00
if err != nil {
2020-01-22 22:28:47 +08:00
if p.errLogLimiter.Allow() {
2020-05-06 13:10:11 +08:00
p.logger.Errorf("Dequeue error: %v", err)
2020-01-22 22:28:47 +08:00
}
2019-11-28 11:36:56 +08:00
return
2019-11-19 22:48:54 +08:00
}
select {
2019-12-19 10:57:48 +08:00
case <-p.abort:
// shutdown is starting, return immediately after requeuing the message.
p.requeue(msg)
return
case p.sema <- struct{}{}: // acquire token
2020-04-13 02:41:50 +08:00
p.ss.AddWorkerStats(msg, time.Now())
go func() {
2020-02-02 14:22:48 +08:00
defer func() {
2020-04-13 02:41:50 +08:00
p.ss.DeleteWorkerStats(msg)
2020-02-02 14:22:48 +08:00
<-p.sema /* release token */
}()
2020-02-12 13:53:59 +08:00
ctx, cancel := createContext(msg)
p.cancelations.Add(msg.ID.String(), cancel)
defer func() {
cancel()
p.cancelations.Delete(msg.ID.String())
}()
resCh := make(chan error, 1)
task := NewTask(msg.Type, msg.Payload)
go func() { resCh <- perform(ctx, task, p.handler) }()
select {
case <-p.quit:
// time is up, quit this worker goroutine.
2020-05-06 13:10:11 +08:00
p.logger.Warnf("Quitting worker. task id=%s", msg.ID)
return
case resErr := <-resCh:
// Note: One of three things should happen.
// 1) Done -> Removes the message from InProgress
// 2) Retry -> Removes the message from InProgress & Adds the message to Retry
// 3) Kill -> Removes the message from InProgress & Adds the message to Dead
if resErr != nil {
if p.errHandler != nil {
p.errHandler.HandleError(task, resErr, msg.Retried, msg.Retry)
}
if msg.Retried >= msg.Retry {
p.kill(msg, resErr)
2019-12-26 12:04:29 +08:00
} else {
p.retry(msg, resErr)
}
return
}
p.markAsDone(msg)
2019-11-22 13:45:27 +08:00
}
}()
}
2019-11-19 22:48:54 +08:00
}
// restore moves all tasks from "in-progress" back to queue
// to restore all unfinished tasks.
func (p *processor) restore() {
2020-04-17 21:56:44 +08:00
n, err := p.broker.RequeueAll()
if err != nil {
2020-05-06 13:10:11 +08:00
p.logger.Errorf("Could not restore unfinished tasks: %v", err)
}
if n > 0 {
2020-05-06 13:10:11 +08:00
p.logger.Infof("Restored %d unfinished tasks back to queue", n)
}
}
2019-12-22 23:15:45 +08:00
func (p *processor) requeue(msg *base.TaskMessage) {
2020-04-17 21:56:44 +08:00
err := p.broker.Requeue(msg)
if err != nil {
2020-05-06 13:10:11 +08:00
p.logger.Errorf("Could not push task id=%s back to queue: %v", msg.ID, err)
}
}
2019-12-22 23:15:45 +08:00
func (p *processor) markAsDone(msg *base.TaskMessage) {
2020-04-17 21:56:44 +08:00
err := p.broker.Done(msg)
if err != nil {
2020-01-21 23:17:41 +08:00
errMsg := fmt.Sprintf("Could not remove task id=%s from %q", msg.ID, base.InProgressQueue)
2020-05-06 13:10:11 +08:00
p.logger.Warnf("%s; Will retry syncing", errMsg)
p.syncRequestCh <- &syncRequest{
fn: func() error {
2020-04-17 21:56:44 +08:00
return p.broker.Done(msg)
},
errMsg: errMsg,
}
}
}
func (p *processor) retry(msg *base.TaskMessage, e error) {
d := p.retryDelayFunc(msg.Retried, e, NewTask(msg.Type, msg.Payload))
retryAt := time.Now().Add(d)
2020-04-17 21:56:44 +08:00
err := p.broker.Retry(msg, retryAt, e.Error())
if err != nil {
2020-01-21 23:17:41 +08:00
errMsg := fmt.Sprintf("Could not move task id=%s from %q to %q", msg.ID, base.InProgressQueue, base.RetryQueue)
2020-05-06 13:10:11 +08:00
p.logger.Warnf("%s; Will retry syncing", errMsg)
p.syncRequestCh <- &syncRequest{
fn: func() error {
2020-04-17 21:56:44 +08:00
return p.broker.Retry(msg, retryAt, e.Error())
},
errMsg: errMsg,
}
}
}
func (p *processor) kill(msg *base.TaskMessage, e error) {
2020-05-06 13:10:11 +08:00
p.logger.Warnf("Retry exhausted for task id=%s", msg.ID)
2020-04-17 21:56:44 +08:00
err := p.broker.Kill(msg, e.Error())
if err != nil {
2020-01-21 23:17:41 +08:00
errMsg := fmt.Sprintf("Could not move task id=%s from %q to %q", msg.ID, base.InProgressQueue, base.DeadQueue)
2020-05-06 13:10:11 +08:00
p.logger.Warnf("%s; Will retry syncing", errMsg)
p.syncRequestCh <- &syncRequest{
fn: func() error {
2020-04-17 21:56:44 +08:00
return p.broker.Kill(msg, e.Error())
},
errMsg: errMsg,
}
}
}
2019-11-28 06:03:04 +08:00
2020-01-12 23:46:51 +08:00
// queues returns a list of queues to query.
// Order of the queue names is based on the priority of each queue.
// Queue names is sorted by their priority level if strict-priority is true.
// If strict-priority is false, then the order of queue names are roughly based on
// the priority level but randomized in order to avoid starving low priority queues.
func (p *processor) queues() []string {
// skip the overhead of generating a list of queue names
// if we are processing one queue.
if len(p.queueConfig) == 1 {
for qname := range p.queueConfig {
return []string{qname}
}
}
2020-01-12 23:46:51 +08:00
if p.orderedQueues != nil {
return p.orderedQueues
}
var names []string
for qname, priority := range p.queueConfig {
for i := 0; i < int(priority); i++ {
names = append(names, qname)
}
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
r.Shuffle(len(names), func(i, j int) { names[i], names[j] = names[j], names[i] })
return uniq(names, len(p.queueConfig))
}
2019-11-28 06:03:04 +08:00
// perform calls the handler with the given task.
// If the call returns without panic, it simply returns the value,
// otherwise, it recovers from panic and returns an error.
func perform(ctx context.Context, task *Task, h Handler) (err error) {
2019-11-28 06:03:04 +08:00
defer func() {
if x := recover(); x != nil {
err = fmt.Errorf("panic: %v", x)
}
}()
return h.ProcessTask(ctx, task)
2019-11-28 06:03:04 +08:00
}
// uniq dedupes elements and returns a slice of unique names of length l.
// Order of the output slice is based on the input list.
func uniq(names []string, l int) []string {
var res []string
seen := make(map[string]struct{})
for _, s := range names {
if _, ok := seen[s]; !ok {
seen[s] = struct{}{}
res = append(res, s)
}
if len(res) == l {
break
}
}
return res
}
2020-01-12 23:46:51 +08:00
2020-01-13 22:50:03 +08:00
// sortByPriority returns a list of queue names sorted by
2020-01-12 23:46:51 +08:00
// their priority level in descending order.
func sortByPriority(qcfg map[string]int) []string {
2020-01-12 23:46:51 +08:00
var queues []*queue
for qname, n := range qcfg {
queues = append(queues, &queue{qname, n})
}
sort.Sort(sort.Reverse(byPriority(queues)))
var res []string
for _, q := range queues {
res = append(res, q.name)
}
return res
}
type queue struct {
name string
priority int
2020-01-12 23:46:51 +08:00
}
type byPriority []*queue
func (x byPriority) Len() int { return len(x) }
func (x byPriority) Less(i, j int) bool { return x[i].priority < x[j].priority }
func (x byPriority) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
2020-02-02 14:22:48 +08:00
// normalizeQueueCfg divides priority numbers by their
// greatest common divisor.
func normalizeQueueCfg(queueCfg map[string]int) map[string]int {
var xs []int
2020-02-02 14:22:48 +08:00
for _, x := range queueCfg {
xs = append(xs, x)
}
d := gcd(xs...)
res := make(map[string]int)
2020-02-02 14:22:48 +08:00
for q, x := range queueCfg {
res[q] = x / d
}
return res
}
func gcd(xs ...int) int {
fn := func(x, y int) int {
2020-02-02 14:22:48 +08:00
for y > 0 {
x, y = y, x%y
}
return x
}
res := xs[0]
for i := 0; i < len(xs); i++ {
res = fn(xs[i], res)
if res == 1 {
return 1
}
}
return res
}
2020-02-12 13:53:59 +08:00
// createContext returns a context and cancel function for a given task message.
func createContext(msg *base.TaskMessage) (ctx context.Context, cancel context.CancelFunc) {
ctx = context.Background()
2020-02-12 13:53:59 +08:00
timeout, err := time.ParseDuration(msg.Timeout)
if err == nil && timeout != 0 {
ctx, cancel = context.WithTimeout(ctx, timeout)
}
deadline, err := time.Parse(time.RFC3339, msg.Deadline)
if err == nil && !deadline.IsZero() {
ctx, cancel = context.WithDeadline(ctx, deadline)
2020-02-12 13:53:59 +08:00
}
if cancel == nil {
ctx, cancel = context.WithCancel(ctx)
2020-02-12 13:53:59 +08:00
}
return ctx, cancel
2020-02-12 13:53:59 +08:00
}