2
0
mirror of https://github.com/hibiken/asynq.git synced 2025-01-12 16:03:37 +08:00
asynq/recoverer.go

127 lines
3.4 KiB
Go
Raw Normal View History

2020-06-21 22:05:57 +08:00
// Copyright 2020 Kentaro Hibino. All rights reserved.
// Use of this source code is governed by a MIT license
// that can be found in the LICENSE file.
package asynq
import (
"context"
2020-06-21 22:05:57 +08:00
"sync"
"time"
"github.com/hibiken/asynq/internal/base"
"github.com/hibiken/asynq/internal/errors"
2020-06-21 22:05:57 +08:00
"github.com/hibiken/asynq/internal/log"
)
type recoverer struct {
logger *log.Logger
broker base.Broker
2021-01-13 03:40:26 +08:00
retryDelayFunc RetryDelayFunc
isFailureFunc func(error) bool
2020-06-21 22:05:57 +08:00
// channel to communicate back to the long running "recoverer" goroutine.
done chan struct{}
2020-08-10 21:10:14 +08:00
// list of queues to check for deadline.
queues []string
2020-06-21 22:05:57 +08:00
// poll interval.
interval time.Duration
}
type recovererParams struct {
logger *log.Logger
broker base.Broker
2020-08-10 21:10:14 +08:00
queues []string
2020-06-21 22:05:57 +08:00
interval time.Duration
2021-01-13 03:40:26 +08:00
retryDelayFunc RetryDelayFunc
isFailureFunc func(error) bool
2020-06-21 22:05:57 +08:00
}
func newRecoverer(params recovererParams) *recoverer {
return &recoverer{
logger: params.logger,
broker: params.broker,
done: make(chan struct{}),
2020-08-10 21:10:14 +08:00
queues: params.queues,
2020-06-21 22:05:57 +08:00
interval: params.interval,
retryDelayFunc: params.retryDelayFunc,
isFailureFunc: params.isFailureFunc,
2020-06-21 22:05:57 +08:00
}
}
func (r *recoverer) shutdown() {
2020-06-21 22:05:57 +08:00
r.logger.Debug("Recoverer shutting down...")
// Signal the recoverer goroutine to stop polling.
r.done <- struct{}{}
}
func (r *recoverer) start(wg *sync.WaitGroup) {
wg.Add(1)
go func() {
defer wg.Done()
r.recover()
2020-06-21 22:05:57 +08:00
timer := time.NewTimer(r.interval)
for {
select {
case <-r.done:
r.logger.Debug("Recoverer done")
timer.Stop()
return
case <-timer.C:
r.recover()
timer.Reset(r.interval)
2020-06-21 22:05:57 +08:00
}
}
}()
}
// ErrLeaseExpired error indicates that the task failed because the worker working on the task
// could not extend its lease due to missing heartbeats. The worker may have crashed or been cut off from the network.
var ErrLeaseExpired = errors.New("asynq: task lease expired")
func (r *recoverer) recover() {
2022-03-12 02:44:12 +08:00
r.recoverLeaseExpiredTasks()
r.recoverStaleAggregationSets()
}
func (r *recoverer) recoverLeaseExpiredTasks() {
// Get all tasks which have expired 30 seconds ago or earlier to accomodate certain amount of clock skew.
cutoff := time.Now().Add(-30 * time.Second)
msgs, err := r.broker.ListLeaseExpired(cutoff, r.queues...)
if err != nil {
2022-03-12 02:44:12 +08:00
r.logger.Warnf("recoverer: could not list lease expired tasks: %v", err)
return
}
for _, msg := range msgs {
if msg.Retried >= msg.Retry {
r.archive(msg, ErrLeaseExpired)
} else {
r.retry(msg, ErrLeaseExpired)
}
}
}
2022-03-12 02:44:12 +08:00
// recoverStaleAggregationSets asks the broker to reclaim stale aggregation
// sets in every configured queue. A failure for one queue is logged as a
// warning and the remaining queues are still processed.
func (r *recoverer) recoverStaleAggregationSets() {
	for i := range r.queues {
		queue := r.queues[i]
		err := r.broker.ReclaimStaleAggregationSets(queue)
		if err == nil {
			continue
		}
		r.logger.Warnf("recoverer: could not reclaim stale aggregation sets in queue %q: %v", queue, err)
	}
}
func (r *recoverer) retry(msg *base.TaskMessage, err error) {
delay := r.retryDelayFunc(msg.Retried, err, NewTask(msg.Type, msg.Payload))
2020-06-21 22:05:57 +08:00
retryAt := time.Now().Add(delay)
if err := r.broker.Retry(context.Background(), msg, retryAt, err.Error(), r.isFailureFunc(err)); err != nil {
r.logger.Warnf("recoverer: could not retry lease expired task: %v", err)
2020-06-21 22:05:57 +08:00
}
}
func (r *recoverer) archive(msg *base.TaskMessage, err error) {
if err := r.broker.Archive(context.Background(), msg, err.Error()); err != nil {
r.logger.Warnf("recoverer: could not move task to archive: %v", err)
2020-06-21 22:05:57 +08:00
}
}