feat: stop zombie and endless tasks

This commit is contained in:
Jason Song 2022-11-02 17:59:01 +08:00
parent b1da53286d
commit 705aaf3eb2
6 changed files with 223 additions and 3 deletions

View File

@ -37,8 +37,8 @@ type Task struct {
Attempt int64
RunnerID int64 `xorm:"index"`
Result runnerv1.Result
Status Status `xorm:"index"`
Started timeutil.TimeStamp
Status Status `xorm:"index"`
Started timeutil.TimeStamp `xorm:"index"`
Stopped timeutil.TimeStamp
Token string `xorm:"-"`
@ -54,7 +54,7 @@ type Task struct {
LogExpired bool // files that are too old will be deleted
Created timeutil.TimeStamp `xorm:"created"`
Updated timeutil.TimeStamp `xorm:"updated"`
Updated timeutil.TimeStamp `xorm:"updated index"`
}
func init() {
@ -412,6 +412,57 @@ func UpdateTaskByState(state *runnerv1.TaskState) (*Task, error) {
return task, nil
}
// StopTask records result on task and persists it, together with the task's
// steps, inside a database transaction.
//
// When result is anything other than RESULT_UNSPECIFIED the task status is
// derived from the result, the task is stamped as stopped, and the owning run
// job is updated with the same status and stop time. Every step that has no
// result yet inherits result and is stamped as stopped (and as started, if it
// never started). All steps are written back, changed or not.
//
// NOTE(review): the incoming ctx is replaced by the context returned from
// db.TxContext(), so the caller's context is not used for the database work.
//
// It returns the updated task, or nil and the first error encountered; the
// deferred Close runs on every return path.
func StopTask(ctx context.Context, task *Task, result runnerv1.Result) (*Task, error) {
	ctx, committer, err := db.TxContext()
	if err != nil {
		return nil, err
	}
	defer committer.Close()

	sess := db.GetEngine(ctx)
	stoppedAt := timeutil.TimeStampNow()

	task.Result = result
	if result != runnerv1.Result_RESULT_UNSPECIFIED {
		task.Status = Status(result)
		task.Stopped = stoppedAt

		// Keep the owning job in sync with the task's final state.
		job := &RunJob{
			ID:      task.JobID,
			Status:  task.Status,
			Stopped: task.Stopped,
		}
		if _, err := UpdateRunJob(ctx, job, nil); err != nil {
			return nil, err
		}
	}

	if _, err := sess.ID(task.ID).Update(task); err != nil {
		return nil, err
	}

	// Steps are only available after the attributes have been loaded.
	if err := task.LoadAttributes(ctx); err != nil {
		return nil, err
	}

	for _, step := range task.Steps {
		if step.Result == runnerv1.Result_RESULT_UNSPECIFIED {
			// The step never reported a result: close it with the task's result.
			step.Result = result
			step.Stopped = stoppedAt
			if step.Started == 0 {
				step.Started = stoppedAt
			}
		}
		if _, err := sess.ID(step.ID).Update(step); err != nil {
			return nil, err
		}
	}

	if err := committer.Commit(); err != nil {
		return nil, err
	}
	return task, nil
}
func isSubset(set, subset []string) bool {
m := make(map[string]struct{}, len(set))
for _, v := range set {

50
models/bots/task_list.go Normal file
View File

@ -0,0 +1,50 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package bots
import (
"context"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/timeutil"
"xorm.io/builder"
)
// TaskList is a slice of pointers to Task.
type TaskList []*Task
// FindTaskOptions describes the filters and paging used when querying tasks.
// A filter field left at its zero value is not applied (see toConds).
type FindTaskOptions struct {
// paging options; FindTasks applies a limit only when PageSize > 0 and Page >= 1
db.ListOptions
// when greater than StatusUnknown, only tasks with exactly this status match
Status Status
// when greater than 0, only tasks whose "updated" column is earlier than this match
UpdatedBefore timeutil.TimeStamp
// when greater than 0, only tasks whose "started" column is earlier than this match
StartedBefore timeutil.TimeStamp
}
// toConds translates the options into a query condition. Each filter is
// AND-ed in only when its field is set: Status must be above StatusUnknown,
// and the two timestamps must be greater than zero.
func (opts FindTaskOptions) toConds() builder.Cond {
	conds := builder.NewCond()

	if status := opts.Status; status > StatusUnknown {
		conds = conds.And(builder.Eq{"status": status})
	}
	if before := opts.UpdatedBefore; before > 0 {
		conds = conds.And(builder.Lt{"updated": before})
	}
	if before := opts.StartedBefore; before > 0 {
		conds = conds.And(builder.Lt{"started": before})
	}

	return conds
}
// FindTasks returns the tasks matching opts along with the count reported by
// FindAndCount. Paging is applied only when both PageSize > 0 and Page >= 1.
func FindTasks(ctx context.Context, opts FindTaskOptions) (TaskList, int64, error) {
	sess := db.GetEngine(ctx).Where(opts.toConds())

	if opts.PageSize > 0 && opts.Page >= 1 {
		offset := (opts.Page - 1) * opts.PageSize
		sess = sess.Limit(opts.PageSize, offset)
	}

	var found TaskList
	count, err := sess.FindAndCount(&found)
	return found, count, err
}
// CountTasks returns the number of tasks matching the filters in opts.
func CountTasks(ctx context.Context, opts FindTaskOptions) (int64, error) {
	conds := opts.toConds()
	sess := db.GetEngine(ctx).Where(conds)
	return sess.Count(new(Task))
}

View File

@ -2574,6 +2574,9 @@ dashboard.delete_old_actions = Delete all old actions from database
dashboard.delete_old_actions.started = Delete all old actions from database started.
dashboard.update_checker = Update checker
dashboard.delete_old_system_notices = Delete all old system notices from database
dashboard.stop_zombie_tasks = Stop zombie tasks
dashboard.stop_endless_tasks = Stop endless tasks
dashboard.cancel_abandoned_jobs = Cancel abandoned jobs
users.user_manage_panel = User Account Management
users.new_account = Create User Account

View File

@ -0,0 +1,67 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package bots
import (
"context"
"time"
bots_model "code.gitea.io/gitea/models/bots"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/timeutil"
runnerv1 "gitea.com/gitea/proto-go/runner/v1"
)
// Timeouts used by the task and job cleanup routines in this file.
const (
// a running task that has not been updated for this long is treated as a zombie
zombieTaskTimeout = 10 * time.Minute
endlessTaskTimeout = 3 * time.Hour // the task is running for a long time with updates
abandonedJobTimeout = 24 * time.Hour // the job is waiting for being picked by a runner
)
// StopZombieTasks stops the tasks which have running status but haven't been
// updated for longer than zombieTaskTimeout. Each such task is stopped with
// RESULT_FAILURE.
//
// It returns an error only when the lookup fails; failures to stop individual
// tasks are logged and skipped.
func StopZombieTasks(ctx context.Context) error {
	return stopTasks(ctx, "zombie", bots_model.FindTaskOptions{
		Status:        bots_model.StatusRunning,
		UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-zombieTaskTimeout).Unix()),
	})
}

// StopEndlessTasks stops the tasks which have running status and were started
// more than endlessTaskTimeout ago, even if they are still being updated.
// Each such task is stopped with RESULT_FAILURE.
//
// It returns an error only when the lookup fails; failures to stop individual
// tasks are logged and skipped.
func StopEndlessTasks(ctx context.Context) error {
	return stopTasks(ctx, "endless", bots_model.FindTaskOptions{
		Status:        bots_model.StatusRunning,
		StartedBefore: timeutil.TimeStamp(time.Now().Add(-endlessTaskTimeout).Unix()),
	})
}

// stopTasks finds every task matching opts and stops it with RESULT_FAILURE.
// kind is only used to label log messages (e.g. "zombie", "endless").
func stopTasks(ctx context.Context, kind string, opts bots_model.FindTaskOptions) error {
	tasks, _, err := bots_model.FindTasks(ctx, opts)
	if err != nil {
		log.Warn("find %s tasks: %v", kind, err)
		return err
	}

	for _, task := range tasks {
		if _, err := bots_model.StopTask(ctx, task, runnerv1.Result_RESULT_FAILURE); err != nil {
			// Best effort: one task failing to stop must not block the rest.
			log.Warn("stop %s task %v: %v", kind, task.ID, err)
		}
	}
	return nil
}
// CancelAbandonedJobs is intended to cancel the jobs which have waiting status
// but haven't been picked by a runner for a long time.
// It is not implemented yet: it does nothing and always returns nil.
func CancelAbandonedJobs(ctx context.Context) error {
// TODO
return nil
}

View File

@ -31,6 +31,7 @@ func NewContext(original context.Context) {
_, _, finished := process.GetManager().AddTypedContext(graceful.GetManager().ShutdownContext(), "Service: Cron", process.SystemProcessType, true)
initBasicTasks()
initExtendedTasks()
initBotsTasks()
lock.Lock()
for _, task := range tasks {

View File

@ -0,0 +1,48 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package cron
import (
"context"
user_model "code.gitea.io/gitea/models/user"
bots_service "code.gitea.io/gitea/services/bots"
)
// initBotsTasks registers the bots-related cron tasks, in order: stopping
// zombie tasks, stopping endless tasks, and cancelling abandoned jobs.
func initBotsTasks() {
	registrations := []func(){
		registerStopZombieTasks,
		registerStopEndlessTasks,
		registerCancelAbandonedJobs,
	}
	for _, register := range registrations {
		register()
	}
}
func registerStopZombieTasks() {
RegisterTaskFatal("stop_zombie_tasks", &BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@every 5m",
}, func(ctx context.Context, _ *user_model.User, cfg Config) error {
return bots_service.StopZombieTasks(ctx)
})
}
func registerStopEndlessTasks() {
RegisterTaskFatal("stop_endless_tasks", &BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@every 30m",
}, func(ctx context.Context, _ *user_model.User, cfg Config) error {
return bots_service.StopEndlessTasks(ctx)
})
}
func registerCancelAbandonedJobs() {
RegisterTaskFatal("cancel_abandoned_jobs", &BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@every 6h",
}, func(ctx context.Context, _ *user_model.User, cfg Config) error {
return bots_service.CancelAbandonedJobs(ctx)
})
}