feat: use dbfs to store log

This commit is contained in:
Jason Song 2022-10-18 13:32:52 +08:00
parent f633ec9704
commit d7b99e0028
7 changed files with 206 additions and 122 deletions

View File

@ -24,6 +24,11 @@
"path": "git.sr.ht/~mariusor/go-xsd-duration/LICENSE",
"licenseText": "MIT License\n\nCopyright (c) 2019 Go xsd:duration\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
},
{
"name": "gitea.com/gitea/act_runner/core",
"path": "gitea.com/gitea/act_runner/core/LICENSE",
"licenseText": "Copyright (c) 2022 The Gitea Authors\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n"
},
{
"name": "gitea.com/gitea/proto-go",
"path": "gitea.com/gitea/proto-go/LICENSE",
@ -544,11 +549,6 @@
"path": "github.com/gorilla/sessions/LICENSE",
"licenseText": "Copyright (c) 2012-2018 The Gorilla Authors. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are\nmet:\n\n\t * Redistributions of source code must retain the above copyright\nnotice, this list of conditions and the following disclaimer.\n\t * Redistributions in binary form must reproduce the above\ncopyright notice, this list of conditions and the following disclaimer\nin the documentation and/or other materials provided with the\ndistribution.\n\t * Neither the name of Google Inc. nor the names of its\ncontributors may be used to endorse or promote products derived from\nthis software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\nOWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
},
{
"name": "github.com/gorilla/websocket",
"path": "github.com/gorilla/websocket/LICENSE",
"licenseText": "Copyright (c) 2013 The Gorilla WebSocket Authors. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n Redistributions of source code must retain the above copyright notice, this\n list of conditions and the following disclaimer.\n\n Redistributions in binary form must reproduce the above copyright notice,\n this list of conditions and the following disclaimer in the documentation\n and/or other materials provided with the distribution.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
},
{
"name": "github.com/hashicorp/go-cleanhttp",
"path": "github.com/hashicorp/go-cleanhttp/LICENSE",

2
go.mod
View File

@ -59,7 +59,6 @@ require (
github.com/google/uuid v1.3.0
github.com/gorilla/feeds v1.1.1
github.com/gorilla/sessions v1.2.1
github.com/gorilla/websocket v1.4.2
github.com/hashicorp/go-version v1.6.0
github.com/hashicorp/golang-lru v0.5.4
github.com/huandu/xstrings v1.3.2
@ -209,6 +208,7 @@ require (
github.com/gorilla/handlers v1.5.1 // indirect
github.com/gorilla/mux v1.8.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.4.2 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect

View File

@ -19,19 +19,24 @@ import (
// Task represents a distribution of job
type Task struct {
ID int64
JobID int64
Job *RunJob `xorm:"-"`
Steps []*TaskStep `xorm:"-"`
Attempt int64
RunnerID int64 `xorm:"index"`
LogToFile bool // read log from database or from storage
LogURL string // url of the log file in storage
Result runnerv1.Result
Started timeutil.TimeStamp
Stopped timeutil.TimeStamp
Created timeutil.TimeStamp `xorm:"created"`
Updated timeutil.TimeStamp `xorm:"updated"`
ID int64
JobID int64
Job *RunJob `xorm:"-"`
Steps []*TaskStep `xorm:"-"`
Attempt int64
RunnerID int64 `xorm:"index"`
Result runnerv1.Result
Started timeutil.TimeStamp
Stopped timeutil.TimeStamp
LogURL string // dbfs:///a/b.log or s3://endpoint.com/a/b.log and etc.
LogLength int64 // lines count
LogSize int64 // blob size
LogIndexes []int64 `xorm:"JSON TEXT"` // line number to offset
LogExpired bool
Created timeutil.TimeStamp `xorm:"created"`
Updated timeutil.TimeStamp `xorm:"updated"`
}
func init() {
@ -80,7 +85,7 @@ func (task *Task) FullSteps() []*TaskStep {
headStep := &TaskStep{
Name: "Set up job",
LogIndex: 0,
LogLength: -1, // no limit
LogLength: task.LogLength,
Started: task.Started,
}
if firstStep != nil {
@ -93,7 +98,7 @@ func (task *Task) FullSteps() []*TaskStep {
}
if lastStep != nil {
tailStep.LogIndex = lastStep.LogIndex + lastStep.LogLength
tailStep.LogLength = -1 // no limit
tailStep.LogLength = task.LogLength - tailStep.LogIndex
tailStep.Started = lastStep.Stopped
}
steps := make([]*TaskStep, 0, len(task.Steps)+2)
@ -208,7 +213,16 @@ func CreateTaskForRunner(runner *Runner) (*Task, bool, error) {
return task, true, nil
}
func UpdateTask(state *runnerv1.TaskState) error {
func UpdateTask(ctx context.Context, task *Task, cols ...string) error {
sess := db.GetEngine(ctx).ID(task.ID)
if len(cols) > 0 {
sess.Cols(cols...)
}
_, err := sess.Update(task)
return err
}
func UpdateTaskByState(state *runnerv1.TaskState) error {
stepStates := map[int64]*runnerv1.StepState{}
for _, v := range state.Steps {
stepStates[v.Id] = v

View File

@ -1,77 +0,0 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package bots
import (
"fmt"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/timeutil"
)
// TaskLog represents a task's log, every task has a standalone table
type TaskLog struct {
ID int64 `xorm:"pk"`
Timestamp timeutil.TimeStamp
Content string `xorm:"LONGTEXT"`
}
func GetTaskLogTableName(taskID int64) string {
return fmt.Sprintf("bots_task_log_%d", taskID)
}
// CreateTaskLog table for a task
func CreateTaskLog(taskID int64) error {
return db.GetEngine(db.DefaultContext).
Table(GetTaskLogTableName(taskID)).
Sync(new(TaskLog))
}
func GetTaskLogs(taskID, index, length int64) (logs []*TaskLog, err error) {
sess := db.GetEngine(db.DefaultContext).Table(GetTaskLogTableName(taskID)).
Where("id>=?", index).OrderBy("id")
if length >= 0 {
sess.Limit(int(length))
}
err = sess.Find(&logs)
return
}
func InsertTaskLogs(taskID int64, logs []*TaskLog) (int64, error) {
if err := CreateTaskLog(taskID); err != nil {
return 0, err
}
table := GetTaskLogTableName(taskID)
// TODO: A more complete way to insert logs
// Be careful:
// - the id of a log can be 0
// - some logs may already exist in db
// - if use exec, consider different databases
// - the input should be ordered by id
// - the ids should be continuously increasing
// - the min id of input should be 1 + (the max id in db)
if len(logs) == 0 {
return 0, fmt.Errorf("no logs")
}
ack := logs[0].ID
sess := db.GetEngine(db.DefaultContext)
for _, v := range logs {
_, err := sess.Exec(fmt.Sprintf("INSERT IGNORE INTO %s (id, timestamp, content) VALUES (?,?,?)", table), v.ID, v.Timestamp, []byte(v.Content))
if err != nil {
log.Error("insert log %d of task %d: %v", v.ID, taskID, err)
break
}
ack = v.ID + 1
}
return ack, nil
}

130
modules/bots/log.go Normal file
View File

@ -0,0 +1,130 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package bots
import (
"bufio"
"context"
"fmt"
"io"
"net/url"
"os"
"strconv"
"strings"
"time"
"code.gitea.io/gitea/models/dbfs"
runnerv1 "gitea.com/gitea/proto-go/runner/v1"
"google.golang.org/protobuf/types/known/timestamppb"
)
const defaultBufSize = 64 * 1024
const (
StorageSchemaDBFS = "dbfs"
StorageSchemaS2 = "s2"
// ...
)
func WriteLogs(ctx context.Context, rawURL string, offset int64, rows []*runnerv1.LogRow) ([]int, error) {
name, err := parseDBFSName(rawURL)
if err != nil {
return nil, err
}
f, err := dbfs.OpenFile(ctx, name, os.O_RDWR|os.O_CREATE|os.O_TRUNC|os.O_APPEND)
if err != nil {
return nil, fmt.Errorf("dbfs OpenFile %q: %w", name, err)
}
defer f.Close()
if _, err := f.Seek(offset, io.SeekStart); err != nil {
return nil, fmt.Errorf("dbfs Seek %q: %w", name, err)
}
writer := bufio.NewWriterSize(f, defaultBufSize)
ns := make([]int, 0, len(rows))
for _, row := range rows {
n, err := writer.WriteString(FormatLog(row.Time.AsTime(), row.Content) + "\n")
if err != nil {
return nil, err
}
ns = append(ns, n)
}
if err := writer.Flush(); err != nil {
return nil, err
}
return ns, nil
}
func ReadLogs(ctx context.Context, rawURL string, offset int64, limit int64) ([]*runnerv1.LogRow, error) {
name, err := parseDBFSName(rawURL)
if err != nil {
return nil, err
}
f, err := dbfs.Open(ctx, name)
if err != nil {
return nil, fmt.Errorf("dbfs Open %q: %w", name, err)
}
defer f.Close()
if _, err := f.Seek(offset, io.SeekStart); err != nil {
return nil, fmt.Errorf("dbfs Seek %q: %w", name, err)
}
var rows []*runnerv1.LogRow
scanner := bufio.NewScanner(f)
for scanner.Scan() && (int64(len(rows)) < limit || limit < 0) {
t, c, err := ParseLog(scanner.Text())
if err != nil {
return nil, fmt.Errorf("parse log %q: %w", scanner.Text(), err)
}
rows = append(rows, &runnerv1.LogRow{
Time: timestamppb.New(t),
Content: c,
})
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("scan: %w", err)
}
return rows, nil
}
func parseDBFSName(rawURL string) (string, error) {
u, err := url.Parse(rawURL)
if err != nil {
return "", fmt.Errorf("invalid url: %w", err)
}
if u.Scheme != StorageSchemaDBFS {
return "", fmt.Errorf("%s supported only yet", StorageSchemaDBFS)
}
if u.Path == "" {
return "", fmt.Errorf("empty path")
}
return u.Path, nil
}
func FormatLog(timestamp time.Time, content string) string {
return fmt.Sprintf("%s %s", timestamp.UTC().Format(time.RFC3339Nano), strconv.Quote(content))
}
func ParseLog(in string) (timestamp time.Time, content string, err error) {
index := strings.IndexRune(in, ' ')
if index < 0 {
err = fmt.Errorf("invalid log: %q", in)
return
}
timestamp, err = time.Parse(time.RFC3339Nano, in[:index])
if err != nil {
return
}
content, err = strconv.Unquote(in[index+1:])
return
}

View File

@ -12,9 +12,9 @@ import (
"code.gitea.io/gitea/core"
bots_model "code.gitea.io/gitea/models/bots"
"code.gitea.io/gitea/modules/bots"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/timeutil"
runnerv1 "gitea.com/gitea/proto-go/runner/v1"
"gitea.com/gitea/proto-go/runner/v1/runnerv1connect"
@ -149,7 +149,7 @@ func (s *Service) UpdateTask(
) (*connect.Response[runnerv1.UpdateTaskResponse], error) {
res := connect.NewResponse(&runnerv1.UpdateTaskResponse{})
if err := bots_model.UpdateTask(req.Msg.State); err != nil {
if err := bots_model.UpdateTaskByState(req.Msg.State); err != nil {
return nil, status.Errorf(codes.Internal, "update task: %v", err)
}
@ -163,27 +163,38 @@ func (s *Service) UpdateLog(
) (*connect.Response[runnerv1.UpdateLogResponse], error) {
res := connect.NewResponse(&runnerv1.UpdateLogResponse{})
if len(req.Msg.Rows) == 0 {
// TODO: should be 1 + the max id of stored log
res.Msg.AckIndex = req.Msg.Index
task, err := bots_model.GetTaskByID(ctx, req.Msg.TaskId)
if err != nil {
return nil, status.Errorf(codes.Internal, "get task: %v", err)
}
if task.LogURL == "" {
task.LogURL = fmt.Sprintf("dbfs:///bots/tasks/%d.log", task.ID)
if err := bots_model.UpdateTask(ctx, task, "log_url"); err != nil {
return nil, status.Errorf(codes.Internal, "update task: %v", err)
}
}
ack := task.LogLength
if len(req.Msg.Rows) == 0 || req.Msg.Index > ack || int64(len(req.Msg.Rows))+req.Msg.Index <= ack {
res.Msg.AckIndex = ack
return res, nil
}
rowIndex := req.Msg.Index
rows := make([]*bots_model.TaskLog, len(req.Msg.Rows))
for i, v := range req.Msg.Rows {
rows[i] = &bots_model.TaskLog{
ID: rowIndex + int64(i),
Timestamp: timeutil.TimeStamp(v.Time.AsTime().Unix()),
Content: v.Content,
}
rows := req.Msg.Rows[ack-req.Msg.Index:]
ns, err := bots.WriteLogs(ctx, task.LogURL, task.LogSize, rows)
if err != nil {
return nil, status.Errorf(codes.Internal, "write logs: %v", err)
}
task.LogLength += int64(len(rows))
for _, n := range ns {
task.LogIndexes = append(task.LogIndexes, task.LogSize)
task.LogSize += int64(n)
}
if err := bots_model.UpdateTask(ctx, task, "log_indexes", "log_length", "log_size"); err != nil {
return nil, status.Errorf(codes.Internal, "update task: %v", err)
}
ack, err := bots_model.InsertTaskLogs(req.Msg.TaskId, rows)
if err != nil {
return nil, status.Errorf(codes.Internal, "insert task log: %v", err)
}
res.Msg.AckIndex = ack
res.Msg.AckIndex = task.LogLength
if req.Msg.NoMore {
// TODO: transfer logs to storage from db

View File

@ -3,11 +3,14 @@ package dev
import (
"fmt"
"net/http"
"time"
"code.gitea.io/gitea/core"
bots_model "code.gitea.io/gitea/models/bots"
"code.gitea.io/gitea/modules/bots"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/web"
runnerv1 "gitea.com/gitea/proto-go/runner/v1"
)
func BuildView(ctx *context.Context) {
@ -78,7 +81,7 @@ type BuildViewStepLog struct {
}
type BuildViewStepLogLine struct {
Ln int `json:"ln"`
Ln int64 `json:"ln"`
M string `json:"m"`
T float64 `json:"t"`
}
@ -172,9 +175,12 @@ func BuildViewPost(ctx *context.Context) {
for _, cursor := range req.StepLogCursors {
if cursor.Expanded {
step := steps[cursor.StepIndex]
var logRows []*bots_model.TaskLog
var logRows []*runnerv1.LogRow
if cursor.Cursor < step.LogLength || step.LogLength < 0 {
logRows, err = bots_model.GetTaskLogs(task.ID, step.LogIndex+cursor.Cursor, step.LogLength-cursor.Cursor)
index := step.LogIndex + cursor.Cursor
length := step.LogLength - cursor.Cursor
offset := task.LogIndexes[index]
logRows, err = bots.ReadLogs(ctx, task.LogURL, offset, length)
if err != nil {
ctx.Error(http.StatusInternalServerError, err.Error())
return
@ -183,9 +189,9 @@ func BuildViewPost(ctx *context.Context) {
logLines := make([]BuildViewStepLogLine, len(logRows))
for i, row := range logRows {
logLines[i] = BuildViewStepLogLine{
Ln: i,
Ln: cursor.Cursor + int64(i),
M: row.Content,
T: float64(row.Timestamp),
T: float64(row.Time.AsTime().UnixNano()) / float64(time.Second),
}
}
resp.LogsData.StreamingLogs = append(resp.LogsData.StreamingLogs, BuildViewStepLog{