Added handling for artifacts with classifiers and added debug mode which will list files for deletion

This commit is contained in:
diana.strebkova@t-systems.com 2025-03-11 19:20:58 +01:00
parent 1788baf2cd
commit 8823e7aa50
5 changed files with 126 additions and 76 deletions

View File

@ -2622,6 +2622,8 @@ LEVEL = Info
;; Cleanup expired packages/data then targets the files within all maven snapshots versions
;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1
;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
;; Enable debug (dry-run) mode for the Maven snapshot cleanup. While enabled, no snapshot artifacts are deleted; the files that would have been deleted are only written to the log.
; DEBUG_MAVEN_CLEANUP = true
;LIMIT_SIZE_NPM = -1
;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
;LIMIT_SIZE_NUGET = -1

View File

@ -12,7 +12,6 @@ import (
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
@ -24,6 +23,8 @@ func init() {
}
var (
// ErrMetadataFile indicates that a file is a Maven metadata file (maven-metadata.xml) and therefore has no build number
ErrMetadataFile = errors.New("metadata file")
// ErrDuplicatePackageFile indicates a duplicated package file error
ErrDuplicatePackageFile = util.NewAlreadyExistErrorf("package file already exists")
// ErrPackageFileNotExist indicates a package file not exist error
@ -229,57 +230,69 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error)
return db.Exist[PackageFile](ctx, opts.toConds())
}
// GetFilesByBuildNumber retrieves all files for a package version with build numbers <= maxBuildNumber.
func GetFilesByBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int) ([]*PackageFile, error) {
if maxBuildNumber < 0 {
return nil, errors.New("maxBuildNumber must be a non-negative integer")
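// GetFilesBelowBuildNumber retrieves all files of a Maven snapshot version whose build number is <= maxBuildNumber.
// It returns two slices: the files that match this filter, and the files that were skipped because no build number
// could be extracted from their name. Maven metadata files are excluded from both slices.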
func GetFilesBelowBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int, classifiers ...string) ([]*PackageFile, []*PackageFile, error) {
if maxBuildNumber <= 0 {
return nil, nil, errors.New("maxBuildNumber must be a positive integer")
}
files, err := GetFilesByVersionID(ctx, versionID)
if err != nil {
return nil, fmt.Errorf("failed to retrieve files: %w", err)
return nil, nil, fmt.Errorf("failed to retrieve files: %w", err)
}
var filteredFiles []*PackageFile
var filteredFiles, skippedFiles []*PackageFile
for _, file := range files {
buildNumber, err := extractBuildNumberFromFileName(file.Name)
buildNumber, err := extractBuildNumberFromFileName(file.Name, classifiers...)
if err != nil {
if err.Error() == "metadata file" {
continue
if !errors.Is(err, ErrMetadataFile) {
skippedFiles = append(skippedFiles, file)
}
log.Warn("Failed to extract build number from file name '%s': %v", file.Name, err)
continue
}
if buildNumber <= maxBuildNumber {
filteredFiles = append(filteredFiles, file)
}
}
log.Info("Filtered %d files out of %d total files for version ID %d with maxBuildNumber %d", len(filteredFiles), len(files), versionID, maxBuildNumber)
return filteredFiles, nil
return filteredFiles, skippedFiles, nil
}
// extractBuildNumberFromFileName extracts the build number from the file name.
func extractBuildNumberFromFileName(filename string) (int, error) {
// Skip metadata files
// extractBuildNumberFromFileName extracts the build number from a Maven snapshot file name.
// Expected formats:
//
// "artifact-1.0.0-20250311.083409-9.tgz" returns 9
// "artifact-to-test-2.0.0-20250311.083409-10-sources.tgz" returns 10
func extractBuildNumberFromFileName(filename string, classifiers ...string) (int, error) {
if strings.Contains(filename, "maven-metadata.xml") {
return 0, errors.New("metadata file")
return 0, ErrMetadataFile
}
// Split filename by hyphens to extract the build number
parts := strings.Split(filename, "-")
if len(parts) < 3 {
return 0, fmt.Errorf("invalid file name format: '%s'", filename)
dotIdx := strings.LastIndex(filename, ".")
if dotIdx == -1 {
return 0, fmt.Errorf("extract build number from filename: no file extension found in '%s'", filename)
}
base := filename[:dotIdx]
// Remove classifier suffix if present.
for _, classifier := range classifiers {
suffix := "-" + classifier
if strings.HasSuffix(base, suffix) {
base = base[:len(base)-len(suffix)]
break
}
}
// Extract the last part before the extension
buildNumberWithExt := parts[len(parts)-1]
buildNumberStr := strings.Split(buildNumberWithExt, ".")[0]
// The build number should be the token after the last dash.
lastDash := strings.LastIndex(base, "-")
if lastDash == -1 {
return 0, fmt.Errorf("extract build number from filename: invalid file name format in '%s'", filename)
}
buildNumberStr := base[lastDash+1:]
buildNumber, err := strconv.Atoi(buildNumberStr)
if err != nil {
return 0, fmt.Errorf("failed to convert maven package build number to integer: '%s'", buildNumberStr)
return 0, fmt.Errorf("extract build number from filename: failed to convert build number '%s' to integer in '%s': %v", buildNumberStr, filename, err)
}
return buildNumber, nil

View File

@ -7,6 +7,7 @@ import (
"encoding/xml"
"errors"
"io"
"strconv"
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/modules/validation"
@ -32,6 +33,12 @@ type Dependency struct {
Version string `json:"version,omitempty"`
}
// SnapshotMetadata holds the values extracted from the maven-metadata.xml of a
// snapshot version: its build number and the classifiers listed for it.
type SnapshotMetadata struct {
// BuildNumber is the <snapshot><buildNumber> value parsed as an integer.
BuildNumber int `json:"build_number,omitempty"`
// Classifiers contains every non-empty <classifier> found in the
// <snapshotVersion> entries, in document order and without de-duplication.
Classifiers []string `json:"classifiers,omitempty"`
}
type pomStruct struct {
XMLName xml.Name `xml:"project"`
@ -62,7 +69,7 @@ type pomStruct struct {
} `xml:"dependencies>dependency"`
}
type MavenMetadata struct {
type snapshotMetadataStruct struct {
XMLName xml.Name `xml:"metadata"`
GroupID string `xml:"groupId"`
ArtifactID string `xml:"artifactId"`
@ -74,11 +81,10 @@ type MavenMetadata struct {
BuildNumber string `xml:"buildNumber"`
} `xml:"snapshot"`
SnapshotVersions []struct {
SnapshotVersion struct {
Extension string `xml:"extension"`
Value string `xml:"value"`
Updated string `xml:"updated"`
} `xml:"snapshotVersion"`
Extension string `xml:"extension"`
Classifier string `xml:"classifier"`
Value string `xml:"value"`
Updated string `xml:"updated"`
} `xml:"snapshotVersions>snapshotVersion"`
} `xml:"versioning"`
}
@ -132,19 +138,30 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
}, nil
}
// ParseMavenMetadata parses the Maven metadata XML to extract the build number.
func ParseMavenMetaData(r io.Reader) (string, error) {
var metadata MavenMetadata
// ParseSnapshotVersionMetaData parses the maven-metadata.xml of a Maven snapshot version and extracts the build number and the list of available classifiers.
func ParseSnapshotVersionMetaData(r io.Reader) (*SnapshotMetadata, error) {
var metadata snapshotMetadataStruct
dec := xml.NewDecoder(r)
dec.CharsetReader = charset.NewReaderLabel // Assuming charset.NewReaderLabel is a function you've set up to handle character encoding.
dec.CharsetReader = charset.NewReaderLabel
if err := dec.Decode(&metadata); err != nil {
return "", err
return nil, err
}
if metadata.Versioning.Snapshot.BuildNumber == "" {
return "", errors.New("no build number in snapshot metadata found")
buildNumber, err := strconv.Atoi(metadata.Versioning.Snapshot.BuildNumber)
if err != nil {
return nil, errors.New("invalid or missing build number in snapshot metadata")
}
return metadata.Versioning.Snapshot.BuildNumber, nil
var classifiers []string
for _, snapshotVersion := range metadata.Versioning.SnapshotVersions {
if snapshotVersion.Classifier != "" {
classifiers = append(classifiers, snapshotVersion.Classifier)
}
}
return &SnapshotMetadata{
BuildNumber: buildNumber,
Classifiers: classifiers,
}, nil
}

View File

@ -46,6 +46,7 @@ var (
DefaultRPMSignEnabled bool
RetainMavenSnapshotBuilds int
DebugMavenCleanup bool
}{
Enabled: true,
LimitTotalOwnerCount: -1,
@ -105,6 +106,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) {
Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT")
Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false)
Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds)
Packages.DebugMavenCleanup = sec.Key("DEBUG_MAVEN_CLEANUP").MustBool(true)
return nil
}

View File

@ -3,7 +3,6 @@ package maven
import (
"context"
"fmt"
"strconv"
"strings"
"code.gitea.io/gitea/models/packages"
@ -13,40 +12,46 @@ import (
packages_service "code.gitea.io/gitea/services/packages"
)
// CleanupSnapshotVersion removes outdated files for SNAPHOT versions for all Maven packages.
// CleanupSnapshotVersions removes outdated files of SNAPSHOT versions for all Maven packages.
func CleanupSnapshotVersions(ctx context.Context) error {
retainBuilds := setting.Packages.RetainMavenSnapshotBuilds
log.Info("Starting CleanupSnapshotVersion with retainBuilds: %d", retainBuilds)
debugSession := setting.Packages.DebugMavenCleanup
log.Debug("Starting Maven CleanupSnapshotVersions with retainBuilds: %d, debugSession: %t", retainBuilds, debugSession)
if retainBuilds == -1 {
log.Info("CleanupSnapshotVersion skipped because retainBuilds is set to -1")
log.Info("Maven CleanupSnapshotVersions skipped because retainBuilds is set to -1")
return nil
}
if retainBuilds < 1 {
return fmt.Errorf("forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
return fmt.Errorf("Maven CleanupSnapshotVersions: forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
}
versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven)
if err != nil {
return fmt.Errorf("failed to retrieve Maven package versions: %w", err)
return fmt.Errorf("Maven CleanupSnapshotVersions: failed to retrieve Maven package versions: %w", err)
}
for _, version := range versions {
log.Info("Processing version: %s (ID: %d)", version.Version, version.ID)
var errors []error
for _, version := range versions {
if !isSnapshotVersion(version.Version) {
log.Info("Skipping non-SNAPSHOT version: %s (ID: %d)", version.Version, version.ID)
continue
}
if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil {
log.Error("Failed to clean up snapshot files for version '%s' (ID: %d): %v", version.Version, version.ID, err)
return err
if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds, debugSession); err != nil {
errors = append(errors, fmt.Errorf("Maven CleanupSnapshotVersions: version '%s' (ID: %d): %w", version.Version, version.ID, err))
}
}
log.Info("Completed CleanupSnapshotVersion")
if len(errors) > 0 {
for _, err := range errors {
log.Warn("Maven CleanupSnapshotVersions: Error during cleanup: %v", err)
}
return fmt.Errorf("Maven CleanupSnapshotVersions: cleanup completed with errors: %v", errors)
}
log.Debug("Completed Maven CleanupSnapshotVersions")
return nil
}
@ -54,64 +59,75 @@ func isSnapshotVersion(version string) bool {
return strings.HasSuffix(version, "-SNAPSHOT")
}
func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error {
log.Info("Starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d", versionID, retainBuilds)
func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int, debugSession bool) error {
log.Debug("Starting Maven cleanSnapshotFiles for versionID: %d with retainBuilds: %d, debugSession: %t", versionID, retainBuilds, debugSession)
metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey)
if err != nil {
return fmt.Errorf("failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
return fmt.Errorf("cleanSnapshotFiles: failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
}
maxBuildNumber, err := extractMaxBuildNumberFromMetadata(ctx, metadataFile)
maxBuildNumber, classifiers, err := extractMaxBuildNumber(ctx, metadataFile)
if err != nil {
return fmt.Errorf("failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
return fmt.Errorf("cleanSnapshotFiles: failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
}
log.Info("Max build number for versionID %d: %d", versionID, maxBuildNumber)
thresholdBuildNumber := maxBuildNumber - retainBuilds
if thresholdBuildNumber <= 0 {
log.Info("No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
log.Debug("cleanSnapshotFiles: No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
return nil
}
filesToRemove, err := packages.GetFilesByBuildNumber(ctx, versionID, thresholdBuildNumber)
filesToRemove, skippedFiles, err := packages.GetFilesBelowBuildNumber(ctx, versionID, thresholdBuildNumber, classifiers...)
if err != nil {
return fmt.Errorf("failed to retrieve files for version ID %d: %w", versionID, err)
return fmt.Errorf("cleanSnapshotFiles: failed to retrieve files for version ID %d: %w", versionID, err)
}
if debugSession {
var fileNamesToRemove, skippedFileNames []string
for _, file := range filesToRemove {
fileNamesToRemove = append(fileNamesToRemove, file.Name)
}
for _, file := range skippedFiles {
skippedFileNames = append(skippedFileNames, file.Name)
}
log.Info("cleanSnapshotFiles: Debug session active. Files to remove: %v, Skipped files: %v", fileNamesToRemove, skippedFileNames)
return nil
}
for _, file := range filesToRemove {
log.Debug("Removing file '%s' below threshold %d", file.Name, thresholdBuildNumber)
if err := packages_service.DeletePackageFile(ctx, file); err != nil {
return fmt.Errorf("failed to delete file '%s': %w", file.Name, err)
return fmt.Errorf("Maven cleanSnapshotFiles: failed to delete file '%s': %w", file.Name, err)
}
}
log.Info("Completed cleanSnapshotFiles for versionID: %d", versionID)
log.Debug("Completed Maven cleanSnapshotFiles for versionID: %d", versionID)
return nil
}
func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) {
func extractMaxBuildNumber(ctx context.Context, metadataFile *packages.PackageFile) (int, []string, error) {
pb, err := packages.GetBlobByID(ctx, metadataFile.BlobID)
if err != nil {
return 0, fmt.Errorf("failed to get package blob: %w", err)
return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package blob: %w", err)
}
content, _, _, err := packages_service.GetPackageBlobStream(ctx, metadataFile, pb, nil, true)
if err != nil {
return 0, fmt.Errorf("failed to get package file stream: %w", err)
return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package file stream: %w", err)
}
defer content.Close()
buildNumberStr, err := maven.ParseMavenMetaData(content)
snapshotMetadata, err := maven.ParseSnapshotVersionMetaData(content)
if err != nil {
return 0, fmt.Errorf("failed to parse maven-metadata.xml: %w", err)
return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to parse maven-metadata.xml: %w", err)
}
buildNumber, err := strconv.Atoi(buildNumberStr)
if err != nil {
return 0, fmt.Errorf("invalid build number format: %w", err)
}
buildNumber := snapshotMetadata.BuildNumber
classifiers := snapshotMetadata.Classifiers
return buildNumber, nil
return buildNumber, classifiers, nil
}