diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini
index 588f8bb7e2..1c3ee06368 100644
--- a/custom/conf/app.example.ini
+++ b/custom/conf/app.example.ini
@@ -2622,6 +2622,8 @@ LEVEL = Info
 ;; Cleanup expired packages/data then targets the files within all maven snapshots versions
 ;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1
+;; Enable debug logging for Maven cleanup. Enabling debug will stop snapshot version artifacts from being deleted but will log the files which were meant for deletion.
+;DEBUG_MAVEN_CLEANUP = true
 ;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
 ;LIMIT_SIZE_NPM = -1
 ;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
 ;LIMIT_SIZE_NUGET = -1
diff --git a/models/packages/package_file.go b/models/packages/package_file.go
index ca7bd4023e..f940efc90e 100644
--- a/models/packages/package_file.go
+++ b/models/packages/package_file.go
@@ -12,7 +12,6 @@ import (
 	"time"
 
 	"code.gitea.io/gitea/models/db"
-	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/timeutil"
 	"code.gitea.io/gitea/modules/util"
 
@@ -24,6 +23,8 @@ func init() {
 }
 
 var (
+	// ErrMetadataFile indicates that the file is a Maven metadata file and carries no build number
+	ErrMetadataFile = errors.New("metadata file")
 	// ErrDuplicatePackageFile indicates a duplicated package file error
 	ErrDuplicatePackageFile = util.NewAlreadyExistErrorf("package file already exists")
 	// ErrPackageFileNotExist indicates a package file not exist error
@@ -229,57 +230,69 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error)
 	return db.Exist[PackageFile](ctx, opts.toConds())
 }
 
-// GetFilesByBuildNumber retrieves all files for a package version with build numbers <= maxBuildNumber.
-func GetFilesByBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int) ([]*PackageFile, error) {
-	if maxBuildNumber < 0 {
-		return nil, errors.New("maxBuildNumber must be a non-negative integer")
+// GetFilesBelowBuildNumber retrieves all files for maven snapshot version where the build number is <= maxBuildNumber.
+// Returns two slices: one for filtered files and one for skipped files (names without a parsable build number).
+func GetFilesBelowBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int, classifiers ...string) ([]*PackageFile, []*PackageFile, error) {
+	if maxBuildNumber <= 0 {
+		return nil, nil, errors.New("maxBuildNumber must be a positive integer")
 	}
 
 	files, err := GetFilesByVersionID(ctx, versionID)
 	if err != nil {
-		return nil, fmt.Errorf("failed to retrieve files: %w", err)
+		return nil, nil, fmt.Errorf("failed to retrieve files: %w", err)
 	}
 
-	var filteredFiles []*PackageFile
+	var filteredFiles, skippedFiles []*PackageFile
 	for _, file := range files {
-		buildNumber, err := extractBuildNumberFromFileName(file.Name)
+		buildNumber, err := extractBuildNumberFromFileName(file.Name, classifiers...)
 		if err != nil {
-			if err.Error() == "metadata file" {
-				continue
+			if !errors.Is(err, ErrMetadataFile) {
+				skippedFiles = append(skippedFiles, file)
 			}
-			log.Warn("Failed to extract build number from file name '%s': %v", file.Name, err)
 			continue
 		}
-
 		if buildNumber <= maxBuildNumber {
 			filteredFiles = append(filteredFiles, file)
 		}
 	}
 
-	log.Info("Filtered %d files out of %d total files for version ID %d with maxBuildNumber %d", len(filteredFiles), len(files), versionID, maxBuildNumber)
-	return filteredFiles, nil
+	return filteredFiles, skippedFiles, nil
 }
 
-// extractBuildNumberFromFileName extracts the build number from the file name.
-func extractBuildNumberFromFileName(filename string) (int, error) {
-	// Skip metadata files
+// extractBuildNumberFromFileName extracts the build number from a Maven snapshot file name.
+// Expected formats:
+//
+//	"artifact-1.0.0-20250311.083409-9.tgz" returns 9
+//	"artifact-to-test-2.0.0-20250311.083409-10-sources.tgz" returns 10
+func extractBuildNumberFromFileName(filename string, classifiers ...string) (int, error) {
 	if strings.Contains(filename, "maven-metadata.xml") {
-		return 0, errors.New("metadata file")
+		return 0, ErrMetadataFile
 	}
 
-	// Split filename by hyphens to extract the build number
-	parts := strings.Split(filename, "-")
-	if len(parts) < 3 {
-		return 0, fmt.Errorf("invalid file name format: '%s'", filename)
+	dotIdx := strings.LastIndex(filename, ".")
+	if dotIdx == -1 {
+		return 0, fmt.Errorf("extract build number from filename: no file extension found in '%s'", filename)
+	}
+	base := filename[:dotIdx]
+
+	// Strip the longest matching classifier suffix so that "sources" cannot shadow "test-sources".
+	for _, classifier := range classifiers {
+		trimmed := strings.TrimSuffix(filename[:dotIdx], "-"+classifier)
+		if len(trimmed) < len(base) {
+			// A longer classifier matched than any seen so far; keep the shortest remaining base.
+			base = trimmed
+		}
 	}
 
-	// Extract the last part before the extension
-	buildNumberWithExt := parts[len(parts)-1]
-	buildNumberStr := strings.Split(buildNumberWithExt, ".")[0]
-
+	// The build number should be the token after the last dash.
+	lastDash := strings.LastIndex(base, "-")
+	if lastDash == -1 {
+		return 0, fmt.Errorf("extract build number from filename: invalid file name format in '%s'", filename)
+	}
+	buildNumberStr := base[lastDash+1:]
 	buildNumber, err := strconv.Atoi(buildNumberStr)
 	if err != nil {
-		return 0, fmt.Errorf("failed to convert maven package build number to integer: '%s'", buildNumberStr)
+		return 0, fmt.Errorf("extract build number from filename: failed to convert build number '%s' to integer in '%s': %w", buildNumberStr, filename, err)
 	}
 
 	return buildNumber, nil
diff --git a/modules/packages/maven/metadata.go b/modules/packages/maven/metadata.go
index d903d4b394..8dcbf14d18 100644
--- a/modules/packages/maven/metadata.go
+++ b/modules/packages/maven/metadata.go
@@ -7,6 +7,7 @@ import (
 	"encoding/xml"
 	"errors"
 	"io"
+	"strconv"
 
 	"code.gitea.io/gitea/modules/util"
 	"code.gitea.io/gitea/modules/validation"
@@ -32,6 +33,12 @@ type Dependency struct {
 	Version    string `json:"version,omitempty"`
 }
 
+// SnapshotMetadata struct holds the build number and the list of classifiers for a snapshot version
+type SnapshotMetadata struct {
+	BuildNumber int      `json:"build_number,omitempty"`
+	Classifiers []string `json:"classifiers,omitempty"`
+}
+
 type pomStruct struct {
 	XMLName xml.Name `xml:"project"`
 
@@ -62,7 +69,7 @@ type pomStruct struct {
 	} `xml:"dependencies>dependency"`
 }
 
-type MavenMetadata struct {
+type snapshotMetadataStruct struct {
 	XMLName    xml.Name `xml:"metadata"`
 	GroupID    string   `xml:"groupId"`
 	ArtifactID string   `xml:"artifactId"`
@@ -74,11 +81,10 @@ type MavenMetadata struct {
 			BuildNumber string `xml:"buildNumber"`
 		} `xml:"snapshot"`
 		SnapshotVersions []struct {
-			SnapshotVersion struct {
-				Extension string `xml:"extension"`
-				Value     string `xml:"value"`
-				Updated   string `xml:"updated"`
-			} `xml:"snapshotVersion"`
+			Extension  string `xml:"extension"`
+			Classifier string `xml:"classifier"`
+			Value      string `xml:"value"`
+			Updated    string `xml:"updated"`
 		} `xml:"snapshotVersions>snapshotVersion"`
 	} `xml:"versioning"`
 }
@@ -132,19 +138,30 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
 	}, nil
 }
 
-// ParseMavenMetadata parses the Maven metadata XML to extract the build number.
-func ParseMavenMetaData(r io.Reader) (string, error) {
-	var metadata MavenMetadata
+// ParseSnapshotVersionMetaData parses the Maven snapshot version metadata to extract the build number and the list of available classifiers.
+func ParseSnapshotVersionMetaData(r io.Reader) (*SnapshotMetadata, error) {
+	var metadata snapshotMetadataStruct
 
 	dec := xml.NewDecoder(r)
-	dec.CharsetReader = charset.NewReaderLabel // Assuming charset.NewReaderLabel is a function you've set up to handle character encoding.
+	dec.CharsetReader = charset.NewReaderLabel
 	if err := dec.Decode(&metadata); err != nil {
-		return "", err
+		return nil, err
 	}
 
-	if metadata.Versioning.Snapshot.BuildNumber == "" {
-		return "", errors.New("no build number in snapshot metadata found")
+	buildNumber, err := strconv.Atoi(metadata.Versioning.Snapshot.BuildNumber)
+	if err != nil {
+		return nil, errors.New("invalid or missing build number in snapshot metadata")
 	}
 
-	return metadata.Versioning.Snapshot.BuildNumber, nil
+	var classifiers []string
+	for _, snapshotVersion := range metadata.Versioning.SnapshotVersions {
+		if snapshotVersion.Classifier != "" {
+			classifiers = append(classifiers, snapshotVersion.Classifier)
+		}
+	}
+
+	return &SnapshotMetadata{
+		BuildNumber: buildNumber,
+		Classifiers: classifiers,
+	}, nil
 }
diff --git a/modules/setting/packages.go b/modules/setting/packages.go
index 69de064d42..790c047aad 100644
--- a/modules/setting/packages.go
+++ b/modules/setting/packages.go
@@ -46,6 +46,7 @@ var (
 		DefaultRPMSignEnabled bool
 
 		RetainMavenSnapshotBuilds int
+		DebugMavenCleanup         bool
 	}{
 		Enabled:              true,
 		LimitTotalOwnerCount: -1,
@@ -105,6 +106,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) {
 	Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT")
 	Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false)
 	Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds)
+	Packages.DebugMavenCleanup = sec.Key("DEBUG_MAVEN_CLEANUP").MustBool(true)
 	return nil
 }
 
diff --git a/services/packages/maven/cleanup.go b/services/packages/maven/cleanup.go
index d4f6138977..f7a14cdc4c 100644
--- a/services/packages/maven/cleanup.go
+++ b/services/packages/maven/cleanup.go
@@ -3,7 +3,6 @@ package maven
 import (
 	"context"
 	"fmt"
-	"strconv"
 	"strings"
 
 	"code.gitea.io/gitea/models/packages"
@@ -13,40 +12,46 @@ import (
 	packages_service "code.gitea.io/gitea/services/packages"
 )
 
-// CleanupSnapshotVersion removes outdated files for SNAPHOT versions for all Maven packages.
+// CleanupSnapshotVersions removes outdated files for SNAPSHOT versions for all Maven packages.
 func CleanupSnapshotVersions(ctx context.Context) error {
 	retainBuilds := setting.Packages.RetainMavenSnapshotBuilds
-	log.Info("Starting CleanupSnapshotVersion with retainBuilds: %d", retainBuilds)
+	debugSession := setting.Packages.DebugMavenCleanup
+	log.Debug("Starting Maven CleanupSnapshotVersions with retainBuilds: %d, debugSession: %t", retainBuilds, debugSession)
 
 	if retainBuilds == -1 {
-		log.Info("CleanupSnapshotVersion skipped because retainBuilds is set to -1")
+		log.Info("Maven CleanupSnapshotVersions skipped because retainBuilds is set to -1")
 		return nil
 	}
 
 	if retainBuilds < 1 {
-		return fmt.Errorf("forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
+		return fmt.Errorf("Maven CleanupSnapshotVersions: forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
 	}
 
 	versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven)
 	if err != nil {
-		return fmt.Errorf("failed to retrieve Maven package versions: %w", err)
+		return fmt.Errorf("Maven CleanupSnapshotVersions: failed to retrieve Maven package versions: %w", err)
 	}
 
-	for _, version := range versions {
-		log.Info("Processing version: %s (ID: %d)", version.Version, version.ID)
+	var errs []error
 
+	for _, version := range versions {
 		if !isSnapshotVersion(version.Version) {
-			log.Info("Skipping non-SNAPSHOT version: %s (ID: %d)", version.Version, version.ID)
 			continue
 		}
 
-		if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil {
-			log.Error("Failed to clean up snapshot files for version '%s' (ID: %d): %v", version.Version, version.ID, err)
-			return err
+		if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds, debugSession); err != nil {
+			errs = append(errs, fmt.Errorf("Maven CleanupSnapshotVersions: version '%s' (ID: %d): %w", version.Version, version.ID, err))
 		}
 	}
 
-	log.Info("Completed CleanupSnapshotVersion")
+	if len(errs) > 0 {
+		for _, err := range errs {
+			log.Warn("Maven CleanupSnapshotVersions: Error during cleanup: %v", err)
+		}
+		return fmt.Errorf("Maven CleanupSnapshotVersions: cleanup completed with errors: %v", errs)
+	}
+
+	log.Debug("Completed Maven CleanupSnapshotVersions")
 	return nil
 }
 
@@ -54,64 +59,75 @@ func isSnapshotVersion(version string) bool {
 	return strings.HasSuffix(version, "-SNAPSHOT")
 }
 
-func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error {
-	log.Info("Starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d", versionID, retainBuilds)
+func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int, debugSession bool) error {
+	log.Debug("Starting Maven cleanSnapshotFiles for versionID: %d with retainBuilds: %d, debugSession: %t", versionID, retainBuilds, debugSession)
 
 	metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey)
 	if err != nil {
-		return fmt.Errorf("failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
+		return fmt.Errorf("cleanSnapshotFiles: failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
 	}
 
-	maxBuildNumber, err := extractMaxBuildNumberFromMetadata(ctx, metadataFile)
+	maxBuildNumber, classifiers, err := extractMaxBuildNumber(ctx, metadataFile)
 	if err != nil {
-		return fmt.Errorf("failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
+		return fmt.Errorf("cleanSnapshotFiles: failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
 	}
 
-	log.Info("Max build number for versionID %d: %d", versionID, maxBuildNumber)
-
 	thresholdBuildNumber := maxBuildNumber - retainBuilds
 	if thresholdBuildNumber <= 0 {
-		log.Info("No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
+		log.Debug("cleanSnapshotFiles: No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
 		return nil
 	}
 
-	filesToRemove, err := packages.GetFilesByBuildNumber(ctx, versionID, thresholdBuildNumber)
+	filesToRemove, skippedFiles, err := packages.GetFilesBelowBuildNumber(ctx, versionID, thresholdBuildNumber, classifiers...)
 	if err != nil {
-		return fmt.Errorf("failed to retrieve files for version ID %d: %w", versionID, err)
+		return fmt.Errorf("cleanSnapshotFiles: failed to retrieve files for version ID %d: %w", versionID, err)
+	}
+
+	if debugSession {
+		var fileNamesToRemove, skippedFileNames []string
+
+		for _, file := range filesToRemove {
+			fileNamesToRemove = append(fileNamesToRemove, file.Name)
+		}
+
+		for _, file := range skippedFiles {
+			skippedFileNames = append(skippedFileNames, file.Name)
+		}
+
+		log.Info("cleanSnapshotFiles: Debug session active. Files to remove: %v, Skipped files: %v", fileNamesToRemove, skippedFileNames)
+		return nil
 	}
 
 	for _, file := range filesToRemove {
 		log.Debug("Removing file '%s' below threshold %d", file.Name, thresholdBuildNumber)
 		if err := packages_service.DeletePackageFile(ctx, file); err != nil {
-			return fmt.Errorf("failed to delete file '%s': %w", file.Name, err)
+			return fmt.Errorf("Maven cleanSnapshotFiles: failed to delete file '%s': %w", file.Name, err)
 		}
 	}
 
-	log.Info("Completed cleanSnapshotFiles for versionID: %d", versionID)
+	log.Debug("Completed Maven cleanSnapshotFiles for versionID: %d", versionID)
 	return nil
 }
 
-func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) {
+func extractMaxBuildNumber(ctx context.Context, metadataFile *packages.PackageFile) (int, []string, error) {
 	pb, err := packages.GetBlobByID(ctx, metadataFile.BlobID)
 	if err != nil {
-		return 0, fmt.Errorf("failed to get package blob: %w", err)
+		return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package blob: %w", err)
 	}
 
 	content, _, _, err := packages_service.GetPackageBlobStream(ctx, metadataFile, pb, nil, true)
 	if err != nil {
-		return 0, fmt.Errorf("failed to get package file stream: %w", err)
+		return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package file stream: %w", err)
 	}
 	defer content.Close()
 
-	buildNumberStr, err := maven.ParseMavenMetaData(content)
+	snapshotMetadata, err := maven.ParseSnapshotVersionMetaData(content)
 	if err != nil {
-		return 0, fmt.Errorf("failed to parse maven-metadata.xml: %w", err)
+		return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to parse maven-metadata.xml: %w", err)
 	}
 
-	buildNumber, err := strconv.Atoi(buildNumberStr)
-	if err != nil {
-		return 0, fmt.Errorf("invalid build number format: %w", err)
-	}
+	buildNumber := snapshotMetadata.BuildNumber
+	classifiers := snapshotMetadata.Classifiers
 
-	return buildNumber, nil
+	return buildNumber, classifiers, nil
 }