Use HTTP ETags in addition to the If-Modified-Since header if supported by curl to avoid downloading from servers not using the latter method (raw.githubcontent.com)

Signed-off-by: DL6ER <dl6er@dl6er.de>
This commit is contained in:
DL6ER 2025-01-29 16:58:23 +01:00
parent 9a3fcf2a85
commit cee021c358
No known key found for this signature in database
GPG Key ID: 00135ACBD90B28DD
1 changed files with 46 additions and 6 deletions

View File

@ -44,6 +44,7 @@ gravityDBcopy="${piholeGitDir}/advanced/Templates/gravity_copy.sql"
domainsExtension="domains"
curl_connect_timeout=10
etag_support=false
# Check gravity temp directory
if [ ! -d "${GRAVITY_TMPDIR}" ] || [ ! -w "${GRAVITY_TMPDIR}" ]; then
@ -502,6 +503,20 @@ gravity_DownloadBlocklists() {
compression=""
echo -e " ${INFO} Libz compression not available\n"
fi
# Check if etag is supported by the locally available version of curl by
# comparing the version string being >= 7.68.0 (released Jan 2020)
# https://github.com/curl/curl/pull/4543 followed by
# https://github.com/curl/curl/pull/4678
if curl -V | grep -q "curl 7\.[6-9][8-9]"; then
etag_support=true
else
# Check if the version is >= 8
if curl -V | grep -q "curl 8"; then
etag_support=true
fi
fi
# Loop through $sources and download each one
for ((i = 0; i < "${#sources[@]}"; i++)); do
url="${sources[$i]}"
@ -595,12 +610,37 @@ gravity_DownloadBlocklistFromUrl() {
mv "${listCurlBuffer}" "${listCurlBuffer%.*}.phgpb"
listCurlBuffer="${listCurlBuffer%.*}.phgpb"
# Determine if $saveLocation has read permission
if [[ -r "${saveLocation}" && $url != "file"* ]]; then
# Have curl determine if a remote file has been modified since last retrieval
# Uses "Last-Modified" header, which certain web servers do not provide (e.g: raw github urls)
# Note: Don't do this for local files, always download them
heisenbergCompensator="-z ${saveLocation}"
# For all remote files, we try to determine if the file has changed to skip
# downloading them whenever possible.
if [[ $url != "file"* ]]; then
# Use the HTTP ETag header to determine if the file has changed if supported
# by curl. Using ETags is supported by raw.githubusercontent.com URLs.
if [[ "${etag_support}" == true ]]; then
# Save HTTP ETag to the specified file. An ETag is a caching related header,
# usually returned in a response. If no ETag is sent by the server, an empty
# file is created and can later be used consistently.
heisenbergCompensator="--etag-save ${saveLocation}.etag"
if [[ -f "${saveLocation}.etag" ]]; then
# This option makes a conditional HTTP request for the specific ETag read
# from the given file by sending a custom If-None-Match header using the
# stored ETag. This way, the server will only send the file if it has
# changed since the last request.
heisenbergCompensator="${heisenbergCompensator} --etag-compare ${saveLocation}.etag"
fi
fi
# Add If-Modified-Since header to the request if we did already download the
# file once
if [[ -f "${saveLocation}" ]]; then
# Request a file that has been modified later than the given time and
# date. We provide a file here which makes curl use the modification
# timestamp (mtime) of this file.
# Interstingly, this option is not supported by raw.githubusercontent.com
# URLs, however, it is still supported by many older web servers which may
# not support the HTTP ETag method so we keep it as a fallback.
heisenbergCompensator="${heisenbergCompensator} -z ${saveLocation}"
fi
fi
str="Status:"