Improve scheme matching to avoid inadvertent blocking

Signed-off-by: Riley Avron <riley.avron@gmail.com>
This commit is contained in:
Riley Avron 2018-03-06 21:48:12 -08:00
parent 512ec7fb87
commit f390671018
1 changed file with 7 additions and 3 deletions

View File

@@ -346,12 +346,16 @@ gravity_ParseFileIntoDomains() {
echo -ne " ${INFO} Format: URL"
awk '
# Remove URL protocol, optional "username:password@", and ":?/;"
/[:?\/;]/ { gsub(/(^.*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) }
# Remove URL scheme, optional "username:password@", and everything from the
# first ":", "?", "/" or ";" onward. The scheme is matched only at the start
# of the line, to avoid blocking the wrong domain in cases like:
# http://www.evil.com?http://www.good.com
# See RFC 3986 section 3.1 for the scheme syntax.
/[:?\/;]/ { gsub(/(^[a-zA-Z][a-zA-Z0-9+.-]*:\/\/(.*:.*@)?|[:?\/;].*)/, "", $0) }
# Skip lines which are only IPv4 addresses
/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { next }
# Print if nonempty
length { print $0 }
length { print }
' "${source}" 2> /dev/null > "${destination}"
echo -e "${OVER} ${TICK} Format: URL"