Merge pull request #2780 from mmotti/fix/regexp_query

Fix for regexp queries through pihole -q
This commit is contained in:
Mark Drobnak 2019-06-05 13:19:39 -04:00 committed by GitHub
commit b0c5dcf42f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 36 additions and 41 deletions

View File

@ -12,7 +12,6 @@
# Globals
piholeDir="/etc/pihole"
gravityDBfile="${piholeDir}/gravity.db"
wildcardlist="/etc/dnsmasq.d/03-pihole-wildcard.conf"
options="$*"
adlist=""
all=""
@ -23,27 +22,10 @@ matchType="match"
colfile="/opt/pihole/COL_TABLE"
source "${colfile}"
# Print each subdomain
# e.g: foo.bar.baz.com = "foo.bar.baz.com bar.baz.com baz.com com"
processWildcards() {
IFS="." read -r -a array <<< "${1}"
for (( i=${#array[@]}-1; i>=0; i-- )); do
ar=""
for (( j=${#array[@]}-1; j>${#array[@]}-i-2; j-- )); do
if [[ $j == $((${#array[@]}-1)) ]]; then
ar="${array[$j]}"
else
ar="${array[$j]}.${ar}"
fi
done
echo "${ar}"
done
}
# Scan an array of files for matching strings
scanList(){
# Escape full stops
local domain="${1//./\\.}" lists="${2}" type="${3:-}"
local domain="${1}" esc_domain="${1//./\\.}" lists="${2}" type="${3:-}"
# Prevent grep from printing file path
cd "$piholeDir" || exit 1
@ -54,9 +36,14 @@ scanList(){
# /dev/null forces filename to be printed when only one list has been generated
# shellcheck disable=SC2086
case "${type}" in
"exact" ) grep -i -E -l "(^|(?<!#)\\s)${domain}($|\\s|#)" ${lists} /dev/null 2>/dev/null;;
"wc" ) grep -i -o -m 1 "/${domain}/" ${lists} 2>/dev/null;;
* ) grep -i "${domain}" ${lists} /dev/null 2>/dev/null;;
"exact" ) grep -i -E -l "(^|(?<!#)\\s)${esc_domain}($|\\s|#)" ${lists} /dev/null 2>/dev/null;;
# Create array of regexps
# Iterate through each regexp and check whether it matches the domainQuery
# If it does, print the matching regexp and continue looping
# Input 1 - regexps | Input 2 - domainQuery
"regex" ) awk 'NR==FNR{regexps[$0]}{for (r in regexps)if($0 ~ r)print r}' \
<(echo "${lists}") <(echo "${domain}") 2>/dev/null;;
* ) grep -i "${esc_domain}" ${lists} /dev/null 2>/dev/null;;
esac
}
@ -113,8 +100,8 @@ scanDatabaseTable() {
# behavior. The "ESCAPE '\'" clause specifies that an underscore preceded by an '\' should be matched
# as a literal underscore character. We pretreat the $domain variable accordingly to escape underscores.
case "${type}" in
"exact" ) querystr="SELECT domain FROM vw_${table} WHERE domain = '${domain}'";;
* ) querystr="SELECT domain FROM vw_${table} WHERE domain LIKE '%${domain//_/\\_}%' ESCAPE '\\'";;
"exact" ) querystr="SELECT domain FROM vw_${table} WHERE domain = '${domain}'";;
* ) querystr="SELECT domain FROM vw_${table} WHERE domain LIKE '%${domain//_/\\_}%' ESCAPE '\\'";;
esac
# Send prepared query to gravity database
@ -145,24 +132,32 @@ scanDatabaseTable() {
scanDatabaseTable "${domainQuery}" "whitelist" "${exact}"
scanDatabaseTable "${domainQuery}" "blacklist" "${exact}"
# Scan Wildcards
if [[ -e "${wildcardlist}" ]]; then
# Determine all subdomains, domain and TLDs
mapfile -t wildcards <<< "$(processWildcards "${domainQuery}")"
for match in "${wildcards[@]}"; do
# Search wildcard list for matches
mapfile -t results <<< "$(scanList "${match}" "${wildcardlist}" "wc")"
if [[ -n "${results[*]}" ]]; then
if [[ -z "${wcMatch:-}" ]] && [[ -z "${blockpage}" ]]; then
wcMatch=true
echo " ${matchType^} found in ${COL_BOLD}Wildcards${COL_NC}:"
fi
case "${blockpage}" in
true ) echo "π ${wildcardlist##*/}"; exit 0;;
* ) echo " *.${match}";;
esac
# Scan Regex table
mapfile -t regexlist <<< "$(sqlite3 "${gravityDBfile}" "SELECT domain FROM vw_regex" 2> /dev/null)"
# Split results over new line and store in a string
# ready for processing
str_regexlist=$(IFS=$'\n'; echo "${regexlist[*]}")
# If there are regexps in the DB
if [[ -n "${str_regexlist}" ]]; then
# Return any regexps that match the domainQuery
mapfile -t results <<< "$(scanList "${domainQuery}" "${str_regexlist}" "regex")"
# If there are matches to the domain query
if [[ -n "${results[*]}" ]]; then
# Form output strings
str="${matchType^} found in ${COL_BOLD}Regex list${COL_NC}"
result="${COL_BOLD}$(IFS=$'\n'; echo "${results[*]}")${COL_NC}"
if [[ -z "${blockpage}" ]]; then
wcMatch=true
echo " $str"
fi
done
case "${blockpage}" in
true ) echo "π Regex list"; exit 0;;
* ) awk '{print " "$0}' <<< "${result}";;
esac
fi
fi
# Get version sorted *.domains filenames (without dir path)