Fix a performance issue due to URL recognition

The function "addHotSpot" can become very slow when the screen contains certain sequences of characters that look like URLs but are not valid, due to a form of backtracking. This change eliminates the possibility of backtracking.

This commit does two things:

First, it tightens the requirements for “looks like a URL” by checking the scheme earlier in the process. That is necessary to keep the next step from skipping valid URLs in reasonable contexts.

Second, once the beginning of a potential URL passes the tighter initial scanning and the end of the URL is found, we “commit” to that portion of the line. If the potential URL fails InternetCrackUrl validation, we restart scanning from the end of the string that looked like a URL but wasn’t, rather than from just after the scheme.

Fix #13916, close #14900
This commit is contained in:
Coises 2024-03-23 11:12:11 -07:00 committed by Don Ho
parent 29fcd1ac91
commit 0a7295878d
1 changed files with 11 additions and 26 deletions

View File

@ -3120,37 +3120,17 @@ bool isUrlQueryDelimiter(TCHAR const c)
return false;
}
bool isUrlSchemeSupported(INTERNET_SCHEME s, TCHAR *url)
bool isUrlSchemeSupported(TCHAR *url, int remainingLength)
{
switch (s)
{
case INTERNET_SCHEME_FTP:
case INTERNET_SCHEME_HTTP:
case INTERNET_SCHEME_HTTPS:
case INTERNET_SCHEME_MAILTO:
case INTERNET_SCHEME_FILE:
return true;
case INTERNET_SCHEME_PARTIAL:
case INTERNET_SCHEME_UNKNOWN:
case INTERNET_SCHEME_DEFAULT:
case INTERNET_SCHEME_GOPHER:
case INTERNET_SCHEME_NEWS:
case INTERNET_SCHEME_SOCKS:
case INTERNET_SCHEME_JAVASCRIPT:
case INTERNET_SCHEME_VBSCRIPT:
case INTERNET_SCHEME_RES:
default:
break;
}
generic_string const mySchemes = (NppParameters::getInstance()).getNppGUI()._uriSchemes + TEXT(" ");
generic_string const mySchemes = L"ftp:// http:// https:// mailto: file:// "
+ (NppParameters::getInstance()).getNppGUI()._uriSchemes + L" ";
TCHAR *p = (TCHAR *)mySchemes.c_str();
while (*p)
{
int i = 0;
while (p [i] && (p [i] != ' ')) i++;
if (i == 0) return false;
if (wcsnicmp(url, p, i) == 0) return true;
if (i <= remainingLength && wcsnicmp(url, p, i) == 0) return true;
p += i;
while (*p == ' ') p++;
}
@ -3183,7 +3163,7 @@ bool scanToUrlStart(TCHAR *text, int textLen, int start, int* distance, int* sch
break;
case sScheme:
if (text [p] == ':')
if (text [p] == ':' && isUrlSchemeSupported(text + p0, textLen - p0))
{
*distance = p0 - start;
*schemeLength = p - p0 + 1;
@ -3380,13 +3360,18 @@ bool isUrl(TCHAR * text, int textLen, int start, int* segmentLen)
URL_COMPONENTS url;
memset (& url, 0, sizeof(url));
url.dwStructSize = sizeof(url);
bool r = InternetCrackUrl(& text [start], len, 0, & url) && isUrlSchemeSupported(url.nScheme, & text [start]);
bool r = InternetCrackUrl(& text [start], len, 0, & url);
if (r)
{
while (removeUnwantedTrailingCharFromUrl (& text [start], & len));
*segmentLen = len;
return true;
}
else // to avoid potentially catastrophic backtracking, skip the entire text that looked like a URL
{
*segmentLen = len;
return false;
}
}
len = 1;
int lMax = textLen - start;