[NEW_FEATURE] (Author: Dave Brotherstone) Add PCRE (Perl Compatible Regular Expressions) support.

git-svn-id: svn://svn.tuxfamily.org/svnroot/notepadplus/repository/trunk@863 f5eea248-9336-0410-98b8-ebc06183d4e3
This commit is contained in:
Don Ho 2012-02-13 01:45:05 +00:00
parent 73ffaa8554
commit 7e0ed0ebf3
16 changed files with 817 additions and 32 deletions

View File

@ -1864,6 +1864,10 @@ void NppParameters::feedFindHistoryParameters(TiXmlNode *node)
(findHistoryRoot->ToElement())->Attribute(TEXT("transparency"), &_findHistory._transparency);
if (_findHistory._transparency <= 0 || _findHistory._transparency > 200)
_findHistory._transparency = 150;
boolStr = (findHistoryRoot->ToElement())->Attribute(TEXT("dotMatchesNewline"));
if (boolStr)
_findHistory._dotMatchesNewline = !lstrcmp(TEXT("yes"), boolStr);
}
void NppParameters::feedShortcut(TiXmlNode *node)
@ -4633,6 +4637,7 @@ bool NppParameters::writeFindHistory()
(findHistoryRoot->ToElement())->SetAttribute(TEXT("searchMode"), _findHistory._searchMode);
(findHistoryRoot->ToElement())->SetAttribute(TEXT("transparencyMode"), _findHistory._transparencyMode);
(findHistoryRoot->ToElement())->SetAttribute(TEXT("transparency"), _findHistory._transparency);
(findHistoryRoot->ToElement())->SetAttribute(TEXT("dotMatchesNewline"), _findHistory._dotMatchesNewline?TEXT("yes"):TEXT("no"));
TiXmlElement hist_element(TEXT(""));

View File

@ -961,7 +961,8 @@ struct FindHistory {
_isMatchWord(false), _isMatchCase(false),_isWrap(true),_isDirectionDown(true),\
_isFifRecuisive(true), _isFifInHiddenFolder(false), _isDlgAlwaysVisible(false),\
_isFilterFollowDoc(false), _isFolderFollowDoc(false),\
_searchMode(normal), _transparencyMode(onLossingFocus), _transparency(150)
_searchMode(normal), _transparencyMode(onLossingFocus), _transparency(150),
_dotMatchesNewline(false)
{};
int _nbMaxFindHistoryPath;
@ -978,6 +979,7 @@ struct FindHistory {
bool _isMatchCase;
bool _isWrap;
bool _isDirectionDown;
bool _dotMatchesNewline;
bool _isFifRecuisive;
bool _isFifInHiddenFolder;

View File

@ -343,16 +343,12 @@ void FindReplaceDlg::fillFindHistory()
::SendDlgItemMessage(_hSelf, IDNORMAL, BM_SETCHECK, findHistory._searchMode == FindHistory::normal, 0);
::SendDlgItemMessage(_hSelf, IDEXTENDED, BM_SETCHECK, findHistory._searchMode == FindHistory::extended, 0);
::SendDlgItemMessage(_hSelf, IDREGEXP, BM_SETCHECK, findHistory._searchMode == FindHistory::regExpr, 0);
::SendDlgItemMessage(_hSelf, IDREDOTMATCHNL, BM_SETCHECK, findHistory._dotMatchesNewline, 0);
if (findHistory._searchMode == FindHistory::regExpr)
{
//regex doesnt allow wholeword
::SendDlgItemMessage(_hSelf, IDWHOLEWORD, BM_SETCHECK, BST_UNCHECKED, 0);
::EnableWindow(::GetDlgItem(_hSelf, IDWHOLEWORD), (BOOL)false);
//regex doesnt allow upward search
::SendDlgItemMessage(_hSelf, IDDIRECTIONDOWN, BM_SETCHECK, BST_CHECKED, 0);
::SendDlgItemMessage(_hSelf, IDDIRECTIONUP, BM_SETCHECK, BST_UNCHECKED, 0);
::EnableWindow(::GetDlgItem(_hSelf, IDDIRECTIONUP), (BOOL)false);
}
if (nppParams->isTransparentAvailable())
@ -993,6 +989,10 @@ BOOL CALLBACK FindReplaceDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM lP
}
return TRUE;
//Option actions
case IDREDOTMATCHNL:
findHistory._dotMatchesNewline = _options._dotMatchesNewline = isCheckedOrNot(IDREDOTMATCHNL);
return TRUE;
case IDWHOLEWORD :
findHistory._isMatchWord = _options._isWholeWord = isCheckedOrNot(IDWHOLEWORD);
return TRUE;
@ -1008,16 +1008,19 @@ BOOL CALLBACK FindReplaceDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM lP
{
_options._searchType = FindRegex;
findHistory._searchMode = FindHistory::regExpr;
::EnableWindow(GetDlgItem(_hSelf, IDREDOTMATCHNL), true);
}
else if (isCheckedOrNot(IDEXTENDED))
{
_options._searchType = FindExtended;
findHistory._searchMode = FindHistory::extended;
::EnableWindow(GetDlgItem(_hSelf, IDREDOTMATCHNL), false);
}
else
{
_options._searchType = FindNormal;
findHistory._searchMode = FindHistory::normal;
::EnableWindow(GetDlgItem(_hSelf, IDREDOTMATCHNL), false);
}
bool isRegex = (_options._searchType == FindRegex);
@ -1027,14 +1030,10 @@ BOOL CALLBACK FindReplaceDlg::run_dlgProc(UINT message, WPARAM wParam, LPARAM lP
_options._isWholeWord = false;
::SendDlgItemMessage(_hSelf, IDWHOLEWORD, BM_SETCHECK, _options._isWholeWord?BST_CHECKED:BST_UNCHECKED, 0);
//regex doesnt allow upward search
::SendDlgItemMessage(_hSelf, IDDIRECTIONDOWN, BM_SETCHECK, BST_CHECKED, 0);
::SendDlgItemMessage(_hSelf, IDDIRECTIONUP, BM_SETCHECK, BST_UNCHECKED, 0);
_options._whichDirection = DIR_DOWN;
}
::EnableWindow(::GetDlgItem(_hSelf, IDWHOLEWORD), (BOOL)!isRegex);
::EnableWindow(::GetDlgItem(_hSelf, IDDIRECTIONUP), (BOOL)!isRegex);
return TRUE; }
case IDWRAP :
@ -1266,7 +1265,11 @@ bool FindReplaceDlg::processFindNext(const TCHAR *txt2find, const FindOption *op
return false;
}
}
else if (posFind == -2) // Invalid Regular expression
{
::MessageBox(_hParent, TEXT("Invalid regular expression"), TEXT("Find"), MB_ICONERROR | MB_OK);
return false;
}
int start = posFind;
int end = int((*_ppEditView)->execute(SCI_GETTARGETEND));
@ -1329,6 +1332,11 @@ bool FindReplaceDlg::processReplace(const TCHAR *txt2find, const TCHAR *txt2repl
(*_ppEditView)->execute(SCI_SETSEL, start, start + replacedLen);
}
}
else if (posFind == -2) // Invalid Regular expression
{
::MessageBox(_hParent, TEXT("Invalid regular expression"), TEXT("Find"), MB_ICONERROR | MB_OK);
return false;
}
delete [] pTextFind;
delete [] pTextReplace;
@ -1509,11 +1517,12 @@ int FindReplaceDlg::processRange(ProcessOperation op, const TCHAR *txt2find, con
(*_ppEditView)->execute(SCI_SETSEARCHFLAGS, flags);
targetStart = (*_ppEditView)->searchInTarget(pTextFind, stringSizeFind, startRange, endRange);
if ((targetStart != -1) && (op == ProcessFindAll)) //add new filetitle if this file results in hits
if ((targetStart >= 0) && (op == ProcessFindAll)) //add new filetitle if this file results in hits
{
_pFinder->addFileNameTitle(fileName);
}
while (targetStart != -1)
while (targetStart != -1 && targetStart != -2)
{
//int posFindBefore = posFind;
targetStart = int((*_ppEditView)->execute(SCI_GETTARGETSTART));
@ -1866,6 +1875,8 @@ void FindReplaceDlg::saveInMacro(int cmd, int cmdType)
::SendMessage(_hParent, WM_FRSAVE_STR, IDFINDWHAT, reinterpret_cast<LPARAM>(_options._str2Search.c_str()));
booleans |= _options._isWholeWord?IDF_WHOLEWORD:0;
booleans |= _options._isMatchCase?IDF_MATCHCASE:0;
booleans |= _options._dotMatchesNewline?IDF_REDOTMATCHNL:0;
::SendMessage(_hParent, WM_FRSAVE_INT, IDNORMAL, _options._searchType);
if (cmd == IDCMARKALL)
{
@ -1911,6 +1922,7 @@ void FindReplaceDlg::execSavedCommand(int cmd, int intValue, generic_string stri
_env->_isInSelection = ((intValue & IDF_IN_SELECTION_CHECK)> 0);
_env->_isWrapAround = ((intValue & IDF_WRAP)> 0);
_env->_whichDirection = ((intValue & IDF_WHICH_DIRECTION)> 0);
_env->_dotMatchesNewline = ((intValue & IDF_REDOTMATCHNL)> 0);
break;
case IDNORMAL:
_env->_searchType = (SearchType)intValue;
@ -2063,6 +2075,7 @@ void FindReplaceDlg::initOptionsFromDlg()
_options._isWrapAround = isCheckedOrNot(IDWRAP);
_options._isInSelection = isCheckedOrNot(IDC_IN_SELECTION_CHECK);
_options._dotMatchesNewline = isCheckedOrNot(IDREDOTMATCHNL);
_options._doPurge = isCheckedOrNot(IDC_PURGE_CHECK);
_options._doMarkLine = isCheckedOrNot(IDC_MARKLINE_CHECK);

View File

@ -30,6 +30,8 @@
#include "DockingDlgInterface.h"
#endif //DOCKINGDLGINTERFACE_H
#include "BoostRegexSearch.h"
#define FIND_RECURSIVE 1
#define FIND_INHIDDENDIR 2
@ -78,10 +80,12 @@ struct FindOption
generic_string _directory;
bool _isRecursive;
bool _isInHiddenDir;
bool _dotMatchesNewline;
FindOption() : _isWholeWord(true), _isMatchCase(true), _searchType(FindNormal),\
_isWrapAround(true), _whichDirection(DIR_DOWN), _incrementalType(NotIncremental),
_doPurge(false), _doMarkLine(false),
_isInSelection(false), _isRecursive(true), _isInHiddenDir(false),
_dotMatchesNewline(false),
_filters(TEXT("")), _directory(TEXT("")) {};
};
@ -93,7 +97,8 @@ public:
static int buildSearchFlags(const FindOption * option) {
return (option->_isWholeWord ? SCFIND_WHOLEWORD : 0) |
(option->_isMatchCase ? SCFIND_MATCHCASE : 0) |
(option->_searchType == FindRegex ? SCFIND_REGEXP|SCFIND_POSIX : 0);
(option->_searchType == FindRegex ? SCFIND_REGEXP|SCFIND_POSIX : 0) |
((option->_searchType == FindRegex && option->_dotMatchesNewline) ? SCFIND_REGEXP_DOTMATCHESNL : 0);
};
static void displaySectionCentered(int posStart, int posEnd, ScintillaEditView * pEditView, bool isDownwards = true);

View File

@ -51,11 +51,11 @@ BEGIN
CONTROL "Match &whole word only",IDWHOLEWORD,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,12,88,140,15
CONTROL "Match &case",IDMATCHCASE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,12,100,140,15
CONTROL "Wra&p around",IDWRAP,"Button",BS_AUTOCHECKBOX | BS_MULTILINE | WS_TABSTOP,12,112,110,15
GROUPBOX "Search Mode",IDC_MODE_STATIC,6,131,156,48
GROUPBOX "Search Mode",IDC_MODE_STATIC,6,131,159,48
CONTROL "&Normal",IDNORMAL,"Button",BS_AUTORADIOBUTTON | WS_GROUP,12,143,126,10
CONTROL "E&xtended (\\n, \\r, \\t, \\0, \\x...)",IDEXTENDED,
"Button",BS_AUTORADIOBUTTON,12,155,145,10
CONTROL "Re&gular expression",IDREGEXP,"Button",BS_AUTORADIOBUTTON,12,167,139,10
CONTROL "Re&gular expression",IDREGEXP,"Button",BS_AUTORADIOBUTTON,12,167,78,10
CONTROL "&Up",IDDIRECTIONUP,"Button",BS_AUTORADIOBUTTON | WS_GROUP | WS_TABSTOP,177,143,64,12
CONTROL "&Down",IDDIRECTIONDOWN,"Button",BS_AUTORADIOBUTTON | WS_TABSTOP,177,155,63,12
GROUPBOX "Direction",IDC_DIR_STATIC,172,131,74,48,WS_GROUP
@ -74,6 +74,7 @@ BEGIN
CONTROL "On losing focus",IDC_TRANSPARENT_LOSSFOCUS_RADIO,"Button",BS_AUTORADIOBUTTON | WS_TABSTOP,268,143,86,10
CONTROL "Always",IDC_TRANSPARENT_ALWAYS_RADIO,"Button",BS_AUTORADIOBUTTON | WS_TABSTOP,268,155,83,10
CONTROL "",IDC_PERCENTAGE_SLIDER,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | NOT WS_VISIBLE | WS_TABSTOP,271,166,53,10
CONTROL "&. matches newline",IDREDOTMATCHNL,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,93,167,67,9
END
IDB_INCREMENTAL_BG BITMAP "..\\icons\\incrementalBg.bmp"

View File

@ -109,4 +109,6 @@
#define IDC_FRCOMMAND_EXEC 1701
#define IDC_FRCOMMAND_BOOLEANS 1702
#define IDREDOTMATCHNL 1703
#define IDF_REDOTMATCHNL 1024
#endif //FINDREPLACE_DLG_H

View File

@ -19,14 +19,14 @@
#include "xmlMatchedTagsHighlighter.h"
#include "ScintillaEditView.h"
int XmlMatchedTagsHighlighter::getFirstTokenPosFrom(int targetStart, int targetEnd, const char *token, pair<int, int> & foundPos)
int XmlMatchedTagsHighlighter::getFirstTokenPosFrom(int targetStart, int targetEnd, const char *token, bool isRegex, pair<int, int> & foundPos)
{
//int start = currentPos;
//int end = (direction == DIR_LEFT)?0:_pEditView->getCurrentDocLen();
_pEditView->execute(SCI_SETTARGETSTART, targetStart);
_pEditView->execute(SCI_SETTARGETEND, targetEnd);
_pEditView->execute(SCI_SETSEARCHFLAGS, SCFIND_REGEXP|SCFIND_POSIX);
_pEditView->execute(SCI_SETSEARCHFLAGS, isRegex ? (SCFIND_REGEXP|SCFIND_POSIX) : 0);
int posFind = _pEditView->execute(SCI_SEARCHINTARGET, (WPARAM)strlen(token), (LPARAM)token);
if (posFind != -1)
{
@ -42,8 +42,8 @@ TagCateg XmlMatchedTagsHighlighter::getTagCategory(XmlMatchedTagsPos & tagsPos,
int docLen = _pEditView->getCurrentDocLen();
int gtPos = getFirstTokenPosFrom(curPos, 0, ">", foundPos);
int ltPos = getFirstTokenPosFrom(curPos, 0, "<", foundPos);
int gtPos = getFirstTokenPosFrom(curPos, 0, ">", false, foundPos);
int ltPos = getFirstTokenPosFrom(curPos, 0, "<", false, foundPos);
if (ltPos != -1)
{
if ((gtPos != -1) && (ltPos < gtPos))
@ -63,8 +63,8 @@ TagCateg XmlMatchedTagsHighlighter::getTagCategory(XmlMatchedTagsPos & tagsPos,
// so now we are sure we have tag sign '<'
// We'll see on the right
int gtPosOnR = getFirstTokenPosFrom(curPos, docLen, ">", foundPos);
int ltPosOnR = getFirstTokenPosFrom(curPos, docLen, "<", foundPos);
int gtPosOnR = getFirstTokenPosFrom(curPos, docLen, ">", false, foundPos);
int ltPosOnR = getFirstTokenPosFrom(curPos, docLen, "<", false, foundPos);
if (gtPosOnR == -1)
return invalidTag;
@ -112,7 +112,7 @@ bool XmlMatchedTagsHighlighter::getMatchedTagPos(int searchStart, int searchEnd,
bool direction = searchEnd > searchStart;
pair<int, int> foundPos;
int ltPosOnR = getFirstTokenPosFrom(searchStart, searchEnd, tag2find, foundPos);
int ltPosOnR = getFirstTokenPosFrom(searchStart, searchEnd, tag2find, true, foundPos);
if (ltPosOnR == -1)
return false;
@ -150,7 +150,7 @@ bool XmlMatchedTagsHighlighter::getMatchedTagPos(int searchStart, int searchEnd,
e = tagsPos.tagCloseStart;
}
int ltTag = getFirstTokenPosFrom(s, e, oppositeTag2find, oppositeTagPos);
int ltTag = getFirstTokenPosFrom(s, e, oppositeTag2find, true, oppositeTagPos);
if (ltTag == -1)
{
@ -174,7 +174,7 @@ bool XmlMatchedTagsHighlighter::getMatchedTagPos(int searchStart, int searchEnd,
{
for(;;)
{
ltTag = getFirstTokenPosFrom(ltTag, e, oppositeTag2find, oppositeTagPos);
ltTag = getFirstTokenPosFrom(ltTag, e, oppositeTag2find, true, oppositeTagPos);
if (ltTag == -1)
{
@ -204,7 +204,7 @@ bool XmlMatchedTagsHighlighter::getMatchedTagPos(int searchStart, int searchEnd,
{
for(;;)
{
ltTag = getFirstTokenPosFrom(ltTag, e, oppositeTag2find, oppositeTagPos);
ltTag = getFirstTokenPosFrom(ltTag, e, oppositeTag2find, true, oppositeTagPos);
if (ltTag == -1)
{
if (direction == search2Left)

View File

@ -41,7 +41,7 @@ private:
ScintillaEditView *_pEditView;
int getFirstTokenPosFrom(int targetStart, int targetEnd, const char *token, std::pair<int, int> & foundPos);
int getFirstTokenPosFrom(int targetStart, int targetEnd, const char *token, bool isRegex, std::pair<int, int> & foundPos);
TagCateg getTagCategory(XmlMatchedTagsPos & tagsPos, int curPos);
bool getMatchedTagPos(int searchStart, int searchEnd, const char *tag2find, const char *oppositeTag2find, vector<int> oppositeTagFound, XmlMatchedTagsPos & tagsPos);
bool getXmlMatchedTagsPos(XmlMatchedTagsPos & tagsPos);

View File

@ -228,7 +228,7 @@
FavorSizeOrSpeed="1"
OmitFramePointers="false"
WholeProgramOptimization="false"
AdditionalIncludeDirectories="..\src\WinControls\AboutDlg;..\..\scintilla\include;..\include;..\src\WinControls;..\src\WinControls\ImageListSet;..\src\WinControls\OpenSaveFileDialog;..\src\WinControls\SplitterContainer;..\src\WinControls\StaticDialog;..\src\WinControls\TabBar;..\src\WinControls\ToolBar;..\src\MISC\Process;..\src\ScitillaComponent;..\src\MISC;..\src\MISC\SysMsg;..\src\WinControls\StatusBar;..\src;..\src\WinControls\StaticDialog\RunDlg;..\src\tinyxml;..\src\WinControls\ColourPicker;..\src\Win32Explr;..\src\MISC\RegExt;..\src\WinControls\TrayIcon;..\src\WinControls\shortcut;..\src\WinControls\Grid;..\src\WinControls\ContextMenu;..\src\MISC\PluginsManager;..\src\WinControls\Preference;..\src\WinControls\WindowsDlg;..\src\WinControls\TaskList;..\src\WinControls\DockingWnd;..\src\WinControls\ToolTip;..\src\MISC\Exception;..\src\MISC\Common;..\src\tinyxml\tinyXmlA;..\src\WinControls\AnsiCharPanel;..\src\WinControls\ClipboardHistory;..\src\WinControls\FindCharsInRange;..\src\WinControls\VerticalFileSwitcher;..\src\WinControls\ProjectPanel"
AdditionalIncludeDirectories="..\src\WinControls\AboutDlg;..\..\scintilla\include;..\include;..\src\WinControls;..\src\WinControls\ImageListSet;..\src\WinControls\OpenSaveFileDialog;..\src\WinControls\SplitterContainer;..\src\WinControls\StaticDialog;..\src\WinControls\TabBar;..\src\WinControls\ToolBar;..\src\MISC\Process;..\src\ScitillaComponent;..\src\MISC;..\src\MISC\SysMsg;..\src\WinControls\StatusBar;..\src;..\src\WinControls\StaticDialog\RunDlg;..\src\tinyxml;..\src\WinControls\ColourPicker;..\src\Win32Explr;..\src\MISC\RegExt;..\src\WinControls\TrayIcon;..\src\WinControls\shortcut;..\src\WinControls\Grid;..\src\WinControls\ContextMenu;..\src\MISC\PluginsManager;..\src\WinControls\Preference;..\src\WinControls\WindowsDlg;..\src\WinControls\TaskList;..\src\WinControls\DockingWnd;..\src\WinControls\ToolTip;..\src\MISC\Exception;..\src\MISC\Common;..\src\tinyxml\tinyXmlA;..\src\WinControls\AnsiCharPanel;..\src\WinControls\ClipboardHistory;..\src\WinControls\FindCharsInRange;..\src\WinControls\VerticalFileSwitcher;..\src\WinControls\ProjectPanel;..\src\WinControls\DocumentMap"
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USE_64BIT_TIME_T;TIXML_USE_STL;TIXMLA_USE_STL;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;_CRT_NON_CONFORMING_SWPRINTFS=1"
GeneratePreprocessedFile="0"
StringPooling="true"

View File

@ -0,0 +1,94 @@
#ifndef ANSIDOCUMENTITERATOR_H_12481491281240
#define ANSIDOCUMENTITERATOR_H_12481491281240
/**
 * Bidirectional iterator over the bytes of a Document, used to feed
 * single-byte (non UTF-8) document text to the regex engine.
 *
 * An iterator carries the document, its current byte position and the end
 * position of the range it belongs to.  Dereferencing returns the byte at
 * the current position via Document::CharAt.
 */
class AnsiDocumentIterator : public std::iterator<std::bidirectional_iterator_tag, char>
{
public:
	/** Creates a detached iterator (no document, position 0, end 0). */
	AnsiDocumentIterator() :
		m_pos(0),
		m_end(0),
		m_doc(0)
	{
	}

	/** Creates an iterator at @a pos within a range that finishes at @a end. */
	AnsiDocumentIterator(Document* doc, int pos, int end) :
		m_pos(pos),
		m_end(end),
		m_doc(doc)
	{
		clampToEnd();
	}

	/** Copies @a copy, applying the same position check as the other constructor. */
	AnsiDocumentIterator(const AnsiDocumentIterator& copy) :
		m_pos(copy.m_pos),
		m_end(copy.m_end),
		m_doc(copy.m_doc)
	{
		clampToEnd();
	}

	/** Two iterators are equal when they agree on "at end", document and position. */
	bool operator == (const AnsiDocumentIterator& other) const
	{
		if (ended() != other.ended())
			return false;
		if (m_doc != other.m_doc)
			return false;
		return m_pos == other.m_pos;
	}

	bool operator != (const AnsiDocumentIterator& other) const
	{
		return !(*this == other);
	}

	/** Returns the byte at the current position. */
	char operator * () const
	{
		return m_doc->CharAt(m_pos);
	}

	/** Moves one byte forward; asserts (debug only) that the end has not been reached. */
	AnsiDocumentIterator& operator ++ ()
	{
		PLATFORM_ASSERT(m_pos < m_end);
		++m_pos;
		return *this;
	}

	/** Moves one byte backward; no lower bound is checked. */
	AnsiDocumentIterator& operator -- ()
	{
		--m_pos;
		return *this;
	}

	/** Current byte position in the document. */
	int pos() const
	{
		return m_pos;
	}

private:
	/** Asserts in debug builds, and enforces in release builds, that m_pos <= m_end. */
	void clampToEnd()
	{
		// Check for debug builds
		PLATFORM_ASSERT(m_pos <= m_end);
		// Ensure for release.
		if (m_pos > m_end)
		{
			m_pos = m_end;
		}
	}

	bool ended() const
	{
		return m_pos == m_end;
	}

	int m_pos;
	int m_end;
	Document* m_doc;
};
#endif

View File

@ -0,0 +1,323 @@
/**
* Copyright (c) since 2009 Simon Steele - http://untidy.net/
* Based on the work of Simon Steele for Programmer's Notepad 2 (http://untidy.net)
* Converted from boost::xpressive to boost::regex and performance improvements
* (principally caching the compiled regex), and support for UTF8 encoded text
* (c) 2012 Dave Brotherstone - Changes for boost::regex
*
*
*/
#include <stdlib.h>
#include <iterator>
#include "scintilla.h"
#include "Platform.h"
#include "SplitVector.h"
#include "Partitioning.h"
#include "RunStyles.h"
#include "CellBuffer.h"
#include "CharClassify.h"
#include "Decoration.h"
#include "ILexer.h"
#include "Document.h"
#include "UniConversion.h"
#include "UTF8DocumentIterator.h"
#include "AnsiDocumentIterator.h"
#include "BoostRegexSearch.h"
#include <boost/regex.hpp>
#define CP_UTF8 65001
#define SC_CP_UTF8 65001
#ifdef SCI_NAMESPACE
using namespace Scintilla;
#endif
using namespace boost;
// Compiled expression types: narrow characters for single-byte documents,
// wide characters for UTF-8 documents (whose text is presented as wchar_t).
typedef basic_regex<char> charregex_t;
typedef boost::wregex wcharregex_t;
// , std::vector<boost::sub_match<DocumentIterator> >::allocator_type
// Match result types, one per document iterator kind.
typedef match_results<UTF8DocumentIterator> utf8match_t;
typedef match_results<AnsiDocumentIterator> ansimatch_t;
/**
 * RegexSearchBase implementation backed by boost::regex.
 *
 * Keeps the most recently compiled expression (one narrow, one wide) together
 * with the pattern text and flags it was compiled from, and keeps the results
 * of the last search so that SubstituteByPosition() can expand replacement
 * text against them.
 */
class BoostRegexSearch : public RegexSearchBase
{
public:
	BoostRegexSearch() : substituted(NULL), lastCompileFlags(-1) {}

	virtual ~BoostRegexSearch()
	{
		// delete [] on a null pointer is a no-op, so no test is needed.
		delete [] substituted;
		substituted = NULL;
	}

	virtual long FindText(Document* doc, int minPos, int maxPos, const char *s,
		bool caseSensitive, bool word, bool wordStart, int flags, int *length);

	virtual const char *SubstituteByPosition(Document* doc, const char *text, int *length);

private:
	// This class owns the raw 'substituted' buffer; an implicit copy would
	// delete it twice.  Declared but intentionally not defined.
	BoostRegexSearch(const BoostRegexSearch&);
	BoostRegexSearch& operator = (const BoostRegexSearch&);

	/** Converts a NUL terminated UTF-8 string to a new[]-allocated wide string. */
	wchar_t *utf8ToWchar(const char *utf8);
	/** Converts a NUL terminated wide string to a new[]-allocated UTF-8 string. */
	char *wcharToUtf8(const wchar_t *w);

	charregex_t m_charre;               // last expression compiled for single-byte documents
	wcharregex_t m_wcharre;             // last expression compiled for UTF-8 documents
	utf8match_t m_utf8match;            // results of the last UTF-8 search
	ansimatch_t m_ansimatch;            // results of the last single-byte search
	char *substituted;                  // buffer returned by SubstituteByPosition(), owned here
	std::string m_lastRegexString;      // pattern text behind m_charre
	std::string m_lastRegexUtf8string;  // pattern text behind m_wcharre
	int lastCompileFlags;               // flags used for the last compilation, -1 before any
};
#ifdef SCI_NAMESPACE
namespace Scintilla
{
#endif

/**
 * Factory for the regular expression engine: returns a newly allocated
 * BoostRegexSearch.  The character classification table is not used.
 */
RegexSearchBase *CreateRegexSearch(CharClassify* /* charClassTable */)
{
	RegexSearchBase *engine = new BoostRegexSearch();
	return engine;
}

#ifdef SCI_NAMESPACE
}
#endif
/**
* Find text in document, supporting both forward and backward
* searches (just pass minPos > maxPos to do a backward search)
*/
long BoostRegexSearch::FindText(Document* doc, int minPos, int maxPos, const char *s,
bool caseSensitive, bool /*word*/, bool /*wordStart*/, int searchFlags, int *length)
{
int startPos, endPos, increment;
if (minPos > maxPos)
{
startPos = maxPos;
endPos = minPos;
increment = -1;
}
else
{
startPos = minPos;
endPos = maxPos;
increment = 1;
}
// Range endpoints should not be inside DBCS characters, but just in case, move them.
startPos = doc->MovePositionOutsideChar(startPos, 1, false);
endPos = doc->MovePositionOutsideChar(endPos, 1, false);
int compileFlags(regex_constants::ECMAScript);
if (!caseSensitive)
{
compileFlags |= regex_constants::icase;
}
bool isUtf8 = (doc->CodePage() == SC_CP_UTF8);
try
{
if (compileFlags != lastCompileFlags
|| (isUtf8 && m_lastRegexUtf8string != s)
|| (!isUtf8 && m_lastRegexString != s)) // Test to see if we're called with the same
// regex as last time, if we are, then we don't need to recompile it
{
if (isUtf8)
{
const wchar_t* wchars = utf8ToWchar(s);
m_wcharre = wcharregex_t(wchars, static_cast<regex_constants::syntax_option_type>(compileFlags));
delete [] wchars;
m_lastRegexUtf8string = s;
}
else
{ // Ansi
m_charre = charregex_t(s, static_cast<regex_constants::syntax_option_type>(compileFlags));
m_lastRegexString = s;
}
lastCompileFlags = compileFlags;
}
}
catch(regex_error& /*ex*/)
{
// -1 is normally used for not found, -2 is used here for invalid regex
return -2;
}
// Work out the range of lines we're searching across, moving beyond an empty end-of-line
int lineRangeStart = doc->LineFromPosition(startPos);
int lineRangeEnd = doc->LineFromPosition(endPos);
if ((increment == 1) &&
(startPos >= doc->LineEnd(lineRangeStart)) &&
(lineRangeStart < lineRangeEnd))
{
// the start position is at end of line or between line end characters.
lineRangeStart++;
startPos = doc->LineStart(lineRangeStart);
}
regex_constants::match_flag_type flags(regex_constants::match_default);
// Work out the flags:
if (startPos != doc->LineStart(lineRangeStart))
{
flags |= regex_constants::match_not_bol;
}
if (endPos != doc->LineEnd(lineRangeEnd))
{
flags |= regex_constants::match_not_eol;
}
if (0 == (searchFlags & SCFIND_REGEXP_DOTMATCHESNL))
{
flags |= regex_constants::match_not_dot_newline;
}
int pos(-1);
int lenRet(0);
if (doc->CodePage() == SC_CP_UTF8)
{
UTF8DocumentIterator end(doc, endPos, endPos);
bool success = boost::regex_search(UTF8DocumentIterator(doc, startPos, endPos), end, m_utf8match, m_wcharre, flags);
if (success)
{
pos = m_utf8match[0].first.pos();
lenRet = m_utf8match[0].second.pos() - pos;
if (increment == -1)
{
// Check for the last match on this line.
int repetitions = 100; // Break out of infinite loop
int previousPos = pos;
while (success && ((pos + lenRet) <= endPos))
{
if (previousPos >= pos && 0 >= (--repetitions))
break;
previousPos = pos;
success = regex_search(UTF8DocumentIterator(doc, pos + 1, endPos), end, m_utf8match, m_wcharre, flags);
// success = regex_search(DocumentIterator(doc, pos + 1, endPos), end, match, re, static_cast<regex_constants::match_flag_type>(flags));
if (success)
{
if ((pos + lenRet) <= minPos)
{
pos = m_utf8match[0].first.pos();
lenRet = m_utf8match[0].second.pos() - pos;
}
else
{
success = 0;
}
}
}
}
*length = lenRet;
}
}
else
{
AnsiDocumentIterator end(doc, endPos, endPos);
bool success = boost::regex_search(AnsiDocumentIterator(doc, startPos, endPos), end, m_ansimatch, m_charre, flags);
if (success)
{
pos = m_ansimatch[0].first.pos();
lenRet = m_ansimatch.length();
if (increment == -1)
{
// Check for the last match on this line.
int repetitions = 100; // Break out of infinite loop
int previousPos = pos;
while (success && ((pos + lenRet) <= endPos))
{
if (previousPos >= pos && 0 >= (--repetitions))
break;
previousPos = pos;
success = regex_search(AnsiDocumentIterator(doc, pos + 1, endPos), end, m_ansimatch, m_charre, flags);
// success = regex_search(DocumentIterator(doc, pos + 1, endPos), end, match, re, static_cast<regex_constants::match_flag_type>(flags));
if (success)
{
if ((pos + lenRet) <= minPos)
{
pos = m_ansimatch[0].first.pos();
lenRet = m_ansimatch[0].length();
}
else
{
success = 0;
}
}
}
}
*length = lenRet;
}
}
return pos;
}
/**
 * Expands the replacement text against the results of the last search.
 *
 * @param doc     document the last search ran on; its code page selects
 *                between the UTF-8 and the single-byte match results
 * @param text    replacement format string, in the document's encoding
 * @param length  receives the length in bytes of the returned text
 * @return the expanded text, NUL terminated; the buffer is owned by this
 *         object and is released by the next call or by the destructor
 */
const char *BoostRegexSearch::SubstituteByPosition(Document* doc, const char *text, int *length) {
	delete [] substituted;
	substituted = NULL;

	if (doc->CodePage() == SC_CP_UTF8)
	{
		// Copy the converted format into a std::wstring at once so the raw
		// buffer cannot leak if formatting throws.
		wchar_t *rawFormat = utf8ToWchar(text);
		const std::wstring wformat(rawFormat);
		delete [] rawFormat;

		const std::wstring replaced = m_utf8match.format(wformat.c_str(), boost::format_all);
		substituted = wcharToUtf8(replaced.c_str());
		*length = strlen(substituted);
	}
	else
	{
		const std::string replaced = m_ansimatch.format(text, boost::format_all);
		*length = replaced.size();
		substituted = new char[replaced.size() + 1];
		// Copy by size rather than with strcpy: the expansion may contain NUL
		// bytes, and every one of the *length reported bytes must be valid.
		replaced.copy(substituted, replaced.size());
		substituted[replaced.size()] = '\0';
	}
	return substituted;
}
/**
 * Converts a NUL terminated UTF-8 string to UTF-16.
 * The result is allocated with new[] and must be released by the caller
 * with delete [].
 */
wchar_t *BoostRegexSearch::utf8ToWchar(const char *utf8)
{
	const int byteCount = strlen(utf8);
	const int unitCount = UTF16Length(utf8, byteCount);

	// One extra element for the terminator.
	wchar_t *wide = new wchar_t[unitCount + 1];
	UTF16FromUTF8(utf8, byteCount, wide, unitCount + 1);
	wide[unitCount] = 0;
	return wide;
}
/**
 * Converts a NUL terminated UTF-16 string to UTF-8.
 * The result is allocated with new[] and must be released by the caller
 * with delete [].
 */
char* BoostRegexSearch::wcharToUtf8(const wchar_t *w)
{
	const int unitCount = wcslen(w);
	const int byteCount = UTF8Length(w, unitCount);

	// One extra element for the terminator.
	char *narrow = new char[byteCount + 1];
	UTF8FromUTF16(w, unitCount, narrow, byteCount);
	narrow[byteCount] = 0;
	return narrow;
}

View File

@ -0,0 +1,88 @@
@ECHO OFF
:: Perform the pre-steps to build boost and set the boost path for the build file
:: Usage: BuildBoost.bat [path_to_boost]
:: TODO - Shift stuff to get the boost path / check/display usage

SET BOOSTPATH=%1

IF NOT EXIST "%BOOSTPATH%\boost\regex.hpp" (
GOTO BOOSTNOTFOUND
)

:: Remember the caller's directory exactly once, so that the single POPD on
:: each exit path below always puts the caller back where they started.
PUSHD .

:: Failures are tested with "IF ERRORLEVEL 1" (true for any exit code of 1 or
:: more).  "IF NOT ERRORLEVEL 0" is only true for negative exit codes, so it
:: would let ordinary build failures pass unnoticed.
IF NOT EXIST "%BOOSTPATH%\bjam\bin\bjam.exe" (
ECHO Building BJAM, the boost build tool
CD /D %BOOSTPATH%\tools\build\v2
CALL bootstrap.bat
%BOOSTPATH%\tools\build\v2\b2 --prefix=%BOOSTPATH%\bjam install
IF ERRORLEVEL 1 GOTO BUILDERROR
)

ECHO.
ECHO Building Boost::regex

:: /D so that the change also works when boost lives on another drive
CD /D %BOOSTPATH%\libs\regex\build

%BOOSTPATH%\bjam\bin\bjam variant=release threading=multi link=static runtime-link=static
IF ERRORLEVEL 1 (
GOTO BUILDERROR
)

%BOOSTPATH%\bjam\bin\bjam variant=debug threading=multi link=static runtime-link=static
IF ERRORLEVEL 1 (
GOTO BUILDERROR
)

:: Pick the output directory of whichever supported Visual Studio toolset was used
IF EXIST "%BOOSTPATH%\bin.v2\libs\regex\build\msvc-8.0" (
SET BOOSTVSVERPATH=%BOOSTPATH%\bin.v2\libs\regex\build\msvc-8.0
) ELSE IF EXIST "%BOOSTPATH%\bin.v2\libs\regex\build\msvc-9.0" (
SET BOOSTVSVERPATH=%BOOSTPATH%\bin.v2\libs\regex\build\msvc-9.0
) ELSE IF EXIST "%BOOSTPATH%\bin.v2\libs\regex\build\msvc-10.0" (
SET BOOSTVSVERPATH=%BOOSTPATH%\bin.v2\libs\regex\build\msvc-10.0
)

:: Record both paths next to this script for the makefile to include
ECHO # Autogenerated file, run BuildBoost.bat [path_to_boost] to generate > %~dp0%\boostpath.mak
ECHO BOOSTPATH=%BOOSTPATH% >> %~dp0%\boostpath.mak
ECHO BOOSTVSVERPATH=%BOOSTVSVERPATH% >> %~dp0%\boostpath.mak

POPD

ECHO Boost::regex built.
ECHO.
ECHO Now you need to build scintilla.
ECHO First, edit the scintilla\win32\scintilla.mak, and make sure there's a line in there that says
ECHO.
ECHO !INCLUDE ../boostregex/nppSpecifics.mak
ECHO.
ECHO just after all the LOBJS have been defined (it's around line 211)
ECHO If not, add it in.
ECHO Then, from the scintilla\win32 directory
ECHO.
ECHO nmake -f scintilla.mak
ECHO.
ECHO.
GOTO EOF

:BOOSTNOTFOUND
ECHO Boost Path not valid. Run BuildBoost.bat with the directory where you unpacked your boost zip
ECHO Boost is available free from www.boost.org
ECHO.
ECHO e.g.
ECHO buildboost.bat d:\libs\boost_1_48_0
GOTO EOF

:BUILDERROR
:: Only reached after the PUSHD above, so restore the caller's directory first
POPD
ECHO There was an error building boost. Please see the messages above for details.
ECHO - Have you got a clean extract from a recent boost version, such as 1.48?
ECHO - Download a fresh copy from www.boost.org and extract it to a directory,
ECHO and run the batch again with the name of that directory

:EOF

View File

@ -0,0 +1,57 @@
#include "UTF8DocumentIterator.h"
/**
 * Decodes the character that starts at m_pos.
 *
 * Sets m_utf8Length to the number of document bytes the character occupies,
 * m_utf16Length to the number of UTF-16 code units it needs (1 or 2) and
 * stores those code units in m_character.
 */
void UTF8DocumentIterator::readCharacter()
{
	unsigned char currentChar = m_doc->CharAt(m_pos);
	if (currentChar & 0x80)
	{
		// Count the leading 1 bits of the first byte to get the expected
		// sequence length.  The count is capped at 6, the last valid index of
		// m_firstByteMask: bytes that are not valid lead bytes (0xFE, 0xFF, or
		// a stray continuation byte such as 0xBF) would otherwise produce 7
		// or 8 and read beyond the end of the table.
		int mask = 0x40;
		int nBytes = 1;
		do
		{
			mask >>= 1;
			++nBytes;
		} while ((nBytes < 6) && (currentChar & mask));

		int result = currentChar & m_firstByteMask[nBytes];
		int pos = m_pos;
		m_utf8Length = 1;

		// work out the unicode point, and count the actual bytes.
		// If a byte does not start with 10xxxxxx then it's not part of the
		// the code. Therefore invalid UTF-8 encodings are dealt with, simply by stopping when
		// the UTF8 extension bytes are no longer valid.
		while ((--nBytes) && (pos < m_end) && (0x80 == ((currentChar = m_doc->CharAt(++pos)) & 0xC0)))
		{
			result = (result << 6) | (currentChar & 0x3F);
			++m_utf8Length;
		}

		if (result >= 0x10000)
		{
			// Outside the BMP: split into a UTF-16 surrogate pair
			result -= 0x10000;
			m_utf16Length = 2;
			m_character[0] = 0xD800 + (result >> 10);
			m_character[1] = 0xDC00 + (result & 0x3FF);
		}
		else
		{
			m_utf16Length = 1;
			m_character[0] = static_cast<wchar_t>(result);
		}
	}
	else
	{
		// Plain ASCII: one byte, one code unit
		m_utf8Length = 1;
		m_utf16Length = 1;
		m_characterIndex = 0;
		m_character[0] = static_cast<wchar_t>(currentChar);
	}
}
// Masks applied to the first byte of a UTF-8 sequence to extract its payload
// bits; readCharacter() indexes this table with the sequence length in bytes.
const unsigned char UTF8DocumentIterator::m_firstByteMask[7] = { 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };

View File

@ -0,0 +1,149 @@
#ifndef UTF8DOCUMENTITERATOR_H_3452843291318441149
#define UTF8DOCUMENTITERATOR_H_3452843291318441149
#include <stdlib.h>
#include <iterator>
#include "Platform.h"
#include "SplitVector.h"
#include "Partitioning.h"
#include "RunStyles.h"
#include "CellBuffer.h"
#include "CharClassify.h"
#include "Decoration.h"
#include <ILexer.h>
#include <Document.h>
/**
 * Bidirectional iterator that presents the UTF-8 bytes of a Document as a
 * sequence of UTF-16 code units (wchar_t), for use with a wide regex.
 *
 * Each step decodes one character with readCharacter().  A character that
 * needs a surrogate pair is exposed as two consecutive code units which share
 * the same document position; m_characterIndex says which of the two the
 * iterator currently refers to.
 */
class UTF8DocumentIterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t>
{
public:
	/** Creates a detached iterator (no document, everything zero). */
	UTF8DocumentIterator() :
		m_pos(0),
		m_characterIndex(0),
		m_end(0),
		m_utf8Length(0),
		m_utf16Length(0),
		m_doc(0)
	{
		// Keep operator* well defined even on a detached iterator.
		m_character[0] = 0;
		m_character[1] = 0;
	}

	/** Creates an iterator at byte @a pos within a range that finishes at byte @a end. */
	UTF8DocumentIterator(Document* doc, int pos, int end) :
		m_pos(pos),
		m_characterIndex(0),
		m_end(end),
		m_utf8Length(0),
		m_utf16Length(0),
		m_doc(doc)
	{
		m_character[0] = 0;
		m_character[1] = 0;
		// Check for debug builds
		PLATFORM_ASSERT(m_pos <= m_end);
		// Ensure for release.
		if (m_pos > m_end)
		{
			m_pos = m_end;
		}
		readCharacter();
	}

	/** Copies @a copy, including its already decoded character. */
	UTF8DocumentIterator(const UTF8DocumentIterator& copy) :
		m_pos(copy.m_pos),
		m_characterIndex(copy.m_characterIndex),
		m_end(copy.m_end),
		m_utf8Length(copy.m_utf8Length),
		m_utf16Length(copy.m_utf16Length),
		m_doc(copy.m_doc)
	{
		// Check for debug builds
		PLATFORM_ASSERT(m_pos <= m_end);
		m_character[0] = copy.m_character[0];
		m_character[1] = copy.m_character[1];
		// Ensure for release.
		if (m_pos > m_end)
		{
			m_pos = m_end;
		}
	}

	/**
	 * Equal when both agree on "at end", document, byte position and code
	 * unit index.  The index is part of the comparison because the two halves
	 * of a surrogate pair share one byte position and must still be distinct
	 * iterator values.
	 */
	bool operator == (const UTF8DocumentIterator& other) const
	{
		return (ended() == other.ended())
			&& (m_doc == other.m_doc)
			&& (m_pos == other.m_pos)
			&& (m_characterIndex == other.m_characterIndex);
	}

	bool operator != (const UTF8DocumentIterator& other) const
	{
		return !(*this == other);
	}

	/** Returns the current UTF-16 code unit. */
	wchar_t operator * () const
	{
		return m_character[m_characterIndex];
	}

	/**
	 * Sets the byte position only.
	 * NOTE(review): the cached character is not re-read here, so operator*
	 * keeps returning the previously decoded code unit - confirm whether any
	 * caller relies on this operator.
	 */
	UTF8DocumentIterator& operator = (int other)
	{
		m_pos = other;
		return *this;
	}

	/** Advances by one UTF-16 code unit. */
	UTF8DocumentIterator& operator ++ ()
	{
		PLATFORM_ASSERT(m_pos < m_end);
		if (2 == m_utf16Length && 0 == m_characterIndex)
		{
			// Move to the second half of the surrogate pair
			m_characterIndex = 1;
		}
		else
		{
			m_pos += m_utf8Length;
			m_characterIndex = 0;
			readCharacter();
		}
		return *this;
	}

	/** Steps back by one UTF-16 code unit. */
	UTF8DocumentIterator& operator -- ()
	{
		if (m_utf16Length == 2 && m_characterIndex == 1)
		{
			// Move to the first half of the surrogate pair
			m_characterIndex = 0;
		}
		else
		{
			--m_pos;
			// Skip past the UTF-8 extension bytes
			while (0x80 == (m_doc->CharAt(m_pos) & 0xC0))
				--m_pos;

			readCharacter();
			if (m_utf16Length == 2)
			{
				// Arriving from the right: land on the second half of the pair
				m_characterIndex = 1;
			}
		}
		return *this;
	}

	/** Byte position in the document of the current character. */
	int pos() const
	{
		return m_pos;
	}

private:
	void readCharacter();

	bool ended() const
	{
		return m_pos == m_end;
	}

	int m_pos;               // byte position of the current character
	wchar_t m_character[2];  // decoded code unit(s) of the current character
	int m_characterIndex;    // which element of m_character operator* returns
	int m_end;               // byte position where the range finishes
	int m_utf8Length;        // bytes occupied by the current character
	int m_utf16Length;       // code units needed by the current character (1 or 2)
	Document* m_doc;
	static const unsigned char m_firstByteMask[];
};
#endif // UTF8DOCUMENTITERATOR_H_3452843291318441149

View File

@ -0,0 +1,44 @@
# This makefile should be included in the main scintilla.mak file,
# just after where LOBJS is defined (around line 211).
#
# The following line should be added around line 211 of scintilla.mak
# !INCLUDE ../boostregex/nppSpecifics.mak
# Set your boost path (the root of where you've unpacked your boost zip).
# Boost is available from www.boost.org
# Everything below applies only when boostpath.mak exists; that file defines
# BOOSTPATH and BOOSTVSVERPATH. Without it this fragment adds nothing.
!IF EXIST(..\boostregex\boostpath.mak)
!INCLUDE ..\boostregex\boostpath.mak
# Append the boost regex objects to both object lists.
SOBJS=\
$(SOBJS)\
$(DIR_O)\BoostRegexSearch.obj\
$(DIR_O)\UTF8DocumentIterator.obj
LOBJS=\
$(LOBJS)\
$(DIR_O)\BoostRegexSearch.obj\
$(DIR_O)\UTF8DocumentIterator.obj
# Make the boost headers visible and define SCI_OWNREGEX
# (presumably switches Scintilla to the regex engine supplied here - confirm).
INCLUDEDIRS=$(INCLUDEDIRS) -I$(BOOSTPATH)
CXXFLAGS=$(CXXFLAGS) -DSCI_OWNREGEX
# Link against the boost regex library variant that matches the build type.
!IFDEF DEBUG
LDFLAGS=$(LDFLAGS) -LIBPATH:$(BOOSTVSVERPATH)\debug\link-static\runtime-link-static\threading-multi
!ELSE
LDFLAGS=$(LDFLAGS) -LIBPATH:$(BOOSTVSVERPATH)\release\link-static\runtime-link-static\threading-multi
!ENDIF
# Build rules for the two sources that live in the boostregex directory.
$(DIR_O)\UTF8DocumentIterator.obj:: ../boostregex/UTF8DocumentIterator.cxx
$(CC) $(CXXFLAGS) -c ../boostregex/UTF8DocumentIterator.cxx
$(DIR_O)\BoostRegexSearch.obj:: ../boostregex/BoostRegexSearch.cxx ../src/CharClassify.h ../src/RESearch.h
$(CC) $(CXXFLAGS) -c ../boostregex/BoostRegexSearch.cxx
!ENDIF

View File

@ -206,6 +206,8 @@ LOBJS=\
$(DIR_O)\XPM.obj \
$(LEXOBJS)
!INCLUDE ../boostregex/nppSpecifics.mak
$(DIR_O)\ScintRes.res : ScintRes.rc
$(RC) -fo$@ $**