ADD: Case insensitive text search with different encodings (patch by GrayElf)

This commit is contained in:
Alexander Koblov 2014-01-19 14:17:12 +00:00
commit a1f65bdd8a
5 changed files with 265 additions and 21 deletions

View file

@ -393,6 +393,8 @@ type
function ConvertToUTF8(const sText: AnsiString): UTF8String;
function ConvertFromUTF8(const sText: UTF8String): AnsiString;
function FindUtf8Text(iStartPos: PtrInt; const sSearchText: UTF8String;
bCaseSensitive: Boolean; bSearchBackwards: Boolean): PtrInt;
function DetectEncoding: TViewerEncoding;
@ -3171,6 +3173,81 @@ begin
end;
end;
{
  Searches the viewed data for sSearchText, comparing one decoded character
  at a time (each character is obtained through GetNextCharAsUtf8).

  iStartPos        - position at which the search starts.
  sSearchText      - UTF-8 text to look for.
  bCaseSensitive   - when False, characters are compared after UTF8UpperCase.
  bSearchBackwards - when True, candidate positions are tried in decreasing
                     order, one position at a time.

  Returns the position of the first match found, or PtrInt(-1) when the search
  text is empty or no match exists in the searched direction.
}
function TViewerControl.FindUtf8Text(iStartPos: PtrInt; const sSearchText: UTF8String;
                                     bCaseSensitive: Boolean; bSearchBackwards: Boolean): PtrInt;
var
  SearchTextLength: Integer;
  sSearchChars: array of UTF8String;
  pCurrentAddr, pEndAddr: PtrInt;
  i, charLen: Integer;

  // Returns True when the characters decoded starting at pAdr are equal to
  // all prepared search characters, in order.
  function sPos2(pAdr: PtrInt): Boolean;
  var
    curChr: UTF8String;
    i, charLen: Integer;
  begin
    Result := False;
    for i := 0 to SearchTextLength - 1 do
    begin
      curChr := GetNextCharAsUtf8(pAdr, charLen);
      // sSearchChars is already upper-cased for case-insensitive searches,
      // so only the character read from the data needs converting here.
      if not bCaseSensitive then
        curChr := UTF8UpperCase(curChr);
      if curChr <> sSearchChars[i] then
        Exit;
      // Always advance, even when no character length was reported.
      if charLen > 0 then
        pAdr := pAdr + charLen
      else
        Inc(pAdr);
    end;
    Result := True;
  end;

begin
  Result := PtrInt(-1);

  SearchTextLength := UTF8Length(sSearchText);
  if SearchTextLength <= 0 then
    Exit;

  // Split the search text into separate characters once; for case-insensitive
  // searches upper-case them here instead of on every single comparison.
  SetLength(sSearchChars, SearchTextLength);
  for i := 1 to SearchTextLength do
  begin
    sSearchChars[i - 1] := UTF8Copy(sSearchText, i, 1);
    if not bCaseSensitive then
      sSearchChars[i - 1] := UTF8UpperCase(sSearchChars[i - 1]);
  end;

  pCurrentAddr := iStartPos;
  // Last position at which the search text (in the viewer's encoding) still fits.
  pEndAddr := FHighLimit - Length(ConvertFromUTF8(sSearchText));

  if bSearchBackwards and (pCurrentAddr > pEndAddr) then
    // Move to the first possible position for searching backwards.
    pCurrentAddr := pEndAddr;

  // The loop condition also covers a negative pEndAddr and an out-of-range
  // start position: in those cases the body never runs and -1 is returned.
  while (pCurrentAddr >= 0) and (pCurrentAddr <= pEndAddr) do
  begin
    if sPos2(pCurrentAddr) then
    begin
      Result := pCurrentAddr;
      Exit;
    end;

    if bSearchBackwards then
      Dec(pCurrentAddr)
    else
    begin
      // Step forward by the length of the character at the current position.
      GetNextCharAsUtf8(pCurrentAddr, charLen);
      if charLen > 0 then
        pCurrentAddr := pCurrentAddr + charLen
      else
        Inc(pCurrentAddr);
    end;
  end;
end;
procedure Register;
begin
RegisterComponents('SeksiCmd', [TViewerControl]);

View file

@ -471,17 +471,20 @@ end;
// Selection handler of cmbEncoding: adjusts the cbCaseSens checkbox depending
// on whether the selected item is the EncodingAnsi entry.
// NOTE(review): this listing appears to come from a diff view without +/-
// markers, so the handler body seems to contain the pre-change statements
// followed by the post-change statements (the same if/else wrapped in a
// gUseMmapInSearch check). Confirm against the real file before editing.
procedure TfrmFindDlg.cmbEncodingSelect(Sender: TObject);
begin
if cmbEncoding.ItemIndex <> cmbEncoding.Items.IndexOf(EncodingAnsi) then
begin
// Remember the current checkbox state in Tag, then force it on and lock it.
cbCaseSens.Tag:= Integer(cbCaseSens.Checked);
cbCaseSens.Checked:= True;
cbCaseSens.Enabled:= False;
end
else
begin
// EncodingAnsi selected: restore the state saved in Tag and unlock the checkbox.
cbCaseSens.Checked:= Boolean(cbCaseSens.Tag);
cbCaseSens.Enabled:= True;
end;
// The statements below apply the same logic only when gUseMmapInSearch is off.
if not gUseMmapInSearch then
begin
if cmbEncoding.ItemIndex <> cmbEncoding.Items.IndexOf(EncodingAnsi) then
begin
// Remember the current checkbox state in Tag, then force it on and lock it.
cbCaseSens.Tag:= Integer(cbCaseSens.Checked);
cbCaseSens.Checked:= True;
cbCaseSens.Enabled:= False;
end
else
begin
// EncodingAnsi selected: restore the state saved in Tag and unlock the checkbox.
cbCaseSens.Checked:= Boolean(cbCaseSens.Tag);
cbCaseSens.Enabled:= True;
end;
end;
end;
constructor TfrmFindDlg.Create(TheOwner: TComponent);

View file

@ -1975,8 +1975,8 @@ end;
procedure TfrmViewer.DoSearch(bQuickSearch: Boolean; bSearchBackwards: Boolean);
var
PAdr: PChar;
iSizeData: Integer;
PAdr: PtrInt;
iSizeData, charLen: Integer;
sSearchTextU: UTF8String;
sSearchTextA: AnsiString;
begin
@ -2039,13 +2039,12 @@ begin
end;
sSearchTextA:= ViewerControl.ConvertFromUTF8(sSearchTextU);
PAdr := PosMem(ViewerControl.GetDataAdr, ViewerControl.FileSize,
FLastSearchPos, sSearchTextA,
FFindDialog.cbCaseSens.Checked, bSearchBackwards);
PAdr := ViewerControl.FindUtf8Text(FLastSearchPos, sSearchTextU,
FFindDialog.cbCaseSens.Checked, bSearchBackwards);
if (PAdr <> Pointer(-1)) then
if (PAdr <> PtrInt(-1)) then
begin
FLastSearchPos := PAdr - ViewerControl.GetDataAdr;
FLastSearchPos := PAdr;
// Text found, show it in ViewerControl if not visible
ViewerControl.MakeVisible(FLastSearchPos);
// Select found text.

163
src/ufindbyrmr.pas Normal file
View file

@ -0,0 +1,163 @@
{
Implements searching in memory with optional case-insensitivity (for any
single-byte encoding) over a file mapped into memory.
Based on ufindmmap.pas by radek.cervinka@centrum.cz
}
unit uFindByrMr;
{$mode objfpc}{$H+}
interface
type
TAbortFunction = function: Boolean of object;
TRecodeTable = array[0..255] of byte;
{en
Searches a file for a string using memory mapping.
@param(sFileName File to search in.)
@param(sFindData String to search for.)
@param(RecodeTable table for case-insensitive compare)
@param(Abort This function is called repeatedly during searching.
If it returns @true the search is aborted.)
@returns(-1 in case of error
@br 0 if the string wasn't found
@br 1 if the string was found)
}
function FindMmapBM(const sFileName:String; const sFindData:String; RecodeTable:TRecodeTable;
Abort: TAbortFunction):Integer;
{en
Initializes the byte recoding table used to compare text in the given encoding.
@param(Encoding Name of the encoding.)
@param(bCaseSensitive If @true the search is case sensitive.)
@returns(TRecodeTable array to pass to FindMmapBM)
}
function InitRecodeTable(Encoding:string; bCaseSensitive: Boolean): TRecodeTable;
implementation
uses
DCOSUtils,LConvEncoding, LCLProc;
type
TIntArray = array of Integer;
{
  Builds the 256-entry byte table used to normalise bytes before comparison.

  Encoding       - name of the encoding the searched data is in.
  bCaseSensitive - when True the table is the identity mapping; otherwise each
                   byte is mapped to the byte of its upper-case form, obtained
                   by converting to UTF-8, upper-casing and converting back.

  Every entry is always initialised: a byte keeps its own value when the
  round trip does not produce exactly one byte (conversion failed, or the
  upper-case form is not representable as a single byte in Encoding).
}
function InitRecodeTable(Encoding:string; bCaseSensitive: Boolean): TRecodeTable;
var
  i: Integer;
  c: string;
begin
  for i := 0 to 255 do
  begin
    // Identity by default, so no entry is ever left undefined.
    Result[i] := i;
    if not bCaseSensitive then
    begin
      c := ConvertEncoding(chr(i), Encoding, EncodingUTF8);
      c := UTF8UpperCase(c);
      c := ConvertEncoding(c, EncodingUTF8, Encoding);
      // Only a single-byte result is a meaningful entry for a byte table.
      if Length(c) = 1 then
        Result[i] := ord(c[1]);
    end;
  end;
end;
{
  Maps sFileName into memory and searches it for sFindData with a
  Boyer-Moore style scan. Both the pattern bytes and the file bytes are passed
  through RecodeTable before being compared.

  Abort, when assigned, is polled periodically during the scan; if it returns
  True the scan stops and the text is reported as not found.

  Returns -1 when the file could not be mapped, 0 when the text was not found
  and 1 when it was found.
}
function FindMmapBM(const sFileName, sFindData:String; RecodeTable:TRecodeTable;
                    Abort: TAbortFunction):Integer;

  // Returns the offset of the first match inside pAdr[0..iLength-1],
  // or -1 when there is no match or the scan was aborted.
  // Offsets are Int64 so that large mapped sizes are not truncated.
  function PosMemBoyerMur(pAdr:PChar; iLength:Int64):Int64;

    // Prefix function: Result[i] is the length of the longest proper prefix
    // of s[1..i] that is also a suffix of s[1..i].
    function prefixFunc(s:string):TIntArray;
    var
      k, i: Integer;
    begin
      SetLength(Result, Length(s) + 1);
      Result[0] := 0;
      if Length(s) >= 1 then
        Result[1] := 0;
      k := 0;
      for i := 2 to Length(s) do
      begin
        while (k > 0) and (s[k+1] <> s[i]) do
          k := Result[k];
        if s[k+1] = s[i] then Inc(k);
        Result[i] := k;
      end;
    end;

  var
    // Right-most position (1-based, last pattern char excluded) of each byte
    // value in the pattern; 0 means the byte does not occur there. Stored as
    // Integer so that positions above 255 are not truncated.
    StopTable: array[0..255] of Integer;
    prefTable, pf1, pf2: TIntArray;
    i, j, len: Integer;
    curPos: Int64;
    curCharPos: Integer;
    encStr, rvrsStr: string;
    curChar: byte;
    iterations: Cardinal;
  begin
    Result := -1;
    len := Length(sFindData);

    // An empty pattern trivially matches at offset 0; returning here also
    // keeps the table construction and the scan below within bounds.
    if len = 0 then
    begin
      Result := 0;
      Exit;
    end;

    // Recoded pattern and its reverse.
    SetLength(encStr, len);
    for i := 1 to len do
      encStr[i] := chr(RecodeTable[ord(sFindData[i])]);
    SetLength(rvrsStr, len);
    for i := 1 to len do
      rvrsStr[i] := encStr[len - i + 1];

    // Stop-symbol table.
    for i := 0 to 255 do
      StopTable[i] := 0;
    for i := len-1 downto 1 do
      if StopTable[ord(encStr[i])] = 0 then
        StopTable[ord(encStr[i])] := i;

    // Suffix shift table, indexed by the pattern position of the mismatch.
    pf1 := prefixFunc(encStr);
    pf2 := prefixFunc(rvrsStr);
    SetLength(prefTable, len + 1);
    for j := 0 to len do
      prefTable[j] := len - pf1[len];
    for i := 1 to len do
    begin
      j := len - pf2[i];
      if i - pf2[i] < prefTable[j] then
        prefTable[j] := i - pf2[i];
    end;

    iterations := 0;
    curPos := 0;
    while curPos <= iLength - len do
    begin
      // Poll the abort callback once every 4096 alignments.
      Inc(iterations);
      if ((iterations and $FFF) = 0) and Assigned(Abort) and Abort() then
        Exit; // Result is still -1

      // Compare the pattern right-to-left at the current alignment.
      curCharPos := len;
      curChar := RecodeTable[ord((pAdr+curPos+curCharPos-1)^)];
      while (curCharPos > 0) do
      begin
        if (curChar <> byte(encStr[curCharPos])) then break;
        dec(curCharPos);
        if curCharPos > 0 then
          curChar := RecodeTable[ord((pAdr+curPos+curCharPos-1)^)];
      end;

      if curCharPos = 0 then
      begin // found
        Result := curPos;
        Exit;
      end
      else
      begin // shift
        if curCharPos = len then
          // Mismatch on the last pattern character: use the stop-symbol table.
          curPos := curPos + len - StopTable[curChar]
        else
          curPos := curPos + prefTable[curCharPos];
      end;
    end;
  end;

var
  fmr : TFileMapRec;
begin
  Result := -1;
  if MapFile(sFileName, fmr) then
  begin
    try
      if PosMemBoyerMur(fmr.MappedFile, fmr.FileSize) <> -1 then
        Result := 1
      else
        Result := 0;
    finally
      UnMapFile(fmr);
    end;
  end;
end;
end.

View file

@ -29,7 +29,7 @@ unit uFindThread;
interface
uses
Classes, StdCtrls, SysUtils, uFindFiles, uFindEx;
Classes, StdCtrls, SysUtils, uFindFiles, uFindEx, uFindByrMr;
type
@ -50,7 +50,7 @@ type
FSelectedFiles: TStringList;
FFileChecks: TFindFileChecks;
FLinkTargets: TStringList; // A list of encountered directories (for detecting cycles)
RecodeTable:TRecodeTable;
function CheckFile(const Folder : String; const sr : TSearchRecEx) : Boolean;
function CheckDirectory(const CurrentDir, FolderName : String) : Boolean;
function FindInFile(const sFileName: UTF8String;
@ -103,6 +103,8 @@ begin
FindText := ConvertEncoding(FindText, EncodingUTF8, TextEncoding);
ReplaceText := ConvertEncoding(ReplaceText, EncodingUTF8, TextEncoding);
if IsFindText then
RecodeTable:=InitRecodeTable(TextEncoding,CaseSensitive);
end;
SearchTemplateToFindFileChecks(FSearchTemplate, FFileChecks);
@ -233,7 +235,7 @@ begin
if gUseMmapInSearch then
begin
// memory mapping should be slightly faster and use less memory
case FindMmap(sFileName, sData, bCase, @IsAborting) of
case FindMmapBM(sFileName, sData, RecodeTable, @IsAborting) of
0 : Exit(False);
1 : Exit(True);
// else fall back to searching via stream reading