mirror of
https://github.com/doublecmd/doublecmd.git
synced 2026-06-28 10:02:14 +00:00
ADD: Case insensitive text search with different encodings (patch by GrayElf)
This commit is contained in:
parent
54920a611d
commit
a1f65bdd8a
5 changed files with 265 additions and 21 deletions
|
|
@ -393,6 +393,8 @@ type
|
|||
|
||||
function ConvertToUTF8(const sText: AnsiString): UTF8String;
|
||||
function ConvertFromUTF8(const sText: UTF8String): AnsiString;
|
||||
function FindUtf8Text(iStartPos: PtrInt; const sSearchText: UTF8String;
|
||||
bCaseSensitive: Boolean; bSearchBackwards: Boolean): PtrInt;
|
||||
|
||||
function DetectEncoding: TViewerEncoding;
|
||||
|
||||
|
|
@ -3171,6 +3173,81 @@ begin
|
|||
end;
|
||||
end;
|
||||
|
||||
{
  Searches the viewed data for sSearchText, comparing character by character.
  Each character of the data is obtained through GetNextCharAsUtf8, so the
  comparison is done on UTF-8 characters and not on raw bytes.

  iStartPos        - position at which the search starts.
  sSearchText      - text to look for, in UTF-8.
  bCaseSensitive   - when False both sides are upper-cased with UTF8UpperCase
                     before being compared.
  bSearchBackwards - when True the position is decreased after a failed
                     comparison, otherwise it is advanced by one character.

  Returns the position of the first match, or PtrInt(-1) when the text is
  empty or was not found.
}
function TViewerControl.FindUtf8Text(iStartPos: PtrInt; const sSearchText: UTF8String;
                                     bCaseSensitive: Boolean; bSearchBackwards: Boolean): PtrInt;
var
  SearchTextLength: Integer;
  // One entry per character of sSearchText; already upper-cased when the
  // search is case-insensitive, so the scan does not have to redo that work.
  sSearchChars: array of UTF8String;
  pCurrentAddr, pEndAddr: PtrInt;
  i, charLen: Integer;

  // Returns True when the characters starting at pAdr equal sSearchChars.
  function sPos2(pAdr: PtrInt): Boolean;
  var
    curChr: UTF8String;
    i, charLen: Integer;
  begin
    Result := False;
    for i := 0 to SearchTextLength - 1 do
    begin
      curChr := GetNextCharAsUtf8(pAdr, charLen);
      if not bCaseSensitive then
        curChr := UTF8UpperCase(curChr);
      if curChr <> sSearchChars[i] then
        Exit;
      // Always move forward, even if no character length was reported.
      if charLen > 0 then
        pAdr := pAdr + charLen
      else
        Inc(pAdr);
    end;
    Result := True;
  end;

begin
  Result := PtrInt(-1);
  SearchTextLength := UTF8Length(sSearchText);
  if SearchTextLength <= 0 then
    Exit;

  // Split the search text into characters once, normalising the case here
  // instead of on every comparison.
  SetLength(sSearchChars, SearchTextLength);
  for i := 1 to SearchTextLength do
  begin
    sSearchChars[i - 1] := UTF8Copy(sSearchText, i, 1);
    if not bCaseSensitive then
      sSearchChars[i - 1] := UTF8UpperCase(sSearchChars[i - 1]);
  end;

  pCurrentAddr := iStartPos;
  // Last position at which the text, converted by ConvertFromUTF8, still fits.
  pEndAddr := FHighLimit - Length(ConvertFromUTF8(sSearchText));

  if bSearchBackwards and (pCurrentAddr > pEndAddr) then
    // Move to the first possible position for searching backwards.
    pCurrentAddr := pEndAddr;

  // The loop condition also covers a negative pEndAddr and a start position
  // that is out of range: in both cases nothing is scanned.
  while (pCurrentAddr >= 0) and (pCurrentAddr <= pEndAddr) do
  begin
    if sPos2(pCurrentAddr) then
    begin
      Result := pCurrentAddr;
      Exit;
    end;

    if bSearchBackwards then
      // NOTE(review): steps back a single unit, which presumably may land
      // inside a multi-byte character - confirm GetNextCharAsUtf8 copes.
      Dec(pCurrentAddr)
    else
    begin
      GetNextCharAsUtf8(pCurrentAddr, charLen);
      if charLen > 0 then
        pCurrentAddr := pCurrentAddr + charLen
      else
        Inc(pCurrentAddr);
    end;
  end;
end;
|
||||
|
||||
procedure Register;
|
||||
begin
|
||||
RegisterComponents('SeksiCmd', [TViewerControl]);
|
||||
|
|
|
|||
|
|
@ -471,17 +471,20 @@ end;
|
|||
|
||||
{
  Handler for a change of the selected text encoding.

  When gUseMmapInSearch is off and an encoding other than EncodingAnsi is
  selected, the "case sensitive" box is forced on and disabled; its previous
  state is kept in cbCaseSens.Tag and restored when EncodingAnsi is selected
  again. When gUseMmapInSearch is on the check box is left untouched.
}
procedure TfrmFindDlg.cmbEncodingSelect(Sender: TObject);
begin
  // Only the non-mmap search needs the restriction; this used to be applied
  // unconditionally as well, which overwrote the saved state in Tag.
  if not gUseMmapInSearch then
  begin
    if cmbEncoding.ItemIndex <> cmbEncoding.Items.IndexOf(EncodingAnsi) then
    begin
      // NOTE(review): selecting two non-ANSI encodings in a row stores the
      // forced True value here - confirm whether that is intended.
      cbCaseSens.Tag := Integer(cbCaseSens.Checked);
      cbCaseSens.Checked := True;
      cbCaseSens.Enabled := False;
    end
    else
    begin
      cbCaseSens.Checked := Boolean(cbCaseSens.Tag);
      cbCaseSens.Enabled := True;
    end;
  end;
end;
|
||||
|
||||
constructor TfrmFindDlg.Create(TheOwner: TComponent);
|
||||
|
|
|
|||
|
|
@ -1975,8 +1975,8 @@ end;
|
|||
|
||||
procedure TfrmViewer.DoSearch(bQuickSearch: Boolean; bSearchBackwards: Boolean);
|
||||
var
|
||||
PAdr: PChar;
|
||||
iSizeData: Integer;
|
||||
PAdr: PtrInt;
|
||||
iSizeData, charLen: Integer;
|
||||
sSearchTextU: UTF8String;
|
||||
sSearchTextA: AnsiString;
|
||||
begin
|
||||
|
|
@ -2039,13 +2039,12 @@ begin
|
|||
end;
|
||||
|
||||
sSearchTextA:= ViewerControl.ConvertFromUTF8(sSearchTextU);
|
||||
PAdr := PosMem(ViewerControl.GetDataAdr, ViewerControl.FileSize,
|
||||
FLastSearchPos, sSearchTextA,
|
||||
FFindDialog.cbCaseSens.Checked, bSearchBackwards);
|
||||
PAdr := ViewerControl.FindUtf8Text(FLastSearchPos, sSearchTextU,
|
||||
FFindDialog.cbCaseSens.Checked, bSearchBackwards);
|
||||
|
||||
if (PAdr <> Pointer(-1)) then
|
||||
if (PAdr <> PtrInt(-1)) then
|
||||
begin
|
||||
FLastSearchPos := PAdr - ViewerControl.GetDataAdr;
|
||||
FLastSearchPos := PAdr;
|
||||
// Text found, show it in ViewerControl if not visible
|
||||
ViewerControl.MakeVisible(FLastSearchPos);
|
||||
// Select found text.
|
||||
|
|
|
|||
163
src/ufindbyrmr.pas
Normal file
163
src/ufindbyrmr.pas
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
{
|
||||
Implements string searching in memory with optional case-insensitivity
(for any single-byte encoding), applied to a file mapped into memory (mmap).
|
||||
based on ufindmmap.pas by radek.cervinka@centrum.cz
|
||||
}
|
||||
|
||||
unit uFindByrMr;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
interface
|
||||
|
||||
type
|
||||
TAbortFunction = function: Boolean of object;
|
||||
TRecodeTable = array[0..255] of byte;
|
||||
{en
|
||||
Searches a file for a string using memory mapping.
|
||||
|
||||
@param(sFileName File to search in.)
|
||||
@param(sFindData String to search for.)
|
||||
@param(RecodeTable table for case-insensitive compare)
|
||||
@param(Abort This function is called repeatedly during searching.
|
||||
If it returns @true the search is aborted.)
|
||||
|
||||
@returns(-1 in case of error
|
||||
@br 0 if the string wasn't found
|
||||
@br 1 if the string was found)
|
||||
}
|
||||
function FindMmapBM(const sFileName:String; const sFindData:String; RecodeTable:TRecodeTable;
|
||||
Abort: TAbortFunction):Integer;
|
||||
|
||||
{en
|
||||
Initializes table for recode from different encodings.
|
||||
|
||||
@param(Encoding Name of encoding.)
|
||||
@param(bCaseSensitive If @true the search is case sensitive.)
|
||||
@returns(TRecodeTable array to use in FindMmapBM)
|
||||
|
||||
}
|
||||
function InitRecodeTable(Encoding:string; bCaseSensitive: Boolean): TRecodeTable;
|
||||
|
||||
implementation
|
||||
uses
|
||||
DCOSUtils,LConvEncoding, LCLProc;
|
||||
type
|
||||
TIntArray = array of Integer;
|
||||
|
||||
{
  Builds the byte translation table used for comparing text.

  Encoding       - name of the encoding the bytes are in.
  bCaseSensitive - when True the table maps every byte to itself.
                   When False each byte is converted to UTF-8, upper-cased
                   with UTF8UpperCase and converted back to Encoding.

  Every entry is always initialised: a byte keeps its own value unless the
  round trip produces exactly one byte, because a table entry cannot hold an
  empty or a multi-byte result.
}
function InitRecodeTable(Encoding:string; bCaseSensitive: Boolean): TRecodeTable;
var
  i: Integer;
  c: string;
begin
  for i := 0 to 255 do
  begin
    // Identity by default, so no entry is ever left undefined.
    Result[i] := Byte(i);
    if not bCaseSensitive then
    begin
      c := ConvertEncoding(Chr(i), Encoding, EncodingUTF8);
      c := UTF8UpperCase(c);
      c := ConvertEncoding(c, EncodingUTF8, Encoding);
      if Length(c) = 1 then
        Result[i] := Ord(c[1]);
    end;
  end;
end;
|
||||
|
||||
{
  Maps sFileName into memory and looks for sFindData in it with a
  Boyer-Moore style search. Both the pattern and the file bytes are passed
  through RecodeTable before they are compared.

  sFileName   - file to search in.
  sFindData   - bytes to search for.
  RecodeTable - byte translation table applied to both sides.
  Abort       - polled periodically during the scan; when it returns True the
                scan stops and the text is reported as not found. May be nil.

  Returns -1 when the file could not be mapped, 0 when the text was not found
  (or sFindData is empty, or the search was aborted), 1 when it was found.
}
function FindMmapBM(const sFileName, sFindData:String; RecodeTable:TRecodeTable;
                    Abort: TAbortFunction):Integer;
const
  // Number of tried alignments between two calls of Abort (power of two).
  AbortCheckInterval = 4096;

  // Returns the zero-based offset of the first match in the iLength bytes at
  // pAdr, or -1 when there is none.
  function PosMemBoyerMur(pAdr: PChar; iLength: PtrInt): PtrInt;

    // Prefix function of s, 1-based: Result[i] is the length of the longest
    // proper prefix of s[1..i] that is also its suffix. s must not be empty.
    function prefixFunc(const s: string): TIntArray;
    var
      k, i: Integer;
    begin
      SetLength(Result, Length(s) + 1);
      Result[0] := 0;
      Result[1] := 0;
      k := 0;
      for i := 2 to Length(s) do
      begin
        while (k > 0) and (s[k + 1] <> s[i]) do
          k := Result[k];
        if s[k + 1] = s[i] then
          Inc(k);
        Result[i] := k;
      end;
    end;

  var
    // Holds pattern positions, so it must be able to store values above 255.
    StopTable: array[0..255] of Integer;
    prefTable, pf1, pf2: TIntArray;
    i, j, len: Integer;
    curCharPos: Integer;
    curPos, tried: PtrInt;
    encStr, rvrsStr: string;
    curChar: byte;
  begin
    Result := -1;
    len := Length(sFindData);
    // Nothing to look for; also keeps prefixFunc inside its array bounds.
    if len = 0 then
      Exit;

    // Recoded pattern and its reverse.
    SetLength(encStr, len);
    SetLength(rvrsStr, len);
    for i := 1 to len do
    begin
      encStr[i] := Chr(RecodeTable[Ord(sFindData[i])]);
      rvrsStr[len - i + 1] := encStr[i];
    end;

    // Stop table: rightmost position of every byte in the pattern, the last
    // pattern byte excluded; 0 means the byte does not occur there.
    for i := 0 to 255 do
      StopTable[i] := 0;
    for i := len - 1 downto 1 do
      if StopTable[Ord(encStr[i])] = 0 then
        StopTable[Ord(encStr[i])] := i;

    // Suffix shift table, derived from the prefix functions of the pattern
    // and of the reversed pattern.
    pf1 := prefixFunc(encStr);
    pf2 := prefixFunc(rvrsStr);
    SetLength(prefTable, len + 1);
    for j := 0 to len do
      prefTable[j] := len - pf1[len];
    for i := 1 to len do
    begin
      j := len - pf2[i];
      if i - pf2[i] < prefTable[j] then
        prefTable[j] := i - pf2[i];
    end;

    curPos := 0;
    tried := 0;
    while curPos <= iLength - len do
    begin
      // Give the caller a chance to cancel a long scan.
      Inc(tried);
      if (tried and (AbortCheckInterval - 1)) = 0 then
        if Assigned(Abort) and Abort() then
          Exit;

      // Compare from the last pattern byte towards the first one.
      curCharPos := len;
      curChar := RecodeTable[Ord((pAdr + curPos + curCharPos - 1)^)];
      while curCharPos > 0 do
      begin
        if curChar <> Byte(encStr[curCharPos]) then
          Break;
        Dec(curCharPos);
        if curCharPos > 0 then
          curChar := RecodeTable[Ord((pAdr + curPos + curCharPos - 1)^)];
      end;

      if curCharPos = 0 then
      begin
        // Every byte matched.
        Result := curPos;
        Exit;
      end;

      // Mismatch: on the very first compared byte use the stop table,
      // otherwise the suffix shift table. Both shifts are at least 1.
      if curCharPos = len then
        curPos := curPos + len - StopTable[curChar]
      else
        curPos := curPos + prefTable[curCharPos];
    end;
  end;

var
  fmr: TFileMapRec;
begin
  Result := -1;

  if MapFile(sFileName, fmr) then
  begin
    try
      if PosMemBoyerMur(fmr.MappedFile, fmr.FileSize) <> -1 then
        Result := 1
      else
        Result := 0;
    finally
      // Always release the mapping, also when the search raises.
      UnMapFile(fmr);
    end;
  end;
end;
|
||||
end.
|
||||
|
|
@ -29,7 +29,7 @@ unit uFindThread;
|
|||
interface
|
||||
|
||||
uses
|
||||
Classes, StdCtrls, SysUtils, uFindFiles, uFindEx;
|
||||
Classes, StdCtrls, SysUtils, uFindFiles, uFindEx, uFindByrMr;
|
||||
|
||||
type
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ type
|
|||
FSelectedFiles: TStringList;
|
||||
FFileChecks: TFindFileChecks;
|
||||
FLinkTargets: TStringList; // A list of encountered directories (for detecting cycles)
|
||||
|
||||
RecodeTable:TRecodeTable;
|
||||
function CheckFile(const Folder : String; const sr : TSearchRecEx) : Boolean;
|
||||
function CheckDirectory(const CurrentDir, FolderName : String) : Boolean;
|
||||
function FindInFile(const sFileName: UTF8String;
|
||||
|
|
@ -103,6 +103,8 @@ begin
|
|||
|
||||
FindText := ConvertEncoding(FindText, EncodingUTF8, TextEncoding);
|
||||
ReplaceText := ConvertEncoding(ReplaceText, EncodingUTF8, TextEncoding);
|
||||
if IsFindText then
|
||||
RecodeTable:=InitRecodeTable(TextEncoding,CaseSensitive);
|
||||
end;
|
||||
|
||||
SearchTemplateToFindFileChecks(FSearchTemplate, FFileChecks);
|
||||
|
|
@ -233,7 +235,7 @@ begin
|
|||
if gUseMmapInSearch then
|
||||
begin
|
||||
// memory mapping should be slightly faster and use less memory
|
||||
case FindMmap(sFileName, sData, bCase, @IsAborting) of
|
||||
case FindMmapBM(sFileName, sData, RecodeTable, @IsAborting) of
|
||||
0 : Exit(False);
|
||||
1 : Exit(True);
|
||||
// else fall back to searching via stream reading
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue