mirror of
https://github.com/doublecmd/doublecmd.git
synced 2026-06-21 09:58:13 +00:00
ADD: Case insensitive text search in UTF-16 files
This commit is contained in:
parent
9cb56b7300
commit
8a95b25684
5 changed files with 154 additions and 25 deletions
|
|
@ -589,20 +589,6 @@ var
|
|||
AEncoding: String;
|
||||
begin
|
||||
AEncoding:= NormalizeEncoding(cmbEncoding.Text);
|
||||
if (AEncoding = EncodingUCS2LE) or (AEncoding = EncodingUCS2BE) then
|
||||
begin
|
||||
if cbCaseSens.Enabled then
|
||||
begin
|
||||
cbCaseSens.Tag:= Integer(cbCaseSens.Checked);
|
||||
cbCaseSens.Checked:= True;
|
||||
cbCaseSens.Enabled:= False;
|
||||
end;
|
||||
end
|
||||
else if not cbCaseSens.Enabled then
|
||||
begin
|
||||
cbCaseSens.Checked:= Boolean(cbCaseSens.Tag);
|
||||
cbCaseSens.Enabled:= True;
|
||||
end;
|
||||
cbTextRegExp.Enabled := (AEncoding = EncodingAnsi);
|
||||
if not cbTextRegExp.Enabled then cbTextRegExp.Checked:= False;
|
||||
end;
|
||||
|
|
@ -829,7 +815,20 @@ end;
|
|||
|
||||
{ Keeps the "case sensitive" checkbox in step with the "regular expression"
  checkbox.

  While the regular expression option is checked, cbCaseSens is forced to
  unchecked and disabled; its previous state is remembered in cbCaseSens.Tag.
  When the regular expression option is unchecked again, the remembered state
  is restored and the checkbox is re-enabled. }
procedure TfrmFindDlg.cbTextRegExpChange(Sender: TObject);
begin
  if cbTextRegExp.Checked then
  begin
    // Only save the state once: a disabled checkbox already holds a forced
    // value, and saving it again would overwrite the user's real choice.
    if cbCaseSens.Enabled then
    begin
      // The state must be saved BEFORE it is changed, otherwise the
      // restore below always yields "unchecked".
      cbCaseSens.Tag:= Integer(cbCaseSens.Checked);
      cbCaseSens.Checked:= False;
      cbCaseSens.Enabled:= False;
    end;
  end
  else if not cbCaseSens.Enabled then
  begin
    // Bring back the state that was saved when the checkbox was disabled
    cbCaseSens.Checked:= Boolean(cbCaseSens.Tag);
    cbCaseSens.Enabled:= True;
  end;
end;
|
||||
|
||||
procedure TfrmFindDlg.cbSelectedFilesChange(Sender: TObject);
|
||||
|
|
|
|||
|
|
@ -2025,6 +2025,15 @@ begin
|
|||
bTextFound := (PAnsiAddr <> Pointer(-1));
|
||||
if bTextFound then FLastSearchPos := PAnsiAddr - ViewerControl.GetDataAdr;
|
||||
end
|
||||
// Using special case insensitive UTF-16 search algorithm
|
||||
else if (ViewerControl.Encoding in [veUtf16le, veUtf16be, veUcs2le, veUcs2be]) then
|
||||
begin
|
||||
PAnsiAddr := PosMemW(ViewerControl.GetDataAdr, ViewerControl.FileSize,
|
||||
FLastSearchPos, sSearchTextA, bSearchBackwards,
|
||||
ViewerControl.Encoding in [veUtf16le, veUcs2le]);
|
||||
bTextFound := (PAnsiAddr <> Pointer(-1));
|
||||
if bTextFound then FLastSearchPos := PAnsiAddr - ViewerControl.GetDataAdr;
|
||||
end
|
||||
// Using very slow search algorithm
|
||||
else if (ViewerControl.Encoding in ViewerEncodingMultiByte) or bSearchBackwards then
|
||||
begin
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ uses
|
|||
Classes, SysUtils, DCBasicTypes, uFile;
|
||||
|
||||
type
|
||||
TTextSearch = (tsAnsi, tsUtf8, tsOther);
|
||||
TTextSearch = (tsAnsi, tsUtf8, tsUtf16le, tsUtf16be, tsOther);
|
||||
TTimeUnit = (tuSecond, tuMinute, tuHour, tuDay, tuWeek, tuMonth, tuYear);
|
||||
TFileSizeUnit = (suBytes, suKilo, suMega, suGiga, suTera);
|
||||
TPluginOperator = (poEqual, poNotEqual, poMore, poLess, poMoreEqual, poLessEqual,
|
||||
|
|
|
|||
|
|
@ -42,6 +42,9 @@ function PosMem(pDataAddr: PChar; iDataLength, iStartPos: PtrInt; const sSearchT
|
|||
function PosMemU(pDataAddr: PChar; iDataLength, iStartPos: PtrInt;
|
||||
const sSearchText: String; bSearchBackwards: Boolean): Pointer;
|
||||
|
||||
function PosMemW(pDataAddr: PChar; iDataLength, iStartPos: PtrInt;
|
||||
const sSearchText: String; bSearchBackwards, bLittleEndian: Boolean): Pointer;
|
||||
|
||||
{en
|
||||
Searches a file for a string using memory mapping.
|
||||
|
||||
|
|
@ -60,10 +63,12 @@ function FindMmap(const sFileName:String; const sFindData:String; bCase:Boolean;
|
|||
|
||||
function FindMmapU(const sFileName: String; const sFindData: String): Integer;
|
||||
|
||||
function FindMmapW(const sFileName: String; const sFindData: String; bLittleEndian: Boolean): Integer;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
DCOSUtils, UnicodeUtils, LazUTF8, StrUtils;
|
||||
SysUtils, DCOSUtils, UnicodeUtils, LazUTF8, StrUtils, DCStrUtils;
|
||||
|
||||
function PosMem(pDataAddr: PChar; iDataLength, iStartPos: PtrInt; const sSearchText: String;
|
||||
bCaseSensitive: Boolean; bSearchBackwards: Boolean): Pointer;
|
||||
|
|
@ -195,8 +200,86 @@ begin
|
|||
end;
|
||||
end;
|
||||
|
||||
function FindMmap(const sFileName, sFindData:String; bCase:Boolean;
|
||||
Abort: TAbortFunction):Integer;
|
||||
{ Case insensitive search for UTF-16 text inside a block of memory.

  The data is examined in windows: each window is copied into a
  UnicodeString, byte-swapped when its byte order differs from the native
  one, converted to lower case and then searched for the lower-cased search
  text. Consecutive windows overlap by the length of the search text so a
  match that straddles a window border is not missed.

  pDataAddr        - start of the data
  iDataLength      - size of the data in bytes (used by the forward search)
  iStartPos        - byte offset where the search starts; a backward search
                     examines the bytes located before this offset
  sSearchText      - raw bytes of the UTF-16 text to find; they are
                     reinterpreted as 16-bit code units and byte-swapped
                     under the same condition as the data
  bSearchBackwards - True to search towards the beginning of the data
  bLittleEndian    - True when the data is little endian

  Returns the address of the match (the first one for a forward search, the
  last one for a backward search) or Pointer(-1) when nothing was found. }
function PosMemW(pDataAddr: PChar; iDataLength, iStartPos: PtrInt;
                 const sSearchText: String; bSearchBackwards, bLittleEndian: Boolean): Pointer;
const
  BUFFER_SIZE = 4096;
var
  iSize: PtrInt;
  iChunk: PtrInt;
  iLength: Integer;
  iTextPos: Integer;
  bSwapEndian: Boolean;
  sTextBuffer: UnicodeString;
  sLowerCase: UnicodeString;

  // Load iBytes bytes starting at pData into sTextBuffer as lower-cased,
  // native-endian UTF-16 text
  procedure LoadChunk(pData: PChar; iBytes: PtrInt);
  begin
    SetString(sTextBuffer, PUnicodeChar(pData), iBytes div 2);
    if bSwapEndian then Utf16SwapEndian(sTextBuffer);
    sTextBuffer:= UnicodeLowerCase(sTextBuffer);
  end;

begin
  Result := Pointer(-1);
  iLength:= Length(sSearchText);
  bSwapEndian:= {$IFDEF ENDIAN_BIG}bLittleEndian{$ELSE}not bLittleEndian{$ENDIF};

  // The window must be larger than the search text. With a fixed window of
  // BUFFER_SIZE bytes a search text of BUFFER_SIZE bytes or more makes the
  // window step zero or negative, so the scan never advances.
  if iLength >= BUFFER_SIZE then
    iChunk:= PtrInt(iLength) * 2
  else
    iChunk:= BUFFER_SIZE;

  // Number of bytes available in the search direction
  if bSearchBackwards then
    iSize:= iStartPos
  else
    iSize:= iDataLength - iStartPos;
  if iLength > iSize then Exit;

  // The appended #0 plus the implicit string terminator form the 16-bit
  // terminator needed to read the bytes as a null-terminated wide string
  sLowerCase:= PUnicodeChar(Pointer(sSearchText + #0));
  if bSwapEndian then Utf16SwapEndian(sLowerCase);
  sLowerCase:= UnicodeLowerCase(sLowerCase);

  if bSearchBackwards then
  begin
    // While text size > window size
    while iStartPos > iChunk do
    begin
      iStartPos:= iStartPos - iChunk;
      LoadChunk(pDataAddr + iStartPos, iChunk);
      iTextPos:= RPos(sLowerCase, sTextBuffer);
      // iTextPos is a 1-based character index, hence "* 2 - 2" bytes
      if iTextPos > 0 then
        Exit(pDataAddr + iStartPos + iTextPos * 2 - 2);
      // Shift window: keep iLength bytes of overlap with the next one
      iStartPos:= iStartPos + iLength;
    end;
    // Process remaining data at the beginning
    if iLength > iStartPos then Exit;
    LoadChunk(pDataAddr, iStartPos);
    iTextPos:= RPos(sLowerCase, sTextBuffer);
    if iTextPos > 0 then Result:= pDataAddr + iTextPos * 2 - 2;
  end
  else begin
    // While text size > window size
    while iSize > iChunk do
    begin
      LoadChunk(pDataAddr + iStartPos, iChunk);
      iTextPos:= Pos(sLowerCase, sTextBuffer);
      if iTextPos > 0 then
        Exit(pDataAddr + iStartPos + iTextPos * 2 - 2);
      // Shift window: keep iLength bytes of overlap with the next one
      iStartPos:= iStartPos + (iChunk - iLength);
      iSize:= iDataLength - iStartPos;
    end;
    // Process remaining data at the end
    if iLength > iSize then Exit;
    LoadChunk(pDataAddr + iStartPos, iSize);
    iTextPos:= Pos(sLowerCase, sTextBuffer);
    if iTextPos > 0 then Result:= pDataAddr + iStartPos + iTextPos * 2 - 2;
  end;
end;
|
||||
|
||||
function FindMmap(const sFileName: String; const sFindData: String;
|
||||
bCase: Boolean; Abort: TAbortFunction): Integer;
|
||||
|
||||
function PosMem(pAdr:PChar; iLength:Integer):Pointer;
|
||||
var
|
||||
|
|
@ -278,4 +361,25 @@ begin
|
|||
end;
|
||||
end;
|
||||
|
||||
{ Looks for sFindData in the file sFileName using memory mapping; the actual
  search is delegated to PosMemW (forward, starting at offset 0).

  bLittleEndian is passed through to PosMemW unchanged.

  Returns -1 when the file could not be mapped, 1 when the text was found
  and 0 when it was not. }
function FindMmapW(const sFileName: String; const sFindData: String; bLittleEndian: Boolean): Integer;
var
  MapRec: TFileMapRec;
  pFound: Pointer;
begin
  Result:= -1;
  if not MapFile(sFileName, MapRec) then Exit;

  try
    pFound:= PosMemW(MapRec.MappedFile, MapRec.FileSize, 0, sFindData, False, bLittleEndian);
    // PosMemW signals "not found" with Pointer(-1)
    if pFound = Pointer(-1) then
      Result:= 0
    else
      Result:= 1;
  finally
    // Always release the mapping, even if the search raised an exception
    UnMapFile(MapRec);
  end;
end;
|
||||
|
||||
end.
|
||||
|
|
|
|||
|
|
@ -119,8 +119,17 @@ begin
|
|||
FTextSearchType := tsAnsi;
|
||||
RecodeTable := InitRecodeTable(TextEncoding, CaseSensitive);
|
||||
end
|
||||
else if (CaseSensitive = False) and ((TextEncoding = EncodingUTF8) or (TextEncoding = EncodingUTF8BOM)) then
|
||||
FTextSearchType:= tsUtf8
|
||||
else if (CaseSensitive = False) then
|
||||
begin
|
||||
if ((TextEncoding = EncodingUTF8) or (TextEncoding = EncodingUTF8BOM)) then
|
||||
FTextSearchType:= tsUtf8
|
||||
else if (TextEncoding = EncodingUCS2LE) then
|
||||
FTextSearchType:= tsUtf16le
|
||||
else if (TextEncoding = EncodingUCS2BE) then
|
||||
FTextSearchType:= tsUtf16be
|
||||
else
|
||||
FTextSearchType:= tsOther;
|
||||
end
|
||||
else begin
|
||||
FTextSearchType:= tsOther;
|
||||
end;
|
||||
|
|
@ -282,9 +291,11 @@ begin
|
|||
begin
|
||||
// Memory mapping should be slightly faster and use less memory
|
||||
case FTextSearchType of
|
||||
tsAnsi: lastPos:= FindMmapBM(sFileName, sData, RecodeTable, @IsAborting);
|
||||
tsUtf8: lastPos:= FindMmapU(sFileName, sData)
|
||||
else lastPos:= FindMmap(sFileName, sData, bCase, @IsAborting);
|
||||
tsAnsi: lastPos:= FindMmapBM(sFileName, sData, RecodeTable, @IsAborting);
|
||||
tsUtf8: lastPos:= FindMmapU(sFileName, sData);
|
||||
tsUtf16le: lastPos:= FindMmapW(sFileName, sData, True);
|
||||
tsUtf16be: lastPos:= FindMmapW(sFileName, sData, False);
|
||||
else lastPos:= FindMmap(sFileName, sData, bCase, @IsAborting);
|
||||
end;
|
||||
case lastPos of
|
||||
0 : Exit(False);
|
||||
|
|
@ -331,7 +342,13 @@ begin
|
|||
begin
|
||||
if PosMemU(@Buffer[0], DataRead + sDataLength - 1, 0, sData, False) <> Pointer(-1) then
|
||||
Exit(True);
|
||||
end
|
||||
end;
|
||||
tsUtf16le,
|
||||
tsUtf16be:
|
||||
begin
|
||||
if PosMemW(@Buffer[0], DataRead + sDataLength - 1, 0, sData, False, FTextSearchType = tsUtf16le) <> Pointer(-1) then
|
||||
Exit(True);
|
||||
end;
|
||||
else
|
||||
begin
|
||||
if PosMem(@Buffer[0], DataRead + sDataLength - 1, 0, sData, bCase, False) <> Pointer(-1) then
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue