UPD: Make the PosMemBoyerMur function public

This commit is contained in:
Alexander Koblov 2014-06-29 11:20:49 +00:00
commit bfe034fb73

View file

@ -13,6 +13,9 @@ interface
type
TAbortFunction = function: Boolean of object;
TRecodeTable = array[0..255] of byte;
{en
Searches a memory buffer for a string (Boyer-Moore style, comparing right to left).
Every byte of sFindData and every inspected byte of the buffer is mapped
through RecodeTable before the two are compared.
@param(pAdr pointer to the first byte of the buffer)
@param(iLength number of bytes of the buffer to search)
@param(sFindData the string to look for)
@param(RecodeTable byte-to-byte mapping applied before comparison)
@returns(zero-based offset, relative to pAdr, at which the string was found
@br -1 if the string wasn't found)
}
function PosMemBoyerMur(pAdr: PChar; iLength: Integer; const sFindData: String;
RecodeTable: TRecodeTable): Integer;
{en
Searches a file for a string using memory mapping.
@ -26,8 +29,8 @@ type
@br 0 if the string wasn't found
@br 1 if the string was found)
}
function FindMmapBM(const sFileName:String; const sFindData:String; RecodeTable:TRecodeTable;
Abort: TAbortFunction):Integer;
function FindMmapBM(const sFileName: String; const sFindData: String;
RecodeTable: TRecodeTable; Abort: TAbortFunction):Integer;
{en
Initializes a table for recoding from different encodings.
@ -37,7 +40,7 @@ function FindMmapBM(const sFileName:String; const sFindData:String; RecodeTable:
@returns(TRecodeTable array to use in FindMmap)
}
function InitRecodeTable(Encoding:string; bCaseSensitive: Boolean): TRecodeTable;
function InitRecodeTable(Encoding: String; bCaseSensitive: Boolean): TRecodeTable;
implementation
uses
@ -63,84 +66,87 @@ begin
end;
end;
function FindMmapBM(const sFileName, sFindData:String; RecodeTable:TRecodeTable;
Abort: TAbortFunction):Integer;
function PosMemBoyerMur(pAdr:PChar; iLength:Integer):Integer;
// Computes the prefix function of s.
// The result has Length(s)+1 entries; for i in 1..Length(s), Result[i] is the
// length of the longest proper prefix of s[1..i] that is also a suffix of
// s[1..i]. Result[0] is set to 0 and is not otherwise used here.
// NOTE(review): Result[1] is written unconditionally, so for an empty s the
// array has a single element (index 0) and the write is out of bounds - confirm
// that callers never pass an empty string.
function prefixFunc(s:string):TIntArray;
var k,i:Integer;
begin
SetLength(Result, Length(s)+1);
Result[0] := 0;
Result[1] := 0;
// k = length of the border currently being extended
k := 0;
for i := 2 to Length(s) do
begin
// fall back to shorter borders until s[i] extends one (or none is left)
while (k > 0) and (s[k+1] <> s[i]) do
k := Result[k];
if s[k+1] = s[i] then Inc(k);
Result[i] := k;
end;
end;
var StopTable:array[0..255] of byte;
prefTable,pf1,pf2:TIntArray;
i,j,len:Integer;
curPos,curCharPos:Integer;
encStr,rvrsStr:string;
curChar:byte;
begin
Result:=-1;
len:=Length(sFindData);
encStr:='';
for i:=1 to len do
encStr:=encStr+chr(RecodeTable[ord(sFindData[i])]);
rvrsStr:='';
for i:=len downto 1 do
rvrsStr:=rvrsStr+encStr[i];
for i:=0 to 255 do
StopTable[i]:=0;
for i:=len-1 downto 1 do
if StopTable[ord(encStr[i])]=0 then
StopTable[ord(encStr[i])]:=i;
//Calc prefix table
pf1:=prefixFunc(encStr);
pf2:=prefixFunc(rvrsStr);
setLength(prefTable,len+1);
for j:=0 to len do
prefTable[j]:= len - pf1[len];
for i:=1 to len do
begin
j:= len - pf2[i];
if i - pf2[i] < prefTable[j] then
prefTable[j]:= i - pf2[i];
end;
{
Searches the buffer [pAdr, pAdr+iLength) for sFindData.
Pattern bytes and buffer bytes are both mapped through RecodeTable before
being compared. Returns the zero-based offset of the match, or -1 if no
match was found.
NOTE(review): with an empty sFindData (len = 0) prefixFunc writes Result[1]
into a one-element array and the search loop reads the byte at pAdr+curPos-1
- confirm callers never pass an empty string.
}
function PosMemBoyerMur(pAdr: PChar; iLength: Integer; const sFindData: String;
RecodeTable: TRecodeTable):Integer;
curPos:=0;
while curPos<=iLength-len do
function prefixFunc(s:string):TIntArray;
var k,i:Integer;
begin
SetLength(Result, Length(s)+1);
Result[0] := 0;
Result[1] := 0;
k := 0;
for i := 2 to Length(s) do
begin
curCharPos:=len;
curChar:=RecodeTable[ord((pAdr+curPos+curCharPos-1)^)];
while (curCharPos>0) do
begin
if (curChar<>byte(encStr[curCharPos])) then break;
dec(curCharPos);
if curCharPos>0 then
curChar:=RecodeTable[ord((pAdr+curPos+curCharPos-1)^)];
end;
if curCharPos=0 then
begin//found
Result:=curPos;
exit;
end
else
begin//shift
if curCharPos=len then
curPos:=curPos+len-StopTable[curChar]
else
curPos:=curPos+prefTable[curCharPos];
end
while (k > 0) and (s[k+1] <> s[i]) do
k := Result[k];
if s[k+1] = s[i] then Inc(k);
Result[i] := k;
end;
end;
var StopTable:array[0..255] of byte;
prefTable,pf1,pf2:TIntArray;
i,j,len:Integer;
curPos,curCharPos:Integer;
encStr,rvrsStr:string;
curChar:byte;
begin
// -1 is returned when the loop below ends without a match
Result:=-1;
len:=Length(sFindData);
// encStr = the pattern with every byte mapped through RecodeTable
encStr:='';
for i:=1 to len do
encStr:=encStr+chr(RecodeTable[ord(sFindData[i])]);
// rvrsStr = encStr reversed
rvrsStr:='';
for i:=len downto 1 do
rvrsStr:=rvrsStr+encStr[i];
// StopTable[c] = 1-based index of the rightmost occurrence of byte c in
// encStr[1..len-1], or 0 when c does not occur there
// NOTE(review): the entries are bytes while i runs up to len-1, so indices
// above 255 do not fit - confirm patterns are never that long
for i:=0 to 255 do
StopTable[i]:=0;
for i:=len-1 downto 1 do
if StopTable[ord(encStr[i])]=0 then
StopTable[ord(encStr[i])]:=i;
//Calc prefix table
// prefTable[m] is the shift used when the right-to-left comparison stops at
// pattern position m; it is built from the prefix functions of the pattern
// and of the reversed pattern
pf1:=prefixFunc(encStr);
pf2:=prefixFunc(rvrsStr);
setLength(prefTable,len+1);
for j:=0 to len do
prefTable[j]:= len - pf1[len];
for i:=1 to len do
begin
j:= len - pf2[i];
if i - pf2[i] < prefTable[j] then
prefTable[j]:= i - pf2[i];
end;
// curPos = zero-based offset in the buffer at which the pattern is aligned
curPos:=0;
while curPos<=iLength-len do
begin
// compare right to left, starting with the last pattern character
curCharPos:=len;
curChar:=RecodeTable[ord((pAdr+curPos+curCharPos-1)^)];
while (curCharPos>0) do
begin
if (curChar<>byte(encStr[curCharPos])) then break;
dec(curCharPos);
if curCharPos>0 then
curChar:=RecodeTable[ord((pAdr+curPos+curCharPos-1)^)];
end;
if curCharPos=0 then
begin//found
Result:=curPos;
exit;
end
else
begin//shift
// a mismatch on the very last character shifts by the stop table entry
// of the buffer byte; any other mismatch shifts by prefTable
if curCharPos=len then
curPos:=curPos+len-StopTable[curChar]
else
curPos:=curPos+prefTable[curCharPos];
end
end;
end;
function FindMmapBM(const sFileName, sFindData: String; RecodeTable: TRecodeTable;
Abort: TAbortFunction):Integer;
var
fmr : TFileMapRec;
begin
@ -150,7 +156,7 @@ begin
begin
try
begin
if PosMemBoyerMur(fmr.MappedFile, fmr.FileSize) <> -1 then
if PosMemBoyerMur(fmr.MappedFile, fmr.FileSize, sFindData, RecodeTable) <> -1 then
Result := 1
else
Result := 0;