ADD: Find duplicate files by hash

This commit is contained in:
Alexander Koblov 2019-12-04 18:03:38 +00:00
commit b6a4a232ec
5 changed files with 88 additions and 8 deletions

View file

@ -931,7 +931,7 @@ object frmFindDlg: TfrmFindDlg
BevelOuter = bvNone
ChildSizing.HorizontalSpacing = 8
ChildSizing.Layout = cclLeftToRightThenTopToBottom
ChildSizing.ControlsPerLine = 3
ChildSizing.ControlsPerLine = 4
ClientHeight = 19
ClientWidth = 259
Enabled = False
@ -954,6 +954,15 @@ object frmFindDlg: TfrmFindDlg
OnChange = chkDuplicateSizeChange
TabOrder = 1
end
object chkDuplicateHash: TCheckBox
Left = 89
Height = 19
Top = 0
Width = 70
Caption = 'same hash'
OnChange = chkDuplicateHashChange
TabOrder = 2
end
object chkDuplicateContent: TCheckBox
Left = 167
Height = 19
@ -961,7 +970,7 @@ object frmFindDlg: TfrmFindDlg
Width = 92
Caption = 'same content'
OnChange = chkDuplicateContentChange
TabOrder = 2
TabOrder = 3
end
end
object Bevel3: TBevel

View file

@ -103,6 +103,7 @@ type
cbOpenedTabs: TCheckBox;
chkDuplicateContent: TCheckBox;
chkDuplicateSize: TCheckBox;
chkDuplicateHash: TCheckBox;
chkDuplicateName: TCheckBox;
chkDuplicates: TCheckBox;
chkHex: TCheckBox;
@ -218,6 +219,7 @@ type
procedure cbTextRegExpChange(Sender: TObject);
procedure cbSelectedFilesChange(Sender: TObject);
procedure chkDuplicateContentChange(Sender: TObject);
procedure chkDuplicateHashChange(Sender: TObject);
procedure chkDuplicateNameChange(Sender: TObject);
procedure chkDuplicatesChange(Sender: TObject);
procedure chkDuplicateSizeChange(Sender: TObject);
@ -1028,7 +1030,21 @@ end;
// OnChange handler of the "same content" check box.
// When the box is checked it also checks "same size" and unchecks
// "same hash"; in every case it finishes by calling chkDuplicateNameChange.
procedure TfrmFindDlg.chkDuplicateContentChange(Sender: TObject);
begin
// NOTE(review): the next single-line statement looks like the pre-change
// version of the begin..end block that follows (diff markers are not
// visible here) - confirm whether both are meant to be present.
if chkDuplicateContent.Checked then chkDuplicateSize.Checked:= True;
if chkDuplicateContent.Checked then
begin
// Assigning Checked may fire the other check boxes' OnChange handlers.
chkDuplicateSize.Checked:= True;
chkDuplicateHash.Checked:= False;
end;
chkDuplicateNameChange(chkDuplicateName);
end;
// OnChange handler of the "same hash" check box.
// Checking it also checks "same size" and unchecks "same content";
// in every case the handler ends by calling chkDuplicateNameChange.
procedure TfrmFindDlg.chkDuplicateHashChange(Sender: TObject);
var
  HashRequested: Boolean;
begin
  HashRequested:= chkDuplicateHash.Checked;
  if HashRequested then
  begin
    // Keep this order: each assignment may fire that check box's own
    // OnChange handler.
    chkDuplicateSize.Checked:= True;
    chkDuplicateContent.Checked:= False;
  end;
  chkDuplicateNameChange(chkDuplicateName);
end;
@ -1050,7 +1066,11 @@ end;
// OnChange handler of the "same size" check box.
// When the box is unchecked it also unchecks "same hash" and
// "same content"; in every case it finishes by calling
// chkDuplicateNameChange.
procedure TfrmFindDlg.chkDuplicateSizeChange(Sender: TObject);
begin
// NOTE(review): the next single-line statement looks like the pre-change
// version of the begin..end block that follows (diff markers are not
// visible here) - confirm whether both are meant to be present.
if not chkDuplicateSize.Checked then chkDuplicateContent.Checked:= False;
if not chkDuplicateSize.Checked then
begin
chkDuplicateHash.Checked:= False;
chkDuplicateContent.Checked:= False;
end;
chkDuplicateNameChange(chkDuplicateName);
end;
@ -1168,6 +1188,7 @@ begin
Duplicates:= chkDuplicates.Checked;
DuplicateName:= chkDuplicateName.Checked;
DuplicateSize:= chkDuplicateSize.Checked;
DuplicateHash:= chkDuplicateHash.Checked;
DuplicateContent:= chkDuplicateContent.Checked;
{ Plugins }
SearchPlugin := cmbPlugin.Text;
@ -2156,6 +2177,7 @@ begin
chkDuplicates.Checked := Duplicates;
chkDuplicateName.Checked := DuplicateName;
chkDuplicateSize.Checked := DuplicateSize;
chkDuplicateHash.Checked := DuplicateHash;
chkDuplicateContent.Checked := DuplicateContent;
// plugins
cmbPlugin.Text := SearchPlugin;

View file

@ -88,6 +88,7 @@ type
Duplicates: Boolean;
DuplicateName: Boolean;
DuplicateSize: Boolean;
DuplicateHash: Boolean;
DuplicateContent: Boolean;
{ Plugins }
SearchPlugin: String;

View file

@ -66,6 +66,7 @@ type
FTimeSearchEnd:TTime;
FTimeOfScan:TTime;
FBuffer: TBytes;
FFoundIndex: IntPtr;
FDuplicateIndex: Integer;
FDuplicates: TStringHashListUtf8;
@ -104,7 +105,7 @@ implementation
uses
LCLProc, LazUtf8, StrUtils, LConvEncoding, DCStrUtils,
uLng, DCClassesUtf8, uFindMmap, uGlobs, uShowMsg, DCOSUtils, uOSUtils,
uLng, DCClassesUtf8, uFindMmap, uGlobs, uShowMsg, DCOSUtils, uOSUtils, uHash,
uLog, uWCXmodule, WcxPlugin, Math, uDCUtils, uConvEncoding, DCDateTimeUtils;
function ProcessDataProcAG(FileName: PAnsiChar; Size: LongInt): LongInt; dcpcall;
@ -195,6 +196,9 @@ begin
FExcludeDirectories := TMaskList.Create(ExcludeDirectories);
end;
if FSearchTemplate.Duplicates and FSearchTemplate.DuplicateHash then
SetLength(FBuffer, gHashBlockSize);
FTimeSearchStart:=0;
FTimeSearchEnd:=0;
FTimeOfScan:=0;
@ -618,11 +622,40 @@ end;
function TFindThread.CheckDuplicate(const Folder: String; const sr: TSearchRecEx): Boolean;
var
AKey: String;
AHash: String;
Index: IntPtr;
AData: TDuplicate;
AFileName: String;
AValue: String = '';
AStart, AFinish: Integer;
{ Hashes the first Size bytes of AFileName (variable of the enclosing
  routine) with HASH_BLAKE2S, reading through FBuffer in chunks of at most
  Length(FBuffer) bytes, and stores the digest in AHash.

  Size   - number of bytes expected to be read from the file.
  Result - True only when the file was opened and exactly Size bytes were
           hashed; False when the file cannot be opened, a read fails or
           returns no data before Size bytes were consumed, or the thread
           was terminated first.

  Once the file has been opened, AHash is assigned even when the result is
  False, so callers must check the result before using it. }
function FileHash(Size: Int64): Boolean;
var
  Handle: THandle;
  BytesRead: Integer;
  BytesToRead: Integer;
  Context: THashContext;
begin
  Handle:= mbFileOpen(AFileName, fmOpenRead or fmShareDenyWrite);
  Result:= (Handle <> feInvalidHandle);
  if Result then
  begin
    HashInit(Context, HASH_BLAKE2S);
    BytesToRead:= Length(FBuffer);
    while (Size > 0) and (not Terminated) do
    begin
      // Last chunk: do not request more than what is left.
      if (Size < BytesToRead) then BytesToRead:= Size;
      BytesRead := FileRead(Handle, FBuffer[0], BytesToRead);
      // Stop on a read error (< 0) and also when nothing was read (= 0),
      // e.g. the file became shorter than Size. Without the zero check
      // Size would never decrease and the loop would spin until the
      // thread is terminated.
      if (BytesRead <= 0) then Break;
      HashUpdate(Context, FBuffer[0], BytesRead);
      Dec(Size, BytesRead);
    end;
    FileClose(Handle);
    // Any remaining byte means the hash does not cover the whole file.
    Result:= (Size = 0);
    // Always finalize, even on failure, so the context is not left
    // unfinished.
    HashFinal(Context, AHash);
  end;
end;
function CompareFiles(fn1, fn2: String; len: Int64): Boolean;
const
BUFLEN = 1024 * 32;
@ -660,6 +693,11 @@ var
end;
begin
AFileName:= Folder + PathDelim + sr.Name;
if (FPS_ISDIR(sr.Attr) or FileIsLinkToDirectory(AFileName, sr.Attr)) then
Exit(False);
if FSearchTemplate.DuplicateName then
begin
if FileNameCaseSensitive then
@ -671,6 +709,14 @@ begin
if FSearchTemplate.DuplicateSize then
AValue+= IntToStr(sr.Size);
if FSearchTemplate.DuplicateHash then
begin
if FileHash(sr.Size) then
AValue+= AHash
else
Exit(False);
end;
Index:= FDuplicates.Find(AValue);
Result:= (Index >= 0);
if Result then
@ -686,7 +732,7 @@ begin
AData:= TDuplicate(FDuplicates.List[Index]^.Data);
if FSearchTemplate.DuplicateContent then
Result:= CompareFiles(AData.Name, Folder + PathDelim + sr.Name, sr.Size)
Result:= CompareFiles(AData.Name, AFileName, sr.Size)
else begin
Result:= True;
end;
@ -709,8 +755,8 @@ begin
if not Result then
begin
AData:= TDuplicate.Create;
AData.Name:= Folder + PathDelim + sr.Name;
Index:= FDuplicates.Add(AValue, AData);
AData.Name:= AFileName;
FDuplicates.Add(AValue, AData);
end;
end;

View file

@ -246,6 +246,7 @@ begin
begin
DuplicateName:= AConfig.GetValue(Node, 'Name', False);
DuplicateSize:= AConfig.GetValue(Node, 'Size', False);
DuplicateHash:= AConfig.GetValue(Node, 'Hash', False);
DuplicateContent:= AConfig.GetValue(Node, 'Content', False);
end;
// plugins
@ -347,6 +348,7 @@ begin
begin
AConfig.AddValue(Node, 'Name', DuplicateName);
AConfig.AddValue(Node, 'Size', DuplicateSize);
AConfig.AddValue(Node, 'Hash', DuplicateHash);
AConfig.AddValue(Node, 'Content', DuplicateContent);
end;
// plugins