mirror of
https://github.com/doublecmd/doublecmd.git
synced 2026-06-21 09:58:13 +00:00
ADD: Find files - search text in Office XML (*.docx)
This commit is contained in:
parent
6db4ff0b63
commit
a57b98ddac
7 changed files with 201 additions and 4 deletions
|
|
@ -411,7 +411,7 @@ object frmFindDlg: TfrmFindDlg
|
|||
ItemHeight = 15
|
||||
OnSelect = cmbEncodingSelect
|
||||
Style = csDropDownList
|
||||
TabOrder = 6
|
||||
TabOrder = 7
|
||||
end
|
||||
object cmbFindText: TComboBoxWithDelItems
|
||||
AnchorSideLeft.Control = CheksPanel
|
||||
|
|
@ -518,7 +518,23 @@ object frmFindDlg: TfrmFindDlg
|
|||
Width = 88
|
||||
Caption = 'Hexadeci&mal'
|
||||
OnChange = chkHexChange
|
||||
TabOrder = 7
|
||||
TabOrder = 8
|
||||
end
|
||||
object cbOffceXML: TCheckBox
|
||||
AnchorSideLeft.Control = cbTextRegExp
|
||||
AnchorSideLeft.Side = asrBottom
|
||||
AnchorSideTop.Control = cbTextRegExp
|
||||
Left = 592
|
||||
Height = 24
|
||||
Hint = 'Office XML (*.docx)'
|
||||
Top = 67
|
||||
Width = 93
|
||||
BorderSpacing.Left = 15
|
||||
Caption = 'Office XML'
|
||||
OnChange = cbOffceXMLChange
|
||||
ParentShowHint = False
|
||||
ShowHint = True
|
||||
TabOrder = 6
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
{"hash":35720169,"name":"tfrmfinddlg.cbreplacetext.caption","sourcebytes":[82,101,38,112,108,97,99,101,32,98,121],"value":"Re&place by"},
|
||||
{"hash":137727326,"name":"tfrmfinddlg.cbtextregexp.caption","sourcebytes":[82,101,103,38,117,108,97,114,32,101,120,112,114,101,115,115,105,111,110],"value":"Reg&ular expression"},
|
||||
{"hash":259470556,"name":"tfrmfinddlg.chkhex.caption","sourcebytes":[72,101,120,97,100,101,99,105,38,109,97,108],"value":"Hexadeci&mal"},
|
||||
{"hash":233337209,"name":"tfrmfinddlg.cboffcexml.hint","sourcebytes":[79,102,102,99,101,32,88,77,76,32,40,42,46,100,111,99,120,41],"value":"Offce XML (*.docx)"},
|
||||
{"hash":211225308,"name":"tfrmfinddlg.cboffcexml.caption","sourcebytes":[79,102,102,99,101,32,88,77,76],"value":"Offce XML"},
|
||||
{"hash":197676484,"name":"tfrmfinddlg.tsadvanced.caption","sourcebytes":[65,100,118,97,110,99,101,100],"value":"Advanced"},
|
||||
{"hash":122109610,"name":"tfrmfinddlg.cbdatefrom.caption","sourcebytes":[38,68,97,116,101,32,102,114,111,109,58],"value":"&Date from:"},
|
||||
{"hash":34324922,"name":"tfrmfinddlg.cbnotolderthan.caption","sourcebytes":[78,38,111,116,32,111,108,100,101,114,32,116,104,97,110,58],"value":"N&ot older than:"},
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ type
|
|||
cbTextRegExp: TCheckBox;
|
||||
cbFindInArchive: TCheckBox;
|
||||
cbOpenedTabs: TCheckBox;
|
||||
cbOffceXML: TCheckBox;
|
||||
chkDuplicateContent: TCheckBox;
|
||||
chkDuplicateSize: TCheckBox;
|
||||
chkDuplicateHash: TCheckBox;
|
||||
|
|
@ -211,6 +212,7 @@ type
|
|||
procedure cbDateFromChange(Sender: TObject);
|
||||
procedure cbDateToChange(Sender: TObject);
|
||||
procedure cbFindInArchiveChange(Sender: TObject);
|
||||
procedure cbOffceXMLChange(Sender: TObject);
|
||||
procedure cbOpenedTabsChange(Sender: TObject);
|
||||
procedure cbPartialNameSearchChange(Sender: TObject);
|
||||
procedure cbRegExpChange(Sender: TObject);
|
||||
|
|
@ -805,6 +807,7 @@ begin
|
|||
EnableControl(cbReplaceText, cbFindText.Checked and not cbFindInArchive.Checked);
|
||||
EnableControl(cbNotContainingText, cbFindText.Checked);
|
||||
EnableControl(cbTextRegExp, cbFindText.Checked);
|
||||
EnableControl(cbOffceXML, cbFindText.Checked);
|
||||
lblEncoding.Enabled := cbFindText.Checked;
|
||||
cbReplaceText.Checked := False;
|
||||
cmbEncodingSelect(nil);
|
||||
|
|
@ -999,6 +1002,16 @@ begin
|
|||
cbReplaceTextChange(cbReplaceText);
|
||||
end;
|
||||
|
||||
{ TfrmFindDlg.cbOffceXMLChange }
{ OnChange handler of the "Office XML" check box.
  Selecting Office XML search switches off hexadecimal search and text
  replacement; afterwards the "replace" check box is enabled only while
  neither hexadecimal nor Office XML mode is selected. }
procedure TfrmFindDlg.cbOffceXMLChange(Sender: TObject);
begin
  // Order matters: assigning Checked may fire the OnChange handlers of the
  // other check boxes, so the final Enabled state is computed last from the
  // then-current values.
  if cbOffceXML.Checked then
  begin
    chkHex.Checked := False;
    cbReplaceText.Checked := False;
  end;

  // De Morgan form of "not (hex or office)"
  cbReplaceText.Enabled := (not chkHex.Checked) and (not cbOffceXML.Checked);
end;
|
||||
|
||||
{ TfrmFindDlg.cbOpenedTabsChange }
|
||||
procedure TfrmFindDlg.cbOpenedTabsChange(Sender: TObject);
|
||||
begin
|
||||
|
|
@ -1088,6 +1101,7 @@ begin
|
|||
begin
|
||||
cbCaseSens.Tag := Integer(cbCaseSens.Checked);
|
||||
end;
|
||||
cbOffceXML.Checked:= False;
|
||||
cbReplaceText.Checked:= False;
|
||||
end
|
||||
else if not cbCaseSens.Enabled then
|
||||
|
|
@ -1095,7 +1109,7 @@ begin
|
|||
cbCaseSens.Checked := Boolean(cbCaseSens.Tag);
|
||||
end;
|
||||
cmbEncoding.Enabled:= not chkHex.Checked;
|
||||
cbReplaceText.Enabled:= not chkHex.Checked;
|
||||
cbReplaceText.Enabled:= not (chkHex.Checked or cbOffceXML.Checked);
|
||||
cmbEncodingSelect(cmbEncoding);
|
||||
end;
|
||||
|
||||
|
|
@ -1189,6 +1203,7 @@ begin
|
|||
NotContainingText := cbNotContainingText.Checked;
|
||||
TextRegExp := cbTextRegExp.Checked;
|
||||
TextEncoding := cmbEncoding.Text;
|
||||
OfficeXML := cbOffceXML.Checked;
|
||||
{ Duplicates }
|
||||
Duplicates:= chkDuplicates.Checked;
|
||||
DuplicateName:= chkDuplicateName.Checked;
|
||||
|
|
@ -2291,6 +2306,7 @@ begin
|
|||
cbNotContainingText.Checked := NotContainingText;
|
||||
cbTextRegExp.Checked := TextRegExp;
|
||||
cmbEncoding.Text := TextEncoding;
|
||||
cbOffceXML.Checked := OfficeXML;
|
||||
|
||||
if cbFindInArchive.Enabled then
|
||||
begin
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ type
|
|||
NotContainingText: Boolean;
|
||||
TextRegExp: Boolean;
|
||||
TextEncoding: String;
|
||||
OfficeXML: Boolean;
|
||||
{ Duplicates }
|
||||
Duplicates: Boolean;
|
||||
DuplicateName: Boolean;
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ type
|
|||
FFoundFile:String;
|
||||
FCurrentDepth: Integer;
|
||||
FTextSearchType: TTextSearch;
|
||||
FSearchText: String;
|
||||
FSearchTemplate: TSearchTemplateRec;
|
||||
FSelectedFiles: TStringList;
|
||||
FFileChecks: TFindFileChecks;
|
||||
|
|
@ -112,7 +113,7 @@ implementation
|
|||
uses
|
||||
LCLProc, LazUtf8, StrUtils, LConvEncoding, DCStrUtils,
|
||||
uLng, DCClassesUtf8, uFindMmap, uGlobs, uShowMsg, DCOSUtils, uOSUtils, uHash,
|
||||
uLog, WcxPlugin, Math, uDCUtils, uConvEncoding, DCDateTimeUtils;
|
||||
uLog, WcxPlugin, Math, uDCUtils, uConvEncoding, DCDateTimeUtils, uOfficeXML;
|
||||
|
||||
function ProcessDataProcAG(FileName: PAnsiChar; Size: LongInt): LongInt; dcpcall;
|
||||
begin
|
||||
|
|
@ -157,6 +158,8 @@ begin
|
|||
|
||||
if IsFindText then
|
||||
begin
|
||||
FSearchText := FindText;
|
||||
|
||||
if HexValue then
|
||||
begin
|
||||
TextEncoding := EncodingAnsi;
|
||||
|
|
@ -352,6 +355,21 @@ begin
|
|||
Result := False;
|
||||
if sData = '' then Exit;
|
||||
|
||||
if FSearchTemplate.OfficeXML and MatchesMask(sFileName, '*.docx') then
|
||||
begin
|
||||
if LoadFromOffice(sFileName, S) then
|
||||
begin
|
||||
if bRegExp then
|
||||
Result:= uRegExprW.ExecRegExpr(UTF8ToUTF16(FSearchText), UTF8ToUTF16(S))
|
||||
else if FSearchTemplate.CaseSensitive then
|
||||
Result:= PosMem(Pointer(S), Length(S), 0, FSearchText, False, False) <> Pointer(-1)
|
||||
else begin
|
||||
Result:= PosMemU(Pointer(S), Length(S), 0, FSearchText, False) <> Pointer(-1);
|
||||
end;
|
||||
end;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
// Simple regular expression search (don't work for very big files)
|
||||
if bRegExp then
|
||||
begin
|
||||
|
|
|
|||
140
src/uofficexml.pas
Normal file
140
src/uofficexml.pas
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
{
|
||||
Double commander
|
||||
-------------------------------------------------------------------------
|
||||
Load text from office xml (*.docx)
|
||||
|
||||
Copyright (C) 2021 Alexander Koblov (alexx2000@mail.ru)
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
}
|
||||
|
||||
unit uOfficeXML;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
interface
|
||||
|
||||
uses
|
||||
Classes, SysUtils;
|
||||
|
||||
function LoadFromOffice(const FileName: String; out AText: String): Boolean;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
Unzip, ZipUtils, Laz2_DOM, laz2_XMLRead;
|
||||
|
||||
{ Recursively walks the children of ANode (depth-first, document order) and
  appends their plain-text representation to S:
    w:t          -> value of the element's first child node (if any)
    w:p          -> two line endings, emitted when the element is reached
    w:br, w:cr   -> one line ending
    w:tab        -> a TAB character
  Any other element contributes nothing itself, but its children are still
  visited. }
procedure ProcessNodes(var S: String; ANode: TDOMNode);
var
  Child: TDOMNode;
  Tag: DOMString;
begin
  Child := ANode.FirstChild;
  while Assigned(Child) do
  begin
    Tag := Child.NodeName;

    if Tag = 'w:t' then
    begin
      // Only the first child node of a text run is taken
      if Assigned(Child.FirstChild) then
        S := S + Child.FirstChild.NodeValue;
    end
    else if Tag = 'w:p' then
      S := S + LineEnding + LineEnding
    else if (Tag = 'w:br') or (Tag = 'w:cr') then
      S := S + LineEnding
    else if Tag = 'w:tab' then
      S := S + #9;

    // Descend into nested elements before moving on to the next sibling
    if Assigned(Child.FirstChild) then
      ProcessNodes(S, Child);

    Child := Child.NextSibling;
  end;
end;
|
||||
|
||||
{ Decompresses the archive entry that is currently selected in ZipFile into
  MemoryStream.

  ZipFile      - open unzip handle; the wanted entry must already be the
                 current one (ProcessFile selects it with unzLocateFile).
  FileName     - not referenced by the body; kept for interface compatibility.
  MemoryStream - receives the uncompressed data; it is resized to the entry's
                 uncompressed size.

  Returns True only when the entry information could be read, the entry could
  be opened and exactly "uncompressed_size" bytes were read. }
function ExtractFile(ZipFile: unzFile; FileName: PAnsiChar; MemoryStream: TMemoryStream): Boolean;
var
  ASize: LongInt;
  FileInfo: unz_file_info;
begin
  // Start pessimistic so that every failure path reports False. Previously a
  // failing unzOpenCurrentFile left Result = True and the caller went on to
  // parse an uninitialised buffer.
  Result:= False;

  if unzGetCurrentFileInfo(ZipFile, @FileInfo, nil, 0, nil, 0, nil, 0) <> UNZ_OK then
    Exit;

  if unzOpenCurrentFile(ZipFile) <> UNZ_OK then
    Exit;

  try
    MemoryStream.SetSize(FileInfo.uncompressed_size);
    // A negative return value is an error code, a smaller one a short read;
    // both differ from the expected size and therefore yield False.
    ASize:= unzReadCurrentFile(ZipFile, MemoryStream.Memory, FileInfo.uncompressed_size);
    Result:= (ASize = FileInfo.uncompressed_size);
  finally
    // Close the entry even if resizing or reading raised
    unzCloseCurrentFile(ZipFile);
  end;
end;
|
||||
|
||||
{ Appends the text of one XML part of an opened Office archive to AText.

  ZipFile  - open unzip handle of the document.
  FileName - name of the part inside the archive (e.g. 'word/document.xml').
  AText    - accumulator; extracted text is appended, never cleared.

  A part that does not exist in the archive or cannot be extracted is skipped
  silently. Exceptions raised while parsing the XML are not swallowed here,
  but the stream and any document object are released before they propagate. }
procedure ProcessFile(ZipFile: unzFile; const FileName: String; var AText: String);
var
  ADoc: TXMLDocument;
  AStream: TMemoryStream;
begin
  if unzLocateFile(ZipFile, PAnsiChar(FileName), 0) <> UNZ_OK then
    Exit;

  AStream:= TMemoryStream.Create;
  try
    if ExtractFile(ZipFile, PAnsiChar(FileName), AStream) then
    begin
      ADoc:= nil;
      try
        ReadXMLFile(ADoc, AStream, [xrfPreserveWhiteSpace]);
        // Guard against a missing document or root element before walking it
        if Assigned(ADoc) and Assigned(ADoc.DocumentElement) then
          ProcessNodes(AText, ADoc.DocumentElement);
      finally
        // Free is nil-safe; this also releases a document handed back by a
        // ReadXMLFile call that raised (previously leaked).
        ADoc.Free;
      end;
    end;
  finally
    AStream.Free;
  end;
end;
|
||||
|
||||
{ Extracts the plain text of an Office XML document (*.docx).

  FileName - path of the document; it is opened as a zip archive.
  AText    - receives the text of the parts word/header0..9.xml,
             word/document.xml and word/footer0..9.xml, in that order.
             It is reset to an empty string first.

  Returns False when the archive cannot be opened or no text was collected,
  True otherwise. }
function LoadFromOffice(const FileName: String; out AText: String): Boolean;
const
  HEADER_XML = 'word/header%d.xml';
  FOOTER_XML = 'word/footer%d.xml';
var
  Archive: unzFile;

  // Processes the parts named Pattern with the index 0..9 substituted
  procedure ReadNumberedParts(const Pattern: String);
  var
    PartNo: Integer;
  begin
    for PartNo:= 0 to 9 do
      ProcessFile(Archive, Format(Pattern, [PartNo]), AText);
  end;

begin
  AText:= EmptyStr;

  Archive:= unzOpen(PAnsiChar(FileName));
  if not Assigned(Archive) then
    Exit(False);

  try
    ReadNumberedParts(HEADER_XML);                    // headers
    ProcessFile(Archive, 'word/document.xml', AText); // body
    ReadNumberedParts(FOOTER_XML);                    // footers

    // Success means that at least some text was found
    Result:= (AText <> EmptyStr);
  finally
    unzClose(Archive);
  end;
end;
|
||||
|
||||
end.
|
||||
|
||||
|
|
@ -231,10 +231,12 @@ begin
|
|||
IsReplaceText:= AConfig.GetValue(ANode, 'IsReplaceText', False);
|
||||
if IsReplaceText then
|
||||
ReplaceText:= AConfig.GetValue(ANode, 'ReplaceText', '');
|
||||
// text search options
|
||||
HexValue:= AConfig.GetValue(ANode, 'HexValue', False);
|
||||
CaseSensitive:= AConfig.GetValue(ANode, 'CaseSensitive', False);
|
||||
NotContainingText:= AConfig.GetValue(ANode, 'NotContainingText', False);
|
||||
TextRegExp:= AConfig.GetValue(ANode, 'TextRegExp', False);
|
||||
OfficeXML:= AConfig.GetValue(ANode, 'OfficeXML', False);
|
||||
TextEncoding:= AConfig.GetValue(ANode, 'TextEncoding', '');
|
||||
if TextEncoding = 'UTF-8BOM' then TextEncoding:= 'UTF-8';
|
||||
if TextEncoding = 'UCS-2LE' then TextEncoding:= 'UTF-16LE';
|
||||
|
|
@ -336,11 +338,13 @@ begin
|
|||
AConfig.AddValue(SubNode, 'IsReplaceText', IsReplaceText);
|
||||
if IsReplaceText then
|
||||
AConfig.AddValue(SubNode, 'ReplaceText', ReplaceText);
|
||||
// text search options
|
||||
AConfig.AddValue(SubNode, 'HexValue', HexValue);
|
||||
AConfig.AddValue(SubNode, 'CaseSensitive', CaseSensitive);
|
||||
AConfig.AddValue(SubNode, 'NotContainingText', NotContainingText);
|
||||
AConfig.AddValue(SubNode, 'TextRegExp', TextRegExp);
|
||||
AConfig.AddValue(SubNode, 'TextEncoding', TextEncoding);
|
||||
AConfig.AddValue(SubNode, 'OfficeXML', OfficeXML);
|
||||
// duplicates
|
||||
Node := AConfig.AddNode(SubNode, 'Duplicates');
|
||||
AConfig.SetAttr(Node, 'Enabled', Duplicates);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue