mirror of
https://github.com/doublecmd/doublecmd.git
synced 2026-06-21 09:58:13 +00:00
ADD: Find and replace using RegEx in UTF8 encoded files (#323)
* ADD: Implemented TRegExprU.ReplaceAll
* ADD: Implemented TRegExprEx.ReplaceAll
* UPD: Processing EncodingDefault (probably as UTF8) in TRegExprEx.ChangeEncoding
* ADD: Ability to set encoding immediately in the TRegExprEx.Create
* UPD: Allow find and replace text using RegEx in UTF8 encoded files
This commit is contained in:
parent
430afd91ee
commit
67e8fa01c1
4 changed files with 109 additions and 15 deletions
|
|
@ -29,7 +29,7 @@ interface
|
|||
uses
|
||||
Graphics, SysUtils, Classes, Controls, Forms, Dialogs, StdCtrls, ComCtrls,
|
||||
ExtCtrls, Menus, EditBtn, Spin, Buttons, DateTimePicker, KASComboBox,
|
||||
fAttributesEdit, uDsxModule, DsxPlugin, uFindThread, uFindFiles,
|
||||
fAttributesEdit, uDsxModule, DsxPlugin, uFindThread, uFindFiles, uRegExprU,
|
||||
uSearchTemplate, fSearchPlugin, uFileView, types, DCStrUtils,
|
||||
ActnList, uOSForms, uShellContextMenu, uExceptions, uFileSystemFileSource,
|
||||
uFormCommands, uHotkeyManager, LCLVersion, uWcxModule, uFileSource;
|
||||
|
|
@ -737,11 +737,19 @@ end;
|
|||
{ TfrmFindDlg.cmbEncodingSelect }
|
||||
procedure TfrmFindDlg.cmbEncodingSelect(Sender: TObject);
|
||||
var
|
||||
SingleByte: Boolean;
|
||||
SupportedEncoding: Boolean;
|
||||
Encoding: String;
|
||||
begin
|
||||
SingleByte:= SingleByteEncoding(cmbEncoding.Text);
|
||||
Encoding := cmbEncoding.Text;
|
||||
SupportedEncoding:= SingleByteEncoding(Encoding);
|
||||
if (not SupportedEncoding) and TRegExprU.AvailableNew then
|
||||
begin
|
||||
Encoding := NormalizeEncoding(Encoding);
|
||||
if Encoding = EncodingDefault then Encoding := GetDefaultTextEncoding;
|
||||
SupportedEncoding := Encoding = EncodingUTF8;
|
||||
end;
|
||||
|
||||
cbTextRegExp.Enabled := cbFindText.Checked and SingleByte and (not chkHex.Checked);
|
||||
cbTextRegExp.Enabled := cbFindText.Checked and SupportedEncoding and (not chkHex.Checked);
|
||||
if not cbTextRegExp.Enabled then cbTextRegExp.Checked := False;
|
||||
|
||||
cbCaseSens.Enabled:= cbFindText.Checked and (not cbReplaceText.Checked) and (not chkHex.Checked) and (not cbTextRegExp.Checked);
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ interface
|
|||
|
||||
uses
|
||||
Classes, SysUtils, DCStringHashListUtf8, uFindFiles, uFindEx, uFindByrMr,
|
||||
uMasks, uRegExprA, uRegExprW, uWcxModule;
|
||||
uMasks, uRegExpr, uRegExprW, uWcxModule;
|
||||
|
||||
type
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ type
|
|||
FExcludeDirectories: TMaskList;
|
||||
FFilesMasksRegExp: TRegExprW;
|
||||
FExcludeFilesRegExp: TRegExprW;
|
||||
FRegExpr: TRegExpr;
|
||||
FRegExpr: TRegExprEx;
|
||||
FArchive: TWcxModule;
|
||||
FHeader: TWcxHeader;
|
||||
|
||||
|
|
@ -167,7 +167,7 @@ begin
|
|||
end
|
||||
else begin
|
||||
TextEncoding := NormalizeEncoding(TextEncoding);
|
||||
if TextRegExp then FRegExpr := TRegExpr.Create(TextEncoding);
|
||||
if TextRegExp then FRegExpr := TRegExprEx.Create(TextEncoding, True);
|
||||
FindText := ConvertEncoding(FindText, EncodingUTF8, TextEncoding);
|
||||
ReplaceText := ConvertEncoding(ReplaceText, EncodingUTF8, TextEncoding);
|
||||
end;
|
||||
|
|
@ -385,7 +385,9 @@ begin
|
|||
finally
|
||||
fs.Free;
|
||||
end;
|
||||
Exit(FRegExpr.ExecRegExpr(sData, S));
|
||||
FRegExpr.Expression := sData;
|
||||
FRegExpr.SetInputString(Pointer(S), Length(S));
|
||||
Exit(FRegExpr.Exec());
|
||||
end;
|
||||
|
||||
if gUseMmapInSearch then
|
||||
|
|
@ -496,7 +498,7 @@ begin
|
|||
end;
|
||||
|
||||
if bRegExp then
|
||||
S := FRegExpr.ReplaceRegExpr(SearchString, S, replaceString, True)
|
||||
S := FRegExpr.ReplaceAll(SearchString, S, replaceString)
|
||||
else
|
||||
begin
|
||||
Include(Flags, rfReplaceAll);
|
||||
|
|
|
|||
|
|
@ -21,13 +21,14 @@ type
|
|||
FRegExpW: TRegExprW;
|
||||
FRegExpU: TRegExprU;
|
||||
FType: TRegExprType;
|
||||
procedure SetExpression(AValue: String);
|
||||
procedure SetExpression(const AValue: String);
|
||||
function GetMatchLen(Idx : Integer): PtrInt;
|
||||
function GetMatchPos(Idx : Integer): PtrInt;
|
||||
public
|
||||
constructor Create(const AEncoding: String = EncodingDefault);
|
||||
constructor Create(const AEncoding: String = EncodingDefault; ASetEncoding: Boolean = False);
|
||||
destructor Destroy; override;
|
||||
function Exec(AOffset: UIntPtr = 1): Boolean;
|
||||
function ReplaceAll(const AExpression, AStr, AReplacement: String): String;
|
||||
procedure ChangeEncoding(const AEncoding: String);
|
||||
procedure SetInputString(AInputString : Pointer; ALength : UIntPtr);
|
||||
public
|
||||
|
|
@ -43,7 +44,7 @@ uses
|
|||
|
||||
{ TRegExprEx }
|
||||
|
||||
procedure TRegExprEx.SetExpression(AValue: String);
|
||||
procedure TRegExprEx.SetExpression(const AValue: String);
|
||||
begin
|
||||
case FType of
|
||||
retUtf8: FRegExpU.Expression:= AValue;
|
||||
|
|
@ -70,11 +71,12 @@ begin
|
|||
end;
|
||||
end;
|
||||
|
||||
constructor TRegExprEx.Create(const AEncoding: String);
|
||||
{ Builds the wrapper together with its three engine instances
  (TRegExprW, TRegExprU and TRegExpr).

  AEncoding    - passed to the TRegExpr constructor and, when requested,
                 to ChangeEncoding.
  ASetEncoding - when True, ChangeEncoding(AEncoding) is called right away;
                 when False (the default) the constructor does not call it
                 and the caller is expected to do so later if needed. }
constructor TRegExprEx.Create(const AEncoding: String; ASetEncoding: Boolean = False);
begin
  FRegExpW := TRegExprW.Create;
  FRegExpU := TRegExprU.Create;
  FRegExpA := TRegExpr.Create(AEncoding);

  // Optional one-step initialisation requested by the caller
  if ASetEncoding then
    ChangeEncoding(AEncoding);
end;
|
||||
|
||||
destructor TRegExprEx.Destroy;
|
||||
|
|
@ -94,9 +96,31 @@ begin
|
|||
end;
|
||||
end;
|
||||
|
||||
{ Replaces all matches of AExpression in AStr with AReplacement, dispatching
  on the engine type selected in FType.

  retAnsi    - delegated to FRegExpA.ReplaceRegExpr.
  retUtf8    - delegated to FRegExpU.ReplaceAll; if that call reports failure
               the original text is returned unchanged.
  retUtf16le - not implemented yet, the original text is returned unchanged. }
function TRegExprEx.ReplaceAll(const AExpression, AStr, AReplacement: String): String;
var
  Subject: String;
begin
  if FType = retAnsi then
    Result := FRegExpA.ReplaceRegExpr(AExpression, AStr, AReplacement, True)
  else if FType = retUtf8 then
  begin
    // NOTE(review): the local copy presumably keeps the subject buffer alive
    // while Result is overwritten through the "out" parameter below (Result
    // may alias AStr at the call site) - confirm before removing it.
    Subject := AStr;
    FRegExpU.Expression := AExpression;
    FRegExpU.SetInputString(PAnsiChar(Subject), Length(Subject));
    if not FRegExpU.ReplaceAll(AReplacement, Result) then
      Result := Subject;
  end
  else if FType = retUtf16le then
    Result := AStr; // TODO : Implement ReplaceAll for TRegExprW
end;
|
||||
|
||||
procedure TRegExprEx.ChangeEncoding(const AEncoding: String);
|
||||
begin
|
||||
FEncoding:= NormalizeEncoding(AEncoding);
|
||||
if FEncoding = EncodingDefault then
|
||||
FEncoding:= GetDefaultTextEncoding;
|
||||
if FEncoding = EncodingUTF16LE then
|
||||
FType:= retUtf16le
|
||||
else if (FEncoding = EncodingUTF8) or (FEncoding = EncodingUTF8BOM) then
|
||||
|
|
|
|||
|
|
@ -48,14 +48,16 @@ type
|
|||
FExpression: String;
|
||||
FInputLength: UIntPtr;
|
||||
FOvector: array[Byte] of cint;
|
||||
procedure SetExpression(AValue: String);
|
||||
procedure SetExpression(const AValue: String);
|
||||
function GetMatchLen(Idx : integer): PtrInt;
|
||||
function GetMatchPos(Idx : integer): PtrInt;
|
||||
public
|
||||
destructor Destroy; override;
|
||||
class function Available: Boolean;
|
||||
class function AvailableNew: Boolean;
|
||||
function Exec(AOffset: UIntPtr): Boolean;
|
||||
procedure SetInputString(AInputString : PAnsiChar; ALength : UIntPtr);
|
||||
function ReplaceAll(const Replacement: AnsiString; out Output: AnsiString): Boolean;
|
||||
public
|
||||
property Expression : String read FExpression write SetExpression;
|
||||
property MatchPos [Idx : integer] : PtrInt read GetMatchPos;
|
||||
|
|
@ -81,6 +83,14 @@ const
|
|||
PCRE2_CONFIG_UNICODE = 9;
|
||||
PCRE2_UTF = $00080000;
|
||||
|
||||
PCRE2_SUBSTITUTE_GLOBAL = $00000100;
|
||||
//PCRE2_SUBSTITUTE_EXTENDED = $00000200;
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY = $00000400;
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET = $00000800;
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH = $00001000;
|
||||
|
||||
PCRE2_ERROR_NOMEMORY = -48;
|
||||
|
||||
var
|
||||
pcre2_compile: function(pattern: PAnsiChar; length: csize_t; options: cuint32; errorcode: pcint; erroroffset: pcsize_t; ccontext: Pointer): Pointer; cdecl;
|
||||
pcre2_code_free: procedure(code: Pointer); cdecl;
|
||||
|
|
@ -90,6 +100,11 @@ var
|
|||
pcre2_match_data_create_from_pattern: function(code: Pointer; gcontext: Pointer): Pointer; cdecl;
|
||||
pcre2_match_data_free: procedure(match_data: Pointer); cdecl;
|
||||
pcre2_config: function(what: cuint32; where: pointer): cint; cdecl;
|
||||
pcre2_substitute: function(code: Pointer; subject: PAnsiChar; length: csize_t; startoffset: csize_t;
|
||||
options: cuint32; match_data: Pointer; mcontext: Pointer;
|
||||
replacement: PAnsiChar; rlength: csize_t;
|
||||
outputbuffer: PAnsiChar; var outlength: csize_t): cint; cdecl;
|
||||
|
||||
|
||||
// PCRE 1
|
||||
const
|
||||
|
|
@ -119,7 +134,7 @@ var
|
|||
|
||||
{ TRegExprU }
|
||||
|
||||
procedure TRegExprU.SetExpression(AValue: String);
|
||||
procedure TRegExprU.SetExpression(const AValue: String);
|
||||
var
|
||||
Message: String;
|
||||
error: PAnsiChar;
|
||||
|
|
@ -198,6 +213,11 @@ begin
|
|||
Result:= (hLib <> NilHandle);
|
||||
end;
|
||||
|
||||
{ Returns True only when the library handle is valid (hLib <> NilHandle)
  and the pcre_new flag is set. }
class function TRegExprU.AvailableNew: Boolean;
begin
  if hLib = NilHandle then
    Result := False
  else
    Result := pcre_new;
end;
|
||||
|
||||
function TRegExprU.Exec(AOffset: UIntPtr): Boolean;
|
||||
begin
|
||||
Dec(AOffset);
|
||||
|
|
@ -227,6 +247,45 @@ begin
|
|||
FInputLength:= ALength;
|
||||
end;
|
||||
|
||||
{ Replaces every match of the current expression in the current input string
  (see SetInputString) with Replacement, using pcre2_substitute.

  Replacement - replacement text handed to pcre2_substitute as-is.
  Output      - receives the substituted text on success. On any failure it
                is set to an empty string, so callers never see the
                uninitialised scratch buffer.

  Returns True on success, including the case where nothing matched (Output
  is then a copy of the input) and the case of an empty input string (Output
  is then empty). Returns False when pcre_new is not set or when
  pcre2_substitute reports an error. }
function TRegExprU.ReplaceAll(const Replacement: AnsiString; out Output: AnsiString): Boolean;
const
  MIN_OUTPUT_SIZE = 2048;
var
  outlength: csize_t;
  options: cuint32;
  res: cint;
begin
  Output := '';

  // pcre2_substitute is only called when the pcre_new flag is set
  if not pcre_new then
    Exit(False);

  // Nothing to substitute in an empty input
  if FInputLength = 0 then
    Exit(True);

  // OVERFLOW_LENGTH makes a too-small buffer report the required size in
  // outlength (together with PCRE2_ERROR_NOMEMORY) so that one retry suffices
  options := PCRE2_SUBSTITUTE_OVERFLOW_LENGTH or PCRE2_SUBSTITUTE_UNKNOWN_UNSET or PCRE2_SUBSTITUTE_UNSET_EMPTY;
  //options := options or PCRE2_SUBSTITUTE_EXTENDED;
  options := options or PCRE2_SUBSTITUTE_GLOBAL;

  // First guess: twice the input size, but at least MIN_OUTPUT_SIZE.
  // outlength counts the terminating #0, the string length does not.
  outlength := FInputLength * 2 + 1; // + space for #0
  if outlength < MIN_OUTPUT_SIZE then outlength := MIN_OUTPUT_SIZE;
  SetLength(Output, outlength - 1);

  res := pcre2_substitute(FCode, FInput, FInputLength, 0, options, FMatch, nil,
                          PAnsiChar(Replacement), Length(Replacement), PAnsiChar(Output), outlength);

  if res = PCRE2_ERROR_NOMEMORY then
  begin
    // outlength now holds the required buffer size (including #0): retry once
    SetLength(Output, outlength - 1);
    res := pcre2_substitute(FCode, FInput, FInputLength, 0, options, FMatch, nil,
                            PAnsiChar(Replacement), Length(Replacement), PAnsiChar(Output), outlength);
  end;

  Result := res >= 0; // if res = 0 then nothing found
  if Result then
    // On success outlength is the length of the result without the #0
    SetLength(Output, outlength)
  else
    // Do not hand the uninitialised scratch buffer back to the caller
    Output := '';
end;
|
||||
|
||||
procedure Initialize;
|
||||
var
|
||||
Where: IntPtr;
|
||||
|
|
@ -246,6 +305,7 @@ begin
|
|||
@pcre2_get_ovector_pointer:= SafeGetProcAddress(hLib, 'pcre2_get_ovector_pointer_8');
|
||||
@pcre2_match_data_create_from_pattern:= SafeGetProcAddress(hLib, 'pcre2_match_data_create_from_pattern_8');
|
||||
@pcre2_match_data_free:= SafeGetProcAddress(hLib, 'pcre2_match_data_free_8');
|
||||
@pcre2_substitute:= SafeGetProcAddress(hLib, 'pcre2_substitute_8');
|
||||
except
|
||||
on E: Exception do
|
||||
begin
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue