ADD: Find and replace using RegEx in UTF8 encoded files (#323)

* ADD: Implemented TRegExprU.ReplaceAll

* ADD: Implemented TRegExprEx.ReplaceAll

* UPD: Processing EncodingDefault (probably as UTF8) in TRegExprEx.ChangeEncoding

* ADD: Ability to set encoding immediately in the TRegExprEx.Create

* UPD: Allow find and replace text using RegEx in UTF8 encoded files
This commit is contained in:
larinsv 2022-01-09 15:32:50 +03:00 committed by GitHub
commit 67e8fa01c1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 109 additions and 15 deletions

View file

@ -29,7 +29,7 @@ interface
uses
Graphics, SysUtils, Classes, Controls, Forms, Dialogs, StdCtrls, ComCtrls,
ExtCtrls, Menus, EditBtn, Spin, Buttons, DateTimePicker, KASComboBox,
fAttributesEdit, uDsxModule, DsxPlugin, uFindThread, uFindFiles,
fAttributesEdit, uDsxModule, DsxPlugin, uFindThread, uFindFiles, uRegExprU,
uSearchTemplate, fSearchPlugin, uFileView, types, DCStrUtils,
ActnList, uOSForms, uShellContextMenu, uExceptions, uFileSystemFileSource,
uFormCommands, uHotkeyManager, LCLVersion, uWcxModule, uFileSource;
@ -737,11 +737,19 @@ end;
{ TfrmFindDlg.cmbEncodingSelect }
procedure TfrmFindDlg.cmbEncodingSelect(Sender: TObject);
var
SingleByte: Boolean;
SupportedEncoding: Boolean;
Encoding: String;
begin
SingleByte:= SingleByteEncoding(cmbEncoding.Text);
Encoding := cmbEncoding.Text;
SupportedEncoding:= SingleByteEncoding(Encoding);
if (not SupportedEncoding) and TRegExprU.AvailableNew then
begin
Encoding := NormalizeEncoding(Encoding);
if Encoding = EncodingDefault then Encoding := GetDefaultTextEncoding;
SupportedEncoding := Encoding = EncodingUTF8;
end;
cbTextRegExp.Enabled := cbFindText.Checked and SingleByte and (not chkHex.Checked);
cbTextRegExp.Enabled := cbFindText.Checked and SupportedEncoding and (not chkHex.Checked);
if not cbTextRegExp.Enabled then cbTextRegExp.Checked := False;
cbCaseSens.Enabled:= cbFindText.Checked and (not cbReplaceText.Checked) and (not chkHex.Checked) and (not cbTextRegExp.Checked);

View file

@ -29,7 +29,7 @@ interface
uses
Classes, SysUtils, DCStringHashListUtf8, uFindFiles, uFindEx, uFindByrMr,
uMasks, uRegExprA, uRegExprW, uWcxModule;
uMasks, uRegExpr, uRegExprW, uWcxModule;
type
@ -64,7 +64,7 @@ type
FExcludeDirectories: TMaskList;
FFilesMasksRegExp: TRegExprW;
FExcludeFilesRegExp: TRegExprW;
FRegExpr: TRegExpr;
FRegExpr: TRegExprEx;
FArchive: TWcxModule;
FHeader: TWcxHeader;
@ -167,7 +167,7 @@ begin
end
else begin
TextEncoding := NormalizeEncoding(TextEncoding);
if TextRegExp then FRegExpr := TRegExpr.Create(TextEncoding);
if TextRegExp then FRegExpr := TRegExprEx.Create(TextEncoding, True);
FindText := ConvertEncoding(FindText, EncodingUTF8, TextEncoding);
ReplaceText := ConvertEncoding(ReplaceText, EncodingUTF8, TextEncoding);
end;
@ -385,7 +385,9 @@ begin
finally
fs.Free;
end;
Exit(FRegExpr.ExecRegExpr(sData, S));
FRegExpr.Expression := sData;
FRegExpr.SetInputString(Pointer(S), Length(S));
Exit(FRegExpr.Exec());
end;
if gUseMmapInSearch then
@ -496,7 +498,7 @@ begin
end;
if bRegExp then
S := FRegExpr.ReplaceRegExpr(SearchString, S, replaceString, True)
S := FRegExpr.ReplaceAll(SearchString, S, replaceString)
else
begin
Include(Flags, rfReplaceAll);

View file

@ -21,13 +21,14 @@ type
FRegExpW: TRegExprW;
FRegExpU: TRegExprU;
FType: TRegExprType;
procedure SetExpression(AValue: String);
procedure SetExpression(const AValue: String);
function GetMatchLen(Idx : Integer): PtrInt;
function GetMatchPos(Idx : Integer): PtrInt;
public
constructor Create(const AEncoding: String = EncodingDefault);
constructor Create(const AEncoding: String = EncodingDefault; ASetEncoding: Boolean = False);
destructor Destroy; override;
function Exec(AOffset: UIntPtr = 1): Boolean;
function ReplaceAll(const AExpression, AStr, AReplacement: String): String;
procedure ChangeEncoding(const AEncoding: String);
procedure SetInputString(AInputString : Pointer; ALength : UIntPtr);
public
@ -43,7 +44,7 @@ uses
{ TRegExprEx }
procedure TRegExprEx.SetExpression(AValue: String);
procedure TRegExprEx.SetExpression(const AValue: String);
begin
case FType of
retUtf8: FRegExpU.Expression:= AValue;
@ -70,11 +71,12 @@ begin
end;
end;
constructor TRegExprEx.Create(const AEncoding: String);
{ Builds the wide, UTF-8 and single-byte engine objects. AEncoding is always
  forwarded to the single-byte engine's constructor; it is additionally passed
  to ChangeEncoding only when ASetEncoding is True. }
constructor TRegExprEx.Create(const AEncoding: String; ASetEncoding: Boolean = False);
begin
  FRegExpW:= TRegExprW.Create;
  FRegExpU:= TRegExprU.Create;
  FRegExpA:= TRegExpr.Create(AEncoding);
  // Callers that do not ask for it keep the engine type at its initial value
  if not ASetEncoding then
    Exit;
  ChangeEncoding(AEncoding);
end;
destructor TRegExprEx.Destroy;
@ -94,9 +96,31 @@ begin
end;
end;
{ Replaces all matches of AExpression in AStr with AReplacement, dispatching
  on the currently selected engine type (FType).
  - retAnsi:    delegates to FRegExpA.ReplaceRegExpr
  - retUtf8:    delegates to FRegExpU.ReplaceAll; returns the unchanged input
                when that call reports failure
  - retUtf16le: not implemented, returns AStr unchanged }
function TRegExprEx.ReplaceAll(const AExpression, AStr, AReplacement: String): String;
var
  Subject: String;
begin
  if FType = retAnsi then
    Result := FRegExpA.ReplaceRegExpr(AExpression, AStr, AReplacement, True)
  else if FType = retUtf8 then
  begin
    // Subject holds its own reference to the text: FRegExpU is given a raw
    // pointer into it, and Result is overwritten by the call below.
    // NOTE(review): presumably this guards against Result aliasing AStr at
    // the call site - confirm before removing the copy.
    Subject := AStr;
    FRegExpU.Expression := AExpression;
    FRegExpU.SetInputString(PAnsiChar(Subject), Length(Subject));
    if not FRegExpU.ReplaceAll(AReplacement, Result) then
      Result := Subject;
  end
  else if FType = retUtf16le then
    Result := AStr; // TODO : Implement ReplaceAll for TRegExprW
end;
procedure TRegExprEx.ChangeEncoding(const AEncoding: String);
begin
FEncoding:= NormalizeEncoding(AEncoding);
if FEncoding = EncodingDefault then
FEncoding:= GetDefaultTextEncoding;
if FEncoding = EncodingUTF16LE then
FType:= retUtf16le
else if (FEncoding = EncodingUTF8) or (FEncoding = EncodingUTF8BOM) then

View file

@ -48,14 +48,16 @@ type
FExpression: String;
FInputLength: UIntPtr;
FOvector: array[Byte] of cint;
procedure SetExpression(AValue: String);
procedure SetExpression(const AValue: String);
function GetMatchLen(Idx : integer): PtrInt;
function GetMatchPos(Idx : integer): PtrInt;
public
destructor Destroy; override;
class function Available: Boolean;
class function AvailableNew: Boolean;
function Exec(AOffset: UIntPtr): Boolean;
procedure SetInputString(AInputString : PAnsiChar; ALength : UIntPtr);
function ReplaceAll(const Replacement: AnsiString; out Output: AnsiString): Boolean;
public
property Expression : String read FExpression write SetExpression;
property MatchPos [Idx : integer] : PtrInt read GetMatchPos;
@ -81,6 +83,14 @@ const
PCRE2_CONFIG_UNICODE = 9;
PCRE2_UTF = $00080000;
PCRE2_SUBSTITUTE_GLOBAL = $00000100;
//PCRE2_SUBSTITUTE_EXTENDED = $00000200;
PCRE2_SUBSTITUTE_UNSET_EMPTY = $00000400;
PCRE2_SUBSTITUTE_UNKNOWN_UNSET = $00000800;
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH = $00001000;
PCRE2_ERROR_NOMEMORY = -48;
var
pcre2_compile: function(pattern: PAnsiChar; length: csize_t; options: cuint32; errorcode: pcint; erroroffset: pcsize_t; ccontext: Pointer): Pointer; cdecl;
pcre2_code_free: procedure(code: Pointer); cdecl;
@ -90,6 +100,11 @@ var
pcre2_match_data_create_from_pattern: function(code: Pointer; gcontext: Pointer): Pointer; cdecl;
pcre2_match_data_free: procedure(match_data: Pointer); cdecl;
pcre2_config: function(what: cuint32; where: pointer): cint; cdecl;
pcre2_substitute: function(code: Pointer; subject: PAnsiChar; length: csize_t; startoffset: csize_t;
options: cuint32; match_data: Pointer; mcontext: Pointer;
replacement: PAnsiChar; rlength: csize_t;
outputbuffer: PAnsiChar; var outlength: csize_t): cint; cdecl;
// PCRE 1
const
@ -119,7 +134,7 @@ var
{ TRegExprU }
procedure TRegExprU.SetExpression(AValue: String);
procedure TRegExprU.SetExpression(const AValue: String);
var
Message: String;
error: PAnsiChar;
@ -198,6 +213,11 @@ begin
Result:= (hLib <> NilHandle);
end;
{ True only when the library handle is valid and the pcre_new flag is set. }
class function TRegExprU.AvailableNew: Boolean;
begin
  if hLib = NilHandle then
    Exit(False);
  Result:= pcre_new;
end;
function TRegExprU.Exec(AOffset: UIntPtr): Boolean;
begin
Dec(AOffset);
@ -227,6 +247,45 @@ begin
FInputLength:= ALength;
end;
{ Replaces every match of the compiled expression in the current input string
  (as set by SetInputString) using pcre2_substitute.

  Replacement - substitution template, passed to pcre2_substitute unchanged
  Output      - receives the substituted text on success; always '' on failure
  Returns True when the substitution succeeded (including the case where
  nothing matched), False when the PCRE2 API is not in use or
  pcre2_substitute reports an error. An empty input yields '' and True. }
function TRegExprU.ReplaceAll(const Replacement: AnsiString; out Output: AnsiString): Boolean;
var
  outlength: csize_t;
  options: cuint32;
  res: cint;
begin
  Output := '';
  if not pcre_new then
    Exit(False);
  if FInputLength = 0 then
    Exit(True);

  options := PCRE2_SUBSTITUTE_OVERFLOW_LENGTH or PCRE2_SUBSTITUTE_UNKNOWN_UNSET or PCRE2_SUBSTITUTE_UNSET_EMPTY;
  //options := options or PCRE2_SUBSTITUTE_EXTENDED;
  options := options or PCRE2_SUBSTITUTE_GLOBAL;

  // First guess for the buffer size: twice the input, at least 2048 units
  outlength := FInputLength * 2 + 1; // + space for #0
  if outlength < 2048 then outlength := 2048;
  SetLength(Output, outlength - 1);

  res := pcre2_substitute(FCode, FInput, FInputLength, 0, options, FMatch, nil,
    PAnsiChar(Replacement), Length(Replacement), PAnsiChar(Output), outlength);

  if res = PCRE2_ERROR_NOMEMORY then
  begin
    // With PCRE2_SUBSTITUTE_OVERFLOW_LENGTH the required buffer size
    // (including the trailing #0) is returned in outlength: grow and retry
    SetLength(Output, outlength - 1);
    res := pcre2_substitute(FCode, FInput, FInputLength, 0, options, FMatch, nil,
      PAnsiChar(Replacement), Length(Replacement), PAnsiChar(Output), outlength);
  end;

  Result := res >= 0; // if res = 0 then nothing found
  if Result then
    // outlength now holds the result length without the trailing #0;
    // trim on both the first-call and the retry success path
    SetLength(Output, outlength)
  else
    // Do not hand the uninitialised scratch buffer back to the caller
    Output := '';
end;
procedure Initialize;
var
Where: IntPtr;
@ -246,6 +305,7 @@ begin
@pcre2_get_ovector_pointer:= SafeGetProcAddress(hLib, 'pcre2_get_ovector_pointer_8');
@pcre2_match_data_create_from_pattern:= SafeGetProcAddress(hLib, 'pcre2_match_data_create_from_pattern_8');
@pcre2_match_data_free:= SafeGetProcAddress(hLib, 'pcre2_match_data_free_8');
@pcre2_substitute:= SafeGetProcAddress(hLib, 'pcre2_substitute_8');
except
on E: Exception do
begin