FIX: Zip plugin: Don't use the simple detect functions AbDetectCharSet and IsOEM because they fail for some combinations of code pages and characters. Use MultiByteToWideChar instead and check if the conversion succeeds. Bug [0000416].

2026-06-28 10:02:14 +00:00 · 2012-04-13 06:36:25 +00:00 · 2012-04-13 06:36:25 +00:00 · 144e10f7d6
commit 144e10f7d6
parent 0bc5ff4fd6
2 changed files with 24 additions and 6 deletions
--- a/plugins/wcx/zip/fparchive/abutils.pas
+++ b/plugins/wcx/zip/fparchive/abutils.pas
@ -383,7 +383,10 @@ const
 type
  TAbCharSet = (csASCII, csANSI, csUTF8);

-function AbDetectCharSet(const aValue: RawByteString): TAbCharSet;
+// This function fails with some code pages and characters (eg. 936 and 图片) !
+// Don't use it when merging with Abbrevia.
+// Better to try to convert with MultiByteToWideChar (see AbZipTyp TryEncode and TryDecode).
+//function AbDetectCharSet(const aValue: RawByteString): TAbCharSet;
 {$IFDEF LINUX}
 function AbSysCharSetIsUTF8: Boolean;
 {$ENDIF}
@ -1838,4 +1841,4 @@ begin
 end;
 {$ENDIF}

-end.
+end.
--- a/plugins/wcx/zip/fparchive/abziptyp.pas
+++ b/plugins/wcx/zip/fparchive/abziptyp.pas
@ -1008,7 +1008,10 @@ begin
 end;         
 {============================================================================}
 {$IFDEF MSWINDOWS}
-function IsOEM(const aValue: RawByteString): Boolean;
+// This function fails with some code pages and characters (eg. 936 and 图片) !
+// Don't use when merging with Abbrevia.
+// Better to try to convert with MultiByteToWideChar (see TryDecode).
+{function IsOEM(const aValue: RawByteString): Boolean;
 const
  // Byte values of alpha-numeric characters in OEM and ANSI codepages.
  // Excludes NBSP, ordinal indicators, exponents, the florin symbol, and, for
@ -1149,7 +1152,7 @@ begin
    end;
  if IsANSI then
    Result := False;
-end;
+end;}
 {============================================================================}
 function TryEncode(const aValue: UnicodeString; aCodePage: UINT; aAllowBestFit: Boolean;
  out aResult: AnsiString): Boolean;
@ -1170,6 +1173,16 @@ begin
    Result := not UsedDefault;
  end;
 end;
+
+function TryDecode(const aValue: AnsiString; aCodePage: UINT;
+  out aResult: UnicodeString): Boolean;
+begin // Strict decode: False for empty input or bytes that are invalid in aCodePage.
+  SetLength(aResult, MultiByteToWideChar(aCodePage, MB_ERR_INVALID_CHARS,
+    LPCSTR(aValue), Length(aValue), nil, 0)); // sizing call; SetLength takes chars, not bytes
+  SetLength(aResult, MultiByteToWideChar(aCodePage, MB_ERR_INVALID_CHARS,
+    LPCSTR(aValue), Length(aValue), PWideChar(aResult), Length(aResult))); // real conversion
+  Result := Length(aResult) > 0; // the API returns 0 on failure, leaving aResult empty
+end;
 {$ENDIF MSWINDOWS}
 {============================================================================}
 { TAbZipDataDescriptor implementation ====================================== }
@ -1744,7 +1757,7 @@ var
 begin
  FItemInfo.LoadFromStream( Stream );

-  if FItemInfo.IsUTF8 or (AbDetectCharSet(FItemInfo.FileName) = csUTF8) then
+  if FItemInfo.IsUTF8 then
    inherited SetFileName(FItemInfo.FileName)
  else if FItemInfo.ExtraField.Get(Ab_InfoZipUnicodePathSubfieldID, Pointer(InfoZipField), FieldSize) and
     (FieldSize > SizeOf(TInfoZipUnicodePathRec)) and
@ -1765,8 +1778,10 @@ begin
  begin
    SystemCode := TAbZipHostOs(Byte(VersionMadeBy shr 8));
    {$IF DEFINED(MSWINDOWS)}
-    if (GetACP <> GetOEMCP) and ((SystemCode = hosMSDOS) or IsOEM(FItemInfo.FileName)) then
+    if (GetACP <> GetOEMCP) and (SystemCode = hosMSDOS) then
      inherited SetFileName(AnsiToUtf8(AbStrOemToAnsi(FItemInfo.FileName)))
+    else if (GetACP <> GetOEMCP) and TryDecode(FItemInfo.FileName, CP_OEMCP, UnicodeName) then
+      inherited SetFileName(UTF8Encode(UnicodeName))
    else if (SystemCode = hosNTFS) or (SystemCode = hosMVS) then
      inherited SetFileName(AnsiToUtf8(FItemInfo.FileName))
    else