FIX: Zip plugin: Don't use the simple detect functions AbDetectCharSet and IsOEM because they fail for some combinations of code pages and characters. Use MultiByteToWideChar instead and check if the conversion succeeds. Bug [0000416].

This commit is contained in:
cobines 2012-04-13 06:36:25 +00:00
commit 144e10f7d6
2 changed files with 24 additions and 6 deletions

View file

@ -383,7 +383,10 @@ const
type
TAbCharSet = (csASCII, csANSI, csUTF8);
function AbDetectCharSet(const aValue: RawByteString): TAbCharSet;
// This function fails with some code pages and characters (e.g. code page 936 and 图片)!
// Don't use it when merging with Abbrevia.
// It is better to try converting with MultiByteToWideChar (see TryEncode and TryDecode in AbZipTyp).
//function AbDetectCharSet(const aValue: RawByteString): TAbCharSet;
{$IFDEF LINUX}
function AbSysCharSetIsUTF8: Boolean;
{$ENDIF}
@ -1838,4 +1841,4 @@ begin
end;
{$ENDIF}
end.
end.

View file

@ -1008,7 +1008,10 @@ begin
end;
{============================================================================}
{$IFDEF MSWINDOWS}
function IsOEM(const aValue: RawByteString): Boolean;
// This function fails with some code pages and characters (e.g. code page 936 and 图片)!
// Don't use it when merging with Abbrevia.
// It is better to try converting with MultiByteToWideChar (see TryDecode).
{function IsOEM(const aValue: RawByteString): Boolean;
const
// Byte values of alpha-numeric characters in OEM and ANSI codepages.
// Excludes NBSP, ordinal indicators, exponents, the florin symbol, and, for
@ -1149,7 +1152,7 @@ begin
end;
if IsANSI then
Result := False;
end;
end;}
{============================================================================}
function TryEncode(const aValue: UnicodeString; aCodePage: UINT; aAllowBestFit: Boolean;
out aResult: AnsiString): Boolean;
@ -1170,6 +1173,16 @@ begin
Result := not UsedDefault;
end;
end;
{ Tries to decode aValue from the code page aCodePage into UTF-16.

  aValue    - the raw bytes to decode.
  aCodePage - the code page identifier passed to MultiByteToWideChar.
  aResult   - receives the decoded text on success; empty on failure.

  Returns True only when MultiByteToWideChar produced at least one character.
  The call is made with MB_ERR_INVALID_CHARS, so a byte sequence that is not
  valid in aCodePage makes the conversion fail instead of being silently
  replaced. An empty aValue yields False, because MultiByteToWideChar reports
  zero characters for a zero-length input. }
function TryDecode(const aValue: AnsiString; aCodePage: UINT;
out aResult: UnicodeString): Boolean;
var
WideLen: Integer;
begin
Result := False;
aResult := '';
if Length(aValue) = 0 then
Exit;
// First pass: ask only for the required number of UTF-16 code units.
WideLen := MultiByteToWideChar(aCodePage, MB_ERR_INVALID_CHARS,
LPCSTR(aValue), Length(aValue), nil, 0);
if WideLen <= 0 then
Exit;
// SetLength on a UnicodeString counts characters, not bytes, so the length
// must not be scaled by the character size.
SetLength(aResult, WideLen);
// Second pass: perform the conversion into the exactly sized buffer.
WideLen := MultiByteToWideChar(aCodePage, MB_ERR_INVALID_CHARS,
LPCSTR(aValue), Length(aValue), PWideChar(aResult), WideLen);
// Trim to what was actually written (zero if the conversion failed).
SetLength(aResult, WideLen);
Result := WideLen > 0;
end;
{$ENDIF MSWINDOWS}
{============================================================================}
{ TAbZipDataDescriptor implementation ====================================== }
@ -1744,7 +1757,7 @@ var
begin
FItemInfo.LoadFromStream( Stream );
if FItemInfo.IsUTF8 or (AbDetectCharSet(FItemInfo.FileName) = csUTF8) then
if FItemInfo.IsUTF8 then
inherited SetFileName(FItemInfo.FileName)
else if FItemInfo.ExtraField.Get(Ab_InfoZipUnicodePathSubfieldID, Pointer(InfoZipField), FieldSize) and
(FieldSize > SizeOf(TInfoZipUnicodePathRec)) and
@ -1765,8 +1778,10 @@ begin
begin
SystemCode := TAbZipHostOs(Byte(VersionMadeBy shr 8));
{$IF DEFINED(MSWINDOWS)}
if (GetACP <> GetOEMCP) and ((SystemCode = hosMSDOS) or IsOEM(FItemInfo.FileName)) then
if (GetACP <> GetOEMCP) and (SystemCode = hosMSDOS) then
inherited SetFileName(AnsiToUtf8(AbStrOemToAnsi(FItemInfo.FileName)))
else if (GetACP <> GetOEMCP) and TryDecode(FItemInfo.FileName, CP_OEMCP, UnicodeName) then
inherited SetFileName(UTF8Encode(UnicodeName))
else if (SystemCode = hosNTFS) or (SystemCode = hosMVS) then
inherited SetFileName(AnsiToUtf8(FItemInfo.FileName))
else