ADD: Zip - New Deflate64 decoder (issue #2115)

(cherry picked from commit 5b60341ad7)
This commit is contained in:
Alexander Koblov 2025-01-25 23:16:24 +03:00
commit 1ce1606fda
6 changed files with 1643 additions and 0 deletions

View file

@ -0,0 +1,23 @@
The MIT License (MIT)
Copyright (c) .NET Foundation and Contributors
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -0,0 +1,392 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE.txt file in the directory for more information.
// The Pascal translation by Alexander Koblov.
unit HuffmanTree;
{$mode ObjFPC}{$H+}
interface
uses
Classes, SysUtils, Types, InputBuffer;
type
{ IHuffmanTree }
IHuffmanTree = interface(IUnknown)
['{83791289-1747-4815-AC47-B57A0A0C39C6}']
function GetNextSymbol(input: TInputBuffer): Integer;
end;
{ THuffmanTree }
THuffmanTree = class(TInterfacedObject, IHuffmanTree)
public
const MAX_LITERAL_TREE_ELEMENTS = Integer(288);
const MAX_DIST_TREE_ELEMENTS = Integer(32);
const END_OF_BLOCK_CODE = Integer(256);
const NUMBER_OF_CODE_LENGTH_TREE_ELEMENTS = Integer(19);
private
_tableBits: Integer;
_table: TSmallIntDynArray;
_left: TSmallIntDynArray;
_right: TSmallIntDynArray;
_codeLengthArray: TBytes;
_tableMask: Integer;
private
_StaticLiteralLengthTree: THuffmanTree; static;
_StaticDistanceTree: THuffmanTree; static;
private
class procedure CreateStaticTrees;
class procedure FreeStaticTrees;
// Generate the array contains huffman codes lengths for static huffman tree
class function GetStaticLiteralTreeLength: TBytes;
class function GetStaticDistanceTreeLength: TBytes;
// Calculate the huffman code for each character based on the code length for each character.
function CalculateHuffmanCode: TCardinalDynArray;
procedure CreateTable;
public
class property StaticLiteralLengthTree: THuffmanTree read _StaticLiteralLengthTree;
class property StaticDistanceTree: THuffmanTree read _StaticDistanceTree;
public
constructor Create(constref codeLengths: TBytes);
// This function will try to get enough bits from input and try to decode the bits
function GetNextSymbol(input: TInputBuffer): Integer;
end;
implementation
// Reverse 'length' of the bits in code
function BitReverse(code: Cardinal; length: Integer): Cardinal;
begin
Result := 0;
Assert((length > 0) and (length <= 16), 'Invalid len');
repeat
Result := Result or (code and 1);
Result := Result << 1;
code := code >> 1;
Dec(length);
until (length = 0);
Result := Result >> 1;
end;
{ THuffmanTree }
class procedure THuffmanTree.CreateStaticTrees;
begin
_StaticLiteralLengthTree:= THuffmanTree.Create(GetStaticLiteralTreeLength());
_StaticDistanceTree:= THuffmanTree.Create(GetStaticDistanceTreeLength());
_StaticLiteralLengthTree._AddRef;
_StaticDistanceTree._AddRef;
end;
class procedure THuffmanTree.FreeStaticTrees;
begin
_StaticLiteralLengthTree.Free;
_StaticDistanceTree.Free;
end;
class function THuffmanTree.GetStaticLiteralTreeLength: TBytes;
var
i: Integer;
literalTreeLength: TBytes;
begin
SetLength(literalTreeLength, MAX_LITERAL_TREE_ELEMENTS);
for i := 0 to 143 do
begin
literalTreeLength[i] := 8;
end;
for i := 144 to 255 do
begin
literalTreeLength[i] := 9;
end;
for i := 256 to 279 do
begin
literalTreeLength[i] := 7;
end;
for i := 280 to 287 do
begin
literalTreeLength[i] := 8;
end;
Result := literalTreeLength;
end;
class function THuffmanTree.GetStaticDistanceTreeLength: TBytes;
var
i: Integer;
staticDistanceTreeLength: TBytes;
begin
SetLength(staticDistanceTreeLength, MAX_DIST_TREE_ELEMENTS);
for i := 0 to Pred(MAX_DIST_TREE_ELEMENTS) do
begin
staticDistanceTreeLength[i] := 5;
end;
Result := staticDistanceTreeLength;
end;
function THuffmanTree.CalculateHuffmanCode: TCardinalDynArray;
var
i, bits: Integer;
tempCode: Cardinal = 0;
code: TCardinalDynArray;
len, codeLength: Integer;
bitLengthCount: TCardinalDynArray;
nextCode: array[0..16] of Cardinal;
begin
SetLength(bitLengthCount, 17);
SetLength(code, MAX_LITERAL_TREE_ELEMENTS);
for i := 0 to High(_codeLengthArray) do
begin
codeLength := _codeLengthArray[i];
Inc(bitLengthCount[codeLength]);
end;
bitLengthCount[0] := 0; // clear count for length 0
for bits := 1 to 16 do
begin
tempCode := (tempCode + bitLengthCount[bits - 1]) << 1;
nextCode[bits] := tempCode;
end;
for i := 0 to High(_codeLengthArray) do
begin
len := _codeLengthArray[i];
if (len > 0) then
begin
code[i] := BitReverse(nextCode[len], len);
Inc(nextCode[len]);
end;
end;
Result:= code;
end;
procedure THuffmanTree.CreateTable;
var
avail, value: Int16;
increment, locs: Integer;
array_: TSmallIntDynArray;
codeArray: TCardinalDynArray;
j, ch, len, start, index: Integer;
overflowBits, codeBitMask: Integer;
begin
codeArray := CalculateHuffmanCode();
avail := Int16(Length(_codeLengthArray));
for ch := 0 to High(_codeLengthArray) do
begin
// length of this code
len := _codeLengthArray[ch];
if (len > 0) then
begin
// start value (bit reversed)
start := Integer(codeArray[ch]);
if (len <= _tableBits) then
begin
// If a particular symbol is shorter than nine bits,
// then that symbol's translation is duplicated
// in all those entries that start with that symbol's bits.
// For example, if the symbol is four bits, then it's duplicated
// 32 times in a nine-bit table. If a symbol is nine bits long,
// it appears in the table once.
//
// Make sure that in the loop below, code is always
// less than table_size.
//
// On last iteration we store at array index:
// initial_start_at + (locs-1)*increment
// = initial_start_at + locs*increment - increment
// = initial_start_at + (1 << tableBits) - increment
// = initial_start_at + table_size - increment
//
// Therefore we must ensure:
// initial_start_at + table_size - increment < table_size
// or: initial_start_at < increment
//
increment := 1 << len;
if (start >= increment) then
begin
raise Exception.Create('Deflate64: invalid Huffman data');
end;
// Note the bits in the table are reverted.
locs := 1 << (_tableBits - len);
for j := 0 to Pred(locs) do
begin
_table[start] := Int16(ch);
start += increment;
end;
end
else
begin
// For any code which has length longer than num_elements,
// build a binary tree.
overflowBits := len - _tableBits; // the nodes we need to respent the data.
codeBitMask := 1 << _tableBits; // mask to get current bit (the bits can't fit in the table)
// the left, right table is used to repesent the
// the rest bits. When we got the first part (number bits.) and look at
// tbe table, we will need to follow the tree to find the real character.
// This is in place to avoid bloating the table if there are
// a few ones with long code.
index := start and ((1 << _tableBits) - 1);
array_ := _table;
repeat
value := array_[index];
if (value = 0) then
begin
// set up next pointer if this node is not used before.
array_[index] := Int16(-avail); // use next available slot.
value := Int16(-avail);
Inc(avail);
end;
if (value > 0) then
begin
// prevent an IndexOutOfRangeException from array[index]
raise Exception.Create('Deflate64: invalid Huffman data');
end;
Assert(
value < 0,
'CreateTable: Only negative numbers are used for tree pointers!'
);
if ((start and codeBitMask) = 0) then
begin
// if current bit is 0, go change the left array
array_ := _left;
end
else
begin
// if current bit is 1, set value in the right array
array_ := _right;
end;
index := -value; // go to next node
codeBitMask := codeBitMask << 1;
Dec(overflowBits);
until (overflowBits = 0);
array_[index] := Int16(ch);
end;
end;
end;
end;
constructor THuffmanTree.Create(constref codeLengths: TBytes);
begin
Assert(
(Length(codeLengths) = MAX_LITERAL_TREE_ELEMENTS)
or (Length(codeLengths) = MAX_DIST_TREE_ELEMENTS)
or (Length(codeLengths) = NUMBER_OF_CODE_LENGTH_TREE_ELEMENTS),
'we only expect three kinds of Length here'
);
_codeLengthArray := codeLengths;
if (Length(_codeLengthArray) = MAX_LITERAL_TREE_ELEMENTS) then
begin
// bits for Literal/Length tree table
_tableBits := 9;
end
else
begin
// bits for distance tree table and code length tree table
_tableBits := 7;
end;
_tableMask := (1 << _tableBits) - 1;
SetLength(_table, 1 << _tableBits);
// I need to find proof that left and right array will always be
// enough. I think they are.
SetLength(_left, 2 * Length(_codeLengthArray));
SetLength(_right, 2 * Length(_codeLengthArray));
CreateTable();
end;
function THuffmanTree.GetNextSymbol(input: TInputBuffer): Integer;
var
bitBuffer, mask: Cardinal;
symbol, codeLength: Integer;
begin
// Try to load 16 bits into input buffer if possible and get the bitBuffer value.
// If there aren't 16 bits available we will return all we have in the
// input buffer.
bitBuffer := input.TryLoad16Bits();
if (input.AvailableBits = 0) then
begin // running out of input.
Exit(-1);
end;
// decode an element
symbol := _table[bitBuffer and _tableMask];
if (symbol < 0) then
begin // this will be the start of the binary tree
// navigate the tree
mask := Cardinal(1) << _tableBits;
repeat
symbol := -symbol;
if ((bitBuffer and mask) = 0) then
begin
symbol := _left[symbol];
end
else
begin
symbol := _right[symbol];
end;
mask := mask << 1;
until (symbol >= 0);
end;
codeLength := _codeLengthArray[symbol];
// huffman code lengths must be at least 1 bit long
if (codeLength <= 0) then
begin
raise Exception.Create('Deflate64: invalid Huffman data');
end;
//
// If this code is longer than the # bits we had in the bit buffer (i.e.
// we read only part of the code), we can hit the entry in the table or the tree
// for another symbol. However the length of another symbol will not match the
// available bits count.
if (codeLength > input.AvailableBits) then
begin
// We already tried to load 16 bits and maximum length is 15,
// so this means we are running out of input.
Exit(-1);
end;
input.SkipBits(codeLength);
Result := symbol;
end;
initialization
THuffmanTree.CreateStaticTrees;
finalization
THuffmanTree.FreeStaticTrees;
end.

View file

@ -0,0 +1,93 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE.txt file in the directory for more information.
// The Pascal translation by Alexander Koblov.
unit Inflate64Stream;
{$mode ObjFPC}{$H+}
interface
uses
Classes, SysUtils, InflaterManaged;
type
{ TInflate64Stream }
TInflate64Stream = class(TOwnerStream)
private
_buffer: array[Word] of Byte;
_inflater: TInflaterManaged;
public
constructor Create(ASource: TStream);
destructor Destroy; override;
function Read(var Buffer; Count: Longint): Longint; override;
end;
implementation
{ TInflate64Stream }
constructor TInflate64Stream.Create(ASource: TStream);
begin
inherited Create(ASource);
_inflater:= TInflaterManaged.Create(True);
end;
destructor TInflate64Stream.Destroy;
begin
inherited Destroy;
_inflater.Free;
end;
function TInflate64Stream.Read(var Buffer; Count: Longint): Longint;
var
bytesRead, bytes: Integer;
currentOffset, remainingCount: Integer;
begin
currentOffset := 0;
remainingCount := Count;
while (true) do
begin
bytesRead := _inflater.Inflate(@Buffer, currentOffset, remainingCount);
currentOffset += bytesRead;
remainingCount -= bytesRead;
if (remainingCount = 0) then
begin
break;
end;
if (_inflater.Finished()) then
begin
// if we finished decompressing, we can't have anything left in the outputwindow.
Assert(
_inflater.AvailableOutput = 0,
'We should have copied all stuff out!'
);
break;
end;
bytes := FSource.Read(_buffer, Length(_buffer));
if (bytes <= 0) then
begin
break;
end
else if (bytes > Length(_buffer)) then
begin
// The stream is either malicious or poorly implemented and returned a number of
// bytes larger than the buffer supplied to it.
raise Exception.Create('Deflate64: invalid data');
end;
_inflater.SetInput(_buffer, 0, bytes);
end;
Result := count - remainingCount;
end;
end.

View file

@ -0,0 +1,746 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE.txt file in the directory for more information.
// The Pascal translation by Alexander Koblov.
unit InflaterManaged;
{$mode ObjFPC}{$H+}
interface
uses
Classes, SysUtils, OutputWindow, InputBuffer, HuffmanTree;
type
TBlockType =
(
Uncompressed = 0,
Static = 1,
Dynamic = 2
);
// Do not rearrange the enum values.
TInflaterState =
(
ReadingHeader = 0, // Only applies to GZIP
ReadingBFinal = 2, // About to read bfinal bit
ReadingBType = 3, // About to read blockType bits
ReadingNumLitCodes = 4, // About to read # literal codes
ReadingNumDistCodes = 5, // About to read # dist codes
ReadingNumCodeLengthCodes = 6, // About to read # code length codes
ReadingCodeLengthCodes = 7, // In the middle of reading the code length codes
ReadingTreeCodesBefore = 8, // In the middle of reading tree codes (loop top)
ReadingTreeCodesAfter = 9, // In the middle of reading tree codes (extension; code > 15)
DecodeTop = 10, // About to decode a literal (char/match) in a compressed block
HaveInitialLength = 11, // Decoding a match, have the literal code (base length)
HaveFullLength = 12, // Ditto, now have the full match length (incl. extra length bits)
HaveDistCode = 13, // Ditto, now have the distance code also, need extra dist bits
//* uncompressed blocks */
UncompressedAligning = 15,
UncompressedByte1 = 16,
UncompressedByte2 = 17,
UncompressedByte3 = 18,
UncompressedByte4 = 19,
DecodingUncompressed = 20,
// These three apply only to GZIP
StartReadingFooter = 21, // (Initialisation for reading footer)
ReadingFooter = 22,
VerifyingFooter = 23,
Done = 24 // Finished
);
type
{ TInflaterManaged }
TInflaterManaged = class
private
_output: TOutputWindow;
_input: TInputBuffer;
_literalLengthTree: IHuffmanTree;
_distanceTree: IHuffmanTree;
_state: TInflaterState;
_bfinal: Integer;
_blockType: TBlockType;
// uncompressed block
_blockLengthBuffer: array[0..3] of Byte;
_blockLength: Integer;
// compressed block
_length: Integer;
_distanceCode: Integer;
_extraBits: Integer;
_loopCounter: Integer;
_literalLengthCodeCount: Integer;
_distanceCodeCount: Integer;
_codeLengthCodeCount: Integer;
_codeArraySize: Integer;
_lengthCode: Integer;
_codeList: TBytes; // temporary array to store the code length for literal/Length and distance
_codeLengthTreeCodeLength: TBytes;
_deflate64: Boolean;
_codeLengthTree: IHuffmanTree;
private
procedure Reset;
function Decode: Boolean;
function DecodeUncompressedBlock(out endOfBlock: Boolean): Boolean;
function DecodeBlock(out endOfBlockCodeSeen: Boolean): Boolean;
function DecodeDynamicBlockHeader: Boolean;
public
constructor Create(deflate64: Boolean);
destructor Destroy; override;
procedure SetInput(inputBytes: PByte; offset, length: Integer);
function Finished: Boolean;
function AvailableOutput: Integer;
function Inflate(bytes: PByte; offset, length: Integer): Integer;
end;
implementation
const
// Extra bits for length code 257 - 285.
S_EXTRA_LENGTH_BITS: array[0..28] of Byte =
(
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3,
3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 16
);
// The base length for length code 257 - 285.
// The formula to get the real length for a length code is lengthBase[code - 257] + (value stored in extraBits)
S_LENGTH_BASE: array[0..28] of Integer =
(
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51,
59, 67, 83, 99, 115, 131, 163, 195, 227, 3
);
// The base distance for distance code 0 - 31
// The real distance for a distance code is distanceBasePosition[code] + (value stored in extraBits)
S_DISTANCE_BASE_POSITION: array[0..31] of Integer =
(
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153
);
// code lengths for code length alphabet is stored in following order
S_CODE_ORDER: array[0..18] of Byte = (16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15);
S_STATIC_DISTANCE_TREE_TABLE: array[0..31] of Byte =
(
$00, $10, $08, $18, $04, $14, $0c, $1c, $02, $12, $0a, $1a,
$06, $16, $0e, $1e, $01, $11, $09, $19, $05, $15, $0d, $1d,
$03, $13, $0b, $1b, $07, $17, $0f, $1f
);
{ TInflaterManaged }
procedure TInflaterManaged.Reset;
begin
_state := TInflaterState.ReadingBFinal; // start by reading BFinal bit
end;
function TInflaterManaged.Decode: Boolean;
var
eob: Boolean = false;
begin
result := false;
if (Finished()) then
begin
Exit(true);
end;
if (_state = TInflaterState.ReadingBFinal) then
begin
// reading bfinal bit
// Need 1 bit
if (not _input.EnsureBitsAvailable(1)) then
begin
Exit(false);
end;
_bfinal := _input.GetBits(1);
_state := TInflaterState.ReadingBType;
end;
if (_state = TInflaterState.ReadingBType) then
begin
// Need 2 bits
if (not _input.EnsureBitsAvailable(2)) then
begin
_state := TInflaterState.ReadingBType;
Exit(false);
end;
_blockType := TBlockType(_input.GetBits(2));
if (_blockType = TBlockType.Dynamic) then
begin
_state := TInflaterState.ReadingNumLitCodes;
end
else if (_blockType = TBlockType.Static) then
begin
_literalLengthTree := THuffmanTree.StaticLiteralLengthTree;
_distanceTree := THuffmanTree.StaticDistanceTree;
_state := TInflaterState.DecodeTop;
end
else if (_blockType = TBlockType.Uncompressed) then
begin
_state := TInflaterState.UncompressedAligning;
end
else
begin
raise Exception.Create('Deflate64: unknown block type');
end;
end;
if (_blockType = TBlockType.Dynamic) then
begin
if (_state < TInflaterState.DecodeTop) then
begin
// we are reading the header
result := DecodeDynamicBlockHeader();
end
else
begin
result := DecodeBlock(eob); // this can returns true when output is full
end;
end
else if (_blockType = TBlockType.Static) then
begin
result := DecodeBlock(eob);
end
else if (_blockType = TBlockType.Uncompressed) then
begin
result := DecodeUncompressedBlock(eob);
end
else
begin
raise Exception.Create('Deflate64: unknown block type');
end;
//
// If we reached the end of the block and the block we were decoding had
// bfinal=1 (final block)
//
if (eob and (_bfinal <> 0)) then
begin
_state := TInflaterState.Done;
end;
end;
function TInflaterManaged.DecodeUncompressedBlock(out endOfBlock: Boolean
): Boolean;
var
bits, bytesCopied: Integer;
blockLengthComplement: Integer;
begin
endOfBlock := false;
while (true) do
begin
case (_state) of
TInflaterState.UncompressedAligning: // initial state when calling this function
begin
// we must skip to a byte boundary
_input.SkipToByteBoundary();
_state := TInflaterState.UncompressedByte1;
Continue;
end;
TInflaterState.UncompressedByte1, // decoding block length
TInflaterState.UncompressedByte2,
TInflaterState.UncompressedByte3,
TInflaterState.UncompressedByte4:
begin
bits := _input.GetBits(8);
if (bits < 0) then
begin
Exit(false);
end;
_blockLengthBuffer[Integer(_state) - Integer(TInflaterState.UncompressedByte1)] := byte(bits);
if (_state = TInflaterState.UncompressedByte4) then
begin
_blockLength := _blockLengthBuffer[0] + (_blockLengthBuffer[1] * 256);
blockLengthComplement :=
_blockLengthBuffer[2] + (_blockLengthBuffer[3] * 256);
// make sure complement matches
if (UInt16(_blockLength) <> UInt16((not blockLengthComplement))) then
begin
raise Exception.Create('Deflate64: invalid block length');
end;
end;
Inc(_state);
end;
TInflaterState.DecodingUncompressed: // copying block data
begin
// Directly copy bytes from input to output.
bytesCopied := _output.CopyFrom(_input, _blockLength);
_blockLength -= bytesCopied;
if (_blockLength = 0) then
begin
// Done with this block, need to re-init bit buffer for next block
_state := TInflaterState.ReadingBFinal;
endOfBlock := true;
Exit(true);
end;
// We can fail to copy all bytes for two reasons:
// Running out of Input
// running out of free space in output window
if (_output.FreeBytes = 0) then
begin
Exit(true);
end;
Exit(false);
end;
else begin
//*Fail*/
Assert(false, 'check why we are here!');
raise Exception.Create('Deflate64: unknown state');
end;
end;
end;
end;
function TInflaterManaged.DecodeBlock(out endOfBlockCodeSeen: Boolean): Boolean;
var
freeBytes, symbol, bits, offset: Integer;
begin
endOfBlockCodeSeen := false;
freeBytes := _output.FreeBytes; // it is a little bit faster than frequently accessing the property
while (freeBytes > 65536) do
begin
// With Deflate64 we can have up to a 64kb length, so we ensure at least that much space is available
// in the OutputWindow to avoid overwriting previous unflushed output data.
case (_state) of
TInflaterState.DecodeTop:
begin
// decode an element from the literal tree
// TODO: optimize this!!!
symbol := _literalLengthTree.GetNextSymbol(_input);
if (symbol < 0) then
begin
// running out of input
Exit(false);
end;
if (symbol < 256) then
begin
// literal
_output.Write(Byte(symbol));
Dec(freeBytes);
end
else if (symbol = 256) then
begin
// end of block
endOfBlockCodeSeen := true;
// Reset state
_state := TInflaterState.ReadingBFinal;
Exit(true);
end
else
begin
// length/distance pair
symbol -= 257; // length code started at 257
if (symbol < 8) then
begin
symbol += 3; // match length = 3,4,5,6,7,8,9,10
_extraBits := 0;
end
else if ((not _deflate64) and (symbol = 28)) then
begin
// extra bits for code 285 is 0
symbol := 258; // code 285 means length 258
_extraBits := 0;
end
else
begin
if (symbol < 0) or (symbol >= Length(S_EXTRA_LENGTH_BITS)) then
begin
raise Exception.Create('Deflate64: invalid data');
end;
_extraBits := S_EXTRA_LENGTH_BITS[symbol];
Assert(_extraBits <> 0, 'We handle other cases separately!');
end;
_length := symbol;
_state := TInflaterState.HaveInitialLength;
Continue;
end;
end;
TInflaterState.HaveInitialLength:
begin
if (_extraBits > 0) then
begin
_state := TInflaterState.HaveInitialLength;
bits := _input.GetBits(_extraBits);
if (bits < 0) then
begin
Exit(false);
end;
if (_length < 0) or (_length >= Length(S_LENGTH_BASE)) then
begin
raise Exception.Create('Deflate64: invalid data');
end;
_length := S_LENGTH_BASE[_length] + bits;
end;
_state := TInflaterState.HaveFullLength;
Continue;
end;
TInflaterState.HaveFullLength:
begin
if (_blockType = TBlockType.Dynamic) then
begin
_distanceCode := _distanceTree.GetNextSymbol(_input);
end
else
begin
// get distance code directly for static block
_distanceCode := _input.GetBits(5);
if (_distanceCode >= 0) then
begin
_distanceCode := S_STATIC_DISTANCE_TREE_TABLE[_distanceCode];
end;
end;
if (_distanceCode < 0) then
begin
// running out input
Exit(false);
end;
_state := TInflaterState.HaveDistCode;
Continue;
end;
TInflaterState.HaveDistCode:
begin
// To avoid a table lookup we note that for distanceCode > 3,
// extra_bits = (distanceCode-2) >> 1
if (_distanceCode > 3) then
begin
_extraBits := (_distanceCode - 2) >> 1;
bits := _input.GetBits(_extraBits);
if (bits < 0) then
begin
Exit(false);
end;
offset := S_DISTANCE_BASE_POSITION[_distanceCode] + bits;
end
else
begin
offset := _distanceCode + 1;
end;
_output.WriteLengthDistance(_length, offset);
freeBytes -= _length;
_state := TInflaterState.DecodeTop;
end;
else begin
//*Fail*/
Assert(false, 'check why we are here!');
raise Exception.Create('Deflate64: unknown state');
end;
end;
end;
Result := true;
end;
function TInflaterManaged.DecodeDynamicBlockHeader: Boolean;
var
i, j, bits, repeatCount, previousCode: Integer;
literalTreeCodeLength, distanceTreeCodeLength: TBytes;
begin
while (True) do
begin
case (_state) of
TInflaterState.ReadingNumLitCodes:
begin
_literalLengthCodeCount := _input.GetBits(5);
if (_literalLengthCodeCount < 0) then
begin
Exit(false);
end;
_literalLengthCodeCount += 257;
_state := TInflaterState.ReadingNumDistCodes;
Continue;
end;
TInflaterState.ReadingNumDistCodes:
begin
_distanceCodeCount := _input.GetBits(5);
if (_distanceCodeCount < 0) then
begin
Exit(false);
end;
_distanceCodeCount += 1;
_state := TInflaterState.ReadingNumCodeLengthCodes;
Continue;
end;
TInflaterState.ReadingNumCodeLengthCodes:
begin
_codeLengthCodeCount := _input.GetBits(4);
if (_codeLengthCodeCount < 0) then
begin
Exit(false);
end;
_codeLengthCodeCount += 4;
_loopCounter := 0;
_state := TInflaterState.ReadingCodeLengthCodes;
Continue;
end;
TInflaterState.ReadingCodeLengthCodes:
begin
while (_loopCounter < _codeLengthCodeCount) do
begin
bits := _input.GetBits(3);
if (bits < 0) then
begin
Exit(false);
end;
_codeLengthTreeCodeLength[S_CODE_ORDER[_loopCounter]] := Byte(bits);
Inc(_loopCounter);
end;
for i := _codeLengthCodeCount to High(S_CODE_ORDER) do
begin
_codeLengthTreeCodeLength[S_CODE_ORDER[i]] := 0;
end;
// create huffman tree for code length
_codeLengthTree := THuffmanTree.Create(_codeLengthTreeCodeLength);
_codeArraySize := _literalLengthCodeCount + _distanceCodeCount;
_loopCounter := 0; // reset loop count
_state := TInflaterState.ReadingTreeCodesBefore;
Continue;
end;
TInflaterState.ReadingTreeCodesBefore,
TInflaterState.ReadingTreeCodesAfter:
begin
while (_loopCounter < _codeArraySize) do
begin
if (_state = TInflaterState.ReadingTreeCodesBefore) then
begin
_lengthCode := _codeLengthTree.GetNextSymbol(_input);
if (_lengthCode < 0) then
begin
Exit(false);
end;
end;
// The alphabet for code lengths is as follows:
// 0 - 15: Represent code lengths of 0 - 15
// 16: Copy the previous code length 3 - 6 times.
// The next 2 bits indicate repeat length
// (0 = 3, ... , 3 = 6)
// Example: Codes 8, 16 (+2 bits 11),
// 16 (+2 bits 10) will expand to
// 12 code lengths of 8 (1 + 6 + 5)
// 17: Repeat a code length of 0 for 3 - 10 times.
// (3 bits of length)
// 18: Repeat a code length of 0 for 11 - 138 times
// (7 bits of length)
if (_lengthCode <= 15) then
begin
_codeList[_loopCounter] := Byte(_lengthCode);
Inc(_loopCounter);
end
else
begin
if (_lengthCode = 16) then
begin
if (not _input.EnsureBitsAvailable(2)) then
begin
_state := TInflaterState.ReadingTreeCodesAfter;
Exit(false);
end;
if (_loopCounter = 0) then
begin
// can't have "prev code" on first code
raise Exception(EmptyStr);
end;
previousCode := _codeList[_loopCounter - 1];
repeatCount := _input.GetBits(2) + 3;
if (_loopCounter + repeatCount > _codeArraySize) then
begin
raise Exception(EmptyStr);
end;
for j := 0 to Pred(repeatCount) do
begin
_codeList[_loopCounter] := previousCode;
Inc(_loopCounter);
end;
end
else if (_lengthCode = 17) then
begin
if (not _input.EnsureBitsAvailable(3)) then
begin
_state := TInflaterState.ReadingTreeCodesAfter;
Exit(false);
end;
repeatCount := _input.GetBits(3) + 3;
if (_loopCounter + repeatCount > _codeArraySize) then
begin
raise Exception(EmptyStr);
end;
for j := 0 to Pred(repeatCount) do
begin
_codeList[_loopCounter] := 0;
Inc(_loopCounter);
end;
end
else
begin
// code == 18
if (not _input.EnsureBitsAvailable(7)) then
begin
_state := TInflaterState.ReadingTreeCodesAfter;
Exit(false);
end;
repeatCount := _input.GetBits(7) + 11;
if (_loopCounter + repeatCount > _codeArraySize) then
begin
raise Exception(EmptyStr);
end;
for j := 0 to Pred(repeatCount) do
begin
_codeList[_loopCounter] := 0;
Inc(_loopCounter);
end;
end;
end;
_state := TInflaterState.ReadingTreeCodesBefore; // we want to read the next code.
end;
end;
else
//*Fail*/
Assert(false, 'check why we are here!');
raise Exception.Create('Deflate64: unknown state');
end;
Break;
end;
SetLength(literalTreeCodeLength, THuffmanTree.MAX_LITERAL_TREE_ELEMENTS);
SetLength(distanceTreeCodeLength, THuffmanTree.MAX_DIST_TREE_ELEMENTS);
// Create literal and distance tables
Move(_codeList[0], literalTreeCodeLength[0], _literalLengthCodeCount);
Move(_codeList[_literalLengthCodeCount], distanceTreeCodeLength[0], _distanceCodeCount);
// Make sure there is an end-of-block code, otherwise how could we ever end?
if (literalTreeCodeLength[THuffmanTree.END_OF_BLOCK_CODE] = 0) then
begin
raise Exception(EmptyStr);
end;
_literalLengthTree := THuffmanTree.Create(literalTreeCodeLength);
_distanceTree := THuffmanTree.Create(distanceTreeCodeLength);
_state := TInflaterState.DecodeTop;
Result := true;
end;
constructor TInflaterManaged.Create(deflate64: Boolean);
begin
_output := TOutputWindow.Create;
_input := TInputBuffer.Create;
SetLength(_codeList, THuffmanTree.MAX_LITERAL_TREE_ELEMENTS + THuffmanTree.MAX_DIST_TREE_ELEMENTS);
SetLength(_codeLengthTreeCodeLength, THuffmanTree.NUMBER_OF_CODE_LENGTH_TREE_ELEMENTS);
_deflate64 := deflate64;
Reset();
end;
destructor TInflaterManaged.Destroy;
begin
inherited Destroy;
_output.Free;
_input.Free;
end;
procedure TInflaterManaged.SetInput(inputBytes: PByte; offset, length: Integer);
begin
_input.SetInput(inputBytes, offset, length); // append the bytes
end;
function TInflaterManaged.Finished: Boolean;
begin
Result := (_state = TInflaterState.Done) or (_state = TInflaterState.VerifyingFooter);
end;
function TInflaterManaged.AvailableOutput: Integer;
begin
Result := _output.AvailableBytes;
end;
function TInflaterManaged.Inflate(bytes: PByte; offset, length: Integer): Integer;
var
copied: Integer;
count: Integer = 0;
begin
// copy bytes from output to outputbytes if we have available bytes
// if buffer is not filled up. keep decoding until no input are available
// if decodeBlock returns false. Throw an exception.
repeat
copied := _output.CopyTo(bytes, offset, length);
if (copied > 0) then
begin
offset += copied;
count += copied;
length -= copied;
end;
if (length = 0) then
begin // filled in the bytes array
break;
end;
// Decode will return false when more input is needed
until not ((not Finished() and Decode()));
Result := count;
end;
end.

View file

@ -0,0 +1,208 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE.txt file in the directory for more information.
// The Pascal translation by Alexander Koblov.
unit InputBuffer;
{$mode ObjFPC}{$H+}
interface
uses
Classes, SysUtils;
type
{ TInputBuffer }
TInputBuffer = class
private
_buffer: PByte; // byte array to store input
_start: Integer; // start poisition of the buffer
_end: Integer; // end position of the buffer
_bitBuffer: Cardinal; // store the bits here, we can quickly shift in this buffer
_bitsInBuffer: Integer; // number of bits available in bitBuffer
public
// Total bytes available in the input buffer
function AvailableBytes: Integer;
// Ensure that count bits are in the bit buffer
function EnsureBitsAvailable(count: Integer): Boolean;
// This function will try to load 16 or more bits into bitBuffer
function TryLoad16Bits: Cardinal;
// Gets count bits from the input buffer. Returns -1 if not enough bits available
function GetBits(count: Integer): Integer;
// Copies length bytes from input buffer to output buffer starting at output[offset]
function CopyTo(output: PByte; offset, length: Integer): Integer;
// Return true is all input bytes are used
function NeedsInput: Boolean;
// Set the byte array to be processed
procedure SetInput(buffer: PByte; offset, length: Integer);
// Skip n bits in the buffer
procedure SkipBits(n: Integer);
/// Skips to the next byte boundary
procedure SkipToByteBoundary;
// Total bits available in the input buffer
property AvailableBits: Integer read _bitsInBuffer;
end;
implementation
{ TInputBuffer }
function TInputBuffer.AvailableBytes: Integer;
begin
Result:= (_end - _start) + (_bitsInBuffer div 8);
end;
function TInputBuffer.EnsureBitsAvailable(count: Integer): Boolean;
begin
Assert((0 < count) and (count <= 16), 'count is invalid.');
// manual inlining to improve perf
if (_bitsInBuffer < count) then
begin
if (NeedsInput()) then
begin
Exit(false);
end;
// insert a byte to bitbuffer
_bitBuffer := _bitBuffer or Cardinal(_buffer[_start]) << _bitsInBuffer;
_bitsInBuffer += 8;
Inc(_start);
if (_bitsInBuffer < count) then
begin
if (NeedsInput()) then
begin
Exit(false);
end;
// insert a byte to bitbuffer
_bitBuffer := _bitBuffer or Cardinal(_buffer[_start]) << _bitsInBuffer;
_bitsInBuffer += 8;
Inc(_start);
end;
end;
Result := true;
end;
function TInputBuffer.TryLoad16Bits: Cardinal;
begin
if (_bitsInBuffer < 8) then
begin
if (_start < _end) then
begin
_bitBuffer := _bitBuffer or Cardinal(_buffer[_start]) << _bitsInBuffer;
_bitsInBuffer += 8;
Inc(_start);
end;
if (_start < _end) then
begin
_bitBuffer := _bitBuffer or Cardinal(_buffer[_start]) << _bitsInBuffer;
_bitsInBuffer += 8;
Inc(_start);
end;
end
else if (_bitsInBuffer < 16) then
begin
if (_start < _end) then
begin
_bitBuffer := _bitBuffer or Cardinal(_buffer[_start]) << _bitsInBuffer;
_bitsInBuffer += 8;
Inc(_start);
end;
end;
Result := _bitBuffer;
end;
function TInputBuffer.GetBits(count: Integer): Integer;
begin
Assert((0 < count) and (count <= 16), 'count is invalid.');
if (not EnsureBitsAvailable(count)) then
begin
Exit(-1);
end;
result := Integer(_bitBuffer) and (((Cardinal(1) << count) - 1));
_bitBuffer := _bitBuffer >> count;
_bitsInBuffer -= count;
end;
function TInputBuffer.CopyTo(output: PByte; offset, length: Integer): Integer;
var
avail: Integer;
bytesFromBitBuffer: Integer = 0;
begin
Assert(output <> nil);
Assert(offset >= 0);
Assert(length >= 0);
// Assert(offset <= System.Length(output) - length);
Assert((_bitsInBuffer mod 8) = 0);
// Copy the bytes in bitBuffer first.
while (_bitsInBuffer > 0) and (length > 0) do
begin
output[offset] := Byte(_bitBuffer);
_bitBuffer := _bitBuffer >> 8;
_bitsInBuffer -= 8;
Inc(offset);
Dec(length);
Inc(bytesFromBitBuffer);
end;
if (length = 0) then
begin
Exit(bytesFromBitBuffer);
end;
avail := _end - _start;
if (length > avail) then
begin
length := avail;
end;
Move(_buffer[_start], output[offset], length);
_start += length;
Result := bytesFromBitBuffer + length;
end;
function TInputBuffer.NeedsInput(): Boolean;
begin
Result := (_start = _end);
end;
procedure TInputBuffer.SetInput(buffer: PByte; offset, length: Integer);
begin
Assert(buffer <> nil);
Assert(offset >= 0);
Assert(length >= 0);
// Assert(offset <= System.Length(buffer) - length);
Assert(_start = _end);
_buffer := buffer;
_start := offset;
_end := offset + length;
end;
procedure TInputBuffer.SkipBits(n: Integer);
begin
Assert(
_bitsInBuffer >= n,
'No enough bits in the buffer, Did you call EnsureBitsAvailable?'
);
_bitBuffer := _bitBuffer >> n;
_bitsInBuffer -= n;
end;
procedure TInputBuffer.SkipToByteBoundary;
begin
_bitBuffer := _bitBuffer >> (_bitsInBuffer mod 8);
_bitsInBuffer -= (_bitsInBuffer mod 8);
end;
end.

View file

@ -0,0 +1,181 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE.txt file in the directory for more information.
// The Pascal translation by Alexander Koblov.
unit OutputWindow;
{$mode ObjFPC}{$H+}
interface
uses
Classes, SysUtils, InputBuffer;
type
{ TOutputWindow }
TOutputWindow = class
private
// With Deflate64 we can have up to a 65536 length as well as up to a 65538 distance. This means we need a Window that is at
// least 131074 bytes long so we have space to retrieve up to a full 64kb in lookback and place it in our buffer without
// overwriting existing data. OutputWindow requires that the WindowSize be an exponent of 2, so we round up to 2^18.
const WINDOW_SIZE = Integer(262144);
const WINDOW_MASK = Integer(262143);
private
_window: array [0..Pred(WINDOW_SIZE)] of Byte; // The window is 2^18 bytes
_end: Integer; // this is the position to where we should write next byte
_bytesUsed: Integer; // The number of bytes in the output window which is not consumed.
public
// Add a byte to output window
procedure Write(b: Byte);
procedure WriteLengthDistance(length, distance: Integer);
// Copy up to length of bytes from input directly
function CopyFrom(input: TInputBuffer; length: Integer): Integer;
// Free space in output window
function FreeBytes: Integer;
// Copy the decompressed bytes to output array
function CopyTo(output: PByte; offset, length: Integer): Integer;
// Bytes not consumed in output window
property AvailableBytes: Integer read _bytesUsed;
end;
implementation
uses
Math;
{ TOutputWindow }
procedure TOutputWindow.Write(b: Byte);
begin
Assert(_bytesUsed < WINDOW_SIZE, 'Can''t add byte when window is full!');
_window[_end] := b;
Inc(_end);
_end := _end and WINDOW_MASK;
Inc(_bytesUsed);
end;
procedure TOutputWindow.WriteLengthDistance(length, distance: Integer);
var
copyStart, border: Integer;
begin
Assert((_bytesUsed + length) <= WINDOW_SIZE, 'No Enough space');
// move backwards distance bytes in the output stream,
// and copy length bytes from this position to the output stream.
_bytesUsed += length;
copyStart := (_end - distance) and WINDOW_MASK; // start position for coping.
border := WINDOW_SIZE - length;
if (copyStart <= border) and (_end < border) then
begin
if (length <= distance) then
begin
Move(_window[copyStart], _window[_end], length);
_end += length;
end
else
begin
// The referenced string may overlap the current
// position; for example, if the last 2 bytes decoded have values
// X and Y, a string reference with <length = 5, distance = 2>
// adds X,Y,X,Y,X to the output stream.
while (length > 0) do
begin
_window[_end] := _window[copyStart];
Inc(_end);
Dec(length);
Inc(copyStart);
end;
end;
end
else
begin
// copy byte by byte
while (length > 0) do
begin
_window[_end] := _window[copyStart];
Inc(_end);
Dec(length);
Inc(copyStart);
_end := _end and WINDOW_MASK;
copyStart := copyStart and WINDOW_MASK;
end;
end;
end;
function TOutputWindow.CopyFrom(input: TInputBuffer; length: Integer): Integer;
var
copied, tailLen: Integer;
begin
length := Math.Min(Math.Min(length, WINDOW_SIZE - _bytesUsed), input.AvailableBytes);
// We might need wrap around to copy all bytes.
tailLen := WINDOW_SIZE - _end;
if (length > tailLen) then
begin
// copy the first part
copied := input.CopyTo(_window, _end, tailLen);
if (copied = tailLen) then
begin
// only try to copy the second part if we have enough bytes in input
copied += input.CopyTo(_window, 0, length - tailLen);
end;
end
else
begin
// only one copy is needed if there is no wrap around.
copied := input.CopyTo(_window, _end, length);
end;
_end := (_end + copied) and WINDOW_MASK;
_bytesUsed += copied;
Result := copied;
end;
function TOutputWindow.FreeBytes: Integer;
begin
Result := WINDOW_SIZE - _bytesUsed;
end;
function TOutputWindow.CopyTo(output: PByte; offset, length: Integer): Integer;
var
copyEnd, copied, tailLen: Integer;
begin
if (length > _bytesUsed) then
begin
// we can copy all the decompressed bytes out
copyEnd := _end;
length := _bytesUsed;
end
else
begin
copyEnd := (_end - _bytesUsed + length) and WINDOW_MASK; // copy length of bytes
end;
copied := length;
tailLen := length - copyEnd;
if (tailLen > 0) then
begin
// this means we need to copy two parts separately
// copy tailLen bytes from the end of output window
Move(_window[WINDOW_SIZE - tailLen], output[offset], tailLen);
offset += tailLen;
length := copyEnd;
end;
if (length > 0) then
begin
Move(_window[copyEnd - length], output[offset], length);
end;
_bytesUsed -= copied;
Assert(
_bytesUsed >= 0,
'check this function and find why we copied more bytes than we have'
);
Result := copied;
end;
end.