Dump: add output format tar and output to stdout (#10376)

* Dump: Use mholt/archive/v3 to support tar including many compressions

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Allow dump output to stdout

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file"

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: also dump LFS (fixes #10058)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Use log.Info instead of fmt.Fprintf

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* import ordering

* make fmt

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Matti R <matti@mdranta.net>
This commit is contained in:
PhilippHomann
2020-06-05 22:47:39 +02:00
committed by GitHub
parent 209b17c4e2
commit 684b7a999f
303 changed files with 301317 additions and 1183 deletions
+8
View File
@@ -0,0 +1,8 @@
# Package xz authors
Michael Cross <https://github.com/xi2>
# XZ Embedded authors
Lasse Collin <lasse.collin@tukaani.org>
Igor Pavlov <http://7-zip.org/>
+18
View File
@@ -0,0 +1,18 @@
Licensing of github.com/xi2/xz
==============================
This Go package is a modified version of
XZ Embedded <http://tukaani.org/xz/embedded.html>
The contents of the testdata directory are modified versions of
the test files from
XZ Utils <http://tukaani.org/xz/>
All the files in this package have been written by Michael Cross,
Lasse Collin and/or Igor Pavlov. All these files have been put
into the public domain. You can do whatever you want with these
files.
This software is provided "as is", without any warranty.
+10
View File
@@ -0,0 +1,10 @@
# Xz
Package xz implements XZ decompression natively in Go.
Documentation at <https://godoc.org/github.com/xi2/xz>.
Download and install with `go get github.com/xi2/xz`.
If you need compression as well as decompression, you might want to
look at <https://github.com/ulikunitz/xz>.
+461
View File
@@ -0,0 +1,461 @@
/*
* Branch/Call/Jump (BCJ) filter decoders
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* Translation to Go: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
/* from linux/lib/xz/xz_dec_bcj.c *************************************/
/*
 * xzDecBCJ holds the state of a single BCJ filter decoder. It is
 * allocated by xzDecBCJCreate, initialised by xzDecBCJReset and then
 * driven by xzDecBCJRun.
 */
type xzDecBCJ struct {
	/* Type of the BCJ filter being used */
	typ xzFilterID
	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned xzStreamEnd
	 */
	ret xzRet
	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). It starts at the offset passed to
	 * xzDecBCJReset and is advanced by bcjApply by the number of
	 * bytes each filter call reports as processed.
	 */
	pos int
	/*
	 * x86 filter state: the mask that bcjX86Filter loads on entry and
	 * stores back on exit so that scanning can continue across calls.
	 */
	x86PrevMask uint32
	/*
	 * Temporary space to hold the variables from xzBuf. xzDecBCJRun
	 * saves b.out and b.outPos here while b.out is redirected to
	 * temp.bufArray, and restores them afterwards.
	 */
	out    []byte
	outPos int
	temp   struct {
		/* Amount of already filtered data in the beginning of buf */
		filtered int
		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type         Alignment   Look-ahead
		 * x86              1           4
		 * PowerPC          4           0
		 * IA-64           16           0
		 * ARM              4           0
		 * ARM-Thumb        2           2
		 * SPARC            4           0
		 */
		buf      []byte // slice buf will be backed by bufArray
		bufArray [16]byte
	}
}
/*
 * bcjX86TestMSByte reports whether b is 0x00 or 0xff. The x86 filter
 * uses it to test the most significant byte of a memory address in an
 * x86 instruction.
 */
func bcjX86TestMSByte(b byte) bool {
	switch b {
	case 0x00, 0xff:
		return true
	}
	return false
}
/*
 * bcjX86Filter applies the x86 BCJ decoder to buf in place.
 *
 * It scans for bytes equal to 0xe8 or 0xe9 (presumably the x86
 * CALL/JMP rel32 opcodes). Where the byte four positions later passes
 * bcjX86TestMSByte, the 32-bit little-endian operand following the
 * opcode byte is rewritten by subtracting the position of the end of
 * the instruction (s.pos + i + 5).
 *
 * No byte within the last four bytes of buf is treated as an opcode,
 * so a buffer of four bytes or fewer is left untouched and 0 is
 * returned. Otherwise the final scan index is returned; bcjApply uses
 * it as the number of bytes that have been dealt with. The mask of
 * recently seen opcode bytes is loaded from and stored back to
 * s.x86PrevMask so that scanning can resume on the next call.
 */
func bcjX86Filter(s *xzDecBCJ, buf []byte) int {
	/* Indexed by the 3-bit prevMask. */
	var maskToAllowedStatus = []bool{
		true, true, true, false, true, false, false, false,
	}
	/* Indexed by the 3-bit prevMask. */
	var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}
	var i int
	var prevPos int = -1
	var prevMask uint32 = s.x86PrevMask
	var src uint32
	var dest uint32
	var j uint32
	var b byte
	if len(buf) <= 4 {
		return 0
	}
	for i = 0; i < len(buf)-4; i++ {
		/* Only 0xe8 and 0xe9 are of interest. */
		if buf[i]&0xfe != 0xe8 {
			continue
		}
		/* prevPos temporarily holds the distance to the previous hit. */
		prevPos = i - prevPos
		if prevPos > 3 {
			prevMask = 0
		} else {
			prevMask = (prevMask << (uint(prevPos) - 1)) & 7
			if prevMask != 0 {
				b = buf[i+4-int(maskToBitNum[prevMask])]
				if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) {
					/* Skip this candidate but remember it in the mask. */
					prevPos = i
					prevMask = prevMask<<1 | 1
					continue
				}
			}
		}
		prevPos = i
		if bcjX86TestMSByte(buf[i+4]) {
			src = getLE32(buf[i+1:])
			for {
				/* Subtract the address of the end of the instruction. */
				dest = src - uint32(s.pos+i+5)
				if prevMask == 0 {
					break
				}
				j = uint32(maskToBitNum[prevMask]) * 8
				b = byte(dest >> (24 - j))
				if !bcjX86TestMSByte(b) {
					break
				}
				src = dest ^ (1<<(32-j) - 1)
			}
			/* Keep the low 25 bits and copy bit 24 into bits 25-31. */
			dest &= 0x01FFFFFF
			dest |= 0 - dest&0x01000000
			putLE32(dest, buf[i+1:])
			/* Step over the four operand bytes just rewritten. */
			i += 4
		} else {
			prevMask = prevMask<<1 | 1
		}
	}
	/* Save the mask, adjusted for the distance to the last hit. */
	prevPos = i - prevPos
	if prevPos > 3 {
		s.x86PrevMask = 0
	} else {
		s.x86PrevMask = prevMask << (uint(prevPos) - 1)
	}
	return i
}
/*
 * bcjPowerPCFilter applies the PowerPC BCJ decoder to buf in place.
 *
 * buf is walked in 4-byte big-endian words. A word matching
 * 0x48000001 under the mask 0xFC000003 has the current position
 * (s.pos + offset of the word) subtracted from its 0x03FFFFFC field.
 * The number of bytes covered by whole words is returned; a trailing
 * partial word is left untouched.
 */
func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		word := getBE32(buf[off:])
		if word&0xFC000003 != 0x48000001 {
			continue
		}
		target := word & 0x03FFFFFC
		target -= uint32(s.pos + off)
		putBE32((target&0x03FFFFFC)|0x48000001, buf[off:])
	}
	return off
}
/*
 * bcjIA64BranchTable is indexed by the low five bits of the first byte
 * of each 16-byte chunk handled by bcjIA64Filter. Bit n of an entry
 * being set means that instruction slot n of that chunk is examined
 * for conversion.
 */
var bcjIA64BranchTable = [...]byte{
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	4, 4, 6, 6, 0, 0, 7, 7,
	4, 4, 0, 0, 4, 4, 0, 0,
}
/*
 * bcjIA64Filter applies the IA-64 BCJ decoder to buf in place.
 *
 * buf is processed in 16-byte chunks. The low five bits of the first
 * byte of a chunk select, through bcjIA64BranchTable, which of the
 * three 41-bit instruction slots are inspected. A slot whose
 * instruction matches the expected pattern has its 21-bit address
 * field (held in 16-byte units) reduced by the position of the chunk,
 * s.pos + chunk offset. The number of bytes covered by whole chunks is
 * returned.
 */
func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {
	chunk := 0
	for ; chunk+16 <= len(buf); chunk += 16 {
		/* Bit n set means slot n must be looked at. */
		slotMask := uint32(bcjIA64BranchTable[buf[chunk]&0x1f])
		for slot := uint32(0); slot < 3; slot++ {
			if (slotMask>>slot)&1 == 0 {
				continue
			}
			/* Slots start at bit 5 and are 41 bits wide. */
			bitPos := 5 + 41*slot
			byteOff := chunk + int(bitPos>>3)
			shift := bitPos & 7
			/* The instruction lives somewhere in these six bytes. */
			field := buf[byteOff : byteOff+6]
			var instr uint64
			for k, v := range field {
				instr |= uint64(v) << (8 * uint(k))
			}
			/* Normalise so that the instruction starts at bit 0. */
			norm := instr >> shift
			if (norm>>37)&0x0f != 0x05 || (norm>>9)&0x07 != 0 {
				continue
			}
			addr := uint32((norm >> 13) & 0x0fffff)
			addr |= (uint32(norm>>36) & 1) << 20
			addr <<= 4
			addr -= uint32(s.pos + chunk)
			addr >>= 4
			/* Replace the address bits (13-32 and 36) in norm. */
			norm &^= uint64(0x8fffff) << 13
			norm |= uint64(addr&0x0fffff) << 13
			norm |= uint64(addr&0x100000) << (36 - 20)
			/* Keep the bits below the slot and splice norm back in. */
			instr &= uint64(1)<<shift - 1
			instr |= norm << shift
			for k := range field {
				field[k] = byte(instr >> (8 * uint(k)))
			}
		}
	}
	return chunk
}
/*
 * bcjARMFilter applies the ARM BCJ decoder to buf in place.
 *
 * buf is walked in 4-byte words. When the fourth byte of a word is
 * 0xeb, the 24-bit little-endian value in the first three bytes is
 * treated as a word address: it is scaled to bytes, reduced by
 * s.pos + offset + 8, scaled back and stored again. The number of
 * bytes covered by whole words is returned.
 */
func bcjARMFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		insn := buf[off : off+4]
		if insn[3] != 0xeb {
			continue
		}
		addr := uint32(insn[0]) | uint32(insn[1])<<8 | uint32(insn[2])<<16
		addr <<= 2
		addr -= uint32(s.pos + off + 8)
		addr >>= 2
		insn[0] = byte(addr)
		insn[1] = byte(addr >> 8)
		insn[2] = byte(addr >> 16)
	}
	return off
}
/*
 * bcjARMThumbFilter applies the ARM-Thumb BCJ decoder to buf in place.
 *
 * buf is scanned in 2-byte steps, always looking at four bytes. When
 * bytes 1 and 3 carry the 0xf0 and 0xf8 prefixes respectively, the
 * 22-bit address spread over the four bytes is scaled to bytes,
 * reduced by s.pos + offset + 4, scaled back and stored again; the
 * scan then advances by four bytes instead of two. The offset at which
 * scanning stopped is returned.
 */
func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for off+4 <= len(buf) {
		if buf[off+1]&0xf8 != 0xf0 || buf[off+3]&0xf8 != 0xf8 {
			off += 2
			continue
		}
		addr := uint32(buf[off+1]&0x07)<<19 |
			uint32(buf[off])<<11 |
			uint32(buf[off+3]&0x07)<<8 |
			uint32(buf[off+2])
		addr <<= 1
		addr -= uint32(s.pos + off + 4)
		addr >>= 1
		buf[off+1] = byte(0xf0 | (addr>>19)&0x07)
		buf[off] = byte(addr >> 11)
		buf[off+3] = byte(0xf8 | (addr>>8)&0x07)
		buf[off+2] = byte(addr)
		/* A converted instruction occupies both halfwords. */
		off += 4
	}
	return off
}
/*
 * bcjSPARCFilter applies the SPARC BCJ decoder to buf in place.
 *
 * buf is walked in 4-byte big-endian words. A word whose top ten bits
 * are 0x100 or 0x1ff has its low 30 bits treated as a word address:
 * it is scaled to bytes, reduced by s.pos + offset, scaled back and
 * re-encoded with the sign taken from bit 22. The number of bytes
 * covered by whole words is returned.
 */
func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {
	off := 0
	for ; off+4 <= len(buf); off += 4 {
		word := getBE32(buf[off:])
		if top := word >> 22; top != 0x100 && top != 0x1ff {
			continue
		}
		word <<= 2
		word -= uint32(s.pos + off)
		word >>= 2
		word = (0x40000000 - word&0x400000) |
			0x40000000 | (word & 0x3FFFFF)
		putBE32(word, buf[off:])
	}
	return off
}
/*
 * bcjApply runs the BCJ filter selected by s.typ over buf[*pos:] and
 * then advances both *pos and s.pos by the number of bytes the filter
 * reports as processed. An unknown filter type processes nothing.
 */
func bcjApply(s *xzDecBCJ, buf []byte, pos *int) {
	pending := buf[*pos:]

	var filter func(*xzDecBCJ, []byte) int
	switch s.typ {
	case idBCJX86:
		filter = bcjX86Filter
	case idBCJPowerPC:
		filter = bcjPowerPCFilter
	case idBCJIA64:
		filter = bcjIA64Filter
	case idBCJARM:
		filter = bcjARMFilter
	case idBCJARMThumb:
		filter = bcjARMThumbFilter
	case idBCJSPARC:
		filter = bcjSPARCFilter
	}

	done := 0
	if filter != nil {
		done = filter(s, pending)
	}
	*pos += done
	s.pos += done
}
/*
 * bcjFlush moves as much already filtered data as fits from the start
 * of s.temp.buf into b.out at b.outPos. Whatever is left in temp, a
 * mix of filtered and unfiltered bytes, is shifted to the beginning of
 * s.temp.buf, which is shortened accordingly.
 */
func bcjFlush(s *xzDecBCJ, b *xzBuf) {
	n := s.temp.filtered
	if room := len(b.out) - b.outPos; room < n {
		n = room
	}
	copy(b.out[b.outPos:], s.temp.buf[:n])
	b.outPos += n
	s.temp.filtered -= n
	/* copy returns the number of bytes that remain after the shift. */
	left := copy(s.temp.buf, s.temp.buf[n:])
	s.temp.buf = s.temp.buf[:left]
}
/*
 * Decode raw stream which has a BCJ filter as the first filter.
 *
 * The BCJ filter functions are primitive in sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 *
 * s is the BCJ decoder state, b is the caller's buffer and chain is the
 * next filter in the chain, which is called to produce unfiltered data.
 * Any return value of chain other than xzOK or xzStreamEnd is passed
 * straight back to the caller.
 */
func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
	var outStart int
	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned xzStreamEnd.
	 */
	if s.temp.filtered > 0 {
		bcjFlush(s, b)
		if s.temp.filtered > 0 {
			return xzOK
		}
		if s.ret == xzStreamEnd {
			return xzStreamEnd
		}
	}
	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If everything cannot be filtered, copy it
	 * to temp and rewind the output buffer position accordingly.
	 *
	 * This needs to be always run when len(temp.buf) == 0 to handle a special
	 * case where the output buffer is full and the next filter has no
	 * more output coming but hasn't returned xzStreamEnd yet.
	 */
	if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 {
		outStart = b.outPos
		copy(b.out[b.outPos:], s.temp.buf)
		b.outPos += len(s.temp.buf)
		s.ret = chain(b)
		if s.ret != xzStreamEnd && s.ret != xzOK {
			return s.ret
		}
		/* bcjApply advances outStart past everything it filtered. */
		bcjApply(s, b.out[:b.outPos], &outStart)
		/*
		 * As an exception, if the next filter returned xzStreamEnd,
		 * we can do that too, since the last few bytes that remain
		 * unfiltered are meant to remain unfiltered.
		 */
		if s.ret == xzStreamEnd {
			return xzStreamEnd
		}
		/* Move the unfiltered tail b.out[outStart:b.outPos] to temp. */
		s.temp.buf = s.temp.bufArray[:b.outPos-outStart]
		b.outPos -= len(s.temp.buf)
		copy(s.temp.buf, b.out[b.outPos:])
		/*
		 * If there wasn't enough input to the next filter to fill
		 * the output buffer with unfiltered data, there's no point
		 * to try decoding more data to temp.
		 */
		if b.outPos+len(s.temp.buf) < len(b.out) {
			return xzOK
		}
	}
	/*
	 * We have unfiltered data in temp. If the output buffer isn't full
	 * yet, try to fill the temp buffer by decoding more data from the
	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
	 * fill the actual output buffer by copying filtered data from temp.
	 * A mix of filtered and unfiltered data may be left in temp; it will
	 * be taken care on the next call to this function.
	 */
	if b.outPos < len(b.out) {
		/* Make b.out temporarily point to s.temp. */
		s.out = b.out
		s.outPos = b.outPos
		b.out = s.temp.bufArray[:]
		b.outPos = len(s.temp.buf)
		s.ret = chain(b)
		/* temp now holds everything chain wrote; restore b.out. */
		s.temp.buf = s.temp.bufArray[:b.outPos]
		b.out = s.out
		b.outPos = s.outPos
		if s.ret != xzOK && s.ret != xzStreamEnd {
			return s.ret
		}
		bcjApply(s, s.temp.buf, &s.temp.filtered)
		/*
		 * If the next filter returned xzStreamEnd, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if s.ret == xzStreamEnd {
			s.temp.filtered = len(s.temp.buf)
		}
		bcjFlush(s, b)
		if s.temp.filtered > 0 {
			return xzOK
		}
	}
	return s.ret
}
/*
 * xzDecBCJCreate returns a new, zeroed BCJ decoder. xzDecBCJReset must
 * be called on it before it is passed to xzDecBCJRun.
 */
func xzDecBCJCreate() *xzDecBCJ {
	return &xzDecBCJ{}
}
/*
 * xzDecBCJReset prepares s for decoding with the BCJ filter id, starting
 * at the given offset.
 *
 * xzOptionsError is returned, and s is left untouched, when id is not
 * one of the supported BCJ Filter IDs or when offset is not a multiple
 * of the filter's alignment (1 for x86, 2 for ARM-Thumb, 4 for PowerPC,
 * ARM and SPARC, 16 for IA-64). Otherwise s is reset and xzOK is
 * returned.
 */
func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {
	var alignment int
	switch id {
	case idBCJX86:
		alignment = 1
	case idBCJARMThumb:
		alignment = 2
	case idBCJPowerPC, idBCJARM, idBCJSPARC:
		alignment = 4
	case idBCJIA64:
		alignment = 16
	default:
		/* Unsupported Filter ID */
		return xzOptionsError
	}
	if offset%alignment != 0 {
		return xzOptionsError
	}

	s.typ = id
	s.ret = xzOK
	s.pos = offset
	s.x86PrevMask = 0
	s.temp.filtered = 0
	s.temp.buf = nil
	return xzOK
}
+55
View File
@@ -0,0 +1,55 @@
/*
* Delta decoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* Translation to Go: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
/*
 * xzDecDelta holds the state of a single delta filter decoder.
 */
type xzDecDelta struct {
	/* History of the most recent output bytes, used as a ring buffer. */
	delta [256]byte
	/*
	 * Index in delta where the next output byte is stored. It is
	 * decremented after every byte and wraps around as a byte does.
	 */
	pos      byte
	distance int // in range [1, 256]
}
/*
 * xzDecDeltaRun decodes a raw stream which has a delta filter as the
 * first filter. It calls chain to produce data into b.out and then adds
 * to every new byte the output byte that lies s.distance positions
 * before it, keeping the history in s.delta. The return value of chain
 * is passed through unchanged.
 */
func xzDecDeltaRun(s *xzDecDelta, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
	first := b.outPos
	ret := chain(b)
	for i := first; i < b.outPos; i++ {
		/* The byte conversion makes the history index wrap at 256. */
		prev := s.delta[byte(s.distance+int(s.pos))]
		b.out[i] += prev
		s.delta[s.pos] = b.out[i]
		s.pos--
	}
	return ret
}
/*
 * xzDecDeltaCreate returns a new, zeroed delta decoder. xzDecDeltaReset
 * must be called on it before it is passed to xzDecDeltaRun.
 */
func xzDecDeltaCreate() *xzDecDelta {
	return &xzDecDelta{}
}
/*
 * xzDecDeltaReset clears the history of s and sets its distance.
 * xzOK is returned when distance lies in [1, 256]; otherwise
 * xzOptionsError is returned and s is left untouched.
 */
func xzDecDeltaReset(s *xzDecDelta, distance int) xzRet {
	if distance >= 1 && distance <= 256 {
		s.distance = distance
		s.pos = 0
		s.delta = [256]byte{}
		return xzOK
	}
	return xzOptionsError
}
+1235
View File
File diff suppressed because it is too large Load Diff
+932
View File
@@ -0,0 +1,932 @@
/*
* .xz Stream decoder
*
* Author: Lasse Collin <lasse.collin@tukaani.org>
*
* Translation to Go: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
import (
"bytes"
"crypto/sha256"
"hash"
"hash/crc32"
"hash/crc64"
)
/* from linux/lib/xz/xz_stream.h **************************************/
/*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */
const (
	/* Size in bytes of the Stream Header field */
	streamHeaderSize = 12
	/* Magic bytes compared against the start of the Stream Header */
	headerMagic = "\xfd7zXZ\x00"
	/* Magic bytes compared against bytes 10-11 of the Stream Footer */
	footerMagic = "YZ"
)
/*
 * Variable-length integer can hold a 63-bit unsigned integer or a special
 * value indicating that the value is unknown.
 */
type vliType uint64

const (
	/* All bits set: marks a size that is not present */
	vliUnknown vliType = ^vliType(0)
	/* Maximum encoded size of a VLI (evaluates to 9 bytes) */
	vliBytesMax = 8 * 8 / 7 // (Sizeof(vliType) * 8 / 7)
)
/* from linux/lib/xz/xz_dec_stream.c **********************************/
/*
 * Hash used to validate the Index field. decBlock accumulates one of
 * these from the decoded Blocks and decIndex accumulates another from
 * the Index Records.
 */
type xzDecHash struct {
	/* Running sum of Unpadded Size values */
	unpadded vliType
	/* Running sum of Uncompressed Size values */
	uncompressed vliType
	/* Fed with both running sums after every Block or Record */
	sha256 hash.Hash
}
// type of xzDec.sequence
type xzDecSeq int

// States of decMain, listed in the order in which the fields of a
// Stream are decoded.
const (
	seqStreamHeader xzDecSeq = iota
	seqBlockStart
	seqBlockHeader
	seqBlockUncompress
	seqBlockPadding
	seqBlockCheck
	seqIndex
	seqIndexPadding
	seqIndexCRC32
	seqStreamFooter
)
// type of xzDec.index.sequence
type xzDecIndexSeq int

// States of decIndex: the Number of Records is decoded once, then the
// Unpadded Size and Uncompressed Size of each Record alternate.
const (
	seqIndexCount xzDecIndexSeq = iota
	seqIndexUnpadded
	seqIndexUncompressed
)
/**
 * xzDec - Opaque type to hold the XZ decoder state
 */
type xzDec struct {
	/* Position in decMain */
	sequence xzDecSeq
	/* Position in variable-length integers and Check fields */
	pos int
	/* Variable-length integer decoded by decVLI */
	vli vliType
	/* Saved inPos and outPos */
	inStart  int
	outStart int
	/* CRC32 checksum hash used in Index */
	crc32 hash.Hash
	/*
	 * Hashes used in Blocks. Each one is created by decStreamHeader
	 * the first time its check type is seen and reset on later use.
	 */
	checkCRC32  hash.Hash
	checkCRC64  hash.Hash
	checkSHA256 hash.Hash
	/* for checkTypes CRC32/CRC64/SHA256, check is one of the above 3 hashes */
	check hash.Hash
	/* Embedded stream header struct containing CheckType */
	*Header
	/*
	 * True if the next call to xzDecRun is allowed to return
	 * xzBufError.
	 */
	allowBufError bool
	/* Information stored in Block Header */
	blockHeader struct {
		/*
		 * Value stored in the Compressed Size field, or
		 * vliUnknown if Compressed Size is not present.
		 */
		compressed vliType
		/*
		 * Value stored in the Uncompressed Size field, or
		 * vliUnknown if Uncompressed Size is not present.
		 */
		uncompressed vliType
		/* Size of the Block Header field */
		size int
	}
	/* Information collected when decoding Blocks */
	block struct {
		/* Observed compressed size of the current Block */
		compressed vliType
		/* Observed uncompressed size of the current Block */
		uncompressed vliType
		/* Number of Blocks decoded so far */
		count vliType
		/*
		 * Hash calculated from the Block sizes. This is used to
		 * validate the Index field.
		 */
		hash xzDecHash
	}
	/* Variables needed when verifying the Index field */
	index struct {
		/* Position in decIndex */
		sequence xzDecIndexSeq
		/* Size of the Index in bytes */
		size vliType
		/* Number of Records (matches block.count in valid files) */
		count vliType
		/*
		 * Hash calculated from the Records (matches block.hash in
		 * valid files).
		 */
		hash xzDecHash
	}
	/*
	 * Temporary buffer needed to hold Stream Header, Block Header,
	 * and Stream Footer. The Block Header is the biggest (1 KiB)
	 * so we reserve space according to that. bufArray has to be aligned
	 * to a multiple of four bytes; the variables before it
	 * should guarantee this.
	 */
	temp struct {
		/* Number of bytes of buf filled so far (see fillTemp) */
		pos      int
		buf      []byte // slice buf will be backed by bufArray
		bufArray [1024]byte
	}
	// chain is the function (or to be more precise, closure) which
	// does the decompression and will call into the lzma2 and other
	// filter code as needed. It is constructed by decBlockHeader
	chain func(b *xzBuf) xzRet
	// lzma2 holds the state of the last filter (which must be LZMA2)
	lzma2 *xzDecLZMA2
	// pointers to allocated BCJ/Delta filters
	bcjs   []*xzDecBCJ
	deltas []*xzDecDelta
	// number of currently in use BCJ/Delta filters from the above
	bcjsUsed   int
	deltasUsed int
}
/*
 * Sizes of the Check field with different Check IDs. The table is
 * indexed by the Check ID (0-15) and gives the size in bytes.
 */
var checkSizes = [...]byte{
	0,
	4, 4, 4,
	8, 8, 8,
	16, 16, 16,
	32, 32, 32,
	64, 64, 64,
}
/*
 * fillTemp copies input from b.in[b.inPos:] into s.temp.buf starting at
 * s.temp.pos, advancing both positions by the amount copied. The caller
 * sets len(s.temp.buf) to the number of bytes wanted. Once the buffer
 * is full, s.temp.pos is reset to zero and true is returned; otherwise
 * false is returned and more input is needed.
 */
func fillTemp(s *xzDec, b *xzBuf) bool {
	/* copy moves as much as fits and reports how much that was. */
	n := copy(s.temp.buf[s.temp.pos:], b.in[b.inPos:])
	b.inPos += n
	s.temp.pos += n
	if s.temp.pos != len(s.temp.buf) {
		return false
	}
	s.temp.pos = 0
	return true
}
/*
 * decVLI decodes a variable-length integer (little-endian base-128
 * encoding) from in, starting at *inPos, into s.vli. s.pos holds the
 * bit position reached so far so that decoding can resume on a later
 * call.
 *
 * It returns xzStreamEnd when the integer is complete, xzOK when the
 * input ran out first, and xzDataError for a non-minimal or over-long
 * encoding.
 */
func decVLI(s *xzDec, in []byte, inPos *int) xzRet {
	if s.pos == 0 {
		s.vli = 0
	}
	for *inPos < len(in) {
		c := in[*inPos]
		*inPos++
		s.vli |= vliType(c&0x7f) << uint(s.pos)
		if c < 0x80 {
			/* Last byte. Don't allow non-minimal encodings. */
			if c == 0 && s.pos != 0 {
				return xzDataError
			}
			s.pos = 0
			return xzStreamEnd
		}
		s.pos += 7
		if s.pos == 7*vliBytesMax {
			return xzDataError
		}
	}
	return xzOK
}
/*
 * decBlock decodes the Compressed Data field of a Block by running
 * s.chain on b.
 *
 * After every call the observed compressed and uncompressed sizes are
 * updated and must not exceed the values stored in the Block Header (an
 * absent value is vliUnknown, which no observed size can exceed;
 * validation assumes that no integer overflow occurs, since vliType is
 * uint64). The new output is fed to s.check when the Stream Header
 * selected CRC32, CRC64 or SHA256.
 *
 * When the chain reports xzStreamEnd, the observed sizes must equal the
 * sizes stored in the Block Header, if present. The Block hash and the
 * Block count, which are later used to validate the Index field, are
 * then updated.
 *
 * The return value of the chain is passed on unless a size check fails,
 * in which case xzDataError is returned.
 */
func decBlock(s *xzDec, b *xzBuf) xzRet {
	s.inStart, s.outStart = b.inPos, b.outPos
	ret := s.chain(b)
	s.block.compressed += vliType(b.inPos - s.inStart)
	s.block.uncompressed += vliType(b.outPos - s.outStart)

	hdr, blk := &s.blockHeader, &s.block
	/*
	 * There is no need to separately check for vliUnknown since
	 * the observed sizes are always smaller than vliUnknown.
	 */
	if blk.compressed > hdr.compressed || blk.uncompressed > hdr.uncompressed {
		return xzDataError
	}
	if s.CheckType == CheckCRC32 || s.CheckType == CheckCRC64 ||
		s.CheckType == CheckSHA256 {
		_, _ = s.check.Write(b.out[s.outStart:b.outPos])
	}
	if ret != xzStreamEnd {
		return ret
	}

	if hdr.compressed != vliUnknown && hdr.compressed != blk.compressed {
		return xzDataError
	}
	if hdr.uncompressed != vliUnknown && hdr.uncompressed != blk.uncompressed {
		return xzDataError
	}
	/* Unpadded Size = Block Header + Compressed Data + Check. */
	blk.hash.unpadded += vliType(hdr.size) + blk.compressed +
		vliType(checkSizes[s.CheckType])
	blk.hash.uncompressed += blk.uncompressed
	var sums [2 * 8]byte // 2*Sizeof(vliType)
	putLE64(uint64(blk.hash.unpadded), sums[:])
	putLE64(uint64(blk.hash.uncompressed), sums[8:])
	_, _ = blk.hash.sha256.Write(sums[:])
	blk.count++
	return ret
}
/*
 * indexUpdate accounts for the input consumed since s.inStart: its
 * length is added to the Index size and its bytes are fed to the
 * CRC32 hash.
 */
func indexUpdate(s *xzDec, b *xzBuf) {
	consumed := b.inPos - s.inStart
	s.index.size += vliType(consumed)
	_, _ = s.crc32.Write(b.in[s.inStart:b.inPos])
}
/*
 * decIndex decodes the Number of Records, Unpadded Size and
 * Uncompressed Size fields of the Index field. Index Padding and CRC32
 * are not handled here.
 *
 * It returns xzOK when more input is needed, xzStreamEnd when every
 * Record has been decoded, and xzDataError when the input is corrupt.
 */
func decIndex(s *xzDec, b *xzBuf) xzRet {
	idx := &s.index
	for {
		if ret := decVLI(s, b.in, &b.inPos); ret != xzStreamEnd {
			indexUpdate(s, b)
			return ret
		}
		switch idx.sequence {
		case seqIndexCount:
			idx.count = s.vli
			/*
			 * The Number of Records field must indicate the same
			 * number of Records as there were Blocks in the Stream.
			 */
			if idx.count != s.block.count {
				return xzDataError
			}
			idx.sequence = seqIndexUnpadded
		case seqIndexUnpadded:
			idx.hash.unpadded += s.vli
			idx.sequence = seqIndexUncompressed
		case seqIndexUncompressed:
			idx.hash.uncompressed += s.vli
			var sums [2 * 8]byte // 2*Sizeof(vliType)
			putLE64(uint64(idx.hash.unpadded), sums[:])
			putLE64(uint64(idx.hash.uncompressed), sums[8:])
			_, _ = idx.hash.sha256.Write(sums[:])
			idx.count--
			idx.sequence = seqIndexUnpadded
		}
		/* count is the number of Records still to be decoded. */
		if idx.count == 0 {
			return xzStreamEnd
		}
	}
}
/*
 * crcValidate checks that the next 4 input bytes match
 * s.crc32.Sum(nil) in reversed byte order. s.pos must be zero when the
 * first byte is validated; it tracks progress if the input runs out.
 *
 * It returns xzOK when more input is needed, xzDataError on a mismatch
 * and xzStreamEnd once all four bytes matched, in which case s.crc32
 * and s.pos are reset.
 */
func crcValidate(s *xzDec, b *xzBuf) xzRet {
	want := s.crc32.Sum(nil)
	/* CRC32 - reverse slice */
	want[0], want[3] = want[3], want[0]
	want[1], want[2] = want[2], want[1]
	for {
		if b.inPos == len(b.in) {
			return xzOK
		}
		if b.in[b.inPos] != want[s.pos] {
			return xzDataError
		}
		b.inPos++
		s.pos++
		if s.pos >= 4 {
			break
		}
	}
	s.crc32.Reset()
	s.pos = 0
	return xzStreamEnd
}
/*
 * checkValidate checks that the next 4/8/32 input bytes match
 * s.check.Sum(nil); for CRC32 and CRC64 the sum is compared in
 * reversed byte order. s.pos must be zero when the first byte is
 * validated; it tracks progress if the input runs out.
 *
 * It returns xzOK when more input is needed, xzDataError on a mismatch
 * and xzStreamEnd once the whole field matched, in which case s.check
 * and s.pos are reset.
 */
func checkValidate(s *xzDec, b *xzBuf) xzRet {
	want := s.check.Sum(nil)
	switch s.CheckType {
	case CheckCRC32, CheckCRC64:
		/* CRC32/64 - reverse slice */
		for lo, hi := 0, len(want)-1; lo < hi; lo, hi = lo+1, hi-1 {
			want[lo], want[hi] = want[hi], want[lo]
		}
	}
	for {
		if b.inPos == len(b.in) {
			return xzOK
		}
		if b.in[b.inPos] != want[s.pos] {
			return xzDataError
		}
		b.inPos++
		s.pos++
		if s.pos >= len(want) {
			break
		}
	}
	s.check.Reset()
	s.pos = 0
	return xzStreamEnd
}
/*
 * checkSkip skips over the Check field when the Check ID is not
 * supported. s.pos counts the bytes skipped so far. It returns false
 * when the input runs out first, and true (with s.pos reset to zero)
 * once the whole Check field has been skipped over.
 */
func checkSkip(s *xzDec, b *xzBuf) bool {
	size := int(checkSizes[s.CheckType])
	for ; s.pos < size; s.pos++ {
		if b.inPos == len(b.in) {
			return false
		}
		b.inPos++
	}
	s.pos = 0
	return true
}
/*
 * polynomial table used in decStreamHeader below. It is built once
 * from the ECMA polynomial and handed to crc64.New whenever a CRC64
 * check hash is created.
 */
var xzCRC64Table = crc64.MakeTable(crc64.ECMA)
/*
 * decStreamHeader decodes the Stream Header field (the first 12 bytes
 * of the .xz Stream) held in s.temp.buf.
 *
 * It returns xzFormatError when the magic bytes are wrong, xzDataError
 * when the CRC32 of the Stream Flags does not match, and xzOptionsError
 * when the first flags byte is non-zero or the Check ID is out of
 * range. On success s.CheckType is set and, for CRC32, CRC64 and
 * SHA256, s.check is pointed at a fresh or reset hash.
 */
func decStreamHeader(s *xzDec) xzRet {
	const flagsAt = len(headerMagic)
	hdr := s.temp.buf
	if string(hdr[:flagsAt]) != headerMagic {
		return xzFormatError
	}
	if crc32.ChecksumIEEE(hdr[flagsAt:flagsAt+2]) != getLE32(hdr[flagsAt+2:]) {
		return xzDataError
	}
	if hdr[flagsAt] != 0 {
		return xzOptionsError
	}
	/*
	 * Of integrity checks, we support none (Check ID = 0),
	 * CRC32 (Check ID = 1), CRC64 (Check ID = 4) and SHA256 (Check ID = 10)
	 * However, we will accept other check types too, but then the check
	 * won't be verified and a warning (xzUnsupportedCheck) will be given.
	 */
	s.CheckType = CheckID(hdr[flagsAt+1])
	if s.CheckType > checkMax {
		return xzOptionsError
	}
	switch s.CheckType {
	case CheckNone:
		// CheckNone: no action needed
	case CheckCRC32:
		if s.checkCRC32 != nil {
			s.checkCRC32.Reset()
		} else {
			s.checkCRC32 = crc32.NewIEEE()
		}
		s.check = s.checkCRC32
	case CheckCRC64:
		if s.checkCRC64 != nil {
			s.checkCRC64.Reset()
		} else {
			s.checkCRC64 = crc64.New(xzCRC64Table)
		}
		s.check = s.checkCRC64
	case CheckSHA256:
		if s.checkSHA256 != nil {
			s.checkSHA256.Reset()
		} else {
			s.checkSHA256 = sha256.New()
		}
		s.check = s.checkSHA256
	default:
		return xzUnsupportedCheck
	}
	return xzOK
}
/*
 * decStreamFooter decodes the Stream Footer field (the last 12 bytes of
 * the .xz Stream) held in s.temp.buf. Any inconsistency yields
 * xzDataError. On success xzStreamEnd is returned instead of xzOK to
 * be more convenient for the caller.
 */
func decStreamFooter(s *xzDec) xzRet {
	ftr := s.temp.buf
	switch {
	case string(ftr[10:10+len(footerMagic)]) != footerMagic:
		return xzDataError
	case crc32.ChecksumIEEE(ftr[4:10]) != getLE32(ftr):
		return xzDataError
	/*
	 * Validate Backward Size. Note that we never added the size of the
	 * Index CRC32 field to s.index.size, thus we use s.index.size / 4
	 * instead of s.index.size / 4 - 1.
	 */
	case s.index.size>>2 != vliType(getLE32(ftr[4:])):
		return xzDataError
	/* The Stream Flags must repeat those of the Stream Header. */
	case ftr[8] != 0 || CheckID(ftr[9]) != s.CheckType:
		return xzDataError
	}
	return xzStreamEnd
}
/*
 * decBlockHeader decodes the Block Header held in s.temp.buf and
 * initialises the filter chain s.chain.
 *
 * It returns xzDataError when the header is corrupt (bad CRC32,
 * truncated or invalid fields), xzOptionsError when it asks for
 * something unsupported (reserved flags, unknown filters or filter
 * properties, non-zero padding), the error of a filter reset if one
 * fails, and xzOK otherwise.
 */
func decBlockHeader(s *xzDec) xzRet {
	type filterSpec struct {
		id    xzFilterID
		props uint32
	}

	/*
	 * Validate the CRC32, which is stored in the last four bytes. We
	 * know that the temp buffer is at least eight bytes so this is
	 * safe.
	 */
	crcAt := len(s.temp.buf) - 4
	wantCRC := getLE32(s.temp.buf[crcAt:])
	s.temp.buf = s.temp.buf[:crcAt]
	hdr := s.temp.buf
	if crc32.ChecksumIEEE(hdr) != wantCRC {
		return xzDataError
	}
	s.temp.pos = 2

	/* Catch unsupported Block Flags. */
	flags := hdr[1]
	if flags&0x3C != 0 {
		return xzOptionsError
	}

	/* Compressed Size */
	if flags&0x40 != 0 {
		if decVLI(s, hdr, &s.temp.pos) != xzStreamEnd {
			return xzDataError
		}
		// the whole block must stay smaller than 2^63 bytes
		// the block header cannot be smaller than 8 bytes
		if s.vli >= 1<<63-8 {
			return xzDataError
		}
		// compressed size must be non-zero
		if s.vli == 0 {
			return xzDataError
		}
		s.blockHeader.compressed = s.vli
	} else {
		s.blockHeader.compressed = vliUnknown
	}

	/* Uncompressed Size */
	if flags&0x80 != 0 {
		if decVLI(s, hdr, &s.temp.pos) != xzStreamEnd {
			return xzDataError
		}
		s.blockHeader.uncompressed = s.vli
	} else {
		s.blockHeader.uncompressed = vliUnknown
	}

	// total number of filters (1-4); the last one must be LZMA2
	filterTotal := int(flags&0x03) + 1
	last := filterTotal - 1
	filters := make([]filterSpec, filterTotal)

	// decode the non-last filters which cannot be LZMA2
	for i := 0; i < last; i++ {
		/* Valid Filter Flags always take at least two bytes. */
		if len(hdr)-s.temp.pos < 2 {
			return xzDataError
		}
		id := xzFilterID(hdr[s.temp.pos])
		propsSize := hdr[s.temp.pos+1]
		s.temp.pos += 2

		var props uint32
		switch id {
		case idDelta:
			if propsSize != 0x01 {
				return xzOptionsError
			}
			/* Filter Properties contains distance - 1 */
			if len(hdr)-s.temp.pos < 1 {
				return xzDataError
			}
			props = uint32(hdr[s.temp.pos])
			s.temp.pos++
		case idBCJX86, idBCJPowerPC, idBCJIA64,
			idBCJARM, idBCJARMThumb, idBCJSPARC:
			switch propsSize {
			case 0x00:
				/* No start offset given: props stays zero. */
			case 0x04:
				if len(hdr)-s.temp.pos < 4 {
					return xzDataError
				}
				props = getLE32(hdr[s.temp.pos:])
				s.temp.pos += 4
			default:
				return xzOptionsError
			}
		default:
			return xzOptionsError
		}
		filters[i] = filterSpec{id: id, props: props}
	}

	/*
	 * decode the last filter which must be LZMA2
	 */
	if len(hdr)-s.temp.pos < 2 {
		return xzDataError
	}
	/* Filter ID = LZMA2 */
	if xzFilterID(hdr[s.temp.pos]) != idLZMA2 {
		return xzOptionsError
	}
	s.temp.pos++
	/* Size of Properties = 1-byte Filter Properties */
	if hdr[s.temp.pos] != 0x01 {
		return xzOptionsError
	}
	s.temp.pos++
	/* Filter Properties contains LZMA2 dictionary size. */
	if len(hdr)-s.temp.pos < 1 {
		return xzDataError
	}
	filters[last] = filterSpec{id: idLZMA2, props: uint32(hdr[s.temp.pos])}
	s.temp.pos++

	/*
	 * Process the filter list and create s.chain, going from last
	 * filter (LZMA2) to first filter
	 *
	 * First, LZMA2.
	 */
	if ret := xzDecLZMA2Reset(s.lzma2, byte(filters[last].props)); ret != xzOK {
		return ret
	}
	s.chain = func(b *xzBuf) xzRet {
		return xzDecLZMA2Run(s.lzma2, b)
	}

	/*
	 * Now the non-last filters. Each one wraps the chain built so far.
	 */
	for i := last - 1; i >= 0; i-- {
		spec := filters[i]
		next := s.chain
		switch spec.id {
		case idDelta:
			/* Reuse a previously allocated decoder when possible. */
			var delta *xzDecDelta
			if s.deltasUsed < len(s.deltas) {
				delta = s.deltas[s.deltasUsed]
			} else {
				delta = xzDecDeltaCreate()
				s.deltas = append(s.deltas, delta)
			}
			s.deltasUsed++
			if ret := xzDecDeltaReset(delta, int(spec.props)+1); ret != xzOK {
				return ret
			}
			s.chain = func(b *xzBuf) xzRet {
				return xzDecDeltaRun(delta, b, next)
			}
		case idBCJX86, idBCJPowerPC, idBCJIA64,
			idBCJARM, idBCJARMThumb, idBCJSPARC:
			/* Reuse a previously allocated decoder when possible. */
			var bcj *xzDecBCJ
			if s.bcjsUsed < len(s.bcjs) {
				bcj = s.bcjs[s.bcjsUsed]
			} else {
				bcj = xzDecBCJCreate()
				s.bcjs = append(s.bcjs, bcj)
			}
			s.bcjsUsed++
			if ret := xzDecBCJReset(bcj, spec.id, int(spec.props)); ret != xzOK {
				return ret
			}
			s.chain = func(b *xzBuf) xzRet {
				return xzDecBCJRun(bcj, b, next)
			}
		}
	}

	/* The rest must be Header Padding. */
	for ; s.temp.pos < len(hdr); s.temp.pos++ {
		if hdr[s.temp.pos] != 0x00 {
			return xzOptionsError
		}
	}

	s.temp.pos = 0
	s.block.compressed = 0
	s.block.uncompressed = 0
	return xzOK
}
/*
 * decMain runs the decoder state machine selected by s.sequence against
 * the buffers in b. Each case handles one part of the stream and either
 * returns to the caller or falls through / loops to the next state:
 *
 *   StreamHeader -> BlockStart -> BlockHeader -> BlockUncompress ->
 *   BlockPadding -> BlockCheck -> (back to BlockStart)
 *   BlockStart -> Index -> IndexPadding -> IndexCRC32 -> StreamFooter
 *
 * xzOK is returned whenever a state cannot complete with the buffers
 * currently supplied; the state is kept in s so the next call resumes
 * where this one stopped. The final state returns whatever
 * decStreamFooter returns.
 */
func decMain(s *xzDec, b *xzBuf) xzRet {
var ret xzRet
/*
* Store the start position for the case when we are in the middle
* of the Index field.
*/
s.inStart = b.inPos
for {
switch s.sequence {
case seqStreamHeader:
/*
* Stream Header is copied to s.temp, and then
* decoded from there. This way if the caller
* gives us only little input at a time, we can
* still keep the Stream Header decoding code
* simple. Similar approach is used in many places
* in this file.
*/
if !fillTemp(s, b) {
return xzOK
}
/*
* If decStreamHeader returns
* xzUnsupportedCheck, it is still possible
* to continue decoding. Thus, update s.sequence
* before calling decStreamHeader.
*/
s.sequence = seqBlockStart
ret = decStreamHeader(s)
if ret != xzOK {
return ret
}
fallthrough
case seqBlockStart:
/* We need one byte of input to continue. */
if b.inPos == len(b.in) {
return xzOK
}
/* See if this is the beginning of the Index field. */
if b.in[b.inPos] == 0 {
s.inStart = b.inPos
b.inPos++
s.sequence = seqIndex
/*
* This break only leaves the switch; the enclosing
* for loop then dispatches on seqIndex.
*/
break
}
/*
* Calculate the size of the Block Header and
* prepare to decode it.
*/
s.blockHeader.size = (int(b.in[b.inPos]) + 1) * 4
s.temp.buf = s.temp.bufArray[:s.blockHeader.size]
s.temp.pos = 0
s.sequence = seqBlockHeader
fallthrough
case seqBlockHeader:
if !fillTemp(s, b) {
return xzOK
}
ret = decBlockHeader(s)
if ret != xzOK {
return ret
}
s.sequence = seqBlockUncompress
fallthrough
case seqBlockUncompress:
/*
* Any result other than xzStreamEnd (including xzOK)
* is handed back to the caller unchanged.
*/
ret = decBlock(s, b)
if ret != xzStreamEnd {
return ret
}
s.sequence = seqBlockPadding
fallthrough
case seqBlockPadding:
/*
* Size of Compressed Data + Block Padding
* must be a multiple of four. We don't need
* s.block.compressed for anything else
* anymore, so we use it here to test the size
* of the Block Padding field.
*/
for s.block.compressed&3 != 0 {
if b.inPos == len(b.in) {
return xzOK
}
if b.in[b.inPos] != 0 {
return xzDataError
}
b.inPos++
s.block.compressed++
}
s.sequence = seqBlockCheck
fallthrough
case seqBlockCheck:
/*
* The three listed check types are validated; any other
* check type is skipped over with checkSkip.
*/
switch s.CheckType {
case CheckCRC32, CheckCRC64, CheckSHA256:
ret = checkValidate(s, b)
if ret != xzStreamEnd {
return ret
}
default:
if !checkSkip(s, b) {
return xzOK
}
}
/*
* No fallthrough here: the for loop iterates again and
* handles the next Block or the Index via seqBlockStart.
*/
s.sequence = seqBlockStart
case seqIndex:
ret = decIndex(s, b)
if ret != xzStreamEnd {
return ret
}
s.sequence = seqIndexPadding
fallthrough
case seqIndexPadding:
/*
* Index Padding: consume zero bytes until the Index size,
* counting the input consumed since s.inStart, is a
* multiple of four.
*/
for (s.index.size+vliType(b.inPos-s.inStart))&3 != 0 {
if b.inPos == len(b.in) {
indexUpdate(s, b)
return xzOK
}
if b.in[b.inPos] != 0 {
return xzDataError
}
b.inPos++
}
/* Finish the CRC32 value and Index size. */
indexUpdate(s, b)
/* Compare the hashes to validate the Index field. */
if !bytes.Equal(
s.block.hash.sha256.Sum(nil), s.index.hash.sha256.Sum(nil)) {
return xzDataError
}
s.sequence = seqIndexCRC32
fallthrough
case seqIndexCRC32:
ret = crcValidate(s, b)
if ret != xzStreamEnd {
return ret
}
/* Size s.temp for the Stream Footer (streamHeaderSize bytes). */
s.temp.buf = s.temp.bufArray[:streamHeaderSize]
s.sequence = seqStreamFooter
fallthrough
case seqStreamFooter:
if !fillTemp(s, b) {
return xzOK
}
return decStreamFooter(s)
}
}
/* Never reached */
}
/**
 * xzDecRun - Run the XZ decoder
 * @s: Decoder state allocated using xzDecInit
 * @b: Input and output buffers
 *
 * See xzRet for details of return values.
 *
 * xzDecRun wraps decMain and adds stall detection. A call "stalls" when
 * decMain returns xzOK without consuming any input or producing any
 * output. A zlib-style caller may legitimately refill the input buffer
 * only after seeing one such call, so the first stall is tolerated and
 * merely remembered in s.allowBufError. A second consecutive stall is
 * reported as xzBufError so that a truncated or otherwise corrupt
 * stream cannot make the caller loop forever. Any call that is not a
 * stall clears the flag again.
 */
func xzDecRun(s *xzDec, b *xzBuf) xzRet {
	prevIn, prevOut := b.inPos, b.outPos
	ret := decMain(s, b)

	stalled := ret == xzOK && b.inPos == prevIn && b.outPos == prevOut
	if !stalled {
		s.allowBufError = false
		return ret
	}

	/* Second stall in a row: give up instead of returning xzOK again. */
	if s.allowBufError {
		ret = xzBufError
	}
	s.allowBufError = true
	return ret
}
/**
 * xzDecInit - Allocate and initialize a XZ decoder state
 * @dictMax: Upper bound on the LZMA2 dictionary (history buffer) size
 *           used for decoding. LZMA2 dictionaries are always 2^n or
 *           2^n + 2^(n-1) bytes (the latter being less common in
 *           practice), so other values for dictMax don't make sense.
 * @header:  Header stored in the decoder state as s.Header.
 *
 * dictMax caps the dictionary that xzDecRun may allocate once it has
 * parsed the dictionary size from the stream headers. This avoids
 * excessive allocations up front while still bounding memory usage
 * when decompressing streams from untrusted sources.
 *
 * The returned xzDec has already been passed through xzDecReset and is
 * ready to be used with xzDecRun.
 */
func xzDecInit(dictMax uint32, header *Header) *xzDec {
	dec := &xzDec{
		Header: header,
		crc32:  crc32.NewIEEE(),
		lzma2:  xzDecLZMA2Create(dictMax),
	}
	dec.block.hash.sha256 = sha256.New()
	dec.index.hash.sha256 = sha256.New()
	xzDecReset(dec)
	return dec
}
/**
 * xzDecReset - Return an existing decoder state to its initial state
 * @s: Decoder state allocated using xzDecInit
 *
 * Everything is cleared in place, so a decoder can be reused for a new
 * stream without reallocating memory with xzDecInit.
 */
func xzDecReset(s *xzDec) {
	/* State machine and progress tracking. */
	s.sequence = seqStreamHeader
	s.allowBufError = false
	s.pos = 0

	/* Integrity check state; the check type is unknown until a header is read. */
	s.crc32.Reset()
	s.check = nil
	s.CheckType = checkUnset

	/* Block counters and hash state. */
	s.block.compressed = 0
	s.block.uncompressed = 0
	s.block.count = 0
	s.block.hash.unpadded = 0
	s.block.hash.uncompressed = 0
	s.block.hash.sha256.Reset()

	/* Index counters and hash state. */
	s.index.sequence = seqIndexCount
	s.index.size = 0
	s.index.count = 0
	s.index.hash.unpadded = 0
	s.index.hash.uncompressed = 0
	s.index.hash.sha256.Reset()

	/* Temporary buffer, sized for the stream header. */
	s.temp.pos = 0
	s.temp.buf = s.temp.bufArray[:streamHeaderSize]

	/* Filter chain; previously created filter states are kept for reuse. */
	s.chain = nil
	s.bcjsUsed = 0
	s.deltasUsed = 0
}
+52
View File
@@ -0,0 +1,52 @@
/*
* XZ decompressor utility functions
*
* Author: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
// getLE32 decodes the first four bytes of buf as a little-endian
// uint32. It panics if buf holds fewer than four bytes.
func getLE32(buf []byte) uint32 {
	var v uint32
	for i := 0; i < 4; i++ {
		v |= uint32(buf[i]) << (8 * uint(i))
	}
	return v
}
// getBE32 decodes the first four bytes of buf as a big-endian uint32.
// It panics if buf holds fewer than four bytes.
func getBE32(buf []byte) uint32 {
	var v uint32
	for i := 0; i < 4; i++ {
		v = v<<8 | uint32(buf[i])
	}
	return v
}
// putLE32 stores val into the first four bytes of buf in little-endian
// order. It panics if buf holds fewer than four bytes.
func putLE32(val uint32, buf []byte) {
	for i := 0; i < 4; i++ {
		buf[i] = byte(val >> (8 * uint(i)))
	}
}
// putBE32 stores val into the first four bytes of buf in big-endian
// order. It panics if buf holds fewer than four bytes.
func putBE32(val uint32, buf []byte) {
	for i := 0; i < 4; i++ {
		shift := 8 * uint(3-i)
		buf[i] = byte(val >> shift)
	}
}
// putLE64 stores val into the first eight bytes of buf in little-endian
// order. It panics if buf holds fewer than eight bytes.
func putLE64(val uint64, buf []byte) {
	for i := 0; i < 8; i++ {
		buf[i] = byte(val >> (8 * uint(i)))
	}
}
+124
View File
@@ -0,0 +1,124 @@
/*
* XZ decompressor
*
* Authors: Lasse Collin <lasse.collin@tukaani.org>
* Igor Pavlov <http://7-zip.org/>
*
* Translation to Go: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
/* from linux/include/linux/xz.h **************************************/
/**
* xzRet - Return codes
* @xzOK: Everything is OK so far. More input or more
* output space is required to continue.
* @xzStreamEnd: Operation finished successfully.
* @xzUnsupportedCheck: Integrity check type is not supported. Decoding
* is still possible by simply calling xzDecRun
* again.
* @xzMemlimitError: A bigger LZMA2 dictionary would be needed than
* allowed by the dictMax argument given to
* xzDecInit.
* @xzFormatError: File format was not recognized (wrong magic
* bytes).
* @xzOptionsError: This implementation doesn't support the requested
* compression options. In the decoder this means
* that the header CRC32 matches, but the header
* itself specifies something that we don't support.
* @xzDataError: Compressed data is corrupt.
* @xzBufError: Cannot make any progress.
*
* xzBufError is returned when two consecutive calls to XZ code cannot
* consume any input and cannot produce any new output. This happens
* when there is no new input available, or the output buffer is full
* while at least one output byte is still pending. Assuming your code
* is not buggy, you can get this error only when decoding a
* compressed stream that is truncated or otherwise corrupt.
*/
type xzRet int
/* Return codes, numbered from zero in the order documented above. */
const (
xzOK xzRet = iota
xzStreamEnd
xzUnsupportedCheck
xzMemlimitError
xzFormatError
xzOptionsError
xzDataError
xzBufError
)
/**
* xzBuf - Passing input and output buffers to XZ code
* @in: Input buffer.
* @inPos: Current position in the input buffer. This must not exceed
* input buffer size.
* @out: Output buffer.
* @outPos: Current position in the output buffer. This must not exceed
* output buffer size.
*
* Only the contents of the output buffer from out[outPos] onward, and
* the variables inPos and outPos are modified by the XZ code.
*/
type xzBuf struct {
in []byte /* input buffer */
inPos int /* current position in in; at most len(in) */
out []byte /* output buffer */
outPos int /* current position in out; at most len(out) */
}
/*
 * All XZ filter IDs. xzFilterID identifies a filter named in a block
 * header's filter list.
 */
type xzFilterID int64
const (
/* Delta filter. */
idDelta xzFilterID = 0x03
/* BCJ filters, one ID per variant. */
idBCJX86 xzFilterID = 0x04
idBCJPowerPC xzFilterID = 0x05
idBCJIA64 xzFilterID = 0x06
idBCJARM xzFilterID = 0x07
idBCJARMThumb xzFilterID = 0x08
idBCJSPARC xzFilterID = 0x09
/* LZMA2 filter. */
idLZMA2 xzFilterID = 0x21
)
// CheckID is the type of the data integrity check in an XZ stream
// calculated from the uncompressed data.
type CheckID int

// String returns the name of the check type: "None", "CRC32", "CRC64"
// or "SHA256". Every other value yields "Unknown".
func (id CheckID) String() string {
	name := "Unknown"
	switch id {
	case CheckNone:
		name = "None"
	case CheckCRC32:
		name = "CRC32"
	case CheckCRC64:
		name = "CRC64"
	case CheckSHA256:
		name = "SHA256"
	}
	return name
}

// Data integrity check types.
const (
	CheckNone   CheckID = 0x00 // no integrity check
	CheckCRC32  CheckID = 0x01 // CRC32
	CheckCRC64  CheckID = 0x04 // CRC64
	CheckSHA256 CheckID = 0x0A // SHA-256

	checkMax   CheckID = 0x0F // presumably the largest valid check ID; confirm against the stream header decoder
	checkUnset CheckID = -1   // placeholder used until a stream header has been read
)
// Header holds the information carried by an XZ stream's stream
// header. Reader embeds Header, so this information is exposed as
// fields of the Reader. Currently it contains only the stream's data
// integrity check type.
type Header struct {
CheckType CheckID // type of the stream's data integrity check
}
+35
View File
@@ -0,0 +1,35 @@
// Package xz implements XZ decompression natively in Go.
//
// Usage
//
// For ease of use, this package is designed to have a similar API to
// compress/gzip. See the examples for further details.
//
// Implementation
//
// This package is a translation from C to Go of XZ Embedded
// (http://tukaani.org/xz/embedded.html) with enhancements made so as
// to implement all mandatory and optional parts of the XZ file format
// specification v1.0.4. It supports all filters and block check
// types, supports multiple streams, and performs index verification
// using SHA-256 as recommended by the specification.
//
// Speed
//
// On the author's Intel Ivybridge i5, decompression speed is about
// half that of the standard XZ Utils (tested with a recent linux
// kernel tarball).
//
// Thanks
//
// Thanks are due to Lasse Collin and Igor Pavlov, the authors of XZ
// Embedded, on whose code package xz is based. It would not exist
// without their decision to allow others to modify and reuse their
// code.
//
// Bug reports
//
// For bug reports relating to this package please contact the author
// through https://github.com/xi2/xz/issues, and not the authors of XZ
// Embedded.
package xz
+256
View File
@@ -0,0 +1,256 @@
/*
* Package xz Go Reader API
*
* Author: Michael Cross <https://github.com/xi2>
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package xz
import (
"errors"
"io"
)
// Package specific errors. Read returns one of these when the decoder
// reports the corresponding internal return code.
var (
// ErrUnsupportedCheck reports that the stream's integrity check type
// is not supported.
ErrUnsupportedCheck = errors.New("xz: integrity check type not supported")
// ErrMemlimit reports that the stream needs an LZMA2 dictionary
// larger than the dictMax given to NewReader.
ErrMemlimit = errors.New("xz: LZMA2 dictionary size exceeds max")
// ErrFormat reports that the input was not recognized as XZ data.
ErrFormat = errors.New("xz: file format not recognized")
// ErrOptions reports that the stream uses compression options this
// package does not support.
ErrOptions = errors.New("xz: compression options not supported")
// ErrData reports that the compressed data is corrupt.
ErrData = errors.New("xz: data is corrupt")
// ErrBuf reports that the decoder could make no further progress,
// which happens when the data is truncated or corrupt.
ErrBuf = errors.New("xz: data is truncated or corrupt")
)
// DefaultDictMax is the default maximum dictionary size in bytes used
// by the decoder. This value is sufficient to decompress files
// created with XZ Utils "xz -9".
const DefaultDictMax = 1 << 26 // 64 MiB
// inBufSize is the input buffer size used by the decoder.
const inBufSize = 1 << 13 // 8 KiB
// A Reader is an io.Reader that can be used to retrieve uncompressed
// data from an XZ file.
//
// In general, an XZ file can be a concatenation of other XZ
// files. Reads from the Reader return the concatenation of the
// uncompressed data of each.
//
// The embedded Header exposes the stream header information
// (currently only CheckType) as fields of the Reader.
type Reader struct {
Header
r io.Reader // the wrapped io.Reader
multistream bool // true if reader is in multistream mode
rEOF bool // true after io.EOF received on r
dEOF bool // true after decoder has completed
padding int // bytes of stream padding read so far, or -1 while decoding a real stream
in [inBufSize]byte // backing array for buf.in
buf *xzBuf // decoder input/output buffers
dec *xzDec // decoder state
err error // the result of the last decoder call; returned again by later Reads
}
// NewReader returns a Reader that decompresses XZ data read from r.
//
// dictMax bounds the LZMA2 dictionary size, in bytes, that the
// decompressor may use; zero selects DefaultDictMax. If a stream needs
// a larger dictionary than dictMax, Read returns ErrMemlimit.
//
// A nil r yields a Reader whose reads all return io.EOF. This is
// useful for allocating a Reader up front and initializing it later
// with Reset.
//
// For a non-nil r the stream header is read before NewReader returns,
// and any error from doing so is returned alongside the Reader. Due to
// internal buffering, the Reader may read more data than necessary
// from r.
func NewReader(r io.Reader, dictMax uint32) (*Reader, error) {
	if dictMax == 0 {
		dictMax = DefaultDictMax
	}
	noSource := r == nil
	z := &Reader{
		r:           r,
		multistream: true,
		rEOF:        noSource,
		dEOF:        noSource,
		padding:     -1,
		buf:         &xzBuf{},
	}
	z.dec = xzDecInit(dictMax, &z.Header)
	if noSource {
		return z, nil
	}
	_, err := z.Read(nil) // read stream header
	return z, err
}
// decode wraps xzDecRun and additionally handles stream padding, which
// it treats as a kind of stream that decodes to nothing.
//
// z.padding == -1 means a real stream is being decoded and the call is
// forwarded to xzDecRun. z.padding >= 0 means padding is being
// consumed and z.padding counts the padding bytes seen so far.
func (z *Reader) decode() (ret xzRet) {
	if z.padding < 0 {
		return xzDecRun(z.dec, z.buf)
	}

	// consume every padding (zero) byte available in the input buffer
	for z.buf.inPos < len(z.buf.in) && z.buf.in[z.buf.inPos] == 0 {
		z.buf.inPos++
		z.padding++
	}

	bufferDrained := z.buf.inPos == len(z.buf.in)
	if bufferDrained && !z.rEOF {
		// more padding may follow; continue on the next call
		return xzOK
	}

	// out of padding: its total length must be a multiple of four
	if z.padding%4 != 0 {
		return xzDataError
	}
	if !bufferDrained {
		// more input data is available, so prepare for the next stream
		xzDecReset(z.dec)
	}
	return xzStreamEnd
}
// Read decompresses data into p and returns the number of bytes
// written, implementing io.Reader. It returns io.EOF once the decoder
// has finished. An error reported by the decoder is saved and returned
// again by every later call. An error from the wrapped reader (other
// than io.EOF) is returned but not saved.
//
// Read(nil) is used internally to read the stream header: the loop
// keeps running until the check type is known even though p is empty.
func (z *Reader) Read(p []byte) (n int, err error) {
// restore err
err = z.err
// set decoder output buffer to p
z.buf.out = p
z.buf.outPos = 0
for {
// update n
n = z.buf.outPos
// if last call to decoder ended with an error, return that error
if err != nil {
break
}
// if decoder has finished, return with err == io.EOF
if z.dEOF {
err = io.EOF
break
}
// if p full, return with err == nil, unless we have not yet
// read the stream header with Read(nil)
if n == len(p) && z.CheckType != checkUnset {
break
}
// if needed, read more data from z.r
if z.buf.inPos == len(z.buf.in) && !z.rEOF {
rn, e := z.r.Read(z.in[:])
if e != nil && e != io.EOF {
// read error: the loop is left before z.err is updated, so
// this error is not remembered for later calls
// NOTE(review): any rn bytes returned together with the error
// are not passed to the decoder - confirm this is intended
err = e
break
}
if e == io.EOF {
z.rEOF = true
}
// set new input buffer in z.buf
z.buf.in = z.in[:rn]
z.buf.inPos = 0
}
// decode more data
ret := z.decode()
switch ret {
case xzOK:
// no action needed
case xzStreamEnd:
if z.padding >= 0 {
// stream padding finished: switch back to stream decoding, and
// finish altogether if not in multistream mode or input is
// exhausted
z.padding = -1
if !z.multistream || z.rEOF {
z.dEOF = true
}
} else {
// a real stream finished: start consuming stream padding
z.padding = 0
}
case xzUnsupportedCheck:
err = ErrUnsupportedCheck
case xzMemlimitError:
err = ErrMemlimit
case xzFormatError:
err = ErrFormat
case xzOptionsError:
err = ErrOptions
case xzDataError:
err = ErrData
case xzBufError:
err = ErrBuf
}
// save err
z.err = err
}
return
}
// Multistream controls whether the reader is operating in multistream
// mode.
//
// If enabled (the default), the Reader expects the input to be a
// sequence of XZ streams, possibly interspersed with stream padding,
// which it reads one after another. The effect is that the
// concatenation of a sequence of XZ streams or XZ files is
// treated as equivalent to the compressed result of the concatenation
// of the sequence. This is standard behaviour for XZ readers.
//
// Calling Multistream(false) disables this behaviour; disabling the
// behaviour can be useful when reading file formats that distinguish
// individual XZ streams. In this mode, when the Reader reaches the
// end of the stream, Read returns io.EOF. To start the next stream,
// call z.Reset(nil) followed by z.Multistream(false); the second call
// is needed because Reset switches multistream mode back on. If there
// is no next stream, z.Reset(nil) will return io.EOF.
func (z *Reader) Multistream(ok bool) {
z.multistream = ok
}
// Reset prepares the Reader z for reuse. Multistream mode is switched
// back on in every case.
//
// With a non-nil r, all of z's state is discarded and z becomes
// equivalent to a Reader freshly returned by NewReader, but reading
// from r. This permits reusing a Reader rather than allocating a new
// one.
//
// With a nil r, the wrapped reader and the internal buffering are
// kept. If the Reader was at the end of a stream it is made ready to
// read any follow on streams; if there are none, Reset returns io.EOF.
// If the Reader was not at the end of a stream, z.Reset(nil) does
// nothing more.
func (z *Reader) Reset(r io.Reader) error {
	z.multistream = true

	if r != nil {
		// start from scratch on the new source
		z.r = r
		z.rEOF, z.dEOF = false, false
		z.padding = -1
		z.buf.in, z.buf.inPos = nil, 0
		xzDecReset(z.dec)
		z.err = nil
	} else {
		// keep the current source and whatever is already buffered
		if !z.dEOF {
			return nil
		}
		if z.rEOF {
			return io.EOF
		}
		z.dEOF = false
	}

	_, err := z.Read(nil) // read stream header
	return err
}