build: implement zip64 stripping (#22998)
When zip contains files larger than 4GB 4bytes length headers are not sufficient anymore. Zip64 defines an extra header 0x0001 which may contain 8byte lengthed (16 exabytes) lengths [uncompressed and compressed]. Read this value when performing extra data cleaning and override the bogus value if the header is available. Read https://blog.yaakov.online/zip64-go-big-or-go-home/ for more information on Zip64 extra header. This is the first known implementation of zip64 stripping.
This commit is contained in:
parent
f4cf23f6ac
commit
ff21444429
1 changed files with 20 additions and 7 deletions
|
@ -182,7 +182,7 @@ def zero_zip_date_time(fname):
|
||||||
|
|
||||||
|
|
||||||
def _zero_zip_date_time(zip_):
|
def _zero_zip_date_time(zip_):
|
||||||
def purify_extra_data(mm, offset, length):
|
def purify_extra_data(mm, offset, length, compressed_size=0):
|
||||||
extra_header_struct = Struct("<HH")
|
extra_header_struct = Struct("<HH")
|
||||||
# 0. id
|
# 0. id
|
||||||
# 1. length
|
# 1. length
|
||||||
|
@ -194,6 +194,16 @@ def _zero_zip_date_time(zip_):
|
||||||
UNIX_EXTRA_DATA = 0x7875
|
UNIX_EXTRA_DATA = 0x7875
|
||||||
# Unix extra data; UID / GID stuff, see
|
# Unix extra data; UID / GID stuff, see
|
||||||
# ftp://ftp.info-zip.org/pub/infozip/src/zip30.zip ./proginfo/extrafld.txt
|
# ftp://ftp.info-zip.org/pub/infozip/src/zip30.zip ./proginfo/extrafld.txt
|
||||||
|
ZIP64_EXTRA_HEADER = 0x0001
|
||||||
|
zip64_extra_struct = Struct("<HHQQ")
|
||||||
|
# ZIP64.
|
||||||
|
# When a ZIP64 extra field is present his 8byte length
|
||||||
|
# will override the 4byte length defined in canonical zips.
|
||||||
|
# This is in the form:
|
||||||
|
# - 0x0001 (header_id)
|
||||||
|
# - 0x0010 [16] (header_length)
|
||||||
|
# - ... (8byte uncompressed_length)
|
||||||
|
# - ... (8byte compressed_length)
|
||||||
mlen = offset + length
|
mlen = offset + length
|
||||||
|
|
||||||
while offset < mlen:
|
while offset < mlen:
|
||||||
|
@ -205,15 +215,17 @@ def _zero_zip_date_time(zip_):
|
||||||
|
|
||||||
if header_id in (EXTENDED_TIME_DATA, UNIX_EXTRA_DATA):
|
if header_id in (EXTENDED_TIME_DATA, UNIX_EXTRA_DATA):
|
||||||
values[0] = STRIPZIP_OPTION_HEADER
|
values[0] = STRIPZIP_OPTION_HEADER
|
||||||
for i in xrange(2, len(values)):
|
for i in range(2, len(values)):
|
||||||
values[i] = 0xff
|
values[i] = 0xff
|
||||||
extra_struct.pack_into(mm, offset, *values)
|
extra_struct.pack_into(mm, offset, *values)
|
||||||
elif header_id != STRIPZIP_OPTION_HEADER:
|
if header_id == ZIP64_EXTRA_HEADER:
|
||||||
return False
|
assert header_length == 16
|
||||||
|
values = list(zip64_extra_struct.unpack_from(mm, offset))
|
||||||
|
header_id, header_length, uncompressed_size, compressed_size = values
|
||||||
|
|
||||||
offset += extra_header_struct.size + header_length
|
offset += extra_header_struct.size + header_length
|
||||||
|
|
||||||
return True
|
return compressed_size
|
||||||
|
|
||||||
FILE_HEADER_SIGNATURE = 0x04034b50
|
FILE_HEADER_SIGNATURE = 0x04034b50
|
||||||
CENDIR_HEADER_SIGNATURE = 0x02014b50
|
CENDIR_HEADER_SIGNATURE = 0x02014b50
|
||||||
|
@ -263,9 +275,10 @@ def _zero_zip_date_time(zip_):
|
||||||
# reset last_mod_date
|
# reset last_mod_date
|
||||||
values[5] = 0x21
|
values[5] = 0x21
|
||||||
local_file_header_struct.pack_into(mm, offset, *values)
|
local_file_header_struct.pack_into(mm, offset, *values)
|
||||||
offset += local_file_header_struct.size + compressed_size + name_length + extra_field_length
|
offset += local_file_header_struct.size + name_length
|
||||||
if extra_field_length != 0:
|
if extra_field_length != 0:
|
||||||
purify_extra_data(mm, offset - extra_field_length - compressed_size, extra_field_length)
|
compressed_size = purify_extra_data(mm, offset, extra_field_length, compressed_size)
|
||||||
|
offset += compressed_size + extra_field_length
|
||||||
|
|
||||||
while offset < archive_size:
|
while offset < archive_size:
|
||||||
if signature_struct.unpack_from(mm, offset) != (CENDIR_HEADER_SIGNATURE,):
|
if signature_struct.unpack_from(mm, offset) != (CENDIR_HEADER_SIGNATURE,):
|
||||||
|
|
Loading…
Reference in a new issue