build: implement zip64 stripping (#22998)

When a zip contains files larger than 4 GB, the 4-byte length headers are
no longer sufficient. Zip64 defines an extra header 0x0001 which
may contain 8-byte lengths (up to 16 exabytes) [uncompressed and compressed].

Read this value when performing extra data cleaning and override the
bogus value if the header is available.

Read https://blog.yaakov.online/zip64-go-big-or-go-home/ for more
information on the Zip64 extra header.

This is the first known implementation of zip64 stripping.
This commit is contained in:
Andrea Brancaleoni 2020-04-09 19:39:46 +02:00 committed by GitHub
parent f4cf23f6ac
commit ff21444429
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -182,7 +182,7 @@ def zero_zip_date_time(fname):
def _zero_zip_date_time(zip_): def _zero_zip_date_time(zip_):
def purify_extra_data(mm, offset, length): def purify_extra_data(mm, offset, length, compressed_size=0):
extra_header_struct = Struct("<HH") extra_header_struct = Struct("<HH")
# 0. id # 0. id
# 1. length # 1. length
@ -194,6 +194,16 @@ def _zero_zip_date_time(zip_):
UNIX_EXTRA_DATA = 0x7875 UNIX_EXTRA_DATA = 0x7875
# Unix extra data; UID / GID stuff, see # Unix extra data; UID / GID stuff, see
# ftp://ftp.info-zip.org/pub/infozip/src/zip30.zip ./proginfo/extrafld.txt # ftp://ftp.info-zip.org/pub/infozip/src/zip30.zip ./proginfo/extrafld.txt
ZIP64_EXTRA_HEADER = 0x0001
zip64_extra_struct = Struct("<HHQQ")
# ZIP64.
# When a ZIP64 extra field is present, its 8-byte length
# will override the 4-byte length defined in canonical zips.
# This is in the form:
# - 0x0001 (header_id)
# - 0x0010 [16] (header_length)
# - ... (8byte uncompressed_length)
# - ... (8byte compressed_length)
mlen = offset + length mlen = offset + length
while offset < mlen: while offset < mlen:
@ -205,15 +215,17 @@ def _zero_zip_date_time(zip_):
if header_id in (EXTENDED_TIME_DATA, UNIX_EXTRA_DATA): if header_id in (EXTENDED_TIME_DATA, UNIX_EXTRA_DATA):
values[0] = STRIPZIP_OPTION_HEADER values[0] = STRIPZIP_OPTION_HEADER
for i in xrange(2, len(values)): for i in range(2, len(values)):
values[i] = 0xff values[i] = 0xff
extra_struct.pack_into(mm, offset, *values) extra_struct.pack_into(mm, offset, *values)
elif header_id != STRIPZIP_OPTION_HEADER: if header_id == ZIP64_EXTRA_HEADER:
return False assert header_length == 16
values = list(zip64_extra_struct.unpack_from(mm, offset))
header_id, header_length, uncompressed_size, compressed_size = values
offset += extra_header_struct.size + header_length offset += extra_header_struct.size + header_length
return True return compressed_size
FILE_HEADER_SIGNATURE = 0x04034b50 FILE_HEADER_SIGNATURE = 0x04034b50
CENDIR_HEADER_SIGNATURE = 0x02014b50 CENDIR_HEADER_SIGNATURE = 0x02014b50
@ -263,9 +275,10 @@ def _zero_zip_date_time(zip_):
# reset last_mod_date # reset last_mod_date
values[5] = 0x21 values[5] = 0x21
local_file_header_struct.pack_into(mm, offset, *values) local_file_header_struct.pack_into(mm, offset, *values)
offset += local_file_header_struct.size + compressed_size + name_length + extra_field_length offset += local_file_header_struct.size + name_length
if extra_field_length != 0: if extra_field_length != 0:
purify_extra_data(mm, offset - extra_field_length - compressed_size, extra_field_length) compressed_size = purify_extra_data(mm, offset, extra_field_length, compressed_size)
offset += compressed_size + extra_field_length
while offset < archive_size: while offset < archive_size:
if signature_struct.unpack_from(mm, offset) != (CENDIR_HEADER_SIGNATURE,): if signature_struct.unpack_from(mm, offset) != (CENDIR_HEADER_SIGNATURE,):