# ***** BEGIN LICENSE BLOCK ***** # Version: MPL 1.1/GPL 2.0/LGPL 2.1 # # The contents of this file are subject to the Mozilla Public License Version # 1.1 (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # http://www.mozilla.org/MPL/ # # Software distributed under the License is distributed on an "AS IS" basis, # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License # for the specific language governing rights and limitations under the # License. # # The Original Code is mozilla.org code # # The Initial Developer of the Original Code is # Mozilla Foundation. # Portions created by the Initial Developer are Copyright (C) 2010 # the Initial Developer. All Rights Reserved. # # Contributor(s): # Taras Glek # # Alternatively, the contents of this file may be used under the terms of # either the GNU General Public License Version 2 or later (the "GPL"), or # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), # in which case the provisions of the GPL or the LGPL are applicable instead # of those above. If you wish to allow use of your version of this file only # under the terms of either the GPL or the LGPL, and not to allow others to # use your version of this file under the terms of the MPL, indicate your # decision by deleting the provisions above and replace them with the notice # and other provisions required by the GPL or the LGPL. If you do not delete # the provisions above, a recipient may use your version of this file under # the terms of any one of the MPL, the GPL or the LGPL. 
#
# ***** END LICENSE BLOCK *****

import sys
import os
import subprocess
import struct
import re

# Record layouts used when reading and re-serializing a zip/jar file.
# Every entry is a (field name, field type) pair.  The type is either a key
# of ``type_mapping`` (a fixed-size integer) or the name of another field of
# the same record whose value is the byte length of this variable-length
# field.

# Local file header; the entry's (compressed) data is modelled as its last
# field.
local_file_header = [
    ("signature", "uint32"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extra_field_size", "uint16"),
    ("filename", "filename_size"),
    ("extra_field", "extra_field_size"),
    ("data", "compressed_size"),
]

# Central directory entry.
cdir_entry = [
    ("signature", "uint32"),
    ("creator_version", "uint16"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extrafield_size", "uint16"),
    ("filecomment_size", "uint16"),
    ("disknum", "uint16"),
    ("internal_attr", "uint16"),
    ("external_attr", "uint32"),
    ("offset", "uint32"),
    ("filename", "filename_size"),
    ("extrafield", "extrafield_size"),
    ("filecomment", "filecomment_size"),
]

# End-of-central-directory record (fixed-size part only).
cdir_end = [
    ("signature", "uint32"),
    ("disk_num", "uint16"),
    ("cdir_disk", "uint16"),
    ("disk_entries", "uint16"),
    ("cdir_entries", "uint16"),
    ("cdir_size", "uint32"),
    ("cdir_offset", "uint32"),
    ("comment_size", "uint16"),
]

# Maps the fixed-size type names used above to ``struct`` format characters.
type_mapping = {"uint32": "I", "uint16": "H"}


def format_struct(format):
    """Translate a record layout into a ``struct`` format string.

    ``format`` is a sequence of (name, type) pairs as in the layout tables
    above.

    Returns a tuple ``(fmt, string_fields)``:
      * ``fmt`` is a little-endian ("<") ``struct`` format covering only the
        fixed-size fields, in layout order;
      * ``string_fields`` maps each variable-length field name to the name of
        the field that holds its length.
    """
    string_fields = {}
    fmt = "<"
    for (name, value) in format:
        try:
            fmt += type_mapping[value][0]
        except KeyError:
            # Not a known integer type: the "type" names the size field.
            string_fields[name] = value
    return (fmt, string_fields)


def size_of(format):
    """Return the byte size of the fixed-size part of a record layout."""
    return struct.calcsize(format_struct(format)[0])


class MyStruct:
    """A record whose fields are accessed as attributes.

    Field values live in the ``struct_members`` dict rather than directly in
    the instance ``__dict__``; only fields previously registered through
    ``addMember`` can be read or assigned as attributes.
    """

    def __init__(self, format, string_fields):
        # Write through __dict__ because __setattr__ rejects unknown names.
        self.__dict__["struct_members"] = {}
        self.__dict__["format"] = format
        self.__dict__["string_fields"] = string_fields

    def addMember(self, name, value):
        """Register (or overwrite) field ``name`` with ``value``."""
        self.__dict__["struct_members"][name] = value

    def __getattr__(self, item):
        """Return the value of field ``item``.

        Only called when normal attribute lookup fails.  Raises
        AttributeError (naming the missing field and the known ones) when no
        such field has been added.
        """
        # .get() keeps lookups safe even if __init__ has not run.
        members = self.__dict__.get("struct_members", {})
        if item in members:
            return members[item]
        raise AttributeError(
            "no %s; known members: %s" % (item, ", ".join(sorted(members))))

    def __setattr__(self, item, value):
        """Assign to an existing field; raise AttributeError for new names."""
        if item in self.__dict__["struct_members"]:
            self.__dict__["struct_members"][item] = value
        else:
            raise AttributeError(item)

    def pack(self):
        """Serialize the record to bytes.

        All fixed-size fields are packed first (in layout order), followed
        by the variable-length fields in layout order.  Variable-length
        values that are not ``bytes`` are encoded as UTF-8, and the encoded
        value replaces the stored one.
        """
        trailing = []
        values = []
        string_fields = self.__dict__["string_fields"]
        struct_members = self.__dict__["struct_members"]
        format = self.__dict__["format"]
        for (name, _) in format:
            if name in string_fields:
                if not isinstance(struct_members[name], bytes):
                    struct_members[name] = struct_members[name].encode('utf-8')
                trailing.append(struct_members[name])
            else:
                values.append(struct_members[name])
        return struct.pack(format_struct(format)[0], *values) + b"".join(trailing)


# Signature expected in the end-of-central-directory record.
ENDSIG = 0x06054b50


def assert_true(cond, msg):
    """Raise ``Exception(msg)`` unless ``cond`` is truthy."""
    if not cond:
        raise Exception(msg)


class BinaryBlob:
    """The full contents of a file held in memory, with a read cursor."""

    def __init__(self, f):
        """Read the file at path ``f`` entirely into memory."""
        # Close the handle as soon as the data has been read.
        with open(f, "rb") as fd:
            self.data = fd.read()
        self.offset = 0
        self.length = len(self.data)

    def readAt(self, pos, length):
        """Return ``length`` bytes starting at ``pos``.

        Also moves the cursor (``self.offset``) to ``pos + length``.
        """
        self.offset = pos + length
        return self.data[pos:self.offset]

    def read_struct(self, format, offset=None):
        """Parse the record described by ``format`` and return a MyStruct.

        ``offset`` is the position of the record; when omitted the current
        cursor is used.  On return the cursor is positioned right after the
        record, including its variable-length fields.

        Raises Exception if re-serializing the parsed record does not
        reproduce the bytes that were read.
        """
        if offset is None:
            offset = self.offset
        (fstr, string_fields) = format_struct(format)
        size = struct.calcsize(fstr)
        data = self.readAt(offset, size)
        ret = struct.unpack(fstr, data)
        retstruct = MyStruct(format, string_fields)
        i = 0
        for (name, _) in format:
            if name not in string_fields:
                member_data = ret[i]
                i = i + 1
            else:
                # zip has data fields which are described by other struct
                # fields; do an additional read (at the cursor) to fill
                # them in
                member_desc = string_fields[name]
                member_data = self.readAt(self.offset, retstruct.__getattr__(member_desc))
            retstruct.addMember(name, member_data)
        # sanity check serialization code
        data = self.readAt(offset, self.offset - offset)
        out_data = retstruct.pack()
        assert_true(out_data == data, "Serialization fail %d !=%d"% (len(out_data), len(data)))
        return retstruct


def optimizejar(jar, outjar, inlog = None):
    if inlog is not None:
        inlog = open(inlog).read().rstrip()
        # in the case of an
empty log still move the index forward if len(inlog) == 0: inlog = [] else: inlog = inlog.split("\n") outlog = [] jarblob = BinaryBlob(jar) dirend = jarblob.read_struct(cdir_end, jarblob.length - size_of(cdir_end)) assert_true(dirend.signature == ENDSIG, "no signature in the end"); cdir_offset = dirend.cdir_offset readahead = 0 if inlog is None and cdir_offset == 4: readahead = struct.unpack("= old_entry_offset + len(data): outlog.append(entry.filename) reordered_count += 1 if inlog is None: dirend.cdir_offset = out_offset if dups_found > 0: print("WARNING: Found %d duplicate files taking %d bytes"%(dups_found, dupe_bytes)) dirend.cdir_size = len(cdir_data) dirend.disk_entries = dirend.cdir_entries dirend_data = dirend.pack() assert_true(size_of(cdir_end) == len(dirend_data), "Failed to serialize directory end correctly. Serialized size;%d, expected:%d"%(len(dirend_data), size_of(cdir_end))); outfd.seek(dirend.cdir_offset) outfd.write(cdir_data) outfd.write(dirend_data) # for ordered jars the central directory is written in the begining of the file, so a second central-directory # entry has to be written in the end of the file if inlog is not None: outfd.seek(0) outfd.write(struct.pack("