wix-on-linux/makecab.py
Simon Tatham 4aa85aa621 Fix checksum calculation in CAB data records.
[MS-CAB] is pretty unclearly worded in this area. Turns out that if
the sequence of bytes being checksummed is not a multiple of 4 bytes,
you are supposed to _first_ reverse the order of the trailing (n % 4)
bytes, and _then_ append zero bytes to pad to a multiple of 4, before
you do the main checksum operation of XORing together all the 32-bit
words of the input.
2017-05-16 20:11:53 +01:00

111 lines
3.7 KiB
Python
Executable file

#!/usr/bin/env python
import sys
import os
import time
import zlib
import struct
from collections import namedtuple
# Record layouts used when writing the cabinet.  Each struct.Struct is
# paired with a namedtuple whose fields are listed in the same order as
# the struct's format codes, so a record is serialised with
# SOME_s.pack(*record).  The "<" prefix selects little-endian encoding
# with no alignment padding.

# Cabinet file header, followed by its fixed signature value.
CFHEADER_s = struct.Struct("<4sLLLLLBBHHHHH")
CFHEADER = namedtuple("CFHEADER", "sig res0 size res1 firstfile res2 "
                      "verminor vermajor folders files flags setid icabinet")
# A byte string, so that it can be packed into the "4s" field on
# Python 3 as well as Python 2 (where b"MSCF" is the same as "MSCF").
CFHEADER_sig = b"MSCF"

# Folder record: where its data blocks start, how many, and how compressed.
CFFOLDER_s = struct.Struct("<LHH")
CFFOLDER = namedtuple("CFFOLDER", "firstdata ndata compresstype")

# Fixed-size part of a file record; build_cab appends the NUL-terminated
# file name after it.
CFFILE_s = struct.Struct("<LLHHHH")
CFFILE = namedtuple("CFFILE", "size offset ifolder date time attrs")

# Header preceding each block of compressed data.
CFDATA_s = struct.Struct("<LHH")
CFDATA = namedtuple("CFDATA", "checksum compressedlen uncompressedlen")
def mszip(data):
    """Compress one block of data in MSZIP form and return the bytes.

    The data is deflated at maximum compression level as a raw stream
    (the negative window-bits value tells zlib to emit no zlib header or
    trailer), and the result is prefixed with the two-byte MSZIP header
    "CK".

    data must be a byte string.
    """
    compressor = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                                  zlib.DEF_MEM_LEVEL, zlib.Z_DEFAULT_STRATEGY)
    compressed = compressor.compress(data) + compressor.flush()
    # A bytes literal keeps this working on Python 3, where zlib returns
    # bytes; on Python 2 b"CK" is just "CK".
    return b"CK" + compressed # add MSZIP header
def packdate(y, m, d):
    """Combine a year, month and day into one packed integer.

    The year is stored as an offset from 1980 in bits 9 and upwards,
    the month from bit 5, and the day in the lowest bits.
    """
    year_field = (y - 1980) << 9
    month_field = m << 5
    return year_field | month_field | d
def packtime(h, m, s):
    """Combine an hour, minute and second into one packed integer.

    The hour is stored from bit 11 upwards and the minute from bit 5.
    The seconds are halved (so only two-second resolution is kept) and
    stored in the lowest bits.
    """
    hour_field = h << 11
    minute_field = m << 5
    half_seconds = s >> 1
    return hour_field | minute_field | half_seconds
def checksum(data):
    """Return the checksum of a byte string as an unsigned 32-bit integer.

    The checksum is the XOR of all the little-endian 32-bit words of the
    input.  If the length is not a multiple of 4, the trailing
    (len(data) % 4) bytes are first reversed in order and then padded
    with zero bytes to a full word before being XORed in.

    data must be a byte string; an empty input gives 0.
    """
    tail_len = len(data) & 3
    body_len = len(data) - tail_len
    # Reverse the leftover bytes *before* zero-padding them to 4 bytes.
    # Slice reversal and a bytes literal work on both Python 2 and 3.
    tail = data[body_len:][::-1] + b"\0" * (-tail_len & 3)
    padded = data[:body_len] + tail
    toret = 0
    # Decode every 32-bit word in a single call, then fold them together.
    for word in struct.unpack("<%dL" % (len(padded) // 4), padded):
        toret ^= word
    return toret
def build_cab(files):
    """Build a cabinet containing the given files and return it as bytes.

    files is a sequence (its len() is taken) of (name, data, mtime)
    tuples: the name to store in the cabinet, the file contents as a
    byte string, and a modification time in seconds since the epoch,
    which is converted with time.gmtime before being packed.

    All files are concatenated into a single folder (ifolder=0), which
    is split into blocks of at most 0x8000 uncompressed bytes, each
    compressed independently with mszip() and preceded by a CFDATA
    header carrying its checksum.

    The returned string is laid out as: CFHEADER, one CFFOLDER, the
    CFFILE entries, then the data blocks.
    """
    file_entries = []
    contents = []
    folder_offset = 0
    for name, data, mtime in files:
        if not isinstance(name, bytes):
            # Text names (e.g. command-line arguments on Python 3) have
            # to be turned into bytes before being written out.
            # NOTE(review): non-ASCII names may also need a name-encoding
            # attribute bit set in attrs -- confirm against [MS-CAB].
            name = name.encode("utf-8")
        mtime_u = time.gmtime(mtime)
        fileheader = CFFILE(
            size=len(data), offset=folder_offset, ifolder=0, attrs=0,
            date=packdate(mtime_u.tm_year, mtime_u.tm_mon, mtime_u.tm_mday),
            time=packtime(mtime_u.tm_hour, mtime_u.tm_min, mtime_u.tm_sec))
        # Each entry is the fixed-size record plus the NUL-terminated name.
        file_entries.append(CFFILE_s.pack(*fileheader) + name + b"\0")
        contents.append(data)
        folder_offset += len(data)
    # Join once at the end rather than growing a string with +=.
    fileheaders = b"".join(file_entries)
    uncompressed_data = b"".join(contents)

    data_records = []
    for offset in range(0, len(uncompressed_data), 0x8000):
        uncompressed_block = uncompressed_data[offset:offset+0x8000]
        compressed_block = mszip(uncompressed_block)
        blockheader = CFDATA(
            checksum=0,
            compressedlen=len(compressed_block),
            uncompressedlen=len(uncompressed_block))
        # The checksum covers the rest of the block header (everything
        # after the 4-byte checksum field itself) plus the compressed
        # data, so pack the header once with a dummy checksum to get
        # those bytes, then fill the real value in.
        header_after_checksum = CFDATA_s.pack(*blockheader)[4:]
        blockheader = blockheader._replace(
            checksum=checksum(header_after_checksum + compressed_block))
        data_records.append(CFDATA_s.pack(*blockheader) + compressed_block)
    compressed_data = b"".join(data_records)

    totalsize = (CFHEADER_s.size +
                 CFFOLDER_s.size +
                 len(fileheaders) +
                 len(compressed_data))
    header = CFHEADER(
        sig=CFHEADER_sig, res0=0, res1=0, res2=0,
        vermajor=1, verminor=3, folders=1, files=len(files),
        flags=0, setid=0, icabinet=0, size=totalsize,
        # File entries start right after the header and the one folder.
        firstfile=CFHEADER_s.size + CFFOLDER_s.size)
    folder = CFFOLDER(
        ndata=len(data_records), compresstype=1,
        # Data blocks start right after the file entries.
        firstdata=(CFHEADER_s.size + CFFOLDER_s.size + len(fileheaders)))
    return (CFHEADER_s.pack(*header) +
            CFFOLDER_s.pack(*folder) +
            fileheaders +
            compressed_data)
def main():
    """Command-line entry point: write a cabinet built from named files.

    Arguments are the output file name followed by zero or more
    (cabname, filename) pairs, where cabname is the name to record in
    the cabinet and filename is the file on disk to read.  Each file's
    modification time is taken from os.stat.

    All input files are read before the output file is opened, so a
    missing input does not leave a partial cabinet behind.
    """
    args = sys.argv[1:]
    # Need the output name plus complete pairs, i.e. an odd count.
    # Reject anything else up front with a usage message instead of
    # failing with an IndexError part-way through.
    if len(args) % 2 != 1:
        sys.exit("usage: makecab.py OUTFILE [CABNAME FILENAME]...")
    outfile = args[0]
    files = []
    for cabname, filename in zip(args[1::2], args[2::2]):
        with open(filename, "rb") as f:
            filedata = f.read()
        files.append((cabname, filedata, os.stat(filename).st_mtime))
    cabdata = build_cab(files)
    with open(outfile, "wb") as f:
        f.write(cabdata)

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()