
[MS-CAB] is unclearly worded in this area. It turns out that if the sequence of bytes being checksummed is not a multiple of 4 bytes long, you are supposed to _first_ reverse the order of the trailing (n % 4) bytes, and _then_ append zero bytes to pad to a multiple of 4, before you do the main checksum operation of XORing together all the 32-bit words of the input.
111 lines
3.7 KiB
Python
Executable file
111 lines
3.7 KiB
Python
Executable file
#!/usr/bin/env python
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import zlib
|
|
import struct
|
|
from collections import namedtuple
|
|
|
|
# Fixed-size on-disk records of a cabinet file.  Each record has a
# little-endian struct layout ("<") and a namedtuple whose field order
# matches that layout, so `SOME_s.pack(*record)` serialises a record.

# Cabinet header.
CFHEADER_s = struct.Struct("<4sLLLLLBBHHHHH")
CFHEADER = namedtuple("CFHEADER", "sig res0 size res1 firstfile res2 "
                      "verminor vermajor folders files flags setid icabinet")
# Must be a byte string: struct's "4s" field rejects text on Python 3.
# (On Python 2, b"MSCF" is the same object type as "MSCF".)
CFHEADER_sig = b"MSCF"

# Folder record.
CFFOLDER_s = struct.Struct("<LHH")
CFFOLDER = namedtuple("CFFOLDER", "firstdata ndata compresstype")

# File record; the NUL-terminated file name follows it in the output.
CFFILE_s = struct.Struct("<LLHHHH")
CFFILE = namedtuple("CFFILE", "size offset ifolder date time attrs")

# Data block header; the compressed payload follows it in the output.
CFDATA_s = struct.Struct("<LHH")
CFDATA = namedtuple("CFDATA", "checksum compressedlen uncompressedlen")
|
|
|
|
def mszip(data):
    """Compress one block of bytes into an MSZIP block.

    The result is the two-byte signature b"CK" followed by a raw DEFLATE
    stream of `data`.  `data` must be a byte string.
    """
    # Negative wbits asks zlib for a raw DEFLATE stream, with no zlib
    # header or trailing checksum.
    compressor = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                                  zlib.DEF_MEM_LEVEL, zlib.Z_DEFAULT_STRATEGY)
    # Bytes literal so the concatenation works on Python 3 as well as 2.
    return b"CK" + compressor.compress(data) + compressor.flush()
|
|
|
|
def packdate(y, m, d):
    """Pack a calendar date into a single integer.

    The year is stored as an offset from 1980 starting at bit 9, the
    month starts at bit 5, and the day occupies the low bits.
    """
    year_bits = (y - 1980) << 9
    month_bits = m << 5
    return year_bits | month_bits | d
|
|
def packtime(h, m, s):
    """Pack a time of day into a single integer.

    The hour starts at bit 11, the minute starts at bit 5, and the low
    bits hold the seconds halved (so odd seconds round down).
    """
    hour_bits = h << 11
    minute_bits = m << 5
    half_seconds = s >> 1
    return hour_bits | minute_bits | half_seconds
|
|
|
|
def checksum(data):
    """Return the cabinet checksum of the byte string `data`.

    The checksum is the XOR of all little-endian 32-bit words of the
    input.  If the length is not a multiple of 4, the trailing
    (len % 4) bytes are first reversed and then zero-padded to a full
    word before being XORed in.  Empty input yields 0.
    """
    tail_len = len(data) & 3
    body_len = len(data) - tail_len

    result = 0
    # Decode every complete word in one call instead of one per word.
    for word in struct.unpack_from("<%dL" % (body_len // 4), data):
        result ^= word

    if tail_len:
        # Reverse the leftover bytes *before* padding; slicing with
        # [::-1] works on byte strings under both Python 2 and 3.
        tail = data[body_len:][::-1] + b"\0" * (4 - tail_len)
        result ^= struct.unpack("<L", tail)[0]
    return result
|
|
|
|
def build_cab(files):
    """Build a cabinet file image and return it as a byte string.

    `files` is an iterable of (name, data, mtime) tuples:
      * name  - the name to store in the cabinet.  A byte string is
                stored as given.  A text (unicode) string is encoded as
                UTF-8, and the _A_NAME_IS_UTF attribute is set if the
                result contains non-ASCII bytes.
      * data  - the file contents, as a byte string.
      * mtime - modification time in seconds since the epoch; it is
                converted with time.gmtime() and stored as a packed
                date and time.

    All files are stored in one folder, compressed with mszip() in
    blocks of at most 0x8000 uncompressed bytes.
    """
    # Materialise first: we iterate once and need len() afterwards, so
    # a generator argument would otherwise fail.
    files = list(files)

    # Collect pieces in lists and join once; repeated += on a growing
    # byte string is quadratic and also breaks on Python 3 when started
    # from a text literal.
    header_parts = []
    content_parts = []
    content_offset = 0
    for name, data, mtime in files:
        attrs = 0
        if not isinstance(name, bytes):
            name = name.encode("utf-8")
            if any(byte >= 0x80 for byte in bytearray(name)):
                attrs |= 0x80  # _A_NAME_IS_UTF: name is UTF-encoded
        mtime_u = time.gmtime(mtime)
        fileheader = CFFILE(
            size=len(data), offset=content_offset, ifolder=0, attrs=attrs,
            date=packdate(mtime_u.tm_year, mtime_u.tm_mon, mtime_u.tm_mday),
            time=packtime(mtime_u.tm_hour, mtime_u.tm_min, mtime_u.tm_sec))
        header_parts.append(CFFILE_s.pack(*fileheader) + name + b"\0")
        content_parts.append(data)
        content_offset += len(data)
    fileheaders = b"".join(header_parts)
    uncompressed_data = b"".join(content_parts)

    block_parts = []
    n_data_blocks = 0
    for start in range(0, len(uncompressed_data), 0x8000):
        uncompressed_block = uncompressed_data[start:start + 0x8000]
        compressed_block = mszip(uncompressed_block)
        blockheader = CFDATA(
            checksum=0,
            compressedlen=len(compressed_block),
            uncompressedlen=len(uncompressed_block))
        # The checksum covers everything in the block after the 4-byte
        # checksum field itself: the two length fields, then the payload.
        header_after_checksum = CFDATA_s.pack(*blockheader)[4:]
        blockheader = blockheader._replace(
            checksum=checksum(header_after_checksum + compressed_block))
        block_parts.append(CFDATA_s.pack(*blockheader))
        block_parts.append(compressed_block)
        n_data_blocks += 1
    compressed_data = b"".join(block_parts)

    # Output layout: CFHEADER, one CFFOLDER, the file records, the data.
    firstfile = CFHEADER_s.size + CFFOLDER_s.size
    firstdata = firstfile + len(fileheaders)
    totalsize = firstdata + len(compressed_data)

    header = CFHEADER(
        sig=CFHEADER_sig, res0=0, res1=0, res2=0,
        vermajor=1, verminor=3, folders=1, files=len(files),
        flags=0, setid=0, icabinet=0, size=totalsize,
        firstfile=firstfile)

    folder = CFFOLDER(
        ndata=n_data_blocks, compresstype=1,  # 1 = MSZIP
        firstdata=firstdata)

    return (CFHEADER_s.pack(*header) +
            CFFOLDER_s.pack(*folder) +
            fileheaders +
            compressed_data)
|
|
|
|
def main():
    """Command-line entry point.

    Usage: OUTFILE [NAME-IN-CAB SOURCE-FILE]...

    Reads each SOURCE-FILE, stores it in the cabinet under the preceding
    NAME-IN-CAB with the source file's modification time, and writes the
    resulting cabinet to OUTFILE.  Exits with a usage message if OUTFILE
    is missing or the remaining arguments do not form complete pairs.
    """
    args = sys.argv[1:]
    # One OUTFILE plus zero or more pairs means an odd argument count.
    # Check before touching any file, instead of failing with IndexError
    # part-way through.
    if len(args) % 2 != 1:
        sys.exit("usage: %s OUTFILE [NAME-IN-CAB SOURCE-FILE]..."
                 % os.path.basename(sys.argv[0]))

    outfile = args[0]
    files = []
    for cabname, filename in zip(args[1::2], args[2::2]):
        with open(filename, "rb") as f:
            filedata = f.read()
        files.append((cabname, filedata, os.stat(filename).st_mtime))

    cabdata = build_cab(files)
    with open(outfile, "wb") as f:
        f.write(cabdata)
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
|