diff options
-rw-r--r-- | README.md | 13 | ||||
-rw-r--r-- | kcc/KCC_gui.py | 24 | ||||
-rw-r--r-- | kcc/kindlesplit.py | 384 | ||||
-rwxr-xr-x | kcc/kindlestrip.py | 236 |
4 files changed, 403 insertions, 254 deletions
diff --git a/README.md b/README.md index 7a2ef48..fd06890 100644 --- a/README.md +++ b/README.md @@ -130,11 +130,10 @@ This script born as a cross-platform alternative to `KindleComicParser` by **Dc5 The app relies and includes the following scripts/binaries: - - `KindleStrip` script © 2010-2012 by **Paul Durrant** and released in public domain -([forum thread](http://www.mobileread.com/forums/showthread.php?t=96903)) - - `rarfile.py` script © 2005-2011 **Marko Kreen** <markokr@gmail.com>, released with ISC License - - `image.py` class from **Alex Yatskov**'s [Mangle](http://foosoft.net/mangle/) with subsequent [proDOOMman](https://github.com/proDOOMman/Mangle)'s and [Birua](https://github.com/Birua/Mangle)'s patches - - Icon is by **Nikolay Verin** ([http://ncrow.deviantart.com/](http://ncrow.deviantart.com/)) and released under [CC BY-NC-SA 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/) License + - `KindleUnpack` script by **Charles M. Hannum, P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding**. Released with GPLv3 License. + - `rarfile.py` script © 2005-2011 **Marko Kreen** <markokr@gmail.com>. Released with ISC License. + - `image.py` class from **Alex Yatskov**'s [Mangle](http://foosoft.net/mangle/) with subsequent [proDOOMman](https://github.com/proDOOMman/Mangle)'s and [Birua](https://github.com/Birua/Mangle)'s patches. + - Icon is by **Nikolay Verin** ([http://ncrow.deviantart.com/](http://ncrow.deviantart.com/)) and released under [CC BY-NC-SA 3.0](http://creativecommons.org/licenses/by-nc-sa/3.0/) License. 
## SAMPLE FILES CREATED BY KCC * [Kindle Paperwhite](http://kcc.vulturis.eu/Samples/Ubunchu!-KPW.mobi) @@ -255,6 +254,7 @@ The app relies and includes the following scripts/binaries: * Hotfixed crash occurring on OS with Russian locale ####3.3: +* Created MOBI files are no longer marked as _Personal_ on newer Kindle models * Margins are now automatically omitted in Panel View mode * Layout of panels in Panel View mode is now automatically adjusted to content * Support for Virtual Panel View was removed @@ -264,9 +264,6 @@ The app relies and includes the following scripts/binaries: * Windows release is now bundled with UnRAR and 7za * Small GUI tweaks -## KNOWN ISSUES -* Removing SRCS headers sometimes fail in 32bit enviroments. Due to memory limitations. - ## COPYRIGHT Copyright (c) 2012-2013 Ciro Mattia Gonano and Paweł Jastrzębski. diff --git a/kcc/KCC_gui.py b/kcc/KCC_gui.py index 9ac509d..dbc926b 100644 --- a/kcc/KCC_gui.py +++ b/kcc/KCC_gui.py @@ -30,7 +30,7 @@ import traceback import urllib2 import time import comic2ebook -import kindlestrip +import kindlesplit from image import ProfileData from subprocess import call, Popen, STDOUT, PIPE from PyQt4 import QtGui, QtCore @@ -245,23 +245,27 @@ class WorkerThread(QtCore.QThread): True) else: self.emit(QtCore.SIGNAL("addMessage"), 'Creating MOBI file... 
Done!', 'info', True) - self.emit(QtCore.SIGNAL("addMessage"), 'Removing SRCS header...', 'info') + self.emit(QtCore.SIGNAL("addMessage"), 'Cleaning MOBI file...', 'info') os.remove(item) mobiPath = item.replace('.epub', '.mobi') - shutil.move(mobiPath, mobiPath + '_tostrip') + shutil.move(mobiPath, mobiPath + '_toclean') try: - kindlestrip.main((mobiPath + '_tostrip', mobiPath)) + if profile in ['K345', 'KHD', 'KF', 'KFHD', 'KFHD8', 'KFA']: + newKindle = True + else: + newKindle = False + mobisplit = kindlesplit.mobi_split(mobiPath + '_toclean', newKindle) + open(mobiPath, 'wb').write(mobisplit.getResult()) except Exception: self.errors = True if not self.errors: - os.remove(mobiPath + '_tostrip') - self.emit(QtCore.SIGNAL("addMessage"), 'Removing SRCS header... Done!', 'info', True) + os.remove(mobiPath + '_toclean') + self.emit(QtCore.SIGNAL("addMessage"), 'Cleaning MOBI file... Done!', 'info', True) else: - shutil.move(mobiPath + '_tostrip', mobiPath) + os.remove(mobiPath + '_toclean') + os.remove(mobiPath) self.emit(QtCore.SIGNAL("addMessage"), - 'KindleStrip failed to remove SRCS header!', 'warning') - self.emit(QtCore.SIGNAL("addMessage"), - 'MOBI file will work correctly but it will be highly oversized.', 'warning') + 'KindleUnpack failed to clean MOBI file!', 'error') else: epubSize = (os.path.getsize(item))/1024/1024 os.remove(item) diff --git a/kcc/kindlesplit.py b/kcc/kindlesplit.py new file mode 100644 index 0000000..3d2aeac --- /dev/null +++ b/kcc/kindlesplit.py @@ -0,0 +1,384 @@ +# Based on initial version of KindleUnpack. Copyright (C) 2009 Charles M. Hannum <root@ihack.net> +# Improvements Copyright (C) 2009-2012 P. Durrant, K. Hendricks, S. 
# Siebert, fandrieu, DiapDealer, nickredding
# Copyright (C) 2013 Pawel Jastrzebski <pawelj@vulturis.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Split a combined MOBI7/KF8 file produced by KindleGen into one format.

All helpers treat the file (and record 0) as an immutable byte string and
return a new byte string; nothing is modified in place.
"""

# NOTE(review): the header above states GPLv3 while __license__ says ISC;
# the value is left untouched here - confirm which one applies.
__license__ = 'ISC'
__copyright__ = '2012-2013, Ciro Mattia Gonano <ciromattia@gmail.com>, Pawel Jastrzebski <pawelj@vulturis.eu>'
__docformat__ = 'restructuredtext en'

import struct
from uuid import uuid4

# important pdb header offsets
unique_id_seed = 68
number_of_pdb_records = 76

# important palmdoc header offsets
book_length = 4
book_record_count = 8
first_pdb_record = 78

# important rec0 offsets
length_of_book = 4
mobi_header_base = 16
mobi_header_length = 20
mobi_type = 24
mobi_version = 36
first_non_text = 80
title_offset = 84
first_image_record = 108
first_content_index = 192
last_content_index = 194
kf8_last_content_index = 192  # for KF8 mobi headers
fcis_index = 200
flis_index = 208
srcs_index = 224
srcs_count = 228
primary_index = 244
datp_index = 256
huffoff = 112
hufftbloff = 120

# rec0 fields (offset, struct size code) holding section numbers that are
# inspected / shifted when image sections are moved around.
_KF8_INDEX_FIELDS = ((kf8_last_content_index, 'L'), (fcis_index, 'L'), (flis_index, 'L'),
                     (datp_index, 'L'), (hufftbloff, 'L'))


def getint(datain, ofs, sz='L'):
    """Return the big-endian unsigned integer stored at byte offset *ofs*.

    *sz* is a ``struct`` size code ('L' = 4 bytes, 'H' = 2 bytes).
    """
    i, = struct.unpack_from('>'+sz, datain, ofs)
    return i


def writeint(datain, ofs, n, length='L'):
    """Return a copy of *datain* with *n* stored big-endian at *ofs*.

    ``length == 'L'`` writes 4 bytes; any other value writes 2 bytes.
    """
    if length == 'L':
        return datain[:ofs]+struct.pack('>L', n)+datain[ofs+4:]
    else:
        return datain[:ofs]+struct.pack('>H', n)+datain[ofs+2:]


def getsecaddr(datain, secno):
    """Return ``(start, end)`` byte offsets of PDB section *secno*.

    Raises AssertionError when *secno* is outside ``0 <= secno < nsec``.
    """
    nsec = getint(datain, number_of_pdb_records, 'H')
    # The original `assert secno >= 0 & secno < nsec` parsed as
    # `secno >= (0 & secno) < nsec` and never checked the upper bound.
    # Raised explicitly so the check also survives `python -O`.
    if not 0 <= secno < nsec:
        raise AssertionError('secno %d out of range (nsec=%d)' % (secno, nsec))
    secstart = getint(datain, first_pdb_record+secno*8)
    if secno == nsec-1:
        # the last section runs to the end of the file
        secend = len(datain)
    else:
        secend = getint(datain, first_pdb_record+(secno+1)*8)
    return secstart, secend


def readsection(datain, secno):
    """Return the raw bytes of PDB section *secno*."""
    secstart, secend = getsecaddr(datain, secno)
    return datain[secstart:secend]


def writesection(datain, secno, secdata):  # overwrite, accounting for different length
    """Return *datain* with section *secno* replaced by *secdata*."""
    dataout = deletesectionrange(datain, secno, secno)
    return insertsection(dataout, secno, secdata)


def nullsection(datain, secno):  # make it zero-length without deleting it
    """Return *datain* with section *secno* emptied but its table entry kept."""
    datalst = []
    nsec = getint(datain, number_of_pdb_records, 'H')
    secstart, secend = getsecaddr(datain, secno)
    zerosecstart, _ = getsecaddr(datain, 0)
    dif = secend-secstart
    datalst.append(datain[:first_pdb_record])
    # entries up to and including secno keep their offsets
    for i in range(0, secno+1):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        datalst.append(struct.pack('>2L', ofs, flgval))
    # later sections move up by the size of the removed payload
    for i in range(secno+1, nsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs -= dif
        datalst.append(struct.pack('>2L', ofs, flgval))
    lpad = zerosecstart - (first_pdb_record + 8*nsec)
    if lpad > 0:
        datalst.append(b'\0' * lpad)
    datalst.append(datain[zerosecstart: secstart])
    datalst.append(datain[secend:])
    return b"".join(datalst)


def deletesectionrange(datain, firstsec, lastsec):  # delete a range of sections
    """Return *datain* without sections *firstsec*..*lastsec* (inclusive)."""
    datalst = []
    removed = lastsec-firstsec+1
    firstsecstart, _ = getsecaddr(datain, firstsec)
    _, lastsecend = getsecaddr(datain, lastsec)
    zerosecstart, _ = getsecaddr(datain, 0)
    # payload bytes removed plus the table entries that disappear
    dif = lastsecend - firstsecstart + 8*removed
    nsec = getint(datain, number_of_pdb_records, 'H')
    datalst.append(datain[:unique_id_seed])
    datalst.append(struct.pack('>L', 2*(nsec-removed)+1))
    datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
    datalst.append(struct.pack('>H', nsec-removed))
    newstart = zerosecstart - 8*removed
    # sections before the range only shift by the shrunken table
    for i in range(0, firstsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs -= 8 * removed
        datalst.append(struct.pack('>2L', ofs, flgval))
    # sections after the range also lose the deleted payload and are renumbered
    for i in range(lastsec+1, nsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs -= dif
        flgval = 2*(i-removed)
        datalst.append(struct.pack('>2L', ofs, flgval))
    lpad = newstart - (first_pdb_record + 8*(nsec - removed))
    if lpad > 0:
        datalst.append(b'\0' * lpad)
    datalst.append(datain[zerosecstart:firstsecstart])
    datalst.append(datain[lastsecend:])
    return b"".join(datalst)


def insertsection(datain, secno, secdata):  # insert a new section
    """Return *datain* with *secdata* inserted as a new section before *secno*."""
    datalst = []
    nsec = getint(datain, number_of_pdb_records, 'H')
    secstart, _ = getsecaddr(datain, secno)
    zerosecstart, _ = getsecaddr(datain, 0)
    dif = len(secdata)
    datalst.append(datain[:unique_id_seed])
    datalst.append(struct.pack('>L', 2*(nsec+1)+1))
    datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
    datalst.append(struct.pack('>H', nsec+1))
    newstart = zerosecstart + 8
    # earlier sections only shift by the one extra table entry
    for i in range(0, secno):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs += 8
        datalst.append(struct.pack('>2L', ofs, flgval))
    # table entry of the new section
    datalst.append(struct.pack('>2L', secstart + 8, 2*secno))
    # following sections shift by the table entry plus the new payload
    for i in range(secno, nsec):
        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
        ofs = ofs + dif + 8
        flgval = 2*(i+1)
        datalst.append(struct.pack('>2L', ofs, flgval))
    lpad = newstart - (first_pdb_record + 8*(nsec + 1))
    if lpad > 0:
        datalst.append(b'\0' * lpad)
    datalst.append(datain[zerosecstart:secstart])
    datalst.append(secdata)
    datalst.append(datain[secstart:])
    return b"".join(datalst)


def insertsectionrange(sectionsource, firstsec, lastsec, sectiontarget, targetsec):  # insert a range of sections
    """Copy sections *firstsec*..*lastsec* of *sectionsource* into *sectiontarget*.

    Sections are inserted at *targetsec* from last to first so they end up
    in their original order.
    """
    dataout = sectiontarget
    for idx in range(lastsec, firstsec-1, -1):
        dataout = insertsection(dataout, targetsec, readsection(sectionsource, idx))
    return dataout


def get_exth_params(rec0):
    """Return ``(base offset, byte length, record count)`` of the EXTH block."""
    ebase = mobi_header_base + getint(rec0, mobi_header_length)
    elen = getint(rec0, ebase+4)
    enum = getint(rec0, ebase+8)
    return ebase, elen, enum


def add_exth(rec0, exth_num, exth_bytes):
    """Return *rec0* with a new EXTH record prepended to the EXTH list."""
    ebase, elen, enum = get_exth_params(rec0)
    newrecsize = 8+len(exth_bytes)
    newrec0 = rec0[0:ebase+4]+struct.pack('>L', elen+newrecsize)+struct.pack('>L', enum+1) +\
        struct.pack('>L', exth_num) + struct.pack('>L', newrecsize)+exth_bytes+rec0[ebase+12:]
    # the title follows the EXTH block, so its offset grows with it
    newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+newrecsize)
    return newrec0


def read_exth(rec0, exth_num):
    """Return a list with the payload of every EXTH record of type *exth_num*."""
    exth_values = []
    ebase, elen, enum = get_exth_params(rec0)
    ebase += 12
    while enum > 0:
        exth_id = getint(rec0, ebase)
        if exth_id == exth_num:
            # We might have multiple exths, so build a list.
            exth_values.append(rec0[ebase+8:ebase+getint(rec0, ebase+4)])
        enum -= 1
        ebase = ebase+getint(rec0, ebase+4)
    return exth_values


def write_exth(rec0, exth_num, exth_bytes):
    """Return *rec0* with the first EXTH record *exth_num* set to *exth_bytes*.

    *rec0* is returned unchanged when no such record exists.
    """
    ebase, elen, enum = get_exth_params(rec0)
    ebase_idx = ebase+12
    enum_idx = enum
    while enum_idx > 0:
        exth_id = getint(rec0, ebase_idx)
        oldsize = getint(rec0, ebase_idx+4)
        if exth_id == exth_num:
            dif = len(exth_bytes)+8-oldsize
            newrec0 = rec0
            if dif != 0:
                newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+dif)
            return newrec0[:ebase+4]+struct.pack('>L', elen+dif) +\
                struct.pack('>L', enum)+rec0[ebase+12:ebase_idx+4] +\
                struct.pack('>L', len(exth_bytes)+8)+exth_bytes +\
                rec0[ebase_idx+oldsize:]
        enum_idx -= 1
        ebase_idx = ebase_idx+oldsize
    return rec0


def del_exth(rec0, exth_num):
    """Return *rec0* without the first EXTH record of type *exth_num*.

    *rec0* is returned unchanged when no such record exists.
    """
    ebase, elen, enum = get_exth_params(rec0)
    ebase_idx = ebase+12
    enum_idx = 0
    while enum_idx < enum:
        exth_id = getint(rec0, ebase_idx)
        exth_size = getint(rec0, ebase_idx+4)
        if exth_id == exth_num:
            newrec0 = rec0
            newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)-exth_size)
            newrec0 = newrec0[:ebase_idx]+newrec0[ebase_idx+exth_size:]
            newrec0 = newrec0[0:ebase+4]+struct.pack('>L', elen-exth_size)+struct.pack('>L', enum-1)+newrec0[ebase+12:]
            return newrec0
        enum_idx += 1
        ebase_idx = ebase_idx+exth_size
    return rec0


def _ensure_ids(rec0, fake_asin):
    """Add EXTH 113/504 (ASIN) and 501 (cdeType) to *rec0* when missing."""
    for exth_num, value in ((113, fake_asin), (504, fake_asin), (501, b'EBOK')):
        if len(read_exth(rec0, exth_num)) == 0:
            rec0 = add_exth(rec0, exth_num, value)
    return rec0


class mobi_split:
    """Extract the MOBI7 or the KF8 half of a combined KindleGen file.

    ``combo`` tells whether *infile* was recognised as a combined file.
    The converted file is available through ``getResult()`` only in that case.
    """

    def __init__(self, infile, newKindle):
        """Read *infile*; build a standalone KF8 file if *newKindle*, else MOBI7."""
        self.result_file = None
        with open(infile, 'rb') as mobifile:
            datain = mobifile.read()
        datain_rec0 = readsection(datain, 0)
        ver = getint(datain_rec0, mobi_version)
        # bytes, so it can be concatenated with struct.pack() output
        fake_asin = str(uuid4()).encode('ascii')
        self.combo = (ver != 8)
        if not self.combo:
            return
        exth121 = read_exth(datain_rec0, 121)
        if len(exth121) == 0:
            self.combo = False
            return
        # only pay attention to first exth121
        # (there should only be one)
        datain_kf8, = struct.unpack_from('>L', exth121[0], 0)
        if datain_kf8 == 0xffffffff:
            self.combo = False
            return
        datain_kfrec0 = readsection(datain, datain_kf8)
        firstimage = getint(datain_rec0, first_image_record)
        lastimage = getint(datain_rec0, last_content_index, 'H')

        if not newKindle:
            self.result_file = self._build_mobi7(datain, datain_rec0, datain_kfrec0, datain_kf8,
                                                 firstimage, lastimage, fake_asin)
        else:
            self.result_file = self._build_mobi8(datain, datain_kfrec0, datain_kf8,
                                                 firstimage, lastimage, fake_asin)

    @staticmethod
    def _build_mobi7(datain, datain_rec0, datain_kfrec0, datain_kf8, firstimage, lastimage, fake_asin):
        """Return the standalone MOBI7 file built from the combined *datain*."""
        num_sec = getint(datain, number_of_pdb_records, 'H')
        # remove BOUNDARY up to but not including ELF record
        result = deletesectionrange(datain, datain_kf8-1, num_sec-2)
        # check if there are SRCS records and delete them
        srcs = getint(datain_rec0, srcs_index)
        num_srcs = getint(datain_rec0, srcs_count)
        if srcs != 0xffffffff and num_srcs > 0:
            result = deletesectionrange(result, srcs, srcs+num_srcs-1)
            datain_rec0 = writeint(datain_rec0, srcs_index, 0xffffffff)
            datain_rec0 = writeint(datain_rec0, srcs_count, 0)
        # reset the EXTH 121 KF8 Boundary meta data to 0xffffffff
        datain_rec0 = write_exth(datain_rec0, 121, struct.pack('>L', 0xffffffff))
        # don't remove the EXTH 125 KF8 Count of Resources, seems to be present in mobi6 files as well
        # set the EXTH 129 KF8 Masthead / Cover Image string to the null string
        datain_rec0 = write_exth(datain_rec0, 129, b'')
        # don't remove the EXTH 131 KF8 Unidentified Count, seems to be present in mobi6 files as well

        # Make sure we have an ASIN & cdeType set...
        datain_rec0 = _ensure_ids(datain_rec0, fake_asin)

        # need to reset flags stored in 0x80-0x83
        # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
        # Bit Flags
        # 0x1000 = Bit 12 indicates if embedded fonts are used or not
        # 0x0800 = means this Header points to *shared* images/resource/fonts ??
        # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
        # 0x0040 = exth exists
        # 0x0010 = Not sure but this is always set so far
        fval, = struct.unpack_from('>L', datain_rec0, 0x80)
        # need to remove flag 0x0800 for KindlePreviewer 2.8 and unset Bit 12 for embedded fonts
        fval &= 0x07FF
        datain_rec0 = datain_rec0[:0x80] + struct.pack('>L', fval) + datain_rec0[0x84:]
        result = writesection(result, 0, datain_rec0)
        if lastimage == 0xffff:
            # find the lowest of the next sections and copy up to that.
            for ofs, sz in _KF8_INDEX_FIELDS:
                n = getint(datain_kfrec0, ofs, sz)
                if 0 < n < lastimage:
                    lastimage = n-1

        # Try to null out FONT and RES, but leave the (empty) PDB record so image refs remain valid
        for i in range(firstimage, lastimage):
            imgsec = readsection(result, i)
            if imgsec[0:4] in (b'RESC', b'FONT'):
                result = nullsection(result, i)
        return result

    @staticmethod
    def _build_mobi8(datain, datain_kfrec0, datain_kf8, firstimage, lastimage, fake_asin):
        """Return the standalone KF8 file built from the combined *datain*."""
        result = deletesectionrange(datain, 0, datain_kf8-1)
        target = getint(datain_kfrec0, first_image_record)
        result = insertsectionrange(datain, firstimage, lastimage, result, target)
        datain_kfrec0 = readsection(result, 0)

        # Only keep the correct EXTH 116 StartOffset, KG 2.5 carries over the one from the mobi7 part,
        # which then points at garbage in the mobi8 part, and confuses FW 3.4
        kf8starts = read_exth(datain_kfrec0, 116)
        # If we have multiple StartOffset, keep only the last one
        kf8start_count = len(kf8starts)
        while kf8start_count > 1:
            kf8start_count -= 1
            datain_kfrec0 = del_exth(datain_kfrec0, 116)

        # update the EXTH 125 KF8 Count of Images/Fonts/Resources
        datain_kfrec0 = write_exth(datain_kfrec0, 125, struct.pack('>L', lastimage-firstimage+1))

        # Same dance for the KF8, we want an ASIN & cdeType :)
        datain_kfrec0 = _ensure_ids(datain_kfrec0, fake_asin)

        # need to reset flags stored in 0x80-0x83
        # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
        # standalone mobi8 with exth: 0x0050
        # (see _build_mobi7 for the meaning of the individual bits)
        fval, = struct.unpack_from('>L', datain_kfrec0, 0x80)
        fval &= 0x1FFF
        fval |= 0x0800
        datain_kfrec0 = datain_kfrec0[:0x80] + struct.pack('>L', fval) + datain_kfrec0[0x84:]

        # properly update other index pointers that have been shifted by the insertion of images
        for ofs, sz in _KF8_INDEX_FIELDS:
            n = getint(datain_kfrec0, ofs, sz)
            if n != 0xffffffff:
                datain_kfrec0 = writeint(datain_kfrec0, ofs, n+lastimage-firstimage+1, sz)
        return writesection(result, 0, datain_kfrec0)

    def getResult(self):
        """Return the converted file as a byte string.

        Raises AttributeError when the input was not a combined MOBI7/KF8
        file (``combo`` is False) and therefore nothing was produced.
        """
        if self.result_file is None:
            # Same exception type callers got before, now with an explanation.
            raise AttributeError('no result available: input is not a combined MOBI7/KF8 file')
        return self.result_file
-# This is useful because the current KindleGen add a compressed copy -# of the source files used in this record, making the ebook produced -# about twice as big as it needs to be. -# -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
-# -# For more information, please refer to <http://unlicense.org/> -# -# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com -# With enhancements by Kevin Hendricks, KevinH on mobileread.com -# -# Changelog -# 1.00 - Initial version -# 1.10 - Added an option to output the stripped data -# 1.20 - Added check for source files section (thanks Piquan) -# 1.30 - Added prelim Support for K8 style mobis -# 1.31 - removed the SRCS section but kept a 0 size entry for it -# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed -# 1.33 - now uses and modifies mobiheader SRCS and CNT -# 1.34 - added credit for Kevin Hendricks -# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records - -__version__ = '1.35' - -import sys -import struct -import binascii - -class Unbuffered: - def __init__(self, stream): - self.stream = stream - def write(self, data): - self.stream.write(data) - self.stream.flush() - def __getattr__(self, attr): - return getattr(self.stream, attr) - - -class StripException(Exception): - pass - - -class SectionStripper: - def loadSection(self, section): - if (section + 1 == self.num_sections): - endoff = len(self.data_file) - else: - endoff = self.sections[section + 1][0] - off = self.sections[section][0] - return self.data_file[off:endoff] - - def patch(self, off, new): - self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):] - - def strip(self, off, len): - self.data_file = self.data_file[:off] + self.data_file[off+len:] - - def patchSection(self, section, new, in_off = 0): - if (section + 1 == self.num_sections): - endoff = len(self.data_file) - else: - endoff = self.sections[section + 1][0] - off = self.sections[section][0] - assert off + in_off + len(new) <= endoff - self.patch(off + in_off, new) - - def updateEXTH121(self, srcs_secnum, srcs_cnt, mobiheader): - mobi_length, = struct.unpack('>L',mobiheader[0x14:0x18]) - exth_flag, = struct.unpack('>L', 
mobiheader[0x80:0x84]) - exth = 'NONE' - try: - if exth_flag & 0x40: - exth = mobiheader[16 + mobi_length:] - if (len(exth) >= 4) and (exth[:4] == 'EXTH'): - nitems, = struct.unpack('>I', exth[8:12]) - pos = 12 - for i in xrange(nitems): - type, size = struct.unpack('>II', exth[pos: pos + 8]) - # print type, size - if type == 121: - boundaryptr, =struct.unpack('>L',exth[pos+8: pos + size]) - if srcs_secnum <= boundaryptr: - boundaryptr -= srcs_cnt - prefix = mobiheader[0:16 + mobi_length + pos + 8] - suffix = mobiheader[16 + mobi_length + pos + 8 + 4:] - nval = struct.pack('>L',boundaryptr) - mobiheader = prefix + nval + suffix - pos += size - except: - pass - return mobiheader - - def __init__(self, datain): - if datain[0x3C:0x3C+8] != 'BOOKMOBI': - raise StripException("invalid file format") - self.num_sections, = struct.unpack('>H', datain[76:78]) - - # get mobiheader and check SRCS section number and count - offset0, = struct.unpack_from('>L', datain, 78) - offset1, = struct.unpack_from('>L', datain, 86) - mobiheader = datain[offset0:offset1] - srcs_secnum, srcs_cnt = struct.unpack_from('>2L', mobiheader, 0xe0) - if srcs_secnum == 0xffffffff or srcs_cnt == 0: - raise StripException("File doesn't contain the sources section.") - - print "Found SRCS section number %d, and count %d" % (srcs_secnum, srcs_cnt) - # find its offset and length - next = srcs_secnum + srcs_cnt - srcs_offset, flgval = struct.unpack_from('>2L', datain, 78+(srcs_secnum*8)) - next_offset, flgval = struct.unpack_from('>2L', datain, 78+(next*8)) - srcs_length = next_offset - srcs_offset - if datain[srcs_offset:srcs_offset+4] != 'SRCS': - raise StripException("SRCS section num does not point to SRCS.") - print " beginning at offset %0x and ending at offset %0x" % (srcs_offset, srcs_length) - - # it appears bytes 68-71 always contain (2*num_sections) + 1 - # this is not documented anyplace at all but it appears to be some sort of next - # available unique_id used to identify specific sections in 
the palm db - self.data_file = datain[:68] + struct.pack('>L',((self.num_sections-srcs_cnt)*2+1)) - self.data_file += datain[72:76] - - # write out the number of sections reduced by srtcs_cnt - self.data_file = self.data_file + struct.pack('>H',self.num_sections-srcs_cnt) - - # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table - # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 ) - delta = -8 * srcs_cnt - for i in xrange(srcs_secnum): - offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8)) - offset += delta - self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval) - - # for every record after the srcs_cnt SRCS records we must start it - # earlier by 8*srcs_cnt + the length of the srcs sections themselves) - delta = delta - srcs_length - for i in xrange(srcs_secnum+srcs_cnt,self.num_sections): - offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8)) - offset += delta - flgval = 2 * (i - srcs_cnt) - self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval) - - # now pad it out to begin right at the first offset - # typically this is 2 bytes of nulls - first_offset, flgval = struct.unpack_from('>2L', self.data_file, 78) - self.data_file += '\0' * (first_offset - len(self.data_file)) - - # now finally add on every thing up to the original src_offset - self.data_file += datain[offset0: srcs_offset] - - # and everything afterwards - self.data_file += datain[srcs_offset+srcs_length:] - - #store away the SRCS section in case the user wants it output - self.stripped_data_header = datain[srcs_offset:srcs_offset+16] - self.stripped_data = datain[srcs_offset+16:srcs_offset+srcs_length] - - # update the number of sections count - self.num_section = self.num_sections - srcs_cnt - - # update the srcs_secnum and srcs_cnt in the mobiheader - offset0, flgval0 = struct.unpack_from('>2L', self.data_file, 78) - offset1, flgval1 = struct.unpack_from('>2L', 
self.data_file, 86) - mobiheader = self.data_file[offset0:offset1] - mobiheader = mobiheader[:0xe0]+ struct.pack('>L', 0xffffffff) + struct.pack('>L', 0) + mobiheader[0xe8:] - - # if K8 mobi, handle metadata 121 in old mobiheader - mobiheader = self.updateEXTH121(srcs_secnum, srcs_cnt, mobiheader) - self.data_file = self.data_file[0:offset0] + mobiheader + self.data_file[offset1:] - print "done" - - def getResult(self): - return self.data_file - - def getStrippedData(self): - return self.stripped_data - - def getHeader(self): - return self.stripped_data_header - -def main(argv=None): - infile = argv[0] - outfile = argv[1] - data_file = file(infile, 'rb').read() - try: - strippedFile = SectionStripper(data_file) - file(outfile, 'wb').write(strippedFile.getResult()) - print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()) - if len(argv)==3: - file(argv[2], 'wb').write(strippedFile.getStrippedData()) - except StripException, e: - print "Error: %s" % e - sys.exit(1) - -if __name__ == "__main__": - sys.stdout=Unbuffered(sys.stdout) - print ('KindleStrip v%(__version__)s. ' - 'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals()) - if len(sys.argv)<3 or len(sys.argv)>4: - print "Strips the Sources record from Mobipocket ebooks" - print "For ebooks generated using KindleGen 1.1 and later that add the source" - print "Usage:" - print " %s <infile> <outfile> <strippeddatafile>" % sys.argv[0] - print "<strippeddatafile> is optional." - sys.exit(1) - else: - main(sys.argv[1:]) - sys.exit(0) |