about summary refs log tree commit diff
path: root/KindleComicConverter.app/Contents/Resources/kindlestrip.py
diff options
context:
space:
mode:
Diffstat (limited to 'KindleComicConverter.app/Contents/Resources/kindlestrip.py')
-rwxr-xr-x KindleComicConverter.app/Contents/Resources/kindlestrip.py 233
1 files changed, 233 insertions, 0 deletions
diff --git a/KindleComicConverter.app/Contents/Resources/kindlestrip.py b/KindleComicConverter.app/Contents/Resources/kindlestrip.py
new file mode 100755
index 0000000..234afc0
--- /dev/null
+++ b/KindleComicConverter.app/Contents/Resources/kindlestrip.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+#
+# This is a python script. You need a Python interpreter to run it.
+# For example, ActiveState Python, which exists for windows.
+#
+# This script strips the penultimate record from a Mobipocket file.
+# This is useful because the current KindleGen adds a compressed copy
+# of the source files used in this record, making the ebook produced
+# about twice as big as it needs to be.
+#
+#
+# This is free and unencumbered software released into the public domain.
+# 
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+# 
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# 
+# For more information, please refer to <http://unlicense.org/>
+#
+# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com
+# With enhancements by Kevin Hendricks, KevinH on mobileread.com
+#
+# Changelog
+#  1.00 - Initial version
+#  1.10 - Added an option to output the stripped data
+#  1.20 - Added check for source files section (thanks Piquan)
+#  1.30 - Added prelim Support for K8 style mobis
+#  1.31 - removed the SRCS section but kept a 0 size entry for it
+#  1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed
+#  1.33 - now uses and modifies mobiheader SRCS and CNT
+#  1.34 - added credit for Kevin Hendricks
+#  1.35 - fixed bug when there is more than one compilation (SRCS/CMET) record
+
+__version__ = '1.35'
+
+import sys
+import struct
+import binascii
+
+class Unbuffered:  # file-like wrapper that flushes the wrapped stream after every write
+    def __init__(self, stream):
+        self.stream = stream  # the wrapped stream object
+    def write(self, data):
+        self.stream.write(data)
+        self.stream.flush()  # push the data out immediately rather than leaving it buffered
+    def __getattr__(self, attr):
+        return getattr(self.stream, attr)  # attributes not found on the wrapper are looked up on the wrapped stream
+
+
+class StripException(Exception):  # raised by SectionStripper for input it cannot strip
+    pass
+
+
+class SectionStripper:
+    """Strips the SRCS record(s) from a Mobipocket file image held as a byte string (Python 2 str)."""
+    def loadSection(self, section):  # returns the raw bytes of record number `section`
+        if (section + 1 == self.num_sections):
+            endoff = len(self.data_file)  # the last record runs to the end of the file
+        else:
+            endoff = self.sections[section + 1][0]
+        off = self.sections[section][0]
+        return self.data_file[off:endoff]
+
+    def patch(self, off, new):  # overwrites len(new) bytes starting at absolute offset `off`
+        self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):]
+
+    def strip(self, off, len):  # removes `len` bytes at `off`; offsets in self.sections are NOT adjusted
+        self.data_file = self.data_file[:off] + self.data_file[off+len:]
+
+    def patchSection(self, section, new, in_off = 0):  # patch() at `in_off` bytes into record `section`
+        if (section + 1 == self.num_sections):
+            endoff = len(self.data_file)
+        else:
+            endoff = self.sections[section + 1][0]
+        off = self.sections[section][0]
+        assert off + in_off + len(new) <= endoff  # the patch must stay inside the record
+        self.patch(off + in_off, new)
+
+    def updateEXTH121(self, srcs_secnum, srcs_cnt, mobiheader):  # returns mobiheader with EXTH item 121 lowered by srcs_cnt when it is >= srcs_secnum
+        mobi_length, = struct.unpack('>L',mobiheader[0x14:0x18])
+        exth_flag, = struct.unpack('>L', mobiheader[0x80:0x84])
+        try:
+            if exth_flag & 0x40:
+                exth = mobiheader[16 + mobi_length:]
+                if (len(exth) >= 4) and (exth[:4] == 'EXTH'):
+                    nitems, = struct.unpack('>I', exth[8:12])
+                    pos = 12
+                    for _ in xrange(nitems):
+                        tag, size = struct.unpack('>II', exth[pos: pos + 8])
+                        if tag == 121:
+                            boundaryptr, =struct.unpack('>L',exth[pos+8: pos + size])
+                            if srcs_secnum <= boundaryptr:
+                                boundaryptr -= srcs_cnt
+                                prefix = mobiheader[0:16 + mobi_length + pos + 8]
+                                suffix = mobiheader[16 + mobi_length + pos + 8 + 4:]
+                                nval = struct.pack('>L',boundaryptr)
+                                mobiheader = prefix + nval + suffix
+                        pos += size
+        except (struct.error, OverflowError):  # malformed or truncated EXTH: best effort only
+            pass  # keep whatever has been patched so far
+        return mobiheader
+
+    def __init__(self, datain):  # datain: whole file as a byte string; raises StripException if it cannot be stripped
+        if datain[0x3C:0x3C+8] != 'BOOKMOBI':
+            raise StripException("invalid file format")
+        self.num_sections, = struct.unpack('>H', datain[76:78])
+        
+        # get mobiheader and check SRCS section number and count
+        offset0, = struct.unpack_from('>L', datain, 78)
+        offset1, = struct.unpack_from('>L', datain, 86)
+        mobiheader = datain[offset0:offset1]
+        srcs_secnum, srcs_cnt = struct.unpack_from('>2L', mobiheader, 0xe0)
+        if srcs_secnum == 0xffffffff or srcs_cnt == 0:
+            raise StripException("File doesn't contain the sources section.")
+
+        print "Found SRCS section number %d, and count %d" % (srcs_secnum, srcs_cnt)
+        # find its offset and length (if SRCS are the last records they end at the end of the file)
+        next_secnum = srcs_secnum + srcs_cnt
+        srcs_offset, flgval = struct.unpack_from('>2L', datain, 78+(srcs_secnum*8))
+        next_offset = struct.unpack_from('>L', datain, 78+(next_secnum*8))[0] if next_secnum < self.num_sections else len(datain)
+        srcs_length = next_offset - srcs_offset
+        if datain[srcs_offset:srcs_offset+4] != 'SRCS':
+            raise StripException("SRCS section num does not point to SRCS.")
+        print "   beginning at offset %0x and ending at offset %0x" % (srcs_offset, srcs_offset + srcs_length)
+
+        # it appears bytes 68-71 always contain (2*num_sections) + 1
+        # this is not documented anyplace at all but it appears to be some sort of next 
+        # available unique_id used to identify specific sections in the palm db
+        self.data_file = datain[:68] + struct.pack('>L',((self.num_sections-srcs_cnt)*2+1))
+        self.data_file += datain[72:76]
+
+        # write out the number of sections reduced by srcs_cnt
+        self.data_file = self.data_file + struct.pack('>H',self.num_sections-srcs_cnt)
+
+        # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table
+        # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 )
+        delta = -8 * srcs_cnt
+        for i in xrange(srcs_secnum):
+            offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
+            offset += delta
+            self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)
+            
+        # for every record after the srcs_cnt SRCS records we must start it
+        # earlier by 8*srcs_cnt + the length of the srcs sections themselves)
+        delta = delta - srcs_length
+        for i in xrange(srcs_secnum+srcs_cnt,self.num_sections):
+            offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8))
+            offset += delta
+            flgval = 2 * (i - srcs_cnt)
+            self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval)
+
+        # now pad it out to begin right at the first offset
+        # typically this is 2 bytes of nulls
+        first_offset, flgval = struct.unpack_from('>2L', self.data_file, 78)
+        self.data_file += '\0' * (first_offset - len(self.data_file))
+
+        # now finally add on every thing up to the original srcs_offset
+        self.data_file += datain[offset0: srcs_offset]
+    
+        # and everything afterwards
+        self.data_file += datain[srcs_offset+srcs_length:]
+        
+        #store away the SRCS section in case the user wants it output
+        self.stripped_data_header = datain[srcs_offset:srcs_offset+16]
+        self.stripped_data = datain[srcs_offset+16:srcs_offset+srcs_length]
+
+        # update the number of sections count so it describes the stripped file
+        self.num_sections = self.num_sections - srcs_cnt
+        
+        # update the srcs_secnum and srcs_cnt in the mobiheader
+        offset0, flgval0 = struct.unpack_from('>2L', self.data_file, 78)
+        offset1, flgval1 = struct.unpack_from('>2L', self.data_file, 86)
+        mobiheader = self.data_file[offset0:offset1]
+        mobiheader = mobiheader[:0xe0]+ struct.pack('>L', 0xffffffff) + struct.pack('>L', 0) + mobiheader[0xe8:]
+
+        # if K8 mobi, handle metadata 121 in old mobiheader
+        mobiheader = self.updateEXTH121(srcs_secnum, srcs_cnt, mobiheader)
+        self.data_file = self.data_file[0:offset0] + mobiheader + self.data_file[offset1:]
+        self.sections = [struct.unpack_from('>2L', self.data_file, 78 + 8 * n) for n in xrange(self.num_sections)]  # (offset, flags) per record
+        print "done"
+
+    def getResult(self):
+        return self.data_file  # the stripped file image
+
+    def getStrippedData(self):
+        return self.stripped_data  # the removed SRCS bytes after their first 16 bytes
+
+    def getHeader(self):
+        return self.stripped_data_header  # the first 16 bytes of the removed SRCS data
+
+if __name__ == "__main__":  # command-line entry point
+    sys.stdout=Unbuffered(sys.stdout)  # flush each message as soon as it is printed
+    print ('KindleStrip v%(__version__)s. '
+       'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals())
+    if len(sys.argv)<3 or len(sys.argv)>4:  # need <infile> <outfile> plus at most one optional argument
+        print "Strips the Sources record from Mobipocket ebooks"
+        print "For ebooks generated using KindleGen 1.1 and later that add the source"
+        print "Usage:"
+        print "    %s <infile> <outfile> <strippeddatafile>" % sys.argv[0]
+        print "<strippeddatafile> is optional."
+        sys.exit(1)
+    else:
+        infile = sys.argv[1]
+        outfile = sys.argv[2]
+        data_file = file(infile, 'rb').read()  # the whole input file is read into memory
+        try:
+            strippedFile = SectionStripper(data_file)  # the stripping is done by the constructor
+            file(outfile, 'wb').write(strippedFile.getResult())
+            print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader())  # hex dump of the first 16 removed bytes
+            if len(sys.argv)==4:
+                file(sys.argv[3], 'wb').write(strippedFile.getStrippedData())  # optional dump of the removed data
+        except StripException, e:  # input that SectionStripper rejected
+            print "Error: %s" % e
+            sys.exit(1)
+    sys.exit(0)