| author | Ciro Mattia Gonano <ciromattia@gmail.com> | 2012-11-30 13:18:53 +0100 |
|---|---|---|
| committer | Ciro Mattia Gonano <ciromattia@gmail.com> | 2012-11-30 13:18:53 +0100 |
| commit | fc77c82c7117271db59ec022d823070262fa4fd3 (patch) | |
| tree | 79a82bc9aa773d3e23e9874dc6a58b55cb829122 | |
| parent | Initial commit (diff) | |
| download | kcc-fc77c82c7117271db59ec022d823070262fa4fd3.tar.gz, kcc-fc77c82c7117271db59ec022d823070262fa4fd3.tar.bz2, kcc-fc77c82c7117271db59ec022d823070262fa4fd3.zip | |
Add scripts and update readme
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | KindleComicConverter.app/Contents/Info.plist | 65 |
| -rwxr-xr-x | KindleComicConverter.app/Contents/MacOS/droplet | bin 0 -> 29592 bytes |
| -rw-r--r-- | KindleComicConverter.app/Contents/PkgInfo | 1 |
| -rw-r--r-- | KindleComicConverter.app/Contents/Resources/Scripts/main.scpt | bin 0 -> 28162 bytes |
| -rwxr-xr-x | KindleComicConverter.app/Contents/Resources/comic2ebook.py | 212 |
| -rw-r--r-- | KindleComicConverter.app/Contents/Resources/description.rtfd/TXT.rtf | 12 |
| -rw-r--r-- | KindleComicConverter.app/Contents/Resources/droplet.icns | bin 0 -> 366517 bytes |
| -rw-r--r-- | KindleComicConverter.app/Contents/Resources/droplet.rsrc | bin 0 -> 362 bytes |
| -rwxr-xr-x | KindleComicConverter.app/Contents/Resources/kindlestrip.py | 233 |
| -rw-r--r-- | KindleComicConverter.app/Contents/Resources/rarfile.py | 1706 |
| -rw-r--r-- | README.md | 49 |
| -rwxr-xr-x | comic2ebook.py | 212 |
12 files changed, 2487 insertions, 3 deletions
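The scripts added in this commit form a small conversion chain: comic2ebook.py unpacks a CBZ/CBR (using the bundled rarfile.py for RAR archives) and writes per-page HTML plus content.ncx/content.opf, Amazon's kindlegen (v2.7+ with KF8 support, per the bundled description.rtfd) turns the OPF into a MOBI, and kindlestrip.py removes the SRCS record that kindlegen embeds, roughly halving the output size. The sketch below only illustrates how the pieces could be chained from Python; the two script invocations follow the usage strings shown in this diff, while the kindlegen flags and the intermediate "book.mobi" name are assumptions.

```python
#!/usr/bin/env python
# Hypothetical end-to-end driver -- not part of this commit, just a sketch of the chain.
# comic2ebook.py and kindlestrip.py are called as their usage strings describe;
# the kindlegen flags and the "book.mobi" output name are assumptions.
import os
import subprocess
import sys


def convert(comic, title="comic"):
    # 1. Unpack the CBZ/CBR and emit per-page HTML plus content.ncx/content.opf.
    subprocess.check_call([sys.executable, "comic2ebook.py", comic, title])
    # CBxArchive extracts into <archive name without extension>, next to the archive.
    srcdir = os.path.splitext(comic)[0]
    # 2. Build the MOBI with Amazon's kindlegen (written alongside the OPF).
    subprocess.check_call(["kindlegen", os.path.join(srcdir, "content.opf"), "-o", "book.mobi"])
    # 3. Strip the SRCS record so the final file is roughly half the size.
    subprocess.check_call([sys.executable, "kindlestrip.py",
                           os.path.join(srcdir, "book.mobi"), title + ".mobi"])


if __name__ == "__main__":
    convert(sys.argv[1], *sys.argv[2:3])
```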
diff --git a/KindleComicConverter.app/Contents/Info.plist b/KindleComicConverter.app/Contents/Info.plist new file mode 100644 index 0000000..639da80 --- /dev/null +++ b/KindleComicConverter.app/Contents/Info.plist @@ -0,0 +1,65 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleAllowMixedLocalizations</key> + <true/> + <key>CFBundleDevelopmentRegion</key> + <string>English</string> + <key>CFBundleDocumentTypes</key> + <array> + <dict> + <key>CFBundleTypeExtensions</key> + <array> + <string>*</string> + </array> + <key>CFBundleTypeOSTypes</key> + <array> + <string>****</string> + </array> + <key>CFBundleTypeRole</key> + <string>Viewer</string> + </dict> + </array> + <key>CFBundleExecutable</key> + <string>droplet</string> + <key>CFBundleGetInfoString</key> + <string>KindleComicConverter 1.0, Written 2012 by Ciro Mattia Gonano</string> + <key>CFBundleIconFile</key> + <string>droplet</string> + <key>CFBundleIdentifier</key> + <string>com.apple.ScriptEditor.id.5D4EC602-9033-4D02-AF60-6380F83B0145</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>KindleComicConverter 1.0</string> + <key>CFBundlePackageType</key> + <string>APPL</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleSignature</key> + <string>dplt</string> + <key>LSMinimumSystemVersionByArchitecture</key> + <dict> + <key>x86_64</key> + <string>10.6</string> + </dict> + <key>LSRequiresCarbon</key> + <true/> + <key>WindowState</key> + <dict> + <key>dividerCollapsed</key> + <true/> + <key>eventLogLevel</key> + <integer>-1</integer> + <key>name</key> + <string>ScriptWindowState</string> + <key>positionOfDivider</key> + <real>0.0</real> + <key>savedFrame</key> + <string>444 56 1021 972 0 0 1680 1028 </string> + <key>selectedTabView</key> + <string>event log</string> + </dict> +</dict> +</plist> diff --git a/KindleComicConverter.app/Contents/MacOS/droplet b/KindleComicConverter.app/Contents/MacOS/droplet new file mode 100755 index 0000000..c715860 --- /dev/null +++ b/KindleComicConverter.app/Contents/MacOS/droplet Binary files differdiff --git a/KindleComicConverter.app/Contents/PkgInfo b/KindleComicConverter.app/Contents/PkgInfo new file mode 100644 index 0000000..b999e99 --- /dev/null +++ b/KindleComicConverter.app/Contents/PkgInfo @@ -0,0 +1 @@ +APPLdplt \ No newline at end of file diff --git a/KindleComicConverter.app/Contents/Resources/Scripts/main.scpt b/KindleComicConverter.app/Contents/Resources/Scripts/main.scpt new file mode 100644 index 0000000..2823165 --- /dev/null +++ b/KindleComicConverter.app/Contents/Resources/Scripts/main.scpt Binary files differdiff --git a/KindleComicConverter.app/Contents/Resources/comic2ebook.py b/KindleComicConverter.app/Contents/Resources/comic2ebook.py new file mode 100755 index 0000000..4fe1417 --- /dev/null +++ b/KindleComicConverter.app/Contents/Resources/comic2ebook.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# +# Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com> +# +# Permission to use, copy, modify, and/or distribute this software for +# any purpose with or without fee is hereby granted, provided that the +# above copyright notice and this permission notice appear in all +# copies. 
+# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE +# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL +# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA +# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +# Changelog +# 1.00 - Initial version +# 1.10 - Added support for CBZ/CBR files +# +# Todo: +# - Add gracefully exit for CBR if no rarfile.py and no unrar +# executable are found +# - Improve error reporting +# + +__version__ = '1.10' + +import os +import sys + +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + +class CBxArchive: + def __init__(self, origFileName): + self.cbxexts = ['.cbz', '.cbr'] + self.origFileName = origFileName + self.filename = os.path.splitext(origFileName) + self.path = self.filename[0] + + def isCbxFile(self): + result = (self.filename[1].lower() in self.cbxexts) + if result == True: + return result + return False + + def getPath(self): + return self.path + + def extractCBZ(self): + try: + from zipfile import ZipFile + except ImportError: + self.cbzFile = None + cbzFile = ZipFile(self.origFileName) + for f in cbzFile.namelist(): + if (f.startswith('__MACOSX') or f.endswith('.DS_Store')): + pass # skip MacOS special files + elif f.endswith('/'): + try: + os.makedirs(self.path+f) + except: + pass #the dir exists so we are going to extract the images only. + else: + cbzFile.extract(f, self.path) + + def extractCBR(self): + try: + import rarfile + except ImportError: + self.cbrFile = None + cbrFile = rarfile.RarFile(self.origFileName) + for f in cbrFile.namelist(): + if f.endswith('/'): + try: + os.makedirs(self.path+f) + except: + pass #the dir exists so we are going to extract the images only. 
+ else: + cbrFile.extract(f, self.path) + + def extract(self): + if ('.cbr' == self.filename[1].lower()): + self.extractCBR() + elif ('.cbz' == self.filename[1].lower()): + self.extractCBZ() + dir = os.listdir(self.path) + if (len(dir) == 1): + import shutil + for f in os.listdir(self.path + "/" + dir[0]): + shutil.move(self.path + "/" + dir[0] + "/" + f,self.path) + os.rmdir(self.path + "/" + dir[0]) + +class HTMLbuilder: + def getResult(self): + if (self.filename[0].startswith('.') or (self.filename[1] != '.png' and self.filename[1] != '.jpg' and self.filename[1] != '.jpeg')): + return None + return self.filename + + def __init__(self, dstdir, file): + self.filename = os.path.splitext(file) + basefilename = self.filename[0] + ext = self.filename[1] + if (basefilename.startswith('.') or (ext != '.png' and ext != '.jpg' and ext != '.jpeg')): + return + htmlfile = dstdir + '/' + basefilename + '.html' + f = open(htmlfile, "w"); + f.writelines(["<!DOCTYPE html SYSTEM \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", + "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n", + "<head>\n", + "<title>",basefilename,"</title>\n", + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n", + "</head>\n", + "<body>\n", + "<div><img src=\"",file,"\" /></div>\n", + "</body>\n", + "</html>" + ]) + f.close() + +class NCXbuilder: + def __init__(self, dstdir, title): + ncxfile = dstdir + '/content.ncx' + f = open(ncxfile, "w"); + f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", + "<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\" \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n", + "<ncx version=\"2005-1\" xml:lang=\"en-US\" xmlns=\"http://www.daisy.org/z3986/2005/ncx/\">\n", + "<head>\n</head>\n", + "<docTitle><text>",title,"</text></docTitle>\n", + "<navMap></navMap>\n</ncx>" + ]) + f.close() + return + +class OPFBuilder: + def __init__(self, dstdir, title, filelist): + opffile = dstdir + '/content.opf' + # read the first file resolution + try: + from PIL import Image + im = Image.open(dstdir + "/" + filelist[0][0] + filelist[0][1]) + width, height = im.size + imgres = str(width) + "x" + str(height) + except ImportError: + print "Could not load PIL, falling back on default HD" + imgres = "758x1024" + f = open(opffile, "w"); + f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", + "<package version=\"2.0\" unique-identifier=\"PrimaryID\" xmlns=\"http://www.idpf.org/2007/opf\">\n", + "<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\">\n", + "<dc:title>",title,"</dc:title>\n", + "<dc:language>en-US</dc:language>\n", + "<meta name=\"book-type\" content=\"comic\"/>\n", + "<meta name=\"zero-gutter\" content=\"true\"/>\n", + "<meta name=\"zero-margin\" content=\"true\"/>\n", + "<meta name=\"fixed-layout\" content=\"true\"/>\n", + "<meta name=\"orientation-lock\" content=\"portrait\"/>\n", + "<meta name=\"original-resolution\" content=\"" + imgres + "\"/>\n", + "</metadata><manifest><item id=\"ncx\" href=\"content.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n"]) + for filename in filelist: + f.write("<item id=\"page_" + filename[0] + "\" href=\"" + filename[0] + ".html\" media-type=\"application/xhtml+xml\"/>\n") + for filename in filelist: + if ('.png' == filename[1]): + mt = 'image/png'; + else: + mt = 'image/jpeg'; + f.write("<item id=\"img_" + filename[0] + "\" href=\"" + filename[0] + filename[1] + "\" media-type=\"" + mt + "\"/>\n") + f.write("</manifest>\n<spine toc=\"ncx\">\n") + for 
filename in filelist: + f.write("<itemref idref=\"page_" + filename[0] + "\" />\n") + f.write("</spine>\n<guide>\n</guide>\n</package>\n") + f.close() + return + +if __name__ == "__main__": + sys.stdout=Unbuffered(sys.stdout) + print ('comic2ebook v%(__version__)s. ' + 'Written 2012 by Ciro Mattia Gonano.' % globals()) + if len(sys.argv)<2 or len(sys.argv)>3: + print "Generates HTML, NCX and OPF for a Comic ebook from a bunch of images" + print "Optimized for creating Mobipockets to be read into Kindle Paperwhite" + print "Usage:" + print " %s <dir> <title>" % sys.argv[0] + print " <title> is optional" + sys.exit(1) + else: + dir = sys.argv[1] + cbx = CBxArchive(dir) + if cbx.isCbxFile(): + cbx.extract() + dir = cbx.getPath() + if len(sys.argv)==3: + title = sys.argv[2] + else: + title = "comic" + filelist = [] + for file in os.listdir(dir): + filename = HTMLbuilder(dir,file).getResult() + if (filename != None): + filelist.append(filename) + NCXbuilder(dir,title) + OPFBuilder(dir,title,filelist) + sys.exit(0) diff --git a/KindleComicConverter.app/Contents/Resources/description.rtfd/TXT.rtf b/KindleComicConverter.app/Contents/Resources/description.rtfd/TXT.rtf new file mode 100644 index 0000000..0132f76 --- /dev/null +++ b/KindleComicConverter.app/Contents/Resources/description.rtfd/TXT.rtf @@ -0,0 +1,12 @@ +{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf340 +{\fonttbl\f0\fnil\fcharset0 Verdana;} +{\colortbl;\red255\green255\blue255;\red76\green78\blue78;} +\pard\tx576\tx1152\tx1728\tx2304\tx2880\tx3456\tx4032\tx4608\tx5184\tx5760\tx6337\tx6913\tx7489\tx8065\tx8641\tx9217\tx9793\tx10369\tx10945\tx11521\tx12097\tx12674\tx13250\tx13826\tx14402\tx14978\tx15554\tx16130\tx16706\tx17282\tx17858\tx18435\tx19011\tx19587\tx20163\tx20739\tx21315\tx21891\tx22467\tx23043\tx23619\tx24195\tx24772\tx25348\tx25924\tx26500\tx27076\tx27652\tx28228\tx28804\tx29380\tx29956\tx30532\tx31109\tx31685\tx32261\tx32837\tx33413\tx33989\tx34565\tx35141\tx35717\tx36293\tx36870\tx37446\tx38022\tx38598\tx39174\tx39750\tx40326\tx40902\tx41478\tx42054\tx42630\tx43207\tx43783\tx44359\tx44935\tx45511\tx46087\tx46663\tx47239\tx47815\tx48391\tx48967\tx49544\tx50120\tx50696\tx51272\tx51848\tx52424\tx53000\tx53576\tx54152\tx54728\tx55305\tx55881\tx56457\tx57033\tx57609\li785\fi-786\pardirnatural + +\f0\fs24 \cf2 \CocoaLigature0 Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com>\ +\ +This script heavily relies on KindleStrip (C) by Paul Durrant and released in public domain (http://www.mobileread.com/forums/showthread.php?t=96903)\ +Also, you need to have kindlegen v2.7 (with KF8 support) which is downloadable from Amazon website.\ +\ +This script is released under The MIT License (http://opensource.org/licenses/MIT)\ +} \ No newline at end of file diff --git a/KindleComicConverter.app/Contents/Resources/droplet.icns b/KindleComicConverter.app/Contents/Resources/droplet.icns new file mode 100644 index 0000000..be1936e --- /dev/null +++ b/KindleComicConverter.app/Contents/Resources/droplet.icns Binary files differdiff --git a/KindleComicConverter.app/Contents/Resources/droplet.rsrc b/KindleComicConverter.app/Contents/Resources/droplet.rsrc new file mode 100644 index 0000000..f8bf4f2 --- /dev/null +++ b/KindleComicConverter.app/Contents/Resources/droplet.rsrc Binary files differdiff --git a/KindleComicConverter.app/Contents/Resources/kindlestrip.py b/KindleComicConverter.app/Contents/Resources/kindlestrip.py new file mode 100755 index 0000000..234afc0 --- /dev/null +++ 
b/KindleComicConverter.app/Contents/Resources/kindlestrip.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# +# This is a python script. You need a Python interpreter to run it. +# For example, ActiveState Python, which exists for windows. +# +# This script strips the penultimate record from a Mobipocket file. +# This is useful because the current KindleGen add a compressed copy +# of the source files used in this record, making the ebook produced +# about twice as big as it needs to be. +# +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to <http://unlicense.org/> +# +# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk, pdurrant on mobileread.com +# With enhancements by Kevin Hendricks, KevinH on mobileread.com +# +# Changelog +# 1.00 - Initial version +# 1.10 - Added an option to output the stripped data +# 1.20 - Added check for source files section (thanks Piquan) +# 1.30 - Added prelim Support for K8 style mobis +# 1.31 - removed the SRCS section but kept a 0 size entry for it +# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed +# 1.33 - now uses and modifies mobiheader SRCS and CNT +# 1.34 - added credit for Kevin Hendricks +# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records + +__version__ = '1.35' + +import sys +import struct +import binascii + +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + + +class StripException(Exception): + pass + + +class SectionStripper: + def loadSection(self, section): + if (section + 1 == self.num_sections): + endoff = len(self.data_file) + else: + endoff = self.sections[section + 1][0] + off = self.sections[section][0] + return self.data_file[off:endoff] + + def patch(self, off, new): + self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):] + + def strip(self, off, len): + self.data_file = self.data_file[:off] + self.data_file[off+len:] + + def patchSection(self, section, new, in_off = 0): + if (section + 1 == self.num_sections): + endoff = len(self.data_file) + else: + endoff = self.sections[section + 1][0] + off = self.sections[section][0] + assert off + in_off + len(new) <= endoff + 
self.patch(off + in_off, new) + + def updateEXTH121(self, srcs_secnum, srcs_cnt, mobiheader): + mobi_length, = struct.unpack('>L',mobiheader[0x14:0x18]) + exth_flag, = struct.unpack('>L', mobiheader[0x80:0x84]) + exth = 'NONE' + try: + if exth_flag & 0x40: + exth = mobiheader[16 + mobi_length:] + if (len(exth) >= 4) and (exth[:4] == 'EXTH'): + nitems, = struct.unpack('>I', exth[8:12]) + pos = 12 + for i in xrange(nitems): + type, size = struct.unpack('>II', exth[pos: pos + 8]) + # print type, size + if type == 121: + boundaryptr, =struct.unpack('>L',exth[pos+8: pos + size]) + if srcs_secnum <= boundaryptr: + boundaryptr -= srcs_cnt + prefix = mobiheader[0:16 + mobi_length + pos + 8] + suffix = mobiheader[16 + mobi_length + pos + 8 + 4:] + nval = struct.pack('>L',boundaryptr) + mobiheader = prefix + nval + suffix + pos += size + except: + pass + return mobiheader + + def __init__(self, datain): + if datain[0x3C:0x3C+8] != 'BOOKMOBI': + raise StripException("invalid file format") + self.num_sections, = struct.unpack('>H', datain[76:78]) + + # get mobiheader and check SRCS section number and count + offset0, = struct.unpack_from('>L', datain, 78) + offset1, = struct.unpack_from('>L', datain, 86) + mobiheader = datain[offset0:offset1] + srcs_secnum, srcs_cnt = struct.unpack_from('>2L', mobiheader, 0xe0) + if srcs_secnum == 0xffffffff or srcs_cnt == 0: + raise StripException("File doesn't contain the sources section.") + + print "Found SRCS section number %d, and count %d" % (srcs_secnum, srcs_cnt) + # find its offset and length + next = srcs_secnum + srcs_cnt + srcs_offset, flgval = struct.unpack_from('>2L', datain, 78+(srcs_secnum*8)) + next_offset, flgval = struct.unpack_from('>2L', datain, 78+(next*8)) + srcs_length = next_offset - srcs_offset + if datain[srcs_offset:srcs_offset+4] != 'SRCS': + raise StripException("SRCS section num does not point to SRCS.") + print " beginning at offset %0x and ending at offset %0x" % (srcs_offset, srcs_length) + + # it appears bytes 68-71 always contain (2*num_sections) + 1 + # this is not documented anyplace at all but it appears to be some sort of next + # available unique_id used to identify specific sections in the palm db + self.data_file = datain[:68] + struct.pack('>L',((self.num_sections-srcs_cnt)*2+1)) + self.data_file += datain[72:76] + + # write out the number of sections reduced by srtcs_cnt + self.data_file = self.data_file + struct.pack('>H',self.num_sections-srcs_cnt) + + # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table + # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 ) + delta = -8 * srcs_cnt + for i in xrange(srcs_secnum): + offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8)) + offset += delta + self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval) + + # for every record after the srcs_cnt SRCS records we must start it + # earlier by 8*srcs_cnt + the length of the srcs sections themselves) + delta = delta - srcs_length + for i in xrange(srcs_secnum+srcs_cnt,self.num_sections): + offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8)) + offset += delta + flgval = 2 * (i - srcs_cnt) + self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval) + + # now pad it out to begin right at the first offset + # typically this is 2 bytes of nulls + first_offset, flgval = struct.unpack_from('>2L', self.data_file, 78) + self.data_file += '\0' * (first_offset - len(self.data_file)) + + # now finally add on every thing up to the 
original src_offset + self.data_file += datain[offset0: srcs_offset] + + # and everything afterwards + self.data_file += datain[srcs_offset+srcs_length:] + + #store away the SRCS section in case the user wants it output + self.stripped_data_header = datain[srcs_offset:srcs_offset+16] + self.stripped_data = datain[srcs_offset+16:srcs_offset+srcs_length] + + # update the number of sections count + self.num_section = self.num_sections - srcs_cnt + + # update the srcs_secnum and srcs_cnt in the mobiheader + offset0, flgval0 = struct.unpack_from('>2L', self.data_file, 78) + offset1, flgval1 = struct.unpack_from('>2L', self.data_file, 86) + mobiheader = self.data_file[offset0:offset1] + mobiheader = mobiheader[:0xe0]+ struct.pack('>L', 0xffffffff) + struct.pack('>L', 0) + mobiheader[0xe8:] + + # if K8 mobi, handle metadata 121 in old mobiheader + mobiheader = self.updateEXTH121(srcs_secnum, srcs_cnt, mobiheader) + self.data_file = self.data_file[0:offset0] + mobiheader + self.data_file[offset1:] + print "done" + + def getResult(self): + return self.data_file + + def getStrippedData(self): + return self.stripped_data + + def getHeader(self): + return self.stripped_data_header + +if __name__ == "__main__": + sys.stdout=Unbuffered(sys.stdout) + print ('KindleStrip v%(__version__)s. ' + 'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals()) + if len(sys.argv)<3 or len(sys.argv)>4: + print "Strips the Sources record from Mobipocket ebooks" + print "For ebooks generated using KindleGen 1.1 and later that add the source" + print "Usage:" + print " %s <infile> <outfile> <strippeddatafile>" % sys.argv[0] + print "<strippeddatafile> is optional." + sys.exit(1) + else: + infile = sys.argv[1] + outfile = sys.argv[2] + data_file = file(infile, 'rb').read() + try: + strippedFile = SectionStripper(data_file) + file(outfile, 'wb').write(strippedFile.getResult()) + print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()) + if len(sys.argv)==4: + file(sys.argv[3], 'wb').write(strippedFile.getStrippedData()) + except StripException, e: + print "Error: %s" % e + sys.exit(1) + sys.exit(0) diff --git a/KindleComicConverter.app/Contents/Resources/rarfile.py b/KindleComicConverter.app/Contents/Resources/rarfile.py new file mode 100644 index 0000000..d78aafe --- /dev/null +++ b/KindleComicConverter.app/Contents/Resources/rarfile.py @@ -0,0 +1,1706 @@ +# rarfile.py +# +# Copyright (c) 2005-2011 Marko Kreen <markokr@gmail.com> +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +r"""RAR archive reader. + +This is Python module for Rar archive reading. The interface +is made as zipfile like as possible. + +Basic logic: + - Parse archive structure with Python. + - Extract non-compressed files with Python + - Extract compressed files with unrar. 
+ - Optionally write compressed data to temp file to speed up unrar, + otherwise it needs to scan whole archive on each execution. + +Example:: + + import rarfile + + rf = rarfile.RarFile('myarchive.rar') + for f in rf.infolist(): + print f.filename, f.file_size + if f.filename == 'README': + print rf.read(f) + +There are few module-level parameters to tune behaviour, +here they are with defaults, and reason to change it:: + + import rarfile + + # Set to full path of unrar.exe if it is not in PATH + rarfile.UNRAR_TOOL = "unrar" + + # Set to 0 if you don't look at comments and want to + # avoid wasting time for parsing them + rarfile.NEED_COMMENTS = 1 + + # Set up to 1 if you don't want to deal with decoding comments + # from unknown encoding. rarfile will try couple of common + # encodings in sequence. + rarfile.UNICODE_COMMENTS = 0 + + # Set to 1 if you prefer timestamps to be datetime objects + # instead tuples + rarfile.USE_DATETIME = 0 + + # Set to '/' to be more compatible with zipfile + rarfile.PATH_SEP = '\\' + +For more details, refer to source. + +""" + +__version__ = '2.4' + +# export only interesting items +__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] + +## +## Imports and compat - support both Python 2.x and 3.x +## + +import sys, os, struct +from struct import pack, unpack +from binascii import crc32 +from tempfile import mkstemp +from subprocess import Popen, PIPE, STDOUT +from datetime import datetime + +# only needed for encryped headers +try: + from Crypto.Cipher import AES + try: + from hashlib import sha1 + except ImportError: + from sha import new as sha1 + _have_crypto = 1 +except ImportError: + _have_crypto = 0 + +# compat with 2.x +if sys.hexversion < 0x3000000: + # prefer 3.x behaviour + range = xrange + # py2.6 has broken bytes() + def bytes(s, enc): + return str(s) + +# see if compat bytearray() is needed +try: + bytearray +except NameError: + import array + class bytearray: + def __init__(self, val = ''): + self.arr = array.array('B', val) + self.append = self.arr.append + self.__getitem__ = self.arr.__getitem__ + self.__len__ = self.arr.__len__ + def decode(self, *args): + return self.arr.tostring().decode(*args) + +# Optimized .readinto() requires memoryview +try: + memoryview + have_memoryview = 1 +except NameError: + have_memoryview = 0 + +# Struct() for older python +try: + from struct import Struct +except ImportError: + class Struct: + def __init__(self, fmt): + self.format = fmt + self.size = struct.calcsize(fmt) + def unpack(self, buf): + return unpack(self.format, buf) + def unpack_from(self, buf, ofs = 0): + return unpack(self.format, buf[ofs : ofs + self.size]) + def pack(self, *args): + return pack(self.format, *args) + +# file object superclass +try: + from io import RawIOBase +except ImportError: + class RawIOBase(object): + def close(self): + pass + + +## +## Module configuration. Can be tuned after importing. +## + +# default fallback charset +DEFAULT_CHARSET = "windows-1252" + +# list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed +TRY_ENCODINGS = ('utf8', 'utf-16le') + +# 'unrar', 'rar' or full path to either one +UNRAR_TOOL = "unrar" + +# Command line args to use for opening file for reading. +OPEN_ARGS = ('p', '-inul') + +# Command line args to use for extracting file to disk. 
+EXTRACT_ARGS = ('x', '-y', '-idq') + +# args for testrar() +TEST_ARGS = ('t', '-idq') + +# whether to speed up decompression by using tmp archive +USE_EXTRACT_HACK = 1 + +# limit the filesize for tmp archive usage +HACK_SIZE_LIMIT = 20*1024*1024 + +# whether to parse file/archive comments. +NEED_COMMENTS = 1 + +# whether to convert comments to unicode strings +UNICODE_COMMENTS = 0 + +# When RAR is corrupt, stopping on bad header is better +# On unknown/misparsed RAR headers reporting is better +REPORT_BAD_HEADER = 0 + +# Convert RAR time tuple into datetime() object +USE_DATETIME = 0 + +# Separator for path name components. RAR internally uses '\\'. +# Use '/' to be similar with zipfile. +PATH_SEP = '\\' + +## +## rar constants +## + +# block types +RAR_BLOCK_MARK = 0x72 # r +RAR_BLOCK_MAIN = 0x73 # s +RAR_BLOCK_FILE = 0x74 # t +RAR_BLOCK_OLD_COMMENT = 0x75 # u +RAR_BLOCK_OLD_EXTRA = 0x76 # v +RAR_BLOCK_OLD_SUB = 0x77 # w +RAR_BLOCK_OLD_RECOVERY = 0x78 # x +RAR_BLOCK_OLD_AUTH = 0x79 # y +RAR_BLOCK_SUB = 0x7a # z +RAR_BLOCK_ENDARC = 0x7b # { + +# flags for RAR_BLOCK_MAIN +RAR_MAIN_VOLUME = 0x0001 +RAR_MAIN_COMMENT = 0x0002 +RAR_MAIN_LOCK = 0x0004 +RAR_MAIN_SOLID = 0x0008 +RAR_MAIN_NEWNUMBERING = 0x0010 +RAR_MAIN_AUTH = 0x0020 +RAR_MAIN_RECOVERY = 0x0040 +RAR_MAIN_PASSWORD = 0x0080 +RAR_MAIN_FIRSTVOLUME = 0x0100 +RAR_MAIN_ENCRYPTVER = 0x0200 + +# flags for RAR_BLOCK_FILE +RAR_FILE_SPLIT_BEFORE = 0x0001 +RAR_FILE_SPLIT_AFTER = 0x0002 +RAR_FILE_PASSWORD = 0x0004 +RAR_FILE_COMMENT = 0x0008 +RAR_FILE_SOLID = 0x0010 +RAR_FILE_DICTMASK = 0x00e0 +RAR_FILE_DICT64 = 0x0000 +RAR_FILE_DICT128 = 0x0020 +RAR_FILE_DICT256 = 0x0040 +RAR_FILE_DICT512 = 0x0060 +RAR_FILE_DICT1024 = 0x0080 +RAR_FILE_DICT2048 = 0x00a0 +RAR_FILE_DICT4096 = 0x00c0 +RAR_FILE_DIRECTORY = 0x00e0 +RAR_FILE_LARGE = 0x0100 +RAR_FILE_UNICODE = 0x0200 +RAR_FILE_SALT = 0x0400 +RAR_FILE_VERSION = 0x0800 +RAR_FILE_EXTTIME = 0x1000 +RAR_FILE_EXTFLAGS = 0x2000 + +# flags for RAR_BLOCK_ENDARC +RAR_ENDARC_NEXT_VOLUME = 0x0001 +RAR_ENDARC_DATACRC = 0x0002 +RAR_ENDARC_REVSPACE = 0x0004 +RAR_ENDARC_VOLNR = 0x0008 + +# flags common to all blocks +RAR_SKIP_IF_UNKNOWN = 0x4000 +RAR_LONG_BLOCK = 0x8000 + +# Host OS types +RAR_OS_MSDOS = 0 +RAR_OS_OS2 = 1 +RAR_OS_WIN32 = 2 +RAR_OS_UNIX = 3 +RAR_OS_MACOS = 4 +RAR_OS_BEOS = 5 + +# Compression methods - '0'..'5' +RAR_M0 = 0x30 +RAR_M1 = 0x31 +RAR_M2 = 0x32 +RAR_M3 = 0x33 +RAR_M4 = 0x34 +RAR_M5 = 0x35 + +## +## internal constants +## + +RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii') +ZERO = bytes("\0", 'ascii') +EMPTY = bytes("", 'ascii') + +S_BLK_HDR = Struct('<HBHH') +S_FILE_HDR = Struct('<LLBLLBBHL') +S_LONG = Struct('<L') +S_SHORT = Struct('<H') +S_BYTE = Struct('<B') +S_COMMENT_HDR = Struct('<HBBH') + +## +## Public interface +## + +class Error(Exception): + """Base class for rarfile errors.""" +class BadRarFile(Error): + """Incorrect data in archive.""" +class NotRarFile(Error): + """The file is not RAR archive.""" +class BadRarName(Error): + """Cannot guess multipart name components.""" +class NoRarEntry(Error): + """File not found in RAR""" +class PasswordRequired(Error): + """File requires password""" +class NeedFirstVolume(Error): + """Need to start from first volume.""" +class NoCrypto(Error): + """Cannot parse encrypted headers - no crypto available.""" + + +def is_rarfile(fn): + '''Check quickly whether file is rar archive.''' + buf = open(fn, "rb").read(len(RAR_ID)) + return buf == RAR_ID + + +class RarInfo(object): + '''An entry in rar archive. 
+ + @ivar filename: + File name with relative path. + Default path separator is '/', to change set rarfile.PATH_SEP. + Always unicode string. + @ivar date_time: + Modification time, tuple of (year, month, day, hour, minute, second). + Or datetime() object if USE_DATETIME is set. + @ivar file_size: + Uncompressed size. + @ivar compress_size: + Compressed size. + @ivar compress_type: + Compression method: 0x30 - 0x35. + @ivar extract_version: + Minimal Rar version needed for decompressing. + @ivar host_os: + Host OS type, one of RAR_OS_* constants. + @ivar mode: + File attributes. May be either dos-style or unix-style, depending on host_os. + @ivar CRC: + CRC-32 of uncompressed file, unsigned int. + @ivar volume: + Volume nr, starting from 0. + @ivar volume_file: + Volume file name, where file starts. + @ivar type: + One of RAR_BLOCK_* types. Only entries with type==RAR_BLOCK_FILE are shown in .infolist(). + @ivar flags: + For files, RAR_FILE_* bits. + @ivar comment: + File comment (unicode string or None). + + @ivar mtime: + Optional time field: Modification time, with float seconds. + Same as .date_time but with more precision. + @ivar ctime: + Optional time field: creation time, with float seconds. + @ivar atime: + Optional time field: last access time, with float seconds. + @ivar arctime: + Optional time field: archival time, with float seconds. + ''' + + __slots__ = ( + # zipfile-compatible fields + 'filename', + 'file_size', + 'compress_size', + 'date_time', + 'comment', + 'CRC', + 'volume', + 'orig_filename', # bytes in unknown encoding + + # rar-specific fields + 'extract_version', + 'compress_type', + 'host_os', + 'mode', + 'type', + 'flags', + + # optional extended time fields + # tuple where the sec is float, or datetime(). + 'mtime', # same as .date_time + 'ctime', + 'atime', + 'arctime', + + # RAR internals + 'name_size', + 'header_size', + 'header_crc', + 'file_offset', + 'add_size', + 'header_data', + 'header_base', + 'header_offset', + 'salt', + 'volume_file', + ) + + def isdir(self): + '''Returns True if the entry is a directory.''' + if self.type == RAR_BLOCK_FILE: + return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY + return False + + def needs_password(self): + return self.flags & RAR_FILE_PASSWORD + + +class RarFile(object): + '''Parse RAR structure, provide access to files in archive. + + @ivar comment: + Archive comment (unicode string or None). + ''' + + def __init__(self, rarfile, mode="r", charset=None, info_callback=None, crc_check = True): + """Open and parse a RAR archive. + + @param rarfile: archive file name + @param mode: only 'r' is supported. + @param charset: fallback charset to use, if filenames are not already Unicode-enabled. + @param info_callback: debug callback, gets to see all archive entries. 
+ @param crc_check: set to False to disable CRC checks + """ + self.rarfile = rarfile + self.comment = None + self._charset = charset or DEFAULT_CHARSET + self._info_callback = info_callback + + self._info_list = [] + self._info_map = {} + self._needs_password = False + self._password = None + self._crc_check = crc_check + + self._main = None + + if mode != "r": + raise NotImplementedError("RarFile supports only mode=r") + + self._parse() + + def setpassword(self, password): + '''Sets the password to use when extracting.''' + self._password = password + if not self._main: + self._parse() + + def needs_password(self): + '''Returns True if any archive entries require password for extraction.''' + return self._needs_password + + def namelist(self): + '''Return list of filenames in archive.''' + return [f.filename for f in self._info_list] + + def infolist(self): + '''Return RarInfo objects for all files/directories in archive.''' + return self._info_list + + def getinfo(self, fname): + '''Return RarInfo for file.''' + + if isinstance(fname, RarInfo): + return fname + + # accept both ways here + if PATH_SEP == '/': + fname2 = fname.replace("\\", "/") + else: + fname2 = fname.replace("/", "\\") + + try: + return self._info_map[fname] + except KeyError: + try: + return self._info_map[fname2] + except KeyError: + raise NoRarEntry("No such file: "+fname) + + def open(self, fname, mode = 'r', psw = None): + '''Return open file object, where the data can be read. + + The object implements io.RawIOBase interface, so it can + be further wrapped with io.BufferedReader and io.TextIOWrapper. + + On older Python where io module is not available, it implements + only .read(), .seek(), .tell() and .close() methods. + + The object is seekable, although the seeking is fast only on + uncompressed files, on compressed files the seeking is implemented + by reading ahead and/or restarting the decompression. + + @param fname: file name or RarInfo instance. + @param mode: must be 'r' + @param psw: password to use for extracting. + ''' + + if mode != 'r': + raise NotImplementedError("RarFile.open() supports only mode=r") + + # entry lookup + inf = self.getinfo(fname) + if inf.isdir(): + raise TypeError("Directory does not have any data: " + inf.filename) + + if inf.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) + + # check password + if inf.needs_password(): + psw = psw or self._password + if psw is None: + raise PasswordRequired("File %s requires password" % inf.filename) + else: + psw = None + + # is temp write usable? + if not USE_EXTRACT_HACK or not self._main: + use_hack = 0 + elif self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD): + use_hack = 0 + elif inf.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + use_hack = 0 + elif inf.file_size > HACK_SIZE_LIMIT: + use_hack = 0 + else: + use_hack = 1 + + # now extract + if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0: + return self._open_clear(inf) + elif use_hack: + return self._open_hack(inf, psw) + else: + return self._open_unrar(self.rarfile, inf, psw) + + def read(self, fname, psw = None): + """Return uncompressed data for archive entry. + + For longer files using .open() may be better idea. + + @param fname: filename or RarInfo instance + @param psw: password to use for extracting. 
+ """ + + f = self.open(fname, 'r', psw) + try: + return f.read() + finally: + f.close() + + def close(self): + """Release open resources.""" + pass + + def printdir(self): + """Print archive file list to stdout.""" + for f in self._info_list: + print(f.filename) + + def extract(self, member, path=None, pwd=None): + """Extract single file into current directory. + + @param member: filename or RarInfo instance + @param path: optional destination path + @param pwd: optional password to use + """ + if isinstance(member, RarInfo): + fname = member.filename + else: + fname = member + self._extract([fname], path, pwd) + + def extractall(self, path=None, members=None, pwd=None): + """Extract all files into current directory. + + @param path: optional destination path + @param members: optional filename or RarInfo instance list to extract + @param pwd: optional password to use + """ + fnlist = [] + if members is not None: + for m in members: + if isinstance(m, RarInfo): + fnlist.append(m.filename) + else: + fnlist.append(m) + self._extract(fnlist, path, pwd) + + def testrar(self): + """Let 'unrar' test the archive. + """ + cmd = [UNRAR_TOOL] + list(TEST_ARGS) + if self._password is not None: + cmd.append('-p' + self._password) + else: + cmd.append('-p-') + cmd.append(self.rarfile) + p = custom_popen(cmd) + p.communicate() + if p.returncode != 0: + raise BadRarFile("Testing failed") + + ## + ## private methods + ## + + # store entry + def _process_entry(self, item): + if item.type == RAR_BLOCK_FILE: + # use only first part + if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + # remember if any items require password + if item.needs_password(): + self._needs_password = True + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old.compress_size += item.compress_size + + # parse new-style comment + if item.type == RAR_BLOCK_SUB and item.filename == 'CMT': + if not NEED_COMMENTS: + pass + elif item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + pass + elif item.flags & RAR_FILE_SOLID: + # file comment + cmt = self._read_comment_v3(item, self._password) + if len(self._info_list) > 0: + old = self._info_list[-1] + old.comment = cmt + else: + # archive comment + cmt = self._read_comment_v3(item, self._password) + self.comment = cmt + + if self._info_callback: + self._info_callback(item) + + # read rar + def _parse(self): + self._fd = None + try: + self._parse_real() + finally: + if self._fd: + self._fd.close() + self._fd = None + + def _parse_real(self): + fd = open(self.rarfile, "rb") + self._fd = fd + id = fd.read(len(RAR_ID)) + if id != RAR_ID: + raise NotRarFile("Not a Rar archive: "+self.rarfile) + + volume = 0 # first vol (.rar) is 0 + more_vols = 0 + endarc = 0 + volfile = self.rarfile + while 1: + if endarc: + h = None # don't read past ENDARC + else: + h = self._parse_header(fd) + if not h: + if more_vols: + volume += 1 + volfile = self._next_volname(volfile) + fd.close() + fd = open(volfile, "rb") + self._fd = fd + more_vols = 0 + endarc = 0 + continue + break + h.volume = volume + h.volume_file = volfile + + if h.type == RAR_BLOCK_MAIN and not self._main: + self._main = h + if h.flags & RAR_MAIN_NEWNUMBERING: + # RAR 2.x does not set FIRSTVOLUME, + # so check it only if NEWNUMBERING is used + if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0: + raise NeedFirstVolume("Need to start from first volume") + if h.flags & RAR_MAIN_PASSWORD: + self._needs_password = True + 
if not self._password: + self._main = None + break + elif h.type == RAR_BLOCK_ENDARC: + more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME + endarc = 1 + elif h.type == RAR_BLOCK_FILE: + # RAR 2.x does not write RAR_BLOCK_ENDARC + if h.flags & RAR_FILE_SPLIT_AFTER: + more_vols = 1 + # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME + if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Need to start from first volume") + + # store it + self._process_entry(h) + + # go to next header + if h.add_size > 0: + fd.seek(h.file_offset + h.add_size, 0) + + # AES encrypted headers + _last_aes_key = (None, None, None) # (salt, key, iv) + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto('Cannot parse encrypted headers - no crypto') + salt = fd.read(8) + if self._last_aes_key[0] == salt: + key, iv = self._last_aes_key[1:] + else: + key, iv = rar3_s2k(self._password, salt) + self._last_aes_key = (salt, key, iv) + return HeaderDecrypt(fd, key, iv) + + # read single header + def _parse_header(self, fd): + try: + # handle encrypted headers + if self._main and self._main.flags & RAR_MAIN_PASSWORD: + if not self._password: + return + fd = self._decrypt_header(fd) + + # now read actual header + return self._parse_block_header(fd) + except struct.error: + if REPORT_BAD_HEADER: + raise BadRarFile('Broken header in RAR file') + return None + + # common header + def _parse_block_header(self, fd): + h = RarInfo() + h.header_offset = fd.tell() + h.comment = None + + # read and parse base header + buf = fd.read(S_BLK_HDR.size) + if not buf: + return None + t = S_BLK_HDR.unpack_from(buf) + h.header_crc, h.type, h.flags, h.header_size = t + h.header_base = S_BLK_HDR.size + pos = S_BLK_HDR.size + + # read full header + if h.header_size > S_BLK_HDR.size: + h.header_data = buf + fd.read(h.header_size - S_BLK_HDR.size) + else: + h.header_data = buf + h.file_offset = fd.tell() + + # unexpected EOF? + if len(h.header_data) != h.header_size: + if REPORT_BAD_HEADER: + raise BadRarFile('Unexpected EOF when reading header') + return None + + # block has data assiciated with it? + if h.flags & RAR_LONG_BLOCK: + h.add_size = S_LONG.unpack_from(h.header_data, pos)[0] + else: + h.add_size = 0 + + # parse interesting ones, decide header boundaries for crc + if h.type == RAR_BLOCK_MARK: + return h + elif h.type == RAR_BLOCK_MAIN: + h.header_base += 6 + if h.flags & RAR_MAIN_ENCRYPTVER: + h.header_base += 1 + if h.flags & RAR_MAIN_COMMENT: + self._parse_subblocks(h, h.header_base) + self.comment = h.comment + elif h.type == RAR_BLOCK_FILE: + self._parse_file_header(h, pos) + elif h.type == RAR_BLOCK_SUB: + self._parse_file_header(h, pos) + h.header_base = h.header_size + elif h.type == RAR_BLOCK_OLD_AUTH: + h.header_base += 8 + elif h.type == RAR_BLOCK_OLD_EXTRA: + h.header_base += 7 + else: + h.header_base = h.header_size + + # check crc + if h.type == RAR_BLOCK_OLD_SUB: + crcdat = h.header_data[2:] + fd.read(h.add_size) + else: + crcdat = h.header_data[2:h.header_base] + + calc_crc = crc32(crcdat) & 0xFFFF + + # return good header + if h.header_crc == calc_crc: + return h + + # need to panic? 
+ if REPORT_BAD_HEADER: + xlen = len(crcdat) + crcdat = h.header_data[2:] + msg = 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % ( h.type, h.header_crc, calc_crc, xlen ) + xlen = len(crcdat) + while xlen >= S_BLK_HDR.size - 2: + crc = crc32(crcdat[:xlen]) & 0xFFFF + if crc == h.header_crc: + msg += ' / crc match, xlen = %d' % xlen + xlen -= 1 + raise BadRarFile(msg) + + # instead panicing, send eof + return None + + # read file-specific header + def _parse_file_header(self, h, pos): + fld = S_FILE_HDR.unpack_from(h.header_data, pos) + h.compress_size = fld[0] + h.file_size = fld[1] + h.host_os = fld[2] + h.CRC = fld[3] + h.date_time = parse_dos_time(fld[4]) + h.extract_version = fld[5] + h.compress_type = fld[6] + h.name_size = fld[7] + h.mode = fld[8] + pos += S_FILE_HDR.size + + if h.flags & RAR_FILE_LARGE: + h1 = S_LONG.unpack_from(h.header_data, pos)[0] + h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0] + h.compress_size |= h1 << 32 + h.file_size |= h2 << 32 + pos += 8 + h.add_size = h.compress_size + + name = h.header_data[pos : pos + h.name_size ] + pos += h.name_size + if h.flags & RAR_FILE_UNICODE: + nul = name.find(ZERO) + h.orig_filename = name[:nul] + u = UnicodeFilename(h.orig_filename, name[nul + 1 : ]) + h.filename = u.decode() + + # if parsing failed fall back to simple name + if u.failed: + h.filename = self._decode(h.orig_filename) + else: + h.orig_filename = name + h.filename = self._decode(name) + + # change separator, if requested + if PATH_SEP != '\\': + h.filename = h.filename.replace('\\', PATH_SEP) + + if h.flags & RAR_FILE_SALT: + h.salt = h.header_data[pos : pos + 8] + pos += 8 + else: + h.salt = None + + # optional extended time stamps + if h.flags & RAR_FILE_EXTTIME: + pos = self._parse_ext_time(h, pos) + else: + h.mtime = h.atime = h.ctime = h.arctime = None + + # base header end + h.header_base = pos + + if h.flags & RAR_FILE_COMMENT: + self._parse_subblocks(h, pos) + + # convert timestamps + if USE_DATETIME: + h.date_time = to_datetime(h.date_time) + h.mtime = to_datetime(h.mtime) + h.atime = to_datetime(h.atime) + h.ctime = to_datetime(h.ctime) + h.arctime = to_datetime(h.arctime) + + # .mtime is .date_time with more precision + if h.mtime: + if USE_DATETIME: + h.date_time = h.mtime + else: + # keep seconds int + h.date_time = h.mtime[:5] + (int(h.mtime[5]),) + + return pos + + # find old-style comment subblock + def _parse_subblocks(self, h, pos): + hdata = h.header_data + while pos < len(hdata): + # ordinary block header + t = S_BLK_HDR.unpack_from(hdata, pos) + scrc, stype, sflags, slen = t + pos_next = pos + slen + pos += S_BLK_HDR.size + + # corrupt header + if pos_next < pos: + break + + # followed by block-specific header + if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next: + declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) + pos += S_COMMENT_HDR.size + data = hdata[pos : pos_next] + cmt = rar_decompress(ver, meth, data, declen, sflags, + crc, self._password) + if not self._crc_check: + h.comment = self._decode_comment(cmt) + elif crc32(cmt) & 0xFFFF == crc: + h.comment = self._decode_comment(cmt) + + pos = pos_next + + def _parse_ext_time(self, h, pos): + data = h.header_data + + # flags and rest of data can be missing + flags = 0 + if pos + 2 <= len(data): + flags = S_SHORT.unpack_from(data, pos)[0] + pos += 2 + + h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time) + h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos) + h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos) 
+ h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos) + return pos + + def _parse_xtime(self, flag, data, pos, dostime = None): + unit = 10000000.0 # 100 ns units + if flag & 8: + if not dostime: + t = S_LONG.unpack_from(data, pos)[0] + dostime = parse_dos_time(t) + pos += 4 + rem = 0 + cnt = flag & 3 + for i in range(cnt): + b = S_BYTE.unpack_from(data, pos)[0] + rem = (b << 16) | (rem >> 8) + pos += 1 + sec = dostime[5] + rem / unit + if flag & 4: + sec += 1 + dostime = dostime[:5] + (sec,) + return dostime, pos + + # given current vol name, construct next one + def _next_volname(self, volfile): + if self._main.flags & RAR_MAIN_NEWNUMBERING: + return self._next_newvol(volfile) + return self._next_oldvol(volfile) + + # new-style next volume + def _next_newvol(self, volfile): + i = len(volfile) - 1 + while i >= 0: + if volfile[i] >= '0' and volfile[i] <= '9': + return self._inc_volname(volfile, i) + i -= 1 + raise BadRarName("Cannot construct volume name: "+volfile) + + # old-style next volume + def _next_oldvol(self, volfile): + # rar -> r00 + if volfile[-4:].lower() == '.rar': + return volfile[:-2] + '00' + return self._inc_volname(volfile, len(volfile) - 1) + + # increase digits with carry, otherwise just increment char + def _inc_volname(self, volfile, i): + fn = list(volfile) + while i >= 0: + if fn[i] != '9': + fn[i] = chr(ord(fn[i]) + 1) + break + fn[i] = '0' + i -= 1 + return ''.join(fn) + + def _open_clear(self, inf): + return DirectReader(self, inf) + + # put file compressed data into temporary .rar archive, and run + # unrar on that, thus avoiding unrar going over whole archive + def _open_hack(self, inf, psw = None): + BSIZE = 32*1024 + + size = inf.compress_size + inf.header_size + rf = open(inf.volume_file, "rb", 0) + rf.seek(inf.header_offset) + + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + + try: + # create main header: crc, type, flags, size, res1, res2 + mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4) + tmpf.write(RAR_ID + mh) + while size > 0: + if size > BSIZE: + buf = rf.read(BSIZE) + else: + buf = rf.read(size) + if not buf: + raise BadRarFile('read failed: ' + inf.filename) + tmpf.write(buf) + size -= len(buf) + tmpf.close() + rf.close() + except: + rf.close() + tmpf.close() + os.unlink(tmpname) + raise + + return self._open_unrar(tmpname, inf, psw, tmpname) + + def _read_comment_v3(self, inf, psw=None): + + # read data + rf = open(inf.volume_file, "rb") + rf.seek(inf.file_offset) + data = rf.read(inf.compress_size) + rf.close() + + # decompress + cmt = rar_decompress(inf.extract_version, inf.compress_type, data, + inf.file_size, inf.flags, inf.CRC, psw, inf.salt) + + # check crc + if self._crc_check: + crc = crc32(cmt) + if crc < 0: + crc += (long(1) << 32) + if crc != inf.CRC: + return None + + return self._decode_comment(cmt) + + # extract using unrar + def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None): + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + if psw is not None: + cmd.append("-p" + psw) + cmd.append(rarfile) + + # not giving filename avoids encoding related problems + if not tmpfile: + fn = inf.filename + if PATH_SEP != os.sep: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # read from unrar pipe + return PipeReader(self, inf, cmd, tmpfile) + + def _decode(self, val): + for c in TRY_ENCODINGS: + try: + return val.decode(c) + except UnicodeError: + pass + return val.decode(self._charset, 'replace') + + def _decode_comment(self, val): + if UNICODE_COMMENTS: + return self._decode(val) + return val 
+ + # call unrar to extract a file + def _extract(self, fnlist, path=None, psw=None): + cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) + + # pasoword + psw = psw or self._password + if psw is not None: + cmd.append('-p' + psw) + else: + cmd.append('-p-') + + # rar file + cmd.append(self.rarfile) + + # file list + for fn in fnlist: + if os.sep != PATH_SEP: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # destination path + if path is not None: + cmd.append(path + os.sep) + + # call + p = custom_popen(cmd) + p.communicate() + +## +## Utility classes +## + +class UnicodeFilename: + """Handle unicode filename decompression""" + + def __init__(self, name, encdata): + self.std_name = bytearray(name) + self.encdata = bytearray(encdata) + self.pos = self.encpos = 0 + self.buf = bytearray() + self.failed = 0 + + def enc_byte(self): + try: + c = self.encdata[self.encpos] + self.encpos += 1 + return c + except IndexError: + self.failed = 1 + return 0 + + def std_byte(self): + try: + return self.std_name[self.pos] + except IndexError: + self.failed = 1 + return ord('?') + + def put(self, lo, hi): + self.buf.append(lo) + self.buf.append(hi) + self.pos += 1 + + def decode(self): + hi = self.enc_byte() + flagbits = 0 + while self.encpos < len(self.encdata): + if flagbits == 0: + flags = self.enc_byte() + flagbits = 8 + flagbits -= 2 + t = (flags >> flagbits) & 3 + if t == 0: + self.put(self.enc_byte(), 0) + elif t == 1: + self.put(self.enc_byte(), hi) + elif t == 2: + self.put(self.enc_byte(), self.enc_byte()) + else: + n = self.enc_byte() + if n & 0x80: + c = self.enc_byte() + for i in range((n & 0x7f) + 2): + lo = (self.std_byte() + c) & 0xFF + self.put(lo, hi) + else: + for i in range(n + 2): + self.put(self.std_byte(), 0) + return self.buf.decode("utf-16le", "replace") + + +class RarExtFile(RawIOBase): + """Base class for 'file-like' object that RarFile.open() returns. + + Provides public methods and common crc checking. + + Behaviour: + - no short reads - .read() and .readinfo() read as much as requested. + - no internal buffer, use io.BufferedReader for that. + + @ivar name: + filename of the archive entry. + """ + + def __init__(self, rf, inf): + """Fill common fields""" + + RawIOBase.__init__(self) + + # standard io.* properties + self.name = inf.filename + self.mode = 'rb' + + self.rf = rf + self.inf = inf + self.crc_check = rf._crc_check + self.fd = None + self.CRC = 0 + self.remain = 0 + + self._open() + + def _open(self): + if self.fd: + self.fd.close() + self.fd = None + self.CRC = 0 + self.remain = self.inf.file_size + + def read(self, cnt = None): + """Read all or specified amount of data from archive entry.""" + + # sanitize cnt + if cnt is None or cnt < 0: + cnt = self.remain + elif cnt > self.remain: + cnt = self.remain + if cnt == 0: + return EMPTY + + # actual read + data = self._read(cnt) + if data: + self.CRC = crc32(data, self.CRC) + self.remain -= len(data) + + # done? 
+ if not data or self.remain == 0: + #self.close() + self._check() + return data + + def _check(self): + """Check final CRC.""" + if not self.crc_check: + return + if self.remain != 0: + raise BadRarFile("Failed the read enough data") + crc = self.CRC + if crc < 0: + crc += (long(1) << 32) + if crc != self.inf.CRC: + raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename) + + def _read(self, cnt): + """Actual read that gets sanitized cnt.""" + + def close(self): + """Close open resources.""" + + RawIOBase.close(self) + + if self.fd: + self.fd.close() + self.fd = None + + def __del__(self): + """Hook delete to make sure tempfile is removed.""" + self.close() + + def readinto(self, buf): + """Zero-copy read directly into buffer. + + Returns bytes read. + """ + + data = self.read(len(buf)) + n = len(data) + try: + buf[:n] = data + except TypeError: + import array + if not isinstance(buf, array.array): + raise + buf[:n] = array.array(buf.typecode, data) + return n + + def tell(self): + """Return current reading position in uncompressed data.""" + return self.inf.file_size - self.remain + + def seek(self, ofs, whence = 0): + """Seek in data.""" + + # disable crc check when seeking + self.crc_check = 0 + + fsize = self.inf.file_size + cur_ofs = self.tell() + + if whence == 0: # seek from beginning of file + new_ofs = ofs + elif whence == 1: # seek from current position + new_ofs = cur_ofs + ofs + elif whence == 2: # seek from end of file + new_ofs = fsize + ofs + else: + raise ValueError('Invalid value for whence') + + # sanity check + if new_ofs < 0: + new_ofs = 0 + elif new_ofs > fsize: + new_ofs = fsize + + # do the actual seek + if new_ofs >= cur_ofs: + self._skip(new_ofs - cur_ofs) + else: + # process old data ? + #self._skip(fsize - cur_ofs) + # reopen and seek + self._open() + self._skip(new_ofs) + return self.tell() + + def _skip(self, cnt): + """Read and discard data""" + while cnt > 0: + if cnt > 8192: + buf = self.read(8192) + else: + buf = self.read(cnt) + if not buf: + break + cnt -= len(buf) + + def readable(self): + """Returns True""" + return True + + def seekable(self): + """Returns True""" + return True + + def readall(self): + """Read all remaining data""" + # avoid RawIOBase default impl + return self.read() + + +class PipeReader(RarExtFile): + """Read data from pipe, handle tempfile cleanup.""" + + def __init__(self, rf, inf, cmd, tempfile=None): + self.cmd = cmd + self.proc = None + self.tempfile = tempfile + RarExtFile.__init__(self, rf, inf) + + def _close_proc(self): + if not self.proc: + return + if self.proc.stdout: + self.proc.stdout.close() + if self.proc.stdin: + self.proc.stdin.close() + if self.proc.stderr: + self.proc.stderr.close() + self.proc.wait() + self.proc = None + + def _open(self): + RarExtFile._open(self) + + # stop old process + self._close_proc() + + # launch new process + self.proc = custom_popen(self.cmd) + self.fd = self.proc.stdout + + # avoid situation where unrar waits on stdin + if self.proc.stdin: + self.proc.stdin.close() + + def _read(self, cnt): + """Read from pipe.""" + return self.fd.read(cnt) + + def close(self): + """Close open resources.""" + + self._close_proc() + RarExtFile.close(self) + + if self.tempfile: + try: + os.unlink(self.tempfile) + except OSError: + pass + self.tempfile = None + + if have_memoryview: + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + cnt = len(buf) + if cnt > self.remain: + cnt = self.remain + vbuf = memoryview(buf) + res = self.fd.readinto(vbuf[0:cnt]) + if res: + 
if self.crc_check: + self.CRC = crc32(vbuf[:res], self.CRC) + self.remain -= res + return res + + +class DirectReader(RarExtFile): + """Read uncompressed data directly from archive.""" + + def _open(self): + RarExtFile._open(self) + + self.volfile = self.inf.volume_file + self.fd = open(self.volfile, "rb", 0) + self.fd.seek(self.inf.header_offset, 0) + self.cur = self.rf._parse_header(self.fd) + self.cur_avail = self.cur.add_size + + def _skip(self, cnt): + """RAR Seek, skipping through rar files to get to correct position + """ + + while cnt > 0: + # next vol needed? + if self.cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self.cur_avail: + cnt -= self.cur_avail + self.remain -= self.cur_avail + self.cur_avail = 0 + else: + self.fd.seek(cnt, 1) + self.cur_avail -= cnt + self.remain -= cnt + cnt = 0 + + def _read(self, cnt): + """Read from potentially multi-volume archive.""" + + buf = EMPTY + while cnt > 0: + # next vol needed? + if self.cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self.cur_avail: + data = self.fd.read(self.cur_avail) + else: + data = self.fd.read(cnt) + if not data: + break + + # got some data + cnt -= len(data) + self.cur_avail -= len(data) + if buf: + buf += data + else: + buf = data + + return buf + + def _open_next(self): + """Proceed to next volume.""" + + # is the file split over archives? + if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0: + return False + + if self.fd: + self.fd.close() + self.fd = None + + # open next part + self.volfile = self.rf._next_volname(self.volfile) + fd = open(self.volfile, "rb", 0) + self.fd = fd + + # loop until first file header + while 1: + cur = self.rf._parse_header(fd) + if not cur: + raise BadRarFile("Unexpected EOF") + if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN): + if cur.add_size: + fd.seek(cur.add_size, 1) + continue + if cur.orig_filename != self.inf.orig_filename: + raise BadRarFile("Did not found file entry") + self.cur = cur + self.cur_avail = cur.add_size + return True + + if have_memoryview: + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + got = 0 + vbuf = memoryview(buf) + while got < len(buf): + # next vol needed? 
+ if self.cur_avail == 0: + if not self._open_next(): + break + + # lenght for next read + cnt = len(buf) - got + if cnt > self.cur_avail: + cnt = self.cur_avail + + # read into temp view + res = self.fd.readinto(vbuf[got : got + cnt]) + if not res: + break + if self.crc_check: + self.CRC = crc32(vbuf[got : got + res], self.CRC) + self.cur_avail -= res + self.remain -= res + got += res + return got + + +class HeaderDecrypt: + """File-like object that decrypts from another file""" + def __init__(self, f, key, iv): + self.f = f + self.ciph = AES.new(key, AES.MODE_CBC, iv) + self.buf = EMPTY + + def tell(self): + return self.f.tell() + + def read(self, cnt=None): + if cnt > 8*1024: + raise BadRarFile('Bad count to header decrypt - wrong password?') + + # consume old data + if cnt <= len(self.buf): + res = self.buf[:cnt] + self.buf = self.buf[cnt:] + return res + res = self.buf + self.buf = EMPTY + cnt -= len(res) + + # decrypt new data + BLK = self.ciph.block_size + while cnt > 0: + enc = self.f.read(BLK) + if len(enc) < BLK: + break + dec = self.ciph.decrypt(enc) + if cnt >= len(dec): + res += dec + cnt -= len(dec) + else: + res += dec[:cnt] + self.buf = dec[cnt:] + cnt = 0 + + return res + +## +## Utility functions +## + +def rar3_s2k(psw, salt): + """String-to-key hash for RAR3.""" + + seed = psw.encode('utf-16le') + salt + iv = EMPTY + h = sha1() + for i in range(16): + for j in range(0x4000): + cnt = S_LONG.pack(i*0x4000 + j) + h.update(seed + cnt[:3]) + if j == 0: + iv += h.digest()[19:20] + key_be = h.digest()[:16] + key_le = pack("<LLLL", *unpack(">LLLL", key_be)) + return key_le, iv + +def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): + """Decompress blob of compressed data. + + Used for data with non-standard header - eg. comments. + """ + + # already uncompressed? 
+ if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0: + return data + + # take only necessary flags + flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK) + flags |= RAR_LONG_BLOCK + + # file header + fname = bytes('data', 'ascii') + date = 0 + mode = 0x20 + fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc, + date, vers, meth, len(fname), mode) + fhdr += fname + if flags & RAR_FILE_SALT: + if not salt: + return EMPTY + fhdr += salt + + # full header + hlen = S_BLK_HDR.size + len(fhdr) + hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr + hcrc = crc32(hdr[2:]) & 0xFFFF + hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr + + # archive main header + mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4) + + # decompress via temp rar + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + try: + tmpf.write(RAR_ID + mh + hdr + data) + tmpf.close() + + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + if psw is not None and (flags & RAR_FILE_PASSWORD): + cmd.append("-p" + psw) + else: + cmd.append("-p-") + cmd.append(tmpname) + + p = custom_popen(cmd) + return p.communicate()[0] + finally: + tmpf.close() + os.unlink(tmpname) + +def to_datetime(t): + """Convert 6-part time tuple into datetime object.""" + + if t is None: + return None + + # extract values + year, mon, day, h, m, xs = t + s = int(xs) + us = int(1000000 * (xs - s)) + + # assume the values are valid + try: + return datetime(year, mon, day, h, m, s, us) + except ValueError: + pass + + # sanitize invalid values + MDAY = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + if mon < 1: mon = 1 + if mon > 12: mon = 12 + if day < 1: day = 1 + if day > MDAY[mon]: day = MDAY[mon] + if h > 23: h = 23 + if m > 59: m = 59 + if s > 59: s = 59 + if mon == 2 and day == 29: + try: + return datetime(year, mon, day, h, m, s, us) + except ValueError: + day = 28 + return datetime(year, mon, day, h, m, s, us) + +def parse_dos_time(stamp): + """Parse standard 32-bit DOS timestamp.""" + + sec = stamp & 0x1F; stamp = stamp >> 5 + min = stamp & 0x3F; stamp = stamp >> 6 + hr = stamp & 0x1F; stamp = stamp >> 5 + day = stamp & 0x1F; stamp = stamp >> 5 + mon = stamp & 0x0F; stamp = stamp >> 4 + yr = (stamp & 0x7F) + 1980 + return (yr, mon, day, hr, min, sec * 2) + +def custom_popen(cmd): + """Disconnect cmd from parent fds, read only from stdout.""" + + # needed for py2exe + creationflags = 0 + if sys.platform == 'win32': + creationflags = 0x08000000 # CREATE_NO_WINDOW + + # run command + p = Popen(cmd, bufsize = 0, stdout = PIPE, stdin = PIPE, stderr = STDOUT, + creationflags = creationflags) + return p + diff --git a/README.md b/README.md index 17a82dd..adc28f5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,47 @@ -kcc -=== +KindleComicConverter +============= -KindleComicConverter \ No newline at end of file +KindleComicConverter is a Mac OS X AppleScript droplet that converts image folders into a comic-type Mobipocket ebook, taking advantage of the new Panel View mode on Amazon's Kindle. + +REQUIREMENTS +------------- + - kindlegen in /usr/local/bin/ + - [unrar](http://www.rarlab.com/download.htm) and [rarfile.py](http://developer.berlios.de/project/showfiles.php?group_id=5373&release_id=18844) for comic2ebook.py's automatic CBR extraction + +USAGE +------------- +Drop a folder onto the droplet; after a while you'll get a comic-type .mobi to sideload onto your Kindle. +The script takes care of calling comic2ebook.py, kindlegen and kindlestrip.py.
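For orientation only, those three steps could be driven by hand roughly like this. This is a hypothetical sketch, not the droplet's actual AppleScript: the tool locations, the title argument and the output file names are assumptions based on the defaults described in this README.

```python
# Hypothetical sketch of the droplet's pipeline; paths, title and output
# names are assumptions, not taken from the AppleScript itself.
import subprocess

srcdir = "/path/to/comic_folder"   # a folder of page images (or an extracted CBZ/CBR)

# 1. generate one HTML page per image plus content.ncx and content.opf
subprocess.check_call(["./comic2ebook.py", srcdir, "My Comic"])

# 2. build the Mobipocket file from the generated OPF with Amazon's kindlegen
subprocess.check_call(["/usr/local/bin/kindlegen", srcdir + "/content.opf"])

# 3. strip kindlegen's embedded source record to shrink the file (what kindlestrip.py is for)
subprocess.check_call(["./kindlestrip.py", srcdir + "/content.mobi",
                       srcdir + "/comic_stripped.mobi"])
```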
+For the standalone comic2ebook.py script, please refer to its CLI help. + +WARNING: at the moment the script does not perform any image manipulation. Image optimization and resizing (HD Kindles want 758x1024, non-HD ones 600x800) are up to you. + + +CREDITS +------------- +This script exists as a cross-platform alternative to KindleComicParser by Dc5e +(published at [http://www.mobileread.com/forums/showthread.php?t=192783](http://www.mobileread.com/forums/showthread.php?t=192783)) + +This droplet relies on and includes KindleStrip, (C) Paul Durrant, released into the public domain +([http://www.mobileread.com/forums/showthread.php?t=96903](http://www.mobileread.com/forums/showthread.php?t=96903)) + +The icon for the droplet is by Nikolay Verin ([http://ncrow.deviantart.com/](http://ncrow.deviantart.com/)) and is released under the CC Attribution-NonCommercial-ShareAlike 3.0 Unported License + +You also need kindlegen v2.7 (with KF8 support), which can be downloaded from Amazon's website +and must be installed in /usr/local/bin/ + + +CHANGELOG +------------- + - 1.00 - Initial version + - 1.10 - Added support for CBZ/CBR files in comic2ebook.py + +TODO +------------- + - Add transparent support for CBZ/CBR archives + - Bundle a script to manipulate images (to get rid of Mangle, E-nki and the like) + +TODO for comic2ebook.py +------------- + - Exit gracefully for CBR when neither rarfile.py nor an unrar executable is found + - Improve error reporting \ No newline at end of file diff --git a/comic2ebook.py b/comic2ebook.py new file mode 100755 index 0000000..4fe1417 --- /dev/null +++ b/comic2ebook.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# +# Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com> +# +# Permission to use, copy, modify, and/or distribute this software for +# any purpose with or without fee is hereby granted, provided that the +# above copyright notice and this permission notice appear in all +# copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE +# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL +# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA +# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE.
+# +# Changelog +# 1.00 - Initial version +# 1.10 - Added support for CBZ/CBR files +# +# Todo: +# - Add gracefully exit for CBR if no rarfile.py and no unrar +# executable are found +# - Improve error reporting +# + +__version__ = '1.10' + +import os +import sys + +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + +class CBxArchive: + def __init__(self, origFileName): + self.cbxexts = ['.cbz', '.cbr'] + self.origFileName = origFileName + self.filename = os.path.splitext(origFileName) + self.path = self.filename[0] + + def isCbxFile(self): + result = (self.filename[1].lower() in self.cbxexts) + if result == True: + return result + return False + + def getPath(self): + return self.path + + def extractCBZ(self): + try: + from zipfile import ZipFile + except ImportError: + self.cbzFile = None + cbzFile = ZipFile(self.origFileName) + for f in cbzFile.namelist(): + if (f.startswith('__MACOSX') or f.endswith('.DS_Store')): + pass # skip MacOS special files + elif f.endswith('/'): + try: + os.makedirs(self.path+f) + except: + pass #the dir exists so we are going to extract the images only. + else: + cbzFile.extract(f, self.path) + + def extractCBR(self): + try: + import rarfile + except ImportError: + self.cbrFile = None + cbrFile = rarfile.RarFile(self.origFileName) + for f in cbrFile.namelist(): + if f.endswith('/'): + try: + os.makedirs(self.path+f) + except: + pass #the dir exists so we are going to extract the images only. + else: + cbrFile.extract(f, self.path) + + def extract(self): + if ('.cbr' == self.filename[1].lower()): + self.extractCBR() + elif ('.cbz' == self.filename[1].lower()): + self.extractCBZ() + dir = os.listdir(self.path) + if (len(dir) == 1): + import shutil + for f in os.listdir(self.path + "/" + dir[0]): + shutil.move(self.path + "/" + dir[0] + "/" + f,self.path) + os.rmdir(self.path + "/" + dir[0]) + +class HTMLbuilder: + def getResult(self): + if (self.filename[0].startswith('.') or (self.filename[1] != '.png' and self.filename[1] != '.jpg' and self.filename[1] != '.jpeg')): + return None + return self.filename + + def __init__(self, dstdir, file): + self.filename = os.path.splitext(file) + basefilename = self.filename[0] + ext = self.filename[1] + if (basefilename.startswith('.') or (ext != '.png' and ext != '.jpg' and ext != '.jpeg')): + return + htmlfile = dstdir + '/' + basefilename + '.html' + f = open(htmlfile, "w"); + f.writelines(["<!DOCTYPE html SYSTEM \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n", + "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n", + "<head>\n", + "<title>",basefilename,"</title>\n", + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>\n", + "</head>\n", + "<body>\n", + "<div><img src=\"",file,"\" /></div>\n", + "</body>\n", + "</html>" + ]) + f.close() + +class NCXbuilder: + def __init__(self, dstdir, title): + ncxfile = dstdir + '/content.ncx' + f = open(ncxfile, "w"); + f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", + "<!DOCTYPE ncx PUBLIC \"-//NISO//DTD ncx 2005-1//EN\" \"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd\">\n", + "<ncx version=\"2005-1\" xml:lang=\"en-US\" xmlns=\"http://www.daisy.org/z3986/2005/ncx/\">\n", + "<head>\n</head>\n", + "<docTitle><text>",title,"</text></docTitle>\n", + "<navMap></navMap>\n</ncx>" + ]) + f.close() + return + +class OPFBuilder: + def __init__(self, dstdir, title, filelist): + 
opffile = dstdir + '/content.opf' + # read the first file resolution + try: + from PIL import Image + im = Image.open(dstdir + "/" + filelist[0][0] + filelist[0][1]) + width, height = im.size + imgres = str(width) + "x" + str(height) + except ImportError: + print "Could not load PIL, falling back on default HD" + imgres = "758x1024" + f = open(opffile, "w"); + f.writelines(["<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", + "<package version=\"2.0\" unique-identifier=\"PrimaryID\" xmlns=\"http://www.idpf.org/2007/opf\">\n", + "<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:opf=\"http://www.idpf.org/2007/opf\">\n", + "<dc:title>",title,"</dc:title>\n", + "<dc:language>en-US</dc:language>\n", + "<meta name=\"book-type\" content=\"comic\"/>\n", + "<meta name=\"zero-gutter\" content=\"true\"/>\n", + "<meta name=\"zero-margin\" content=\"true\"/>\n", + "<meta name=\"fixed-layout\" content=\"true\"/>\n", + "<meta name=\"orientation-lock\" content=\"portrait\"/>\n", + "<meta name=\"original-resolution\" content=\"" + imgres + "\"/>\n", + "</metadata><manifest><item id=\"ncx\" href=\"content.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n"]) + for filename in filelist: + f.write("<item id=\"page_" + filename[0] + "\" href=\"" + filename[0] + ".html\" media-type=\"application/xhtml+xml\"/>\n") + for filename in filelist: + if ('.png' == filename[1]): + mt = 'image/png'; + else: + mt = 'image/jpeg'; + f.write("<item id=\"img_" + filename[0] + "\" href=\"" + filename[0] + filename[1] + "\" media-type=\"" + mt + "\"/>\n") + f.write("</manifest>\n<spine toc=\"ncx\">\n") + for filename in filelist: + f.write("<itemref idref=\"page_" + filename[0] + "\" />\n") + f.write("</spine>\n<guide>\n</guide>\n</package>\n") + f.close() + return + +if __name__ == "__main__": + sys.stdout=Unbuffered(sys.stdout) + print ('comic2ebook v%(__version__)s. ' + 'Written 2012 by Ciro Mattia Gonano.' % globals()) + if len(sys.argv)<2 or len(sys.argv)>3: + print "Generates HTML, NCX and OPF for a Comic ebook from a bunch of images" + print "Optimized for creating Mobipockets to be read into Kindle Paperwhite" + print "Usage:" + print " %s <dir> <title>" % sys.argv[0] + print " <title> is optional" + sys.exit(1) + else: + dir = sys.argv[1] + cbx = CBxArchive(dir) + if cbx.isCbxFile(): + cbx.extract() + dir = cbx.getPath() + if len(sys.argv)==3: + title = sys.argv[2] + else: + title = "comic" + filelist = [] + for file in os.listdir(dir): + filename = HTMLbuilder(dir,file).getResult() + if (filename != None): + filelist.append(filename) + NCXbuilder(dir,title) + OPFBuilder(dir,title,filelist) + sys.exit(0) |
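Run standalone under Python 2, the script above expects a directory (or a .cbz/.cbr file) and an optional title, e.g. `./comic2ebook.py MyComic "My Comic"`, and writes the per-page HTML files plus content.ncx and content.opf into that directory, ready for kindlegen. The snippet below is a rough sketch of driving the same classes from other Python code; importing comic2ebook as a module and the input path used here are assumptions for illustration, not something the script itself documents.

```python
# Hypothetical driver that mirrors the __main__ block above (Python 2).
import os
from comic2ebook import CBxArchive, HTMLbuilder, NCXbuilder, OPFBuilder

src = "/path/to/MyComic.cbz"       # assumed input; a plain image folder also works
title = "My Comic"

cbx = CBxArchive(src)
if cbx.isCbxFile():
    cbx.extract()                  # unpack the CBZ/CBR next to the source file
    src = cbx.getPath()            # work on the extracted folder from here on

filelist = []
for name in os.listdir(src):
    entry = HTMLbuilder(src, name).getResult()   # writes the HTML page; None for non-images
    if entry is not None:
        filelist.append(entry)                   # (basename, extension) tuples

NCXbuilder(src, title)             # writes content.ncx
OPFBuilder(src, title, filelist)   # writes content.opf for kindlegen
```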