diff options
| -rw-r--r-- | .gitignore | 5 | ||||
| -rw-r--r-- | KindleComicConverter.app/Contents/Info.plist | 10 | ||||
| -rw-r--r-- | KindleComicConverter.app/Contents/Resources/Scripts/main.scpt | bin | 38514 -> 29152 bytes | |||
| -rw-r--r-- | KindleComicConverter.app/Contents/Resources/cbxarchive.py | 1 | ||||
| -rwxr-xr-x | KindleComicConverter.app/Contents/Resources/comic2ebook.py | 11 | ||||
| -rw-r--r-- | README.md | 1 | ||||
| -rw-r--r-- | ez_setup.py | 284 | ||||
| -rw-r--r-- | kcc/__init__.py | 0 | ||||
| -rw-r--r-- | kcc/cbxarchive.py (renamed from cbxarchive.py) | 0 | ||||
| -rwxr-xr-x | kcc/comic2ebook.py (renamed from comic2ebook.py) | 3 | ||||
| -rwxr-xr-x | kcc/image.py (renamed from image.py) | 0 | ||||
| -rwxr-xr-x | kcc/kindlestrip.py | 233 | ||||
| -rw-r--r-- | kcc/rarfile.py | 1706 | ||||
| -rw-r--r-- | resources/Info.plist | 65 | ||||
| -rw-r--r-- | resources/Scripts/main.scpt | bin | 0 -> 29152 bytes | |||
| -rw-r--r-- | resources/comic2ebook.icns | bin | 0 -> 366517 bytes | |||
| -rw-r--r-- | resources/description.rtfd/TXT.rtf | 22 | ||||
| -rw-r--r-- | resources/droplet.rsrc | bin | 0 -> 362 bytes | |||
| -rw-r--r-- | setup.py | 73 |
19 files changed, 2403 insertions, 11 deletions
diff --git a/.gitignore b/.gitignore index 222880a..88dba75 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +/*.pyc /*.cbz /*.cbr -/.idea \ No newline at end of file +/.idea +/dist +/build \ No newline at end of file diff --git a/KindleComicConverter.app/Contents/Info.plist b/KindleComicConverter.app/Contents/Info.plist index 600af91..0eb6ae9 100644 --- a/KindleComicConverter.app/Contents/Info.plist +++ b/KindleComicConverter.app/Contents/Info.plist @@ -24,15 +24,15 @@ <key>CFBundleExecutable</key> <string>droplet</string> <key>CFBundleGetInfoString</key> - <string>KindleComicConverter 1.0, Written 2012 by Ciro Mattia Gonano</string> + <string>KindleComicConverter 2.0, Written 2012 by Ciro Mattia Gonano</string> <key>CFBundleIconFile</key> <string>droplet</string> <key>CFBundleIdentifier</key> - <string>com.apple.ScriptEditor.id.5D4EC602-9033-4D02-AF60-6380F83B0145</string> + <string>com.github.ciromattia.kcc</string> <key>CFBundleInfoDictionaryVersion</key> <string>6.0</string> <key>CFBundleName</key> - <string>KindleComicConverter 1.0</string> + <string>KindleComicConverter 1.20</string> <key>CFBundlePackageType</key> <string>APPL</string> <key>CFBundleShortVersionString</key> @@ -57,9 +57,9 @@ <key>positionOfDivider</key> <real>568</real> <key>savedFrame</key> - <string>144 338 889 690 0 0 1680 1028 </string> + <string>188 368 889 690 0 0 1920 1058 </string> <key>selectedTabView</key> - <string>result</string> + <string>event log</string> </dict> </dict> </plist> diff --git a/KindleComicConverter.app/Contents/Resources/Scripts/main.scpt b/KindleComicConverter.app/Contents/Resources/Scripts/main.scpt index 02a10ac..48dfd96 100644 --- a/KindleComicConverter.app/Contents/Resources/Scripts/main.scpt +++ b/KindleComicConverter.app/Contents/Resources/Scripts/main.scpt Binary files differdiff --git a/KindleComicConverter.app/Contents/Resources/cbxarchive.py b/KindleComicConverter.app/Contents/Resources/cbxarchive.py index 18fe690..c852bba 100644 --- a/KindleComicConverter.app/Contents/Resources/cbxarchive.py +++ b/KindleComicConverter.app/Contents/Resources/cbxarchive.py @@ -56,6 +56,7 @@ class CBxArchive: import rarfile except ImportError: self.cbrFile = None + return cbrFile = rarfile.RarFile(self.origFileName) for f in cbrFile.namelist(): if (f.startswith('__MACOSX') or f.endswith('.DS_Store')): diff --git a/KindleComicConverter.app/Contents/Resources/comic2ebook.py b/KindleComicConverter.app/Contents/Resources/comic2ebook.py index a4bd469..655f06a 100755 --- a/KindleComicConverter.app/Contents/Resources/comic2ebook.py +++ b/KindleComicConverter.app/Contents/Resources/comic2ebook.py @@ -35,7 +35,7 @@ __version__ = '1.20' import os import sys -import cbxarchive +import image, cbxarchive class HTMLbuilder: @@ -153,7 +153,6 @@ if __name__ == "__main__": title = "comic" filelist = [] try: - import image print "Splitting double pages..." for file in os.listdir(dir): if (getImageFileName(file) != None): @@ -164,7 +163,7 @@ if __name__ == "__main__": print "Optimizing " + file + " for " + profile img = image.ComicPage(dir+'/'+file, profile) img.resizeImage() - img.frameImage() + #img.frameImage() img.quantizeImage() img.saveToDir(dir) except ImportError: @@ -172,9 +171,15 @@ if __name__ == "__main__": for file in os.listdir(dir): if (getImageFileName(file) != None and isInFilelist(file,filelist) == False): + # put credits at the end + if "credits" in file.lower(): + os.rename(dir+'/'+file, dir+'/ZZZ999_'+file) + file = 'ZZZ999_'+file filename = HTMLbuilder(dir,file).getResult() if (filename != None): filelist.append(filename) NCXbuilder(dir,title) + # ensure we're sorting files alphabetically + filelist = sorted(filelist, key=lambda name: name[0]) OPFBuilder(dir,title,filelist) sys.exit(0) diff --git a/README.md b/README.md index 1ef183c..b2236ee 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ and installed in `/usr/local/bin/` - Improve error reporting - Recurse into dirtree for multiple comics - Create a GUI to allow user control more options + - Support pages extraction from PDF files ## COPYRIGHT diff --git a/ez_setup.py b/ez_setup.py new file mode 100644 index 0000000..b74adc0 --- /dev/null +++ b/ez_setup.py @@ -0,0 +1,284 @@ +#!python +"""Bootstrap setuptools installation + +If you want to use setuptools in your package's setup.py, just include this +file in the same directory with it, and add this to the top of your setup.py:: + + from ez_setup import use_setuptools + use_setuptools() + +If you want to require a specific version of setuptools, set a download +mirror, or use an alternate download directory, you can do so by supplying +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. +""" +import sys +DEFAULT_VERSION = "0.6c11" +DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3] + +md5_data = { + 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca', + 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb', + 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b', + 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a', + 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618', + 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac', + 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5', + 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4', + 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c', + 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b', + 'setuptools-0.6c10-py2.3.egg': 'ce1e2ab5d3a0256456d9fc13800a7090', + 'setuptools-0.6c10-py2.4.egg': '57d6d9d6e9b80772c59a53a8433a5dd4', + 'setuptools-0.6c10-py2.5.egg': 'de46ac8b1c97c895572e5e8596aeb8c7', + 'setuptools-0.6c10-py2.6.egg': '58ea40aef06da02ce641495523a0b7f5', + 'setuptools-0.6c11-py2.3.egg': '2baeac6e13d414a9d28e7ba5b5a596de', + 'setuptools-0.6c11-py2.4.egg': 'bd639f9b0eac4c42497034dec2ec0c2b', + 'setuptools-0.6c11-py2.5.egg': '64c94f3bf7a72a13ec83e0b24f2749b2', + 'setuptools-0.6c11-py2.6.egg': 'bfa92100bd772d5a213eedd356d64086', + 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27', + 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277', + 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa', + 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e', + 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e', + 'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f', + 'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2', + 'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc', + 'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167', + 'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64', + 'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d', + 'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20', + 'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab', + 'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53', + 'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2', + 'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e', + 'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372', + 'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902', + 'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de', + 'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b', + 'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03', + 'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a', + 'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6', + 'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a', +} + +import sys, os +try: from hashlib import md5 +except ImportError: from md5 import md5 + +def _validate_md5(egg_name, data): + if egg_name in md5_data: + digest = md5(data).hexdigest() + if digest != md5_data[egg_name]: + print >>sys.stderr, ( + "md5 validation of %s failed! (Possible download problem?)" + % egg_name + ) + sys.exit(2) + return data + +def use_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + download_delay=15 +): + """Automatically find/download setuptools and make it available on sys.path + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end with + a '/'). `to_dir` is the directory where setuptools will be downloaded, if + it is not already available. If `download_delay` is specified, it should + be the number of seconds that will be paused before initiating a download, + should one be required. If an older version of setuptools is installed, + this routine will print a message to ``sys.stderr`` and raise SystemExit in + an attempt to abort the calling script. + """ + was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules + def do_download(): + egg = download_setuptools(version, download_base, to_dir, download_delay) + sys.path.insert(0, egg) + import setuptools; setuptools.bootstrap_install_from = egg + try: + import pkg_resources + except ImportError: + return do_download() + try: + pkg_resources.require("setuptools>="+version); return + except pkg_resources.VersionConflict, e: + if was_imported: + print >>sys.stderr, ( + "The required version of setuptools (>=%s) is not available, and\n" + "can't be installed while this script is running. Please install\n" + " a more recent version first, using 'easy_install -U setuptools'." + "\n\n(Currently using %r)" + ) % (version, e.args[0]) + sys.exit(2) + except pkg_resources.DistributionNotFound: + pass + + del pkg_resources, sys.modules['pkg_resources'] # reload ok + return do_download() + +def download_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + delay = 15 +): + """Download setuptools from a specified location and return its filename + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download attempt. + """ + import urllib2, shutil + egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) + url = download_base + egg_name + saveto = os.path.join(to_dir, egg_name) + src = dst = None + if not os.path.exists(saveto): # Avoid repeated downloads + try: + from distutils import log + if delay: + log.warn(""" +--------------------------------------------------------------------------- +This script requires setuptools version %s to run (even to display +help). I will attempt to download it for you (from +%s), but +you may need to enable firewall access for this script first. +I will start the download in %d seconds. + +(Note: if this machine does not have network access, please obtain the file + + %s + +and place it in this directory before rerunning this script.) +---------------------------------------------------------------------------""", + version, download_base, delay, url + ); from time import sleep; sleep(delay) + log.warn("Downloading %s", url) + src = urllib2.urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. + data = _validate_md5(egg_name, src.read()) + dst = open(saveto,"wb"); dst.write(data) + finally: + if src: src.close() + if dst: dst.close() + return os.path.realpath(saveto) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +def main(argv, version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + try: + import setuptools + except ImportError: + egg = None + try: + egg = download_setuptools(version, delay=0) + sys.path.insert(0,egg) + from setuptools.command.easy_install import main + return main(list(argv)+[egg]) # we're done here + finally: + if egg and os.path.exists(egg): + os.unlink(egg) + else: + if setuptools.__version__ == '0.0.1': + print >>sys.stderr, ( + "You have an obsolete version of setuptools installed. Please\n" + "remove it from your system entirely before rerunning this script." + ) + sys.exit(2) + + req = "setuptools>="+version + import pkg_resources + try: + pkg_resources.require(req) + except pkg_resources.VersionConflict: + try: + from setuptools.command.easy_install import main + except ImportError: + from easy_install import main + main(list(argv)+[download_setuptools(delay=0)]) + sys.exit(0) # try to force an exit + else: + if argv: + from setuptools.command.easy_install import main + main(argv) + else: + print "Setuptools version",version,"or greater has been installed." + print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + +def update_md5(filenames): + """Update our built-in md5 registry""" + + import re + + for name in filenames: + base = os.path.basename(name) + f = open(name,'rb') + md5_data[base] = md5(f.read()).hexdigest() + f.close() + + data = [" %r: %r,\n" % it for it in md5_data.items()] + data.sort() + repl = "".join(data) + + import inspect + srcfile = inspect.getsourcefile(sys.modules[__name__]) + f = open(srcfile, 'rb'); src = f.read(); f.close() + + match = re.search("\nmd5_data = {\n([^}]+)}", src) + if not match: + print >>sys.stderr, "Internal error!" + sys.exit(2) + + src = src[:match.start(1)] + repl + src[match.end(1):] + f = open(srcfile,'w') + f.write(src) + f.close() + + +if __name__=='__main__': + if len(sys.argv)>2 and sys.argv[1]=='--md5update': + update_md5(sys.argv[2:]) + else: + main(sys.argv[1:]) + + + + + + diff --git a/kcc/__init__.py b/kcc/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/kcc/__init__.py diff --git a/cbxarchive.py b/kcc/cbxarchive.py index c852bba..c852bba 100644 --- a/cbxarchive.py +++ b/kcc/cbxarchive.py diff --git a/comic2ebook.py b/kcc/comic2ebook.py index 577cbd2..655f06a 100755 --- a/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -35,7 +35,7 @@ __version__ = '1.20' import os import sys -import cbxarchive +import image, cbxarchive class HTMLbuilder: @@ -153,7 +153,6 @@ if __name__ == "__main__": title = "comic" filelist = [] try: - import image print "Splitting double pages..." for file in os.listdir(dir): if (getImageFileName(file) != None): diff --git a/image.py b/kcc/image.py index 31128fa..31128fa 100755 --- a/image.py +++ b/kcc/image.py diff --git a/kcc/kindlestrip.py b/kcc/kindlestrip.py new file mode 100755 index 0000000..234afc0 --- /dev/null +++ b/kcc/kindlestrip.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +# +# This is a python script. You need a Python interpreter to run it. +# For example, ActiveState Python, which exists for windows. +# +# This script strips the penultimate record from a Mobipocket file. +# This is useful because the current KindleGen add a compressed copy +# of the source files used in this record, making the ebook produced +# about twice as big as it needs to be. +# +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to <http://unlicense.org/> +# +# Written by Paul Durrant, 2010-2011, [email protected], pdurrant on mobileread.com +# With enhancements by Kevin Hendricks, KevinH on mobileread.com +# +# Changelog +# 1.00 - Initial version +# 1.10 - Added an option to output the stripped data +# 1.20 - Added check for source files section (thanks Piquan) +# 1.30 - Added prelim Support for K8 style mobis +# 1.31 - removed the SRCS section but kept a 0 size entry for it +# 1.32 - removes the SRCS section and its entry, now updates metadata 121 if needed +# 1.33 - now uses and modifies mobiheader SRCS and CNT +# 1.34 - added credit for Kevin Hendricks +# 1.35 - fixed bug when more than one compilation (SRCS/CMET) records + +__version__ = '1.35' + +import sys +import struct +import binascii + +class Unbuffered: + def __init__(self, stream): + self.stream = stream + def write(self, data): + self.stream.write(data) + self.stream.flush() + def __getattr__(self, attr): + return getattr(self.stream, attr) + + +class StripException(Exception): + pass + + +class SectionStripper: + def loadSection(self, section): + if (section + 1 == self.num_sections): + endoff = len(self.data_file) + else: + endoff = self.sections[section + 1][0] + off = self.sections[section][0] + return self.data_file[off:endoff] + + def patch(self, off, new): + self.data_file = self.data_file[:off] + new + self.data_file[off+len(new):] + + def strip(self, off, len): + self.data_file = self.data_file[:off] + self.data_file[off+len:] + + def patchSection(self, section, new, in_off = 0): + if (section + 1 == self.num_sections): + endoff = len(self.data_file) + else: + endoff = self.sections[section + 1][0] + off = self.sections[section][0] + assert off + in_off + len(new) <= endoff + self.patch(off + in_off, new) + + def updateEXTH121(self, srcs_secnum, srcs_cnt, mobiheader): + mobi_length, = struct.unpack('>L',mobiheader[0x14:0x18]) + exth_flag, = struct.unpack('>L', mobiheader[0x80:0x84]) + exth = 'NONE' + try: + if exth_flag & 0x40: + exth = mobiheader[16 + mobi_length:] + if (len(exth) >= 4) and (exth[:4] == 'EXTH'): + nitems, = struct.unpack('>I', exth[8:12]) + pos = 12 + for i in xrange(nitems): + type, size = struct.unpack('>II', exth[pos: pos + 8]) + # print type, size + if type == 121: + boundaryptr, =struct.unpack('>L',exth[pos+8: pos + size]) + if srcs_secnum <= boundaryptr: + boundaryptr -= srcs_cnt + prefix = mobiheader[0:16 + mobi_length + pos + 8] + suffix = mobiheader[16 + mobi_length + pos + 8 + 4:] + nval = struct.pack('>L',boundaryptr) + mobiheader = prefix + nval + suffix + pos += size + except: + pass + return mobiheader + + def __init__(self, datain): + if datain[0x3C:0x3C+8] != 'BOOKMOBI': + raise StripException("invalid file format") + self.num_sections, = struct.unpack('>H', datain[76:78]) + + # get mobiheader and check SRCS section number and count + offset0, = struct.unpack_from('>L', datain, 78) + offset1, = struct.unpack_from('>L', datain, 86) + mobiheader = datain[offset0:offset1] + srcs_secnum, srcs_cnt = struct.unpack_from('>2L', mobiheader, 0xe0) + if srcs_secnum == 0xffffffff or srcs_cnt == 0: + raise StripException("File doesn't contain the sources section.") + + print "Found SRCS section number %d, and count %d" % (srcs_secnum, srcs_cnt) + # find its offset and length + next = srcs_secnum + srcs_cnt + srcs_offset, flgval = struct.unpack_from('>2L', datain, 78+(srcs_secnum*8)) + next_offset, flgval = struct.unpack_from('>2L', datain, 78+(next*8)) + srcs_length = next_offset - srcs_offset + if datain[srcs_offset:srcs_offset+4] != 'SRCS': + raise StripException("SRCS section num does not point to SRCS.") + print " beginning at offset %0x and ending at offset %0x" % (srcs_offset, srcs_length) + + # it appears bytes 68-71 always contain (2*num_sections) + 1 + # this is not documented anyplace at all but it appears to be some sort of next + # available unique_id used to identify specific sections in the palm db + self.data_file = datain[:68] + struct.pack('>L',((self.num_sections-srcs_cnt)*2+1)) + self.data_file += datain[72:76] + + # write out the number of sections reduced by srtcs_cnt + self.data_file = self.data_file + struct.pack('>H',self.num_sections-srcs_cnt) + + # we are going to remove srcs_cnt SRCS sections so the offset of every entry in the table + # up to the srcs secnum must begin 8 bytes earlier per section removed (each table entry is 8 ) + delta = -8 * srcs_cnt + for i in xrange(srcs_secnum): + offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8)) + offset += delta + self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval) + + # for every record after the srcs_cnt SRCS records we must start it + # earlier by 8*srcs_cnt + the length of the srcs sections themselves) + delta = delta - srcs_length + for i in xrange(srcs_secnum+srcs_cnt,self.num_sections): + offset, flgval = struct.unpack_from('>2L', datain, 78+(i*8)) + offset += delta + flgval = 2 * (i - srcs_cnt) + self.data_file += struct.pack('>L',offset) + struct.pack('>L',flgval) + + # now pad it out to begin right at the first offset + # typically this is 2 bytes of nulls + first_offset, flgval = struct.unpack_from('>2L', self.data_file, 78) + self.data_file += '\0' * (first_offset - len(self.data_file)) + + # now finally add on every thing up to the original src_offset + self.data_file += datain[offset0: srcs_offset] + + # and everything afterwards + self.data_file += datain[srcs_offset+srcs_length:] + + #store away the SRCS section in case the user wants it output + self.stripped_data_header = datain[srcs_offset:srcs_offset+16] + self.stripped_data = datain[srcs_offset+16:srcs_offset+srcs_length] + + # update the number of sections count + self.num_section = self.num_sections - srcs_cnt + + # update the srcs_secnum and srcs_cnt in the mobiheader + offset0, flgval0 = struct.unpack_from('>2L', self.data_file, 78) + offset1, flgval1 = struct.unpack_from('>2L', self.data_file, 86) + mobiheader = self.data_file[offset0:offset1] + mobiheader = mobiheader[:0xe0]+ struct.pack('>L', 0xffffffff) + struct.pack('>L', 0) + mobiheader[0xe8:] + + # if K8 mobi, handle metadata 121 in old mobiheader + mobiheader = self.updateEXTH121(srcs_secnum, srcs_cnt, mobiheader) + self.data_file = self.data_file[0:offset0] + mobiheader + self.data_file[offset1:] + print "done" + + def getResult(self): + return self.data_file + + def getStrippedData(self): + return self.stripped_data + + def getHeader(self): + return self.stripped_data_header + +if __name__ == "__main__": + sys.stdout=Unbuffered(sys.stdout) + print ('KindleStrip v%(__version__)s. ' + 'Written 2010-2012 by Paul Durrant and Kevin Hendricks.' % globals()) + if len(sys.argv)<3 or len(sys.argv)>4: + print "Strips the Sources record from Mobipocket ebooks" + print "For ebooks generated using KindleGen 1.1 and later that add the source" + print "Usage:" + print " %s <infile> <outfile> <strippeddatafile>" % sys.argv[0] + print "<strippeddatafile> is optional." + sys.exit(1) + else: + infile = sys.argv[1] + outfile = sys.argv[2] + data_file = file(infile, 'rb').read() + try: + strippedFile = SectionStripper(data_file) + file(outfile, 'wb').write(strippedFile.getResult()) + print "Header Bytes: " + binascii.b2a_hex(strippedFile.getHeader()) + if len(sys.argv)==4: + file(sys.argv[3], 'wb').write(strippedFile.getStrippedData()) + except StripException, e: + print "Error: %s" % e + sys.exit(1) + sys.exit(0) diff --git a/kcc/rarfile.py b/kcc/rarfile.py new file mode 100644 index 0000000..d78aafe --- /dev/null +++ b/kcc/rarfile.py @@ -0,0 +1,1706 @@ +# rarfile.py +# +# Copyright (c) 2005-2011 Marko Kreen <[email protected]> +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +r"""RAR archive reader. + +This is Python module for Rar archive reading. The interface +is made as zipfile like as possible. + +Basic logic: + - Parse archive structure with Python. + - Extract non-compressed files with Python + - Extract compressed files with unrar. + - Optionally write compressed data to temp file to speed up unrar, + otherwise it needs to scan whole archive on each execution. + +Example:: + + import rarfile + + rf = rarfile.RarFile('myarchive.rar') + for f in rf.infolist(): + print f.filename, f.file_size + if f.filename == 'README': + print rf.read(f) + +There are few module-level parameters to tune behaviour, +here they are with defaults, and reason to change it:: + + import rarfile + + # Set to full path of unrar.exe if it is not in PATH + rarfile.UNRAR_TOOL = "unrar" + + # Set to 0 if you don't look at comments and want to + # avoid wasting time for parsing them + rarfile.NEED_COMMENTS = 1 + + # Set up to 1 if you don't want to deal with decoding comments + # from unknown encoding. rarfile will try couple of common + # encodings in sequence. + rarfile.UNICODE_COMMENTS = 0 + + # Set to 1 if you prefer timestamps to be datetime objects + # instead tuples + rarfile.USE_DATETIME = 0 + + # Set to '/' to be more compatible with zipfile + rarfile.PATH_SEP = '\\' + +For more details, refer to source. + +""" + +__version__ = '2.4' + +# export only interesting items +__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] + +## +## Imports and compat - support both Python 2.x and 3.x +## + +import sys, os, struct +from struct import pack, unpack +from binascii import crc32 +from tempfile import mkstemp +from subprocess import Popen, PIPE, STDOUT +from datetime import datetime + +# only needed for encryped headers +try: + from Crypto.Cipher import AES + try: + from hashlib import sha1 + except ImportError: + from sha import new as sha1 + _have_crypto = 1 +except ImportError: + _have_crypto = 0 + +# compat with 2.x +if sys.hexversion < 0x3000000: + # prefer 3.x behaviour + range = xrange + # py2.6 has broken bytes() + def bytes(s, enc): + return str(s) + +# see if compat bytearray() is needed +try: + bytearray +except NameError: + import array + class bytearray: + def __init__(self, val = ''): + self.arr = array.array('B', val) + self.append = self.arr.append + self.__getitem__ = self.arr.__getitem__ + self.__len__ = self.arr.__len__ + def decode(self, *args): + return self.arr.tostring().decode(*args) + +# Optimized .readinto() requires memoryview +try: + memoryview + have_memoryview = 1 +except NameError: + have_memoryview = 0 + +# Struct() for older python +try: + from struct import Struct +except ImportError: + class Struct: + def __init__(self, fmt): + self.format = fmt + self.size = struct.calcsize(fmt) + def unpack(self, buf): + return unpack(self.format, buf) + def unpack_from(self, buf, ofs = 0): + return unpack(self.format, buf[ofs : ofs + self.size]) + def pack(self, *args): + return pack(self.format, *args) + +# file object superclass +try: + from io import RawIOBase +except ImportError: + class RawIOBase(object): + def close(self): + pass + + +## +## Module configuration. Can be tuned after importing. +## + +# default fallback charset +DEFAULT_CHARSET = "windows-1252" + +# list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed +TRY_ENCODINGS = ('utf8', 'utf-16le') + +# 'unrar', 'rar' or full path to either one +UNRAR_TOOL = "unrar" + +# Command line args to use for opening file for reading. +OPEN_ARGS = ('p', '-inul') + +# Command line args to use for extracting file to disk. +EXTRACT_ARGS = ('x', '-y', '-idq') + +# args for testrar() +TEST_ARGS = ('t', '-idq') + +# whether to speed up decompression by using tmp archive +USE_EXTRACT_HACK = 1 + +# limit the filesize for tmp archive usage +HACK_SIZE_LIMIT = 20*1024*1024 + +# whether to parse file/archive comments. +NEED_COMMENTS = 1 + +# whether to convert comments to unicode strings +UNICODE_COMMENTS = 0 + +# When RAR is corrupt, stopping on bad header is better +# On unknown/misparsed RAR headers reporting is better +REPORT_BAD_HEADER = 0 + +# Convert RAR time tuple into datetime() object +USE_DATETIME = 0 + +# Separator for path name components. RAR internally uses '\\'. +# Use '/' to be similar with zipfile. +PATH_SEP = '\\' + +## +## rar constants +## + +# block types +RAR_BLOCK_MARK = 0x72 # r +RAR_BLOCK_MAIN = 0x73 # s +RAR_BLOCK_FILE = 0x74 # t +RAR_BLOCK_OLD_COMMENT = 0x75 # u +RAR_BLOCK_OLD_EXTRA = 0x76 # v +RAR_BLOCK_OLD_SUB = 0x77 # w +RAR_BLOCK_OLD_RECOVERY = 0x78 # x +RAR_BLOCK_OLD_AUTH = 0x79 # y +RAR_BLOCK_SUB = 0x7a # z +RAR_BLOCK_ENDARC = 0x7b # { + +# flags for RAR_BLOCK_MAIN +RAR_MAIN_VOLUME = 0x0001 +RAR_MAIN_COMMENT = 0x0002 +RAR_MAIN_LOCK = 0x0004 +RAR_MAIN_SOLID = 0x0008 +RAR_MAIN_NEWNUMBERING = 0x0010 +RAR_MAIN_AUTH = 0x0020 +RAR_MAIN_RECOVERY = 0x0040 +RAR_MAIN_PASSWORD = 0x0080 +RAR_MAIN_FIRSTVOLUME = 0x0100 +RAR_MAIN_ENCRYPTVER = 0x0200 + +# flags for RAR_BLOCK_FILE +RAR_FILE_SPLIT_BEFORE = 0x0001 +RAR_FILE_SPLIT_AFTER = 0x0002 +RAR_FILE_PASSWORD = 0x0004 +RAR_FILE_COMMENT = 0x0008 +RAR_FILE_SOLID = 0x0010 +RAR_FILE_DICTMASK = 0x00e0 +RAR_FILE_DICT64 = 0x0000 +RAR_FILE_DICT128 = 0x0020 +RAR_FILE_DICT256 = 0x0040 +RAR_FILE_DICT512 = 0x0060 +RAR_FILE_DICT1024 = 0x0080 +RAR_FILE_DICT2048 = 0x00a0 +RAR_FILE_DICT4096 = 0x00c0 +RAR_FILE_DIRECTORY = 0x00e0 +RAR_FILE_LARGE = 0x0100 +RAR_FILE_UNICODE = 0x0200 +RAR_FILE_SALT = 0x0400 +RAR_FILE_VERSION = 0x0800 +RAR_FILE_EXTTIME = 0x1000 +RAR_FILE_EXTFLAGS = 0x2000 + +# flags for RAR_BLOCK_ENDARC +RAR_ENDARC_NEXT_VOLUME = 0x0001 +RAR_ENDARC_DATACRC = 0x0002 +RAR_ENDARC_REVSPACE = 0x0004 +RAR_ENDARC_VOLNR = 0x0008 + +# flags common to all blocks +RAR_SKIP_IF_UNKNOWN = 0x4000 +RAR_LONG_BLOCK = 0x8000 + +# Host OS types +RAR_OS_MSDOS = 0 +RAR_OS_OS2 = 1 +RAR_OS_WIN32 = 2 +RAR_OS_UNIX = 3 +RAR_OS_MACOS = 4 +RAR_OS_BEOS = 5 + +# Compression methods - '0'..'5' +RAR_M0 = 0x30 +RAR_M1 = 0x31 +RAR_M2 = 0x32 +RAR_M3 = 0x33 +RAR_M4 = 0x34 +RAR_M5 = 0x35 + +## +## internal constants +## + +RAR_ID = bytes("Rar!\x1a\x07\x00", 'ascii') +ZERO = bytes("\0", 'ascii') +EMPTY = bytes("", 'ascii') + +S_BLK_HDR = Struct('<HBHH') +S_FILE_HDR = Struct('<LLBLLBBHL') +S_LONG = Struct('<L') +S_SHORT = Struct('<H') +S_BYTE = Struct('<B') +S_COMMENT_HDR = Struct('<HBBH') + +## +## Public interface +## + +class Error(Exception): + """Base class for rarfile errors.""" +class BadRarFile(Error): + """Incorrect data in archive.""" +class NotRarFile(Error): + """The file is not RAR archive.""" +class BadRarName(Error): + """Cannot guess multipart name components.""" +class NoRarEntry(Error): + """File not found in RAR""" +class PasswordRequired(Error): + """File requires password""" +class NeedFirstVolume(Error): + """Need to start from first volume.""" +class NoCrypto(Error): + """Cannot parse encrypted headers - no crypto available.""" + + +def is_rarfile(fn): + '''Check quickly whether file is rar archive.''' + buf = open(fn, "rb").read(len(RAR_ID)) + return buf == RAR_ID + + +class RarInfo(object): + '''An entry in rar archive. + + @ivar filename: + File name with relative path. + Default path separator is '/', to change set rarfile.PATH_SEP. + Always unicode string. + @ivar date_time: + Modification time, tuple of (year, month, day, hour, minute, second). + Or datetime() object if USE_DATETIME is set. + @ivar file_size: + Uncompressed size. + @ivar compress_size: + Compressed size. + @ivar compress_type: + Compression method: 0x30 - 0x35. + @ivar extract_version: + Minimal Rar version needed for decompressing. + @ivar host_os: + Host OS type, one of RAR_OS_* constants. + @ivar mode: + File attributes. May be either dos-style or unix-style, depending on host_os. + @ivar CRC: + CRC-32 of uncompressed file, unsigned int. + @ivar volume: + Volume nr, starting from 0. + @ivar volume_file: + Volume file name, where file starts. + @ivar type: + One of RAR_BLOCK_* types. Only entries with type==RAR_BLOCK_FILE are shown in .infolist(). + @ivar flags: + For files, RAR_FILE_* bits. + @ivar comment: + File comment (unicode string or None). + + @ivar mtime: + Optional time field: Modification time, with float seconds. + Same as .date_time but with more precision. + @ivar ctime: + Optional time field: creation time, with float seconds. + @ivar atime: + Optional time field: last access time, with float seconds. + @ivar arctime: + Optional time field: archival time, with float seconds. + ''' + + __slots__ = ( + # zipfile-compatible fields + 'filename', + 'file_size', + 'compress_size', + 'date_time', + 'comment', + 'CRC', + 'volume', + 'orig_filename', # bytes in unknown encoding + + # rar-specific fields + 'extract_version', + 'compress_type', + 'host_os', + 'mode', + 'type', + 'flags', + + # optional extended time fields + # tuple where the sec is float, or datetime(). + 'mtime', # same as .date_time + 'ctime', + 'atime', + 'arctime', + + # RAR internals + 'name_size', + 'header_size', + 'header_crc', + 'file_offset', + 'add_size', + 'header_data', + 'header_base', + 'header_offset', + 'salt', + 'volume_file', + ) + + def isdir(self): + '''Returns True if the entry is a directory.''' + if self.type == RAR_BLOCK_FILE: + return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY + return False + + def needs_password(self): + return self.flags & RAR_FILE_PASSWORD + + +class RarFile(object): + '''Parse RAR structure, provide access to files in archive. + + @ivar comment: + Archive comment (unicode string or None). + ''' + + def __init__(self, rarfile, mode="r", charset=None, info_callback=None, crc_check = True): + """Open and parse a RAR archive. + + @param rarfile: archive file name + @param mode: only 'r' is supported. + @param charset: fallback charset to use, if filenames are not already Unicode-enabled. + @param info_callback: debug callback, gets to see all archive entries. + @param crc_check: set to False to disable CRC checks + """ + self.rarfile = rarfile + self.comment = None + self._charset = charset or DEFAULT_CHARSET + self._info_callback = info_callback + + self._info_list = [] + self._info_map = {} + self._needs_password = False + self._password = None + self._crc_check = crc_check + + self._main = None + + if mode != "r": + raise NotImplementedError("RarFile supports only mode=r") + + self._parse() + + def setpassword(self, password): + '''Sets the password to use when extracting.''' + self._password = password + if not self._main: + self._parse() + + def needs_password(self): + '''Returns True if any archive entries require password for extraction.''' + return self._needs_password + + def namelist(self): + '''Return list of filenames in archive.''' + return [f.filename for f in self._info_list] + + def infolist(self): + '''Return RarInfo objects for all files/directories in archive.''' + return self._info_list + + def getinfo(self, fname): + '''Return RarInfo for file.''' + + if isinstance(fname, RarInfo): + return fname + + # accept both ways here + if PATH_SEP == '/': + fname2 = fname.replace("\\", "/") + else: + fname2 = fname.replace("/", "\\") + + try: + return self._info_map[fname] + except KeyError: + try: + return self._info_map[fname2] + except KeyError: + raise NoRarEntry("No such file: "+fname) + + def open(self, fname, mode = 'r', psw = None): + '''Return open file object, where the data can be read. + + The object implements io.RawIOBase interface, so it can + be further wrapped with io.BufferedReader and io.TextIOWrapper. + + On older Python where io module is not available, it implements + only .read(), .seek(), .tell() and .close() methods. + + The object is seekable, although the seeking is fast only on + uncompressed files, on compressed files the seeking is implemented + by reading ahead and/or restarting the decompression. + + @param fname: file name or RarInfo instance. + @param mode: must be 'r' + @param psw: password to use for extracting. + ''' + + if mode != 'r': + raise NotImplementedError("RarFile.open() supports only mode=r") + + # entry lookup + inf = self.getinfo(fname) + if inf.isdir(): + raise TypeError("Directory does not have any data: " + inf.filename) + + if inf.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) + + # check password + if inf.needs_password(): + psw = psw or self._password + if psw is None: + raise PasswordRequired("File %s requires password" % inf.filename) + else: + psw = None + + # is temp write usable? + if not USE_EXTRACT_HACK or not self._main: + use_hack = 0 + elif self._main.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD): + use_hack = 0 + elif inf.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + use_hack = 0 + elif inf.file_size > HACK_SIZE_LIMIT: + use_hack = 0 + else: + use_hack = 1 + + # now extract + if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0: + return self._open_clear(inf) + elif use_hack: + return self._open_hack(inf, psw) + else: + return self._open_unrar(self.rarfile, inf, psw) + + def read(self, fname, psw = None): + """Return uncompressed data for archive entry. + + For longer files using .open() may be better idea. + + @param fname: filename or RarInfo instance + @param psw: password to use for extracting. + """ + + f = self.open(fname, 'r', psw) + try: + return f.read() + finally: + f.close() + + def close(self): + """Release open resources.""" + pass + + def printdir(self): + """Print archive file list to stdout.""" + for f in self._info_list: + print(f.filename) + + def extract(self, member, path=None, pwd=None): + """Extract single file into current directory. + + @param member: filename or RarInfo instance + @param path: optional destination path + @param pwd: optional password to use + """ + if isinstance(member, RarInfo): + fname = member.filename + else: + fname = member + self._extract([fname], path, pwd) + + def extractall(self, path=None, members=None, pwd=None): + """Extract all files into current directory. + + @param path: optional destination path + @param members: optional filename or RarInfo instance list to extract + @param pwd: optional password to use + """ + fnlist = [] + if members is not None: + for m in members: + if isinstance(m, RarInfo): + fnlist.append(m.filename) + else: + fnlist.append(m) + self._extract(fnlist, path, pwd) + + def testrar(self): + """Let 'unrar' test the archive. + """ + cmd = [UNRAR_TOOL] + list(TEST_ARGS) + if self._password is not None: + cmd.append('-p' + self._password) + else: + cmd.append('-p-') + cmd.append(self.rarfile) + p = custom_popen(cmd) + p.communicate() + if p.returncode != 0: + raise BadRarFile("Testing failed") + + ## + ## private methods + ## + + # store entry + def _process_entry(self, item): + if item.type == RAR_BLOCK_FILE: + # use only first part + if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + # remember if any items require password + if item.needs_password(): + self._needs_password = True + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old.compress_size += item.compress_size + + # parse new-style comment + if item.type == RAR_BLOCK_SUB and item.filename == 'CMT': + if not NEED_COMMENTS: + pass + elif item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + pass + elif item.flags & RAR_FILE_SOLID: + # file comment + cmt = self._read_comment_v3(item, self._password) + if len(self._info_list) > 0: + old = self._info_list[-1] + old.comment = cmt + else: + # archive comment + cmt = self._read_comment_v3(item, self._password) + self.comment = cmt + + if self._info_callback: + self._info_callback(item) + + # read rar + def _parse(self): + self._fd = None + try: + self._parse_real() + finally: + if self._fd: + self._fd.close() + self._fd = None + + def _parse_real(self): + fd = open(self.rarfile, "rb") + self._fd = fd + id = fd.read(len(RAR_ID)) + if id != RAR_ID: + raise NotRarFile("Not a Rar archive: "+self.rarfile) + + volume = 0 # first vol (.rar) is 0 + more_vols = 0 + endarc = 0 + volfile = self.rarfile + while 1: + if endarc: + h = None # don't read past ENDARC + else: + h = self._parse_header(fd) + if not h: + if more_vols: + volume += 1 + volfile = self._next_volname(volfile) + fd.close() + fd = open(volfile, "rb") + self._fd = fd + more_vols = 0 + endarc = 0 + continue + break + h.volume = volume + h.volume_file = volfile + + if h.type == RAR_BLOCK_MAIN and not self._main: + self._main = h + if h.flags & RAR_MAIN_NEWNUMBERING: + # RAR 2.x does not set FIRSTVOLUME, + # so check it only if NEWNUMBERING is used + if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0: + raise NeedFirstVolume("Need to start from first volume") + if h.flags & RAR_MAIN_PASSWORD: + self._needs_password = True + if not self._password: + self._main = None + break + elif h.type == RAR_BLOCK_ENDARC: + more_vols = h.flags & RAR_ENDARC_NEXT_VOLUME + endarc = 1 + elif h.type == RAR_BLOCK_FILE: + # RAR 2.x does not write RAR_BLOCK_ENDARC + if h.flags & RAR_FILE_SPLIT_AFTER: + more_vols = 1 + # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME + if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Need to start from first volume") + + # store it + self._process_entry(h) + + # go to next header + if h.add_size > 0: + fd.seek(h.file_offset + h.add_size, 0) + + # AES encrypted headers + _last_aes_key = (None, None, None) # (salt, key, iv) + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto('Cannot parse encrypted headers - no crypto') + salt = fd.read(8) + if self._last_aes_key[0] == salt: + key, iv = self._last_aes_key[1:] + else: + key, iv = rar3_s2k(self._password, salt) + self._last_aes_key = (salt, key, iv) + return HeaderDecrypt(fd, key, iv) + + # read single header + def _parse_header(self, fd): + try: + # handle encrypted headers + if self._main and self._main.flags & RAR_MAIN_PASSWORD: + if not self._password: + return + fd = self._decrypt_header(fd) + + # now read actual header + return self._parse_block_header(fd) + except struct.error: + if REPORT_BAD_HEADER: + raise BadRarFile('Broken header in RAR file') + return None + + # common header + def _parse_block_header(self, fd): + h = RarInfo() + h.header_offset = fd.tell() + h.comment = None + + # read and parse base header + buf = fd.read(S_BLK_HDR.size) + if not buf: + return None + t = S_BLK_HDR.unpack_from(buf) + h.header_crc, h.type, h.flags, h.header_size = t + h.header_base = S_BLK_HDR.size + pos = S_BLK_HDR.size + + # read full header + if h.header_size > S_BLK_HDR.size: + h.header_data = buf + fd.read(h.header_size - S_BLK_HDR.size) + else: + h.header_data = buf + h.file_offset = fd.tell() + + # unexpected EOF? + if len(h.header_data) != h.header_size: + if REPORT_BAD_HEADER: + raise BadRarFile('Unexpected EOF when reading header') + return None + + # block has data assiciated with it? + if h.flags & RAR_LONG_BLOCK: + h.add_size = S_LONG.unpack_from(h.header_data, pos)[0] + else: + h.add_size = 0 + + # parse interesting ones, decide header boundaries for crc + if h.type == RAR_BLOCK_MARK: + return h + elif h.type == RAR_BLOCK_MAIN: + h.header_base += 6 + if h.flags & RAR_MAIN_ENCRYPTVER: + h.header_base += 1 + if h.flags & RAR_MAIN_COMMENT: + self._parse_subblocks(h, h.header_base) + self.comment = h.comment + elif h.type == RAR_BLOCK_FILE: + self._parse_file_header(h, pos) + elif h.type == RAR_BLOCK_SUB: + self._parse_file_header(h, pos) + h.header_base = h.header_size + elif h.type == RAR_BLOCK_OLD_AUTH: + h.header_base += 8 + elif h.type == RAR_BLOCK_OLD_EXTRA: + h.header_base += 7 + else: + h.header_base = h.header_size + + # check crc + if h.type == RAR_BLOCK_OLD_SUB: + crcdat = h.header_data[2:] + fd.read(h.add_size) + else: + crcdat = h.header_data[2:h.header_base] + + calc_crc = crc32(crcdat) & 0xFFFF + + # return good header + if h.header_crc == calc_crc: + return h + + # need to panic? + if REPORT_BAD_HEADER: + xlen = len(crcdat) + crcdat = h.header_data[2:] + msg = 'Header CRC error (%02x): exp=%x got=%x (xlen = %d)' % ( h.type, h.header_crc, calc_crc, xlen ) + xlen = len(crcdat) + while xlen >= S_BLK_HDR.size - 2: + crc = crc32(crcdat[:xlen]) & 0xFFFF + if crc == h.header_crc: + msg += ' / crc match, xlen = %d' % xlen + xlen -= 1 + raise BadRarFile(msg) + + # instead panicing, send eof + return None + + # read file-specific header + def _parse_file_header(self, h, pos): + fld = S_FILE_HDR.unpack_from(h.header_data, pos) + h.compress_size = fld[0] + h.file_size = fld[1] + h.host_os = fld[2] + h.CRC = fld[3] + h.date_time = parse_dos_time(fld[4]) + h.extract_version = fld[5] + h.compress_type = fld[6] + h.name_size = fld[7] + h.mode = fld[8] + pos += S_FILE_HDR.size + + if h.flags & RAR_FILE_LARGE: + h1 = S_LONG.unpack_from(h.header_data, pos)[0] + h2 = S_LONG.unpack_from(h.header_data, pos + 4)[0] + h.compress_size |= h1 << 32 + h.file_size |= h2 << 32 + pos += 8 + h.add_size = h.compress_size + + name = h.header_data[pos : pos + h.name_size ] + pos += h.name_size + if h.flags & RAR_FILE_UNICODE: + nul = name.find(ZERO) + h.orig_filename = name[:nul] + u = UnicodeFilename(h.orig_filename, name[nul + 1 : ]) + h.filename = u.decode() + + # if parsing failed fall back to simple name + if u.failed: + h.filename = self._decode(h.orig_filename) + else: + h.orig_filename = name + h.filename = self._decode(name) + + # change separator, if requested + if PATH_SEP != '\\': + h.filename = h.filename.replace('\\', PATH_SEP) + + if h.flags & RAR_FILE_SALT: + h.salt = h.header_data[pos : pos + 8] + pos += 8 + else: + h.salt = None + + # optional extended time stamps + if h.flags & RAR_FILE_EXTTIME: + pos = self._parse_ext_time(h, pos) + else: + h.mtime = h.atime = h.ctime = h.arctime = None + + # base header end + h.header_base = pos + + if h.flags & RAR_FILE_COMMENT: + self._parse_subblocks(h, pos) + + # convert timestamps + if USE_DATETIME: + h.date_time = to_datetime(h.date_time) + h.mtime = to_datetime(h.mtime) + h.atime = to_datetime(h.atime) + h.ctime = to_datetime(h.ctime) + h.arctime = to_datetime(h.arctime) + + # .mtime is .date_time with more precision + if h.mtime: + if USE_DATETIME: + h.date_time = h.mtime + else: + # keep seconds int + h.date_time = h.mtime[:5] + (int(h.mtime[5]),) + + return pos + + # find old-style comment subblock + def _parse_subblocks(self, h, pos): + hdata = h.header_data + while pos < len(hdata): + # ordinary block header + t = S_BLK_HDR.unpack_from(hdata, pos) + scrc, stype, sflags, slen = t + pos_next = pos + slen + pos += S_BLK_HDR.size + + # corrupt header + if pos_next < pos: + break + + # followed by block-specific header + if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next: + declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) + pos += S_COMMENT_HDR.size + data = hdata[pos : pos_next] + cmt = rar_decompress(ver, meth, data, declen, sflags, + crc, self._password) + if not self._crc_check: + h.comment = self._decode_comment(cmt) + elif crc32(cmt) & 0xFFFF == crc: + h.comment = self._decode_comment(cmt) + + pos = pos_next + + def _parse_ext_time(self, h, pos): + data = h.header_data + + # flags and rest of data can be missing + flags = 0 + if pos + 2 <= len(data): + flags = S_SHORT.unpack_from(data, pos)[0] + pos += 2 + + h.mtime, pos = self._parse_xtime(flags >> 3*4, data, pos, h.date_time) + h.ctime, pos = self._parse_xtime(flags >> 2*4, data, pos) + h.atime, pos = self._parse_xtime(flags >> 1*4, data, pos) + h.arctime, pos = self._parse_xtime(flags >> 0*4, data, pos) + return pos + + def _parse_xtime(self, flag, data, pos, dostime = None): + unit = 10000000.0 # 100 ns units + if flag & 8: + if not dostime: + t = S_LONG.unpack_from(data, pos)[0] + dostime = parse_dos_time(t) + pos += 4 + rem = 0 + cnt = flag & 3 + for i in range(cnt): + b = S_BYTE.unpack_from(data, pos)[0] + rem = (b << 16) | (rem >> 8) + pos += 1 + sec = dostime[5] + rem / unit + if flag & 4: + sec += 1 + dostime = dostime[:5] + (sec,) + return dostime, pos + + # given current vol name, construct next one + def _next_volname(self, volfile): + if self._main.flags & RAR_MAIN_NEWNUMBERING: + return self._next_newvol(volfile) + return self._next_oldvol(volfile) + + # new-style next volume + def _next_newvol(self, volfile): + i = len(volfile) - 1 + while i >= 0: + if volfile[i] >= '0' and volfile[i] <= '9': + return self._inc_volname(volfile, i) + i -= 1 + raise BadRarName("Cannot construct volume name: "+volfile) + + # old-style next volume + def _next_oldvol(self, volfile): + # rar -> r00 + if volfile[-4:].lower() == '.rar': + return volfile[:-2] + '00' + return self._inc_volname(volfile, len(volfile) - 1) + + # increase digits with carry, otherwise just increment char + def _inc_volname(self, volfile, i): + fn = list(volfile) + while i >= 0: + if fn[i] != '9': + fn[i] = chr(ord(fn[i]) + 1) + break + fn[i] = '0' + i -= 1 + return ''.join(fn) + + def _open_clear(self, inf): + return DirectReader(self, inf) + + # put file compressed data into temporary .rar archive, and run + # unrar on that, thus avoiding unrar going over whole archive + def _open_hack(self, inf, psw = None): + BSIZE = 32*1024 + + size = inf.compress_size + inf.header_size + rf = open(inf.volume_file, "rb", 0) + rf.seek(inf.header_offset) + + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + + try: + # create main header: crc, type, flags, size, res1, res2 + mh = S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2+4) + tmpf.write(RAR_ID + mh) + while size > 0: + if size > BSIZE: + buf = rf.read(BSIZE) + else: + buf = rf.read(size) + if not buf: + raise BadRarFile('read failed: ' + inf.filename) + tmpf.write(buf) + size -= len(buf) + tmpf.close() + rf.close() + except: + rf.close() + tmpf.close() + os.unlink(tmpname) + raise + + return self._open_unrar(tmpname, inf, psw, tmpname) + + def _read_comment_v3(self, inf, psw=None): + + # read data + rf = open(inf.volume_file, "rb") + rf.seek(inf.file_offset) + data = rf.read(inf.compress_size) + rf.close() + + # decompress + cmt = rar_decompress(inf.extract_version, inf.compress_type, data, + inf.file_size, inf.flags, inf.CRC, psw, inf.salt) + + # check crc + if self._crc_check: + crc = crc32(cmt) + if crc < 0: + crc += (long(1) << 32) + if crc != inf.CRC: + return None + + return self._decode_comment(cmt) + + # extract using unrar + def _open_unrar(self, rarfile, inf, psw = None, tmpfile = None): + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + if psw is not None: + cmd.append("-p" + psw) + cmd.append(rarfile) + + # not giving filename avoids encoding related problems + if not tmpfile: + fn = inf.filename + if PATH_SEP != os.sep: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # read from unrar pipe + return PipeReader(self, inf, cmd, tmpfile) + + def _decode(self, val): + for c in TRY_ENCODINGS: + try: + return val.decode(c) + except UnicodeError: + pass + return val.decode(self._charset, 'replace') + + def _decode_comment(self, val): + if UNICODE_COMMENTS: + return self._decode(val) + return val + + # call unrar to extract a file + def _extract(self, fnlist, path=None, psw=None): + cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) + + # pasoword + psw = psw or self._password + if psw is not None: + cmd.append('-p' + psw) + else: + cmd.append('-p-') + + # rar file + cmd.append(self.rarfile) + + # file list + for fn in fnlist: + if os.sep != PATH_SEP: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # destination path + if path is not None: + cmd.append(path + os.sep) + + # call + p = custom_popen(cmd) + p.communicate() + +## +## Utility classes +## + +class UnicodeFilename: + """Handle unicode filename decompression""" + + def __init__(self, name, encdata): + self.std_name = bytearray(name) + self.encdata = bytearray(encdata) + self.pos = self.encpos = 0 + self.buf = bytearray() + self.failed = 0 + + def enc_byte(self): + try: + c = self.encdata[self.encpos] + self.encpos += 1 + return c + except IndexError: + self.failed = 1 + return 0 + + def std_byte(self): + try: + return self.std_name[self.pos] + except IndexError: + self.failed = 1 + return ord('?') + + def put(self, lo, hi): + self.buf.append(lo) + self.buf.append(hi) + self.pos += 1 + + def decode(self): + hi = self.enc_byte() + flagbits = 0 + while self.encpos < len(self.encdata): + if flagbits == 0: + flags = self.enc_byte() + flagbits = 8 + flagbits -= 2 + t = (flags >> flagbits) & 3 + if t == 0: + self.put(self.enc_byte(), 0) + elif t == 1: + self.put(self.enc_byte(), hi) + elif t == 2: + self.put(self.enc_byte(), self.enc_byte()) + else: + n = self.enc_byte() + if n & 0x80: + c = self.enc_byte() + for i in range((n & 0x7f) + 2): + lo = (self.std_byte() + c) & 0xFF + self.put(lo, hi) + else: + for i in range(n + 2): + self.put(self.std_byte(), 0) + return self.buf.decode("utf-16le", "replace") + + +class RarExtFile(RawIOBase): + """Base class for 'file-like' object that RarFile.open() returns. + + Provides public methods and common crc checking. + + Behaviour: + - no short reads - .read() and .readinfo() read as much as requested. + - no internal buffer, use io.BufferedReader for that. + + @ivar name: + filename of the archive entry. + """ + + def __init__(self, rf, inf): + """Fill common fields""" + + RawIOBase.__init__(self) + + # standard io.* properties + self.name = inf.filename + self.mode = 'rb' + + self.rf = rf + self.inf = inf + self.crc_check = rf._crc_check + self.fd = None + self.CRC = 0 + self.remain = 0 + + self._open() + + def _open(self): + if self.fd: + self.fd.close() + self.fd = None + self.CRC = 0 + self.remain = self.inf.file_size + + def read(self, cnt = None): + """Read all or specified amount of data from archive entry.""" + + # sanitize cnt + if cnt is None or cnt < 0: + cnt = self.remain + elif cnt > self.remain: + cnt = self.remain + if cnt == 0: + return EMPTY + + # actual read + data = self._read(cnt) + if data: + self.CRC = crc32(data, self.CRC) + self.remain -= len(data) + + # done? + if not data or self.remain == 0: + #self.close() + self._check() + return data + + def _check(self): + """Check final CRC.""" + if not self.crc_check: + return + if self.remain != 0: + raise BadRarFile("Failed the read enough data") + crc = self.CRC + if crc < 0: + crc += (long(1) << 32) + if crc != self.inf.CRC: + raise BadRarFile("Corrupt file - CRC check failed: " + self.inf.filename) + + def _read(self, cnt): + """Actual read that gets sanitized cnt.""" + + def close(self): + """Close open resources.""" + + RawIOBase.close(self) + + if self.fd: + self.fd.close() + self.fd = None + + def __del__(self): + """Hook delete to make sure tempfile is removed.""" + self.close() + + def readinto(self, buf): + """Zero-copy read directly into buffer. + + Returns bytes read. + """ + + data = self.read(len(buf)) + n = len(data) + try: + buf[:n] = data + except TypeError: + import array + if not isinstance(buf, array.array): + raise + buf[:n] = array.array(buf.typecode, data) + return n + + def tell(self): + """Return current reading position in uncompressed data.""" + return self.inf.file_size - self.remain + + def seek(self, ofs, whence = 0): + """Seek in data.""" + + # disable crc check when seeking + self.crc_check = 0 + + fsize = self.inf.file_size + cur_ofs = self.tell() + + if whence == 0: # seek from beginning of file + new_ofs = ofs + elif whence == 1: # seek from current position + new_ofs = cur_ofs + ofs + elif whence == 2: # seek from end of file + new_ofs = fsize + ofs + else: + raise ValueError('Invalid value for whence') + + # sanity check + if new_ofs < 0: + new_ofs = 0 + elif new_ofs > fsize: + new_ofs = fsize + + # do the actual seek + if new_ofs >= cur_ofs: + self._skip(new_ofs - cur_ofs) + else: + # process old data ? + #self._skip(fsize - cur_ofs) + # reopen and seek + self._open() + self._skip(new_ofs) + return self.tell() + + def _skip(self, cnt): + """Read and discard data""" + while cnt > 0: + if cnt > 8192: + buf = self.read(8192) + else: + buf = self.read(cnt) + if not buf: + break + cnt -= len(buf) + + def readable(self): + """Returns True""" + return True + + def seekable(self): + """Returns True""" + return True + + def readall(self): + """Read all remaining data""" + # avoid RawIOBase default impl + return self.read() + + +class PipeReader(RarExtFile): + """Read data from pipe, handle tempfile cleanup.""" + + def __init__(self, rf, inf, cmd, tempfile=None): + self.cmd = cmd + self.proc = None + self.tempfile = tempfile + RarExtFile.__init__(self, rf, inf) + + def _close_proc(self): + if not self.proc: + return + if self.proc.stdout: + self.proc.stdout.close() + if self.proc.stdin: + self.proc.stdin.close() + if self.proc.stderr: + self.proc.stderr.close() + self.proc.wait() + self.proc = None + + def _open(self): + RarExtFile._open(self) + + # stop old process + self._close_proc() + + # launch new process + self.proc = custom_popen(self.cmd) + self.fd = self.proc.stdout + + # avoid situation where unrar waits on stdin + if self.proc.stdin: + self.proc.stdin.close() + + def _read(self, cnt): + """Read from pipe.""" + return self.fd.read(cnt) + + def close(self): + """Close open resources.""" + + self._close_proc() + RarExtFile.close(self) + + if self.tempfile: + try: + os.unlink(self.tempfile) + except OSError: + pass + self.tempfile = None + + if have_memoryview: + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + cnt = len(buf) + if cnt > self.remain: + cnt = self.remain + vbuf = memoryview(buf) + res = self.fd.readinto(vbuf[0:cnt]) + if res: + if self.crc_check: + self.CRC = crc32(vbuf[:res], self.CRC) + self.remain -= res + return res + + +class DirectReader(RarExtFile): + """Read uncompressed data directly from archive.""" + + def _open(self): + RarExtFile._open(self) + + self.volfile = self.inf.volume_file + self.fd = open(self.volfile, "rb", 0) + self.fd.seek(self.inf.header_offset, 0) + self.cur = self.rf._parse_header(self.fd) + self.cur_avail = self.cur.add_size + + def _skip(self, cnt): + """RAR Seek, skipping through rar files to get to correct position + """ + + while cnt > 0: + # next vol needed? + if self.cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self.cur_avail: + cnt -= self.cur_avail + self.remain -= self.cur_avail + self.cur_avail = 0 + else: + self.fd.seek(cnt, 1) + self.cur_avail -= cnt + self.remain -= cnt + cnt = 0 + + def _read(self, cnt): + """Read from potentially multi-volume archive.""" + + buf = EMPTY + while cnt > 0: + # next vol needed? + if self.cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self.cur_avail: + data = self.fd.read(self.cur_avail) + else: + data = self.fd.read(cnt) + if not data: + break + + # got some data + cnt -= len(data) + self.cur_avail -= len(data) + if buf: + buf += data + else: + buf = data + + return buf + + def _open_next(self): + """Proceed to next volume.""" + + # is the file split over archives? + if (self.cur.flags & RAR_FILE_SPLIT_AFTER) == 0: + return False + + if self.fd: + self.fd.close() + self.fd = None + + # open next part + self.volfile = self.rf._next_volname(self.volfile) + fd = open(self.volfile, "rb", 0) + self.fd = fd + + # loop until first file header + while 1: + cur = self.rf._parse_header(fd) + if not cur: + raise BadRarFile("Unexpected EOF") + if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN): + if cur.add_size: + fd.seek(cur.add_size, 1) + continue + if cur.orig_filename != self.inf.orig_filename: + raise BadRarFile("Did not found file entry") + self.cur = cur + self.cur_avail = cur.add_size + return True + + if have_memoryview: + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + got = 0 + vbuf = memoryview(buf) + while got < len(buf): + # next vol needed? + if self.cur_avail == 0: + if not self._open_next(): + break + + # lenght for next read + cnt = len(buf) - got + if cnt > self.cur_avail: + cnt = self.cur_avail + + # read into temp view + res = self.fd.readinto(vbuf[got : got + cnt]) + if not res: + break + if self.crc_check: + self.CRC = crc32(vbuf[got : got + res], self.CRC) + self.cur_avail -= res + self.remain -= res + got += res + return got + + +class HeaderDecrypt: + """File-like object that decrypts from another file""" + def __init__(self, f, key, iv): + self.f = f + self.ciph = AES.new(key, AES.MODE_CBC, iv) + self.buf = EMPTY + + def tell(self): + return self.f.tell() + + def read(self, cnt=None): + if cnt > 8*1024: + raise BadRarFile('Bad count to header decrypt - wrong password?') + + # consume old data + if cnt <= len(self.buf): + res = self.buf[:cnt] + self.buf = self.buf[cnt:] + return res + res = self.buf + self.buf = EMPTY + cnt -= len(res) + + # decrypt new data + BLK = self.ciph.block_size + while cnt > 0: + enc = self.f.read(BLK) + if len(enc) < BLK: + break + dec = self.ciph.decrypt(enc) + if cnt >= len(dec): + res += dec + cnt -= len(dec) + else: + res += dec[:cnt] + self.buf = dec[cnt:] + cnt = 0 + + return res + +## +## Utility functions +## + +def rar3_s2k(psw, salt): + """String-to-key hash for RAR3.""" + + seed = psw.encode('utf-16le') + salt + iv = EMPTY + h = sha1() + for i in range(16): + for j in range(0x4000): + cnt = S_LONG.pack(i*0x4000 + j) + h.update(seed + cnt[:3]) + if j == 0: + iv += h.digest()[19:20] + key_be = h.digest()[:16] + key_le = pack("<LLLL", *unpack(">LLLL", key_be)) + return key_le, iv + +def rar_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): + """Decompress blob of compressed data. + + Used for data with non-standard header - eg. comments. + """ + + # already uncompressed? + if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0: + return data + + # take only necessary flags + flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK) + flags |= RAR_LONG_BLOCK + + # file header + fname = bytes('data', 'ascii') + date = 0 + mode = 0x20 + fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc, + date, vers, meth, len(fname), mode) + fhdr += fname + if flags & RAR_FILE_SALT: + if not salt: + return EMPTY + fhdr += salt + + # full header + hlen = S_BLK_HDR.size + len(fhdr) + hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr + hcrc = crc32(hdr[2:]) & 0xFFFF + hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr + + # archive main header + mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2+4) + + # decompress via temp rar + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + try: + tmpf.write(RAR_ID + mh + hdr + data) + tmpf.close() + + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + if psw is not None and (flags & RAR_FILE_PASSWORD): + cmd.append("-p" + psw) + else: + cmd.append("-p-") + cmd.append(tmpname) + + p = custom_popen(cmd) + return p.communicate()[0] + finally: + tmpf.close() + os.unlink(tmpname) + +def to_datetime(t): + """Convert 6-part time tuple into datetime object.""" + + if t is None: + return None + + # extract values + year, mon, day, h, m, xs = t + s = int(xs) + us = int(1000000 * (xs - s)) + + # assume the values are valid + try: + return datetime(year, mon, day, h, m, s, us) + except ValueError: + pass + + # sanitize invalid values + MDAY = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + if mon < 1: mon = 1 + if mon > 12: mon = 12 + if day < 1: day = 1 + if day > MDAY[mon]: day = MDAY[mon] + if h > 23: h = 23 + if m > 59: m = 59 + if s > 59: s = 59 + if mon == 2 and day == 29: + try: + return datetime(year, mon, day, h, m, s, us) + except ValueError: + day = 28 + return datetime(year, mon, day, h, m, s, us) + +def parse_dos_time(stamp): + """Parse standard 32-bit DOS timestamp.""" + + sec = stamp & 0x1F; stamp = stamp >> 5 + min = stamp & 0x3F; stamp = stamp >> 6 + hr = stamp & 0x1F; stamp = stamp >> 5 + day = stamp & 0x1F; stamp = stamp >> 5 + mon = stamp & 0x0F; stamp = stamp >> 4 + yr = (stamp & 0x7F) + 1980 + return (yr, mon, day, hr, min, sec * 2) + +def custom_popen(cmd): + """Disconnect cmd from parent fds, read only from stdout.""" + + # needed for py2exe + creationflags = 0 + if sys.platform == 'win32': + creationflags = 0x08000000 # CREATE_NO_WINDOW + + # run command + p = Popen(cmd, bufsize = 0, stdout = PIPE, stdin = PIPE, stderr = STDOUT, + creationflags = creationflags) + return p + diff --git a/resources/Info.plist b/resources/Info.plist new file mode 100644 index 0000000..2115763 --- /dev/null +++ b/resources/Info.plist @@ -0,0 +1,65 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleAllowMixedLocalizations</key> + <true/> + <key>CFBundleDevelopmentRegion</key> + <string>English</string> + <key>CFBundleDocumentTypes</key> + <array> + <dict> + <key>CFBundleTypeExtensions</key> + <array> + <string>*</string> + </array> + <key>CFBundleTypeOSTypes</key> + <array> + <string>****</string> + </array> + <key>CFBundleTypeRole</key> + <string>Viewer</string> + </dict> + </array> + <key>CFBundleExecutable</key> + <string>droplet</string> + <key>CFBundleGetInfoString</key> + <string>KindleComicConverter 2.0, Written 2012 by Ciro Mattia Gonano</string> + <key>CFBundleIconFile</key> + <string>droplet</string> + <key>CFBundleIdentifier</key> + <string>com.github.ciromattia.kcc</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>KindleComicConverter 1.20</string> + <key>CFBundlePackageType</key> + <string>APPL</string> + <key>CFBundleShortVersionString</key> + <string>1.0</string> + <key>CFBundleSignature</key> + <string>dplt</string> + <key>LSMinimumSystemVersionByArchitecture</key> + <dict> + <key>x86_64</key> + <string>10.6</string> + </dict> + <key>LSRequiresCarbon</key> + <true/> + <key>WindowState</key> + <dict> + <key>dividerCollapsed</key> + <true/> + <key>eventLogLevel</key> + <integer>-1</integer> + <key>name</key> + <string>ScriptWindowState</string> + <key>positionOfDivider</key> + <real>568</real> + <key>savedFrame</key> + <string>144 338 889 690 0 0 1680 1028 </string> + <key>selectedTabView</key> + <string>result</string> + </dict> +</dict> +</plist> diff --git a/resources/Scripts/main.scpt b/resources/Scripts/main.scpt new file mode 100644 index 0000000..48dfd96 --- /dev/null +++ b/resources/Scripts/main.scpt Binary files differdiff --git a/resources/comic2ebook.icns b/resources/comic2ebook.icns new file mode 100644 index 0000000..be1936e --- /dev/null +++ b/resources/comic2ebook.icns Binary files differdiff --git a/resources/description.rtfd/TXT.rtf b/resources/description.rtfd/TXT.rtf new file mode 100644 index 0000000..c8de892 --- /dev/null +++ b/resources/description.rtfd/TXT.rtf @@ -0,0 +1,22 @@ +{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf340 +{\fonttbl\f0\fnil\fcharset0 Verdana;} +{\colortbl;\red255\green255\blue255;\red76\green78\blue78;} +\pard\tx576\tx1152\tx1728\tx2304\tx2880\tx3456\tx4032\tx4608\tx5184\tx5760\tx6337\tx6913\tx7489\tx8065\tx8641\tx9217\tx9793\tx10369\tx10945\tx11521\tx12097\tx12674\tx13250\tx13826\tx14402\tx14978\tx15554\tx16130\tx16706\tx17282\tx17858\tx18435\tx19011\tx19587\tx20163\tx20739\tx21315\tx21891\tx22467\tx23043\tx23619\tx24195\tx24772\tx25348\tx25924\tx26500\tx27076\tx27652\tx28228\tx28804\tx29380\tx29956\tx30532\tx31109\tx31685\tx32261\tx32837\tx33413\tx33989\tx34565\tx35141\tx35717\tx36293\tx36870\tx37446\tx38022\tx38598\tx39174\tx39750\tx40326\tx40902\tx41478\tx42054\tx42630\tx43207\tx43783\tx44359\tx44935\tx45511\tx46087\tx46663\tx47239\tx47815\tx48391\tx48967\tx49544\tx50120\tx50696\tx51272\tx51848\tx52424\tx53000\tx53576\tx54152\tx54728\tx55305\tx55881\tx56457\tx57033\tx57609\li785\fi-786\pardirnatural + +\f0\fs24 \cf2 \CocoaLigature0 Copyright (c) 2012 Ciro Mattia Gonano <[email protected]>\ +\ +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.\ +\ +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.\ +\ +This script heavily relies on KindleStrip (C) by Paul Durrant and released in public domain (http://www.mobileread.com/forums/showthread.php?t=96903)\ +Also, you need to have kindlegen v2.7 (with KF8 support) which is downloadable from Amazon website.\ +\ +Changelog:\ + 1.0: first release\ + 1.10: add CBZ/CBR support to comic2ebook.py\ + 1.11: add CBZ/CBR support to KindleComicConverter\ + 1.2: added image page splitting and optimizations\ +\ +Todo:\ + - bundle a script to manipulate images (to get rid of Mangle/E-nki/whatsoever)} \ No newline at end of file diff --git a/resources/droplet.rsrc b/resources/droplet.rsrc new file mode 100644 index 0000000..f8bf4f2 --- /dev/null +++ b/resources/droplet.rsrc Binary files differdiff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f2628e9 --- /dev/null +++ b/setup.py @@ -0,0 +1,73 @@ +""" +py2app/py2exe build script for MyApplication. + +Will automatically ensure that all build prerequisites are available +via ez_setup + +Usage (Mac OS X): + python setup.py py2app + +Usage (Windows): + python setup.py py2exe +""" +import ez_setup +ez_setup.use_setuptools() + +import sys +from setuptools import setup + +NAME="KindleComicConverter" +VERSION="1.2.0" +IDENTIFIER="com.github.ciromattia.kcc" +EXENAME="KindleComicConverter" + +APP = ['kcc/comic2ebook.py'] +DATA_FILES = [] +OPTIONS = { 'argv_emulation': True, + 'iconfile': 'resources/comic2ebook.icns', + 'includes': 'kcc/*.py'} + +if sys.platform == 'darwin': + extra_options = dict( + setup_requires=['py2app'], + options=dict( + py2app=dict(OPTIONS, + resources=['LICENSE.txt','resources/Scripts','resources/description.rtfd'], + plist=dict( + CFBundleName = NAME, + CFBundleShortVersionString = VERSION, + CFBundleGetInfoString = NAME + " " + VERSION, + CFBundleExecutable = EXENAME, + CFBundleIdentifier = IDENTIFIER, + CFBundleDocumentTypes = dict( + CFBundleTypeExtensions=["zip","rar","cbz","cbr"], + CFBundleTypeName="Comics", + CFBundleTypeRole="Editor", + LSItemContentTypes = [ + "public.plain-text", + "public.text", + "public.data", + "com.apple.application-bundle" + ] + ) + ) + ) + ) + ) +elif sys.platform == 'win32': + extra_options = dict( + setup_requires=['py2exe'], + ) +else: + extra_options = dict( + # Normally unix-like platforms will use "setup.py install" + # and install the main script as such + scripts=APP, + ) + +setup( + name=NAME, + app=APP, + data_files=DATA_FILES, + **extra_options +) |