From 45c1afcad4bff454f99d10e177ee862589bdc3e6 Mon Sep 17 00:00:00 2001 From: Paweł Jastrzębski Date: Fri, 20 Jan 2017 09:44:21 +0100 Subject: Update build environment --- kindlecomicconverter/pdfjpgextract.py | 68 +++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 kindlecomicconverter/pdfjpgextract.py (limited to 'kindlecomicconverter/pdfjpgextract.py') diff --git a/kindlecomicconverter/pdfjpgextract.py b/kindlecomicconverter/pdfjpgextract.py new file mode 100644 index 0000000..90d0643 --- /dev/null +++ b/kindlecomicconverter/pdfjpgextract.py @@ -0,0 +1,68 @@ +# Copyright (c) 2012-2014 Ciro Mattia Gonano +# Copyright (c) 2013-2017 Pawel Jastrzebski +# +# Based upon the code snippet by Ned Batchelder +# (http://nedbatchelder.com/blog/200712/extracting_jpgs_from_pdfs.html) +# +# Permission to use, copy, modify, and/or distribute this software for +# any purpose with or without fee is hereby granted, provided that the +# above copyright notice and this permission notice appear in all +# copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE +# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL +# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA +# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. 
#

import os
from random import choice
from string import ascii_uppercase, digits


class PdfJpgExtract:
    """Carve embedded JPEG images out of a PDF by scanning its raw bytes.

    The PDF is not parsed. The extractor looks for the ``stream`` keyword,
    checks whether a JPEG start-of-image marker (``FF D8``) follows within
    20 bytes, and then cuts everything up to the end-of-image marker
    (``FF D9``) located near the matching ``endstream`` keyword.
    """

    def __init__(self, origFileName):
        """Remember the source PDF and choose an output directory name.

        The output directory name is the PDF path without its extension,
        followed by ``-KCC-`` and three random uppercase letters/digits.
        The directory itself is only created by :meth:`extract`.

        :param origFileName: path of the PDF file to read.
        """
        self.origFileName = origFileName
        # (root, extension) pair as returned by os.path.splitext.
        self.filename = os.path.splitext(origFileName)
        suffix = ''.join(choice(ascii_uppercase + digits) for _ in range(3))
        self.path = self.filename[0] + "-KCC-" + suffix

    def getPath(self):
        """Return the directory path that :meth:`extract` writes into."""
        return self.path

    def extract(self):
        """Write every JPEG found in the PDF to ``jpg<N>.jpg`` files.

        The output directory is created with ``os.makedirs``, so the call
        fails if that directory already exists.

        :return: tuple ``(output_directory, number_of_images_written)``.
        :raises Exception: if a JPEG start marker is found but no
            ``endstream`` keyword or no JPEG end marker follows it.
        """
        # Read the whole file up front; the context manager guarantees the
        # handle is closed even if reading fails.
        with open(self.origFileName, "rb") as pdffile:
            pdf = pdffile.read()

        startmark = b"\xff\xd8"
        startfix = 0
        endmark = b"\xff\xd9"
        # The end marker is two bytes long and belongs to the image.
        endfix = 2
        i = 0
        njpg = 0
        os.makedirs(self.path)
        while True:
            istream = pdf.find(b"stream", i)
            if istream < 0:
                break
            # Only streams whose payload starts with a JPEG marker shortly
            # after the keyword are treated as images.
            istart = pdf.find(startmark, istream, istream + 20)
            if istart < 0:
                i = istream + 20
                continue
            iend = pdf.find(b"endstream", istart)
            if iend < 0:
                raise Exception("Didn't find end of stream!")
            # Look for the end marker just before "endstream". The search
            # start is clamped to istart: an unclamped "iend - 20" may be
            # negative (bytes.find would then count from the END of the
            # buffer) or may point before the image start.
            iend = pdf.find(endmark, max(istart, iend - 20))
            if iend < 0:
                raise Exception("Didn't find end of JPG!")
            istart += startfix
            iend += endfix
            jpg = pdf[istart:iend]
            with open(os.path.join(self.path, "jpg%d.jpg" % njpg), "wb") as jpgfile:
                jpgfile.write(jpg)
            njpg += 1
            i = iend
        return self.path, njpg