Add PDF jpg image extraction (fixes #2)

More work on GUI with Tkinter.
author: Ciro Mattia Gonano <ciromattia@gmail.com> 2013-01-14 12:53:13 +0100
committer: Ciro Mattia Gonano <ciromattia@gmail.com> 2013-01-14 12:53:13 +0100
commit: 2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65 (patch)
tree: 8d8132798df0d1f389ef4e1bdd0e6e0c074dfb62
parent: Fixed an issue in OPF generation for device resolution (fixes #4) (diff)
download: kcc-2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65.tar.gz
kcc-2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65.tar.bz2
kcc-2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65.zip
5 files changed, 187 insertions, 41 deletions
diff --git a/KindleComicConverter.app/Contents/Resources/comic2ebook.py b/KindleComicConverter.app/Contents/Resources/comic2ebook.py
index 0b410f7..05f50db 100755
--- a/KindleComicConverter.app/Contents/Resources/comic2ebook.py
+++ b/KindleComicConverter.app/Contents/Resources/comic2ebook.py
@@ -38,7 +38,7 @@ __version__ = '1.30'
 import os
 import sys
 from optparse import OptionParser
-import image, cbxarchive
+import image, cbxarchive, pdfjpgextract
 
 class HTMLbuilder:
 
@@ -149,15 +149,21 @@ def main(argv=None):
                       help="Comic title")
     parser.add_option("-m", "--manga-style", action="store_true", dest="righttoleft", default=False,
                       help="Split pages 'manga style' (right-to-left reading)")
-    options, args = parser.parse_args()
+    options, args = parser.parse_args(argv)
     if len(args) != 1:
         parser.print_help()
-        sys.exit(1)
+        return
     dir = args[0]
-    cbx = cbxarchive.CBxArchive(dir)
-    if cbx.isCbxFile():
-        cbx.extract()
-        dir = cbx.getPath()
+    fname = os.path.splitext(dir)
+    if (fname[1].lower() == '.pdf'):
+        pdf = pdfjpgextract.PdfJpgExtract(dir)
+        pdf.extract()
+        dir = pdf.getPath()
+    else:
+        cbx = cbxarchive.CBxArchive(dir)
+        if cbx.isCbxFile():
+            cbx.extract()
+            dir = cbx.getPath()
     filelist = []
     try:
         print "Splitting double pages..."
@@ -192,5 +198,5 @@ def main(argv=None):
 
 if __name__ == "__main__":
     Copyright()
-    main()
+    main(sys.argv[1:])
     sys.exit(0)
diff --git a/kcc.py b/kcc.py
index 2d468c5..636b428 100644
--- a/kcc.py
+++ b/kcc.py
@@ -31,14 +31,11 @@
 #   - Improve error reporting
 #   - recurse into dirtree for multiple comics
 
-__version__ = '1.30'
+__version__ = '2.0'
 
 from Tkinter import *
 from kcc import gui
 
 root = Tk()
-app = gui.MainWindow(master=root)
-app.master.title("Kindle Comic Converter v" + __version__)
-app.master.maxsize(1000, 400)
-app.mainloop()
-root.destroy()
+app = gui.MainWindow(master=root,title="Kindle Comic Converter v" + __version__)
+root.mainloop()
diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py
index 0b410f7..05f50db 100755
--- a/kcc/comic2ebook.py
+++ b/kcc/comic2ebook.py
@@ -38,7 +38,7 @@ __version__ = '1.30'
 import os
 import sys
 from optparse import OptionParser
-import image, cbxarchive
+import image, cbxarchive, pdfjpgextract
 
 class HTMLbuilder:
 
@@ -149,15 +149,21 @@ def main(argv=None):
                       help="Comic title")
     parser.add_option("-m", "--manga-style", action="store_true", dest="righttoleft", default=False,
                       help="Split pages 'manga style' (right-to-left reading)")
-    options, args = parser.parse_args()
+    options, args = parser.parse_args(argv)
     if len(args) != 1:
         parser.print_help()
-        sys.exit(1)
+        return
     dir = args[0]
-    cbx = cbxarchive.CBxArchive(dir)
-    if cbx.isCbxFile():
-        cbx.extract()
-        dir = cbx.getPath()
+    fname = os.path.splitext(dir)
+    if (fname[1].lower() == '.pdf'):
+        pdf = pdfjpgextract.PdfJpgExtract(dir)
+        pdf.extract()
+        dir = pdf.getPath()
+    else:
+        cbx = cbxarchive.CBxArchive(dir)
+        if cbx.isCbxFile():
+            cbx.extract()
+            dir = cbx.getPath()
     filelist = []
     try:
         print "Splitting double pages..."
@@ -192,5 +198,5 @@ def main(argv=None):
 
 if __name__ == "__main__":
     Copyright()
-    main()
+    main(sys.argv[1:])
     sys.exit(0)
diff --git a/kcc/gui.py b/kcc/gui.py
index ab685e9..a9b24f6 100644
--- a/kcc/gui.py
+++ b/kcc/gui.py
@@ -18,26 +18,86 @@
 
 from Tkinter import *
 import tkFileDialog
+import comic2ebook
+from image import ProfileData
+
+class MainWindow:
+
+    def clear_files(self):
+        self.files = []
+        self.refresh_list()
 
-class MainWindow(Frame):
     def open_files(self):
-        self.files = tkFileDialog.askopenfilename()
-
-    def createWidgets(self):
-        self.QUIT = Button(self)
-        self.QUIT["text"] = "Quit"
-        self.QUIT["fg"]   = "red"
-        self.QUIT["command"] =  self.quit
-        self.QUIT.pack({"side": "right"})
-
-        self.OPENFILES = Button(self)
-        self.OPENFILES["text"] = "Open files",
-        self.OPENFILES["command"] = self.open_files
-        self.OPENFILES.pack({"side": "left"})
-
-    def __init__(self, master=None):
-        Frame.__init__(self, master)
-        self.pack()
-        self.createWidgets()
+        filetypes = [('all files', '.*'), ('Comic files', ('*.cbr','*.cbz','*.zip','*.rar'))]
+        f = tkFileDialog.askopenfilenames(title="Choose a file...",filetypes=filetypes)
+        if (isinstance(f,tuple) == False):
+            try:
+                import re
+                f = re.findall('\{(.*?)\}', f)
+            except:
+                import tkMessageBox
+                tkMessageBox.showerror(
+                    "Open file",
+                    "askopenfilename() returned other than a tuple and no regex module could be found"
+                )
+                sys.exit(1)
+        self.files.extend(f)
+        self.refresh_list()
+
+    def open_folder(self):
+        self.files = tkFileDialog.askdirectory(title="Choose a folder...")
+        self.refresh_list()
+
+    def refresh_list(self):
+        self.filelocation.config(state=NORMAL)
+        self.filelocation.delete(0, END)
+        for file in self.files:
+            self.filelocation.insert(END, file)
+        self.filelocation.config(state=DISABLED)
+
+    def initialize(self):
+        self.filelocation = Listbox(self.master)
+        self.filelocation.pack(fill=BOTH, expand=1)
+        self.refresh_list()
+
+        self.clear_file = Button(self.master, text="Clear files", command=self.clear_files)
+        self.clear_file.pack(side=LEFT)
+        self.open_file = Button(self.master, text="Add files...", command=self.open_files)
+        self.open_file.pack(side=LEFT)
+        self.open_folder = Button(self.master, text="Add folder...", command=self.open_folder)
+        self.open_folder.pack(side=LEFT)
+
+        self.profile = StringVar()
+        self.profile.set("KHD")
+        for text in ProfileData.Profiles:
+            b = Radiobutton(self.master, text=text,
+                            variable=self.profile, value=text)
+            b.pack(anchor=W,fill=BOTH)
+
+        self.mangastyle = BooleanVar()
+        self.mangastyle = False
+        self.c = Checkbutton(self.master, text="Split manga-style (right-to-left reading)",
+                             variable=self.mangastyle)
+        self.c.pack()
+
+        #now for a button
+        self.submit = Button(self.master, text="Execute!", command=self.convert, fg="red")
+        self.submit.pack()
+
+    def convert(self):
+        argv = ["-p",self.profile.get()]
+        if (self.mangastyle == True):
+            argv.append("-m")
+        for entry in self.files:
+            subargv = list(argv)
+            subargv.append(entry)
+            comic2ebook.main(subargv)
+        print "Done!"
+
+    def __init__(self, master, title):
+        self.files = []
+        self.master = master
+        self.master.title(title)
+        self.initialize()
 
 
diff --git a/kcc/pdfjpgextract.py b/kcc/pdfjpgextract.py
new file mode 100644
index 0000000..0787100
--- /dev/null
+++ b/kcc/pdfjpgextract.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2012 Ciro Mattia Gonano <ciromattia@gmail.com>
+#
+# Based upon the code snippet by Ned Batchelder
+# (http://nedbatchelder.com/blog/200712/extracting_jpgs_from_pdfs.html)
+#
+# Permission to use, copy, modify, and/or distribute this software for
+# any purpose with or without fee is hereby granted, provided that the
+# above copyright notice and this permission notice appear in all
+# copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
+# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+#
+__version__ = '1.0'
+
+import os
+
+class PdfJpgExtract:
+    def __init__(self, origFileName):
+        self.cbxexts = ['.zip','.cbz','.rar','.cbr']
+        self.origFileName = origFileName
+        self.filename = os.path.splitext(origFileName)
+        self.path = self.filename[0]
+
+    def getPath(self):
+        return self.path
+
+    def extract(self):
+        pdf = file(self.origFileName, "rb").read()
+
+        startmark = "\xff\xd8"
+        startfix = 0
+        endmark = "\xff\xd9"
+        endfix = 2
+        i = 0
+
+        njpg = 0
+        os.makedirs(self.path)
+        while True:
+            istream = pdf.find("stream", i)
+            if istream < 0:
+                break
+            istart = pdf.find(startmark, istream, istream+20)
+            if istart < 0:
+                i = istream+20
+                continue
+            iend = pdf.find("endstream", istart)
+            if iend < 0:
+                raise Exception("Didn't find end of stream!")
+            iend = pdf.find(endmark, iend-20)
+            if iend < 0:
+                raise Exception("Didn't find end of JPG!")
+
+            istart += startfix
+            iend += endfix
+            print "JPG %d from %d to %d" % (njpg, istart, iend)
+            jpg = pdf[istart:iend]
+            jpgfile = file(self.path+"/jpg%d.jpg" % njpg, "wb")
+            jpgfile.write(jpg)
+            jpgfile.close()
+
+            njpg += 1
+            i = iend
+
+        dir = os.listdir(self.path)
+        if len(dir) == 1:
+            import shutil
+            for f in os.listdir(self.path + "/" + dir[0]):
+                shutil.move(self.path + "/" + dir[0] + "/" + f, self.path)
+            os.rmdir(self.path + "/" + dir[0])
+
author	Ciro Mattia Gonano <ciromattia@gmail.com>	2013-01-14 12:53:13 +0100
committer	Ciro Mattia Gonano <ciromattia@gmail.com>	2013-01-14 12:53:13 +0100
commit	2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65 (patch)
tree	8d8132798df0d1f389ef4e1bdd0e6e0c074dfb62
parent	Fixed an issue in OPF generation for device resolution (fixes #4) (diff)
download	kcc-2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65.tar.gz kcc-2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65.tar.bz2 kcc-2a7b2c9e3d301a4f22270e5c2ab10e19d3bcbc65.zip