author     Paweł Jastrzębski <pawelj@vulturis.eu>    2014-05-05 10:42:16 +0200
committer  Paweł Jastrzębski <pawelj@vulturis.eu>    2014-05-05 10:42:16 +0200
commit     7f80dacea8f2a0e454badbca765e3c48c1481a17 (patch)
tree       2cc93ecbcf92fcdba6f03ef9a70bf33e76fdbe8b
parent     Preliminary implementation of DualMetaFix (diff)
Finished implementation of DualMetaFix
-rw-r--r--   kcc/KCC_gui.py       34
-rw-r--r--   kcc/dualmetafix.py   19
-rw-r--r--   kcc/kindlesplit.py  382
3 files changed, 24 insertions, 411 deletions
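
For context before the diff: the change replaces the KindleUnpack-derived splitter with a direct metadata rewrite of the hybrid MOBI that KindleGen produces. A minimal sketch of the new call pattern, mirroring the GUI thread below (file names are illustrative and the kcc package is assumed to be importable):

    from kcc import dualmetafix

    # DualMobiMetaFix stamps a UUID-based ASIN (EXTH 113/504) and cdeType 'EBOK' (EXTH 501)
    # into both the MOBI 7 and KF8 headers, instead of splitting out one part of the file
    # the way kindlesplit.mobi_split() did.
    fixed = dualmetafix.DualMobiMetaFix('comic.mobi_toclean')
    open('comic.mobi', 'wb').write(fixed.getresult())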
diff --git a/kcc/KCC_gui.py b/kcc/KCC_gui.py
index e7cb18e..75d3aff 100644
--- a/kcc/KCC_gui.py
+++ b/kcc/KCC_gui.py
@@ -39,7 +39,7 @@ from html.parser import HTMLParser
 from psutil import virtual_memory, Popen, Process
 from .shared import md5Checksum
 from . import comic2ebook
-from . import kindlesplit
+from . import dualmetafix
 from . import KCC_rc_web
 if sys.platform.startswith('darwin'):
     from . import KCC_ui_osx as KCC_ui
@@ -303,27 +303,20 @@ class KindleGenThread(QtCore.QRunnable):
         self.signals.result.emit([kindlegenErrorCode, kindlegenError, self.work])
 
 
-class KindleUnpackThread(QtCore.QRunnable):
+class DualMetaFixThread(QtCore.QRunnable):
     def __init__(self, batch):
-        super(KindleUnpackThread, self).__init__()
+        super(DualMetaFixThread, self).__init__()
         self.signals = WorkerSignals()
         self.work = batch
 
     def run(self):
-        item = self.work[0]
-        profile = self.work[1]
+        item = self.work
         os.remove(item)
         mobiPath = item.replace('.epub', '.mobi')
         move(mobiPath, mobiPath + '_toclean')
         try:
-            # MOBI file produced by KindleGen is hybrid. KF8 + M7 + Source header
-            # KindleSplit is removing redundant data as we need only KF8 part for new Kindle models
-            if profile in ['K345', 'KHD', 'KF', 'KFHD', 'KFHD8', 'KFHDX', 'KFHDX8', 'KFA']:
-                newKindle = True
-            else:
-                newKindle = False
-            mobisplit = kindlesplit.mobi_split(mobiPath + '_toclean', newKindle)
-            open(mobiPath, 'wb').write(mobisplit.getResult())
+            mobiedit = dualmetafix.DualMobiMetaFix(mobiPath + '_toclean')
+            open(mobiPath, 'wb').write(mobiedit.getresult())
             self.signals.result.emit([True])
         except Exception as err:
             self.signals.result.emit([False, format(err)])
@@ -501,14 +494,13 @@ class WorkerThread(QtCore.QThread):
                 if self.kindlegenErrorCode[0] == 0:
                     GUI.progress.content = ''
                     MW.addMessage.emit('Creating MOBI files... <b>Done!</b>', 'info', True)
-                    MW.addMessage.emit('Cleaning MOBI files', 'info', False)
-                    GUI.progress.content = 'Cleaning MOBI files'
+                    MW.addMessage.emit('Processing MOBI files', 'info', False)
+                    GUI.progress.content = 'Processing MOBI files'
                     self.workerOutput = []
-                    # Multithreading KindleUnpack in current form is a waste of resources.
-                    # Unless we higly optimise KindleUnpack or drop 32bit support this will not change.
+                    # DualMetaFix is very fast and there is not reason to use multithreading.
                     self.pool.setMaxThreadCount(1)
                     for item in outputPath:
-                        worker = KindleUnpackThread([item, profile])
+                        worker = DualMetaFixThread(item)
                         worker.signals.result.connect(self.addResult)
                         self.pool.start(worker)
                     self.pool.waitForDone()
@@ -529,7 +521,7 @@ class WorkerThread(QtCore.QThread):
                             except Exception:
                                 pass
                             GUI.completedWork[os.path.basename(mobiPath)] = mobiPath
-                        MW.addMessage.emit('Cleaning MOBI files... <b>Done!</b>', 'info', True)
+                        MW.addMessage.emit('Processing MOBI files... <b>Done!</b>', 'info', True)
                     else:
                         GUI.progress.content = ''
                         for item in outputPath:
@@ -538,8 +530,8 @@ class WorkerThread(QtCore.QThread):
                                 os.remove(mobiPath)
                             if os.path.exists(mobiPath + '_toclean'):
                                 os.remove(mobiPath + '_toclean')
-                        MW.addMessage.emit('KindleUnpack failed to clean MOBI file!', 'error', False)
-                        MW.addTrayMessage.emit('KindleUnpack failed to clean MOBI file!', 'Critical')
+                        MW.addMessage.emit('Failed to process MOBI file!', 'error', False)
+                        MW.addTrayMessage.emit('Failed to process MOBI file!', 'Critical')
                 else:
                     GUI.progress.content = ''
                     epubSize = (os.path.getsize(self.kindlegenErrorCode[2]))//1024//1024
diff --git a/kcc/dualmetafix.py b/kcc/dualmetafix.py
index db07933..b8c3d06 100644
--- a/kcc/dualmetafix.py
+++ b/kcc/dualmetafix.py
@@ -17,6 +17,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import struct
+from uuid import uuid4
 
 
 class DualMetaFixException(Exception):
@@ -70,7 +71,7 @@ def replacesection(datain, secno, secdata):
     if len(secdata) != seclen:
         raise DualMetaFixException('section length change in replacesection')
     datalst = [datain[0:secstart], secdata, datain[secend:]]
-    dataout = b"".join(datalst)
+    dataout = b''.join(datalst)
     return dataout
 
 
@@ -135,9 +136,11 @@ def del_exth(rec0, exth_num):
 
 
 class DualMobiMetaFix:
-    def __init__(self, infile, asin):
+    def __init__(self, infile):
         self.datain = open(infile, 'rb').read()
         self.datain_rec0 = readsection(self.datain, 0)
+        # noinspection PyArgumentList
+        self.asin = bytes(uuid4(), 'UTF-8')
 
         # in the first mobi header
         # add 501 to "EBOK", add 113 as asin, add 504 as asin
@@ -145,9 +148,9 @@ class DualMobiMetaFix:
         rec0 = del_exth(rec0, 501)
         rec0 = del_exth(rec0, 113)
         rec0 = del_exth(rec0, 504)
-        rec0 = add_exth(rec0, 501, b"EBOK")
-        rec0 = add_exth(rec0, 113, asin)
-        rec0 = add_exth(rec0, 504, asin)
+        rec0 = add_exth(rec0, 501, b'EBOK')
+        rec0 = add_exth(rec0, 113, self.asin)
+        rec0 = add_exth(rec0, 504, self.asin)
         self.datain = replacesection(self.datain, 0, rec0)
 
         ver = getint(self.datain_rec0, mobi_version)
@@ -174,9 +177,9 @@ class DualMobiMetaFix:
         rec0 = del_exth(rec0, 501)
         rec0 = del_exth(rec0, 113)
         rec0 = del_exth(rec0, 504)
-        rec0 = add_exth(rec0, 501, b"EBOK")
-        rec0 = add_exth(rec0, 113, asin)
-        rec0 = add_exth(rec0, 504, asin)
+        rec0 = add_exth(rec0, 501, b'EBOK')
+        rec0 = add_exth(rec0, 113, self.asin)
+        rec0 = add_exth(rec0, 504, self.asin)
         self.datain = replacesection(self.datain, datain_kf8, rec0)
 
     def getresult(self):
diff --git a/kcc/kindlesplit.py b/kcc/kindlesplit.py
deleted file mode 100644
index fb7385b..0000000
--- a/kcc/kindlesplit.py
+++ /dev/null
@@ -1,382 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Based on initial version of KindleUnpack. Copyright (C) 2009 Charles M. Hannum <root@ihack.net>
-# Improvements Copyright (C) 2009-2012 P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding
-# Changes for KCC Copyright (C) 2013-2014 Pawel Jastrzebski <pawelj@vulturis.eu>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import struct
-# from uuid import uuid4
-
-# important pdb header offsets
-unique_id_seed = 68
-number_of_pdb_records = 76
-
-# important palmdoc header offsets
-book_length = 4
-book_record_count = 8
-first_pdb_record = 78
-
-# important rec0 offsets
-length_of_book = 4
-mobi_header_base = 16
-mobi_header_length = 20
-mobi_type = 24
-mobi_version = 36
-first_non_text = 80
-title_offset = 84
-first_image_record = 108
-first_content_index = 192
-last_content_index = 194
-kf8_last_content_index = 192  # for KF8 mobi headers
-fcis_index = 200
-flis_index = 208
-srcs_index = 224
-srcs_count = 228
-primary_index = 244
-datp_index = 256
-huffoff = 112
-hufftbloff = 120
-
-
-def getint(datain, ofs, sz='L'):
-    i, = struct.unpack_from('>'+sz, datain, ofs)
-    return i
-
-
-def writeint(datain, ofs, n, length='L'):
-    if length == 'L':
-        return datain[:ofs]+struct.pack('>L', n)+datain[ofs+4:]
-    else:
-        return datain[:ofs]+struct.pack('>H', n)+datain[ofs+2:]
-
-
-def getsecaddr(datain, secno):
-    nsec = getint(datain, number_of_pdb_records, 'H')
-    assert secno >= 0 & secno < nsec, 'secno %d out of range (nsec=%d)' % (secno, nsec)
-    secstart = getint(datain, first_pdb_record+secno*8)
-    if secno == nsec-1:
-        secend = len(datain)
-    else:
-        secend = getint(datain, first_pdb_record+(secno+1)*8)
-    return secstart, secend
-
-
-def readsection(datain, secno):
-    secstart, secend = getsecaddr(datain, secno)
-    return datain[secstart:secend]
-
-
-def writesection(datain, secno, secdata):  # overwrite, accounting for different length
-    dataout = deletesectionrange(datain, secno, secno)
-    return insertsection(dataout, secno, secdata)
-
-
-def nullsection(datain, secno):  # make it zero-length without deleting it
-    datalst = []
-    nsec = getint(datain, number_of_pdb_records, 'H')
-    secstart, secend = getsecaddr(datain, secno)
-    zerosecstart, zerosecend = getsecaddr(datain, 0)
-    dif = secend-secstart
-    datalst.append(datain[:first_pdb_record])
-    for i in range(0, secno+1):
-        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
-        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
-    for i in range(secno+1, nsec):
-        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
-        ofs -= dif
-        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
-    lpad = zerosecstart - (first_pdb_record + 8*nsec)
-    if lpad > 0:
-        datalst.append(b'\0' * lpad)
-    datalst.append(datain[zerosecstart: secstart])
-    datalst.append(datain[secend:])
-    dataout = b"".join(datalst)
-    return dataout
-
-
-def deletesectionrange(datain, firstsec, lastsec):  # delete a range of sections
-    datalst = []
-    firstsecstart, firstsecend = getsecaddr(datain, firstsec)
-    lastsecstart, lastsecend = getsecaddr(datain, lastsec)
-    zerosecstart, zerosecend = getsecaddr(datain, 0)
-    dif = lastsecend - firstsecstart + 8*(lastsec-firstsec+1)
-    nsec = getint(datain, number_of_pdb_records, 'H')
-    datalst.append(datain[:unique_id_seed])
-    datalst.append(struct.pack('>L', 2*(nsec-(lastsec-firstsec+1))+1))
-    datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
-    datalst.append(struct.pack('>H', nsec-(lastsec-firstsec+1)))
-    newstart = zerosecstart - 8*(lastsec-firstsec+1)
-    for i in range(0, firstsec):
-        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
-        ofs -= 8 * (lastsec - firstsec + 1)
-        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
-    for i in range(lastsec+1, nsec):
-        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
-        ofs -= dif
-        flgval = 2*(i-(lastsec-firstsec+1))
-        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
-    lpad = newstart - (first_pdb_record + 8*(nsec - (lastsec - firstsec + 1)))
-    if lpad > 0:
-        datalst.append(b'\0' * lpad)
-    datalst.append(datain[zerosecstart:firstsecstart])
-    datalst.append(datain[lastsecend:])
-    dataout = b"".join(datalst)
-    return dataout
-
-
-def insertsection(datain, secno, secdata):  # insert a new section
-    datalst = []
-    nsec = getint(datain, number_of_pdb_records, 'H')
-    secstart, secend = getsecaddr(datain, secno)
-    zerosecstart, zerosecend = getsecaddr(datain, 0)
-    dif = len(secdata)
-    datalst.append(datain[:unique_id_seed])
-    datalst.append(struct.pack('>L', 2*(nsec+1)+1))
-    datalst.append(datain[unique_id_seed+4:number_of_pdb_records])
-    datalst.append(struct.pack('>H', nsec+1))
-    newstart = zerosecstart + 8
-    for i in range(0, secno):
-        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
-        ofs += 8
-        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
-    datalst.append(struct.pack('>L', secstart + 8) + struct.pack('>L', (2*secno)))
-    for i in range(secno, nsec):
-        ofs, flgval = struct.unpack_from('>2L', datain, first_pdb_record+i*8)
-        ofs = ofs + dif + 8
-        flgval = 2*(i+1)
-        datalst.append(struct.pack('>L', ofs) + struct.pack('>L', flgval))
-    lpad = newstart - (first_pdb_record + 8*(nsec + 1))
-    if lpad > 0:
-        datalst.append(b'\0' * lpad)
-    datalst.append(datain[zerosecstart:secstart])
-    datalst.append(secdata)
-    datalst.append(datain[secstart:])
-    dataout = b"".join(datalst)
-    return dataout
-
-
-def insertsectionrange(sectionsource, firstsec, lastsec, sectiontarget, targetsec):  # insert a range of sections
-    dataout = sectiontarget
-    for idx in range(lastsec, firstsec-1, -1):
-        dataout = insertsection(dataout, targetsec, readsection(sectionsource, idx))
-    return dataout
-
-
-def get_exth_params(rec0):
-    ebase = mobi_header_base + getint(rec0, mobi_header_length)
-    elen = getint(rec0, ebase+4)
-    enum = getint(rec0, ebase+8)
-    return ebase, elen, enum
-
-
-def add_exth(rec0, exth_num, exth_bytes):
-    ebase, elen, enum = get_exth_params(rec0)
-    newrecsize = 8+len(exth_bytes)
-    newrec0 = rec0[0:ebase+4]+struct.pack('>L', elen+newrecsize)+struct.pack('>L', enum+1) +\
-        struct.pack('>L', exth_num) + struct.pack('>L', newrecsize)+exth_bytes+rec0[ebase+12:]
-    newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+newrecsize)
-    return newrec0
-
-
-def read_exth(rec0, exth_num):
-    exth_values = []
-    ebase, elen, enum = get_exth_params(rec0)
-    ebase += 12
-    while enum > 0:
-        exth_id = getint(rec0, ebase)
-        if exth_id == exth_num:
-            # We might have multiple exths, so build a list.
-            exth_values.append(rec0[ebase+8:ebase+getint(rec0, ebase+4)])
-        enum -= 1
-        ebase = ebase+getint(rec0, ebase+4)
-    return exth_values
-
-
-def write_exth(rec0, exth_num, exth_bytes):
-    ebase, elen, enum = get_exth_params(rec0)
-    ebase_idx = ebase+12
-    enum_idx = enum
-    while enum_idx > 0:
-        exth_id = getint(rec0, ebase_idx)
-        if exth_id == exth_num:
-            dif = len(exth_bytes)+8-getint(rec0, ebase_idx+4)
-            newrec0 = rec0
-            if dif != 0:
-                newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)+dif)
-            return newrec0[:ebase+4]+struct.pack('>L', elen+len(exth_bytes)+8-getint(rec0, ebase_idx+4)) +\
-                struct.pack('>L', enum)+rec0[ebase+12:ebase_idx+4] +\
-                struct.pack('>L', len(exth_bytes)+8)+exth_bytes +\
-                rec0[ebase_idx+getint(rec0, ebase_idx+4):]
-        enum_idx -= 1
-        ebase_idx = ebase_idx+getint(rec0, ebase_idx+4)
-    return rec0
-
-
-def del_exth(rec0, exth_num):
-    ebase, elen, enum = get_exth_params(rec0)
-    ebase_idx = ebase+12
-    enum_idx = 0
-    while enum_idx < enum:
-        exth_id = getint(rec0, ebase_idx)
-        exth_size = getint(rec0, ebase_idx+4)
-        if exth_id == exth_num:
-            newrec0 = rec0
-            newrec0 = writeint(newrec0, title_offset, getint(newrec0, title_offset)-exth_size)
-            newrec0 = newrec0[:ebase_idx]+newrec0[ebase_idx+exth_size:]
-            newrec0 = newrec0[0:ebase+4]+struct.pack('>L', elen-exth_size)+struct.pack('>L', enum-1)+newrec0[ebase+12:]
-            return newrec0
-        enum_idx += 1
-        ebase_idx = ebase_idx+exth_size
-    return rec0
-
-
-class mobi_split:
-    def __init__(self, infile, newKindle):
-        try:
-            datain = open(infile, 'rb').read()
-            datain_rec0 = readsection(datain, 0)
-            ver = getint(datain_rec0, mobi_version)
-            # fake_asin = str(uuid4())
-            self.combo = (ver != 8)
-            if not self.combo:
-                return
-            exth121 = read_exth(datain_rec0, 121)
-            if len(exth121) == 0:
-                self.combo = False
-                return
-            else:
-                # only pay attention to first exth121
-                # (there should only be one)
-                datain_kf8, = struct.unpack_from('>L', exth121[0], 0)
-                if datain_kf8 == 0xffffffff:
-                    self.combo = False
-                    return
-            datain_kfrec0 = readsection(datain, datain_kf8)
-            firstimage = getint(datain_rec0, first_image_record)
-            lastimage = getint(datain_rec0, last_content_index, 'H')
-
-            if not newKindle:
-                # create the standalone mobi7
-                num_sec = getint(datain, number_of_pdb_records, 'H')
-                # remove BOUNDARY up to but not including ELF record
-                self.result_file = deletesectionrange(datain, datain_kf8-1, num_sec-2)
-                # check if there are SRCS records and delete them
-                srcs = getint(datain_rec0, srcs_index)
-                num_srcs = getint(datain_rec0, srcs_count)
-                if srcs != 0xffffffff and num_srcs > 0:
-                    self.result_file = deletesectionrange(self.result_file, srcs, srcs+num_srcs-1)
-                    datain_rec0 = writeint(datain_rec0, srcs_index, 0xffffffff)
-                    datain_rec0 = writeint(datain_rec0, srcs_count, 0)
-                # reset the EXTH 121 KF8 Boundary meta data to 0xffffffff
-                datain_rec0 = write_exth(datain_rec0, 121, struct.pack('>L', 0xffffffff))
-                # datain_rec0 = del_exth(datain_rec0,121)
-                # datain_rec0 = del_exth(datain_rec0,534)
-                # don't remove the EXTH 125 KF8 Count of Resources, seems to be present in mobi6 files as well
-                # set the EXTH 129 KF8 Masthead / Cover Image string to the null string
-                datain_rec0 = write_exth(datain_rec0, 129, b'')
-                # don't remove the EXTH 131 KF8 Unidentified Count, seems to be present in mobi6 files as well
-
-                # Make sure we have an ASIN & cdeType set...
-                # if len(read_exth(datain_rec0, 113)) == 0:
-                # datain_rec0 = add_exth(datain_rec0, 113, fake_asin)
-                # if len(read_exth(datain_rec0, 504)) == 0:
-                # datain_rec0 = add_exth(datain_rec0, 504, fake_asin)
-                if len(read_exth(datain_rec0, 501)) == 0:
-                    datain_rec0 = add_exth(datain_rec0, 501, b'EBOK')
-
-                # need to reset flags stored in 0x80-0x83
-                # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
-                # Bit Flags
-                # 0x1000 = Bit 12 indicates if embedded fonts are used or not
-                # 0x0800 = means this Header points to *shared* images/resource/fonts ??
-                # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
-                # 0x0040 = exth exists
-                # 0x0010 = Not sure but this is always set so far
-                fval, = struct.unpack_from('>L', datain_rec0, 0x80)
-                # need to remove flag 0x0800 for KindlePreviewer 2.8 and unset Bit 12 for embedded fonts
-                fval &= 0x07FF
-                datain_rec0 = datain_rec0[:0x80] + struct.pack('>L', fval) + datain_rec0[0x84:]
-                self.result_file = writesection(self.result_file, 0, datain_rec0)
-                if lastimage == 0xffff:
-                    # find the lowest of the next sections and copy up to that.
-                    ofs_list = [(kf8_last_content_index, 'L'), (fcis_index, 'L'), (flis_index, 'L'), (datp_index, 'L'),
-                                (hufftbloff, 'L')]
-                    for ofs, sz in ofs_list:
-                        n = getint(datain_kfrec0, ofs, sz)
-                        if 0 < n < lastimage:
-                            lastimage = n-1
-
-                # Try to null out FONT and RES, but leave the (empty) PDB record so image refs remain valid
-                for i in range(firstimage, lastimage):
-                    imgsec = readsection(self.result_file, i)
-                    if imgsec[0:4] in ['RESC', 'FONT']:
-                        self.result_file = nullsection(self.result_file, i)
-                # mobi7 finished
-            else:
-                # create standalone mobi8
-                self.result_file = deletesectionrange(datain, 0, datain_kf8-1)
-                target = getint(datain_kfrec0, first_image_record)
-                self.result_file = insertsectionrange(datain, firstimage, lastimage, self.result_file, target)
-                datain_kfrec0 = readsection(self.result_file, 0)
-
-                # Only keep the correct EXTH 116 StartOffset, KG 2.5 carries over the one from the mobi7 part,
-                # which then points at garbage in the mobi8 part, and confuses FW 3.4
-                kf8starts = read_exth(datain_kfrec0, 116)
-                # If we have multiple StartOffset, keep only the last one
-                kf8start_count = len(kf8starts)
-                while kf8start_count > 1:
-                    kf8start_count -= 1
-                    datain_kfrec0 = del_exth(datain_kfrec0, 116)
-
-                # update the EXTH 125 KF8 Count of Images/Fonts/Resources
-                datain_kfrec0 = write_exth(datain_kfrec0, 125, struct.pack('>L', lastimage-firstimage+1))
-
-                # Same dance for the KF8, we want an ASIN & cdeType :)
-                # if len(read_exth(datain_kfrec0, 113)) == 0:
-                # datain_kfrec0 = add_exth(datain_kfrec0, 113, fake_asin)
-                # if len(read_exth(datain_kfrec0, 504)) == 0:
-                # datain_kfrec0 = add_exth(datain_kfrec0, 504, fake_asin)
-                if len(read_exth(datain_kfrec0, 501)) == 0:
-                    datain_kfrec0 = add_exth(datain_kfrec0, 501, b'EBOK')
-
-                # need to reset flags stored in 0x80-0x83
-                # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
-                # standalone mobi8 with exth: 0x0050
-                # Bit Flags
-                # 0x1000 = Bit 12 indicates if embedded fonts are used or not
-                # 0x0800 = means this Header points to *shared* images/resource/fonts ??
-                # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
-                # 0x0040 = exth exists
-                # 0x0010 = Not sure but this is always set so far
-                fval, = struct.unpack_from('>L', datain_kfrec0, 0x80)
-                fval &= 0x1FFF
-                fval |= 0x0800
-                datain_kfrec0 = datain_kfrec0[:0x80] + struct.pack('>L', fval) + datain_kfrec0[0x84:]
-
-                # properly update other index pointers that have been shifted by the insertion of images
-                ofs_list = [(kf8_last_content_index, 'L'), (fcis_index, 'L'), (flis_index, 'L'), (datp_index, 'L'),
-                            (hufftbloff, 'L')]
-                for ofs, sz in ofs_list:
-                    n = getint(datain_kfrec0, ofs, sz)
-                    if n != 0xffffffff:
-                        datain_kfrec0 = writeint(datain_kfrec0, ofs, n+lastimage-firstimage+1, sz)
-                self.result_file = writesection(self.result_file, 0, datain_kfrec0)
-                # mobi8 finished
-        except Exception:
-            raise
-
-    def getResult(self):
-        return self.result_file
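
The EXTH helpers that dualmetafix.py retains (get_exth_params, read_exth, add_exth, del_exth) walk the same structure that the deleted code above documents: a record count at EXTH base + 8 followed by (id, length, payload) entries. A condensed, standalone sketch of that walk, reusing the header offsets from the deleted file (rec0 is assumed to be PalmDB record 0, as returned by readsection(data, 0)):

    import struct

    MOBI_HEADER_BASE = 16      # same offsets as the constants in the deleted kindlesplit.py
    MOBI_HEADER_LENGTH = 20

    def iter_exth(rec0):
        # The EXTH block sits right after the MOBI header inside record 0.
        ebase = MOBI_HEADER_BASE + struct.unpack_from('>L', rec0, MOBI_HEADER_LENGTH)[0]
        count = struct.unpack_from('>L', rec0, ebase + 8)[0]
        pos = ebase + 12
        for _ in range(count):
            exth_id, size = struct.unpack_from('>2L', rec0, pos)
            yield exth_id, rec0[pos + 8:pos + size]  # payload excludes the 8-byte record header
            pos += size

add_exth() in both modules does the inverse: it prepends a new (id, length, payload) record right after the count, bumps the stored EXTH length and record count, and shifts title_offset by the record size so the block can grow in place.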