diff options
author | Paweł Jastrzębski <pawelj@iosphe.re> | 2015-01-24 18:36:15 +0100 |
---|---|---|
committer | Paweł Jastrzębski <pawelj@iosphe.re> | 2015-01-24 18:36:15 +0100 |
commit | 7d529a2acc05c84c0b4581eb1e5261227515833b (patch) | |
tree | b973670692e18845714fe791f45615fcd27cd8ed | |
parent | Yet another workaround for file lock problems (#125) (diff) | |
download | kcc-7d529a2acc05c84c0b4581eb1e5261227515833b.tar.gz kcc-7d529a2acc05c84c0b4581eb1e5261227515833b.tar.bz2 kcc-7d529a2acc05c84c0b4581eb1e5261227515833b.zip |
Added Metadata editor class
-rwxr-xr-x | kcc/comic2ebook.py | 46 | ||||
-rw-r--r-- | kcc/metadata.py | 159 | ||||
-rw-r--r-- | kcc/shared.py | 17 |
3 files changed, 191 insertions, 31 deletions
diff --git a/kcc/comic2ebook.py b/kcc/comic2ebook.py index c57a351..d1bbeb4 100755 --- a/kcc/comic2ebook.py +++ b/kcc/comic2ebook.py @@ -24,14 +24,13 @@ from copy import copy from glob import glob from json import loads from urllib.request import Request, urlopen -from re import split, sub, compile +from re import split, sub from stat import S_IWRITE, S_IREAD, S_IEXEC from zipfile import ZipFile, ZIP_STORED, ZIP_DEFLATED from tempfile import mkdtemp from shutil import move, copytree, rmtree from optparse import OptionParser, OptionGroup from multiprocessing import Pool -from xml.dom.minidom import parse from uuid import uuid4 from slugify import slugify as slugifyExt from PIL import Image @@ -48,6 +47,7 @@ from . import image from . import cbxarchive from . import pdfjpgextract from . import dualmetafix +from . import metadata from . import __version__ @@ -165,7 +165,7 @@ def buildHTML(path, imgfile, imgfilepath): "'{\"targetId\":\"" + boxes[i] + "-Panel-Parent\", \"ordinal\":" + str(order[i]), "}'></a></div>\n"]) if options.quality == 2: - imgfilepv = str.split(imgfile, ".") + imgfilepv = imgfile.split(".") imgfilepv[0] += "-hq" imgfilepv = ".".join(imgfilepv) else: @@ -641,45 +641,29 @@ def getComicInfo(path, originalPath): defaultTitle = False if os.path.exists(xmlPath): try: - xml = parse(xmlPath) + xml = metadata.MetadataParser(xmlPath) except Exception: os.remove(xmlPath) return options.authors = [] if defaultTitle: - if len(xml.getElementsByTagName('Series')) != 0: - options.title = xml.getElementsByTagName('Series')[0].firstChild.nodeValue - if len(xml.getElementsByTagName('Volume')) != 0: - titleSuffix += ' V' + xml.getElementsByTagName('Volume')[0].firstChild.nodeValue - if len(xml.getElementsByTagName('Number')) != 0: - titleSuffix += ' #' + xml.getElementsByTagName('Number')[0].firstChild.nodeValue + if xml.data['Series']: + options.title = xml.data['Series'] + if xml.data['Volume']: + titleSuffix += ' V' + xml.data['Volume'] + if xml.data['Number']: + 
titleSuffix += ' #' + xml.data['Number'] options.title += titleSuffix - if len(xml.getElementsByTagName('Writer')) != 0: - authorsTemp = str.split(xml.getElementsByTagName('Writer')[0].firstChild.nodeValue, ', ') - for author in authorsTemp: - options.authors.append(author) - if len(xml.getElementsByTagName('Penciller')) != 0: - authorsTemp = str.split(xml.getElementsByTagName('Penciller')[0].firstChild.nodeValue, ', ') - for author in authorsTemp: - options.authors.append(author) - if len(xml.getElementsByTagName('Inker')) != 0: - authorsTemp = str.split(xml.getElementsByTagName('Inker')[0].firstChild.nodeValue, ', ') - for author in authorsTemp: - options.authors.append(author) - if len(xml.getElementsByTagName('Colorist')) != 0: - authorsTemp = str.split(xml.getElementsByTagName('Colorist')[0].firstChild.nodeValue, ', ') - for author in authorsTemp: - options.authors.append(author) + for field in ['Writers', 'Pencillers', 'Inkers', 'Colorists']: + for person in xml.data[field]: + options.authors.append(person) if len(options.authors) > 0: options.authors = list(set(options.authors)) options.authors.sort() else: options.authors = ['KCC'] - if len(xml.getElementsByTagName('ScanInformation')) != 0: - coverId = xml.getElementsByTagName('ScanInformation')[0].firstChild.nodeValue - coverId = compile('(MCD\\()(\\d+)(\\))').search(coverId) - if coverId: - options.remoteCovers = getCoversFromMCB(coverId.group(2)) + if xml.data['MUid']: + options.remoteCovers = getCoversFromMCB(xml.data['MUid']) os.remove(xmlPath) diff --git a/kcc/metadata.py b/kcc/metadata.py new file mode 100644 index 0000000..f6383f4 --- /dev/null +++ b/kcc/metadata.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2013-2015 Pawel Jastrzebski <pawelj@iosphe.re> +# +# Permission to use, copy, modify, and/or distribute this software for +# any purpose with or without fee is hereby granted, provided that the +# above copyright notice and this permission notice appear in all +# copies. 
class MetadataParser:
    """Read and write ComicInfo.xml metadata, standalone or inside a comic archive.

    ``source`` is either a path ending in ``.xml`` or a path to a ZIP, RAR or
    7z archive that may contain a ``ComicInfo.xml`` member.

    Attributes:
        source: Path given to the constructor.
        data: Parsed values. ``Series``, ``Volume``, ``Number`` and ``MUid``
            are strings ('' when absent); ``Writers``, ``Pencillers``,
            ``Inkers`` and ``Colorists`` are sorted lists of unique names.
        rawdata: The ``xml.dom.minidom`` document, or ``None`` when the
            archive holds no ``ComicInfo.xml``.

    Raises:
        OSError: ``source`` is not a supported file type, or 7za did not
            report success while extracting.
    """

    def __init__(self, source):
        self.source = source
        self.data = {'Series': '',
                     'Volume': '',
                     'Number': '',
                     'Writers': [],
                     'Pencillers': [],
                     'Inkers': [],
                     'Colorists': [],
                     'MUid': ''}
        self.rawdata = None
        if self.source.endswith('.xml'):
            self.rawdata = parse(self.source)
        elif is_zipfile(self.source):
            with ZipFile(self.source) as zip_file:
                self.rawdata = self._parseMember(zip_file)
        elif rarfile.is_rarfile(self.source):
            with rarfile.RarFile(self.source) as rar_file:
                self.rawdata = self._parseMember(rar_file)
        elif is_7zfile(self.source):
            self.rawdata = self._parse7z()
        else:
            raise OSError
        if self.rawdata:
            self.parseXML()

    @staticmethod
    def _parseMember(archive):
        """Parse the root-level ComicInfo.xml of an opened archive; None if it has none."""
        if 'ComicInfo.xml' not in archive.namelist():
            return None
        with archive.open('ComicInfo.xml') as xml_file:
            return parse(xml_file)

    @staticmethod
    def _run7z(command):
        """Run a 7za command line and report whether it printed its success marker."""
        # NOTE(review): the command is a shell string built from file paths, so a
        # path containing a double quote breaks (or alters) the command. Kept as
        # is because switching to an argument list may change how the process is
        # launched on Windows - confirm before replacing.
        output = Popen(command, stdout=PIPE, stderr=STDOUT, shell=True)
        succeeded = False
        # Drain the whole pipe so 7za never blocks on a full stdout buffer.
        for line in output.stdout:
            if b"Everything is Ok" in line:
                succeeded = True
        return succeeded

    def _parse7z(self):
        """Extract ComicInfo.xml from a 7z archive into a temp dir and parse it."""
        workdir = mkdtemp('', 'KCC-TMP-')
        try:
            tmpXML = os.path.join(workdir, 'ComicInfo.xml')
            if not self._run7z('7za e "' + self.source + '" ComicInfo.xml -o"' + workdir + '"'):
                raise OSError
            if os.path.isfile(tmpXML):
                return parse(tmpXML)
            return None
        finally:
            # Always clean up, including when parse() rejects a malformed file.
            rmtree(workdir)

    def _elementText(self, tag):
        """Return the text of the first ``tag`` element, or '' if missing or empty."""
        nodes = self.rawdata.getElementsByTagName(tag)
        # An empty element such as <Volume/> has no child node at all.
        if not nodes or nodes[0].firstChild is None:
            return ''
        return nodes[0].firstChild.nodeValue or ''

    @staticmethod
    def _setText(node, value):
        """Replace whatever ``node`` contains with a single text node."""
        while node.firstChild is not None:
            node.removeChild(node.firstChild)
        node.appendChild(node.ownerDocument.createTextNode(value))

    def _writeXML(self, path):
        """Serialize the document to ``path`` as UTF-8 with a matching XML declaration."""
        # Without an explicit encoding the file is written in the locale
        # encoding while XML readers assume UTF-8, corrupting non-ASCII names.
        with open(path, 'w', encoding='utf-8') as f:
            self.rawdata.writexml(f, encoding='utf-8')

    def parseXML(self):
        """Fill ``self.data`` from ``self.rawdata``; absent or empty elements are skipped."""
        for field in ('Series', 'Volume', 'Number'):
            value = self._elementText(field)
            if value:
                self.data[field] = value
        for field in ('Writer', 'Penciller', 'Inker', 'Colorist'):
            value = self._elementText(field)
            if value:
                # Several people share one element, separated by ', '.
                people = set(self.data[field + 's'])
                people.update(value.split(', '))
                self.data[field + 's'] = sorted(people)
        scanInfo = self._elementText('ScanInformation')
        if scanInfo:
            # The id is embedded in the scan information as "MCD(<digits>)".
            coverId = compile('(MCD\\()(\\d+)(\\))').search(scanInfo)
            if coverId:
                self.data['MUid'] = coverId.group(2)

    def saveXML(self):
        """Write ``self.data`` back to the XML file or into the source archive.

        Fields with an empty value are removed from the document. A new
        ComicInfo document is created when none was loaded.

        Raises:
            NotImplementedError: ``source`` is a RAR archive.
            OSError: 7za did not report success while updating a 7z archive.
        """
        if self.rawdata:
            root = self.rawdata.documentElement
        else:
            doc = Document()
            root = doc.createElement('ComicInfo')
            root.setAttribute('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
            root.setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
            doc.appendChild(root)
            self.rawdata = doc
        # NOTE(review): an existing ScanInformation element is overwritten with
        # only the MCD id, or removed when MUid is empty - confirm that losing
        # any other scan information is intended.
        rows = (('Series', self.data['Series']),
                ('Volume', self.data['Volume']),
                ('Number', self.data['Number']),
                ('Writer', ', '.join(self.data['Writers'])),
                ('Penciller', ', '.join(self.data['Pencillers'])),
                ('Inker', ', '.join(self.data['Inkers'])),
                ('Colorist', ', '.join(self.data['Colorists'])),
                ('ScanInformation', 'MCD(' + self.data['MUid'] + ')' if self.data['MUid'] else ''))
        for tag, value in rows:
            nodes = self.rawdata.getElementsByTagName(tag)
            if nodes:
                node = nodes[0]
                if value:
                    self._setText(node, value)
                else:
                    # Detach from the actual parent; the element is not
                    # guaranteed to be a direct child of the root.
                    node.parentNode.removeChild(node)
            elif value:
                node = self.rawdata.createElement(tag)
                node.appendChild(self.rawdata.createTextNode(value))
                root.appendChild(node)
        if self.source.endswith('.xml'):
            self._writeXML(self.source)
            return
        workdir = mkdtemp('', 'KCC-TMP-')
        try:
            tmpXML = os.path.join(workdir, 'ComicInfo.xml')
            self._writeXML(tmpXML)
            if is_zipfile(self.source):
                # ZIP members cannot be replaced in place: drop the old one first.
                removeFromZIP(self.source, 'ComicInfo.xml')
                with ZipFile(self.source, mode='a', compression=ZIP_DEFLATED) as zip_file:
                    zip_file.write(tmpXML, arcname='ComicInfo.xml')
            elif rarfile.is_rarfile(self.source):
                raise NotImplementedError
            elif is_7zfile(self.source):
                if not self._run7z('7za a "' + self.source + '" "' + tmpXML + '"'):
                    raise OSError
        finally:
            # Covers every exit, including the RAR and ZIP failure paths.
            rmtree(workdir)
def removeFromZIP(zipfname, *filenames):
    """Rewrite the archive ``zipfname`` without the members named in ``filenames``.

    Every other entry is copied, with its original ZipInfo, into a scratch
    archive inside a temporary directory; the scratch archive then replaces
    ``zipfname``. The temporary directory is removed whether or not the
    rebuild succeeds.
    """
    scratch = mkdtemp('', 'KCC-TMP-')
    try:
        rebuilt = os.path.join(scratch, 'KCC-TMP.zip')
        with ZipFile(zipfname, 'r') as original, \
                ZipFile(rebuilt, 'w', compression=ZIP_DEFLATED) as pruned:
            survivors = (entry for entry in original.infolist()
                         if entry.filename not in filenames)
            for entry in survivors:
                pruned.writestr(entry, original.read(entry.filename))
        # Only swap the files once the pruned copy is complete and closed.
        move(rebuilt, zipfname)
    finally:
        rmtree(scratch)