1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
# -*- coding: utf-8 -*-
#
# Copyright (c) 2013-2019 Pawel Jastrzebski <pawelj@iosphe.re>
#
# Permission to use, copy, modify, and/or distribute this software for
# any purpose with or without fee is hereby granted, provided that the
# above copyright notice and this permission notice appear in all
# copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
# OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
import os
from xml.dom.minidom import parse, Document
from tempfile import mkdtemp
from shutil import rmtree
from . import comicarchive
class MetadataParser:
def __init__(self, source):
self.source = source
self.data = {'Series': '',
'Volume': '',
'Number': '',
'Writers': [],
'Pencillers': [],
'Inkers': [],
'Colorists': [],
'Summary': '',
'Bookmarks': []}
self.rawdata = None
self.format = None
if self.source.endswith('.xml') and os.path.exists(self.source):
self.rawdata = parse(self.source)
elif not self.source.endswith('.xml'):
try:
cbx = comicarchive.ComicArchive(self.source)
self.rawdata = cbx.extractMetadata()
self.format = cbx.type
except OSError as e:
raise UserWarning(e.strerror)
if self.rawdata:
self.parseXML()
def parseXML(self):
if len(self.rawdata.getElementsByTagName('Series')) != 0:
self.data['Series'] = self.rawdata.getElementsByTagName('Series')[0].firstChild.nodeValue
if len(self.rawdata.getElementsByTagName('Volume')) != 0:
self.data['Volume'] = self.rawdata.getElementsByTagName('Volume')[0].firstChild.nodeValue
if len(self.rawdata.getElementsByTagName('Number')) != 0:
self.data['Number'] = self.rawdata.getElementsByTagName('Number')[0].firstChild.nodeValue
if len(self.rawdata.getElementsByTagName('Summary')) != 0:
self.data['Summary'] = self.rawdata.getElementsByTagName('Summary')[0].firstChild.nodeValue
for field in ['Writer', 'Penciller', 'Inker', 'Colorist']:
if len(self.rawdata.getElementsByTagName(field)) != 0:
for person in self.rawdata.getElementsByTagName(field)[0].firstChild.nodeValue.split(', '):
self.data[field + 's'].append(person)
self.data[field + 's'] = list(set(self.data[field + 's']))
self.data[field + 's'].sort()
if len(self.rawdata.getElementsByTagName('Page')) != 0:
for page in self.rawdata.getElementsByTagName('Page'):
if 'Bookmark' in page.attributes and 'Image' in page.attributes:
self.data['Bookmarks'].append((int(page.attributes['Image'].value),
page.attributes['Bookmark'].value))
def saveXML(self):
if self.rawdata:
root = self.rawdata.getElementsByTagName('ComicInfo')[0]
for row in (['Series', self.data['Series']], ['Volume', self.data['Volume']],
['Number', self.data['Number']], ['Writer', ', '.join(self.data['Writers'])],
['Penciller', ', '.join(self.data['Pencillers'])], ['Inker', ', '.join(self.data['Inkers'])],
['Colorist', ', '.join(self.data['Colorists'])], ['Summary', self.data['Summary']]):
if self.rawdata.getElementsByTagName(row[0]):
node = self.rawdata.getElementsByTagName(row[0])[0]
if row[1]:
node.firstChild.replaceWholeText(row[1])
else:
root.removeChild(node)
elif row[1]:
main = self.rawdata.createElement(row[0])
root.appendChild(main)
text = self.rawdata.createTextNode(row[1])
main.appendChild(text)
else:
doc = Document()
root = doc.createElement('ComicInfo')
root.setAttribute('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
root.setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
doc.appendChild(root)
for row in (['Series', self.data['Series']], ['Volume', self.data['Volume']],
['Number', self.data['Number']], ['Writer', ', '.join(self.data['Writers'])],
['Penciller', ', '.join(self.data['Pencillers'])], ['Inker', ', '.join(self.data['Inkers'])],
['Colorist', ', '.join(self.data['Colorists'])], ['Summary', self.data['Summary']]):
if row[1]:
main = doc.createElement(row[0])
root.appendChild(main)
text = doc.createTextNode(row[1])
main.appendChild(text)
self.rawdata = doc
if self.source.endswith('.xml'):
with open(self.source, 'w', encoding='utf-8') as f:
self.rawdata.writexml(f, encoding='utf-8')
else:
workdir = mkdtemp('', 'KCC-')
tmpXML = os.path.join(workdir, 'ComicInfo.xml')
with open(tmpXML, 'w', encoding='utf-8') as f:
self.rawdata.writexml(f, encoding='utf-8')
try:
cbx = comicarchive.ComicArchive(self.source)
cbx.addFile(tmpXML)
except OSError as e:
raise UserWarning(e.strerror)
rmtree(workdir)
|