My favorites | Sign in
Project Home Downloads Wiki Issues Source
Repository:
Checkout   Browse   Changes   Clones    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Library: pyflowctrl
# Module: xml2dict1
# Dependency: core3
# Examples:
#

import re
from xml.etree import cElementTree

from core3 import WAITING, PROCESSING
from core3 import EmptyStream, Stream, Process, ProcessFlow

class xml2dict(Process):
    """Process that converts XML documents into nested dictionaries.

    Items read from ``io['input']`` are ``(filename, xml_data)`` tuples; for
    each one a ``(filename, dictionary)`` tuple is put on ``io['output']``.

    Conventions of the resulting dictionary:

    * every element becomes ``{tag: {...}}``;
    * non-empty element text is stored under the ``'$'`` key;
    * attributes are stored under ``'@<attribute name>'`` keys;
    * child elements are stored under their own tag; when a tag occurs more
      than once under the same parent the values are collected into a list.

    Text following a child element (``element.tail``) is not read.
    """

    def __init__(self, remove_xmlns=True):
        """Create the process and its input/output streams.

        remove_xmlns -- when true, default namespace declarations
                        (``xmlns="..."``) are stripped from the raw XML
                        before it is parsed.
        """
        super(xml2dict, self).__init__()
        self.io = {
            'input': Stream(),
            'output': Stream(),
        }
        self.remove_xmlns = remove_xmlns

    @staticmethod
    def _removeEmptyStr(s):
        """Return `s` without surrounding whitespace if it is a `str`.

        Any other value (including None) is returned unchanged.
        """
        if isinstance(s, str):
            return s.strip()
        return s

    def doc2dict(self, document):
        """Convert the root element `document` into a new dictionary."""
        return self._handleElement(document, {})

    def _handleElement(self, element, elementDict):
        """Store the dictionary form of `element` in `elementDict`.

        The converted element is written to ``elementDict[tag]`` (replacing
        any previous value for that tag) and `elementDict` itself is returned.
        Children are converted recursively.
        """
        tag = self._removeEmptyStr(element.tag)
        text = self._removeEmptyStr(element.text)

        node = elementDict[tag] = {}

        # element.text is None when the element carries no character data
        # (e.g. <a/> or <a><b/></a>); a truthiness test covers both None and
        # the empty string, where len(text) would raise TypeError on None.
        if text:
            node['$'] = text

        for name, value in element.attrib.items():
            node['@%s' % name] = value

        for child in element:
            for key, value in self._handleElement(child, {}).items():
                if key not in node:
                    node[key] = value
                elif isinstance(node[key], list):
                    # third and later occurrences of the same child tag
                    node[key].append(value)
                else:
                    # second occurrence: promote the single value to a list
                    node[key] = [node[key], value]
        return elementDict

    def main(self):
        """Generator body of the process.

        Yields WAITING whenever the input stream raises EmptyStream;
        otherwise converts one item, puts the result on the output stream
        and yields control back with a bare ``yield``.
        """
        while True:
            try:
                (filename, xml_data) = self.io['input'].get()
            except EmptyStream:
                yield WAITING
                continue

            # Drop default namespace declarations so that tags are not
            # prefixed with '{namespace-uri}' by the parser.
            if self.remove_xmlns:
                xml_data = re.sub(r'xmlns=".+?"', '', xml_data)

            document = cElementTree.fromstring(xml_data)
            self.io['output'].put((filename, self.doc2dict(document)))
            # NOTE(review): PROCESSING is imported from core3 but never
            # used; confirm whether it should be yielded here.
            yield

Change log

8ab3b50418f8 by ownport <ownport> on Aug 21, 2011   Diff
pyflowctrl: added process for conversion
xml data to dictionary (xml2dict1.py)
Go to: 
Project members, sign in to write a code review

Older revisions

All revisions of this file

File info

Size: 2167 bytes, 74 lines
Powered by Google Project Hosting