| 1 |
|
|---|
| 2 |
"""Gaphor file reader. |
|---|
| 3 |
|
|---|
| 4 |
This module contains only one interesting function: |
|---|
| 5 |
|
|---|
| 6 |
parse(filename) |
|---|
| 7 |
|
|---|
| 8 |
which returns a dictionary of ID -> <parsed_object> pairs. |
|---|
| 9 |
|
|---|
| 10 |
A parsed_object is one of element, canvas or canvasitem. |
|---|
| 11 |
|
|---|
| 12 |
A parsed_object contains values and references. values is a dictionary of |
|---|
| 13 |
name -> value pairs. A value contains a string with the value read from the |
|---|
| 14 |
model file. references contains a list of name -> reference_list pairs, where |
|---|
| 15 |
reference_list is a list of ID's. |
|---|
| 16 |
|
|---|
| 17 |
element objects can contain a canvas object (which is the case for elements |
|---|
| 18 |
of type Diagram). Each element has a type, which corresponds to a class name |
|---|
| 19 |
in the gaphor.UML module. Elements also have a unique ID, by which they are |
|---|
| 20 |
referred to in the dictionary returned by parse(). |
|---|
| 21 |
|
|---|
| 22 |
canvas does not have an ID, but contains a list of canvasitems (which is a |
|---|
| 23 |
list of real canvasitem objects, not references). |
|---|
| 24 |
|
|---|
| 25 |
canvasitem objects can also contain a list of canvasitems (canvasitems can be |
|---|
| 26 |
nested). They also have a unique ID by which they have been added to the |
|---|
| 27 |
dictionary returned by parse(). Each canvasitem has a type, which maps to a |
|---|
| 28 |
class name in the gaphor.diagram module. |
|---|
| 29 |
|
|---|
| 30 |
The generator parse_generator(filename, loader) may be used if the loading |
|---|
| 31 |
takes a long time. The yielded values are the percentage of the file read. |
|---|
| 32 |
""" |
|---|
| 33 |
|
|---|
| 34 |
from __future__ import generators |
|---|
| 35 |
|
|---|
| 36 |
__all__ = [ 'parse', 'ParserException' ] |
|---|
| 37 |
|
|---|
| 38 |
import os |
|---|
| 39 |
from xml.sax import handler |
|---|
| 40 |
|
|---|
| 41 |
import gaphor.misc.odict |
|---|
| 42 |
|
|---|
| 43 |
class base(object):
    """Simple base class for element, canvas and canvasitem.

    An instance carries two dictionaries:

    values     -- name -> value
    references -- name -> reference (or list of references)

    Entries of both dictionaries can be read with item access
    (``obj[name]``), attribute access (``obj.name``) or ``obj.get(name)``.
    ``values`` takes precedence over ``references`` when a name is present
    in both.
    """

    def __init__(self):
        self.values = {}
        self.references = {}

    def __getattr__(self, key):
        """Look up *key* in values/references when it is no real attribute.

        Raises AttributeError (not KeyError) for unknown names, so that
        hasattr() and getattr() with a default behave as usual.
        """
        # __getattr__ is only called when the normal lookup failed.  If the
        # dictionaries themselves are missing (instance created without
        # running __init__, e.g. by copy or pickle) looking them up again
        # would recurse forever.
        if key in ('values', 'references'):
            raise AttributeError(key)
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __getitem__(self, key):
        """Return the value stored under *key*, else the reference.

        Raises KeyError when *key* is in neither dictionary.
        """
        try:
            return self.values[key]
        except KeyError:
            return self.references[key]

    def get(self, key):
        """Like ``self[key]``, but return None when *key* is unknown."""
        try:
            return self[key]
        except KeyError:
            return None
|---|
| 65 |
|
|---|
| 66 |
class element(base):
    """A parsed model element.

    id     -- the ID of the element
    type   -- the type name of the element
    canvas -- the canvas object attached to the element, None by default
    """

    def __init__(self, id, type):
        super(element, self).__init__()
        self.canvas = None
        self.type = type
        self.id = id
|---|
| 74 |
|
|---|
| 75 |
class canvas(base):
    """A parsed canvas; ``canvasitems`` is its list of canvasitem objects."""

    def __init__(self):
        super(canvas, self).__init__()
        self.canvasitems = list()
|---|
| 81 |
|
|---|
| 82 |
class canvasitem(base):
    """A parsed canvas item.

    id          -- the ID of the item
    type        -- the type name of the item
    canvasitems -- list of nested canvasitem objects, initially empty
    """

    def __init__(self, id, type):
        super(canvasitem, self).__init__()
        self.canvasitems = list()
        self.type = type
        self.id = id
|---|
| 90 |
|
|---|
| 91 |
|
|---|
| 92 |
# XML namespace of Gaphor model files.  Namespace-aware parsing only
# processes tags that are in this namespace or in no namespace at all.
XMLNS = 'http://gaphor.sourceforge.net/gaphor'
|---|
| 93 |
|
|---|
| 94 |
class ParserException(Exception):
    """Error raised when the XML read is not a valid Gaphor model file."""
|---|
| 96 |
|
|---|
| 97 |
|
|---|
| 98 |
# Parser states.  The state is stored on the parser stack next to the object
# being read and tells which kind of tag is currently open:
#
#   ROOT     nothing is open yet
#   GAPHOR   the <gaphor> root tag
#   ELEMENT  a model element
#   DIAGRAM  a model element of type 'Diagram' (may contain a <canvas>)
#   CANVAS   a <canvas>
#   ITEM     an <item> on a canvas or nested in another item
#   ATTR     an attribute tag of an element, canvas or item
#   VAL      a <val> inside an attribute tag
#   REFLIST  a <reflist> inside an attribute tag
#   REF      a <ref> inside an attribute tag or a <reflist>
#
# range() (instead of xrange()) unpacks the same way on Python 2 and 3.
(ROOT,
 GAPHOR,
 ELEMENT,
 DIAGRAM,
 CANVAS,
 ITEM,
 ATTR,
 VAL,
 REFLIST,
 REF) = range(10)
|---|
| 109 |
|
|---|
| 110 |
class GaphorLoader(handler.ContentHandler):
    """SAX handler that collects the elements of a Gaphor model file.

    Every child tag of <gaphor> becomes an element and every <item> on a
    canvas becomes a canvasitem; both are stored in ``self.elements`` under
    their ``id`` attribute.  An element of type 'Diagram' may contain a
    canvas and a canvas (or item) may contain items.  Each of them can have
    values and references to other elements.

    The handler also implements the lexical-handler callbacks (comment,
    startDTD, endDTD, startCDATA, endCDATA) so that it can be told about
    CDATA sections.

    Data read in non-CDATA text is stripped.  If a CDATA section is found
    all non-CDATA text is ignored.
    """

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # startDocument() initializes all parser state, so the loader is
        # usable (and inspectable) before the first document is fed.
        self.startDocument()

    def push(self, element, state):
        """Add an (element, state) pair to the item stack."""
        self.__stack.append((element, state))

    def pop(self):
        """Remove the last item from the stack and return it."""
        return self.__stack.pop()[0]

    def peek(self, depth=1):
        """Return an item on the stack without removing it.

        depth=1 is the last item pushed, depth=2 the one before it, etc.
        """
        return self.__stack[-1 * depth][0]

    def state(self):
        """Return the current state of the parser.

        This is the state pushed last, or ROOT when the stack is empty.
        """
        try:
            return self.__stack[-1][1]
        except IndexError:
            return ROOT

    def startDTD(self, name, public_id, system_id):
        """Lexical handler callback for the start of a DOCTYPE; ignored.

        Needed next to endDTD(): without it a document with a DOCTYPE
        declaration fails with an AttributeError.
        """
        pass

    def endDTD(self):
        """Lexical handler callback for the end of a DOCTYPE; ignored."""
        pass

    def startDocument(self):
        """Start of document: all our attributes are initialized."""
        self.version = None
        self.gaphor_version = None
        self.elements = gaphor.misc.odict.odict()
        self.__stack = []
        self.value_is_cdata = 0
        self.cdata = ''
        # Text collection mode:
        #   0 -- ignore text (a CDATA section has been closed)
        #   1 -- inside a CDATA section
        #   2 -- plain text, no CDATA section seen since the last start tag
        self.in_cdata = 0

    def endDocument(self):
        """End of document: every tag that was opened must be closed."""
        if self.__stack:
            raise ParserException('Invalid XML document.')

    def startElement(self, name, attrs):
        """Handle the start tag *name*, depending on the current state.

        Raises ParserException if the tag is not allowed in this state.
        """
        # Forget the text collected so far and collect plain text until a
        # CDATA section is found.
        self.cdata = ''
        self.in_cdata = 2

        state = self.state()

        # Child of <gaphor>: a model element, the tag name is its type.
        if state == GAPHOR:
            elem_id = attrs['id']
            e = element(elem_id, name)
            # NOTE: kept as an assert so callers see the same exception
            # type as before; it is not checked when running with -O.
            assert elem_id not in self.elements, \
                   '%s already defined' % (elem_id)
            self.elements[elem_id] = e
            self.push(e, DIAGRAM if name == 'Diagram' else ELEMENT)

        # Only a Diagram element has a canvas.
        elif state == DIAGRAM and name == 'canvas':
            c = canvas()
            self.peek().canvas = c
            self.push(c, CANVAS)

        # Items live on the canvas or inside other items.
        elif state in (CANVAS, ITEM) and name == 'item':
            elem_id = attrs['id']
            c = canvasitem(elem_id, attrs['type'])
            assert elem_id not in self.elements, \
                   '%s already defined' % (elem_id)
            self.elements[elem_id] = c
            self.peek().canvasitems.append(c)
            self.push(c, ITEM)

        # Any other tag in an element, canvas or item is an attribute; its
        # name is kept on the stack until the value or reference is read.
        elif state in (ELEMENT, DIAGRAM, CANVAS, ITEM):
            self.push(name, ATTR)

        # The attribute name is pushed once more, so <ref> tags inside the
        # list find the name at depth 1 and the owner at depth 3.
        elif state == ATTR and name == 'reflist':
            self.push(self.peek(), REFLIST)

        # A single reference: attribute name at depth 1, owner at depth 2.
        elif state == ATTR and name == 'ref':
            n = self.peek(1)
            self.peek(2).references[n] = attrs['refid']
            self.push(None, REF)

        # A reference inside a reflist is appended to the owner's list.
        elif state == REFLIST and name == 'ref':
            n = self.peek(1)
            r = self.peek(3).references
            refid = attrs['refid']
            try:
                r[n].append(refid)
            except KeyError:
                r[n] = [refid]
            self.push(None, REF)

        # The text of a <val> tag is stored when the tag is closed.
        elif state == ATTR and name == 'val':
            self.value_is_cdata = 1
            self.push(None, VAL)

        # The root tag carries the file format version.
        elif state == ROOT and name == 'gaphor':
            assert attrs['version'] in ('3.0',)
            self.version = attrs['version']
            self.gaphor_version = attrs.get('gaphor-version')
            if not self.gaphor_version:
                self.gaphor_version = attrs.get('gaphor_version')
            self.push(None, GAPHOR)

        else:
            raise ParserException(
                'Invalid XML: tag <%s> not known (state = %s)' % (name, state))

    def endElement(self, name):
        """Handle an end tag: store a finished value, pop the stack."""
        if self.state() == VAL:
            if self.value_is_cdata:
                # Stack: ..., owner, attribute name, <val>.
                n = self.peek(2)
                if self.in_cdata == 2:
                    # No CDATA section was seen: plain text is stripped.
                    self.cdata = self.cdata.strip()
                self.peek(3).values[n] = self.cdata
        self.pop()

    def startElementNS(self, name, qname, attrs):
        """Namespace-aware start tag; *name* is a (uri, localname) pair.

        Tags in the Gaphor namespace or in no namespace are passed on to
        startElement(), with attribute names reduced to their local part.
        Other tags are skipped.
        """
        if not name[0] or name[0] == XMLNS:
            a = dict((key[1], val) for key, val in attrs.items())
            self.startElement(name[1], a)

    def endElementNS(self, name, qname):
        """Namespace-aware end tag; skips the tags startElementNS skips."""
        if not name[0] or name[0] == XMLNS:
            self.endElement(name[1])

    def characters(self, content):
        """Read characters."""
        if self.in_cdata:
            self.cdata = self.cdata + content

    def comment(self, comment):
        """Lexical handler callback for XML comments; ignored."""
        pass

    def startCDATA(self):
        """Start a CDATA section. In case no CDATA section has been read
        before, the self.cdata is cleared."""
        if self.in_cdata == 2:
            self.cdata = ''
        self.in_cdata = 1

    def endCDATA(self):
        """End of CDATA section. No more data is read, unless another CDATA
        section is opened."""
        self.in_cdata = 0
|---|
| 295 |
|
|---|
| 296 |
|
|---|
| 297 |
def parse(filename):
    """Read the file filename and return what a fresh GaphorLoader collected.

    The result is the loader's ``elements`` mapping (ID -> parsed object).
    """
    loader = GaphorLoader()
    progress = parse_generator(filename, loader)
    # Only the side effect on the loader matters; the percentages yielded
    # by the generator are dropped.
    for _ in progress:
        pass
    return loader.elements
|---|
| 305 |
|
|---|
| 306 |
|
|---|
| 307 |
def parse_generator(filename, loader):
    """Generator variant of parse().

    Sets up a namespace-aware SAX parser with loader as both its content
    handler and its lexical handler, runs the file filename through it and
    passes on every value yielded by parse_file().
    """
    assert isinstance(loader, GaphorLoader), 'loader should be a GaphorLoader'
    import xml.sax
    sax_parser = xml.sax.make_parser()

    sax_parser.setProperty(handler.property_lexical_handler, loader)
    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(loader)

    progress = parse_file(filename, sax_parser)
    for done in progress:
        yield done
|---|
| 321 |
|
|---|
| 322 |
|
|---|
| 323 |
def parse_file(filename, parser):
    """Feed the file filename to parser in blocks of 512 bytes.

    This is a generator.  Each time a block has been fed to
    ``parser.feed()`` the next block is read and the percentage of the file
    read so far is yielded as an integer.  After the last block
    ``parser.close()`` is called.

    The file is closed in all cases, also when the parser raises an
    exception or when the generator is closed before it is exhausted.
    """
    file_size = os.path.getsize(filename)
    block_size = 512

    f = open(filename, 'rb')
    try:
        block = f.read(block_size)
        read_size = len(block)
        while block:
            parser.feed(block)
            block = f.read(block_size)
            read_size = read_size + len(block)
            # Floor division: the result is an integer on Python 2 and 3.
            yield (read_size * 100) // file_size

        parser.close()
    finally:
        f.close()
|---|
| 340 |
|
|---|
| 341 |
if __name__ == '__main__':
    # Parse the file named on the command line; without an argument the
    # former hard-coded file 'ns.xml' is parsed.
    import sys
    parse(sys.argv[1] if len(sys.argv) > 1 else 'ns.xml')
|---|
| 343 |
|
|---|
| 344 |
|
|---|