root/gaphor/tags/gaphor-0.12.0/gaphor/parser.py

Revision 2032, 10.9 kB (checked in by wrobe..@pld-linux.org, 1 year ago)

- allow to load files from file and cStringIO.StringIO objects

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 # vim:sw=4:et
2 """Gaphor file reader.
3
4 This module contains only one interesting function:
5
6     parse(filename)
7
8 which returns a dictionary of ID -> <parsed_object> pairs.
9
10 A parsed_object is one of element, canvas or canvasitem.
11
12 A parsed_object contains values and references. values is a dictionary of
13 name -> value pairs. A value is a string holding the text read from the
14 model file. references is a dictionary of name -> reference pairs, where a
15 reference is a single ID (<ref>) or a list of IDs (inside a <reflist>).
16
17 element objects can contain a canvas object (which is the case for elements
18 of type Diagram). Each element has a type, which corresponds to a class name
19 in the gaphor.UML module. Elements also have a unique ID, by which they are
20 referred to in the dictionary returned by parse().
21
22 canvas does not have an ID, but contains a list of canvasitems (which is a
23 list of real canvasitem objects, not references).
24
25 canvasitem objects can also contain a list of canvasitems (canvasitems can be
26 nested). They also have a unique ID by which they have been added to the
27 dictionary returned by parse(). Each canvasitem has a type, which maps to a
28 class name in the gaphor.diagram module.
29
30 The generator parse_generator(filename, loader) may be used if the loading
31 takes a long time. The yielded values are the percentage of the file read.
32 """
33
34 __all__ = [ 'parse', 'ParserException' ]
35
36 import os
37 from xml.sax import handler
38 from cStringIO import InputType
39
40 from gaphor.misc.odict import odict
41
class base(object):
    """Simple base class for element, canvas and canvasitem.

    Every parsed object carries two dictionaries:

    values     -- attribute name -> string read from the model file.
    references -- attribute name -> referenced ID, or a list of IDs.

    Entries can be read either as items (obj['name']) or as attributes
    (obj.name). In both cases values is consulted before references.
    """

    def __init__(self):
        self.values = { }
        self.references = { }

    def __getattr__(self, key):
        """Expose the entries of values/references as attributes.

        Only called when normal attribute lookup fails. Raises
        AttributeError (not KeyError) for unknown names, so that
        getattr(obj, name, default) and hasattr() behave as expected.
        """
        # If the dictionaries themselves are missing (e.g. the instance was
        # created without running __init__) looking them up through
        # __getitem__ would re-enter this method forever.
        if key in ('values', 'references'):
            raise AttributeError(key)
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __getitem__(self, key):
        """Return the value stored under key, falling back to the
        reference stored under key. Raises KeyError if neither exists.
        """
        try:
            return self.values[key]
        except KeyError:
            return self.references[key]

    def get(self, key):
        """Like obj[key], but return None instead of raising KeyError.
        """
        try:
            return self[key]
        except KeyError:
            return None
64
class element(base):
    """A parsed model element.

    Attributes set by the constructor:

    id     -- the ID given to the constructor.
    type   -- the type name given to the constructor.
    canvas -- initially None; may be replaced by a canvas object later.
    """

    def __init__(self, id, type):
        super(element, self).__init__()
        self.canvas = None
        self.type = type
        self.id = id
73
class canvas(base):
    """A parsed canvas: a container for canvasitem objects.

    canvasitems starts out as an empty list; a canvas has no ID of its own.
    """

    def __init__(self):
        super(canvas, self).__init__()
        self.canvasitems = list()
80
class canvasitem(base):
    """A parsed canvas item.

    Attributes set by the constructor:

    id          -- the ID given to the constructor.
    type        -- the type name given to the constructor.
    canvasitems -- initially empty list of nested canvasitem objects.
    """

    def __init__(self, id, type):
        super(canvasitem, self).__init__()
        self.canvasitems = list()
        self.type = type
        self.id = id
89
90
# XML namespace URI of Gaphor model files.
XMLNS = 'http://gaphor.sourceforge.net/gaphor'
92
class ParserException(Exception):
    """Error raised by this module when the input cannot be parsed."""
95
# Loader states. The numeric values are arbitrary but distinct; range() is
# used instead of xrange() because it unpacks identically and exists on
# both Python 2 and Python 3.
(ROOT,          # Expect 'gaphor' element
 GAPHOR,        # Expect UML classes (tag name is the UML class name)
 ELEMENT,       # Expect properties of UML object
 DIAGRAM,       # Expect properties of Diagram object + canvas
 CANVAS,        # Expect canvas properties + <item> tags
 ITEM,          # Expect item attributes and nested items
 ATTR,          # Reading contents of an attribute (such as a <val> or <ref>)
 VAL,           # Reading contents of a <val> tag
 REFLIST,       # In a <reflist>
 REF,           # Reading contents of a <ref> tag
) = range(10)
108
class GaphorLoader(handler.ContentHandler):
    """Create a list of elements. An element may contain a canvas and a
    canvas may contain canvas items. Each element can have values and
    references to other elements.

    Data read in non-CDATA text is stripped. If a CDATA section is found all
    non-CDATA text is ignored.

    The loader keeps a stack of (object, state) pairs, one per open tag. The
    state of the top entry decides how the next tag is interpreted. Unknown
    tags, duplicate IDs and unsupported file versions raise ParserException.
    """

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # make sure all variables are initialized:
        self.startDocument()

    def push(self, element, state):
        """Add an element, tagged with the given state, to the item stack.
        """
        self.__stack.append((element, state))

    def pop(self):
        """Return the last item on the stack. The item is removed from
        the stack.
        """
        return self.__stack.pop()[0]

    def peek(self, depth=1):
        """Return the item 'depth' entries from the top of the stack
        (1 is the top itself). The item is not removed.
        """
        return self.__stack[-depth][0]

    def state(self):
        """Return the current state of the parser: the state of the top
        stack entry, or ROOT if the stack is empty.
        """
        try:
            return self.__stack[-1][1]
        except IndexError:
            return ROOT

    def _add_element(self, id, obj):
        """Register obj in self.elements under id.

        Raises ParserException if the ID is already in use. An explicit
        check is used rather than assert, so it also works under -O.
        """
        # Test membership on the mapping itself; going through .keys()
        # would build and scan a list for every element on Python 2.
        if id in self.elements:
            raise ParserException('%s already defined' % (id,))
        self.elements[id] = obj

    def endDTD(self):
        pass

    def startDocument(self):
        """Start of document: all our attributes are initialized.
        """
        self.version = None
        self.gaphor_version = None
        self.elements = odict() # map id: element/canvasitem
        self.__stack = []
        self.value_is_cdata = 0
        self.cdata = ''
        # may have 3 states:
        #  2: simple data, should be stripped
        #  1: CDATA block,
        #  0: end CDATA, read no more data till the next element
        self.in_cdata = 0

    def endDocument(self):
        """End of document: every opened tag must have been closed again.
        """
        if self.__stack:
            raise ParserException('Invalid XML document.')

    def startElement(self, name, attrs):
        """Handle an opening tag according to the current parser state.

        name is the (local) tag name, attrs a mapping of attribute names
        to values. Raises ParserException for a tag that is not valid in
        the current state.
        """
        self.cdata = ''
        self.in_cdata = 2 # initial, just read text

        state = self.state()

        # Read a element class. The name of the tag is the class name:
        if state == GAPHOR:
            elem_id = attrs['id']
            e = element(elem_id, name)
            self._add_element(elem_id, e)
            if name == 'Diagram':
                self.push(e, DIAGRAM)
            else:
                self.push(e, ELEMENT)

        # Special treatment for the <canvas> tag in a Diagram:
        elif state == DIAGRAM and name == 'canvas':
            c = canvas()
            self.peek().canvas = c
            self.push(c, CANVAS)

        # Items in a canvas are referenced by the <item> tag:
        elif state in (CANVAS, ITEM) and name == 'item':
            item_id = attrs['id']
            c = canvasitem(item_id, attrs['type'])
            self._add_element(item_id, c)
            self.peek().canvasitems.append(c)
            self.push(c, ITEM)

        # Store the attribute name on the stack, so we can use it later
        # to store the <ref>, <reflist> or <val> content:
        elif state in (ELEMENT, DIAGRAM, CANVAS, ITEM):
            # handle 'normal' attributes
            # Note that Value may contain CDATA
            self.push(name, ATTR)

        # Reference list: the attribute name is pushed once more, so that
        # it is still on top of the stack for the nested <ref> tags.
        elif state == ATTR and name == 'reflist':
            self.push(self.peek(), REFLIST)

        # Reference with multiplicity 1:
        elif state == ATTR and name == 'ref':
            n = self.peek(1)
            # The element instance is one level below the attribute name
            self.peek(2).references[n] = attrs['refid']
            self.push(None, REF)

        # Reference with multiplicity *:
        elif state == REFLIST and name == 'ref':
            n = self.peek(1)
            # The element instance is below the REFLIST and ATTR entries
            refs = self.peek(3).references
            refs.setdefault(n, []).append(attrs['refid'])
            self.push(None, REF)

        # We need to get the text within the <val> tag:
        elif state == ATTR and name == 'val':
            self.value_is_cdata = 1
            self.push(None, VAL)

        # The <gaphor> tag is the toplevel tag:
        elif state == ROOT and name == 'gaphor':
            version = attrs['version']
            if version not in ('3.0',):
                raise ParserException('Unsupported file version: %s' % (version,))
            self.version = version
            # Both spellings of the attribute name are accepted
            self.gaphor_version = attrs.get('gaphor-version')
            if not self.gaphor_version:
                self.gaphor_version = attrs.get('gaphor_version')
            self.push(None, GAPHOR)

        else:
            raise ParserException('Invalid XML: tag <%s> not known (state = %s)' % (name, state))

    def endElement(self, name):
        """Handle a closing tag: store the collected text if a <val> tag
        is being closed, then drop the tag's entry from the stack.
        """
        # Put the text on the value
        if self.state() == VAL:
            if self.value_is_cdata:
                # Two levels up: the attribute name
                n = self.peek(2)
                # Plain (non-CDATA) text is stripped of surrounding whitespace
                if self.in_cdata == 2:
                    self.cdata = self.cdata.strip()
                # Three levels up: the element instance (element or canvasitem)
                self.peek(3).values[n] = self.cdata
        self.pop()

    def startElementNS(self, name, qname, attrs):
        """Namespace-aware variant of startElement.

        name is a (namespace uri, local name) pair. Tags outside the Gaphor
        namespace are ignored; for the others the namespace parts are
        dropped from the tag and attribute names.
        """
        if not name[0] or name[0] == XMLNS:
            a = { }
            for key, val in attrs.items():
                a[key[1]] = val
            self.startElement(name[1], a)

    def endElementNS(self, name, qname):
        """Namespace-aware variant of endElement; ignores the same tags as
        startElementNS so that the stack stays balanced.
        """
        if not name[0] or name[0] == XMLNS:
            self.endElement(name[1])

    def characters(self, content):
        """Read characters. Text is collected unless a CDATA section has
        just ended (in_cdata == 0)."""
        if self.in_cdata:
            self.cdata = self.cdata + content

    # Lexical handler stuff:

    def comment(self, comment):
        """XML comments are ignored."""
        pass

    def startCDATA(self):
        """Start a CDATA section. In case no CDATA section has been read
        before, the self.cdata is cleared."""
        if self.in_cdata == 2:
            self.cdata = ''
        self.in_cdata = 1

    def endCDATA(self):
        """End of CDATA section. No more data is read, unless another CDATA
        section is opened."""
        self.in_cdata = 0
294
295
def parse(filename):
    """Parse filename completely and return the loader's mapping of
    ID -> element/canvasitem.
    """
    gaphor_loader = GaphorLoader()
    progress = parse_generator(filename, gaphor_loader)

    # The progress values are not needed here; just run the generator
    # until the whole input has been read.
    for _percentage in progress:
        continue
    return gaphor_loader.elements
304
305
def parse_generator(filename, loader):
    """Generator variant of parse().

    Feeds filename through a namespace-aware SAX parser that reports to
    loader (a GaphorLoader), yielding the progress values produced by
    parse_file() along the way.
    """
    assert isinstance(loader, GaphorLoader), 'loader should be a GaphorLoader'
    from xml.sax import make_parser

    sax_parser = make_parser()
    # The loader serves both as lexical handler (CDATA, comments) and as
    # content handler (tags, text).
    sax_parser.setProperty(handler.property_lexical_handler, loader)
    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(loader)

    progress = parse_file(filename, sax_parser)
    for pct in progress:
        yield pct
320
321
def parse_file(filename, parser):
    """Feed the contents of filename to parser in small blocks.

    filename may be a path, an open file object or a cStringIO input
    object. parser must provide feed() and close().

    This is a generator: after each block it yields the percentage of the
    input read so far as an integer. If the size of the input is reported
    as 0 (as may happen for file objects that are not regular files), 0 is
    yielded until the end of the input is reached, then 100.

    A file opened by this function is always closed again, also when the
    parser raises or the generator is closed before it is exhausted. File
    objects passed in by the caller are left open.
    """
    owns_file = False
    if isinstance(filename, file):
        f = filename
        file_size = os.fstat(f.fileno())[6]
    elif isinstance(filename, InputType):
        f = filename
        file_size = len(f.getvalue())
        f.reset()
    else:
        file_size = os.stat(filename)[6]
        f = open(filename, 'rb')
        owns_file = True

    block_size = 512

    # NOTE: yield inside try/finally requires Python 2.5 or newer.
    try:
        block = f.read(block_size)
        read_size = len(block)
        while block:
            parser.feed(block)
            block = f.read(block_size)
            read_size = read_size + len(block)
            if file_size > 0:
                # Floor division keeps the result an integer everywhere
                yield (read_size * 100) // file_size
            elif block:
                # Size unknown: avoid dividing by zero
                yield 0
            else:
                yield 100

        parser.close()
    finally:
        if owns_file:
            f.close()
352
if __name__ == '__main__':
    # Manual check: parse the file 'ns.xml' from the current directory.
    parse('ns.xml')
Note: See TracBrowser for help on using the browser.