root/gaphor/tags/release-0.10.2/gaphor/parser.py

Revision 1121, 10.6 kB (checked in by arjanmol, 2 years ago)

Merged changed from new-canvas branch to trunk

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 # vim:sw=4:et
2 """Gaphor file reader.
3
4 This module contains only one interesting function:
5
6     parse(filename)
7
8 which returns a dictionary of ID -> <parsed_object> pairs.
9
10 A parsed_object is one of element, canvas or canvasitem.
11
12 A parsed_object contains values and references. values is a dictionary of
13 name -> value pairs. A value contains a string with the value read from the
14 model file. references is a dictionary of name -> reference pairs, where a
15 reference is a single ID (from a <ref>) or a list of IDs (from a <reflist>).
16
17 element objects can contain a canvas object (which is the case for elements
18 of type Diagram). Each element has a type, which corresponds to a class name
19 in the gaphor.UML module. Elements also have a unique ID, by which they are
20 referred to in the dictionary returned by parse().
21
22 canvas does not have an ID, but contains a list of canvasitems (which is a
23 list of real canvasitem objects, not references).
24
25 canvasitem objects can also contain a list of canvasitems (canvasitems can be
26 nested). They also have a unique ID by which they have been added to the
27 dictionary returned by parse(). Each canvasitem has a type, which maps to a
28 class name in the gaphor.diagram module.
29
30 The generator parse_generator(filename, loader) may be used if the loading
31 takes a long time. The yielded values are the percentage of the file read.
32 """
33
34 from __future__ import generators
35
36 __all__ = [ 'parse', 'ParserException' ]
37
38 import os
39 from xml.sax import handler
40
41 import gaphor.misc.odict
42
class base(object):
    """Simple base class for element, canvas and canvasitem.

    Parsed data lives in two dictionaries: ``values`` (name -> string) and
    ``references`` (name -> ID or list of IDs). Entries of both can be read
    with item access (``obj[name]``), attribute access (``obj.name``) or
    ``obj.get(name)``; ``values`` takes precedence over ``references``.
    """

    def __init__(self):
        self.values = { }
        self.references = { }

    def __getattr__(self, key):
        """Resolve unknown attributes from values/references.

        Raises AttributeError (not KeyError) for unknown names, so that
        hasattr(), getattr() with a default, copy and pickle work.
        """
        # __getattr__ is only called when normal lookup failed. If the
        # dictionaries themselves are missing (instance created without
        # __init__, e.g. while being copied), looking them up again would
        # recurse endlessly.
        if key in ('values', 'references'):
            raise AttributeError(key)
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __getitem__(self, key):
        """Return the value named key, or else the reference named key.

        Raises KeyError if key is neither a value nor a reference.
        """
        try:
            return self.values[key]
        except KeyError:
            return self.references[key]

    def get(self, key):
        """Like __getitem__(), but return None if key is unknown."""
        try:
            return self[key]
        except KeyError:
            return None
65
class element(base):
    """A parsed model element.

    id is the unique ID under which the element is stored in the dictionary
    returned by the parser, type is the name of the XML tag it was read from.
    canvas stays None unless a canvas object is attached later.
    """
    #__slots__ = ('id', 'type', 'canvas', 'values', 'references')

    def __init__(self, id, type):
        super(element, self).__init__()
        self.canvas = None
        self.type = type
        self.id = id
74
class canvas(base):
    """A parsed canvas: has no ID of its own, only a list of canvas items."""
    #__slots__ = ('canvasitems', 'values', 'references')

    def __init__(self):
        super(canvas, self).__init__()
        # Top-level canvasitem objects, in the order they were added.
        self.canvasitems = list()
81
class canvasitem(base):
    """A parsed canvas item.

    Carries a unique id, a type name and a list of nested canvas items.
    """
    #__slots__ = ('id', 'type', 'canvasitems', 'values', 'references')

    def __init__(self, id, type):
        super(canvasitem, self).__init__()
        # Nested canvasitem objects (items may contain other items).
        self.canvasitems = list()
        self.type = type
        self.id = id
90
91
# XML namespace URI; namespaced tags are only handled when they use it.
XMLNS = 'http://gaphor.sourceforge.net/gaphor'
93
class ParserException(Exception):
    """Raised when the document being parsed is not a valid model file."""
96
# Loader state (plain integers 0..9, stored on the loader's stack):
[ ROOT,         # Expect 'gaphor' element
  GAPHOR,       # Expect UML classes (tag name is the UML class name)
  ELEMENT,      # Expect properties of UML object
  DIAGRAM,      # Expect properties of Diagram object + canvas
  CANVAS,       # Expect canvas properties + <item> tags
  ITEM,         # Expect item attributes and nested items
  ATTR,         # Reading contents of an attribute (such as a <val> or <ref>)
  VAL,          # Reading contents of a <val> tag
  REFLIST,      # In a <reflist>
  REF           # Reading contents of a <ref> tag
] = range(10)   # range() instead of xrange(): works on Python 2 and 3
109
class GaphorLoader(handler.ContentHandler):
    """Create a list of elements. an element may contain a canvas and a
    canvas may contain canvas items. Each element can have values and
    references to other elements.

    Data read in non-CDATA text is stripped. If a CDATA section is found all
    non-CDATA text is ignored.

    The loader is a small state machine: every opened tag pushes an
    (object, state) pair on an internal stack and every closed tag pops one.
    Besides the ContentHandler callbacks it also implements the lexical
    handler callbacks (comment, startDTD/endDTD, startCDATA/endCDATA).

    After parsing, the result is available in self.elements (id -> element or
    canvasitem); self.version and self.gaphor_version hold the attributes of
    the toplevel <gaphor> tag.
    """

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # make sure all variables are initialized:
        self.startDocument()

    def push(self, element, state):
        """Add an element to the item stack, together with the parser state
        that is active while that element is open.
        """
        self.__stack.append((element, state))

    def pop(self):
        """Return the last item on the stack. The item is removed from
        the stack.
        """
        return self.__stack.pop()[0]

    def peek(self, depth=1):
        """Return an item from the stack without removing it. depth=1 is
        the last item, depth=2 the one before that, and so on.
        """
        return self.__stack[-1 * depth][0]

    def state(self):
        """Return the current state of the parser (ROOT if the stack is
        empty).
        """
        try:
            return self.__stack[-1][1]
        except IndexError:
            return ROOT

    def _register(self, ident, obj):
        """Store obj in self.elements under ident.

        Raises ParserException if ident is already in use. This is a check on
        the input file, so it must not be an assert (stripped by python -O).
        """
        if ident in self.elements:
            raise ParserException('%s already defined' % (ident,))
        self.elements[ident] = obj

    def startDTD(self, name, public_id, system_id):
        # Lexical handler callback; without it a document containing a
        # DOCTYPE declaration fails with an AttributeError.
        pass

    def endDTD(self):
        pass

    def startDocument(self):
        """Start of document: all our attributes are initialized.
        """
        self.version = None
        self.gaphor_version = None
        self.elements = gaphor.misc.odict.odict() # map id: element/canvasitem
        self.__stack = []
        self.value_is_cdata = 0
        self.cdata = ''
        # may have 3 states:
        #  2: simple data, should be stripped
        #  1: CDATA block,
        #  0: end CDATA, read no more data till the next element
        self.in_cdata = 0

    def endDocument(self):
        """End of document: every opened tag must have been closed."""
        if len(self.__stack) != 0:
            raise ParserException('Invalid XML document.')

    def startElement(self, name, attrs):
        """Handle an opening tag, depending on the current parser state.

        name is the (local) tag name, attrs a mapping of attribute name to
        value. Raises ParserException for a tag that is not expected in the
        current state, for a duplicate id and for an unsupported version.
        """
        self.cdata = ''
        self.in_cdata = 2 # initial, just read text

        state = self.state()

        # Read a element class. The name of the tag is the class name:
        if state == GAPHOR:
            ident = attrs['id']
            e = element(ident, name)
            self._register(ident, e)
            if name == 'Diagram':
                self.push(e, DIAGRAM)
            else:
                self.push(e, ELEMENT)

        # Special treatment for the <canvas> tag in a Diagram:
        elif state == DIAGRAM and name == 'canvas':
            c = canvas()
            self.peek().canvas = c
            self.push(c, CANVAS)

        # Items in a canvas are referenced by the <item> tag:
        elif state in (CANVAS, ITEM) and name == 'item':
            ident = attrs['id']
            c = canvasitem(ident, attrs['type'])
            self._register(ident, c)
            self.peek().canvasitems.append(c)
            self.push(c, ITEM)

        # Store the attribute name on the stack, so we can use it later
        # to store the <ref>, <reflist> or <val> content:
        elif state in (ELEMENT, DIAGRAM, CANVAS, ITEM):
            # handle 'normal' attributes
            # Note that Value may contain CDATA
            self.push(name, ATTR)

        # Reference list:
        elif state == ATTR and name == 'reflist':
            # Push the attribute name again, so a nested <ref> finds it on
            # top of the stack.
            self.push(self.peek(), REFLIST)

        # Reference with multiplicity 1:
        elif state == ATTR and name == 'ref':
            n = self.peek(1)
            # Two levels up: the element instance
            self.peek(2).references[n] = attrs['refid']
            self.push(None, REF)

        # Reference with multiplicity *:
        elif state == REFLIST and name == 'ref':
            n = self.peek(1)
            # Three levels up (past the <reflist> and the attribute name):
            # the element instance
            refs = self.peek(3).references
            refs.setdefault(n, []).append(attrs['refid'])
            self.push(None, REF)

        # We need to get the text within the <val> tag:
        elif state == ATTR and name == 'val':
            self.value_is_cdata = 1
            self.push(None, VAL)

        # The <gaphor> tag is the toplevel tag:
        elif state == ROOT and name == 'gaphor':
            version = attrs.get('version')
            if version not in ('3.0',):
                raise ParserException('Unsupported file version: %s' % (version,))
            self.version = version
            self.gaphor_version = attrs.get('gaphor-version')
            if not self.gaphor_version:
                self.gaphor_version = attrs.get('gaphor_version')
            self.push(None, GAPHOR)

        else:
            raise ParserException('Invalid XML: tag <%s> not known (state = %s)' % (name, state))

    def endElement(self, name):
        """Handle a closing tag: store the text of a <val> and pop the
        stack.
        """
        # Put the text on the value
        if self.state() == VAL:
            if self.value_is_cdata:
                # Two levels up: the attribute name
                n = self.peek(2)
                if self.in_cdata == 2:
                    # No CDATA section was seen: plain text is stripped
                    self.cdata = self.cdata.strip()
                # Three levels up: the element instance (element or canvasitem)
                self.peek(3).values[n] = self.cdata
        self.pop()

    def startElementNS(self, name, qname, attrs):
        """Namespace aware variant of startElement(). name is a
        (namespace, localname) pair; tags from a namespace other than XMLNS
        are ignored.
        """
        if not name[0] or name[0] == XMLNS:
            # Attribute keys are (namespace, localname) pairs too; only the
            # local name is used.
            a = { }
            for key, val in attrs.items():
                a[key[1]] = val
            self.startElement(name[1], a)

    def endElementNS(self, name, qname):
        """Namespace aware variant of endElement(); ignores the same tags as
        startElementNS().
        """
        if not name[0] or name[0] == XMLNS:
            self.endElement(name[1])

    def characters(self, content):
        """Read characters. Text is collected unless a CDATA section has
        just been closed (see in_cdata).
        """
        if self.in_cdata:
            self.cdata = self.cdata + content

    # Lexical handler stuff:

    def comment(self, comment):
        # XML comments are ignored
        pass

    def startCDATA(self):
        """Start a CDATA section. In case no CDATA section has been read
        before, the self.cdata is cleared."""
        if self.in_cdata == 2:
            self.cdata = ''
        self.in_cdata = 1

    def endCDATA(self):
        """End of CDATA section. No more data is read, unless another CDATA
        section is opened."""
        self.in_cdata = 0
295
296
def parse(filename):
    """Read the model file filename completely and return the loader's
    mapping of ID -> element/canvasitem.
    """
    gaphor_loader = GaphorLoader()

    # Run the generator to completion; the progress values are not used.
    progress = parse_generator(filename, gaphor_loader)
    for _ignored in progress:
        continue

    return gaphor_loader.elements
305
306
def parse_generator(filename, loader):
    """Generator flavour of parse().

    Feeds the file filename through a namespace aware SAX parser that uses
    loader (a GaphorLoader) as both content handler and lexical handler.
    Yields the progress values produced by parse_file().
    """
    assert isinstance(loader, GaphorLoader), 'loader should be a GaphorLoader'
    import xml.sax
    sax_parser = xml.sax.make_parser()

    # The lexical handler receives the CDATA and comment notifications.
    sax_parser.setProperty(handler.property_lexical_handler, loader)
    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(loader)

    progress = parse_file(filename, sax_parser)
    for done in progress:
        yield done
321
322
def parse_file(filename, parser):
    """Parse the file filename with parser.

    This is a generator: the file is fed to parser (an object with feed()
    and close() methods) in blocks of 512 bytes. After each block that has
    been fed, the integer percentage (0..100) of the file read so far is
    yielded. parser.close() is called once all data has been fed.

    The file is always closed, also when the parser raises an exception or
    when the generator is closed before it is exhausted.
    """
    file_size = os.stat(filename)[6]
    f = open(filename, 'rb')
    try:
        block_size = 512

        block = f.read(block_size)
        read_size = len(block)
        while block:
            parser.feed(block)
            # Read ahead, so the loop ends right after the last block is fed.
            block = f.read(block_size)
            read_size = read_size + len(block)
            if file_size > 0:
                # Floor division: an integer on both Python 2 and Python 3.
                yield (read_size * 100) // file_size
            else:
                # Reported size is 0 although data could be read; avoid a
                # ZeroDivisionError.
                yield 100

        parser.close()
    finally:
        f.close()
340
if __name__ == "__main__":
    # Manual smoke test: parse the file 'ns.xml' from the current directory.
    parse('ns.xml')
Note: See TracBrowser for help on using the browser.