root/gaphor/tags/gaphor-0.7.0/gaphor/parser.py

Revision 424, 10.4 kB (checked in by arjanmol, 4 years ago)

*** empty log message ***

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 # vim:sw=4:et
2 """Gaphor file reader.
3
4 This module contains only one interesting function:
5
6     parse(filename)
7
8 which returns a dictionary of ID -> <parsed_object> pairs.
9
10 A parsed_object is one of element, canvas or canvasitem.
11
12 A parsed_object contains values and references. values is a dictionary of
13 name -> value pairs. A value contains a string with the value read from the
14 model file. references contains a list of name -> reference_list pairs, where
15 reference_list is a list of ID's.
16
17 element objects can contain a canvas object (which is the case for elements
18 of type Diagram). Each element has a type, which corresponds to a class name
19 in the gaphor.UML module. Elements also have a unique ID, by which they are
20 referred to in the dictionary returned by parse().
21
22 canvas does not have an ID, but contains a list of canvasitems (which is a
23 list of real canvasitem objects, not references).
24
25 canvasitem objects can also contain a list of canvasitems (canvasitems can be
26 nested). They also have a unique ID by which they have been added to the
27 dictionary returned by parse(). Each canvasitem has a type, which maps to a
28 class name in the gaphor.diagram module.
29
30 The generator parse_generator(filename, loader) may be used if the loading
31 takes a long time. The yielded values are the percentage of the file read.
32 """
33
34 __all__ = [ 'parse', 'ParserException' ]
35
36 from __future__ import generators
37
38 import os
39 from xml.sax import handler
40
41 import gaphor.misc.odict
42
class base(object):
    """Simple base class for element, canvas and canvasitem.

    Every parsed object carries two dictionaries:

    values     -- maps a name to the string read from the model file
    references -- maps a name to a referenced ID, or to a list of IDs

    Entries of both dictionaries can be read through item access
    (obj[name]), attribute access (obj.name) or get(name). In all cases
    values is consulted before references.
    """

    def __init__(self):
        self.values = { }
        self.references = { }

    def __getattr__(self, key):
        """Resolve unknown attributes from values, then references.

        Python only calls this hook when normal attribute lookup has
        failed. A missing name raises AttributeError (rather than leaking
        the KeyError of the underlying dictionaries) so that hasattr() and
        getattr() with a default work as expected.
        """
        # If the dictionaries themselves are missing (instance created
        # without __init__), looking them up would re-enter this hook and
        # recurse without bound.
        if key in ('values', 'references'):
            raise AttributeError(key)
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __getitem__(self, key):
        """Return the value stored under key, else the reference.

        Raises KeyError if key is neither a value nor a reference.
        """
        try:
            return self.values[key]
        except KeyError:
            return self.references[key]

    def get(self, key):
        """Like obj[key], but return None if key is unknown."""
        try:
            return self[key]
        except KeyError:
            return None
65
class element(base):
    """Parsed model element.

    Attributes set here, next to the values and references dictionaries
    provided by base:

    id     -- the ID given to the constructor
    type   -- the type given to the constructor
    canvas -- initially None
    """

    def __init__(self, id, type):
        super(element, self).__init__()
        self.canvas = None
        self.type = type
        self.id = id
74
class canvas(base):
    """Parsed canvas.

    Besides the values and references dictionaries provided by base, a
    canvas holds canvasitems, a list that starts out empty.
    """

    def __init__(self):
        super(canvas, self).__init__()
        self.canvasitems = list()
81
class canvasitem(base):
    """Parsed canvas item.

    Attributes set here, next to the values and references dictionaries
    provided by base:

    id          -- the ID given to the constructor
    type        -- the type given to the constructor
    canvasitems -- list of nested items, initially empty
    """

    def __init__(self, id, type):
        super(canvasitem, self).__init__()
        self.canvasitems = list()
        self.type = type
        self.id = id
90
91
# XML namespace URI; compared against the namespace part of element names.
XMLNS = 'http://gaphor.sourceforge.net/gaphor'
93
class ParserException(Exception):
    """Raised when the XML being read is not a valid Gaphor document."""
96
# Loader state. Each name is bound to a distinct integer (0..9) by unpacking
# a range; range() is used rather than xrange() because it unpacks the same
# way and exists on every Python version.
[ ROOT,         # Expect 'gaphor' element
  GAPHOR,       # Expect UML classes (tag name is the UML class name)
  ELEMENT,      # Expect properties of UML object
  DIAGRAM,      # Expect properties of Diagram object + canvas
  CANVAS,       # Expect canvas properties + <item> tags
  ITEM,         # Expect item attributes and nested items
  ATTR,         # Reading contents of an attribute (such as a <val> or <ref>)
  VAL,          # Reading contents of a <val> tag
  REFLIST,      # In a <reflist>
  REF           # Reading contents of a <ref> tag
] = range(10)
109
class GaphorLoader(handler.ContentHandler):
    """Create a list of elements. an element may contain a canvas and a
    canvas may contain canvas items. Each element can have values and
    references to other elements.

    Data read in non-CDATA text is stripped. If a CDATA section is found all
    non-CDATA text is ignored.

    The loader keeps a stack of (object, state) pairs. The state of the top
    entry decides how the next tag is interpreted; with an empty stack the
    state is ROOT. Besides the ContentHandler callbacks the class also
    implements the lexical handler callbacks (comment, startDTD, endDTD,
    startCDATA, endCDATA).

    After parsing, the result is available as:

    elements       -- ordered mapping of id -> element/canvasitem
    version        -- value of the 'version' attribute of <gaphor>
    gaphor_version -- value of the 'gaphor-version' (or 'gaphor_version')
                      attribute of <gaphor>, None if absent
    """

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # make sure all variables are initialized:
        self.startDocument()

    def push(self, element, state):
        """Add an element to the item stack.
        """
        self.__stack.append((element, state))

    def pop(self):
        """Return the last item on the stack. The item is removed from
        the stack.
        """
        return self.__stack.pop()[0]

    def peek(self, depth=1):
        """Return an item on the stack without removing it. depth=1 is
        the last item, depth=2 the one before that, and so on.
        """
        return self.__stack[-1 * depth][0]

    def state(self):
        """Return the current state of the parser. This is ROOT as long
        as nothing has been pushed on the stack.
        """
        try:
            return self.__stack[-1][1]
        except IndexError:
            return ROOT

    def startDTD(self, name, public_id, system_id):
        """Lexical handler callback for the start of a DOCTYPE
        declaration; the declaration is ignored.
        """
        pass

    def endDTD(self):
        """Lexical handler callback for the end of a DOCTYPE declaration;
        nothing to do.
        """
        pass

    def startDocument(self):
        """Start of document: all our attributes are initialized.
        """
        self.version = None
        self.gaphor_version = None
        self.elements = gaphor.misc.odict.odict() # map id: element/canvasitem
        self.__stack = []
        self.value_is_cdata = 0
        self.cdata = ''
        # may have 3 states:
        #  2: simple data, should be stripped
        #  1: CDATA block,
        #  0: end CDATA, read no more data till the next element
        self.in_cdata = 0

    def endDocument(self):
        """End of document. Raises ParserException if tags are still
        open, i.e. the stack is not empty.
        """
        if self.__stack:
            raise ParserException('Invalid XML document.')

    def startElement(self, name, attrs):
        """Handle an opening tag.

        What the tag means depends on the current state; every accepted
        tag pushes exactly one entry on the stack, which endElement()
        removes again. Raises ParserException if the tag is not expected
        in the current state or if the file format version is not
        supported.
        """
        # Every tag starts with a fresh text buffer.
        self.cdata = ''
        self.in_cdata = 2 # initial, just read text

        state = self.state()

        # Read a element class. The name of the tag is the class name:
        if state == GAPHOR:
            element_id = attrs['id']
            e = element(element_id, name)
            self.elements[element_id] = e
            if name == 'Diagram':
                self.push(e, DIAGRAM)
            else:
                self.push(e, ELEMENT)

        # Special treatment for the <canvas> tag in a Diagram:
        elif state == DIAGRAM and name == 'canvas':
            c = canvas()
            self.peek().canvas = c
            self.push(c, CANVAS)

        # Items in a canvas are referenced by the <item> tag:
        elif state in (CANVAS, ITEM) and name == 'item':
            item_id = attrs['id']
            c = canvasitem(item_id, attrs['type'])
            self.elements[item_id] = c
            self.peek().canvasitems.append(c)
            self.push(c, ITEM)

        # Store the attribute name on the stack, so we can use it later
        # to store the <ref>, <reflist> or <val> content:
        elif state in (ELEMENT, DIAGRAM, CANVAS, ITEM):
            # handle 'normal' attributes
            # Note that Value may contain CDATA
            self.push(name, ATTR)

        # Reference list:
        elif state == ATTR and name == 'reflist':
            # The attribute name is pushed again, so it stays on top.
            self.push(self.peek(), REFLIST)

        # Reference with multiplicity 1:
        elif state == ATTR and name == 'ref':
            n = self.peek(1)
            # Fetch the element instance from the stack
            self.peek(2).references[n] = attrs['refid']
            self.push(None, REF)

        # Reference with multiplicity *:
        elif state == REFLIST and name == 'ref':
            n = self.peek(1)
            # Fetch the element instance from the stack (one level deeper
            # than above, because of the extra REFLIST entry)
            r = self.peek(3).references
            r.setdefault(n, []).append(attrs['refid'])
            self.push(None, REF)

        # We need to get the text within the <val> tag:
        elif state == ATTR and name == 'val':
            self.value_is_cdata = 1
            self.push(None, VAL)

        # The <gaphor> tag is the toplevel tag:
        elif state == ROOT and name == 'gaphor':
            version = attrs.get('version')
            # Raised explicitly: an assert would vanish when running with -O.
            if version not in ('3.0',):
                raise ParserException('Unsupported file format version: %s' % (version,))
            self.version = version
            self.gaphor_version = attrs.get('gaphor-version')
            if not self.gaphor_version:
                self.gaphor_version = attrs.get('gaphor_version')
            self.push(None, GAPHOR)

        else:
            raise ParserException('Invalid XML: tag <%s> not known (state = %s)' % (name, state))

    def endElement(self, name):
        """Handle a closing tag: store the collected text if a <val> is
        being closed, then drop the stack entry of the tag.
        """
        # Put the text on the value
        if self.state() == VAL:
            if self.value_is_cdata:
                # Two levels up: the attribute name
                n = self.peek(2)
                # Plain text (no CDATA section seen) is stripped
                if self.in_cdata == 2:
                    self.cdata = self.cdata.strip()
                # Three levels up: the element instance (element or canvasitem)
                self.peek(3).values[n] = self.cdata
        self.pop()

    def startElementNS(self, name, qname, attrs):
        """Namespace aware variant of startElement(). name is a
        (namespace, localname) pair. Tags outside the Gaphor namespace
        are skipped; for the others startElement() is called with the
        local tag name and the attributes keyed by their local name.
        """
        if not name[0] or name[0] == XMLNS:
            a = { }
            for key, val in attrs.items():
                # key is a (namespace, localname) pair as well
                a[key[1]] = val
            self.startElement(name[1], a)

    def endElementNS(self, name, qname):
        """Namespace aware variant of endElement(); skips the same tags
        as startElementNS().
        """
        if not name[0] or name[0] == XMLNS:
            self.endElement(name[1])

    def characters(self, content):
        """Read characters. Text is collected unless a CDATA section has
        just been closed (in_cdata == 0)."""
        if self.in_cdata:
            self.cdata = self.cdata + content

    # Lexical handler stuff:

    def comment(self, comment):
        """XML comments are ignored."""
        pass

    def startCDATA(self):
        """Start a CDATA section. In case no CDATA section has been read
        before, the self.cdata is cleared."""
        if self.in_cdata == 2:
            self.cdata = ''
        self.in_cdata = 1

    def endCDATA(self):
        """End of CDATA section. No more data is read, unless another CDATA
        section is opened."""
        self.in_cdata = 0
293
294
def parse(filename):
    """Read the file filename with a fresh GaphorLoader and return the
    loader's elements (a mapping of ID to element/canvasitem).
    """
    loader = GaphorLoader()
    progress = parse_generator(filename, loader)
    # Run the generator to the end; the percentages are of no interest here.
    for _percentage in progress:
        continue
    return loader.elements
303
304
def parse_generator(filename, loader):
    """Generator flavour of parse().

    Sets up a namespace aware SAX parser that reports both content and
    lexical events to loader (which has to be a GaphorLoader) and feeds it
    the file filename. The values yielded are those of parse_file().
    """
    assert isinstance(loader, GaphorLoader), 'loader should be a GaphorLoader'
    from xml.sax import make_parser

    sax_parser = make_parser()
    # The loader doubles as lexical handler.
    sax_parser.setProperty(handler.property_lexical_handler, loader)
    sax_parser.setFeature(handler.feature_namespaces, True)
    sax_parser.setContentHandler(loader)

    for progress in parse_file(filename, sax_parser):
        yield progress
319
320
def parse_file(filename, parser):
    """Parse the file filename with parser.

    The file is fed to parser (through parser.feed()) in blocks of 512
    bytes. This is a generator: after each block that has been fed it
    yields an integer percentage, computed from the number of bytes read so
    far and the file size. parser.close() is called once all data has been
    fed. The file is closed in all cases, also if the parser raises an
    exception.
    """
    file_size = os.stat(filename)[6]    # index 6 is st_size
    block_size = 512

    f = open(filename, 'rb')
    try:
        block = f.read(block_size)
        read_size = len(block)
        while block:
            parser.feed(block)
            # The next block is read before reporting, so the percentage
            # includes data that is fed in the next iteration.
            block = f.read(block_size)
            read_size = read_size + len(block)
            # Floor division keeps the percentage an integer on any
            # Python version.
            yield (read_size * 100) // file_size
        parser.close()
    finally:
        f.close()
338
if __name__ == '__main__':
    # Run as a script: parse the file 'ns.xml'; the result is discarded.
    parse('ns.xml')
Note: See TracBrowser for help on using the browser.