root/gaphor/tags/gaphor-0.11.1/gaphor/parser.py

Revision 1241, 10.6 kB (checked in by arj..@yirdis.nl, 2 years ago)

Cleanup: removed resource() from gaphor.diagram; removed future imports.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 # vim:sw=4:et
2 """Gaphor file reader.
3
4 This module contains only one interesting function:
5
6     parse(filename)
7
8 which returns a dictionary of ID -> <parsed_object> pairs.
9
10 A parsed_object is one of element, canvas or canvasitem.
11
12 A parsed_object contains values and references. values is a dictionary of
13 name -> value pairs. A value contains a string with the value read from the
14 model file. references contains a list of name -> reference_list pairs, where
15 reference_list is a list of ID's.
16
17 element objects can contain a canvas object (which is the case for elements
18 of type Diagram). Each element has a type, which corresponds to a class name
19 in the gaphor.UML module. Elements also have a unique ID, by which they are
20 referred to in the dictionary returned by parse().
21
22 canvas does not have an ID, but contains a list of canvasitems (which is a
23 list of real canvasitem objects, not references).
24
25 canvasitem objects can also contain a list of canvasitems (canvasitems can be
26 nested). They also have a unique ID by which they have been added to the
27 dictionary returned by parse(). Each canvasitem has a type, which maps to a
28 class name in the gaphor.diagram module.
29
30 The generator parse_generator(filename, loader) may be used if the loading
31 takes a long time. The yielded values are the percentage of the file read.
32 """
33
34 __all__ = [ 'parse', 'ParserException' ]
35
36 import os
37 from xml.sax import handler
38
39 import gaphor.misc.odict
40
class base(object):
    """Simple base class for element, canvas and canvasitem.

    Instances carry two dictionaries:

    * ``values``: name -> value read from the model file.
    * ``references``: name -> referenced ID (or list of IDs).

    Entries of both dictionaries can be read with item access
    (``obj[name]``) or attribute access (``obj.name``); ``values`` takes
    precedence over ``references`` when a name occurs in both.
    """

    def __init__(self):
        self.values = { }
        self.references = { }

    def __getattr__(self, key):
        """Resolve an unknown attribute through ``values``/``references``.

        Raises AttributeError (not KeyError) for unknown names, as the
        attribute protocol requires; hasattr(), copy and pickle rely on it.
        """
        # __getattr__ is only called when normal lookup failed. If the
        # dictionaries themselves are missing (instance created without
        # __init__, e.g. while being copied or unpickled), looking them up
        # again would recurse forever.
        if key in ('values', 'references'):
            raise AttributeError(key)
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __getitem__(self, key):
        """Return the value stored under key, falling back to references.

        Raises KeyError if key is in neither dictionary.
        """
        try:
            return self.values[key]
        except KeyError:
            return self.references[key]

    def get(self, key):
        """Return the value or reference stored under key, or None."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return None
63
class element(base):
    """Parsed model element.

    Holds the element's unique id, its type (the tag name it was read
    from) and an optional canvas, which starts out as None.
    """

    def __init__(self, id, type):
        super(element, self).__init__()
        self.canvas = None
        self.type = type
        self.id = id
72
class canvas(base):
    """Parsed canvas: has no id of its own, only a list of canvasitems."""

    def __init__(self):
        super(canvas, self).__init__()
        # Real canvasitem objects (not references), in document order.
        self.canvasitems = list()
79
class canvasitem(base):
    """Parsed canvas item.

    Holds the item's unique id, its type and a list of nested
    canvasitems (items may contain other items).
    """

    def __init__(self, id, type):
        super(canvasitem, self).__init__()
        self.canvasitems = list()
        self.type = type
        self.id = id
88
89
# XML namespace URI of Gaphor model files. Only tags in this namespace (or
# in no namespace at all) are processed by the namespace-aware handlers.
XMLNS = 'http://gaphor.sourceforge.net/gaphor'
91
class ParserException(Exception):
    """Raised when the XML being read is not a valid Gaphor model file."""
94
# Loader states. Each entry on the loader's stack is tagged with one of
# these; the state on top of the stack decides which tags are accepted next.
# range() (instead of the Python 2-only xrange()) keeps this importable on
# both Python 2 and Python 3.
[ ROOT,         # Expect 'gaphor' element
  GAPHOR,       # Expect UML classes (tag name is the UML class name)
  ELEMENT,      # Expect properties of UML object
  DIAGRAM,      # Expect properties of Diagram object + canvas
  CANVAS,       # Expect canvas properties + <item> tags
  ITEM,         # Expect item attributes and nested items
  ATTR,         # Reading contents of an attribute (such as a <val> or <ref>)
  VAL,          # Reading contents of a <val> tag
  REFLIST,      # In a <reflist>
  REF           # Reading contents of a <ref> tag
] = range(10)
107
class GaphorLoader(handler.ContentHandler):
    """Create a list of elements. An element may contain a canvas and a
    canvas may contain canvas items. Each element can have values and
    references to other elements.

    The loader is a small state machine: every open tag pushes an
    (object, state) pair on an internal stack and every close tag pops one.
    The state on top of the stack determines which tags are valid next.

    Data read in non-CDATA text is stripped. If a CDATA section is found all
    non-CDATA text is ignored.

    After parsing, ``elements`` maps each ID to its element/canvasitem,
    ``version`` holds the file format version and ``gaphor_version`` the
    version attribute read from the <gaphor> tag (if any).
    """

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # make sure all variables are initialized:
        self.startDocument()

    def push(self, element, state):
        """Add an element, together with the parser state it implies, to
        the item stack.
        """
        self.__stack.append((element, state))

    def pop(self):
        """Return the last item on the stack. The item is removed from
        the stack.
        """
        return self.__stack.pop()[0]

    def peek(self, depth=1):
        """Return the item `depth` levels from the top of the stack
        (1 is the last item pushed). The item is not removed.
        """
        return self.__stack[-1 * depth][0]

    def state(self):
        """Return the current state of the parser. This is ROOT as long as
        the stack is empty.
        """
        try:
            return self.__stack[-1][1]
        except IndexError:
            return ROOT

    def endDTD(self):
        """Lexical handler callback; nothing to do."""
        pass

    def startDocument(self):
        """Start of document: all our attributes are initialized.
        """
        self.version = None
        self.gaphor_version = None
        self.elements = gaphor.misc.odict.odict() # map id: element/canvasitem
        self.__stack = []
        self.value_is_cdata = 0
        self.cdata = ''
        # may have 3 states:
        #  2: simple data, should be stripped
        #  1: CDATA block,
        #  0: end CDATA, read no more data till the next element
        self.in_cdata = 0

    def endDocument(self):
        """End of document: every opened tag must have been closed.

        Raises ParserException if items are left on the stack.
        """
        if self.__stack:
            raise ParserException('Invalid XML document.')

    def startElement(self, name, attrs):
        """Handle an opening tag according to the current parser state.

        name is the (local) tag name, attrs a mapping of attribute names
        to values. Raises ParserException for a tag that is not valid in
        the current state.
        """
        self.cdata = ''
        self.in_cdata = 2 # initial, just read text

        state = self.state()

        # Read a element class. The name of the tag is the class name:
        if state == GAPHOR:
            elem_id = attrs['id']
            e = element(elem_id, name)
            # Membership is tested on the mapping itself; going through
            # .keys() builds a list for every element on Python 2.
            assert elem_id not in self.elements, '%s already defined' % (elem_id)
            self.elements[elem_id] = e
            if name == 'Diagram':
                self.push(e, DIAGRAM)
            else:
                self.push(e, ELEMENT)

        # Special treatment for the <canvas> tag in a Diagram:
        elif state == DIAGRAM and name == 'canvas':
            c = canvas()
            self.peek().canvas = c
            self.push(c, CANVAS)

        # Items in a canvas are referenced by the <item> tag:
        elif state in (CANVAS, ITEM) and name == 'item':
            item_id = attrs['id']
            c = canvasitem(item_id, attrs['type'])
            assert item_id not in self.elements, '%s already defined' % (item_id)
            self.elements[item_id] = c
            self.peek().canvasitems.append(c)
            self.push(c, ITEM)

        # Store the attribute name on the stack, so we can use it later
        # to store the <ref>, <reflist> or <val> content:
        elif state in (ELEMENT, DIAGRAM, CANVAS, ITEM):
            # handle 'normal' attributes
            # Note that Value may contain CDATA
            self.push(name, ATTR)

        # Reference list:
        elif state == ATTR and name == 'reflist':
            # Push the attribute name again, so <ref> finds it at the top.
            self.push(self.peek(), REFLIST)

        # Reference with multiplicity 1:
        elif state == ATTR and name == 'ref':
            n = self.peek(1)
            # Fetch the element instance from the stack
            self.peek(2).references[n] = attrs['refid']
            self.push(None, REF)

        # Reference with multiplicity *:
        elif state == REFLIST and name == 'ref':
            n = self.peek(1)
            # Fetch the element instance from the stack
            r = self.peek(3).references
            refid = attrs['refid']
            try:
                r[n].append(refid)
            except KeyError:
                r[n] = [refid]
            self.push(None, REF)

        # We need to get the text within the <val> tag:
        elif state == ATTR and name == 'val':
            self.value_is_cdata = 1
            self.push(None, VAL)

        # The <gaphor> tag is the toplevel tag:
        elif state == ROOT and name == 'gaphor':
            assert attrs['version'] in ('3.0',)
            self.version = attrs['version']
            self.gaphor_version = attrs.get('gaphor-version')
            if not self.gaphor_version:
                self.gaphor_version = attrs.get('gaphor_version')
            self.push(None, GAPHOR)

        else:
            raise ParserException('Invalid XML: tag <%s> not known (state = %s)' % (name, state))

    def endElement(self, name):
        """Handle a closing tag: store collected <val> text, then pop the
        stack entry pushed by the matching opening tag.
        """
        # Put the text on the value
        if self.state() == VAL:
            if self.value_is_cdata:
                # Two levels up: the attribute name
                n = self.peek(2)
                if self.in_cdata == 2:
                    # Plain text only (no CDATA section seen): strip it.
                    self.cdata = self.cdata.strip()
                # Three levels up: the element instance (element or canvasitem)
                self.peek(3).values[n] = self.cdata
        self.pop()

    def startElementNS(self, name, qname, attrs):
        """Namespace-aware variant of startElement.

        name is a (namespace, local name) pair. Tags without a namespace
        or in the XMLNS namespace are forwarded to startElement() with the
        attribute keys reduced to their local names; other tags are
        ignored.
        """
        if not name[0] or name[0] == XMLNS:
            a = { }
            for key, val in attrs.items():
                a[key[1]] = val
            self.startElement(name[1], a)

    def endElementNS(self, name, qname):
        """Namespace-aware variant of endElement; ignores the same tags
        startElementNS() ignores, which keeps the stack balanced.
        """
        if not name[0] or name[0] == XMLNS:
            self.endElement(name[1])

    def characters(self, content):
        """Read characters. Text is only collected while in_cdata is
        non-zero (see startDocument for its states)."""
        if self.in_cdata:
            self.cdata = self.cdata + content

    # Lexical handler stuff:

    def comment(self, comment):
        """XML comments are ignored."""
        pass

    def startCDATA(self):
        """Start a CDATA section. In case no CDATA section has been read
        before, the self.cdata is cleared."""
        if self.in_cdata == 2:
            self.cdata = ''
        self.in_cdata = 1

    def endCDATA(self):
        """End of CDATA section. No more data is read, unless another CDATA
        section is opened."""
        self.in_cdata = 0
293
294
def parse(filename):
    """Read the model file filename and return the loader's mapping of
    ID -> element/canvasitem.
    """
    gaphor_loader = GaphorLoader()

    # Exhaust the generator; the progress percentages are not needed here.
    progress = parse_generator(filename, gaphor_loader)
    for _percentage in progress:
        continue

    return gaphor_loader.elements
303
304
def parse_generator(filename, loader):
    """Generator flavour of parse().

    Sets up a namespace-aware SAX parser that reports both content and
    lexical events to loader (which must be a GaphorLoader), feeds it the
    file filename and yields the progress values produced by parse_file().
    """
    assert isinstance(loader, GaphorLoader), 'loader should be a GaphorLoader'
    from xml.sax import make_parser

    sax_parser = make_parser()
    # The loader doubles as lexical handler (CDATA sections, comments).
    sax_parser.setProperty(handler.property_lexical_handler, loader)
    sax_parser.setFeature(handler.feature_namespaces, 1)
    sax_parser.setContentHandler(loader)

    progress = parse_file(filename, sax_parser)
    for percentage in progress:
        yield percentage
319
320
def parse_file(filename, parser):
    """Feed the file filename to parser in small blocks.

    parser must provide feed(data) and close() (an incremental SAX parser
    does). This is a generator: after each block is fed it yields the
    integer percentage of the file read so far. Once the whole file has been
    fed, parser.close() is called.

    The file is always closed, also when the parser raises or when the
    generator is abandoned before it is exhausted.
    """
    file_size = os.stat(filename).st_size
    block_size = 512

    f = open(filename, 'rb')
    try:
        block = f.read(block_size)
        read_size = len(block)
        while block:
            parser.feed(block)
            block = f.read(block_size)
            read_size = read_size + len(block)
            if file_size:
                # Floor division keeps the percentage an integer on
                # Python 3 as well.
                yield (read_size * 100) // file_size
            else:
                # Size reported as 0 although data was read; avoid a
                # division by zero and just report completion.
                yield 100

        parser.close()
    finally:
        f.close()
338
if __name__ == '__main__':
    # Ad-hoc manual check: parse a file named 'ns.xml' from the current
    # working directory; the result is discarded.
    parse('ns.xml')
Note: See TracBrowser for help on using the browser.