root/gaphor/tags/gaphor-0.3.0/gaphor/parser.py

Revision 222, 9.2 kB (checked in by arjanmol, 5 years ago)

Added endDTD

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 # vim:sw=4:et
2 """Gaphor file reader.
3
4 This module contains only one interesting function:
5
6     parse(filename)
7
8 which returns a dictionary of ID -> <parsed_object> pairs.
9
10 A parsed_object is one of element, canvas or canvasitem.
11
12 A parsed_object contains values and references. values is a dictionary of
13 name -> value pairs. A value contains a string with the value read from the
14 model file. references is a dictionary of name -> reference pairs, where a
15 reference is a single ID (from a <ref>) or a list of ID's (from a <reflist>).
16
17 element objects can contain a canvas object (which is the case for elements
18 of type Diagram). Each element has a type, which corresponds to a class name
19 in the gaphor.UML module. Elements also have a unique ID, by which they are
20 referred to in the dictionary returned by parse().
21
22 canvas does not have an ID, but contains a list of canvasitems (which is a
23 list of real canvasitem objects, not references).
24
25 canvasitem objects can also contain a list of canvasitems (canvasitems can be
26 nested). They also have a unique ID by which they have been added to the
27 dictionary returned by parse(). Each canvasitem has a type, which maps to a
28 class name in the gaphor.diagram module.
29 """
30
31 __all__ = [ 'parse', 'ParserException' ]
32
33 from xml.sax import handler
34
class base(object):
    """Simple base class for element, canvas and canvasitem.

    Parsed data is kept in two dictionaries:

      values     -- name -> string read from a <val> tag
      references -- name -> ID (single <ref>) or list of ID's (<reflist>)

    An entry can be read with item syntax (obj[name]), attribute syntax
    (obj.name) or get(name). values take precedence over references.
    """

    def __init__(self):
        self.values = { }
        self.references = { }

    def __getattr__(self, key):
        """Resolve unknown attributes from the parsed values/references.

        Raises AttributeError when the name is not known, so hasattr() and
        getattr(obj, name, default) behave as expected.
        """
        # __getattr__ is only invoked when normal lookup fails. If the
        # dictionaries themselves are missing (instance created without
        # __init__), looking them up through self[key] would recurse forever.
        if key in ('values', 'references'):
            raise AttributeError(key)
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __getitem__(self, key):
        """Return the value named key, or else the reference named key.

        Raises KeyError when key is neither a value nor a reference.
        """
        try:
            return self.values[key]
        except KeyError:
            return self.references[key]

    def get(self, key):
        """Like self[key], but return None when key is not known."""
        try:
            return self[key]
        except KeyError:
            return None
57
class element(base):
    """A parsed model element.

    id     -- ID under which the element is stored in the parse() result
    type   -- tag name of the element (a class name in gaphor.UML)
    canvas -- canvas object for elements that contain one, otherwise None
    """

    def __init__(self, id, type):
        super(element, self).__init__()
        self.canvas = None
        self.type = type
        self.id = id
66
class canvas(base):
    """A parsed canvas.

    canvasitems -- list of the canvasitem objects placed on this canvas
    """

    def __init__(self):
        super(canvas, self).__init__()
        self.canvasitems = list()
73
class canvasitem(base):
    """A parsed canvas item.

    id          -- ID under which the item is stored in the parse() result
    type        -- item type (a class name in gaphor.diagram)
    canvasitems -- list of nested canvasitem objects
    """

    def __init__(self, id, type):
        super(canvasitem, self).__init__()
        self.canvasitems = list()
        self.type = type
        self.id = id
82
83
# XML namespace of Gaphor model files; tags in other namespaces are skipped.
XMLNS = 'http://gaphor.sourceforge.net/gaphor'
85
class ParserException(Exception):
    """Raised when the XML being read is not a valid Gaphor model."""
88
# Loader state. The number passed to range() must equal the number of names
# below. range() is used (rather than xrange()) so this unpacking works on
# both Python 2 and Python 3.
[ ROOT,         # Expect 'gaphor' element
  GAPHOR,       # Expect UML classes (tag name is the UML class name)
  ELEMENT,      # Expect properties of UML object
  DIAGRAM,      # Expect properties of Diagram object + canvas
  CANVAS,       # Expect canvas properties + <item> tags
  ITEM,         # Expect item attributes and nested items
  ATTR,         # Reading contents of an attribute (such as a <val> or <ref>)
  VAL,          # Reading contents of a <val> tag
  REFLIST,      # In a <reflist>
  REF           # Reading contents of a <ref> tag
] = range(10)
101
class GaphorLoader(handler.ContentHandler):
    """Create a list of elements. an element may contain a canvas and a
    canvas may contain canvas items. Each element can have values and
    references to other elements.

    Data read in non-CDATA text is stripped. If a CDATA section is found all
    non-CDATA text is ignored.

    The loader is a small state machine: every open tag pushes an
    (object, state) pair on an internal stack and every close tag pops one.
    The result of a parse is available in self.elements (ID -> object).
    """

    def __init__(self):
        handler.ContentHandler.__init__(self)
        # make sure all variables are initialized:
        self.startDocument()

    def push(self, element, state):
        """Add an element to the item stack, together with the parser state
        that applies while that element is on top.
        """
        self.__stack.append((element, state))

    def pop(self):
        """Return the last item on the stack. The item is removed from
        the stack.
        """
        return self.__stack.pop()[0]

    def peek(self, depth=1):
        """Return an item on the stack without removing it. depth=1 is the
        last item pushed, depth=2 the one below it, and so on.
        """
        return self.__stack[-depth][0]

    def state(self):
        """Return the current state of the parser (ROOT if nothing has been
        pushed yet).
        """
        if not self.__stack:
            return ROOT
        return self.__stack[-1][1]

    def endDTD(self):
        """Lexical handler callback; nothing to do."""
        pass

    def startDocument(self):
        """Start of document: all our attributes are initialized.
        """
        self.elements = {} # map id: element/canvasitem
        self.__stack = []
        self.value_is_cdata = 0
        self.cdata = ''
        # may have 3 states:
        #  2: simple data, should be stripped
        #  1: CDATA block,
        #  0: end CDATA, read no more data till the next element
        self.in_cdata = 0

    def endDocument(self):
        """End of document: every opened tag should have been closed.

        Raises ParserException if items are still on the stack.
        """
        if self.__stack:
            raise ParserException('Invalid XML document.')

    def startElement(self, name, attrs):
        """Handle an opening tag, depending on the current parser state.

        name is the (local) tag name, attrs maps attribute names to values.
        Raises ParserException if the tag is not allowed in the current
        state or if the <gaphor> tag carries an unsupported version.
        """
        self.cdata = ''
        self.in_cdata = 2 # initial, just read text

        state = self.state()

        # Read a element class. The name of the tag is the class name:
        if state == GAPHOR:
            id = attrs['id']
            e = element(id, name)
            self.elements[id] = e
            # Diagrams get their own state, since they may hold a <canvas>
            if name == 'Diagram':
                self.push(e, DIAGRAM)
            else:
                self.push(e, ELEMENT)

        # Special treatment for the <canvas> tag in a Diagram:
        elif state == DIAGRAM and name == 'canvas':
            c = canvas()
            self.peek().canvas = c
            self.push(c, CANVAS)

        # Items in a canvas are referenced by the <item> tag:
        elif state in (CANVAS, ITEM) and name == 'item':
            id = attrs['id']
            c = canvasitem(id, attrs['type'])
            self.elements[id] = c
            self.peek().canvasitems.append(c)
            self.push(c, ITEM)

        # Store the attribute name on the stack, so we can use it later
        # to store the <ref>, <reflist> or <val> content:
        elif state in (ELEMENT, DIAGRAM, CANVAS, ITEM):
            # handle 'normal' attributes
            # Note that Value may contain CDATA
            self.push(name, ATTR)

        # Reference list:
        elif state == ATTR and name == 'reflist':
            # The attribute name is pushed once more, so a nested <ref> finds
            # the name at depth 1 and the owning object at depth 3.
            self.push(self.peek(), REFLIST)

        # Reference with multiplicity 1:
        elif state == ATTR and name == 'ref':
            n = self.peek(1)
            # Fetch the element instance from the stack
            self.peek(2).references[n] = attrs['refid']
            self.push(None, REF)

        # Reference with multiplicity *:
        elif state == REFLIST and name == 'ref':
            n = self.peek(1)
            # Fetch the element instance from the stack
            r = self.peek(3).references
            refid = attrs['refid']
            try:
                r[n].append(refid)
            except KeyError:
                r[n] = [refid]
            self.push(None, REF)

        # We need to get the text within the <val> tag:
        elif state == ATTR and name == 'val':
            self.value_is_cdata = 1
            self.push(None, VAL)

        # The <gaphor> tag is the toplevel tag:
        elif state == ROOT and name == 'gaphor':
            # Validate explicitly: an assert would vanish under python -O and
            # a missing attribute would surface as a bare KeyError.
            version = attrs.get('version')
            if version not in ('3.0',):
                raise ParserException('Invalid XML: unsupported gaphor version %r' % (version,))
            self.push(None, GAPHOR)

        else:
            raise ParserException('Invalid XML: tag <%s> not known (state = %s)' % (name, state))

    def endElement(self, name):
        """Handle a closing tag: store collected text when a <val> tag is
        closed, then drop the tag's entry from the stack.
        """
        # Put the text on the value
        if self.state() == VAL:
            if self.value_is_cdata:
                # Two levels up: the attribute name
                n = self.peek(2)
                if self.in_cdata == 2:
                    # Only plain text was seen (no CDATA section): strip it
                    self.cdata = self.cdata.strip()
                # Three levels up: the element instance (element or canvasitem)
                self.peek(3).values[n] = self.cdata
        self.pop()

    def startElementNS(self, name, qname, attrs):
        """Namespace-aware variant of startElement.

        name is a (namespace, localname) pair. Tags without a namespace or
        in the XMLNS namespace are forwarded to startElement, with the
        namespace part dropped from the attribute names. Other tags are
        skipped.
        """
        if not name[0] or name[0] == XMLNS:
            a = { }
            for key, val in attrs.items():
                a[key[1]] = val
            self.startElement(name[1], a)

    def endElementNS(self, name, qname):
        """Namespace-aware variant of endElement; mirrors startElementNS."""
        if not name[0] or name[0] == XMLNS:
            self.endElement(name[1])

    def characters(self, content):
        """Read characters. Text is collected unless a CDATA section has
        already been closed for the current tag."""
        if self.in_cdata:
            self.cdata = self.cdata + content

    # Lexical handler stuff:

    def comment(self, comment):
        """XML comments are ignored."""
        pass

    def startCDATA(self):
        """Start a CDATA section. In case no CDATA section has been read
        before, the self.cdata is cleared."""
        if self.in_cdata == 2:
            self.cdata = ''
        self.in_cdata = 1

    def endCDATA(self):
        """End of CDATA section. No more data is read, unless another CDATA
        section is opened."""
        self.in_cdata = 0
279
def parse(filename):
    """Read the model in filename and return what was found.

    filename is handed as-is to the SAX parser's parse() method. The return
    value is the loader's dictionary mapping ID -> element/canvasitem.
    """
    from xml.sax import make_parser

    content_handler = GaphorLoader()
    reader = make_parser()

    # The loader doubles as lexical handler, so it is told about CDATA
    # sections; namespace processing makes the reader call the *NS callbacks.
    reader.setProperty(handler.property_lexical_handler, content_handler)
    reader.setFeature(handler.feature_namespaces, 1)
    reader.setContentHandler(content_handler)

    reader.parse(filename)
    return content_handler.elements
295
if __name__ == '__main__':
    # Smoke test: parse a sample file and report how many objects were read.
    # parse() returns the ID -> object dictionary, so its result is used
    # directly (there is no module-level handler object to inspect).
    elements = parse('ns.xml')
    print(len(elements))
Note: See TracBrowser for help on using the browser.