View Javadoc

1   /**
2    * Copyright (c) 2011, University of Konstanz, Distributed Systems Group
3    * All rights reserved.
4    * 
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions are met:
7    * * Redistributions of source code must retain the above copyright
8    * notice, this list of conditions and the following disclaimer.
9    * * Redistributions in binary form must reproduce the above copyright
10   * notice, this list of conditions and the following disclaimer in the
11   * documentation and/or other materials provided with the distribution.
12   * * Neither the name of the University of Konstanz nor the
13   * names of its contributors may be used to endorse or promote products
14   * derived from this software without specific prior written permission.
15   * 
16   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19   * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20   * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26   */
27  
28  package org.treetank.service.xml.serialize;
29  
30  import static org.treetank.node.IConstants.ELEMENT;
31  import static org.treetank.node.IConstants.ROOT;
32  import static org.treetank.node.IConstants.TEXT;
33  import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_ID;
34  import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_INDENT;
35  import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_INDENT_SPACES;
36  import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_REST;
37  import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_XMLDECL;
38  
39  import java.io.BufferedOutputStream;
40  import java.io.File;
41  import java.io.FileOutputStream;
42  import java.io.IOException;
43  import java.io.OutputStream;
44  import java.io.UnsupportedEncodingException;
45  import java.util.Properties;
46  import java.util.concurrent.ConcurrentMap;
47  
48  import org.treetank.access.Storage;
49  import org.treetank.access.conf.ConstructorProps;
50  import org.treetank.access.conf.ModuleSetter;
51  import org.treetank.access.conf.ResourceConfiguration;
52  import org.treetank.access.conf.SessionConfiguration;
53  import org.treetank.access.conf.StandardSettings;
54  import org.treetank.access.conf.StorageConfiguration;
55  import org.treetank.api.INodeReadTrx;
56  import org.treetank.api.ISession;
57  import org.treetank.api.IStorage;
58  import org.treetank.exception.TTIOException;
59  import org.treetank.io.IBackend.IBackendFactory;
60  import org.treetank.node.ElementNode;
61  import org.treetank.node.NodeMetaPageFactory;
62  import org.treetank.node.TreeNodeFactory;
63  import org.treetank.node.interfaces.INameNode;
64  import org.treetank.node.interfaces.IStructNode;
65  import org.treetank.revisioning.IRevisioning;
66  
67  import com.google.inject.Guice;
68  import com.google.inject.Injector;
69  
70  /**
71   * <h1>XMLSerializer</h1>
72   * 
73   * <p>
74   * Most efficient way to serialize a subtree into an OutputStream. The encoding always is UTF-8. Note that the
75   * OutputStream internally is wrapped by a BufferedOutputStream. There is no need to buffer it again outside
76   * of this class.
77   * </p>
78   */
79  public final class XMLSerializer extends AbsSerializer {
80  
81      enum ECharsForSerializing {
82  
83          /** " ". */
84          SPACE(new byte[] {
85              32
86          }),
87  
88          /** "&lt;". */
89          OPEN(new byte[] {
90              60
91          }),
92  
93          /** "&gt;". */
94          CLOSE(new byte[] {
95              62
96          }),
97  
98          /** "/". */
99          SLASH(new byte[] {
100             47
101         }),
102 
103         /** "=". */
104         EQUAL(new byte[] {
105             61
106         }),
107 
108         /** "\"". */
109         QUOTE(new byte[] {
110             34
111         }),
112 
113         /** "=\"". */
114         EQUAL_QUOTE(EQUAL.getBytes(), QUOTE.getBytes()),
115 
116         /** "&lt;/". */
117         OPEN_SLASH(OPEN.getBytes(), SLASH.getBytes()),
118 
119         /** "/&gt;". */
120         SLASH_CLOSE(SLASH.getBytes(), CLOSE.getBytes()),
121 
122         /** " rest:"". */
123         REST_PREFIX(SPACE.getBytes(), new byte[] {
124             114, 101, 115, 116, 58
125         }),
126 
127         /** "ttid". */
128         ID(new byte[] {
129             116, 116, 105, 100
130         }),
131 
132         /** " xmlns=\"". */
133         XMLNS(SPACE.getBytes(), new byte[] {
134             120, 109, 108, 110, 115
135         }, EQUAL.getBytes(), QUOTE.getBytes()),
136 
137         /** " xmlns:". */
138         XMLNS_COLON(SPACE.getBytes(), new byte[] {
139             120, 109, 108, 110, 115, 58
140         }),
141 
142         /** Newline. */
143         NEWLINE(System.getProperty("line.separator").getBytes());
144 
145         /** Getting the bytes for the char. */
146         private final byte[] mBytes;
147 
148         /**
149          * Private constructor.
150          * 
151          * @param paramBytes
152          *            the bytes for the chars
153          */
154         ECharsForSerializing(final byte[]... paramBytes) {
155             int index = 0;
156             for (final byte[] runner : paramBytes) {
157                 index = index + runner.length;
158             }
159             this.mBytes = new byte[index];
160             index = 0;
161             for (final byte[] runner : paramBytes) {
162                 System.arraycopy(runner, 0, mBytes, index, runner.length);
163                 index = index + runner.length;
164             }
165         }
166 
167         /**
168          * Getting the bytes.
169          * 
170          * @return the bytes for the char.
171          */
172         public byte[] getBytes() {
173             return mBytes;
174         }
175 
176     }
177 
178     /** Offset that must be added to digit to make it ASCII. */
179     private static final int ASCII_OFFSET = 48;
180 
181     /** Precalculated powers of each available long digit. */
182     private static final long[] LONG_POWERS = {
183         1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L, 1000000000L, 10000000000L,
184         100000000000L, 1000000000000L, 10000000000000L, 100000000000000L, 1000000000000000L,
185         10000000000000000L, 100000000000000000L, 1000000000000000000L
186     };
187 
188     /** OutputStream to write to. */
189     private final OutputStream mOut;
190 
191     /** Indent output. */
192     private final boolean mIndent;
193 
194     /** Serialize XML declaration. */
195     private final boolean mSerializeXMLDeclaration;
196 
197     /** Serialize rest header and closer and rest:id. */
198     private final boolean mSerializeRest;
199 
200     /** Serialize id. */
201     private final boolean mSerializeId;
202 
203     /** Number of spaces to indent. */
204     private final int mIndentSpaces;
205 
206     /**
207      * Initialize XMLStreamReader implementation with transaction. The cursor
208      * points to the node the XMLStreamReader starts to read.
209      * 
210      * @param paramSession
211      *            Session for read XML
212      * @param paramNodeKey
213      *            Node Key
214      * @param paramBuilder
215      *            Builder of XML Serializer
216      * @param paramVersions
217      *            Version to serailze
218      */
219     private XMLSerializer(final ISession paramSession, final long paramNodeKey,
220         final XMLSerializerBuilder paramBuilder, final long... paramVersions) {
221         super(paramSession, paramNodeKey, paramVersions);
222         mOut = new BufferedOutputStream(paramBuilder.mStream, 4096);
223         mIndent = paramBuilder.mIndent;
224         mSerializeXMLDeclaration = paramBuilder.mDeclaration;
225         mSerializeRest = paramBuilder.mREST;
226         mSerializeId = paramBuilder.mID;
227         mIndentSpaces = paramBuilder.mIndentSpaces;
228     }
229 
230     /**
231      * Emit node (start element or characters).
232      * 
233      * @throws TTIOException
234      */
235     @Override
236     protected void emitStartElement(final INodeReadTrx paramRTX) throws TTIOException {
237         try {
238             switch (paramRTX.getNode().getKind()) {
239             case ROOT:
240                 if (mIndent) {
241                     mOut.write(ECharsForSerializing.NEWLINE.getBytes());
242                 }
243                 break;
244             case ELEMENT:
245                 // Emit start element.
246                 indent();
247                 final INameNode namenode = (INameNode)paramRTX.getNode();
248                 mOut.write(ECharsForSerializing.OPEN.getBytes());
249                 mOut.write(paramRTX.nameForKey(namenode.getNameKey()).getBytes());
250                 final long key = paramRTX.getNode().getDataKey();
251                 // Emit namespace declarations.
252                 for (int index = 0, length = ((ElementNode)namenode).getNamespaceCount(); index < length; index++) {
253                     paramRTX.moveToNamespace(index);
254                     if (paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()).length() == 0) {
255                         mOut.write(ECharsForSerializing.XMLNS.getBytes());
256                         write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getURIKey()));
257                         mOut.write(ECharsForSerializing.QUOTE.getBytes());
258                     } else {
259                         mOut.write(ECharsForSerializing.XMLNS_COLON.getBytes());
260                         write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()));
261                         mOut.write(ECharsForSerializing.EQUAL_QUOTE.getBytes());
262                         write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getURIKey()));
263                         mOut.write(ECharsForSerializing.QUOTE.getBytes());
264                     }
265                     paramRTX.moveTo(key);
266                 }
267                 // Emit attributes.
268                 // Add virtual rest:id attribute.
269                 if (mSerializeId) {
270                     if (mSerializeRest) {
271                         mOut.write(ECharsForSerializing.REST_PREFIX.getBytes());
272                     } else {
273                         mOut.write(ECharsForSerializing.SPACE.getBytes());
274                     }
275                     mOut.write(ECharsForSerializing.ID.getBytes());
276                     mOut.write(ECharsForSerializing.EQUAL_QUOTE.getBytes());
277                     write(paramRTX.getNode().getDataKey());
278                     mOut.write(ECharsForSerializing.QUOTE.getBytes());
279                 }
280 
281                 // Iterate over all persistent attributes.
282                 for (int index = 0; index < ((ElementNode)paramRTX.getNode()).getAttributeCount(); index++) {
283                     paramRTX.moveToAttribute(index);
284                     mOut.write(ECharsForSerializing.SPACE.getBytes());
285                     mOut.write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()).getBytes());
286                     mOut.write(ECharsForSerializing.EQUAL_QUOTE.getBytes());
287                     mOut.write(paramRTX.getValueOfCurrentNode().getBytes());
288                     mOut.write(ECharsForSerializing.QUOTE.getBytes());
289                     paramRTX.moveTo(key);
290                 }
291                 if (((IStructNode)paramRTX.getNode()).hasFirstChild()) {
292                     mOut.write(ECharsForSerializing.CLOSE.getBytes());
293                 } else {
294                     mOut.write(ECharsForSerializing.SLASH_CLOSE.getBytes());
295                 }
296                 if (mIndent) {
297                     mOut.write(ECharsForSerializing.NEWLINE.getBytes());
298                 }
299                 break;
300             case TEXT:
301                 indent();
302                 mOut.write(paramRTX.getValueOfCurrentNode().getBytes());
303                 if (mIndent) {
304                     mOut.write(ECharsForSerializing.NEWLINE.getBytes());
305                 }
306                 break;
307             }
308         } catch (final IOException exc) {
309             exc.printStackTrace();
310         }
311     }
312 
313     /**
314      * Emit end element.
315      * 
316      * @param paramRTX
317      *            Read Transaction
318      */
319     @Override
320     protected void emitEndElement(final INodeReadTrx paramRTX) {
321         try {
322             indent();
323             mOut.write(ECharsForSerializing.OPEN_SLASH.getBytes());
324             mOut.write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()).getBytes());
325             mOut.write(ECharsForSerializing.CLOSE.getBytes());
326             if (mIndent) {
327                 mOut.write(ECharsForSerializing.NEWLINE.getBytes());
328             }
329         } catch (final IOException exc) {
330             exc.printStackTrace();
331         }
332     }
333 
334     /** {@inheritDoc} */
335     @Override
336     protected void emitStartDocument() {
337         try {
338             if (mSerializeXMLDeclaration) {
339                 write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
340             }
341             if (mSerializeRest) {
342                 write("<rest:sequence xmlns:rest=\"REST\"><rest:item>");
343             }
344         } catch (final IOException exc) {
345             exc.printStackTrace();
346         }
347     }
348 
349     /** {@inheritDoc} */
350     @Override
351     protected void emitEndDocument() {
352         try {
353             if (mSerializeRest) {
354                 write("</rest:item></rest:sequence>");
355             }
356             mOut.flush();
357         } catch (final IOException exc) {
358             exc.printStackTrace();
359         }
360 
361     }
362 
363     /** {@inheritDoc} */
364     @Override
365     protected void emitStartManualElement(final long mVersion) {
366         try {
367             write("<tt revision=\"");
368             write(Long.toString(mVersion));
369             write("\">");
370         } catch (final IOException exc) {
371             exc.printStackTrace();
372         }
373 
374     }
375 
376     /** {@inheritDoc} */
377     @Override
378     protected void emitEndManualElement(final long mVersion) {
379         try {
380             write("</tt>");
381         } catch (final IOException exc) {
382             exc.printStackTrace();
383         }
384     }
385 
386     /**
387      * Indentation of output.
388      * 
389      * @throws IOException
390      *             if can't indent output
391      */
392     private void indent() throws IOException {
393         if (mIndent) {
394             for (int i = 0; i < mStack.size() * mIndentSpaces; i++) {
395                 mOut.write(" ".getBytes());
396             }
397         }
398     }
399 
400     /**
401      * Write characters of string.
402      * 
403      * @param mString
404      *            String to write
405      * @throws IOException
406      *             if can't write to string
407      * @throws UnsupportedEncodingException
408      *             if unsupport encoding
409      */
410     protected void write(final String mString) throws UnsupportedEncodingException, IOException {
411         mOut.write(mString.getBytes("UTF-8"));
412     }
413 
414     /**
415      * Write non-negative non-zero long as UTF-8 bytes.
416      * 
417      * @param mValue
418      *            Value to write
419      * @throws IOException
420      *             if can't write to string
421      */
422     private void write(final long mValue) throws IOException {
423         final int length = (int)Math.log10((double)mValue);
424         int digit = 0;
425         long remainder = mValue;
426         for (int i = length; i >= 0; i--) {
427             digit = (byte)(remainder / LONG_POWERS[i]);
428             mOut.write((byte)(digit + ASCII_OFFSET));
429             remainder -= digit * LONG_POWERS[i];
430         }
431     }
432 
433     /**
434      * Main method.
435      * 
436      * @param args
437      *            args[0] specifies the input-TT file/folder; args[1] specifies
438      *            the output XML file.
439      * @throws Exception
440      *             Any exception.
441      */
442     public static void main(final String... args) throws Exception {
443         if (args.length < 2 || args.length > 3) {
444             System.out.println("Usage: XMLSerializer input-TT output.xml");
445             System.exit(1);
446         }
447 
448         System.out.print("Serializing '" + args[0] + "' to '" + args[1] + "' ... ");
449         final long time = System.currentTimeMillis();
450 
451         Injector injector = Guice.createInjector(new ModuleSetter().setDataFacClass(TreeNodeFactory.class).setMetaFacClass(NodeMetaPageFactory.class).createModule());
452         IBackendFactory storage = injector.getInstance(IBackendFactory.class);
453         IRevisioning revision = injector.getInstance(IRevisioning.class);
454 
455         final File target = new File(args[1]);
456         target.delete();
457         final FileOutputStream outputStream = new FileOutputStream(target);
458 
459         final StorageConfiguration config = new StorageConfiguration(new File(args[0]));
460         Storage.createStorage(config);
461         final IStorage db = Storage.openStorage(new File(args[0]));
462         Properties props = new Properties();
463         props.setProperty(ConstructorProps.STORAGEPATH, target.getAbsolutePath());
464         props.setProperty(ConstructorProps.RESOURCE, "shredded");
465         db.createResource(new ResourceConfiguration(props, storage, revision, new TreeNodeFactory(),new NodeMetaPageFactory()));
466         final ISession session = db.getSession(new SessionConfiguration("shredded", StandardSettings.KEY));
467 
468         final XMLSerializer serializer = new XMLSerializerBuilder(session, outputStream).build();
469         serializer.call();
470 
471         session.close();
472         outputStream.close();
473         db.close();
474 
475         System.out.println(" done [" + (System.currentTimeMillis() - time) + "ms].");
476     }
477 
478     /**
479      * XMLSerializerBuilder to setup the XMLSerializer.
480      */
481     public static final class XMLSerializerBuilder {
482         /**
483          * Intermediate boolean for indendation, not necessary.
484          */
485         private transient boolean mIndent;
486 
487         /**
488          * Intermediate boolean for rest serialization, not necessary.
489          */
490         private transient boolean mREST;
491 
492         /**
493          * Intermediate boolean for XML-Decl serialization, not necessary.
494          */
495         private transient boolean mDeclaration = true;
496 
497         /**
498          * Intermediate boolean for ids, not necessary.
499          */
500         private transient boolean mID;
501 
502         /**
503          * Intermediate number of spaces to indent, not necessary.
504          */
505         private transient int mIndentSpaces = 2;
506 
507         /** Stream to pipe to. */
508         private final OutputStream mStream;
509 
510         /** Session to use. */
511         private final ISession mSession;
512 
513         /** Versions to use. */
514         private transient long[] mVersions;
515 
516         /** Node key of subtree to shredder. */
517         private final long mNodeKey;
518 
519         /**
520          * Constructor, setting the necessary stuff.
521          * 
522          * @param paramSession
523          *            {@link ISession} to Serialize
524          * @param paramStream
525          *            {@link OutputStream}
526          * @param paramVersions
527          *            version(s) to Serialize
528          */
529         public XMLSerializerBuilder(final ISession paramSession, final OutputStream paramStream,
530             final long... paramVersions) {
531             mNodeKey = 0;
532             mStream = paramStream;
533             mSession = paramSession;
534             mVersions = paramVersions;
535         }
536 
537         /**
538          * Constructor.
539          * 
540          * @param paramSession
541          *            {@link ISession}
542          * @param paramNodeKey
543          *            root node key of subtree to shredder
544          * @param paramStream
545          *            {@link OutputStream}
546          * @param paramProperties
547          *            {@link XMLSerializerProperties}
548          * @param paramVersions
549          *            version(s) to serialize
550          */
551         public XMLSerializerBuilder(final ISession paramSession, final long paramNodeKey,
552             final OutputStream paramStream, final XMLSerializerProperties paramProperties,
553             final long... paramVersions) {
554             mSession = paramSession;
555             mNodeKey = paramNodeKey;
556             mStream = paramStream;
557             mVersions = paramVersions;
558             final ConcurrentMap<?, ?> map = paramProperties.getmProps();
559             mIndent = (Boolean)map.get(S_INDENT[0]);
560             mREST = (Boolean)map.get(S_REST[0]);
561             mID = (Boolean)map.get(S_ID[0]);
562             mIndentSpaces = (Integer)map.get(S_INDENT_SPACES[0]);
563             mDeclaration = (Boolean)map.get(S_XMLDECL[0]);
564         }
565 
566         /**
567          * Setting the indention.
568          * 
569          * @param paramIndent
570          *            to set
571          * @return XMLSerializerBuilder reference.
572          */
573         public XMLSerializerBuilder setIndend(final boolean paramIndent) {
574             mIndent = paramIndent;
575             return this;
576         }
577 
578         /**
579          * Setting the RESTful output.
580          * 
581          * @param paramREST
582          *            to set
583          * @return XMLSerializerBuilder reference.
584          */
585         public XMLSerializerBuilder setREST(final boolean paramREST) {
586             mREST = paramREST;
587             return this;
588         }
589 
590         /**
591          * Setting the declaration.
592          * 
593          * @param paramDeclaration
594          *            to set
595          * @return XMLSerializerBuilder reference.
596          */
597         public XMLSerializerBuilder setDeclaration(final boolean paramDeclaration) {
598             mDeclaration = paramDeclaration;
599             return this;
600         }
601 
602         /**
603          * Setting the ids on nodes.
604          * 
605          * @param paramID
606          *            to set
607          * @return XMLSerializerBuilder reference.
608          */
609         public XMLSerializerBuilder setID(final boolean paramID) {
610             mID = paramID;
611             return this;
612         }
613 
614         /**
615          * Setting the ids on nodes.
616          * 
617          * @param paramVersions
618          *            to set
619          * @return XMLSerializerBuilder reference.
620          */
621         public XMLSerializerBuilder setVersions(final long[] paramVersions) {
622             mVersions = paramVersions;
623             return this;
624         }
625 
626         /**
627          * Building new Serializer.
628          * 
629          * @return a new instance
630          */
631         public XMLSerializer build() {
632             return new XMLSerializer(mSession, mNodeKey, this, mVersions);
633         }
634     }
635 
636 }