1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package org.treetank.service.xml.serialize;
29
30 import static org.treetank.node.IConstants.ELEMENT;
31 import static org.treetank.node.IConstants.ROOT;
32 import static org.treetank.node.IConstants.TEXT;
33 import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_ID;
34 import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_INDENT;
35 import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_INDENT_SPACES;
36 import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_REST;
37 import static org.treetank.service.xml.serialize.XMLSerializerProperties.S_XMLDECL;
38
39 import java.io.BufferedOutputStream;
40 import java.io.File;
41 import java.io.FileOutputStream;
42 import java.io.IOException;
43 import java.io.OutputStream;
44 import java.io.UnsupportedEncodingException;
45 import java.util.Properties;
46 import java.util.concurrent.ConcurrentMap;
47
48 import org.treetank.access.Storage;
49 import org.treetank.access.conf.ConstructorProps;
50 import org.treetank.access.conf.ModuleSetter;
51 import org.treetank.access.conf.ResourceConfiguration;
52 import org.treetank.access.conf.SessionConfiguration;
53 import org.treetank.access.conf.StandardSettings;
54 import org.treetank.access.conf.StorageConfiguration;
55 import org.treetank.api.INodeReadTrx;
56 import org.treetank.api.ISession;
57 import org.treetank.api.IStorage;
58 import org.treetank.exception.TTIOException;
59 import org.treetank.io.IBackend.IBackendFactory;
60 import org.treetank.node.ElementNode;
61 import org.treetank.node.NodeMetaPageFactory;
62 import org.treetank.node.TreeNodeFactory;
63 import org.treetank.node.interfaces.INameNode;
64 import org.treetank.node.interfaces.IStructNode;
65 import org.treetank.revisioning.IRevisioning;
66
67 import com.google.inject.Guice;
68 import com.google.inject.Injector;
69
70
71
72
73
74
75
76
77
78
79 public final class XMLSerializer extends AbsSerializer {
80
/**
 * Pre-encoded byte sequences for the XML punctuation and fixed tokens the
 * serializer emits. Composite constants are built by concatenating the byte
 * sequences handed to the constructor.
 */
enum ECharsForSerializing {

    /** A single blank. */
    SPACE(new byte[] {' '}),

    /** Opening angle bracket. */
    OPEN(new byte[] {'<'}),

    /** Closing angle bracket. */
    CLOSE(new byte[] {'>'}),

    /** Forward slash. */
    SLASH(new byte[] {'/'}),

    /** Equals sign. */
    EQUAL(new byte[] {'='}),

    /** Double quote. */
    QUOTE(new byte[] {'"'}),

    /** Equals sign followed by a double quote. */
    EQUAL_QUOTE(EQUAL.getBytes(), QUOTE.getBytes()),

    /** Opening bracket followed by a slash, i.e. the start of an end tag. */
    OPEN_SLASH(OPEN.getBytes(), SLASH.getBytes()),

    /** Slash followed by a closing bracket, i.e. the end of an empty tag. */
    SLASH_CLOSE(SLASH.getBytes(), CLOSE.getBytes()),

    /** A blank followed by {@code rest:}. */
    REST_PREFIX(SPACE.getBytes(), new byte[] {'r', 'e', 's', 't', ':'}),

    /** The token {@code ttid}. */
    ID(new byte[] {'t', 't', 'i', 'd'}),

    /** A blank followed by {@code xmlns="}. */
    XMLNS(SPACE.getBytes(), new byte[] {'x', 'm', 'l', 'n', 's'}, EQUAL.getBytes(), QUOTE.getBytes()),

    /** A blank followed by {@code xmlns:}. */
    XMLNS_COLON(SPACE.getBytes(), new byte[] {'x', 'm', 'l', 'n', 's', ':'}),

    /** The line separator of the running platform. */
    NEWLINE(System.getProperty("line.separator").getBytes());

    /** Concatenation of all byte sequences passed to the constructor. */
    private final byte[] mBytes;

    /**
     * Joins the given byte sequences, in order, into one array.
     *
     * @param paramBytes
     *            the pieces to concatenate
     */
    ECharsForSerializing(final byte[]... paramBytes) {
        int total = 0;
        for (final byte[] piece : paramBytes) {
            total += piece.length;
        }
        final byte[] joined = new byte[total];
        int offset = 0;
        for (final byte[] piece : paramBytes) {
            System.arraycopy(piece, 0, joined, offset, piece.length);
            offset += piece.length;
        }
        this.mBytes = joined;
    }

    /**
     * Returns the bytes of this constant.
     *
     * @return the internal array itself (not a copy); callers must not modify it
     */
    public byte[] getBytes() {
        return mBytes;
    }

}
177
178
/** Character code of the digit zero; added to a digit value to obtain its character. */
private static final int ASCII_OFFSET = '0';

/** Powers of ten from 10^0 up to 10^18, indexed by exponent. */
private static final long[] LONG_POWERS = {
    1L,
    10L,
    100L,
    1000L,
    10000L,
    100000L,
    1000000L,
    10000000L,
    100000000L,
    1000000000L,
    10000000000L,
    100000000000L,
    1000000000000L,
    10000000000000L,
    100000000000000L,
    1000000000000000L,
    10000000000000000L,
    100000000000000000L,
    1000000000000000000L
};

/** Buffered stream all serialized bytes are written to. */
private final OutputStream mOut;

/** Whether newlines and indentation are emitted. */
private final boolean mIndent;

/** Whether the XML declaration is emitted at the start of the document. */
private final boolean mSerializeXMLDeclaration;

/** Whether the REST wrapper elements and the {@code rest:} id prefix are emitted. */
private final boolean mSerializeRest;

/** Whether every element gets an id attribute carrying its data key. */
private final boolean mSerializeId;

/** Number of blanks written per nesting level when indenting. */
private final int mIndentSpaces;

/**
 * Creates a serializer from the settings collected by a builder. The
 * builder's stream is wrapped in a {@link BufferedOutputStream} with a
 * buffer of 4096 bytes.
 *
 * @param paramSession
 *            session handed to the super class
 * @param paramNodeKey
 *            node key handed to the super class
 * @param paramBuilder
 *            builder providing the output stream and the serialization flags
 * @param paramVersions
 *            versions handed to the super class
 */
private XMLSerializer(final ISession paramSession, final long paramNodeKey,
    final XMLSerializerBuilder paramBuilder, final long... paramVersions) {
    super(paramSession, paramNodeKey, paramVersions);
    final OutputStream rawStream = paramBuilder.mStream;
    mOut = new BufferedOutputStream(rawStream, 4096);
    mSerializeXMLDeclaration = paramBuilder.mDeclaration;
    mSerializeRest = paramBuilder.mREST;
    mSerializeId = paramBuilder.mID;
    mIndent = paramBuilder.mIndent;
    mIndentSpaces = paramBuilder.mIndentSpaces;
}
229
230
231
232
233
234
235 @Override
236 protected void emitStartElement(final INodeReadTrx paramRTX) throws TTIOException {
237 try {
238 switch (paramRTX.getNode().getKind()) {
239 case ROOT:
240 if (mIndent) {
241 mOut.write(ECharsForSerializing.NEWLINE.getBytes());
242 }
243 break;
244 case ELEMENT:
245
246 indent();
247 final INameNode namenode = (INameNode)paramRTX.getNode();
248 mOut.write(ECharsForSerializing.OPEN.getBytes());
249 mOut.write(paramRTX.nameForKey(namenode.getNameKey()).getBytes());
250 final long key = paramRTX.getNode().getDataKey();
251
252 for (int index = 0, length = ((ElementNode)namenode).getNamespaceCount(); index < length; index++) {
253 paramRTX.moveToNamespace(index);
254 if (paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()).length() == 0) {
255 mOut.write(ECharsForSerializing.XMLNS.getBytes());
256 write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getURIKey()));
257 mOut.write(ECharsForSerializing.QUOTE.getBytes());
258 } else {
259 mOut.write(ECharsForSerializing.XMLNS_COLON.getBytes());
260 write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()));
261 mOut.write(ECharsForSerializing.EQUAL_QUOTE.getBytes());
262 write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getURIKey()));
263 mOut.write(ECharsForSerializing.QUOTE.getBytes());
264 }
265 paramRTX.moveTo(key);
266 }
267
268
269 if (mSerializeId) {
270 if (mSerializeRest) {
271 mOut.write(ECharsForSerializing.REST_PREFIX.getBytes());
272 } else {
273 mOut.write(ECharsForSerializing.SPACE.getBytes());
274 }
275 mOut.write(ECharsForSerializing.ID.getBytes());
276 mOut.write(ECharsForSerializing.EQUAL_QUOTE.getBytes());
277 write(paramRTX.getNode().getDataKey());
278 mOut.write(ECharsForSerializing.QUOTE.getBytes());
279 }
280
281
282 for (int index = 0; index < ((ElementNode)paramRTX.getNode()).getAttributeCount(); index++) {
283 paramRTX.moveToAttribute(index);
284 mOut.write(ECharsForSerializing.SPACE.getBytes());
285 mOut.write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()).getBytes());
286 mOut.write(ECharsForSerializing.EQUAL_QUOTE.getBytes());
287 mOut.write(paramRTX.getValueOfCurrentNode().getBytes());
288 mOut.write(ECharsForSerializing.QUOTE.getBytes());
289 paramRTX.moveTo(key);
290 }
291 if (((IStructNode)paramRTX.getNode()).hasFirstChild()) {
292 mOut.write(ECharsForSerializing.CLOSE.getBytes());
293 } else {
294 mOut.write(ECharsForSerializing.SLASH_CLOSE.getBytes());
295 }
296 if (mIndent) {
297 mOut.write(ECharsForSerializing.NEWLINE.getBytes());
298 }
299 break;
300 case TEXT:
301 indent();
302 mOut.write(paramRTX.getValueOfCurrentNode().getBytes());
303 if (mIndent) {
304 mOut.write(ECharsForSerializing.NEWLINE.getBytes());
305 }
306 break;
307 }
308 } catch (final IOException exc) {
309 exc.printStackTrace();
310 }
311 }
312
313
314
315
316
317
318
319 @Override
320 protected void emitEndElement(final INodeReadTrx paramRTX) {
321 try {
322 indent();
323 mOut.write(ECharsForSerializing.OPEN_SLASH.getBytes());
324 mOut.write(paramRTX.nameForKey(((INameNode)paramRTX.getNode()).getNameKey()).getBytes());
325 mOut.write(ECharsForSerializing.CLOSE.getBytes());
326 if (mIndent) {
327 mOut.write(ECharsForSerializing.NEWLINE.getBytes());
328 }
329 } catch (final IOException exc) {
330 exc.printStackTrace();
331 }
332 }
333
334
335 @Override
336 protected void emitStartDocument() {
337 try {
338 if (mSerializeXMLDeclaration) {
339 write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
340 }
341 if (mSerializeRest) {
342 write("<rest:sequence xmlns:rest=\"REST\"><rest:item>");
343 }
344 } catch (final IOException exc) {
345 exc.printStackTrace();
346 }
347 }
348
349
350 @Override
351 protected void emitEndDocument() {
352 try {
353 if (mSerializeRest) {
354 write("</rest:item></rest:sequence>");
355 }
356 mOut.flush();
357 } catch (final IOException exc) {
358 exc.printStackTrace();
359 }
360
361 }
362
363
364 @Override
365 protected void emitStartManualElement(final long mVersion) {
366 try {
367 write("<tt revision=\"");
368 write(Long.toString(mVersion));
369 write("\">");
370 } catch (final IOException exc) {
371 exc.printStackTrace();
372 }
373
374 }
375
376
377 @Override
378 protected void emitEndManualElement(final long mVersion) {
379 try {
380 write("</tt>");
381 } catch (final IOException exc) {
382 exc.printStackTrace();
383 }
384 }
385
386
387
388
389
390
391
392 private void indent() throws IOException {
393 if (mIndent) {
394 for (int i = 0; i < mStack.size() * mIndentSpaces; i++) {
395 mOut.write(" ".getBytes());
396 }
397 }
398 }
399
400
401
402
403
404
405
406
407
408
409
410 protected void write(final String mString) throws UnsupportedEncodingException, IOException {
411 mOut.write(mString.getBytes("UTF-8"));
412 }
413
414
415
416
417
418
419
420
421
422 private void write(final long mValue) throws IOException {
423 final int length = (int)Math.log10((double)mValue);
424 int digit = 0;
425 long remainder = mValue;
426 for (int i = length; i >= 0; i--) {
427 digit = (byte)(remainder / LONG_POWERS[i]);
428 mOut.write((byte)(digit + ASCII_OFFSET));
429 remainder -= digit * LONG_POWERS[i];
430 }
431 }
432
433
434
435
436
437
438
439
440
441
442 public static void main(final String... args) throws Exception {
443 if (args.length < 2 || args.length > 3) {
444 System.out.println("Usage: XMLSerializer input-TT output.xml");
445 System.exit(1);
446 }
447
448 System.out.print("Serializing '" + args[0] + "' to '" + args[1] + "' ... ");
449 final long time = System.currentTimeMillis();
450
451 Injector injector = Guice.createInjector(new ModuleSetter().setDataFacClass(TreeNodeFactory.class).setMetaFacClass(NodeMetaPageFactory.class).createModule());
452 IBackendFactory storage = injector.getInstance(IBackendFactory.class);
453 IRevisioning revision = injector.getInstance(IRevisioning.class);
454
455 final File target = new File(args[1]);
456 target.delete();
457 final FileOutputStream outputStream = new FileOutputStream(target);
458
459 final StorageConfiguration config = new StorageConfiguration(new File(args[0]));
460 Storage.createStorage(config);
461 final IStorage db = Storage.openStorage(new File(args[0]));
462 Properties props = new Properties();
463 props.setProperty(ConstructorProps.STORAGEPATH, target.getAbsolutePath());
464 props.setProperty(ConstructorProps.RESOURCE, "shredded");
465 db.createResource(new ResourceConfiguration(props, storage, revision, new TreeNodeFactory(),new NodeMetaPageFactory()));
466 final ISession session = db.getSession(new SessionConfiguration("shredded", StandardSettings.KEY));
467
468 final XMLSerializer serializer = new XMLSerializerBuilder(session, outputStream).build();
469 serializer.call();
470
471 session.close();
472 outputStream.close();
473 db.close();
474
475 System.out.println(" done [" + (System.currentTimeMillis() - time) + "ms].");
476 }
477
478
479
480
481 public static final class XMLSerializerBuilder {
482
483
484
485 private transient boolean mIndent;
486
487
488
489
490 private transient boolean mREST;
491
492
493
494
495 private transient boolean mDeclaration = true;
496
497
498
499
500 private transient boolean mID;
501
502
503
504
505 private transient int mIndentSpaces = 2;
506
507
508 private final OutputStream mStream;
509
510
511 private final ISession mSession;
512
513
514 private transient long[] mVersions;
515
516
517 private final long mNodeKey;
518
519
520
521
522
523
524
525
526
527
528
529 public XMLSerializerBuilder(final ISession paramSession, final OutputStream paramStream,
530 final long... paramVersions) {
531 mNodeKey = 0;
532 mStream = paramStream;
533 mSession = paramSession;
534 mVersions = paramVersions;
535 }
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551 public XMLSerializerBuilder(final ISession paramSession, final long paramNodeKey,
552 final OutputStream paramStream, final XMLSerializerProperties paramProperties,
553 final long... paramVersions) {
554 mSession = paramSession;
555 mNodeKey = paramNodeKey;
556 mStream = paramStream;
557 mVersions = paramVersions;
558 final ConcurrentMap<?, ?> map = paramProperties.getmProps();
559 mIndent = (Boolean)map.get(S_INDENT[0]);
560 mREST = (Boolean)map.get(S_REST[0]);
561 mID = (Boolean)map.get(S_ID[0]);
562 mIndentSpaces = (Integer)map.get(S_INDENT_SPACES[0]);
563 mDeclaration = (Boolean)map.get(S_XMLDECL[0]);
564 }
565
566
567
568
569
570
571
572
573 public XMLSerializerBuilder setIndend(final boolean paramIndent) {
574 mIndent = paramIndent;
575 return this;
576 }
577
578
579
580
581
582
583
584
585 public XMLSerializerBuilder setREST(final boolean paramREST) {
586 mREST = paramREST;
587 return this;
588 }
589
590
591
592
593
594
595
596
597 public XMLSerializerBuilder setDeclaration(final boolean paramDeclaration) {
598 mDeclaration = paramDeclaration;
599 return this;
600 }
601
602
603
604
605
606
607
608
609 public XMLSerializerBuilder setID(final boolean paramID) {
610 mID = paramID;
611 return this;
612 }
613
614
615
616
617
618
619
620
621 public XMLSerializerBuilder setVersions(final long[] paramVersions) {
622 mVersions = paramVersions;
623 return this;
624 }
625
626
627
628
629
630
631 public XMLSerializer build() {
632 return new XMLSerializer(mSession, mNodeKey, this, mVersions);
633 }
634 }
635
636 }