View Javadoc

1   /**
2    * Copyright (c) 2011, University of Konstanz, Distributed Systems Group
3    * All rights reserved.
4    * 
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions are met:
7    * * Redistributions of source code must retain the above copyright
8    * notice, this list of conditions and the following disclaimer.
9    * * Redistributions in binary form must reproduce the above copyright
10   * notice, this list of conditions and the following disclaimer in the
11   * documentation and/or other materials provided with the distribution.
12   * * Neither the name of the University of Konstanz nor the
13   * names of its contributors may be used to endorse or promote products
14   * derived from this software without specific prior written permission.
15   * 
16   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18   * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19   * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20   * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26   */
27  
28  package org.treetank.service.xml.diff;
29  
30  import static org.treetank.node.IConstants.ELEMENT;
31  import static org.treetank.node.IConstants.ROOT;
32  import static org.treetank.node.IConstants.ROOT_NODE;
33  import static org.treetank.node.IConstants.TEXT;
34  
35  import javax.xml.namespace.QName;
36  
37  import org.treetank.access.NodeReadTrx;
38  import org.treetank.access.NodeWriteTrx.HashKind;
39  import org.treetank.api.INodeReadTrx;
40  import org.treetank.exception.TTException;
41  import org.treetank.exception.TTIOException;
42  import org.treetank.node.interfaces.IStructNode;
43  import org.treetank.service.xml.diff.DiffFactory.Builder;
44  import org.treetank.service.xml.diff.DiffFactory.EDiff;
45  import org.treetank.service.xml.diff.DiffFactory.EDiffOptimized;
46  
47  /**
48   * Abstract diff class which implements common functionality.
49   * 
50   * @author Johannes Lichtenberger, University of Konstanz
51   * 
52   */
53  abstract class AbsDiff extends AbsDiffObservable {
54  
55      /** Determines if a diff should be fired or not. */
56      enum EFireDiff {
57          /** Yes, it should be fired. */
58          TRUE,
59  
60          /** No, it shouldn't be fired. */
61          FALSE
62      }
63  
64      /** Determines the current revision. */
65      enum ERevision {
66  
67          /** Old revision. */
68          OLD,
69  
70          /** New revision. */
71          NEW;
72      }
73  
74      /**
75       * Kind of hash method.
76       * 
77       * @see HashKind
78       */
79      transient HashKind mHashKind;
80  
81      /**
82       * Kind of difference.
83       * 
84       * @see EDiff
85       */
86      private transient EDiff mDiff;
87  
88      /** Diff kind. */
89      private transient EDiffOptimized mDiffKind;
90  
91      /** {@link DepthCounter} instance. */
92      private transient DepthCounter mDepth;
93  
94      /** Key of "root" node in new revision. */
95      private transient long mRootKey;
96  
97      /**
98       * Constructor.
99       * 
100      * @param paramBuilder
101      *            {@link Builder} reference
102      * @throws TTException
103      *             if setting up transactions failes
104      */
105     AbsDiff(final Builder paramBuilder) throws TTException {
106         assert paramBuilder != null;
107 
108         mDiffKind = paramBuilder.mKind;
109         synchronized (paramBuilder.mSession) {
110             mNewRtx = new NodeReadTrx(paramBuilder.mSession.beginBucketRtx(paramBuilder.mNewRev));
111             mOldRtx = new NodeReadTrx(paramBuilder.mSession.beginBucketRtx(paramBuilder.mOldRev));
112             mHashKind = HashKind.Postorder;
113         }
114         mNewRtx.moveTo(paramBuilder.mKey);
115         mOldRtx.moveTo(paramBuilder.mKey);
116         mRootKey = paramBuilder.mKey;
117 
118         synchronized (paramBuilder.mObservers) {
119             for (final IDiffObserver observer : paramBuilder.mObservers) {
120                 addObserver(observer);
121             }
122         }
123         mDiff = EDiff.SAME;
124         mDiffKind = paramBuilder.mKind;
125         mDepth = new DepthCounter(paramBuilder.mNewDepth, paramBuilder.mOldDepth);
126         // System.out.println("NEW REV: " + paramBuilder.mNewRev + " new rev: "
127         // +
128         // mNewRtx.getRevisionNumber());
129     }
130 
131     /**
132      * Do the diff.
133      * 
134      * @throws TTException
135      *             if setting up transactions failes
136      */
137     void diffMovement() throws TTException {
138         assert mHashKind != null;
139         assert mNewRtx != null;
140         assert mOldRtx != null;
141         assert mDiff != null;
142         assert mDiffKind != null;
143 
144         // Check first nodes.
145         if (mNewRtx.getNode().getKind() != ROOT) {
146             if (mHashKind == HashKind.None || mDiffKind == EDiffOptimized.NO) {
147                 mDiff = diff(mNewRtx, mOldRtx, mDepth, EFireDiff.TRUE);
148             } else {
149                 mDiff = optimizedDiff(mNewRtx, mOldRtx, mDepth, EFireDiff.TRUE);
150             }
151         }
152 
153         // Iterate over new revision.
154         while ((mOldRtx.getNode().getKind() != ROOT && mDiff == EDiff.DELETED)
155             || moveCursor(mNewRtx, ERevision.NEW)) {
156             if (mDiff != EDiff.INSERTED) {
157                 moveCursor(mOldRtx, ERevision.OLD);
158             }
159 
160             if (mNewRtx.getNode().getKind() != ROOT || mOldRtx.getNode().getKind() != ROOT) {
161                 if (mHashKind == HashKind.None || mDiffKind == EDiffOptimized.NO) {
162                     mDiff = diff(mNewRtx, mOldRtx, mDepth, EFireDiff.TRUE);
163                 } else {
164                     mDiff = optimizedDiff(mNewRtx, mOldRtx, mDepth, EFireDiff.TRUE);
165                 }
166             }
167         }
168 
169         // Nodes deleted in old rev at the end of the tree.
170         if (mOldRtx.getNode().getKind() != ROOT) {
171             // First time it might be EDiff.INSERTED where the cursor doesn't
172             // move.
173             while (mDiff == EDiff.INSERTED || moveCursor(mOldRtx, ERevision.OLD)) {
174                 if (mHashKind == HashKind.None || mDiffKind == EDiffOptimized.NO) {
175                     mDiff = diff(mNewRtx, mOldRtx, mDepth, EFireDiff.TRUE);
176                 } else {
177                     mDiff = optimizedDiff(mNewRtx, mOldRtx, mDepth, EFireDiff.TRUE);
178                 }
179             }
180         }
181 
182         done();
183     }
184 
185     /**
186      * Move cursor one node forward in pre order.
187      * 
188      * @param paramRtx
189      *            the {@link IReadTransaction} to use
190      * @param paramRevision
191      *            the {@link ERevision} constant
192      * @return true, if cursor moved, false otherwise
193      * @throws TTIOException
194      */
195     boolean moveCursor(final INodeReadTrx paramRtx, final ERevision paramRevision) throws TTIOException {
196         assert paramRtx != null;
197 
198         boolean moved = false;
199 
200         final IStructNode node = ((IStructNode)paramRtx.getNode());
201         if (node.hasFirstChild()) {
202             if (node.getKind() != ROOT && mDiffKind == EDiffOptimized.HASHED && mHashKind != HashKind.None
203                 && (mDiff == EDiff.SAMEHASH || mDiff == EDiff.DELETED)) {
204                 moved = paramRtx.moveTo(((IStructNode)paramRtx.getNode()).getRightSiblingKey());
205 
206                 if (!moved) {
207                     moved = moveToFollowingNode(paramRtx, paramRevision);
208                 }
209             } else {
210                 moved = paramRtx.moveTo(((IStructNode)paramRtx.getNode()).getFirstChildKey());
211                 if (moved) {
212                     switch (paramRevision) {
213                     case NEW:
214                         mDepth.incrementNewDepth();
215                         break;
216                     case OLD:
217                         mDepth.incrementOldDepth();
218                         break;
219                     }
220                 }
221             }
222         } else if (node.hasRightSibling()) {
223             if (paramRtx.getNode().getDataKey() == mRootKey) {
224                 paramRtx.moveTo(ROOT_NODE);
225             } else {
226                 moved = paramRtx.moveTo(((IStructNode)paramRtx.getNode()).getRightSiblingKey());
227             }
228         } else {
229             moved = moveToFollowingNode(paramRtx, paramRevision);
230         }
231 
232         return moved;
233     }
234 
235     /**
236      * Move to next following node.
237      * 
238      * @param paramRtx
239      *            the {@link IReadTransaction} to use
240      * @param paramRevision
241      *            the {@link ERevision} constant
242      * @return true, if cursor moved, false otherwise
243      * @throws TTIOException
244      */
245     private boolean moveToFollowingNode(final INodeReadTrx paramRtx, final ERevision paramRevision)
246         throws TTIOException {
247         boolean moved = false;
248         while (!((IStructNode)paramRtx.getNode()).hasRightSibling()
249             && ((IStructNode)paramRtx.getNode()).hasParent() && paramRtx.getNode().getDataKey() != mRootKey) {
250             moved = paramRtx.moveTo(paramRtx.getNode().getParentKey());
251             if (moved) {
252                 switch (paramRevision) {
253                 case NEW:
254                     mDepth.decrementNewDepth();
255                     break;
256                 case OLD:
257                     mDepth.decrementOldDepth();
258                     break;
259                 }
260             }
261         }
262 
263         if (paramRtx.getNode().getDataKey() == mRootKey) {
264             paramRtx.moveTo(ROOT_NODE);
265         }
266 
267         moved = paramRtx.moveTo(((IStructNode)paramRtx.getNode()).getRightSiblingKey());
268         return moved;
269     }
270 
271     /**
272      * Diff of nodes.
273      * 
274      * @param paramNewRtx
275      *            {@link IReadTransaction} on new revision
276      * @param paramOldRtx
277      *            {@link IReadTransaction} on old revision
278      * @param paramDepth
279      *            {@link DepthCounter} container for current depths of both
280      *            transaction cursors
281      * @param paramFireDiff
282      *            determines if a diff should be fired
283      * @return kind of difference
284      * @throws TTIOException
285      */
286     EDiff diff(final INodeReadTrx paramNewRtx, final INodeReadTrx paramOldRtx, final DepthCounter paramDepth,
287         final EFireDiff paramFireDiff) throws TTIOException {
288         assert paramNewRtx != null;
289         assert paramOldRtx != null;
290         assert paramDepth != null;
291 
292         EDiff diff = EDiff.SAME;
293 
294         // Check for modifications.
295         switch (paramNewRtx.getNode().getKind()) {
296         case ROOT:
297         case TEXT:
298         case ELEMENT:
299             if (!checkNodes(paramNewRtx, paramOldRtx)) {
300                 diff = diffAlgorithm(paramNewRtx, paramOldRtx, paramDepth);
301             }
302             break;
303         default:
304             // Do nothing.
305         }
306 
307         if (paramFireDiff == EFireDiff.TRUE) {
308             fireDiff(diff, ((IStructNode)paramNewRtx.getNode()), ((IStructNode)paramOldRtx.getNode()),
309                 new DiffDepth(paramDepth.getNewDepth(), paramDepth.getOldDepth()));
310         }
311         return diff;
312     }
313 
314     /**
315      * Optimized diff, which skips unnecessary comparsions.
316      * 
317      * @param paramNewRtx
318      *            {@link IReadTransaction} on new revision
319      * @param paramOldRtx
320      *            {@link IReadTransaction} on old revision
321      * @param paramDepth
322      *            {@link DepthCounter} container for current depths of both
323      *            transaction cursors
324      * @param paramFireDiff
325      *            determines if a diff should be fired
326      * @return kind of difference
327      * @throws TTIOException
328      */
329     EDiff optimizedDiff(final INodeReadTrx paramNewRtx, final INodeReadTrx paramOldRtx,
330         final DepthCounter paramDepth, final EFireDiff paramFireDiff) throws TTIOException {
331         assert paramNewRtx != null;
332         assert paramOldRtx != null;
333         assert paramDepth != null;
334 
335         EDiff diff = EDiff.SAMEHASH;
336 
337         // Check for modifications.
338         switch (paramNewRtx.getNode().getKind()) {
339         case ROOT:
340         case TEXT:
341         case ELEMENT:
342             if (paramNewRtx.getNode().getDataKey() != paramOldRtx.getNode().getDataKey()
343                 || paramNewRtx.getNode().getHash() != paramOldRtx.getNode().getHash()) {
344                 // Check if nodes are the same (even if subtrees may vary).
345                 if (checkNodes(paramNewRtx, paramOldRtx)) {
346                     diff = EDiff.SAME;
347                 } else {
348                     diff = diffAlgorithm(paramNewRtx, paramOldRtx, paramDepth);
349                 }
350             }
351             break;
352         default:
353             // Do nothing.
354         }
355 
356         if (paramFireDiff == EFireDiff.TRUE) {
357             if (diff == EDiff.SAMEHASH) {
358                 fireDiff(EDiff.SAME, ((IStructNode)paramNewRtx.getNode()), ((IStructNode)paramOldRtx
359                     .getNode()), new DiffDepth(paramDepth.getNewDepth(), paramDepth.getOldDepth()));
360             } else {
361                 fireDiff(diff, ((IStructNode)paramNewRtx.getNode()), ((IStructNode)paramOldRtx.getNode()),
362                     new DiffDepth(paramDepth.getNewDepth(), paramDepth.getOldDepth()));
363             }
364         }
365         return diff;
366     }
367 
368     /**
369      * Main algorithm to compute diffs between two nodes.
370      * 
371      * @param paramNewRtx
372      *            {@link IReadTransaction} on new revision
373      * @param paramOldRtx
374      *            {@link IReadTransaction} on old revision
375      * @param paramDepth
376      *            {@link DepthCounter} container for current depths of both
377      *            transaction cursors
378      * @return kind of diff
379      * @throws TTIOException
380      */
381     private EDiff diffAlgorithm(final INodeReadTrx paramNewRtx, final INodeReadTrx paramOldRtx,
382         final DepthCounter paramDepth) throws TTIOException {
383         EDiff diff = null;
384 
385         // Check if node has been deleted.
386         if (paramDepth.getOldDepth() > paramDepth.getNewDepth()) {
387             diff = EDiff.DELETED;
388         } else if (checkUpdate(paramNewRtx, paramOldRtx)) { // Check if node has
389                                                             // been updated.
390             diff = EDiff.UPDATED;
391         } else {
392             // See if one of the right sibling matches.
393             EFoundEqualNode found = EFoundEqualNode.FALSE;
394             final long key = paramOldRtx.getNode().getDataKey();
395 
396             while (((IStructNode)paramOldRtx.getNode()).hasRightSibling()
397                 && paramOldRtx.moveTo(((IStructNode)paramOldRtx.getNode()).getRightSiblingKey())
398                 && found == EFoundEqualNode.FALSE) {
399                 if (checkNodes(paramNewRtx, paramOldRtx)) {
400                     found = EFoundEqualNode.TRUE;
401                 }
402             }
403 
404             paramOldRtx.moveTo(key);
405             diff = found.kindOfDiff();
406         }
407 
408         assert diff != null;
409         return diff;
410     }
411 
412     /**
413      * Check {@link QName} of nodes.
414      * 
415      * @param paramNewRtx
416      *            {@link IReadTransaction} on new revision
417      * @param paramOldRtx
418      *            {@link IReadTransaction} on old revision
419      * @return true if nodes are "equal" according to their {@link QName}s,
420      *         otherwise false
421      */
422     boolean checkName(final INodeReadTrx paramNewRtx, final INodeReadTrx paramOldRtx) {
423         assert paramNewRtx != null;
424         assert paramOldRtx != null;
425         boolean found = false;
426         if (paramNewRtx.getNode().getKind() == paramOldRtx.getNode().getKind()) {
427             switch (paramNewRtx.getNode().getKind()) {
428             case ELEMENT:
429                 if (paramNewRtx.getQNameOfCurrentNode().equals(paramOldRtx.getQNameOfCurrentNode())) {
430                     found = true;
431                 }
432                 break;
433             case TEXT:
434                 if (paramNewRtx.getValueOfCurrentNode().equals(paramOldRtx.getValueOfCurrentNode())) {
435                     found = true;
436                 }
437                 break;
438             default:
439             }
440         }
441         return found;
442     }
443 
444     /**
445      * Check if nodes are equal excluding subtrees.
446      * 
447      * @param paramNewRtx
448      *            {@link IReadTransaction} on new revision
449      * @param paramOldRtx
450      *            {@link IReadTransaction} on old revision
451      * @return true if nodes are "equal", otherwise false
452      */
453     abstract boolean checkNodes(final INodeReadTrx paramNewRtx, final INodeReadTrx paramOldRtx)
454         throws TTIOException;
455 
456     /**
457      * Check for an update of a node.
458      * 
459      * @param paramNewRtx
460      *            first {@link IReadTransaction} instance
461      * @param paramOldRtx
462      *            second {@link IReadTransaction} instance
463      * @return kind of diff
464      * @throws TTIOException
465      */
466     boolean checkUpdate(final INodeReadTrx paramNewRtx, final INodeReadTrx paramOldRtx) throws TTIOException {
467         assert paramNewRtx != null;
468         assert paramOldRtx != null;
469         boolean updated = false;
470         final long newKey = paramNewRtx.getNode().getDataKey();
471         boolean movedNewRtx = paramNewRtx.moveTo(((IStructNode)paramNewRtx.getNode()).getRightSiblingKey());
472         final long oldKey = paramOldRtx.getNode().getDataKey();
473         boolean movedOldRtx = paramOldRtx.moveTo(((IStructNode)paramOldRtx.getNode()).getRightSiblingKey());
474         if (movedNewRtx && movedOldRtx && checkNodes(paramNewRtx, paramOldRtx)) {
475             updated = true;
476         } else if (!movedNewRtx && !movedOldRtx) {
477             movedNewRtx = paramNewRtx.moveTo(paramNewRtx.getNode().getParentKey());
478             movedOldRtx = paramOldRtx.moveTo(paramOldRtx.getNode().getParentKey());
479 
480             if (movedNewRtx && movedOldRtx && checkNodes(paramNewRtx, paramOldRtx)) {
481                 updated = true;
482             }
483         }
484         paramNewRtx.moveTo(newKey);
485         paramOldRtx.moveTo(oldKey);
486         if (!updated) {
487             updated = paramNewRtx.getNode().getDataKey() == paramOldRtx.getNode().getDataKey();
488         }
489         return updated;
490     }
491 }