diff --git a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoIndexer.java b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoIndexer.java index 135b551e..71ded3cc 100644 --- a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoIndexer.java +++ b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoIndexer.java @@ -89,8 +89,12 @@ public void flush(Directory directory, String segmentName) throws IOException { boolean success = false; try { String fileName = config.getGeoFileName(segmentName); - geoRecordBTree = new GeoSegmentWriter(treeToFlush, directory, - fileName, geoSegmentInfo, geoRecordSerializer); + try { + geoRecordBTree = new GeoSegmentWriter(treeToFlush, directory, + fileName, geoSegmentInfo, geoRecordSerializer); + } catch (InvalidTreeSizeException e) { + throw new IOException(e); + } success = true; } finally { diff --git a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriter.java b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriter.java index da91045a..23242650 100644 --- a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriter.java +++ b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriter.java @@ -27,14 +27,13 @@ public class GeoSegmentWriter extends BTree implements IGeoRecordSerializer geoRecordSerializer; IndexOutput indexOutput; GeoSegmentInfo geoSegmentInfo; - int maxIndex; int arrayLength; long treeDataStart; public GeoSegmentWriter(Set tree, Directory directory, String fileName, GeoSegmentInfo geoSegmentInfo, IGeoRecordSerializer geoRecordSerializer) - throws IOException { + throws IOException, InvalidTreeSizeException { super(tree.size(), false); this.arrayLength = tree.size(); this.geoSegmentInfo = geoSegmentInfo; @@ -44,12 +43,13 @@ public GeoSegmentWriter(Set tree, Directory directory, String fileName, try { buildBTreeFromSet(tree); } catch (IOException e) { - close(); + indexOutput.close(); + throw e; } } public GeoSegmentWriter(int treeSize, Iterator inputIterator, Directory directory, String fileName, - GeoSegmentInfo geoSegmentInfo, IGeoRecordSerializer geoRecordSerializer) throws IOException { + GeoSegmentInfo geoSegmentInfo, IGeoRecordSerializer geoRecordSerializer) throws IOException, InvalidTreeSizeException { super(treeSize, false); this.arrayLength = treeSize; this.geoSegmentInfo = geoSegmentInfo; @@ -60,28 +60,33 @@ public GeoSegmentWriter(int treeSize, Iterator inputIterator, Directory direc try { buildBTreeFromIterator(inputIterator); } catch (IOException e) { - close(); + indexOutput.close(); + throw e; } } - private void buildBTreeFromIterator(Iterator geoIter) throws IOException { + private void buildBTreeFromIterator(Iterator geoIter) throws IOException, InvalidTreeSizeException { writeGeoInfo(); + int recordCount = 0; int index = getLeftMostLeafIndex(); ensureNotWritingPastEndOfFile(index); while (geoIter.hasNext()) { - setValueAtIndex(index, geoIter.next()); - index = getNextIndex(index); - - if(index >= this.arrayLength) { - throw new IllegalArgumentException("Tree only created for " + arrayLength + " nodes but iterator contains more than that"); + G nextValue = geoIter.next(); + if (index != -1) { + setValueAtIndex(index, nextValue); + index = getNextIndex(index); } + + recordCount++; } - maxIndex = index; + if (arrayLength != recordCount) { + throw new InvalidTreeSizeException(arrayLength, recordCount); + } } - private void buildBTreeFromSet(Set geoSet) throws IOException { + private void buildBTreeFromSet(Set geoSet) throws IOException, InvalidTreeSizeException { buildBTreeFromIterator(geoSet.iterator()); } @@ -171,9 +176,4 @@ public void close() throws IOException { indexOutput.close(); } - // Test if full binary tree. - public int getMaxIndex() { - return maxIndex; - } - } diff --git a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/InvalidTreeSizeException.java b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/InvalidTreeSizeException.java new file mode 100644 index 00000000..02c49aa9 --- /dev/null +++ b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/index/impl/InvalidTreeSizeException.java @@ -0,0 +1,36 @@ +package com.browseengine.bobo.geosearch.index.impl; + +/** + * + * This exception indicates that the size of the compact binary tree, + * does not match the number of elements actually written to it. + * + * @author Geoff Cooney + * + */ +public class InvalidTreeSizeException extends Exception { + private static final long serialVersionUID = -586521581062650233L; + private final int treeSize; + private final int recordSize; + + public InvalidTreeSizeException(int treeSize, int recordSize) { + super("Explicit tree size(" + treeSize + ") does not match the number of records attempted to be written to the tree(" + + recordSize + ")"); + this.treeSize = treeSize; + this.recordSize = recordSize; + } + + /** + * @return The explicit size of the tree that this exception was generated for + */ + public int getTreeSize() { + return treeSize; + } + + /** + * @return The actual number of records in the tree this exception was generated for + */ + public int getRecordSize() { + return recordSize; + } +} diff --git a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMerger.java b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMerger.java index ca2eec0a..f665397e 100644 --- a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMerger.java +++ b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMerger.java @@ -27,6 +27,7 @@ import com.browseengine.bobo.geosearch.impl.CartesianGeoRecordSerializer; import com.browseengine.bobo.geosearch.index.impl.GeoSegmentReader; import com.browseengine.bobo.geosearch.index.impl.GeoSegmentWriter; +import com.browseengine.bobo.geosearch.index.impl.InvalidTreeSizeException; import com.browseengine.bobo.geosearch.merge.IGeoMergeInfo; import com.browseengine.bobo.geosearch.merge.IGeoMerger; @@ -96,8 +97,9 @@ public void merge(IGeoMergeInfo geoMergeInfo, GeoSearchConfig config) throws IOE } int newSegmentSize = calculateMergedSegmentSize(deletedDocsList, mergeInputBTrees, geoConverter); + buildMergedSegmentWithRetry(mergeInputBTrees, deletedDocsList, newSegmentSize, + geoMergeInfo, config, fieldNameFilterConverter); - buildMergedSegment(mergeInputBTrees, deletedDocsList, newSegmentSize, geoMergeInfo, config, fieldNameFilterConverter); success = true; } finally { @@ -110,6 +112,23 @@ public void merge(IGeoMergeInfo geoMergeInfo, GeoSearchConfig config) throws IOE } } + protected void buildMergedSegmentWithRetry(List> mergeInputBTrees, List deletedDocsList, + int newSegmentSize, IGeoMergeInfo geoMergeInfo, GeoSearchConfig config, IFieldNameFilterConverter fieldNameFilterConverter) throws IOException { + try { + buildMergedSegment(mergeInputBTrees, deletedDocsList, newSegmentSize, geoMergeInfo, config, fieldNameFilterConverter); + } catch (InvalidTreeSizeException e) { + LOGGER.warn("Number of records does not match expected number of merged records. Attempting to repair.", e); + + newSegmentSize = e.getRecordSize(); + try { + buildMergedSegment(mergeInputBTrees, deletedDocsList, newSegmentSize, geoMergeInfo, config, fieldNameFilterConverter); + } catch (InvalidTreeSizeException e2) { + LOGGER.error("Unable to merge geo segments", e2); + throw new IOException(e2); + } + } + } + /** * * @param directory @@ -139,7 +158,7 @@ protected boolean loadFieldNameFilterConverter(Directory directory, String geoFi private void buildMergedSegment(List> mergeInputBTrees, List deletedDocsList, int newSegmentSize, IGeoMergeInfo geoMergeInfo, GeoSearchConfig config, - IFieldNameFilterConverter fieldNameFilterConverter) throws IOException { + IFieldNameFilterConverter fieldNameFilterConverter) throws IOException, InvalidTreeSizeException { Directory directory = geoMergeInfo.getDirectory(); IGeoConverter geoConverter = config.getGeoConverter(); @@ -176,7 +195,7 @@ private GeoSegmentInfo buildGeoSegmentInfo(String segmentName, IFieldNameFilterC } protected BTree getOutputBTree(int newSegmentSize, Iterator inputIterator, - Directory directory, String outputFileName, GeoSegmentInfo geoSegmentInfo) throws IOException { + Directory directory, String outputFileName, GeoSegmentInfo geoSegmentInfo) throws IOException, InvalidTreeSizeException { return new GeoSegmentWriter(newSegmentSize, inputIterator, directory, outputFileName, geoSegmentInfo, geoRecordSerializer); } diff --git a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/solo/index/impl/GeoOnlyIndexer.java b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/solo/index/impl/GeoOnlyIndexer.java index ac16c35b..fd6ce40c 100644 --- a/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/solo/index/impl/GeoOnlyIndexer.java +++ b/bobo-contrib/src/main/java/com/browseengine/bobo/geosearch/solo/index/impl/GeoOnlyIndexer.java @@ -22,6 +22,7 @@ import com.browseengine.bobo.geosearch.index.bo.GeoCoordinateField; import com.browseengine.bobo.geosearch.index.impl.GeoSegmentReader; import com.browseengine.bobo.geosearch.index.impl.GeoSegmentWriter; +import com.browseengine.bobo.geosearch.index.impl.InvalidTreeSizeException; import com.browseengine.bobo.geosearch.solo.bo.IDGeoRecord; import com.browseengine.bobo.geosearch.solo.bo.IndexTooLargeException; import com.browseengine.bobo.geosearch.solo.impl.IDGeoRecordComparator; @@ -123,9 +124,13 @@ private void flushInMemoryIndex() throws IOException { GeoSegmentWriter getGeoSegmentWriter(Set dataToFlush) throws IOException { String fileName = indexName + "." + config.getGeoFileExtension(); - return new GeoSegmentWriter( - dataToFlush, directory, fileName, - buildGeoSegmentInfo(indexName), geoRecordSerializer); + try { + return new GeoSegmentWriter( + dataToFlush, directory, fileName, + buildGeoSegmentInfo(indexName), geoRecordSerializer); + } catch (InvalidTreeSizeException e) { + throw new IOException(e); + } } private void loadCurrentIndex() throws IOException { diff --git a/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentReaderTest.java b/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentReaderTest.java index 98cc9608..2cadff02 100644 --- a/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentReaderTest.java +++ b/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentReaderTest.java @@ -42,7 +42,7 @@ public void test_fileNotFoundGivesZeroGeoRecords() throws Exception { } @Test - public void test_WriteThenRead() { + public void test_WriteThenRead() throws InvalidTreeSizeException { for(int i = 0; i < 100; i++) { try { @@ -60,8 +60,6 @@ public void test_WriteThenRead() { String fileName = geoSegmentInfo.getSegmentName() + "." + geoConf.getGeoFileExtension(); GeoSegmentWriter geoOut = new GeoSegmentWriter( tree, dir, fileName, geoSegmentInfo, geoRecordSerializer); - assertTrue("Not a full binary tree. ", - geoOut.getMaxIndex() < geoOut.getArrayLength()); geoOut.close(); GeoSegmentReader geoRand = @@ -77,7 +75,7 @@ public void test_WriteThenRead() { } @Test - public void test_WriteThenRead_V1() throws IOException { + public void test_WriteThenRead_V1() throws IOException, InvalidTreeSizeException { int len = 100; int idBytes = 16; @@ -105,8 +103,6 @@ public void test_WriteThenRead_V1() throws IOException { //write data GeoSegmentWriter geoOut = new GeoSegmentWriter( tree, dir, fileName, geoSegmentInfo, geoRecordSerializer); - assertTrue("Not a full binary tree. ", - geoOut.getMaxIndex() < geoOut.getArrayLength()); geoOut.close(); //read and verify data diff --git a/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriterTest.java b/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriterTest.java index d3644023..0e7fc74c 100644 --- a/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriterTest.java +++ b/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/index/impl/GeoSegmentWriterTest.java @@ -1,5 +1,8 @@ package com.browseengine.bobo.geosearch.index.impl; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + import java.io.IOException; import java.util.Comparator; import java.util.TreeSet; @@ -83,43 +86,77 @@ public void tearDown() { } @Test - public void createOutputBTree() throws IOException { + public void createOutputBTree() throws IOException, InvalidTreeSizeException { final int docsToAdd = 20; - - for (int i=0; i bTree = new GeoSegmentWriter(docsToAdd + 1, treeSet.iterator(), + directory, fileName, info, geoRecordSerializer); + fail("Expected InvalidTreeSizeException but it did not occur"); + } catch (InvalidTreeSizeException e) { + assertEquals("Expected tree size to be as specified", docsToAdd + 1, e.getTreeSize()); + assertEquals("Expected recordSize to equal records in iterator", docsToAdd, e.getRecordSize()); + } + } + + @Test + public void treeTooSmall() throws IOException { + final int docsToAdd = 20; + int version = GeoVersion.VERSION_0; + buildTreeSet(docsToAdd); + + + buildWriteExpectations(docsToAdd, docsToAdd - 1, version); + + info.setGeoVersion(version); + String fileName = config.getGeoFileName(info.getSegmentName()); + + try { + GeoSegmentWriter bTree = new GeoSegmentWriter(docsToAdd - 1, treeSet.iterator(), + directory, fileName, info, geoRecordSerializer); + fail("Expected InvalidTreeSizeException but it did not occur"); + } catch (InvalidTreeSizeException e) { + assertEquals("Expected tree size to be as specified", docsToAdd -1, e.getTreeSize()); + assertEquals("Expected recordSize to equal records in iterator", docsToAdd, e.getRecordSize()); + } + } + + private void buildTreeSet(int docsToAdd) { for (int i=0; i bTree = new GeoSegmentWriter(treeSet, directory, + fileName, info, geoRecordSerializer); + bTree.close(); + context.assertIsSatisfied(); + } + + private void buildWriteExpectations(final int docsToAdd, final int expectedDocs, final int version) throws IOException { final byte[] byteBuf = new byte[10]; final Class clazz = byteBuf.getClass(); @@ -156,7 +209,7 @@ private void doCreateAndTest(final int docsToAdd, final int version) throws IOEx inSequence(outputSequence); one(mockOutput).writeInt(0); inSequence(outputSequence); - one(mockOutput).writeVInt(docsToAdd); + one(mockOutput).writeVInt(expectedDocs); inSequence(outputSequence); if(version > GeoVersion.VERSION_0) { one(mockOutput).writeVInt(GeoSegmentInfo.BYTES_PER_RECORD_V1); @@ -185,24 +238,14 @@ private void doCreateAndTest(final int docsToAdd, final int version) throws IOEx inSequence(outputSequence); //write actual tree - for (int i = 0; i < docsToAdd; i++) { + for (int i = 0; i < Math.min(docsToAdd, expectedDocs); i++) { one(mockOutput).seek(with(any(Long.class))); inSequence(outputSequence); one(geoRecordSerializer).writeGeoRecord(with(mockOutput), with(any(CartesianGeoRecord.class)), with(any(Integer.class))); inSequence(outputSequence); } - - //close - one(mockOutput).close(); } }); - - info.setGeoVersion(version); - String fileName = config.getGeoFileName(info.getSegmentName()); - GeoSegmentWriter bTree = new GeoSegmentWriter(treeSet, directory, - fileName, info, geoRecordSerializer); - bTree.close(); - context.assertIsSatisfied(); } } diff --git a/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMergerTest.java b/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMergerTest.java index 36866823..dca4b656 100644 --- a/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMergerTest.java +++ b/bobo-contrib/src/test/java/com/browseengine/bobo/geosearch/merge/impl/BufferedGeoMergerTest.java @@ -3,6 +3,7 @@ import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -17,6 +18,7 @@ import org.apache.lucene.index.SegmentReader; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BitVector; import org.jmock.Expectations; import org.jmock.Mockery; import org.junit.Before; @@ -34,7 +36,11 @@ import com.browseengine.bobo.geosearch.bo.GeoSegmentInfo; import com.browseengine.bobo.geosearch.bo.LatitudeLongitudeDocId; import com.browseengine.bobo.geosearch.impl.BTree; +import com.browseengine.bobo.geosearch.impl.CartesianGeoRecordComparator; +import com.browseengine.bobo.geosearch.impl.CartesianGeoRecordSerializer; import com.browseengine.bobo.geosearch.impl.GeoRecordBTree; +import com.browseengine.bobo.geosearch.impl.MappedFieldNameFilterConverter; +import com.browseengine.bobo.geosearch.index.impl.GeoSegmentReader; import com.browseengine.bobo.geosearch.merge.IGeoMergeInfo; /** @@ -129,7 +135,7 @@ private void setUpMergeObjects(int[] docsPerSegment, int[] deletedDocsPerSegment segmentStart += (segmentSize - deletedDocs); } - String newSegmentName = "newSegment"; + newSegmentName = "newSegment"; newSegment = LuceneUtils.buildSegmentInfo(newSegmentName, segmentStart, 0, dir); } @@ -204,6 +210,10 @@ private void checkOutputTreeAgainstExpected() throws IOException { if (!isVerifyOutputTreeAgainstExpected) { return; } + checkTreeAgainstExpected(outputTree); + } + + private void checkTreeAgainstExpected(BTree outputTree) throws IOException { assertEquals("trees sould be equal in size", expectedOutputTree.size(), outputTree.getArrayLength()); Iterator outputIterator = outputTree.getIterator(CartesianGeoRecord.MIN_VALID_GEORECORD, CartesianGeoRecord.MAX_VALID_GEORECORD); Iterator expectedIterator = expectedOutputTree.iterator(); @@ -310,6 +320,8 @@ private void verifyMissingGeoFile() throws IOException { } private Set noGeoFileNames; + + private String newSegmentName; private void initNoGeoFile() { @@ -421,4 +433,40 @@ public void testMergeAndDelete_VariedSegmentSize_VariedDeletes() throws IOExcept doMerge(); } + + @Test + public void testIncorrectNewSegmentSize() throws IOException { + bufferedGeoMerger = new BufferedGeoMerger(); + + int[] docsPerSegment = new int[] {10, 10}; + int[] deletedDocsPerSegment = new int[] {0, 0}; + setUpMergeObjects(docsPerSegment, deletedDocsPerSegment); + + context.checking(new Expectations() { + { + ignoring(geoMergeInfo).getDirectory(); + will(returnValue(dir)); + + atLeast(1).of(geoMergeInfo).getNewSegment(); + will(returnValue(newSegment)); + } + }); + + + List> mergeInputBTrees = new ArrayList(inputTrees.values()); + List deletedDocsList = new ArrayList(); + deletedDocsList.add(new BitVector(10)); + deletedDocsList.add(new BitVector(10)); + + int newSegmentSize = 21; + GeoSearchConfig config = geoConfig; + MappedFieldNameFilterConverter fieldNameFilterConverter = new MappedFieldNameFilterConverter(); + fieldNameFilterConverter.addFieldBitMask("test", (byte) 1); + bufferedGeoMerger.buildMergedSegmentWithRetry(mergeInputBTrees, deletedDocsList, newSegmentSize, + geoMergeInfo, config, fieldNameFilterConverter); + + GeoSegmentReader myTree = new GeoSegmentReader(dir, newSegmentName + ".geo", 20, 4096, + new CartesianGeoRecordSerializer(), new CartesianGeoRecordComparator()); + checkTreeAgainstExpected(myTree); + } }