forked from datacleaner/DataCleaner
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Issue datacleaner#506: Big refactor of RowAnnotationFactory implementations to get memory footprint under control. Introducing "max sets" flag.
- Loading branch information
1 parent
7fb50aa
commit 5f161aa
Showing
54 changed files
with
443 additions
and
2,162 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
api/src/main/java/org/datacleaner/storage/RowAnnotationHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/** | ||
* DataCleaner (community edition) | ||
* Copyright (C) 2014 Neopost - Customer Information Management | ||
* | ||
* This copyrighted material is made available to anyone wishing to use, modify, | ||
* copy, or redistribute it subject to the terms and conditions of the GNU | ||
* Lesser General Public License, as published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | ||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License | ||
* for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with this distribution; if not, write to: | ||
* Free Software Foundation, Inc. | ||
* 51 Franklin Street, Fifth Floor | ||
* Boston, MA 02110-1301 USA | ||
*/ | ||
package org.datacleaner.storage; | ||
|
||
import org.datacleaner.api.InputRow; | ||
|
||
/** | ||
 * Represents a component that is capable of connecting {@link RowAnnotation}s | ||
 * to {@link InputRow}s, typically to publish them via a | ||
 * {@link RowAnnotationSampleContainer}. | ||
 */ | ||
public interface RowAnnotationHandler { | ||
|
||
/** | ||
 * Annotates/labels a row with an annotation. The row will be sampled and | ||
 * is usually retrievable later in the process via | ||
 * {@link RowAnnotationSampleContainer#getSampleRows(RowAnnotation)}. | ||
 * | ||
 * @param row the row to annotate | ||
 * @param annotation the annotation to label the row with | ||
 */ | ||
public void annotate(InputRow row, RowAnnotation annotation); | ||
|
||
/**
 * Annotates/labels a row with an annotation, additionally passing a
 * distinct count for the row.
 *
 * NOTE(review): the meaning of {@code distinctCount} is not specified in
 * this interface - presumably the number of identical source records that
 * the row represents; confirm against the implementations.
 *
 * @param row the row to annotate
 * @param distinctCount the distinct count associated with the row
 * @param annotation the annotation to label the row with
 */
public void annotate(InputRow row, int distinctCount, RowAnnotation annotation); | ||
|
||
/** | ||
 * Transfers registered annotated rows from one annotation to the other. | ||
 * | ||
 * @param from the annotation whose registered rows are transferred | ||
 * @param to the annotation that receives the transferred rows | ||
 */ | ||
public void transferAnnotations(RowAnnotation from, RowAnnotation to); | ||
|
||
/** | ||
 * Removes/resets all annotations of a specific kind. This method can be | ||
 * used for situations where e.g. an analyzer is annotating extreme values | ||
 * (highest/lowest values etc.) and the threshold is changing, cancelling | ||
 * all previous annotations. | ||
 * | ||
 * @param annotation the annotation to reset | ||
 */ | ||
public void resetAnnotation(RowAnnotation annotation); | ||
} |
48 changes: 48 additions & 0 deletions
48
api/src/main/java/org/datacleaner/storage/RowAnnotationSampleContainer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/** | ||
* DataCleaner (community edition) | ||
* Copyright (C) 2014 Neopost - Customer Information Management | ||
* | ||
* This copyrighted material is made available to anyone wishing to use, modify, | ||
* copy, or redistribute it subject to the terms and conditions of the GNU | ||
* Lesser General Public License, as published by the Free Software Foundation. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | ||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License | ||
* for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with this distribution; if not, write to: | ||
* Free Software Foundation, Inc. | ||
* 51 Franklin Street, Fifth Floor | ||
* Boston, MA 02110-1301 USA | ||
*/ | ||
package org.datacleaner.storage; | ||
|
||
import java.util.List; | ||
|
||
import org.datacleaner.api.InputRow; | ||
|
||
/** | ||
 * A component for retrieving sample {@link InputRow}s that are annotated | ||
 * with a {@link RowAnnotation}. | ||
 */ | ||
public interface RowAnnotationSampleContainer { | ||
|
||
/** | ||
 * Determines if there are sample rows available for a specific | ||
 * {@link RowAnnotation}. | ||
 * | ||
 * @param annotation the annotation to look up sample rows for | ||
 * @return true if sample rows are available for the annotation | ||
 */ | ||
public boolean hasSampleRows(RowAnnotation annotation); | ||
|
||
/** | ||
 * Gets all the available sample rows with a given annotation. | ||
 * | ||
 * @param annotation the annotation to look up sample rows for | ||
 * @return the available sample rows that carry the annotation | ||
 */ | ||
public List<InputRow> getSampleRows(RowAnnotation annotation); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.