Skip to content

Commit d11ddb4

Browse files
authored
[HUDI-8077] Improve logging in clean planning (apache#11979)
1 parent d4aba1a commit d11ddb4

File tree

2 files changed

+29
-14
lines changed

2 files changed

+29
-14
lines changed

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java

+18-8
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,16 @@ HoodieCleanerPlan requestClean(HoodieEngineContext context) {
114114
LOG.info("Nothing to clean here. It is already clean");
115115
return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build();
116116
}
117-
LOG.info("Earliest commit to retain for clean : " + (earliestInstant.isPresent() ? earliestInstant.get().getTimestamp() : "null"));
118-
LOG.info("Total partitions to clean : " + partitionsToClean.size() + ", with policy " + config.getCleanerPolicy());
117+
LOG.info(
118+
"Earliest commit to retain for clean : {}",
119+
earliestInstant.isPresent() ? earliestInstant.get().getTimestamp() : "null");
120+
LOG.info(
121+
"Total partitions to clean : {}, with policy {}",
122+
partitionsToClean.size(),
123+
config.getCleanerPolicy());
119124
int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
120-
LOG.info("Using cleanerParallelism: " + cleanerParallelism);
125+
LOG.info(
126+
"Using cleanerParallelism: {}", cleanerParallelism);
121127

122128
context.setJobStatus(this.getClass().getSimpleName(), "Generating list of file slices to be cleaned: " + config.getTableName());
123129

@@ -145,11 +151,15 @@ HoodieCleanerPlan requestClean(HoodieEngineContext context) {
145151
.collect(Collectors.toList()));
146152
}
147153

148-
return new HoodieCleanerPlan(earliestInstant
149-
.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null),
150-
planner.getLastCompletedCommitTimestamp(),
151-
config.getCleanerPolicy().name(), Collections.emptyMap(),
152-
CleanPlanner.LATEST_CLEAN_PLAN_VERSION, cleanOps, partitionsToDelete, prepareExtraMetadata(planner.getSavepointedTimestamps()));
154+
return new HoodieCleanerPlan(
155+
earliestInstant.map(x -> new HoodieActionInstant(x.getTimestamp(), x.getAction(), x.getState().name())).orElse(null),
156+
planner.getLastCompletedCommitTimestamp(), // Note: This is the start time of the last completed ingestion before this clean.
157+
config.getCleanerPolicy().name(),
158+
Collections.emptyMap(),
159+
CleanPlanner.LATEST_CLEAN_PLAN_VERSION,
160+
cleanOps,
161+
partitionsToDelete,
162+
prepareExtraMetadata(planner.getSavepointedTimestamps()));
153163
} catch (IOException e) {
154164
throw new HoodieIOException("Failed to schedule clean operation", e);
155165
}

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java

+11-6
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ private List<String> getPartitionPathsForCleanByCommits(Option<HoodieInstant> in
180180
HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
181181
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
182182
if ((cleanMetadata.getEarliestCommitToRetain() != null)
183-
&& (cleanMetadata.getEarliestCommitToRetain().length() > 0)
183+
&& !cleanMetadata.getEarliestCommitToRetain().trim().isEmpty()
184184
&& !hoodieTable.getActiveTimeline().getCommitsTimeline().isBeforeTimelineStarts(cleanMetadata.getEarliestCommitToRetain())) {
185185
return getPartitionPathsForIncrementalCleaning(cleanMetadata, instantToRetain);
186186
}
@@ -204,9 +204,11 @@ private List<String> getPartitionPathsForIncrementalCleaning(HoodieCleanMetadata
204204
LOG.info("Since savepoints have been removed compared to previous clean, triggering clean planning for all partitions");
205205
return getPartitionPathsForFullCleaning();
206206
} else {
207-
LOG.info("Incremental Cleaning mode is enabled. Looking up partition-paths that have since changed "
208-
+ "since last cleaned at " + cleanMetadata.getEarliestCommitToRetain()
209-
+ ". New Instant to retain : " + newInstantToRetain);
207+
LOG.info(
208+
"Incremental Cleaning mode is enabled. Looking up partition-paths that have changed "
209+
+ "since last clean at {}. New Instant to retain {}.",
210+
cleanMetadata.getEarliestCommitToRetain(),
211+
newInstantToRetain);
210212

211213
return hoodieTable.getCompletedCommitsTimeline().getInstantsAsStream()
212214
.filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.GREATER_THAN_OR_EQUALS,
@@ -288,8 +290,11 @@ private boolean isFileSliceExistInSavepointedFiles(FileSlice fs, List<String> sa
288290
* single file (i.e., run it with versionsRetained = 1)
289291
*/
290292
private Pair<Boolean, List<CleanFileInfo>> getFilesToCleanKeepingLatestVersions(String partitionPath) {
291-
LOG.info("Cleaning " + partitionPath + ", retaining latest " + config.getCleanerFileVersionsRetained()
292-
+ " file versions. ");
293+
LOG.info(
294+
"Cleaning {}, retaining latest {} file versions.",
295+
partitionPath,
296+
config.getCleanerFileVersionsRetained());
297+
293298
List<CleanFileInfo> deletePaths = new ArrayList<>();
294299
// Collect all the datafiles savepointed by all the savepoints
295300
List<String> savepointedFiles = hoodieTable.getSavepointTimestamps().stream()

0 commit comments

Comments
 (0)