Repository: drill
Updated Branches:
refs/heads/master ed2f1ca8e -> 7bfcb40a0
DRILL-4380: Fix performance regression in the creation of FileSelection in ParquetFormatPlugin
so that files are not set if the metadata cache is available. This closes #369
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/7bfcb40a
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/7bfcb40a
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/7bfcb40a
Branch: refs/heads/master
Commit: 7bfcb40a0ffa49a1ed27e1ff1f57378aa1136bbd
Parents: ed2f1ca
Author: Parth Chandra <parthc@apache.org>
Authored: Thu Dec 17 16:30:42 2015 -0800
Committer: Parth Chandra <parthc@apache.org>
Committed: Tue Feb 9 14:23:39 2016 -0800
----------------------------------------------------------------------
.../drill/exec/store/dfs/FileSelection.java | 21 +++++++++++++++++---
.../exec/store/parquet/ParquetFormatPlugin.java | 2 +-
2 files changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/7bfcb40a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
index bc3cef3..1d79dfb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
@@ -20,11 +20,13 @@ package org.apache.drill.exec.store.dfs;
import java.io.IOException;
import java.net.URI;
import java.util.List;
+import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
+import com.google.common.base.Stopwatch;
import com.google.common.base.Strings;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
@@ -73,13 +75,18 @@ public class FileSelection {
}
public List<FileStatus> getStatuses(final DrillFileSystem fs) throws IOException
{
- if (statuses == null) {
+ Stopwatch timer = Stopwatch.createStarted();
+
+ if (statuses == null) {
final List<FileStatus> newStatuses = Lists.newArrayList();
for (final String pathStr:files) {
newStatuses.add(fs.getFileStatus(new Path(pathStr)));
}
statuses = newStatuses;
}
+ logger.debug("FileSelection.getStatuses() took {} ms, numFiles: {}",
+ timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : statuses.size());
+
return statuses;
}
@@ -104,6 +111,7 @@ public class FileSelection {
}
public FileSelection minusDirectories(DrillFileSystem fs) throws IOException {
+ Stopwatch timer = Stopwatch.createStarted();
final List<FileStatus> statuses = getStatuses(fs);
final int total = statuses.size();
final Path[] paths = new Path[total];
@@ -118,7 +126,10 @@ public class FileSelection {
}
}));
- return create(nonDirectories, null, selectionRoot);
+ final FileSelection fileSel = create(nonDirectories, null, selectionRoot);
+ logger.debug("FileSelection.minusDirectories() took {} ms, numFiles: {}",
+ timer.elapsed(TimeUnit.MILLISECONDS), total);
+ return fileSel;
}
public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
@@ -183,12 +194,16 @@ public class FileSelection {
}
public static FileSelection create(final DrillFileSystem fs, final String parent, final String path) throws IOException {
+ Stopwatch timer = Stopwatch.createStarted();
final Path combined = new Path(parent, removeLeadingSlash(path));
final FileStatus[] statuses = fs.globStatus(combined);
if (statuses == null) {
return null;
}
- return create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+ final FileSelection fileSel = create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+ logger.debug("FileSelection.create() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS));
+ return fileSel;
+
}
/**
http://git-wip-us.apache.org/repos/asf/drill/blob/7bfcb40a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index e2cc670..a924bea 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -233,7 +233,7 @@ public class ParquetFormatPlugin implements FormatPlugin{
// /a/b/c.parquet and the format of the selection root must match that of the file names
// otherwise downstream operations such as partition pruning can break.
final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
- final FileSelection newSelection = FileSelection.create(null, fileNames, metaRootPath.toString());
+ final FileSelection newSelection = new FileSelection(selection.getStatuses(fs), fileNames, metaRootPath.toString());
return ParquetFileSelection.create(newSelection, metadata);
} else {
// don't expand yet; ParquetGroupScan's metadata gathering operation
|