drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From par...@apache.org
Subject [5/7] drill git commit: DRILL-5429: Improve query performance for MapR DB JSON Tables. Cache and reuse table and tabletInfo per query instead of fetching them multiple times. Compute rowCount from tabletInfo instead of expensive tableStats call.
Date Sat, 13 May 2017 17:39:03 GMT
DRILL-5429: Improve query performance for MapR DB JSON Tables.
Cache and reuse table and tabletInfo per query instead of fetching them multiple times.
Compute rowCount from tabletInfo instead of the expensive tableStats call.

This closes #817


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/27c5f458
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/27c5f458
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/27c5f458

Branch: refs/heads/master
Commit: 27c5f4583a3440d17f483affe6bf0a3e5c0b2c68
Parents: 6741e68
Author: Padma Penumarthy <ppenumar97@yahoo.com>
Authored: Tue Apr 11 16:34:14 2017 -0700
Committer: Parth Chandra <pchandra@maprtech.com>
Committed: Fri May 12 17:07:23 2017 -0700

----------------------------------------------------------------------
 .../store/mapr/db/MapRDBPushFilterIntoScan.java |  7 +-
 .../store/mapr/db/json/JsonTableGroupScan.java  | 74 ++++++++++++++------
 2 files changed, 56 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/27c5f458/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBPushFilterIntoScan.java
----------------------------------------------------------------------
diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBPushFilterIntoScan.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBPushFilterIntoScan.java
index b363262..601fa02 100644
--- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBPushFilterIntoScan.java
+++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/MapRDBPushFilterIntoScan.java
@@ -137,11 +137,8 @@ public abstract class MapRDBPushFilterIntoScan extends StoragePluginOptimizerRul
       return; //no filter pushdown ==> No transformation.
     }
 
-    final JsonTableGroupScan newGroupsScan = new JsonTableGroupScan(groupScan.getUserName(),
-                                                                    groupScan.getStoragePlugin(),
-                                                                    groupScan.getFormatPlugin(),
-                                                                    newScanSpec,
-                                                                    groupScan.getColumns());
+    // clone the groupScan with the newScanSpec.
+    final JsonTableGroupScan newGroupsScan = groupScan.clone(newScanSpec);
     newGroupsScan.setFilterPushedDown(true);
 
     final ScanPrel newScanPrel = ScanPrel.create(scan, filter.getTraitSet(), newGroupsScan, scan.getRowType());

http://git-wip-us.apache.org/repos/asf/drill/blob/27c5f458/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/JsonTableGroupScan.java
----------------------------------------------------------------------
diff --git a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/JsonTableGroupScan.java b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/JsonTableGroupScan.java
index 0c8ffda..06c4e7a 100644
--- a/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/JsonTableGroupScan.java
+++ b/contrib/format-maprdb/src/main/java/org/apache/drill/exec/store/mapr/db/json/JsonTableGroupScan.java
@@ -58,7 +58,9 @@ public class JsonTableGroupScan extends MapRDBGroupScan {
 
   public static final String TABLE_JSON = "json";
 
-  private MapRDBTableStats tableStats;
+  private long totalRowCount;
+  private Table table;
+  private TabletInfo[] tabletInfos;
 
   private JsonScanSpec scanSpec;
 
@@ -90,7 +92,12 @@ public class JsonTableGroupScan extends MapRDBGroupScan {
     super(that);
     this.scanSpec = that.scanSpec;
     this.endpointFragmentMapping = that.endpointFragmentMapping;
-    this.tableStats = that.tableStats;
+
+    // Reusing the table handle, tabletInfos and totalRowCount saves expensive
+    // calls to MapR DB client to get them again.
+    this.table = that.table;
+    this.tabletInfos = that.tabletInfos;
+    this.totalRowCount = that.totalRowCount;
   }
 
   @Override
@@ -100,30 +107,57 @@ public class JsonTableGroupScan extends MapRDBGroupScan {
     return newScan;
   }
 
+  /**
+   * Create a new groupScan, which is a clone of this.
+   * Initialize scanSpec.
+   * We should recompute regionsToScan as it depends upon scanSpec.
+   * @param scanSpec
+   */
+  public JsonTableGroupScan clone(JsonScanSpec scanSpec) {
+    JsonTableGroupScan newScan = new JsonTableGroupScan(this);
+    newScan.scanSpec = scanSpec;
+    newScan.computeRegionsToScan();
+    return newScan;
+  }
+
+  /**
+   * Compute regions to scan based on the scanSpec
+   */
+  private void computeRegionsToScan() {
+    boolean foundStartRegion = false;
+
+    regionsToScan = new TreeMap<TabletFragmentInfo, String>();
+    for (TabletInfo tabletInfo : tabletInfos) {
+      TabletInfoImpl tabletInfoImpl = (TabletInfoImpl) tabletInfo;
+      if (!foundStartRegion && !isNullOrEmpty(scanSpec.getStartRow()) && !tabletInfoImpl.containsRow(scanSpec.getStartRow())) {
+        continue;
+      }
+      foundStartRegion = true;
+      regionsToScan.put(new TabletFragmentInfo(tabletInfoImpl), tabletInfo.getLocations()[0]);
+      if (!isNullOrEmpty(scanSpec.getStopRow()) && tabletInfoImpl.containsRow(scanSpec.getStopRow())) {
+        break;
+      }
+    }
+  }
+
   private void init() {
     logger.debug("Getting tablet locations");
     try {
       Configuration conf = new Configuration();
-      Table t = MapRDB.getTable(scanSpec.getTableName());
-      TabletInfo[] tabletInfos = t.getTabletInfos(scanSpec.getCondition());
-      tableStats = new MapRDBTableStats(conf, scanSpec.getTableName());
 
-      boolean foundStartRegion = false;
-      regionsToScan = new TreeMap<TabletFragmentInfo, String>();
+      // Fetch table and tabletInfo only once and cache.
+      table = MapRDB.getTable(scanSpec.getTableName());
+      tabletInfos = table.getTabletInfos(scanSpec.getCondition());
+
+      // Calculate totalRowCount for the table from tabletInfos estimatedRowCount.
+      // This will avoid calling expensive MapRDBTableStats API to get total rowCount, avoiding
+      // duplicate work and RPCs to MapR DB server.
       for (TabletInfo tabletInfo : tabletInfos) {
-        TabletInfoImpl tabletInfoImpl = (TabletInfoImpl) tabletInfo;
-        if (!foundStartRegion
-            && !isNullOrEmpty(scanSpec.getStartRow())
-            && !tabletInfoImpl.containsRow(scanSpec.getStartRow())) {
-          continue;
-        }
-        foundStartRegion = true;
-        regionsToScan.put(new TabletFragmentInfo(tabletInfoImpl), tabletInfo.getLocations()[0]);
-        if (!isNullOrEmpty(scanSpec.getStopRow())
-            && tabletInfoImpl.containsRow(scanSpec.getStopRow())) {
-          break;
-        }
+        totalRowCount += tabletInfo.getEstimatedNumRows();
       }
+
+      computeRegionsToScan();
+
     } catch (Exception e) {
       throw new DrillRuntimeException("Error getting region info for table: " + scanSpec.getTableName(), e);
     }
@@ -153,7 +187,7 @@ public class JsonTableGroupScan extends MapRDBGroupScan {
   @Override
   public ScanStats getScanStats() {
     //TODO: look at stats for this.
-    long rowCount = (long) ((scanSpec.getSerializedFilter() != null ? .5 : 1) * tableStats.getNumRows());
+    long rowCount = (long) ((scanSpec.getSerializedFilter() != null ? .5 : 1) * totalRowCount);
     int avgColumnSize = 10;
     int numColumns = (columns == null || columns.isEmpty()) ? 100 : columns.size();
     return new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, rowCount, 1, avgColumnSize * numColumns * rowCount);


Mime
View raw message