drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [drill] paul-rogers commented on a change in pull request #2026: DRILL-7330: Implement metadata usage for all format plugins
Date Sat, 14 Mar 2020 19:37:16 GMT
paul-rogers commented on a change in pull request #2026: DRILL-7330: Implement metadata usage
for all format plugins
URL: https://github.com/apache/drill/pull/2026#discussion_r392610460
 
 

 ##########
 File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/ColumnExplorer.java
 ##########
 @@ -292,39 +289,86 @@ public static int getPartitionDepth(FileSelection selection) {
    * @param includeFileImplicitColumns if file implicit columns should be included into the
result
    * @param fs                         file system
    * @param index                      index of row group to populate
+   * @param start                      start of row group to populate
+   * @param length                     length of row group to populate
    * @return implicit columns map
    */
   public Map<String, String> populateImplicitAndInternalColumns(Path filePath,
       List<String> partitionValues, boolean includeFileImplicitColumns, FileSystem
fs, int index, long start, long length) {
 
     Map<String, String> implicitValues =
-        new LinkedHashMap<>(populateImplicitColumns(filePath, partitionValues, includeFileImplicitColumns));
+        new LinkedHashMap<>(populateImplicitAndInternalColumns(filePath, partitionValues,
includeFileImplicitColumns, fs));
 
-    selectedInternalColumns.forEach((key, value) -> {
-      switch (value) {
+    selectedInternalColumns.forEach(
+        (key, value) -> implicitValues.put(key, getImplicitColumnValue(value, filePath,
fs, index, start, length)));
+
+    return implicitValues;
+  }
+
+  /**
+   * Returns implicit column value for specified implicit file column.
+   *
+   * @param column   implicit file column
+   * @param filePath file path, used to populate file implicit columns
+   * @param fs       file system
+   * @param index    row group index
+   * @param start    row group start
+   * @param length   row group length
+   * @return implicit column value for specified implicit file column
+   */
+  private static String getImplicitColumnValue(ImplicitFileColumn column, Path filePath,
+      FileSystem fs, Integer index, Long start, Long length) {
+    if (column instanceof ImplicitFileColumns) {
+      ImplicitFileColumns fileColumn = (ImplicitFileColumns) column;
+      return fileColumn.getValue(filePath);
+    } else if (column instanceof ImplicitInternalFileColumns) {
+      ImplicitInternalFileColumns fileColumn = (ImplicitInternalFileColumns) column;
+      switch (fileColumn) {
         case ROW_GROUP_INDEX:
-          implicitValues.put(key, String.valueOf(index));
-          break;
+          return index != null ? String.valueOf(index) : null;
         case ROW_GROUP_START:
-          implicitValues.put(key, String.valueOf(start));
-          break;
+          return start != null ? String.valueOf(start) : null;
         case ROW_GROUP_LENGTH:
-          implicitValues.put(key, String.valueOf(length));
-          break;
+          return length != null ? String.valueOf(length) : null;
         case PROJECT_METADATA:
-          implicitValues.put(key, Boolean.TRUE.toString());
-          break;
+          return Boolean.TRUE.toString();
         case LAST_MODIFIED_TIME:
           try {
-            implicitValues.put(key, String.valueOf(fs.getFileStatus(filePath).getModificationTime()));
+            return fs != null ? String.valueOf(fs.getFileStatus(filePath).getModificationTime())
: null;
           } catch (IOException e) {
             throw new DrillRuntimeException(e);
           }
-          break;
       }
-    });
+    }
+    return null;
+  }
 
-    return implicitValues;
+  /**
+   * Returns implicit column value for specified implicit file column.
+   *
+   * @param column   implicit file column
+   * @param filePath file path
+   * @param fs       file system
+   * @return implicit column value for specified implicit file column
+   */
+  public static String getImplicitColumnValue(ImplicitFileColumn column, Path filePath, FileSystem
fs) {
+    return getImplicitColumnValue(column, filePath, fs, null, null, null);
+  }
+
+  /**
+   * Returns list of implicit file columns which includes all elements from {@link ImplicitFileColumns},
+   * {@link ImplicitInternalFileColumns#LAST_MODIFIED_TIME} and {@link ImplicitInternalFileColumns#PROJECT_METADATA}
+   * columns.
+   *
+   * @return list of implicit file columns
+   */
+  public static List<ImplicitFileColumn> getImplicitFileColumns() {
+    List<ImplicitFileColumn> implicitColumns = new ArrayList<>();
+    Collections.addAll(implicitColumns, ImplicitFileColumns.values());
+
+    implicitColumns.add(ImplicitInternalFileColumns.LAST_MODIFIED_TIME);
+    implicitColumns.add(ImplicitInternalFileColumns.PROJECT_METADATA);
 
 Review comment:
   Please explain this one. What does it mean and how is it used? I'm a bit confused because
the reader does not know how its data will be used. When would the reader/scan set the value
for this field? Only in that EOF case discussed above? If so, then it may not be needed for the
"new" scan. (And, perhaps we should fix the case in the old one since it seems silly to include
billions of copies of this value so we can mark one or two empty batches, if I understand
the meaning correctly.)

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message