drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [drill] dvjyothsna commented on a change in pull request #1723: DRILL-7063: Seperate metadata cache file into summary, file metadata
Date Sun, 07 Apr 2019 07:55:24 GMT
dvjyothsna commented on a change in pull request #1723: DRILL-7063: Seperate metadata cache
file into summary, file metadata
URL: https://github.com/apache/drill/pull/1723#discussion_r272822424
 
 

 ##########
 File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/Metadata_V4.java
 ##########
 @@ -0,0 +1,540 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet.metadata;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import com.fasterxml.jackson.databind.KeyDeserializer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.drill.common.expression.SchemaPath;
+
+import static org.apache.drill.exec.store.parquet.metadata.MetadataBase.ColumnMetadata;
+import static org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetFileMetadata;
+import static org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetTableMetadataBase;
+import static org.apache.drill.exec.store.parquet.metadata.MetadataBase.RowGroupMetadata;
+import static org.apache.drill.exec.store.parquet.metadata.MetadataBase.ColumnTypeMetadata;
+import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.V4;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+
+public class Metadata_V4 {
+
+  /**
+   * Parquet table metadata in the V4 layout: the state is split into a {@link MetadataSummary}
+   * part and a {@link FileMetadata} part, and most accessors delegate to one of the two.
+   */
+  public static class ParquetTableMetadata_v4 extends ParquetTableMetadataBase {
+
+    // Each part starts as a default-constructed instance; constructors replace or populate it.
+    MetadataSummary metadataSummary = new MetadataSummary();
+    FileMetadata fileMetadata = new FileMetadata();
+
+    /**
+     * Builds table metadata around an existing summary. The file metadata part stays
+     * default-constructed.
+     *
+     * @param metadataSummary summary part to use
+     */
+    public ParquetTableMetadata_v4(MetadataSummary metadataSummary) {
+      this.metadataSummary = metadataSummary;
+    }
+
+    /**
+     * Builds table metadata around an existing summary and existing file metadata.
+     *
+     * @param metadataSummary summary part to use
+     * @param fileMetadata file metadata part to use
+     */
+    public ParquetTableMetadata_v4(MetadataSummary metadataSummary, FileMetadata fileMetadata) {
+      this(metadataSummary);
+      this.fileMetadata = fileMetadata;
+    }
+
+    /**
+     * Builds table metadata from individual values. Column type information is not passed
+     * separately; it is taken from the summary of {@code parquetTableMetadata}.
+     *
+     * @param metadataVersion metadata version string
+     * @param parquetTableMetadata source of the column type information; it is cast to
+     *                             {@code ParquetTableMetadata_v4}
+     * @param files file-level metadata entries
+     * @param directories directories stored in the summary
+     * @param drillVersion Drill version string
+     * @param totalRowCount total row count stored in the summary
+     * @param allColumnsInteresting value of the summary's all-columns-interesting flag
+     */
+    public ParquetTableMetadata_v4(String metadataVersion, ParquetTableMetadataBase parquetTableMetadata,
+                                   List<ParquetFileMetadata_v4> files, List<Path> directories,
+                                   String drillVersion, long totalRowCount, boolean allColumnsInteresting) {
+      final ParquetTableMetadata_v4 source = (ParquetTableMetadata_v4) parquetTableMetadata;
+
+      // summary part
+      this.metadataSummary.metadataVersion = metadataVersion;
+      this.metadataSummary.drillVersion = drillVersion;
+      this.metadataSummary.directories = directories;
+      this.metadataSummary.columnTypeInfo = source.metadataSummary.columnTypeInfo;
+      this.metadataSummary.totalRowCount = totalRowCount;
+      this.metadataSummary.allColumnsInteresting = allColumnsInteresting;
+
+      // file metadata part
+      this.fileMetadata.files = files;
+    }
+
+    /**
+     * Looks up the column type info for a column via the summary.
+     *
+     * @param name column name parts
+     * @return whatever the summary returns for that name
+     */
+    public ColumnTypeMetadata_v4 getColumnTypeInfo(String[] name) {
+      return this.metadataSummary.getColumnTypeInfo(name);
+    }
+
+    /** Returns the directories reported by the summary. */
+    @Override
+    public List<Path> getDirectories() {
+      return this.metadataSummary.getDirectories();
+    }
+
+    /** Returns the files reported by the file metadata part. */
+    @Override
+    public List<? extends ParquetFileMetadata> getFiles() {
+      return this.fileMetadata.getFiles();
+    }
+
+    /** Returns the metadata version reported by the summary. */
+    @Override
+    public String getMetadataVersion() {
+      return this.metadataSummary.getMetadataVersion();
+    }
+
+    /**
+     * Replaces the directory list and the file metadata list with the results of the
+     * {@code MetadataPathUtils} absolute-path conversions, so that relative paths become
+     * absolute ones.
+     *
+     * @param baseDir base parent directory
+     */
+    public void updateRelativePaths(String baseDir) {
+      // directories -> absolute paths
+      this.metadataSummary.directories =
+          MetadataPathUtils.convertToAbsolutePaths(this.metadataSummary.directories, baseDir);
+
+      // files -> entries with absolute paths
+      final List<ParquetFileMetadata_v4> absoluteFiles = (List<ParquetFileMetadata_v4>)
+          MetadataPathUtils.convertToFilesWithAbsolutePaths(this.fileMetadata.files, baseDir);
+      this.fileMetadata.files = absoluteFiles;
+    }
+
+    /** Hands the new file list over to the file metadata part. */
+    @Override
+    public void assignFiles(List<? extends ParquetFileMetadata> newFiles) {
+      fileMetadata.assignFiles(newFiles);
+    }
+
+    /** Always {@code true} for this metadata version. */
+    @Override
+    public boolean hasColumnMetadata() {
+      return true;
+    }
+
+    /** Returns the primitive type recorded in the column's type info. */
+    @Override
+    public PrimitiveType.PrimitiveTypeName getPrimitiveType(String[] columnName) {
+      final ColumnTypeMetadata_v4 typeInfo = getColumnTypeInfo(columnName);
+      return typeInfo.primitiveType;
+    }
+
+    /** Returns the original type recorded in the column's type info. */
+    @Override
+    public OriginalType getOriginalType(String[] columnName) {
+      final ColumnTypeMetadata_v4 typeInfo = getColumnTypeInfo(columnName);
+      return typeInfo.originalType;
+    }
+
+    /** Returns the repetition level recorded in the column's type info. */
+    @Override
+    public Integer getRepetitionLevel(String[] columnName) {
+      final ColumnTypeMetadata_v4 typeInfo = getColumnTypeInfo(columnName);
+      return typeInfo.repetitionLevel;
+    }
+
+    /** Returns the definition level recorded in the column's type info. */
+    @Override
+    public Integer getDefinitionLevel(String[] columnName) {
+      final ColumnTypeMetadata_v4 typeInfo = getColumnTypeInfo(columnName);
+      return typeInfo.definitionLevel;
+    }
+
+    /** Returns the scale recorded in the column's type info. */
+    @Override
+    public Integer getScale(String[] columnName) {
+      final ColumnTypeMetadata_v4 typeInfo = getColumnTypeInfo(columnName);
+      return typeInfo.scale;
+    }
+
+    /** Returns the precision recorded in the column's type info. */
+    @Override
+    public Integer getPrecision(String[] columnName) {
+      final ColumnTypeMetadata_v4 typeInfo = getColumnTypeInfo(columnName);
+      return typeInfo.precision;
+    }
+
+    /** Always {@code true} for this metadata version. */
+    @Override
+    public boolean isRowGroupPrunable() {
+      return true;
+    }
+
+    /**
+     * Returns a new instance that wraps the same summary and file metadata objects as this
+     * one; the two parts themselves are not copied.
+     */
+    @Override
+    public ParquetTableMetadataBase clone() {
+      return new ParquetTableMetadata_v4(this.metadataSummary, this.fileMetadata);
+    }
+
+    /** Returns the Drill version stored in the summary. */
+    @Override
+    public String getDrillVersion() {
+      return this.metadataSummary.drillVersion;
+    }
+
+    /** Returns the summary part itself (not a copy). */
+    public MetadataSummary getSummary() {
+      return this.metadataSummary;
+    }
+
+    /** Returns the total row count reported by the summary. */
+    public long getTotalRowCount() {
+      return this.metadataSummary.getTotalRowCount();
+    }
+
+    /** Returns the total null count recorded in the column's type info. */
+    public long getTotalNullCount(String[] columnName) {
+      final ColumnTypeMetadata_v4 typeInfo = getColumnTypeInfo(columnName);
+      return typeInfo.totalNullCount;
+    }
+
+    /** Returns the all-columns-interesting flag reported by the summary. */
+    public boolean isAllColumnsInteresting() {
+      return this.metadataSummary.isAllColumnsInteresting();
+    }
+
+    /** Returns the summary's column type info map itself (not a copy). */
+    public ConcurrentHashMap<ColumnTypeMetadata_v4.Key, ColumnTypeMetadata_v4> getColumnTypeInfoMap() {
+      return this.metadataSummary.columnTypeInfo;
+    }
+
+    /** Returns a new list holding the values of the summary's column type info map. */
+    @Override
+    public List<? extends MetadataBase.ColumnTypeMetadata> getColumnTypeInfoList() {
+      return new ArrayList<>(this.metadataSummary.columnTypeInfo.values());
+    }
+
+    /** Passes the new total row count on to the summary. */
+    public void setTotalRowCount(long totalRowCount) {
+      this.metadataSummary.setTotalRowCount(totalRowCount);
+    }
+
+    /** Stores the all-columns-interesting flag in the summary. */
+    public void setAllColumnsInteresting(boolean allColumnsInteresting) {
+      this.metadataSummary.allColumnsInteresting = allColumnsInteresting;
+    }
+  }
+
+  /**
+   * Metadata for a single parquet file: its path, its length and its row groups.
+   */
+  public static class ParquetFileMetadata_v4 extends ParquetFileMetadata {
+    @JsonProperty
+    public Path path;
+    @JsonProperty
+    public Long length;
+    @JsonProperty
+    public List<RowGroupMetadata_v4> rowGroups;
+
+    /** No-argument constructor; none of the fields are assigned. */
+    public ParquetFileMetadata_v4() {
+    }
+
+    /**
+     * @param path path of the parquet file
+     * @param length length of the parquet file
+     * @param rowGroups metadata of the file's row groups
+     */
+    public ParquetFileMetadata_v4(Path path, Long length, List<RowGroupMetadata_v4> rowGroups) {
+      this.rowGroups = rowGroups;
+      this.length = length;
+      this.path = path;
+    }
+
+    /** Renders the path and the row groups; the length is not included. */
+    @Override
+    public String toString() {
+      final String layout = "path: %s rowGroups: %s";
+      return String.format(layout, this.path, this.rowGroups);
+    }
+
+    /** Returns the file path. */
+    @Override
+    @JsonIgnore
+    public Path getPath() {
+      return this.path;
+    }
+
+    /** Returns the file length. */
+    @Override
+    @JsonIgnore
+    public Long getLength() {
+      return this.length;
+    }
+
+    /** Returns the row group metadata list. */
+    @Override
+    @JsonIgnore
+    public List<? extends RowGroupMetadata> getRowGroups() {
+      return this.rowGroups;
+    }
+  }
+
+
+  /**
+   * A struct that contains the metadata for a parquet row group
+   */
+  public static class RowGroupMetadata_v4 extends RowGroupMetadata {
+    @JsonProperty
+    public Long start;
+    @JsonProperty
+    public Long length;
+    @JsonProperty
+    public Long rowCount;
+    @JsonProperty
+    public Map<String, Float> hostAffinity;
+    @JsonProperty
+    public List<ColumnMetadata_v4> columns;
+
+    public RowGroupMetadata_v4() {
+    }
+
+    public RowGroupMetadata_v4(Long start, Long length, Long rowCount, Map<String, Float>
hostAffinity,
+                               List<ColumnMetadata_v4> columns) {
+      this.start = start;
+      this.length = length;
+      this.rowCount = rowCount;
+      this.hostAffinity = hostAffinity;
+      this.columns = columns;
+    }
+
+    @Override
+    public Long getStart() {
+      return start;
+    }
+
+    @Override
+    public Long getLength() {
+      return length;
+    }
+
+    @Override
+    public Long getRowCount() {
+      return rowCount;
+    }
+
+    @Override
+    public Map<String, Float> getHostAffinity() {
+      return hostAffinity;
+    }
+
+    @Override
+    public List<? extends ColumnMetadata> getColumns() {
+      return columns;
+    }
+  }
+
+
+  public static class ColumnTypeMetadata_v4 extends ColumnTypeMetadata {
+    @JsonProperty
+    public String[] name;
+    @JsonProperty
+    public PrimitiveType.PrimitiveTypeName primitiveType;
+    @JsonProperty
+    public OriginalType originalType;
+    @JsonProperty
+    public int precision;
+    @JsonProperty
+    public int scale;
+    @JsonProperty
+    public int repetitionLevel;
+    @JsonProperty
+    public int definitionLevel;
+    @JsonProperty
+    public long totalNullCount = 0;
+    @JsonProperty
+    public boolean isInteresting = false;
+
+    // Key to find by name only
+    @JsonIgnore
+    private Key key;
+
+    public ColumnTypeMetadata_v4() {
+    }
+
+    /**
+     * Creates fully populated column type metadata and derives the lookup key from the name.
+     *
+     * @param name column name parts
+     * @param primitiveType parquet primitive type of the column
+     * @param originalType parquet original type of the column
+     * @param precision precision value
+     * @param scale scale value
+     * @param repetitionLevel repetition level
+     * @param definitionLevel definition level
+     * @param totalNullCount total null count for the column
+     * @param isInteresting value of the is-interesting flag
+     */
+    public ColumnTypeMetadata_v4(String[] name,
+                                 PrimitiveType.PrimitiveTypeName primitiveType,
+                                 OriginalType originalType,
+                                 int precision,
+                                 int scale,
+                                 int repetitionLevel,
+                                 int definitionLevel,
+                                 long totalNullCount,
+                                 boolean isInteresting) {
+      // identity: the name and the key built from it
+      this.name = name;
+      this.key = new Key(name);
+
+      // type description
+      this.primitiveType = primitiveType;
+      this.originalType = originalType;
+      this.precision = precision;
+      this.scale = scale;
+      this.repetitionLevel = repetitionLevel;
+      this.definitionLevel = definitionLevel;
+
+      // remaining values
+      this.totalNullCount = totalNullCount;
+      this.isInteresting = isInteresting;
+    }
+
+    /**
+     * Returns the lookup key; it is only assigned by the constructor that takes a name.
+     */
+    @JsonIgnore
+    private Key key() {
+      return key;
+    }
+
+    public static class Key {
+      private SchemaPath name;
+      private int hashCode = 0;
+
+      public Key(String[] name) {
+        this.name = SchemaPath.getCompoundPath(name);
+      }
+
+      public Key(SchemaPath name) {
+        this.name = new SchemaPath(name);
+      }
+
+      /**
+       * Returns the hash code of the wrapped name, caching it in the {@code hashCode} field.
+       * A cached value of zero means "not computed yet".
+       */
+      @Override
+      public int hashCode() {
+        if (hashCode != 0) {
+          return hashCode;
+        }
+        hashCode = name.hashCode();
+        return hashCode;
+      }
+
+      /**
+       * Two keys are equal when they are of exactly the same class and wrap equal names.
+       */
+      @Override
+      public boolean equals(Object obj) {
+        if (obj == null || getClass() != obj.getClass()) {
+          return false;
+        }
+        final Key that = (Key) obj;
+        return name.equals(that.name);
+      }
+
+      /** Returns the string form of the wrapped name. */
+      @Override
+      public String toString() {
+        final String rendered = name.toString();
+        return rendered;
+      }
+
+      public static class DeSerializer extends KeyDeserializer {
+
+        /** Creates the key deserializer; there is no state to initialize. */
+        public DeSerializer() {
+          // intentionally empty
+        }
+
+        @Override
+        public Object deserializeKey(String key, com.fasterxml.jackson.databind.DeserializationContext
ctxt) {
+          // key string should contain '`' char if the field was serialized as SchemaPath
object
 
 Review comment:
   It looks like just adding a SchemaPath deserializer does not work.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message