trafodion-codereview mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From DaveBirdsall <...@git.apache.org>
Subject [GitHub] incubator-trafodion pull request #1206: [TRAFODION-2617] Optionally push est...
Date Tue, 15 Aug 2017 19:08:17 GMT
Github user DaveBirdsall commented on a diff in the pull request:

    https://github.com/apache/incubator-trafodion/pull/1206#discussion_r133275366
  
    --- Diff: core/sqf/src/seatrans/hbase-trx/src/main/java/org/apache/hadoop/hbase/coprocessor/transactional/TrxRegionEndpoint.java.tmpl
---
    @@ -7335,6 +7338,245 @@ TrxTransactionState isTransactionOnCommittedList(final long transactionId)
       public void setClosing(boolean value) {
         closing.set(value);
       }
    +
    +  // The following are methods for the Trafodion SQL coprocessors.
    +
    +  // Returns true if the qualifier of nextKv is less than or equal to the qualifier of currKv; compares two qualifiers as unsigned, lexicographically ordered byte strings
    +
    +  static private boolean isQualifierLessThanOrEqual(Cell nextKv, Cell currKv) {
    +    int currLength = currKv.getQualifierLength(); 
    +    int currOffset = currKv.getQualifierOffset();  // added to the index when reading currQual below
    +    byte [] currQual = currKv.getQualifierArray();
    +    int nextLength = nextKv.getQualifierLength(); 
    +    int nextOffset = nextKv.getQualifierOffset();  // added to the index when reading nextQual below
    +    byte [] nextQual = nextKv.getQualifierArray();   
    +
    +    int minLength = nextLength;  // ends up as the shorter of the two qualifier lengths
    +    if (currLength < nextLength)
    +      minLength = currLength;
    +
    +    for (int i = 0; i < minLength; i++) {
    +      // ugh... Java bytes are signed, so we have to do some gymnastics to make this an
    +      // unsigned comparison: a negative byte is mapped to 128..255 by adding 256
    +      int nextQualI = nextQual[i+nextOffset];
    +      if (nextQualI < 0)
    +        nextQualI = nextQualI + 256;
    +      int currQualI = currQual[i+currOffset];
    +      if (currQualI < 0)
    +        currQualI = currQualI + 256;
    +
    +      if (nextQualI < currQualI)
    +        return true;   // first differing byte decides: next is less
    +      else if (nextQualI > currQualI)
    +        return false;  // first differing byte decides: next is greater
    +      // else equal, move on to next byte
    +    }
    +
    +    // the first minLength bytes are the same; the shorter array
    +    // is regarded as less, and equal lengths mean the qualifiers are equal
    +
    +    boolean rc = (nextLength <= currLength);      
    +
    +    return rc;
    +  }
    +
    +  
    +  // debugging function: renders a byte array as a string of two-digit lowercase hex values, one pair per byte, with no separators
    +
    +  private static String bytesToHex(byte[] in) {
    +    final StringBuilder builder = new StringBuilder();
    +    for(byte b : in) {
    +        builder.append(String.format("%02x", b));  // zero-padded to width 2
    +    }
    +    return builder.toString();
    +  }
    +
    +  // Returns data needed to estimate the row count in the table.
    +  // Entry counts and total size in bytes are extracted from HFiles.
    +  // For non-aligned tables (numCols > 1), sampling is done in order
    +  // to estimate how many entries make up a row on average.
    +
    +  @Override
    +  public void trafEstimateRowCount(RpcController controller,
    +			           TrafEstimateRowCountRequest request,
    +			           RpcCallback<TrafEstimateRowCountResponse> done) {
    +
    +    TrafEstimateRowCountResponse response = null;
    +    Throwable t = null;
    +
    +    int numCols = request.getNumCols();
    +
    +    // To estimate incidence of nulls, read the first 500 rows worth
    +    // of KeyValues.
    +    final int ROWS_TO_SAMPLE = 500;
    +    int putKVsSampled = 0;
    +    int nonPutKVsSampled = 0;
    +    int missingKVsCount = 0;
    +    int sampleRowCount = 0;
    +    long totalEntries = 0;   // KeyValues in all HFiles for table
    +    long totalSizeBytes = 0; // Size of all HFiles for table 
    +    long estimatedTotalPuts = 0;
    +    boolean more = true;
    +    long estimatedRowCount = 0;
    +
    +    // Access the file system to go directly to the table's HFiles.
    +    // Create a reader for the file to access the KV entry count and
    +    // size in bytes stored in the trailer block.
    +    
    +    // For aligned format tables, the number of rows equals the
    +    // number of KeyValue entries. For non-aligned format, it's
    +    // more complicated. There is a KeyValue entry for each 
    +    // column value, except the KeyValue may be missing because
    +    // the column has a null value or because the column has a
    +    // default value that has not been materialized.
    +
    +    // For non-aligned format tables, we sample some rows and
    +    // count how many entries there are per row, so our caller
    +    // can estimate the average number of missing values per row.
    +    // Once our caller has that estimate, it can estimate the
    +    // number of rows.
    +
    +    // We only do the sampling for non-aligned tables (numCols > 1),
    +    // and we only do it on the first HFile of the first Region.
    +    // The first Region is detected by having a null start key.
    +
    +    CacheConfig cacheConf = new CacheConfig(config);
    +    byte[] startKey = regionInfo.getStartKey();
    +
    +    // Get the list of store files in column family '#1'. There might
    +    // not be any. For example, a new Trafodion table might be entirely
    +    // in memstore with nothing written out yet. Or we may be accessing
    +    // a native HBase table which lacks the '#1' column family.
    +    List<String> storeFileList = null;
    +    try {
    +      byte[] familyName = "#1".getBytes();
    +      byte[][] familyNames = { familyName };
    +      storeFileList = m_Region.getStoreFileList(familyNames);
    +    }
    +    catch (IllegalArgumentException iae) {
    +      // this gets thrown when the column family doesn't exist;
    +      // we'll just use an empty list instead
    +      storeFileList = new ArrayList<String>();
    +    }
    +
    +    if (LOG.isDebugEnabled()) {
    +      LOG.debug("Trafodion estimate row count sees " + storeFileList.size() + " files.");
    +      for (String sfn : storeFileList) {
    +        LOG.debug("*** " + sfn);
    +      }
    +      if (startKey == null)
    +        LOG.debug("startKey is null.");
    +      else
    +        LOG.debug("startKey.length is " + startKey.length + ", startKey is hex " + bytesToHex(startKey));
    --- End diff --
    
    No, because we are still getting entry and byte counts from each of the HFiles no matter
what region we are reading.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message