[ https://issues.apache.org/jira/browse/SPARK-17827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15571348#comment-15571348 ]
Pete Robbins edited comment on SPARK-17827 at 10/13/16 9:11 AM:
----------------------------------------------------------------
In Statistics.scala
{code}
case class StringColumnStat(statRow: InternalRow) {
println("StringColumnStat: " + statRow)
// The indices here must be consistent with `ColumnStatStruct.stringColumnStat`.
val numNulls: Long = statRow.getLong(0)
val avgColLen: Double = statRow.getDouble(1)
val maxColLen: Long = statRow.getLong(2) <<<<<< Actual type in statRow is Int
val ndv: Long = statRow.getLong(3)
}
case class BinaryColumnStat(statRow: InternalRow) {
// The indices here must be consistent with `ColumnStatStruct.binaryColumnStat`.
val numNulls: Long = statRow.getLong(0)
val avgColLen: Double = statRow.getDouble(1)
val maxColLen: Long = statRow.getLong(2) <<<<<< Actual type in statRow is Int
}
{code}
So either the code above should use getInt for maxColLen, or the code generating the row should create a Long.
was (Author: robbinspg):
In Statistics.scala
case class StringColumnStat(statRow: InternalRow) {
println("StringColumnStat: " + statRow)
// The indices here must be consistent with `ColumnStatStruct.stringColumnStat`.
val numNulls: Long = statRow.getLong(0)
val avgColLen: Double = statRow.getDouble(1)
val maxColLen: Long = statRow.getLong(2) <<<<<< Actual type in statRow is Int
val ndv: Long = statRow.getLong(3)
}
case class BinaryColumnStat(statRow: InternalRow) {
// The indices here must be consistent with `ColumnStatStruct.binaryColumnStat`.
val numNulls: Long = statRow.getLong(0)
val avgColLen: Double = statRow.getDouble(1)
val maxColLen: Long = statRow.getLong(2) <<<<<< Actual type in statRow is Int
}
So either the code above should be using getInt for the maxColLen or the code generating the
row should be creating a Long
> StatisticsColumnSuite failures on big endian platforms
> ------------------------------------------------------
>
> Key: SPARK-17827
> URL: https://issues.apache.org/jira/browse/SPARK-17827
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.1.0
> Environment: big endian
> Reporter: Pete Robbins
> Labels: big-endian
>
> https://issues.apache.org/jira/browse/SPARK-17073
> introduces new tests/functions that fail on big-endian platforms.
> Failing tests:
> org.apache.spark.sql.StatisticsColumnSuite.column-level statistics for string column
> org.apache.spark.sql.StatisticsColumnSuite.column-level statistics for binary column
> org.apache.spark.sql.StatisticsColumnSuite.column-level statistics for columns with different types
> org.apache.spark.sql.hive.StatisticsSuite.generate column-level statistics and load them from hive metastore
> All fail in checkColStat, e.g.:
> java.lang.AssertionError: assertion failed
> at scala.Predef$.assert(Predef.scala:156)
> at org.apache.spark.sql.StatisticsTest$.checkColStat(StatisticsTest.scala:92)
> at org.apache.spark.sql.StatisticsTest$$anonfun$checkColStats$1$$anonfun$apply$mcV$sp$1.apply(StatisticsTest.scala:43)
> at org.apache.spark.sql.StatisticsTest$$anonfun$checkColStats$1$$anonfun$apply$mcV$sp$1.apply(StatisticsTest.scala:40)
> at scala.collection.immutable.List.foreach(List.scala:381)
> at org.apache.spark.sql.StatisticsTest$$anonfun$checkColStats$1.apply$mcV$sp(StatisticsTest.scala:40)
> at org.apache.spark.sql.test.SQLTestUtils$class.withTable(SQLTestUtils.scala:168)
> at org.apache.spark.sql.StatisticsColumnSuite.withTable(StatisticsColumnSuite.scala:30)
> at org.apache.spark.sql.StatisticsTest$class.checkColStats(StatisticsTest.scala:33)
> at org.apache.spark.sql.StatisticsColumnSuite.checkColStats(StatisticsColumnSuite.scala:30)
> at org.apache.spark.sql.StatisticsColumnSuite$$anonfun$7.apply$mcV$sp(StatisticsColumnSuite.scala:171)
> at org.apache.spark.sql.StatisticsColumnSuite$$anonfun$7.apply(StatisticsColumnSuite.scala:160)
> at org.apache.spark.sql.StatisticsColumnSuite$$anonfun$7.apply(StatisticsColumnSuite.scala:160)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org
|