[ https://issues.apache.org/jira/browse/SPARK-5821?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Cheng Lian updated SPARK-5821:
------------------------------
Description:
When you run CTAS command such as
{code:sql}
CREATE TEMPORARY TABLE jsonTable
USING org.apache.spark.sql.json.DefaultSource
OPTIONS (
path /a/b/c/d
) AS
SELECT a, b FROM jt
{code}
you will run into failure if you don't have write permission for directory /a/b/c whether
d is a directory or file.
{noformat}
Exception in thread "main" org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory
file:/a/b/c/d already exists
at org.apache.hadoop.mapred.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:132)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1053)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:954)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:863)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1284)
at org.apache.spark.sql.json.DefaultSource.createRelation(JSONRelation.scala:81)
at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:300)
at org.apache.spark.sql.sources.CreateTempTableUsingAsSelect.run(ddl.scala:388)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:55)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:55)
at org.apache.spark.sql.execution.ExecutedCommand.execute(commands.scala:65)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:927)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:927)
at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:71)
at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:58)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:35)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:778)
at org.apache.spark.sql.Test$.main(Test.scala:149)
at org.apache.spark.sql.Test.main(Test.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
{noformat}
was:
When you run CTAS command such as
{code sql}
CREATE TEMPORARY TABLE jsonTable
USING org.apache.spark.sql.json.DefaultSource
OPTIONS (
path /a/b/c/d
) AS
SELECT a, b FROM jt
{code}
you will run into failure if you don't have write permission for directory /a/b/c whether
d is a directory or file.
{noformat}
Exception in thread "main" org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory
file:/a/b/c/d already exists
at org.apache.hadoop.mapred.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:132)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1053)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:954)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:863)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1284)
at org.apache.spark.sql.json.DefaultSource.createRelation(JSONRelation.scala:81)
at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:300)
at org.apache.spark.sql.sources.CreateTempTableUsingAsSelect.run(ddl.scala:388)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:55)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:55)
at org.apache.spark.sql.execution.ExecutedCommand.execute(commands.scala:65)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:927)
at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:927)
at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:71)
at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:58)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:35)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:778)
at org.apache.spark.sql.Test$.main(Test.scala:149)
at org.apache.spark.sql.Test.main(Test.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
{noformat}
> JSONRelation should check if delete is successful for the overwrite operation.
> ------------------------------------------------------------------------------
>
> Key: SPARK-5821
> URL: https://issues.apache.org/jira/browse/SPARK-5821
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 1.3.0
> Reporter: Yanbo Liang
>
> When you run CTAS command such as
> {code:sql}
> CREATE TEMPORARY TABLE jsonTable
> USING org.apache.spark.sql.json.DefaultSource
> OPTIONS (
> path /a/b/c/d
> ) AS
> SELECT a, b FROM jt
> {code}
> you will run into failure if you don't have write permission for directory /a/b/c whether d is a directory or file.
> {noformat}
> Exception in thread "main" org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory file:/a/b/c/d already exists
> at org.apache.hadoop.mapred.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:132)
> at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1053)
> at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:954)
> at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:863)
> at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1284)
> at org.apache.spark.sql.json.DefaultSource.createRelation(JSONRelation.scala:81)
> at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:300)
> at org.apache.spark.sql.sources.CreateTempTableUsingAsSelect.run(ddl.scala:388)
> at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:55)
> at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:55)
> at org.apache.spark.sql.execution.ExecutedCommand.execute(commands.scala:65)
> at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:927)
> at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:927)
> at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:71)
> at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:58)
> at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:35)
> at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:778)
> at org.apache.spark.sql.Test$.main(Test.scala:149)
> at org.apache.spark.sql.Test.main(Test.scala)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:483)
> at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org
|