flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (FLINK-2951) Add Union operator to Table API.
Date Mon, 02 Nov 2015 09:15:27 GMT

    [ https://issues.apache.org/jira/browse/FLINK-2951?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14984927#comment-14984927
] 

ASF GitHub Bot commented on FLINK-2951:
---------------------------------------

Github user aljoscha commented on a diff in the pull request:

    https://github.com/apache/flink/pull/1315#discussion_r43608448
  
    --- Diff: flink-staging/flink-table/src/test/java/org/apache/flink/api/java/table/test/UnionITCase.java
---
    @@ -0,0 +1,170 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.api.java.table.test;
    +
    +import org.apache.flink.api.common.InvalidProgramException;
    +import org.apache.flink.api.java.DataSet;
    +import org.apache.flink.api.java.ExecutionEnvironment;
    +import org.apache.flink.api.java.table.TableEnvironment;
    +import org.apache.flink.api.java.tuple.Tuple3;
    +import org.apache.flink.api.java.tuple.Tuple5;
    +import org.apache.flink.api.table.ExpressionException;
    +import org.apache.flink.api.table.Row;
    +import org.apache.flink.api.table.Table;
    +import org.apache.flink.core.fs.FileSystem;
    +import org.apache.flink.test.javaApiOperators.util.CollectionDataSets;
    +import org.apache.flink.test.util.MultipleProgramsTestBase;
    +import org.junit.After;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TemporaryFolder;
    +import org.junit.runner.RunWith;
    +import org.junit.runners.Parameterized;
    +
    +@RunWith(Parameterized.class)
    +public class UnionITCase extends MultipleProgramsTestBase {
    +
    +
    +	public UnionITCase(TestExecutionMode mode) {
    +		super(mode);
    +	}
    +
    +	private String resultPath;
    +	private String expected = "";
    +
    +	@Rule
    +	public TemporaryFolder tempFolder = new TemporaryFolder();
    +
    +	@Before
    +	public void before() throws Exception {
    +		resultPath = tempFolder.newFile().toURI().toString();
    +	}
    +
    +	@After
    +	public void after() throws Exception {
    +		compareResultsByLinesInMemory(expected, resultPath);
    +	}
    +
    +	@Test
    +	public void testJoin() throws Exception {
    +		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    +		TableEnvironment tableEnv = new TableEnvironment();
    +
    +		DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
    +		DataSet<Tuple3<Integer, Long, String>> ds2 = CollectionDataSets.getSmall3TupleDataSet(env);
    +
    +		Table in1 = tableEnv.fromDataSet(ds1, "a, b, c");
    +		Table in2 = tableEnv.fromDataSet(ds2, "a, b, c");
    +
    +		Table result = in1.unionAll(in2).select("c");
    +
    +		DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    +		ds.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    +
    +		env.execute();
    +
    +		expected = "Hi\n" + "Hello\n" + "Hello world\n" + "Hi\n" + "Hello\n" + "Hello world\n";
    +	}
    +
    +	@Test
    +	public void testJoinWithFilter() throws Exception {
    +		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    +		TableEnvironment tableEnv = new TableEnvironment();
    +
    +		DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
    +		DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
    +
    +		Table in1 = tableEnv.fromDataSet(ds1, "a, b, c");
    +		Table in2 = tableEnv.fromDataSet(ds2, "a, b, d, c, e").select("a, b, c");
    +
    +		Table result = in1.unionAll(in2).where("b < 2").select("c");
    +
    +		DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    +		ds.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    +
    +		env.execute();
    +
    +		expected = "Hi\n" + "Hallo\n";
    +	}
    +
    +	@Test(expected = ExpressionException.class)
    +	public void testUnionFieldsNameNotOverlap1() throws Exception {
    +		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    +		TableEnvironment tableEnv = new TableEnvironment();
    +
    +		DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
    +		DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
    +
    +		Table in1 = tableEnv.fromDataSet(ds1, "a, b, c");
    +		Table in2 = tableEnv.fromDataSet(ds2, "d, e, f, g, h");
    +
    +		Table result = in1.unionAll(in2);
    +
    +		DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    +		ds.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    +
    +		env.execute();
    +
    +		expected = "";
    +	}
    +
    +	@Test(expected = InvalidProgramException.class)
    +	public void testUnionFieldsNameNotOverlap2() throws Exception {
    +		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    +		TableEnvironment tableEnv = new TableEnvironment();
    +
    +		DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
    +		DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
    +
    +		Table in1 = tableEnv.fromDataSet(ds1, "a, b, c");
    +		Table in2 = tableEnv.fromDataSet(ds2, "a, b, c, d, e").select("a, b, c");
    +
    +		Table result = in1.unionAll(in2);
    +
    +		DataSet<Row> ds = tableEnv.toDataSet(result, Row.class);
    +		ds.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    +
    +		env.execute();
    +
    +		expected = "";
    +	}
    +
    +	@Test
    +	public void testJoinWithAggregation() throws Exception {
    --- End diff --
    
    Should be unionWithAggregation, I assume?


> Add Union operator to Table API.
> --------------------------------
>
>                 Key: FLINK-2951
>                 URL: https://issues.apache.org/jira/browse/FLINK-2951
>             Project: Flink
>          Issue Type: New Feature
>          Components: Documentation, Table API
>            Reporter: Chengxiang Li
>            Assignee: Chengxiang Li
>            Priority: Minor
>
> Currently, union operation is supported by DataSet/DataStream API, not available in Table
API. To union two tables, user has to transform the parameter input to DataSet/DataStream,
for example:
> {code:java}
> val unionDs = left.union(right.toDataSet[Row])
> {code}
> It should be more API friendly to user if add union operation to Table API, like:
> {code:java}
> val unionDs = left.union(right)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message