flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (FLINK-2785) Implement Graph's fromCsvReader in Gelly-Scala
Date Tue, 06 Oct 2015 16:23:26 GMT

    [ https://issues.apache.org/jira/browse/FLINK-2785?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14945294#comment-14945294
] 

ASF GitHub Bot commented on FLINK-2785:
---------------------------------------

Github user fhueske commented on a diff in the pull request:

    https://github.com/apache/flink/pull/1205#discussion_r41287065
  
    --- Diff: flink-staging/flink-gelly-scala/src/main/scala/org/apache/flink/graph/scala/Graph.scala
---
    @@ -126,6 +126,122 @@ object Graph {
         wrapGraph(jg.Graph.fromTupleDataSet[K, VV, EV](javaTupleEdges, mapper, env.getJavaEnv))
       }
     
    +  /**
    +  * Creates a Graph from a CSV file of vertices and a CSV file of edges.
    +  * 
    +  * @param pathVertices The file path containing the vertices.
    +  * @param vertexValue Defines whether the vertices have associated values.
    +  * If set to false, the vertex input is ignored and vertices are created from the edges
file.
    +  * True by default.
    +  * @param lineDelimiterVertices The string that separates lines in the vertices file.
    +  * It defaults to newline.
    +  * @param fieldDelimiterVertices The string that separates vertex Ids from vertex values
    +  * in the vertices file.
    +  * @param quoteCharacterVertices The character to use for quoted String parsing
    +  * in the vertices file. Disabled by default.
    +  * @param ignoreFirstLineVertices Whether the first line in the vertices file should
be ignored.
    +  * @param ignoreCommentsVertices Lines that start with the given String in the vertices
file
    +  * are ignored, disabled by default.
    +  * @param lenientVertices Whether the parser should silently ignore malformed lines
in the
    +  * vertices file.
    +  * @param includedFieldsVertices The fields in the vertices file that should be read.
    +  * By default all fields are read.
    +  * @param pathEdges The file path containing the edges.
    +  * @param edgeValue Defines whether the edges have associated values. True by default.
    +  * @param lineDelimiterEdges The string that separates lines in the edges file.
    +  * It defaults to newline.
    +  * @param fieldDelimiterEdges The string that separates fields in the edges file.
    +  * @param quoteCharacterEdges The character to use for quoted String parsing
    +  * in the edges file. Disabled by default.
    +  * @param ignoreFirstLineEdges Whether the first line in the edges file should be
ignored.
    +  * @param ignoreCommentsEdges Lines that start with the given String in the edges file
    +  * are ignored, disabled by default.
    +  * @param lenientEdges Whether the parser should silently ignore malformed lines in
the
    +  * edges file.
    +  * @param includedFieldsEdges The fields in the edges file that should be read.
    +  * By default all fields are read.
    +  * 
    +  */
    +  // scalastyle:off
    +  // This method exceeds the max allowed number of parameters -->  
    +  def fromCsvReader[K: TypeInformation : ClassTag, VV: TypeInformation : ClassTag,
    +    EV: TypeInformation : ClassTag](
    +      pathVertices: String = null,
    +      vertexValue: Boolean = true,
    +      lineDelimiterVertices: String = "\n",
    +      fieldDelimiterVertices: String = ",",
    +      quoteCharacterVertices: Character = null,
    +      ignoreFirstLineVertices: Boolean = false,
    +      ignoreCommentsVertices: String = null,
    +      lenientVertices: Boolean = false,
    +      includedFieldsVertices: Array[Int] = null,
    +      pathEdges: String,
    +      edgeValue: Boolean = true,
    +      lineDelimiterEdges: String = "\n",
    +      fieldDelimiterEdges: String = ",",
    +      quoteCharacterEdges: Character = null,
    +      ignoreFirstLineEdges: Boolean = false,
    +      ignoreCommentsEdges: String = null,
    +      lenientEdges: Boolean = false,
    +      includedFieldsEdges: Array[Int] = null,
    +      mapper: MapFunction[K, VV] = null,
    +      env: ExecutionEnvironment) = {
    +
    +    // with vertex and edge values
    +    if (vertexValue && edgeValue) {
    +      val vertices = env.readCsvFile[(K, VV)](pathVertices, lineDelimiterVertices,
    +        fieldDelimiterVertices, quoteCharacterVertices, ignoreFirstLineVertices,
    +        ignoreCommentsVertices, lenientVertices, includedFieldsVertices)
    +
    +      val edges = env.readCsvFile[(K, K, EV)](pathEdges, lineDelimiterEdges, fieldDelimiterEdges,
    +        quoteCharacterEdges, ignoreFirstLineEdges, ignoreCommentsEdges, lenientEdges,
    +        includedFieldsEdges)
    +     
    +      fromTupleDataSet[K, VV, EV](vertices, edges, env)
    +    }
    +    // with vertex value and no edge value
    +    else if (vertexValue && (!edgeValue)) {
    +      val vertices = env.readCsvFile[(K, VV)](pathVertices, lineDelimiterVertices,
    --- End diff --
    
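    Check that `pathVertices` is actually set before reading the vertices file here: it defaults to `null` while `vertexValue` defaults to `true`.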


> Implement Graph's fromCsvReader in Gelly-Scala
> ----------------------------------------------
>
>                 Key: FLINK-2785
>                 URL: https://issues.apache.org/jira/browse/FLINK-2785
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Gelly
>            Reporter: Vasia Kalavri
>            Assignee: Vasia Kalavri
>            Priority: Minor
>
> Graph's {{fromCsvReader}} method is currently missing from the Gelly Scala API. It cannot
be implemented as a simple wrapper over the Java method, because the Java method returns a
{{GraphCsvReader}}, which in turn creates a Graph only after the types have been specified with the
appropriate methods. The Scala version of the method can be more nicely implemented using the {{ScalaCsvInputFormat}}.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message