beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pabl...@apache.org
Subject [beam] branch master updated: Update doc/examples: BigQuerySource to ReadFromBigQuery (#13239)
Date Mon, 11 Jan 2021 21:28:04 GMT
This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new dd71c9a  Update doc/examples: BigQuerySource to ReadFromBigQuery (#13239)
dd71c9a is described below

commit dd71c9ac472b62ff3598d3db47b0f26985bbc82a
Author: Xinbin Huang <bin.huangxb@gmail.com>
AuthorDate: Mon Jan 11 13:27:13 2021 -0800

    Update doc/examples: BigQuerySource to ReadFromBigQuery (#13239)
    
    * Update doc: BigQuerySource to ReadFromBigQuery
    
    * Resolve comment
    
    * Update ReadFromBigQuery in examples
    
    * Add deprecation notice
    
    * Use kwarg `table` to `ReadFromBigQuery`
    
    * fixup! Use kwarg `table` to `ReadFromBigQuery`
---
 .../apache_beam/examples/cookbook/bigquery_side_input.py      |  7 +++----
 sdks/python/apache_beam/examples/cookbook/filters.py          |  2 +-
 sdks/python/apache_beam/examples/snippets/snippets.py         | 10 +++++-----
 sdks/python/apache_beam/io/gcp/bigquery.py                    |  2 +-
 .../content/en/documentation/io/built-in/google-bigquery.md   | 11 ++++++++---
 5 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
index 1b909a3..28ab7d9 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_side_input.py
@@ -99,10 +99,9 @@ def run(argv=None):
     ignore_corpus = known_args.ignore_corpus
     ignore_word = known_args.ignore_word
 
-    pcoll_corpus = p | 'read corpus' >> beam.io.Read(
-        beam.io.BigQuerySource(query=query_corpus))
-    pcoll_word = p | 'read_words' >> beam.io.Read(
-        beam.io.BigQuerySource(query=query_word))
+    pcoll_corpus = p | 'read corpus' >> beam.io.ReadFromBigQuery(
+        query=query_corpus)
+    pcoll_word = p | 'read_words' >> beam.io.ReadFromBigQuery(query=query_word)
     pcoll_ignore_corpus = p | 'create_ignore_corpus' >> beam.Create(
         [ignore_corpus])
     pcoll_ignore_word = p | 'create_ignore_word' >> beam.Create([ignore_word])
diff --git a/sdks/python/apache_beam/examples/cookbook/filters.py b/sdks/python/apache_beam/examples/cookbook/filters.py
index 95f32f9..9381234 100644
--- a/sdks/python/apache_beam/examples/cookbook/filters.py
+++ b/sdks/python/apache_beam/examples/cookbook/filters.py
@@ -90,7 +90,7 @@ def run(argv=None):
 
   with beam.Pipeline(argv=pipeline_args) as p:
 
-    input_data = p | beam.io.Read(beam.io.BigQuerySource(known_args.input))
+    input_data = p | beam.io.ReadFromBigQuery(table=known_args.input)
 
     # pylint: disable=expression-not-assigned
     (
diff --git a/sdks/python/apache_beam/examples/snippets/snippets.py b/sdks/python/apache_beam/examples/snippets/snippets.py
index 42e5886..5155ee4 100644
--- a/sdks/python/apache_beam/examples/snippets/snippets.py
+++ b/sdks/python/apache_beam/examples/snippets/snippets.py
@@ -1117,7 +1117,7 @@ def model_bigqueryio(p, write_project='', write_dataset='', write_table=''):
   # [START model_bigqueryio_read_table]
   max_temperatures = (
       p
-      | 'ReadTable' >> beam.io.Read(beam.io.BigQuerySource(table_spec))
+      | 'ReadTable' >> beam.io.ReadFromBigQuery(table=table_spec)
       # Each row is a dictionary where the keys are the BigQuery columns
       | beam.Map(lambda elem: elem['max_temperature']))
   # [END model_bigqueryio_read_table]
@@ -1125,9 +1125,9 @@ def model_bigqueryio(p, write_project='', write_dataset='', write_table=''):
   # [START model_bigqueryio_read_query]
   max_temperatures = (
       p
-      | 'QueryTable' >> beam.io.Read(beam.io.BigQuerySource(
+      | 'QueryTable' >> beam.io.ReadFromBigQuery(
           query='SELECT max_temperature FROM '\
-                '[clouddataflow-readonly:samples.weather_stations]'))
+                '[clouddataflow-readonly:samples.weather_stations]')
       # Each row is a dictionary where the keys are the BigQuery columns
       | beam.Map(lambda elem: elem['max_temperature']))
   # [END model_bigqueryio_read_query]
@@ -1135,10 +1135,10 @@ def model_bigqueryio(p, write_project='', write_dataset='', write_table=''):
   # [START model_bigqueryio_read_query_std_sql]
   max_temperatures = (
       p
-      | 'QueryTableStdSQL' >> beam.io.Read(beam.io.BigQuerySource(
+      | 'QueryTableStdSQL' >> beam.io.ReadFromBigQuery(
           query='SELECT max_temperature FROM '\
                 '`clouddataflow-readonly.samples.weather_stations`',
-          use_standard_sql=True))
+          use_standard_sql=True)
       # Each row is a dictionary where the keys are the BigQuery columns
       | beam.Map(lambda elem: elem['max_temperature']))
   # [END model_bigqueryio_read_query_std_sql]
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index a5ceb5b..2858b98 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -45,7 +45,7 @@ call *one* row of the main table and *all* rows of the side table. The runner
 may use some caching techniques to share the side inputs between calls in order
 to avoid excessive reading:::
 
-  main_table = pipeline | 'VeryBig' >> beam.io.ReadFroBigQuery(...)
+  main_table = pipeline | 'VeryBig' >> beam.io.ReadFromBigQuery(...)
   side_table = pipeline | 'NotBig' >> beam.io.ReadFromBigQuery(...)
   results = (
       main_table
diff --git a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md
index 79f1ef7..9c87ec3d 100644
--- a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md
+++ b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md
@@ -252,13 +252,18 @@ them into JSON `TableRow` objects.
 <!-- Python specific -->
 
 {{< paragraph class="language-py" >}}
-To read from a BigQuery table using the Beam SDK for Python, apply a `Read`
-transform on a `BigQuerySource`. Read returns a `PCollection` of dictionaries,
+To read from a BigQuery table using the Beam SDK for Python, apply a `ReadFromBigQuery`
+transform. `ReadFromBigQuery` returns a `PCollection` of dictionaries,
 where each element in the `PCollection` represents a single row in the table.
 Integer values in the `TableRow` objects are encoded as strings to match
 BigQuery's exported JSON format.
 {{< /paragraph >}}
 
+{{< paragraph class="language-py" >}}
+***Note:*** `BigQuerySource()` is deprecated as of Beam SDK 2.25.0. Before 2.25.0, to read from
+a BigQuery table using the Beam SDK, you will apply a `Read` transform on a `BigQuerySource`. For example,
+`beam.io.Read(beam.io.BigQuerySource(table_spec))`.
+{{< /paragraph >}}
 
 ### Reading from a table
 
@@ -293,7 +298,7 @@ the `fromQuery` method.
 
 {{< paragraph class="language-py" >}}
 If you don't want to read an entire table, you can supply a query string to
-`BigQuerySource` by specifying the `query` parameter.
+`ReadFromBigQuery` by specifying the `query` parameter.
 {{< /paragraph >}}
 
 {{< paragraph class="language-py" >}}


Mime
View raw message