beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From al...@apache.org
Subject [beam] branch master updated: Moved application of lower bound from _get_split_keys to split to avoid source bundles of weight=0
Date Tue, 07 Jul 2020 01:18:18 GMT
This is an automated email from the ASF dual-hosted git repository.

altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 8de544a  Moved application of lower bound from _get_split_keys to split to avoid source bundles of weight=0
     new 811d41f  Merge pull request #12142 from corvin-quodai/mdbio-zerodivision
8de544a is described below

commit 8de544a2669e5f19987a4b4b8a8213610d8a0dc5
Author: Corvin Deboeser <corvin@quod.ai>
AuthorDate: Wed Jul 1 08:28:18 2020 +0800

    Moved application of lower bound from _get_split_keys to split to avoid source bundles of weight=0
---
 sdks/python/apache_beam/io/mongodbio.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sdks/python/apache_beam/io/mongodbio.py b/sdks/python/apache_beam/io/mongodbio.py
index 324f1e3..aed0ff2 100644
--- a/sdks/python/apache_beam/io/mongodbio.py
+++ b/sdks/python/apache_beam/io/mongodbio.py
@@ -171,6 +171,12 @@ class _BoundedMongoSource(iobase.BoundedSource):
         start_position, stop_position)
 
     desired_bundle_size_in_mb = desired_bundle_size // 1024 // 1024
+
+    # for desired bundle size, if desired chunk size smaller than 1mb, use
+    # mongodb default split size of 1mb.
+    if desired_bundle_size_in_mb < 1:
+      desired_bundle_size_in_mb = 1
+
     split_keys = self._get_split_keys(
         desired_bundle_size_in_mb, start_position, stop_position)
 
@@ -221,10 +227,6 @@ class _BoundedMongoSource(iobase.BoundedSource):
 
   def _get_split_keys(self, desired_chunk_size_in_mb, start_pos, end_pos):
     # calls mongodb splitVector command to get document ids at split position
-    # for desired bundle size, if desired chunk size smaller than 1mb, use
-    # mongodb default split size of 1mb.
-    if desired_chunk_size_in_mb < 1:
-      desired_chunk_size_in_mb = 1
     if start_pos >= end_pos:
       # single document not splittable
       return []


Mime
View raw message