beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Work logged] (BEAM-5270) Finish Python 3 porting for coders module
Date Fri, 07 Sep 2018 01:45:00 GMT

     [ https://issues.apache.org/jira/browse/BEAM-5270?focusedWorklogId=141999&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-141999 ]

ASF GitHub Bot logged work on BEAM-5270:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 07/Sep/18 01:44
            Start Date: 07/Sep/18 01:44
    Worklog Time Spent: 10m 
      Work Description: aaltay closed pull request #6310: [BEAM-5270] Finish Python 3 porting for coders subpackage
URL: https://github.com/apache/beam/pull/6310
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (GitHub would otherwise not display it once the pull request is merged):

diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py
index cf4b9b5d520..ad4edbbb374 100644
--- a/sdks/python/apache_beam/coders/coders.py
+++ b/sdks/python/apache_beam/coders/coders.py
@@ -62,12 +62,13 @@
 
 def serialize_coder(coder):
   from apache_beam.internal import pickler
-  return '%s$%s' % (coder.__class__.__name__, pickler.dumps(coder))
+  return b'%s$%s' % (coder.__class__.__name__.encode('utf-8'),
+                     pickler.dumps(coder))
 
 
 def deserialize_coder(serialized):
   from apache_beam.internal import pickler
-  return pickler.loads(serialized.split('$', 1)[1])
+  return pickler.loads(serialized.split(b'$', 1)[1])
 # pylint: enable=wrong-import-order, wrong-import-position
 
 
diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py
index 0b8b4c20fde..969c1de10c4 100644
--- a/sdks/python/apache_beam/coders/coders_test_common.py
+++ b/sdks/python/apache_beam/coders/coders_test_common.py
@@ -20,6 +20,7 @@
 
 import logging
 import math
+import sys
 import unittest
 from builtins import range
 
@@ -41,7 +42,7 @@
 class CustomCoder(coders.Coder):
 
   def encode(self, x):
-    return str(x+1)
+    return str(x+1).encode('utf-8')
 
   def decode(self, encoded):
     return int(encoded) - 1
@@ -56,6 +57,9 @@ class CodersTest(unittest.TestCase):
   def setUpClass(cls):
     cls.seen = set()
     cls.seen_nested = set()
+    # Method has been renamed in Python 3
+    if sys.version_info[0] < 3:
+      cls.assertCountEqual = cls.assertItemsEqual
 
   @classmethod
   def tearDownClass(cls):
@@ -272,7 +276,7 @@ def iter_generator(count):
         yield i
 
     iterable_coder = coders.IterableCoder(coders.VarIntCoder())
-    self.assertItemsEqual(list(iter_generator(count)),
+    self.assertCountEqual(list(iter_generator(count)),
                           iterable_coder.decode(
                               iterable_coder.encode(iter_generator(count))))
 
@@ -374,8 +378,8 @@ def test_global_window_coder(self):
     self.assertEqual({'@type': 'kind:global_window'},
                      coder.as_cloud_object())
     # Test binary representation
-    self.assertEqual('', coder.encode(value))
-    self.assertEqual(value, coder.decode(''))
+    self.assertEqual(b'', coder.encode(value))
+    self.assertEqual(value, coder.decode(b''))
     # Test unnested
     self.check_coder(coder, value)
     # Test nested
diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py
index da27a49883a..4bdece6072b 100644
--- a/sdks/python/apache_beam/coders/slow_stream.py
+++ b/sdks/python/apache_beam/coders/slow_stream.py
@@ -22,6 +22,7 @@
 from __future__ import absolute_import
 
 import struct
+import sys
 from builtins import chr
 from builtins import object
 
@@ -70,7 +71,7 @@ def write_bigendian_double(self, v):
     self.write(struct.pack('>d', v))
 
   def get(self):
-    return ''.join(self.data)
+    return b''.join(self.data)
 
   def size(self):
     return len(self.data)
@@ -114,6 +115,19 @@ def __init__(self, data):
     self.data = data
     self.pos = 0
 
+    # The behavior of looping over a byte-string and obtaining byte characters
+    # has been changed between python 2 and 3.
+    # b = b'\xff\x01'
+    # Python 2:
+    # b[0] = '\xff'
+    # ord(b[0]) = 255
+    # Python 3:
+    # b[0] = 255
+    if sys.version_info[0] >= 3:
+      self.read_byte = self.read_byte_py3
+    else:
+      self.read_byte = self.read_byte_py2
+
   def size(self):
     return len(self.data) - self.pos
 
@@ -124,10 +138,14 @@ def read(self, size):
   def read_all(self, nested):
     return self.read(self.read_var_int64() if nested else self.size())
 
-  def read_byte(self):
+  def read_byte_py2(self):
     self.pos += 1
     return ord(self.data[self.pos - 1])
 
+  def read_byte_py3(self):
+    self.pos += 1
+    return self.data[self.pos - 1]
+
   def read_var_int64(self):
     shift = 0
     result = 0
diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py
index f704c490c97..031406f40ac 100644
--- a/sdks/python/apache_beam/coders/standard_coders_test.py
+++ b/sdks/python/apache_beam/coders/standard_coders_test.py
@@ -67,7 +67,7 @@ class StandardCodersTest(unittest.TestCase):
   }
 
   _urn_to_json_value_parser = {
-      'beam:coder:bytes:v1': lambda x: x,
+      'beam:coder:bytes:v1': lambda x: x.encode('utf-8'),
       'beam:coder:varint:v1': lambda x: x,
       'beam:coder:kv:v1':
           lambda x, key_parser, value_parser: (key_parser(x['key']),
diff --git a/sdks/python/apache_beam/coders/stream_test.py b/sdks/python/apache_beam/coders/stream_test.py
index 641fefad45d..ad046fb5598 100644
--- a/sdks/python/apache_beam/coders/stream_test.py
+++ b/sdks/python/apache_beam/coders/stream_test.py
@@ -36,20 +36,20 @@ class StreamTest(unittest.TestCase):
 
   def test_read_write(self):
     out_s = self.OutputStream()
-    out_s.write('abc')
-    out_s.write('\0\t\n')
-    out_s.write('xyz', True)
-    out_s.write('', True)
+    out_s.write(b'abc')
+    out_s.write(b'\0\t\n')
+    out_s.write(b'xyz', True)
+    out_s.write(b'', True)
     in_s = self.InputStream(out_s.get())
-    self.assertEquals('abc\0\t\n', in_s.read(6))
-    self.assertEquals('xyz', in_s.read_all(True))
-    self.assertEquals('', in_s.read_all(True))
+    self.assertEquals(b'abc\0\t\n', in_s.read(6))
+    self.assertEquals(b'xyz', in_s.read_all(True))
+    self.assertEquals(b'', in_s.read_all(True))
 
   def test_read_all(self):
     out_s = self.OutputStream()
-    out_s.write('abc')
+    out_s.write(b'abc')
     in_s = self.InputStream(out_s.get())
-    self.assertEquals('abc', in_s.read_all(False))
+    self.assertEquals(b'abc', in_s.read_all(False))
 
   def test_read_write_byte(self):
     out_s = self.OutputStream()
@@ -129,15 +129,15 @@ def test_read_write_bigendian_int32(self):
   def test_byte_counting(self):
     bc_s = self.ByteCountingOutputStream()
     self.assertEquals(0, bc_s.get_count())
-    bc_s.write('def')
+    bc_s.write(b'def')
     self.assertEquals(3, bc_s.get_count())
-    bc_s.write('')
+    bc_s.write(b'')
     self.assertEquals(3, bc_s.get_count())
     bc_s.write_byte(10)
     self.assertEquals(4, bc_s.get_count())
     # "nested" also writes the length of the string, which should
     # cause 1 extra byte to be counted.
-    bc_s.write('2345', nested=True)
+    bc_s.write(b'2345', nested=True)
     self.assertEquals(9, bc_s.get_count())
     bc_s.write_var_int64(63)
     self.assertEquals(10, bc_s.get_count())
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index f425ba7673b..b12e35d2e67 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -56,11 +56,13 @@ commands =
 [testenv:py3]
 setenv =
   BEAM_EXPERIMENTAL_PY3=1
+modules =
+  apache_beam.coders,apache_beam.tools
 commands =
   python --version
   pip --version
   {toxinidir}/scripts/run_tox_cleanup.sh
-  python setup.py nosetests --tests apache_beam.runners.direct.direct_metrics_test:DirectMetricsTest.test_combiner_functions
+  python setup.py nosetests --tests {[testenv:py3]modules}
   {toxinidir}/scripts/run_tox_cleanup.sh
 
 [testenv:py27-cython]


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 141999)
    Time Spent: 2.5h  (was: 2h 20m)

> Finish Python 3 porting for coders module
> -----------------------------------------
>
>                 Key: BEAM-5270
>                 URL: https://issues.apache.org/jira/browse/BEAM-5270
>             Project: Beam
>          Issue Type: Sub-task
>          Components: sdk-py-core
>            Reporter: Robbe
>            Assignee: Robbe
>            Priority: Major
>          Time Spent: 2.5h
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message