kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dral...@apache.org
Subject [2/2] kudu git commit: KUDU-721: [Python] Add DECIMAL column type support
Date Wed, 07 Mar 2018 19:48:18 GMT
KUDU-721: [Python] Add DECIMAL column type support

This patch adds basic support to the Python client to
create, read, and write tables with DECIMAL columns.

Change-Id: I8e0855100ab1ea891f990931ec94d0b98c0dece1
Reviewed-on: http://gerrit.cloudera.org:8080/9496
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <davidralves@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/e6aedc99
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/e6aedc99
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/e6aedc99

Branch: refs/heads/master
Commit: e6aedc99d25ba8bc8f1fc37259404d55c802953f
Parents: 2b68040
Author: Grant Henke <granthenke@gmail.com>
Authored: Mon Mar 5 14:23:08 2018 -0600
Committer: David Ribeiro Alves <davidralves@gmail.com>
Committed: Wed Mar 7 19:46:57 2018 +0000

----------------------------------------------------------------------
 python/kudu/__init__.py             |   2 +-
 python/kudu/client.pyx              |  20 +++++-
 python/kudu/libkudu_client.pxd      |  36 ++++++++++-
 python/kudu/schema.pxd              |   8 +++
 python/kudu/schema.pyx              | 106 +++++++++++++++++++++++++++++--
 python/kudu/tests/test_scanner.py   |   4 ++
 python/kudu/tests/test_scantoken.py |   4 ++
 python/kudu/tests/test_schema.py    |  40 ++++++++++++
 python/kudu/tests/test_util.py      |  80 +++++++++++++++++++++++
 python/kudu/tests/util.py           |  14 +++-
 python/kudu/util.py                 |  65 +++++++++++++++++++
 11 files changed, 368 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/__init__.py
----------------------------------------------------------------------
diff --git a/python/kudu/__init__.py b/python/kudu/__init__.py
index 8f0c8d9..75260d6 100644
--- a/python/kudu/__init__.py
+++ b/python/kudu/__init__.py
@@ -36,7 +36,7 @@ from kudu.errors import (KuduException, KuduBadStatus, KuduNotFound,  # noqa
 
 from kudu.schema import (int8, int16, int32, int64, string_ as string,  # noqa
                          double_ as double, float_, float_ as float, binary,
-                         unixtime_micros, bool_ as bool,
+                         unixtime_micros, bool_ as bool, decimal,
                          KuduType,
                          SchemaBuilder, ColumnSpec, Schema, ColumnSchema,
                          COMPRESSION_DEFAULT,

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/client.pyx
----------------------------------------------------------------------
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 0b28856..34cb53a 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -29,7 +29,8 @@ from libkudu_client cimport *
 from kudu.compat import tobytes, frombytes, dict_iter
 from kudu.schema cimport Schema, ColumnSchema, ColumnSpec, KuduValue, KuduType
 from kudu.errors cimport check_status
-from kudu.util import to_unixtime_micros, from_unixtime_micros, from_hybridtime
+from kudu.util import to_unixtime_micros, from_unixtime_micros, \
+    from_hybridtime, to_unscaled_decimal, from_unscaled_decimal
 from errors import KuduException
 
 import six
@@ -64,7 +65,8 @@ cdef dict _type_names = {
     KUDU_FLOAT : "KUDU_FLOAT",
     KUDU_DOUBLE : "KUDU_DOUBLE",
     KUDU_BINARY : "KUDU_BINARY",
-    KUDU_UNIXTIME_MICROS : "KUDU_UNIXTIME_MICROS"
+    KUDU_UNIXTIME_MICROS : "KUDU_UNIXTIME_MICROS",
+    KUDU_DECIMAL : "KUDU_DECIMAL"
 }
 
 # Range Partition Bound Type enums
@@ -1314,6 +1316,15 @@ cdef class Row:
         check_status(self.row.GetUnixTimeMicros(i, &val))
         return val
 
+    cdef inline __get_unscaled_decimal(self, int i):
+        cdef int128_t val
+        check_status(self.row.GetUnscaledDecimal(i, &val))
+        return val
+
+    cdef inline get_decimal(self, int i):
+        scale = self.parent.batch.projection_schema().Column(i).type_attributes().scale()
+        return from_unscaled_decimal(self.__get_unscaled_decimal(i), scale)
+
     cdef inline get_slot(self, int i):
         cdef:
             Status s
@@ -1339,6 +1350,8 @@ cdef class Row:
             return self.get_binary(i)
         elif t == KUDU_UNIXTIME_MICROS:
             return from_unixtime_micros(self.get_unixtime_micros(i))
+        elif t == KUDU_DECIMAL:
+            return self.get_decimal(i)
         else:
             raise TypeError("Cannot get kudu type <{0}>"
                                 .format(_type_names[t]))
@@ -2451,6 +2464,9 @@ cdef class PartialRow:
         elif t == KUDU_UNIXTIME_MICROS:
             check_status(self.row.SetUnixTimeMicros(i, <int64_t>
                 to_unixtime_micros(value)))
+        elif t == KUDU_DECIMAL:
+            check_status(self.row.SetUnscaledDecimal(i, <int128_t>
+                to_unscaled_decimal(value)))
         else:
             raise TypeError("Cannot set kudu type <{0}>.".format(_type_names[t]))
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/libkudu_client.pxd
----------------------------------------------------------------------
diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd
index 9d1dfb7..b834bf0 100644
--- a/python/kudu/libkudu_client.pxd
+++ b/python/kudu/libkudu_client.pxd
@@ -62,6 +62,8 @@ cdef extern from "kudu/util/status.h" namespace "kudu" nogil:
         c_bool IsNotAuthorized()
         c_bool IsAborted()
 
+cdef extern from "kudu/util/int128.h" namespace "kudu":
+    ctypedef int int128_t
 
 cdef extern from "kudu/util/monotime.h" namespace "kudu" nogil:
 
@@ -119,6 +121,7 @@ cdef extern from "kudu/client/schema.h" namespace "kudu::client" nogil:
         KUDU_DOUBLE " kudu::client::KuduColumnSchema::DOUBLE"
         KUDU_BINARY " kudu::client::KuduColumnSchema::BINARY"
         KUDU_UNIXTIME_MICROS " kudu::client::KuduColumnSchema::UNIXTIME_MICROS"
+        KUDU_DECIMAL " kudu::client::KuduColumnSchema::DECIMAL"
 
     enum EncodingType" kudu::client::KuduColumnStorageAttributes::EncodingType":
         EncodingType_AUTO " kudu::client::KuduColumnStorageAttributes::AUTO_ENCODING"
@@ -142,6 +145,17 @@ cdef extern from "kudu/client/schema.h" namespace "kudu::client" nogil:
         CompressionType compression
         string ToString()
 
+    cdef cppclass KuduColumnTypeAttributes:
+        KuduColumnTypeAttributes()
+        KuduColumnTypeAttributes(const KuduColumnTypeAttributes& other)
+        KuduColumnTypeAttributes(int8_t precision, int8_t scale)
+
+        int8_t precision()
+        int8_t scale()
+
+        c_bool Equals(KuduColumnTypeAttributes& other)
+        void CopyFrom(KuduColumnTypeAttributes& other)
+
     cdef cppclass KuduColumnSchema:
         KuduColumnSchema(const KuduColumnSchema& other)
         KuduColumnSchema(const string& name, DataType type)
@@ -152,6 +166,7 @@ cdef extern from "kudu/client/schema.h" namespace "kudu::client" nogil:
         string& name()
         c_bool is_nullable()
         DataType type()
+        KuduColumnTypeAttributes type_attributes()
 
         c_bool Equals(KuduColumnSchema& other)
         void CopyFrom(KuduColumnSchema& other)
@@ -183,6 +198,9 @@ cdef extern from "kudu/client/schema.h" namespace "kudu::client" nogil:
          KuduColumnSpec* Nullable()
          KuduColumnSpec* Type(DataType type_)
 
+         KuduColumnSpec* Precision(int8_t precision);
+         KuduColumnSpec* Scale(int8_t scale);
+
          KuduColumnSpec* RenameTo(const string& new_name)
 
 
@@ -227,8 +245,6 @@ cdef extern from "kudu/client/scan_batch.h" namespace "kudu::client" nogil:
         Status GetUnixTimeMicros(int col_idx,
                             int64_t* micros_since_utc_epoch)
 
-        Status GetString(Slice& col_name, Slice* val)
-        Status GetString(int col_idx, Slice* val)
 
         Status GetFloat(Slice& col_name, float* val)
         Status GetFloat(int col_idx, float* val)
@@ -236,6 +252,12 @@ cdef extern from "kudu/client/scan_batch.h" namespace "kudu::client" nogil:
         Status GetDouble(Slice& col_name, double* val)
         Status GetDouble(int col_idx, double* val)
 
+        Status GetUnscaledDecimal(Slice& col_name, int128_t* val)
+        Status GetUnscaledDecimal(int col_idx, int128_t* val)
+
+        Status GetString(Slice& col_name, Slice* val)
+        Status GetString(int col_idx, Slice* val)
+
         Status GetBinary(const Slice& col_name, Slice* val)
         Status GetBinary(int col_idx, Slice* val)
 
@@ -306,6 +328,8 @@ cdef extern from "kudu/common/partial_row.h" namespace "kudu" nogil:
         Status SetDouble(Slice& col_name, double val)
         Status SetFloat(Slice& col_name, float val)
 
+        Status SetUnscaledDecimal(const Slice& col_name, int128_t val)
+
         # Integer setters
         Status SetBool(int col_idx, c_bool val)
 
@@ -317,6 +341,8 @@ cdef extern from "kudu/common/partial_row.h" namespace "kudu" nogil:
         Status SetDouble(int col_idx, double val)
         Status SetFloat(int col_idx, float val)
 
+        Status SetUnscaledDecimal(int col_idx, int128_t val)
+
         # Set, but does not copy string
         Status SetString(Slice& col_name, Slice& val)
         Status SetString(int col_idx, Slice& val)
@@ -370,6 +396,9 @@ cdef extern from "kudu/common/partial_row.h" namespace "kudu" nogil:
         Status GetFloat(Slice& col_name, float* val)
         Status GetFloat(int col_idx, float* val)
 
+        Status GetUnscaledDecimal(Slice& col_name, int128_t* val)
+        Status GetUnscaledDecimal(int col_idx, int128_t* val)
+
         # Gets the string but does not copy the value. Callers should
         # copy the resulting Slice if necessary.
         Status GetString(Slice& col_name, Slice* val)
@@ -451,6 +480,9 @@ cdef extern from "kudu/client/value.h" namespace "kudu::client" nogil:
         C_KuduValue* FromBool(c_bool val);
 
         @staticmethod
+        C_KuduValue* FromDecimal(int128_t val, int8_t scale);
+
+        @staticmethod
         C_KuduValue* CopyString(const Slice& s);
 
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/schema.pxd
----------------------------------------------------------------------
diff --git a/python/kudu/schema.pxd b/python/kudu/schema.pxd
index c1cfc2e..56a4068 100644
--- a/python/kudu/schema.pxd
+++ b/python/kudu/schema.pxd
@@ -26,6 +26,14 @@ cdef class KuduType(object):
         DataType type
 
 
+cdef class ColumnTypeAttributes:
+    """
+    Wraps a Kudu client ColumnTypeAttributes object
+    """
+    cdef:
+        KuduColumnTypeAttributes* type_attributes
+
+
 cdef class ColumnSchema:
     """
     Wraps a Kudu client ColumnSchema object

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/schema.pyx
----------------------------------------------------------------------
diff --git a/python/kudu/schema.pyx b/python/kudu/schema.pyx
index 558fcf3..ed17378 100644
--- a/python/kudu/schema.pyx
+++ b/python/kudu/schema.pyx
@@ -24,7 +24,7 @@ from kudu.compat import tobytes, frombytes
 from kudu.schema cimport *
 from kudu.errors cimport check_status
 from kudu.client cimport PartialRow
-from kudu.util import to_unixtime_micros
+from kudu.util import get_decimal_scale, to_unixtime_micros, to_unscaled_decimal
 
 import six
 
@@ -44,6 +44,7 @@ DOUBLE = KUDU_DOUBLE
 UNIXTIME_MICROS = KUDU_UNIXTIME_MICROS
 BINARY = KUDU_BINARY
 
+DECIMAL = KUDU_DECIMAL
 
 cdef dict _reverse_dict(d):
     return dict((v, k) for k, v in d.items())
@@ -118,6 +119,7 @@ float_ = KuduType(KUDU_FLOAT)
 double_ = KuduType(KUDU_DOUBLE)
 binary = KuduType(KUDU_BINARY)
 unixtime_micros = KuduType(KUDU_UNIXTIME_MICROS)
+decimal = KuduType(KUDU_DECIMAL)
 
 
 cdef dict _type_names = {
@@ -130,7 +132,8 @@ cdef dict _type_names = {
     FLOAT: 'float',
     DOUBLE: 'double',
     BINARY: 'binary',
-    UNIXTIME_MICROS: 'unixtime_micros'
+    UNIXTIME_MICROS: 'unixtime_micros',
+    DECIMAL: 'decimal'
 }
 
 
@@ -146,7 +149,8 @@ cdef dict _type_to_obj = {
     FLOAT: float_,
     DOUBLE: double_,
     BINARY: binary,
-    UNIXTIME_MICROS: unixtime_micros
+    UNIXTIME_MICROS: unixtime_micros,
+    DECIMAL: decimal
 }
 
 
@@ -160,6 +164,40 @@ cdef KuduType to_data_type(object obj):
     else:
         raise ValueError('Invalid type: {0}'.format(obj))
 
+cdef cppclass KuduColumnTypeAttributes:
+        KuduColumnTypeAttributes()
+        KuduColumnTypeAttributes(const KuduColumnTypeAttributes& other)
+        KuduColumnTypeAttributes(int8_t precision, int8_t scale)
+
+        int8_t precision()
+        int8_t scale()
+
+        c_bool Equals(KuduColumnTypeAttributes& other)
+        void CopyFrom(KuduColumnTypeAttributes& other)
+
+cdef class ColumnTypeAttributes:
+    """
+    Wraps a Kudu client ColumnTypeAttributes object.
+    """
+    def __cinit__(self):
+        self.type_attributes = NULL
+
+    def __dealloc__(self):
+        if self.type_attributes is not NULL:
+            del self.type_attributes
+
+    property precision:
+        def __get__(self):
+            return self.type_attributes.precision()
+
+    property scale:
+        def __get__(self):
+            return self.type_attributes.scale()
+
+    def __repr__(self):
+        return ('ColumnTypeAttributes(precision=%s, scale=%s)'
+                % (self.type_attributes.precision(),
+                   self.type_attributes.scale()))
 
 cdef class ColumnSchema:
     """
@@ -189,6 +227,12 @@ cdef class ColumnSchema:
         def __get__(self):
             return self.schema.is_nullable()
 
+    property type_attributes:
+        def __get__(self):
+            cdef ColumnTypeAttributes result = ColumnTypeAttributes()
+            result.type_attributes = new KuduColumnTypeAttributes(self.schema.type_attributes())
+            return result
+
     def equals(self, other):
         if not isinstance(other, ColumnSchema):
             return False
@@ -295,6 +339,46 @@ cdef class ColumnSpec:
         self.spec.Encoding(type)
         return self
 
+    def precision(self, precision):
+        """
+        Set the precision for the column.
+
+        Clients must specify a precision for decimal columns. Precision is the total
+        number of digits that can be represented by the column, regardless of the
+        location of the decimal point. For example, representing integer values up to 9999,
+        and fractional values up to 99.99, both require a precision of 4. You can also
+        represent corresponding negative values, without any change in the precision.
+        For example, the range -9999 to 9999 still only requires a precision of 4.
+
+        The precision must be between 1 and 38.
+
+        Returns
+        -------
+        self
+        """
+        self.spec.Precision(precision)
+        return self
+
+    def scale(self, scale):
+        """
+        Set the scale for the column.
+
+        Clients can specify a scale for decimal columns. Scale represents the number
+        of fractional digits. This value must be less than or equal to precision.
+        A scale of 0 produces integral values, with no fractional part. If precision
+        and scale are equal, all the digits come after the decimal point; for example,
+        with a precision and scale of 4, all values lie between -0.9999 and 0.9999.
+
+        The scale must be between 0 and the column's precision, inclusive.
+        If no scale is provided a default scale of 0 is used.
+
+        Returns
+        -------
+        self
+        """
+        self.spec.Scale(scale)
+        return self
+
     def primary_key(self):
         """
         Make this column a primary key. If you use this method, it will be the
@@ -386,7 +470,7 @@ cdef class SchemaBuilder:
 
     def add_column(self, name, type_=None, nullable=None, compression=None,
                    encoding=None, primary_key=False, block_size=None,
-                   default= None):
+                   default=None, precision=None, scale=None):
         """
         Add a new column to the schema. Returns a ColumnSpec object for further
         configuration and use in a fluid programming style.
@@ -411,6 +495,10 @@ cdef class SchemaBuilder:
           Block size (in bytes) to use for the target column.
         default : obj
           Use this to set the column default value
+        precision : int
+          Use this precision for the decimal column
+        scale : int
+          Use this scale for the decimal column
 
         Examples
         --------
@@ -440,6 +528,12 @@ cdef class SchemaBuilder:
         if encoding is not None:
             result.encoding(encoding)
 
+        if precision is not None:
+            result.precision(precision)
+
+        if scale is not None:
+            result.scale(scale)
+
         if primary_key:
             result.primary_key()
 
@@ -667,6 +761,10 @@ cdef class KuduValue:
         elif (type_.name == 'unixtime_micros'):
             value = to_unixtime_micros(value)
             self._value = C_KuduValue.FromInt(value)
+        elif (type_.name == 'decimal'):
+            scale = get_decimal_scale(value)
+            value = to_unscaled_decimal(value)
+            self._value = C_KuduValue.FromDecimal(value, scale)
         else:
             raise TypeError("Cannot initialize KuduValue for kudu type <{0}>"
                             .format(type_.name))

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/tests/test_scanner.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py
index 2db0411..fa94ea3 100644
--- a/python/kudu/tests/test_scanner.py
+++ b/python/kudu/tests/test_scanner.py
@@ -260,6 +260,10 @@ class TestScanner(TestScanBase):
         # Does a row check count only
         self._test_float_pred()
 
+    def test_decimal_pred(self):
+        # Test a decimal predicate
+        self._test_decimal_pred()
+
     def test_binary_pred(self):
         # Test a binary predicate
         self._test_binary_pred()

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/tests/test_scantoken.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_scantoken.py b/python/kudu/tests/test_scantoken.py
index c675a92..c274668 100644
--- a/python/kudu/tests/test_scantoken.py
+++ b/python/kudu/tests/test_scantoken.py
@@ -242,6 +242,10 @@ class TestScanToken(TestScanBase):
         # Test unixtime_micros value predicate
         self._test_unixtime_micros_pred()
 
+    def test_decimal_pred(self):
+        # Test decimal value predicate
+        self._test_decimal_pred()
+
     def test_bool_pred(self):
         # Test a boolean value predicate
         self._test_bool_pred()

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_schema.py b/python/kudu/tests/test_schema.py
index ee89452..4870ab7 100644
--- a/python/kudu/tests/test_schema.py
+++ b/python/kudu/tests/test_schema.py
@@ -142,6 +142,46 @@ class TestSchema(unittest.TestCase):
         # TODO(wesm): The C++ client does not give us an API to see the storage
         # attributes of a column
 
+    def test_decimal(self):
+        builder = kudu.schema_builder()
+        (builder.add_column('key')
+         .type('decimal')
+         .primary_key()
+         .nullable(False)
+         .precision(9)
+         .scale(2))
+        schema = builder.build()
+
+        column = schema[0]
+        tp = column.type
+        assert tp.name == 'decimal'
+        assert tp.type == kudu.schema.DECIMAL
+        ta = column.type_attributes
+        assert ta.precision == 9
+        assert ta.scale == 2
+
+    def test_decimal_without_precision(self):
+        builder = kudu.schema_builder()
+        (builder.add_column('key')
+         .type('decimal')
+         .primary_key()
+         .nullable(False))
+
+        with self.assertRaises(kudu.KuduInvalidArgument):
+            builder.build()
+
+    def test_precision_on_non_decimal_column(self):
+        builder = kudu.schema_builder()
+        (builder.add_column('key')
+         .type('int32')
+         .primary_key()
+         .nullable(False)
+         .precision(9)
+         .scale(2))
+
+        with self.assertRaises(kudu.KuduInvalidArgument):
+            builder.build()
+
     def test_unsupported_col_spec_methods_for_create_table(self):
         builder = kudu.schema_builder()
         builder.add_column('test', 'int64').rename('test')

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/tests/test_util.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_util.py b/python/kudu/tests/test_util.py
new file mode 100644
index 0000000..0d649fc
--- /dev/null
+++ b/python/kudu/tests/test_util.py
@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import division
+
+from kudu.compat import unittest
+from kudu.util import *
+
+
+class TestUtil(unittest.TestCase):
+
+    def setUp(self):
+        context = getcontext()
+        # By default, Decimal objects in Python use a precision of 28,
+        # while Kudu supports up to 38. We support passing this context
+        # on a per-call basis if the user wants to adjust this value.
+        context.prec = 38
+
+    def test_to_unscaled_decimal(self):
+        self.assertEqual(0, to_unscaled_decimal(Decimal('0')))
+        self.assertEqual(12345, to_unscaled_decimal(Decimal('123.45')))
+        self.assertEqual(-12345, to_unscaled_decimal(Decimal('-123.45')))
+        self.assertEqual(12345, to_unscaled_decimal(Decimal('12345')))
+        self.assertEqual(10000, to_unscaled_decimal(Decimal('10000')))
+        self.assertEqual(10000, to_unscaled_decimal(Decimal('0.10000')))
+        self.assertEqual(1, to_unscaled_decimal(Decimal('1')))
+        self.assertEqual(1, to_unscaled_decimal(Decimal('.1')))
+        self.assertEqual(1, to_unscaled_decimal(Decimal('0.1')))
+        self.assertEqual(1, to_unscaled_decimal(Decimal('0.01')))
+        self.assertEqual(999999999, to_unscaled_decimal(Decimal('0.999999999')))
+        self.assertEqual(999999999999999999, to_unscaled_decimal(Decimal('0.999999999999999999')))
+        self.assertEqual(99999999999999999999999999999999999999,
+               to_unscaled_decimal(Decimal('0.99999999999999999999999999999999999999')))
+
+    def test_from_unscaled_decimal(self):
+        self.assertEqual(0, from_unscaled_decimal(0, 0))
+        self.assertEqual(Decimal('123.45'), from_unscaled_decimal(12345, 2))
+        self.assertEqual(Decimal('-123.45'), from_unscaled_decimal(-12345, 2))
+        self.assertEqual(Decimal('12345'), from_unscaled_decimal(12345, 0))
+        self.assertEqual(Decimal('10000'), from_unscaled_decimal(10000, 0))
+        self.assertEqual(Decimal('0.10000'), from_unscaled_decimal(10000, 5))
+        self.assertEqual(Decimal('1'), from_unscaled_decimal(1, 0))
+        self.assertEqual(Decimal('.1'), from_unscaled_decimal(1, 1))
+        self.assertEqual(Decimal('0.1'), from_unscaled_decimal(1, 1))
+        self.assertEqual(Decimal('0.01'), from_unscaled_decimal(1, 2))
+        self.assertEqual(Decimal('0.999999999'), from_unscaled_decimal(999999999, 9))
+        self.assertEqual(Decimal('0.999999999999999999'),
+               from_unscaled_decimal(999999999999999999, 18))
+        self.assertEqual(Decimal('0.99999999999999999999999999999999999999'),
+               from_unscaled_decimal(99999999999999999999999999999999999999, 38))
+
+    def test_get_decimal_scale(self):
+        self.assertEqual(0, get_decimal_scale(Decimal('0')))
+        self.assertEqual(2, get_decimal_scale(Decimal('123.45')))
+        self.assertEqual(2, get_decimal_scale(Decimal('-123.45')))
+        self.assertEqual(0, get_decimal_scale(Decimal('12345')))
+        self.assertEqual(0, get_decimal_scale(Decimal('10000')))
+        self.assertEqual(5, get_decimal_scale(Decimal('0.10000')))
+        self.assertEqual(0, get_decimal_scale(Decimal('1')))
+        self.assertEqual(1, get_decimal_scale(Decimal('.1')))
+        self.assertEqual(1, get_decimal_scale(Decimal('0.1')))
+        self.assertEqual(2, get_decimal_scale(Decimal('0.01')))
+        self.assertEqual(9, get_decimal_scale(Decimal('0.999999999')))
+        self.assertEqual(18, get_decimal_scale(Decimal('0.999999999999999999')))
+        self.assertEqual(38, get_decimal_scale(Decimal('0.99999999999999999999999999999999999999')))

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/tests/util.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/util.py b/python/kudu/tests/util.py
index be7f6d8..e6d1334 100644
--- a/python/kudu/tests/util.py
+++ b/python/kudu/tests/util.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from decimal import Decimal
 from kudu.compat import unittest
 from kudu.client import Partitioning
 from kudu.tests.common import KuduTestBase
@@ -71,6 +72,7 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
         builder = kudu.schema_builder()
         builder.add_column('key').type(kudu.int64).nullable(False)
         builder.add_column('unixtime_micros_val', type_=kudu.unixtime_micros, nullable=False)
+        builder.add_column('decimal_val', type_=kudu.decimal, precision=5, scale=2)
         builder.add_column('string_val', type_=kudu.string, compression=kudu.COMPRESSION_LZ4, encoding='prefix')
         builder.add_column('bool_val', type_=kudu.bool)
         builder.add_column('double_val', type_=kudu.double)
@@ -103,11 +105,11 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
 
         # Insert new rows
         self.type_test_rows = [
-            (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
+            (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc), Decimal('111.11'),
              "Test One", True, 1.7976931348623157 * (10^308), 127,
              b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
              3.402823 * (10^38)),
-            (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
+            (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc), Decimal('0.99'),
              "测试二", False, 200.1, -1,
              b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
              -150.2)
@@ -231,6 +233,14 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
             count_only=True
         )
 
+    def _test_decimal_pred(self):
+        self.verify_pred_type_scans(
+            preds=[
+                self.type_table['decimal_val'] == Decimal('111.11')
+            ],
+            row_indexes=slice(0, 1),
+        )
+
     def _test_binary_pred(self):
         self.verify_pred_type_scans(
             preds=[

http://git-wip-us.apache.org/repos/asf/kudu/blob/e6aedc99/python/kudu/util.py
----------------------------------------------------------------------
diff --git a/python/kudu/util.py b/python/kudu/util.py
index 350a011..61062fb 100644
--- a/python/kudu/util.py
+++ b/python/kudu/util.py
@@ -17,6 +17,7 @@
 
 import datetime
 import six
+from decimal import Decimal, getcontext
 from pytz import utc
 
 
@@ -82,6 +83,7 @@ def to_unixtime_micros(timestamp, format = "%Y-%m-%dT%H:%M:%S.%f"):
     td_micros = td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6
     return int(td_micros)
 
+
 def from_unixtime_micros(unixtime_micros):
     """
     Convert the input unixtime_micros value to a datetime in UTC.
@@ -101,6 +103,7 @@ def from_unixtime_micros(unixtime_micros):
         raise ValueError("Invalid unixtime_micros value." +
                          "You must provide an integer value.")
 
+
 def from_hybridtime(hybridtime):
     """
     Convert a raw HybridTime value to a datetime in UTC.
@@ -115,3 +118,65 @@ def from_hybridtime(hybridtime):
     """
     # Add 1 so the value is usable for snapshot scans
     return from_unixtime_micros(int(hybridtime >> 12) + 1)
+
+
+def to_unscaled_decimal(decimal, context=None):
+    """
+    Convert the incoming decimal value to an int representing
+    the unscaled decimal value.
+
+    Parameters
+    ----------
+    decimal : Decimal
+      The decimal value to convert to an unscaled int
+    context :  Context
+      The optional context to use
+
+    Returns
+    -------
+    int : The unscaled decimal int
+    """
+    if context is None:
+        context = getcontext()
+
+    scale = get_decimal_scale(decimal)
+    return decimal.scaleb(scale, context).to_integral_exact(None, context)
+
+
+def from_unscaled_decimal(unscaled_decimal, scale, context=None):
+    """
+    Convert the input unscaled_decimal value to a Decimal instance.
+
+    Parameters
+    ----------
+    unscaled_decimal : int
+      The unscaled int value of a decimal
+    scale : int
+      The scale that should be used when converting
+    context :  Context
+      The optional context to use
+
+    Returns
+    -------
+    decimal : The scaled Decimal
+    """
+    if context is None:
+        context = getcontext()
+
+    return Decimal(unscaled_decimal, context).scaleb(-scale, context)
+
+
+def get_decimal_scale(decimal):
+    """
+       Get the scale of the decimal.
+
+       Parameters
+       ----------
+       decimal : Decimal
+         The decimal value
+
+       Returns
+       -------
+       int : The calculated scale
+       """
+    return max(0, -decimal.as_tuple().exponent)


Mime
View raw message