avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rskr...@apache.org
Subject [avro] 03/23: AVRO-2413: Refactor io.Validate to use a mapping (#516)
Date Wed, 29 Jan 2020 08:54:22 GMT
This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch branch-1.9
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 3fc05a3800318a405aab00e345886e9885dd1339
Author: Michael A. Smith <michaels@syapse.com>
AuthorDate: Mon Jun 24 05:27:14 2019 -0400

    AVRO-2413: Refactor io.Validate to use a mapping (#516)
    
    * AVRO-2413: Refactor Validate to Use a Mapping
    
    * AVRO-2443: Fix Invalid AvroTypeException Invocation
---
 lang/py/src/avro/io.py | 112 +++++++++++++++++++++++--------------------------
 lang/py3/avro/io.py    |  71 +++++++++++--------------------
 2 files changed, 78 insertions(+), 105 deletions(-)

diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py
index 3fdf602..8c2ef10 100644
--- a/lang/py/src/avro/io.py
+++ b/lang/py/src/avro/io.py
@@ -5,9 +5,9 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 # https://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -34,7 +34,7 @@ uses the following mapping:
   * Schema longs are implemented as long.
   * Schema floats are implemented as float.
   * Schema doubles are implemented as float.
-  * Schema booleans are implemented as bool. 
+  * Schema booleans are implemented as bool.
 """
 import struct
 from avro import schema
@@ -106,65 +106,59 @@ class SchemaResolutionException(schema.AvroException):
 #
 # Validate
 #
-
 def _is_timezone_aware_datetime(dt):
   return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None
 
+_valid = {
+  'null': lambda s, d: d is None,
+  'boolean': lambda s, d: isinstance(d, bool),
+  'string': lambda s, d: isinstance(d, basestring),
+  'bytes': lambda s, d: ((isinstance(d, str)) or
+                         (isinstance(d, Decimal) and
+                          getattr(s, 'logical_type', None) == constants.DECIMAL)),
+  'int': lambda s, d: ((isinstance(d, (int, long))) and (INT_MIN_VALUE <= d <= INT_MAX_VALUE) or
+                       (isinstance(d, datetime.date) and
+                        getattr(s, 'logical_type', None) == constants.DATE) or
+                       (isinstance(d, datetime.time) and
+                        getattr(s, 'logical_type', None) == constants.TIME_MILLIS)),
+  'long': lambda s, d: ((isinstance(d, (int, long))) and (LONG_MIN_VALUE <= d <= LONG_MAX_VALUE) or
+                        (isinstance(d, datetime.time) and
+                         getattr(s, 'logical_type', None) == constants.TIME_MICROS) or
+                        (isinstance(d, datetime.date) and
+                         _is_timezone_aware_datetime(d) and
+                        getattr(s, 'logical_type', None) in (constants.TIMESTAMP_MILLIS,
+                                                             constants.TIMESTAMP_MICROS))),
+  'float': lambda s, d: isinstance(d, (int, long, float)),
+  'fixed': lambda s, d: ((isinstance(d, str) and len(d) == s.size) or
+                         (isinstance(d, Decimal) and
+                          getattr(s, 'logical_type', None) == constants.DECIMAL)),
+  'enum': lambda s, d: d in s.symbols,
+  'array': lambda s, d: isinstance(d, list) and all(validate(s.items, item) for item in d),
+  'map': lambda s, d: (isinstance(d, dict) and all(isinstance(key, basestring) for key in d)
+                       and all(validate(s.values, value) for value in d.values())),
+  'union': lambda s, d: any(validate(branch, d) for branch in s.schemas),
+  'record': lambda s, d: (isinstance(d, dict)
+                          and all(validate(f.type, d.get(f.name)) for f in s.fields)
+                          and {f.name for f in s.fields}.issuperset(d.keys())),
+}
+_valid['double'] = _valid['float']
+_valid['error_union'] = _valid['union']
+_valid['error'] = _valid['request'] = _valid['record']
+
+
 def validate(expected_schema, datum):
-  """Determine if a python datum is an instance of a schema."""
-  schema_type = expected_schema.type
-  if schema_type == 'null':
-    return datum is None
-  elif schema_type == 'boolean':
-    return isinstance(datum, bool)
-  elif schema_type == 'string':
-    return isinstance(datum, basestring)
-  elif schema_type == 'bytes':
-    if (hasattr(expected_schema, 'logical_type') and
-            expected_schema.logical_type == 'decimal'):
-      return isinstance(datum, Decimal)
-    return isinstance(datum, str)
-  elif schema_type == 'int':
-    if hasattr(expected_schema, 'logical_type'):
-      if expected_schema.logical_type == constants.DATE:
-        return isinstance(datum, datetime.date)
-      elif expected_schema.logical_type == constants.TIME_MILLIS:
-        return isinstance(datum, datetime.time)
-    return (isinstance(datum, (int, long))
-            and INT_MIN_VALUE <= datum <= INT_MAX_VALUE)
-  elif schema_type == 'long':
-    if hasattr(expected_schema, 'logical_type'):
-      if expected_schema.logical_type == constants.TIME_MICROS:
-        return isinstance(datum, datetime.time)
-      elif expected_schema.logical_type in [constants.TIMESTAMP_MILLIS, constants.TIMESTAMP_MICROS]:
-        return isinstance(datum, datetime.datetime) and _is_timezone_aware_datetime(datum)
-    return (isinstance(datum, (int, long))
-            and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE)
-  elif schema_type in ['float', 'double']:
-    return (isinstance(datum, int) or isinstance(datum, long)
-            or isinstance(datum, float))
-  # Check for int, float, long and decimal
-  elif schema_type == 'fixed':
-    if (hasattr(expected_schema, 'logical_type') and
-                    expected_schema.logical_type == 'decimal'):
-      return isinstance(datum, Decimal)
-    return isinstance(datum, str) and len(datum) == expected_schema.size
-  elif schema_type == 'enum':
-    return datum in expected_schema.symbols
-  elif schema_type == 'array':
-    return (isinstance(datum, list) and
-      False not in [validate(expected_schema.items, d) for d in datum])
-  elif schema_type == 'map':
-    return (isinstance(datum, dict) and
-      False not in [isinstance(k, basestring) for k in datum.keys()] and
-      False not in
-        [validate(expected_schema.values, v) for v in datum.values()])
-  elif schema_type in ['union', 'error_union']:
-    return True in [validate(s, datum) for s in expected_schema.schemas]
-  elif schema_type in ['record', 'error', 'request']:
-    return (isinstance(datum, dict) and
-      False not in
-        [validate(f.type, datum.get(f.name)) for f in expected_schema.fields])
+  """Determines if a python datum is an instance of a schema.
+
+  Args:
+    expected_schema: Schema to validate against.
+    datum: Datum to validate.
+  Returns:
+    True if the datum is an instance of the schema.
+  """
+  try:
+    return _valid[expected_schema.type](expected_schema, datum)
+  except KeyError:
+    raise AvroTypeException('Unknown Avro schema type: %r' % schema_type)
 
 #
 # Decoder/Encoder
@@ -1040,7 +1034,7 @@ class DatumWriter(object):
     # validate datum
     if not validate(self.writers_schema, datum):
       raise AvroTypeException(self.writers_schema, datum)
-    
+
     self.write_data(self.writers_schema, datum, encoder)
 
   def write_data(self, writers_schema, datum, encoder):
diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py
index 9fe7f09..cd79393 100644
--- a/lang/py3/avro/io.py
+++ b/lang/py3/avro/io.py
@@ -92,6 +92,27 @@ class SchemaResolutionException(schema.AvroException):
 # ------------------------------------------------------------------------------
 # Validate
 
+_valid = {
+  'null': lambda s, d: d is None,
+  'boolean': lambda s, d: isinstance(d, bool),
+  'string': lambda s, d: isinstance(d, str),
+  'bytes': lambda s, d: isinstance(d, bytes),
+  'int': lambda s, d: isinstance(d, int) and (INT_MIN_VALUE <= d <= INT_MAX_VALUE),
+  'long': lambda s, d: isinstance(d, int) and (LONG_MIN_VALUE <= d <= LONG_MAX_VALUE),
+  'float': lambda s, d: isinstance(d, (int, float)),
+  'fixed': lambda s, d: isinstance(d, bytes) and len(d) == s.size,
+  'enum': lambda s, d: d in s.symbols,
+  'array': lambda s, d: isinstance(d, list) and all(Validate(s.items, item) for item in d),
+  'map': lambda s, d: (isinstance(d, dict) and all(isinstance(key, str) for key in d)
+                       and all(Validate(s.values, value) for value in d.values())),
+  'union': lambda s, d: any(Validate(branch, d) for branch in s.schemas),
+  'record': lambda s, d: (isinstance(d, dict)
+                          and all(Validate(f.type, d.get(f.name)) for f in s.fields)
+                          and {f.name for f in s.fields}.issuperset(d.keys()))
+}
+_valid['double'] = _valid['float']
+_valid['error_union'] = _valid['union']
+_valid['error'] = _valid['request'] = _valid['record']
 
 def Validate(expected_schema, datum):
   """Determines if a python datum is an instance of a schema.
@@ -102,52 +123,10 @@ def Validate(expected_schema, datum):
   Returns:
     True if the datum is an instance of the schema.
   """
-  schema_type = expected_schema.type
-  if schema_type == 'null':
-    return datum is None
-  elif schema_type == 'boolean':
-    return isinstance(datum, bool)
-  elif schema_type == 'string':
-    return isinstance(datum, str)
-  elif schema_type == 'bytes':
-    return isinstance(datum, bytes)
-  elif schema_type == 'int':
-    return (isinstance(datum, int)
-        and (INT_MIN_VALUE <= datum <= INT_MAX_VALUE))
-  elif schema_type == 'long':
-    return (isinstance(datum, int)
-        and (LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE))
-  elif schema_type in ['float', 'double']:
-    return (isinstance(datum, int) or isinstance(datum, float))
-  elif schema_type == 'fixed':
-    return isinstance(datum, bytes) and (len(datum) == expected_schema.size)
-  elif schema_type == 'enum':
-    return datum in expected_schema.symbols
-  elif schema_type == 'array':
-    return (isinstance(datum, list)
-        and all(Validate(expected_schema.items, item) for item in datum))
-  elif schema_type == 'map':
-    return (isinstance(datum, dict)
-        and all(isinstance(key, str) for key in datum.keys())
-        and all(Validate(expected_schema.values, value)
-                for value in datum.values()))
-  elif schema_type in ['union', 'error_union']:
-    return any(Validate(union_branch, datum)
-               for union_branch in expected_schema.schemas)
-  elif schema_type in ['record', 'error', 'request']:
-    if not isinstance(datum, dict):
-        return False
-    expected_schema_field_names = set()
-    for field in expected_schema.fields:
-        expected_schema_field_names.add(field.name)
-        if not Validate(field.type, datum.get(field.name)):
-            return False
-    for datum_field in datum.keys():
-        if datum_field not in expected_schema_field_names:
-            return False
-    return True
-  else:
-    raise AvroTypeException('Unknown Avro schema type: %r' % schema_type)
+  try:
+    return _valid[expected_schema.type](expected_schema, datum)
+  except KeyError:
+    raise AvroTypeException(expected_schema, datum)
 
 
 # ------------------------------------------------------------------------------


Mime
View raw message