avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rskr...@apache.org
Subject [avro] branch master updated: AVRO-2689: add reader-schema to DataFileReadTool (#785)
Date Tue, 28 Jan 2020 10:03:46 GMT
This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new afbda07  AVRO-2689:  add reader-schema to DataFileReadTool (#785)
afbda07 is described below

commit afbda0798a4fe662bd5379cf535da79800c4e840
Author: Roger Peppe <rogpeppe@gmail.com>
AuthorDate: Tue Jan 28 10:03:38 2020 +0000

    AVRO-2689:  add reader-schema to DataFileReadTool (#785)
    
    * AVRO-2689: add reader schema to DataFileReadTool
    
    This PR adds --reader-schema and --reader-schema-file flags
    to the tojson tool.
    
    * AVRO-2689: Unit test for reader schema on tojson tool.
    
    Co-authored-by: RyanSkraba <ryan@skraba.com>
---
 .../org/apache/avro/tool/DataFileReadTool.java     | 21 +++++++++++++++++++--
 .../org/apache/avro/tool/TestDataFileTools.java    | 22 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
index 8d4ef00..fb5ef72 100644
--- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
+++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java
@@ -56,11 +56,25 @@ public class DataFileReadTool implements Tool {
     OptionSpec<Void> prettyOption = optionParser.accepts("pretty", "Turns on pretty printing.");
     String headDesc = String.format("Converts the first X records (default is %d).", DEFAULT_HEAD_COUNT);
     OptionSpec<String> headOption = optionParser.accepts("head", headDesc).withOptionalArg();
+    OptionSpec<String> readerSchemaFileOption = optionParser.accepts("reader-schema-file", "Reader schema file")
+        .withOptionalArg().ofType(String.class);
+    OptionSpec<String> readerSchemaOption = optionParser.accepts("reader-schema", "Reader schema").withOptionalArg()
+        .ofType(String.class);
 
     OptionSet optionSet = optionParser.parse(args.toArray(new String[0]));
     Boolean pretty = optionSet.has(prettyOption);
     List<String> nargs = new ArrayList<>((List<String>) optionSet.nonOptionArguments());
 
+    String readerSchemaStr = readerSchemaOption.value(optionSet);
+    String readerSchemaFile = readerSchemaFileOption.value(optionSet);
+
+    Schema readerSchema = null;
+    if (readerSchemaFile != null) {
+      readerSchema = Util.parseSchemaFromFS(readerSchemaFile);
+    } else if (readerSchemaStr != null) {
+      readerSchema = new Schema.Parser().parse(readerSchemaStr);
+    }
+
     long headCount = getHeadCount(optionSet, headOption, nargs);
 
     if (nargs.size() != 1) {
@@ -73,9 +87,12 @@ public class DataFileReadTool implements Tool {
     BufferedInputStream inStream = Util.fileOrStdin(nargs.get(0), stdin);
 
     GenericDatumReader<Object> reader = new GenericDatumReader<>();
+    if (readerSchema != null) {
+      reader.setExpected(readerSchema);
+    }
     try (DataFileStream<Object> streamReader = new DataFileStream<>(inStream, reader)) {
-      Schema schema = streamReader.getSchema();
-      DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
+      Schema schema = readerSchema != null ? readerSchema : streamReader.getSchema();
+      DatumWriter writer = new GenericDatumWriter<>(schema);
       JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty);
       for (long recordCount = 0; streamReader.hasNext() && recordCount < headCount; recordCount++) {
         Object datum = streamReader.next();
diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
index 00538aa..7f694c8 100644
--- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
+++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileTools.java
@@ -35,6 +35,7 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.avro.AvroRuntimeException;
+import org.apache.avro.AvroTypeException;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Type;
 import org.apache.avro.file.DataFileReader;
@@ -114,6 +115,27 @@ public class TestDataFileTools {
   }
 
   @Test
+  public void testReadWithReaderSchema() throws Exception {
+    assertEquals(jsonData, run(new DataFileReadTool(), "--reader-schema", "\"long\"", sampleFile.getPath()));
+  }
+
+  @Test(expected = AvroTypeException.class)
+  public void testReadWithIncompatibleReaderSchema() throws Exception {
+    // Fails: an int can't be read as a string.
+    run(new DataFileReadTool(), "--reader-schema", "\"string\"", sampleFile.getPath());
+  }
+
+  @Test
+  public void testReadWithReaderSchemaFile() throws Exception {
+    File readerSchemaFile = new File(DIR.getRoot(), "reader-schema-temp.schema");
+    try (FileWriter fw = new FileWriter(readerSchemaFile)) {
+      fw.append("\"long\"");
+    }
+    assertEquals(jsonData,
+        run(new DataFileReadTool(), "--reader-schema-file", readerSchemaFile.getPath(), sampleFile.getPath()));
+  }
+
+  @Test
   public void testReadHeadDefaultCount() throws Exception {
     String expectedJson = jsonData.substring(0, 20); // first 10 numbers
     assertEquals(expectedJson, run(new DataFileReadTool(), "--head", sampleFile.getPath()));


Mime
View raw message