ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1817318 - /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
Date Wed, 06 Dec 2017 20:33:34 GMT
Author: seanfinan
Date: Wed Dec  6 20:33:34 2017
New Revision: 1817318

URL: http://svn.apache.org/viewvc?rev=1817318&view=rev
Log:
CTAKES-489 : Fix for unwanted cr removal.  Parameter "KeepCR" added.

Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java?rev=1817318&r1=1817317&r2=1817318&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java Wed Dec  6 20:33:34 2017
@@ -87,6 +87,18 @@ final public class FileTreeReader extend
    private String[] _explicitExtensions;
 
    /**
+    * Name of configuration parameter that must be set to false to remove windows \r characters
+    */
+   public static final String PARAM_KEEP_CR = "KeepCR";
+   @ConfigurationParameter(
+         name = PARAM_KEEP_CR,
+         description = "Keep windows-format carriage return characters at line endings."
+
+               "  This will only keep existing characters, it will not add them.",
+         mandatory = false
+   )
+   private boolean _keepCrChar = true;
+
+   /**
     * The patient id for each note is set using a directory name.
     * By default this is the directory directly under the root directory (PatientLevel=1).
     * This is appropriate for files such as in rootDir=data/, file in data/patientA/Text1.txt
@@ -304,12 +316,14 @@ final public class FileTreeReader extend
     */
    private String readFile( final File file ) throws IOException {
       LOGGER.info( "Reading " + file.getPath() );
-      try {
-         return readByPath( file );
-      } catch ( IOException ioE ) {
-         // This is a pretty bad way to handle a MalformedInputException, but that can be thrown by the collector
-         // in the stream, and java streams and exceptions do not go well together
-         LOGGER.warn( "Bad characters in " + file.getPath() );
+      if ( !_keepCrChar ) {
+         try {
+            return readByPath( file );
+         } catch ( IOException ioE ) {
+            // This is a pretty bad way to handle a MalformedInputException, but that can be thrown by the collector
+            // in the stream, and java streams and exceptions do not go well together
+            LOGGER.warn( "Bad characters in " + file.getPath() );
+         }
       }
       return readByBuffer( file );
    }
@@ -329,9 +343,6 @@ final public class FileTreeReader extend
          }
       } else {
          return safeReadByPath( file );
-//         try ( Stream<String> stream = Files.lines( file.toPath() ) ) {
-//            return stream.collect( Collectors.joining( "\n" ) );
-//         }
       }
    }
 



Mime
View raw message