ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1825464 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core: cc/CuiLookupLister.java cr/FileTreeReader.java util/SourceMetadataUtil.java
Date Tue, 27 Feb 2018 15:44:07 GMT
Author: seanfinan
Date: Tue Feb 27 15:44:07 2018
New Revision: 1825464

URL: http://svn.apache.org/viewvc?rev=1825464&view=rev
Log:
Add simple cui list bsv file writer.
Add setPatientIdentifier to SourceMetadataUtil.
Add improved patient resolution to FileTreeReader.

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java
Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java?rev=1825464&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/CuiLookupLister.java Tue Feb 27 15:44:07 2018
@@ -0,0 +1,53 @@
+package org.apache.ctakes.core.cc;
+
+
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 2/22/2018
+ */
+public class CuiLookupLister extends AbstractJCasFileWriter {
+
+   static private final Logger LOGGER = Logger.getLogger( "CuiLookupLister" );
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void writeFile( final JCas jCas,
+                          final String outputDir,
+                          final String documentId,
+                          final String fileName ) throws IOException {
+      try ( Writer writer = new BufferedWriter( new FileWriter( outputDir + "/" + fileName + "_cui.txt" ) ) ) {
+         final Map<Sentence, Collection<IdentifiedAnnotation>> sentenceCodes
+               = JCasUtil.indexCovered( jCas, Sentence.class, IdentifiedAnnotation.class );
+         for ( Map.Entry<Sentence, Collection<IdentifiedAnnotation>> entry : sentenceCodes.entrySet() ) {
+            final int sentenceBegin = entry.getKey().getBegin();
+            final int sentenceEnd = entry.getKey().getEnd();
+            for ( IdentifiedAnnotation annotation : entry.getValue() ) {
+               if ( annotation.getBegin() == sentenceBegin && annotation.getEnd() == sentenceEnd ) {
+                  for ( UmlsConcept umls : OntologyConceptUtil.getUmlsConcepts( annotation ) ) {
+                     writer.write( umls.getCui() + '|' + umls.getTui() + '|' + annotation.getCoveredText() + '\n' );
+                  }
+               }
+            }
+         }
+      }
+   }
+
+}

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java?rev=1825464&r1=1825463&r2=1825464&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cr/FileTreeReader.java Tue Feb 27 15:44:07 2018
@@ -4,6 +4,7 @@ import org.apache.ctakes.core.config.Con
 import org.apache.ctakes.core.patient.PatientNoteStore;
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
 import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.SourceMetadataUtil;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
 import org.apache.ctakes.typesystem.type.structured.DocumentIdPrefix;
 import org.apache.ctakes.typesystem.type.structured.DocumentPath;
@@ -118,6 +119,7 @@ final public class FileTreeReader extend
    private File _rootDir;
    private Collection<String> _validExtensions;
    private List<File> _files;
+   private Map<File, String> _filePatients;
    private int _currentIndex;
    private Map<String, Integer> _patientDocCounts = new HashMap<>();
 
@@ -136,11 +138,13 @@ final public class FileTreeReader extend
       _currentIndex = 0;
       if ( _rootDir.isFile() ) {
          // does not check for valid extensions.  With one file just trust the user.
-         _files = Collections.singletonList( _rootDir );
          final String patient = _rootDir.getParentFile().getName();
+         _files = Collections.singletonList( _rootDir );
+         _filePatients = Collections.singletonMap( _rootDir, patient );
          PatientNoteStore.getInstance().setWantedDocCount( patient, 1 );
       } else {
          // gather all of the files and set the document counts per patient.
+         _filePatients = new HashMap<>();
          _files = getDescendentFiles( _rootDir, _validExtensions, 0 );
          _patientDocCounts.forEach( ( k, v ) -> PatientNoteStore.getInstance().setWantedDocCount( k, v ) );
       }
@@ -175,7 +179,9 @@ final public class FileTreeReader extend
     * @param level directory level beneath the root directory
     * @return List of files descending from the parent directory
     */
-   private List<File> getDescendentFiles( final File parentDir, final Collection<String> validExtensions, final int level ) {
+   private List<File> getDescendentFiles( final File parentDir,
+                                          final Collection<String> validExtensions,
+                                          final int level ) {
       final File[] children = parentDir.listFiles();
       if ( children == null || children.length == 0 ) {
          return Collections.emptyList();
@@ -191,6 +197,8 @@ final public class FileTreeReader extend
             descendentFiles.add( child );
          }
       }
+      // TODO copy in TextNumberComparator and delegate ...
+//      Collections.sort( descendentFiles, FileComparator );
       for ( File childDir : childDirs ) {
          descendentFiles.addAll( getDescendentFiles( childDir, validExtensions, level + 1 ) );
       }
@@ -198,6 +206,7 @@ final public class FileTreeReader extend
          final String patientId = parentDir.getName();
          final int count = _patientDocCounts.getOrDefault( patientId, 0 );
          _patientDocCounts.put( patientId, count + descendentFiles.size() );
+         descendentFiles.forEach( f -> _filePatients.put( f, patientId ) );
       }
       return descendentFiles;
    }
@@ -302,6 +311,8 @@ final public class FileTreeReader extend
       final String idPrefix = createDocumentIdPrefix( file, _rootDir );
       documentIdPrefix.setDocumentIdPrefix( idPrefix );
       documentIdPrefix.addToIndexes();
+      final String patientId = _filePatients.get( file );
+      SourceMetadataUtil.setPatientIdentifier( jcas, patientId );
       final DocumentPath documentPath = new DocumentPath( jcas );
       documentPath.setDocumentPath( file.getAbsolutePath() );
       documentPath.addToIndexes();

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java?rev=1825464&r1=1825463&r2=1825464&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java Tue Feb 27 15:44:07 2018
@@ -48,6 +48,15 @@ final public class SourceMetadataUtil {
    }
 
    /**
+    * @param jCas      ye olde
+    * @param patientId the patient identifier for the source
+    */
+   static public void setPatientIdentifier( final JCas jCas, final String patientId ) {
+      final Metadata metadata = getOrCreateMetadata( jCas );
+      metadata.setPatientIdentifier( patientId );
+   }
+
+   /**
     * @param jcas ye olde jay-cas
     * @return the patient id for the source or {@link #UNKNOWN_PATIENT_NUM} if one is not found
     */



Mime
View raw message