ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1801809 - in /ctakes/trunk/ctakes-examples: pom.xml src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java
Date Thu, 13 Jul 2017 08:00:30 GMT
Author: tmill
Date: Thu Jul 13 08:00:30 2017
New Revision: 1801809

URL: http://svn.apache.org/viewvc?rev=1801809&view=rev
Log:
Added example showing how to run multi-threaded pipelines.

Added:
    ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java
Modified:
    ctakes/trunk/ctakes-examples/pom.xml

Modified: ctakes/trunk/ctakes-examples/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/pom.xml?rev=1801809&r1=1801808&r2=1801809&view=diff
==============================================================================
--- ctakes/trunk/ctakes-examples/pom.xml (original)
+++ ctakes/trunk/ctakes-examples/pom.xml Thu Jul 13 08:00:30 2017
@@ -46,5 +46,10 @@
         <groupId>org.apache.ctakes</groupId>
         <artifactId>ctakes-smoking-status</artifactId>
      </dependency>
+     <dependency>
+     	<groupId>org.apache.uima</groupId>
+     	<artifactId>uimafit-cpe</artifactId>
+     	<version>2.3.0</version>
+     </dependency>
   </dependencies>
 </project>

Added: ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java?rev=1801809&view=auto
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java (added)
+++ ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipeline/MultiThreadedPipeline.java Thu Jul 13 08:00:30 2017
@@ -0,0 +1,153 @@
+package org.apache.ctakes.examples.pipeline;
+
+import java.io.IOException;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.config.ConfigParameterConstants;
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
+import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
+import org.apache.ctakes.lvg.ae.ThreadSafeLvg;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionProcessingEngine;
+import org.apache.uima.collection.CollectionReaderDescription;
+import org.apache.uima.collection.EntityProcessStatus;
+import org.apache.uima.collection.StatusCallbackListener;
+import org.apache.uima.fit.cpe.CpeBuilder;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.ProcessTrace;
+
+public class MultiThreadedPipeline {
+
+  public static final int NUM_THREADS = 3;
+  
+  public static void main(String[] args) throws ResourceInitializationException {
+    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
+        FilesInDirectoryCollectionReader.class,
+        ConfigParameterConstants.PARAM_INPUTDIR,
+        "org/apache/ctakes/examples/annotation/anafora_annotated/",
+        FilesInDirectoryCollectionReader.PARAM_RECURSE,
+        true);
+    
+    AnalysisEngineDescription aed = getThreadsafePipeline();
+    CpeBuilder cpeBuilder = new CpeBuilder();
+    try{
+      cpeBuilder.setReader(reader);
+      cpeBuilder.setAnalysisEngine(aed);
+      cpeBuilder.setMaxProcessingUnitThreadCount(NUM_THREADS);
+      cpeBuilder.getCpeDescription().getCpeCasProcessors().setPoolSize(NUM_THREADS);
+      cpeBuilder.getCpeDescription().getCpeCasProcessors().setConcurrentPUCount(NUM_THREADS);
+      CollectionProcessingEngine cpe = cpeBuilder.createCpe(null);
+      cpe.addStatusCallbackListener(new UimaCallbackListener(cpe));
+      cpe.process();
+    }catch(Exception e){
+      e.printStackTrace();
+    }
+  }
+  
+  public static AnalysisEngineDescription getThreadsafePipeline() throws ResourceInitializationException{
+    AggregateBuilder builder = new AggregateBuilder();
+    builder.add( SimpleSegmentAnnotator.createAnnotatorDescription() );
+    builder.add( SentenceDetector.createAnnotatorDescription() );
+    builder.add( TokenizerAnnotatorPTB.createAnnotatorDescription() );
+    try{
+      builder.add( ThreadSafeLvg.createAnnotatorDescription() );
+    }catch(IOException e){
+      throw new ResourceInitializationException(e);
+    }
+    builder.add( ContextDependentTokenizerAnnotator.createAnnotatorDescription() );
+    builder.add( POSTagger.createAnnotatorDescription() );
+    builder.add( DefaultJCasTermAnnotator.createAnnotatorDescription() );
+    builder.add( ClearNLPDependencyParserAE.createAnnotatorDescription() );
+    builder.add( PolarityCleartkAnalysisEngine.createAnnotatorDescription() );
+    builder.add( UncertaintyCleartkAnalysisEngine.createAnnotatorDescription() );
+    builder.add( HistoryCleartkAnalysisEngine.createAnnotatorDescription() );
+    builder.add( ConditionalCleartkAnalysisEngine.createAnnotatorDescription() );
+    builder.add( GenericCleartkAnalysisEngine.createAnnotatorDescription() );
+    builder.add( SubjectCleartkAnalysisEngine.createAnnotatorDescription() );
+
+    return builder.createAggregateDescription();
+  }
+  
+  public static class UimaCallbackListener implements StatusCallbackListener {
+
+    CollectionProcessingEngine cpe = null;
+    long startTime;
+    
+    public UimaCallbackListener(CollectionProcessingEngine cpe) {
+      this.cpe = cpe;
+    }
+    
+    @Override
+    public void initializationComplete() {
+      System.out.println("CPE Initialization complete.");
+      startTime = System.currentTimeMillis();
+    }
+
+    @Override
+    public void batchProcessComplete() {
+    }
+
+    @Override
+    public void collectionProcessComplete() {
+      System.out.println("Processing complete!");
+      
+      ProcessTrace perf = cpe.getPerformanceReport();
+      System.out.println("Performance: " + perf.toString());
+      
+      long duration = System.currentTimeMillis() - startTime;
+      System.out.println("Total run time: " + duration + "ms");
+
+    }
+
+    @Override
+    public void paused() {
+    }
+
+    @Override
+    public void resumed() {
+    }
+
+    @Override
+    public void aborted() {
+    }
+
+    @Override
+    public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
+      try {
+        JCas jcas = aCas.getJCas();
+        String docId = DocumentIDAnnotationUtil.getDeepDocumentId(jcas);
+        System.out.println("Doc id for entity process complete: " + docId);
+        System.out.println("Found " + JCasUtil.select(jcas, IdentifiedAnnotation.class).size() + " medical terms.");
+        // The following was a bit verbose, but here's how you'd print out the text of each discovered entity span:
+        /*
+        for(IdentifiedAnnotation annot : JCasUtil.select(jcas, IdentifiedAnnotation.class)){
+          System.out.println("Found entity: " + annot.getCoveredText());
+        }
+        */
+      } catch (CASException e) {
+        e.printStackTrace();
+        throw new RuntimeException(e);
+      }
+    }
+    
+  }
+}



Mime
View raw message