ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mattco...@apache.org
Subject svn commit: r1438676 - in /incubator/ctakes/trunk/ctakes-assertion: resources/launch/ src/main/java/org/apache/ctakes/assertion/eval/ src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ src/main/java/org/apache/ctakes/assertion/medfacts/cleartk...
Date Fri, 25 Jan 2013 19:59:20 GMT
Author: mattcoarr
Date: Fri Jan 25 19:59:20 2013
New Revision: 1438676

URL: http://svn.apache.org/viewvc?rev=1438676&view=rev
Log:
adding changes to include cue phrase and zone features

Added:
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest2.java
Modified:
    incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java

Modified: incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch?rev=1438676&r1=1438675&r2=1438676&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch (original)
+++ incubator/ctakes/trunk/ctakes-assertion/resources/launch/GoldEntityAndAttributeReaderPipelineForSeedCorpus.launch Fri Jan 25 19:59:20 2013
@@ -8,7 +8,7 @@
 </listAttribute>
 <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.assertion.pipelines.GoldEntityAndAttributeReaderPipelineForSeedCorpus"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/Mayo/UMLS_CEM"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="/work/medfacts/sharp/data/2012-12-10_full_november_seed_corpus/clean_seed_corpus_data/all_batches_UMLS_CEM"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-assertion"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
 </launchConfiguration>

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java?rev=1438676&r1=1438675&r2=1438676&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java Fri Jan 25 19:59:20 2013
@@ -22,6 +22,7 @@ import java.io.File;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -179,10 +180,10 @@ public class AssertionEvalBasedOnModifie
     //Options options = new Options();
     options.parseOptions(args);
     
-//    System.err.println("forcing skipping of subject processing!!!");
-//    options.runSubject = false;
-//    System.err.println("forcing skipping of generic processing!!!");
-//    options.runGeneric = false;
+    System.err.println("forcing skipping of subject processing!!!");
+    options.runSubject = false;
+    System.err.println("forcing skipping of generic processing!!!");
+    options.runGeneric = false;
 //    System.err.println("forcing skipping of polarity processing!!!");
 //    options.runPolarity = false;
 //    System.err.println("forcing skipping of uncertainty processing!!!");
@@ -385,29 +386,34 @@ public static void printScore(Map<String
     AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
     builder.add(assertionAttributeClearerAnnotator);
     
-    URI generalSectionRegexFileUri =
-        this.getClass().getClassLoader().getResource("org/mitre/medfacts/zoner/section_regex.xml").toURI();
-//      ExternalResourceDescription generalSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
-//          SectionRegexConfigurationResource.class, new File(generalSectionRegexFileUri));
-      AnalysisEngineDescription zonerAnnotator =
-          AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
-              ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
-              generalSectionRegexFileUri
-              );
-      builder.add(zonerAnnotator);
-
-      URI mayoSectionRegexFileUri =
-          this.getClass().getClassLoader().getResource("org/mitre/medfacts/uima/mayo_sections.xml").toURI();
-//        ExternalResourceDescription mayoSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
-//            SectionRegexConfigurationResource.class, new File(mayoSectionRegexFileUri));
-      AnalysisEngineDescription mayoZonerAnnotator =
-          AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
-              ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
-              mayoSectionRegexFileUri
-              );
-      builder.add(mayoZonerAnnotator);
-    
+    String generalSectionRegexFileUri =
+        "org/mitre/medfacts/zoner/section_regex.xml";
+    AnalysisEngineDescription zonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            generalSectionRegexFileUri
+            );
+    builder.add(zonerAnnotator);
+
+    String mayoSectionRegexFileUri =
+        "org/mitre/medfacts/uima/mayo_sections.xml";
+    AnalysisEngineDescription mayoZonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            mayoSectionRegexFileUri
+            );
+    builder.add(mayoZonerAnnotator);
+  
+    URL assertionCuePhraseLookupAnnotatorDescriptorUrl1 = this.getClass().getClassLoader().getResource("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml");
+    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl1 (slashes): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl1));
+    URL assertionCuePhraseLookupAnnotatorDescriptorUrl2 = this.getClass().getClassLoader().getResource("org.apache.ctakes.dictionary.lookup.AssertionCuePhraseDictionaryLookupAnnotator.xml");
+    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl2 (periods): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl2));
+
     
+    AnalysisEngineDescription cuePhraseLookupAnnotator =
+        AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
+    builder.add(cuePhraseLookupAnnotator);
+
     if (options.runPolarity)
     {
 	    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
@@ -524,6 +530,28 @@ public static void printScore(Map<String
     AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
     builder.add(assertionAttributeClearerAnnotator);
     
+    AnalysisEngineDescription cuePhraseLookupAnnotator =
+        AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
+    builder.add(cuePhraseLookupAnnotator);
+    
+    String generalSectionRegexFileUri =
+      "org/mitre/medfacts/zoner/section_regex.xml";
+    AnalysisEngineDescription zonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            generalSectionRegexFileUri
+            );
+    builder.add(zonerAnnotator);
+
+    String mayoSectionRegexFileUri =
+      "org/mitre/medfacts/uima/mayo_sections.xml";
+    AnalysisEngineDescription mayoZonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            mayoSectionRegexFileUri
+            );
+    builder.add(mayoZonerAnnotator);
+    
     if (options.runPolarity)
     {
 	    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
@@ -651,7 +679,7 @@ public static void printScore(Map<String
       
       Collection<IdentifiedAnnotation> goldEntitiesAndEvents = new ArrayList<IdentifiedAnnotation>();

       Collection<EntityMention> goldEntities = JCasUtil.select(goldView, EntityMention.class);
-	  goldEntitiesAndEvents.addAll(goldEntities);
+      goldEntitiesAndEvents.addAll(goldEntities);
       Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
       goldEntitiesAndEvents.addAll(goldEvents);
       System.out.format("gold entities: %d%ngold events: %d%n%n", goldEntities.size(), goldEvents.size());

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1438676&r1=1438675&r2=1438676&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Fri Jan 25 19:59:20 2013
@@ -49,6 +49,7 @@ import org.cleartk.classifier.feature.ex
 import org.cleartk.classifier.feature.extractor.ContextExtractor.Preceding;
 import org.cleartk.classifier.feature.extractor.ContextExtractor.Following;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
 import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SpannedTextExtractor;
@@ -69,8 +70,10 @@ import org.uimafit.util.JCasUtil;
 
 import org.apache.commons.lang.StringUtils;
 
+import org.apache.ctakes.assertion.zoner.types.Zone;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -135,6 +138,8 @@ public abstract class AssertionCleartkAn
   protected List<ContextExtractor<BaseToken>> tokenContextFeatureExtractors;
   protected List<CleartkExtractor> tokenCleartkExtractors;
   protected List<SimpleFeatureExtractor> entityFeatureExtractors;
+
+  protected CleartkExtractor cuePhraseInWindowExtractor;
   
   @SuppressWarnings("deprecation")
   public void initialize(UimaContext context) throws ResourceInitializationException {
@@ -201,6 +206,22 @@ public abstract class AssertionCleartkAn
     //List<Feature> features = new ArrayList<Feature>();
     //ConllDependencyNode node1 = findAnnotationHead(jCas, arg1);
 
+    CombinedExtractor baseExtractorCuePhraseCategory =
+        new CombinedExtractor
+          (
+           new CoveredTextExtractor(),
+           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
+           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
+           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily")
+          );
+    
+    cuePhraseInWindowExtractor = new CleartkExtractor(
+          AssertionCuePhraseAnnotation.class,
+          baseExtractorCuePhraseCategory,
+          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
+          new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
+          );
+    
   }
 
   public abstract void setClassLabel(IdentifiedAnnotation entityMention, Instance<String> instance) throws AnalysisEngineProcessException;
@@ -244,6 +265,9 @@ public abstract class AssertionCleartkAn
 //    Map<IdentifiedAnnotation, Collection<Sentence>> coveringSentenceMap = JCasUtil.indexCovering(identifiedAnnotationView, IdentifiedAnnotation.class, Sentence.class);
 //    Map<Sentence, Collection<BaseToken>> tokensCoveredInSentenceMap = JCasUtil.indexCovered(identifiedAnnotationView, Sentence.class, BaseToken.class);
 
+    Map<EntityMention, Collection<Zone>> coveringZoneMap =
+        JCasUtil.indexCovering(jCas, EntityMention.class, Zone.class);
+    
     List<Instance<String>> instances = new ArrayList<Instance<String>>();
     // generate a list of training instances for each sentence in the document
     Collection<IdentifiedAnnotation> entities = JCasUtil.select(identifiedAnnotationView, IdentifiedAnnotation.class);
@@ -312,6 +336,16 @@ public abstract class AssertionCleartkAn
           //instance.addAll(extractor.extractWithin(identifiedAnnotationView, entityMention, sentence));
     	  instance.addAll(extractor.extract(identifiedAnnotationView, entityMention));
         }
+      
+      List<Feature> cuePhraseFeatures =
+          cuePhraseInWindowExtractor.extract(jCas, entityMention);
+          //cuePhraseInWindowExtractor.extractWithin(jCas, entityMention, firstCoveringSentence);
+      
+      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
+      {
+        instance.addAll(cuePhraseFeatures);
+      }
+
 
         
       /*
@@ -320,11 +354,44 @@ public abstract class AssertionCleartkAn
       }
       */
       
+      List<Feature> zoneFeatures = extractZoneFeatures(coveringZoneMap, entityMention);
+      if (zoneFeatures != null && !zoneFeatures.isEmpty())
+      {
+        instance.addAll(zoneFeatures);
+      }
+       
+
       setClassLabel(entityMention, instance);
       
     }
     
   }
+  
+  public List<Feature> extractZoneFeatures(Map<EntityMention, Collection<Zone>> coveringZoneMap, IdentifiedAnnotation entityMention)
+  {
+    final Collection<Zone> zoneList = coveringZoneMap.get(entityMention);
+    
+    if (zoneList == null || zoneList.isEmpty())
+    {
+      //return null;
+      logger.info("AssertionCleartkAnalysisEngine.extractZoneFeatures() early END (no zones)");
+      new ArrayList<Feature>();
+    } else
+    {
+      logger.info("AssertionCleartkAnalysisEngine.extractZoneFeatures() found zones and adding zone features");
+    }
+    
+    ArrayList<Feature> featureList = new ArrayList<Feature>();
+    for (Zone zone : zoneList)
+    {
+      Feature currentFeature = new Feature("zone", zone.getLabel());
+      logger.info(String.format("zone: %s", zone.getLabel()));
+      logger.info(String.format("zone feature: %s", currentFeature.toString()));
+      featureList.add(currentFeature);
+    }
+    
+    return featureList;
+  }
 
   public static AnalysisEngineDescription getDescription(Object... additionalConfiguration)
 	      throws ResourceInitializationException {

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java?rev=1438676&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/CreateAssertionDescriptor.java Fri Jan 25 19:59:20 2013
@@ -0,0 +1,260 @@
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.apache.ctakes.assertion.eval.AssertionEvalBasedOnModifier;
+import org.apache.ctakes.assertion.eval.AssertionEvalBasedOnModifier.ReferenceAnnotationsSystemAssertionClearer;
+import org.apache.ctakes.assertion.eval.AssertionEvalBasedOnModifier.ReferenceIdentifiedAnnotationsSystemToGoldCopier;
+import org.apache.ctakes.assertion.eval.AssertionEvalBasedOnModifier.ReferenceSupportingAnnotationsSystemToGoldCopier;
+import org.apache.ctakes.core.ae.DocumentIdPrinterAnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.cleartk.classifier.opennlp.DefaultMaxentDataWriterFactory;
+import org.mitre.medfacts.uima.ZoneAnnotator;
+import org.uimafit.component.xwriter.XWriter;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.ConfigurationParameterFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.xml.sax.SAXException;
+
+public class CreateAssertionDescriptor
+{
+  
+  public static final Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultMaxentDataWriterFactory.class;
+
+  /**
+   * @param args
+   * @throws URISyntaxException 
+   * @throws FileNotFoundException 
+   * @throws ResourceInitializationException 
+   */
+  public static void main(String[] args) throws Exception
+  {
+    CreateAssertionDescriptor creator = new CreateAssertionDescriptor();
+    
+    creator.execute();
+
+  }
+  
+  public void execute() throws Exception
+  {
+    createTrainDescriptor();
+    createTestDescriptor();
+  }
+  
+  public void createTrainDescriptor() throws Exception
+  {
+    File trainDirectory = new File("/tmp/assertion_data/train");
+    File directory = trainDirectory;
+    AggregateBuilder builder = new AggregateBuilder();
+
+////
+    AnalysisEngineDescription goldCopierIdentifiedAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
+    builder.add(goldCopierIdentifiedAnnotsAnnotator);
+    
+    AnalysisEngineDescription goldCopierSupportingAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceSupportingAnnotationsSystemToGoldCopier.class);
+    builder.add(goldCopierSupportingAnnotsAnnotator);
+    
+    AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
+    builder.add(assertionAttributeClearerAnnotator);
+    
+    String generalSectionRegexFileUri =
+        "org/mitre/medfacts/zoner/section_regex.xml";
+    AnalysisEngineDescription zonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            generalSectionRegexFileUri
+            );
+    builder.add(zonerAnnotator);
+
+    String mayoSectionRegexFileUri =
+        "org/mitre/medfacts/uima/mayo_sections.xml";
+    AnalysisEngineDescription mayoZonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            mayoSectionRegexFileUri
+            );
+    builder.add(mayoZonerAnnotator);
+    
+    
+    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        polarityAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+        this.dataWriterFactoryClass.getName(),
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        new File(directory, "polarity").getPath()
+        );
+    builder.add(polarityAnnotator);
+
+    AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        conditionalAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+        this.dataWriterFactoryClass.getName(),
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        new File(directory, "conditional").getPath()
+        );
+    builder.add(conditionalAnnotator);
+
+    AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        uncertaintyAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+        this.dataWriterFactoryClass.getName(),
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        new File(directory, "uncertainty").getPath()
+        );
+    builder.add(uncertaintyAnnotator);
+
+    AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        subjectAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+        this.dataWriterFactoryClass.getName(),
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        new File(directory, "subject").getPath()
+        );
+    builder.add(subjectAnnotator);
+
+    AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        genericAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+        this.dataWriterFactoryClass.getName(),
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        new File(directory, "generic").getPath()
+        );
+    builder.add(genericAnnotator);
+    
+////
+    
+    FileOutputStream outputStream = new FileOutputStream("desc/analysis_engine/assertion_train.xml");
+    
+    AnalysisEngineDescription description = builder.createAggregateDescription();
+    
+    description.toXML(outputStream);
+  }
+
+  public void createTestDescriptor() throws Exception
+  {
+    File testDirectory = new File("/tmp/assertion_data/test");
+    File directory = testDirectory;
+    File testOutputDirectory = new File("/tmp/assertion_data/test_output");
+    AggregateBuilder builder = new AggregateBuilder();
+
+////
+    AnalysisEngineDescription goldCopierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
+    builder.add(goldCopierAnnotator);
+    
+    AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
+    builder.add(assertionAttributeClearerAnnotator);
+    
+    String generalSectionRegexFileUri =
+      "org/mitre/medfacts/zoner/section_regex.xml";
+    AnalysisEngineDescription zonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            generalSectionRegexFileUri
+            );
+    builder.add(zonerAnnotator);
+
+    String mayoSectionRegexFileUri =
+      "org/mitre/medfacts/uima/mayo_sections.xml";
+    AnalysisEngineDescription mayoZonerAnnotator =
+        AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+            ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+            mayoSectionRegexFileUri
+            );
+    builder.add(mayoZonerAnnotator);
+    
+    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        polarityAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(new File(directory, "polarity"), "model.jar").getPath()
+        );
+    builder.add(polarityAnnotator);
+
+    AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ConditionalCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        conditionalAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(new File(directory, "conditional"), "model.jar").getPath()
+        );
+    builder.add(conditionalAnnotator);
+  
+    AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(UncertaintyCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        uncertaintyAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(new File(directory, "uncertainty"), "model.jar").getPath()
+        );
+    builder.add(uncertaintyAnnotator);
+
+    AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createPrimitiveDescription(SubjectCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        subjectAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(new File(directory, "subject"), "model.jar").getPath()
+        );
+    builder.add(subjectAnnotator);
+
+    AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createPrimitiveDescription(GenericCleartkAnalysisEngine.class); //,  this.additionalParamemters);
+    ConfigurationParameterFactory.addConfigurationParameters(
+        genericAnnotator,
+        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+        AssertionEvalBasedOnModifier.GOLD_VIEW_NAME,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(new File(directory, "generic"), "model.jar").getPath()
+        );
+    builder.add(genericAnnotator);
+
+    AnalysisEngineDescription xwriter =
+    AnalysisEngineFactory.createPrimitiveDescription(
+          XWriter.class,
+          AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
+          XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
+          testOutputDirectory,
+          XWriter.PARAM_XML_SCHEME_NAME,
+          XWriter.XMI);
+    builder.add(xwriter);
+////
+    
+    FileOutputStream outputStream = new FileOutputStream("desc/analysis_engine/assertion_test.xml");
+    
+    AnalysisEngineDescription description = builder.createAggregateDescription();
+    
+    description.toXML(outputStream);
+  }
+  
+  
+}

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest.java?rev=1438676&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest.java Fri Jan 25 19:59:20 2013
@@ -0,0 +1,149 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.ctakes.core.ae.DocumentIdPrinterAnalysisEngine;
+import org.apache.ctakes.core.cr.XMIReader;
+import org.apache.ctakes.core.util.CtakesFileNamer;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.NamingExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.uimafit.component.xwriter.XWriter;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.JCasFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Scratch driver that loads one XMI document and logs the assertion cue-phrase
+ * features extracted around each entity or event mention.
+ */
+public class CueWordTest
+{
+  /** XMI document analyzed when no path is given on the command line. */
+  private static final String DEFAULT_XMI_PATH =
+      "/work/medfacts/sharp/data/2013-01-11_cue_phrase_feature_test/ON03FP00037D00207__merged.txt.xmi";
+
+  Logger logger = Logger.getLogger(CueWordTest.class.getName());
+
+  /**
+   * Runs the driver.
+   *
+   * @param args optional; {@code args[0]}, when present, is the XMI file to analyze
+   * @throws IOException propagated from {@link #execute(String)}
+   * @throws UIMAException propagated from {@link #execute(String)}
+   */
+  public static void main(String[] args) throws UIMAException, IOException
+  {
+    String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_XMI_PATH;
+    new CueWordTest().execute(filename);
+  }
+
+  /** Analyzes {@link #DEFAULT_XMI_PATH}; kept so existing no-argument callers still work. */
+  public void execute() throws UIMAException, IOException
+  {
+    execute(DEFAULT_XMI_PATH);
+  }
+
+  /**
+   * Logs every token of the document, then, for each entity or event mention that
+   * has a covering sentence, logs the cue-phrase features extracted within that sentence.
+   *
+   * @param filename path of the XMI file handed to {@code JCasFactory.createJCas}
+   * @throws IOException declared by {@code JCasFactory.createJCas}
+   * @throws UIMAException propagated from CAS creation or feature extraction
+   */
+  public void execute(String filename) throws UIMAException, IOException
+  {
+    logger.info("starting");
+
+    TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescription();
+    JCas jcas = JCasFactory.createJCas(filename, typeSystemDescription);
+
+    logger.info("=====");
+
+    for (BaseToken currentToken : JCasUtil.select(jcas, BaseToken.class))
+    {
+      logger.info(String.format("token \"%s\" [%s]", currentToken.getCoveredText(), currentToken.getClass().getName()));
+    }
+
+    logger.info("=====");
+
+    Map<IdentifiedAnnotation, Collection<Sentence>> entityToSentenceMap =
+        JCasUtil.indexCovering(jcas, IdentifiedAnnotation.class, Sentence.class);
+
+    CombinedExtractor baseExtractorCuePhraseCategory =
+        new CombinedExtractor(
+            new CoveredTextExtractor(),
+            new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
+            new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
+            new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily"));
+
+    // Bag contexts over Preceding(5) and Following(5) AssertionCuePhraseAnnotations.
+    CleartkExtractor cuePhraseInWindowExtractor =
+        new CleartkExtractor(
+            AssertionCuePhraseAnnotation.class,
+            baseExtractorCuePhraseCategory,
+            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
+            new CleartkExtractor.Bag(new CleartkExtractor.Following(5)));
+
+    for (IdentifiedAnnotation current : JCasUtil.select(jcas, IdentifiedAnnotation.class))
+    {
+      // Only entity and event mentions are of interest.
+      if (!(current instanceof EntityMention) && !(current instanceof EventMention))
+      {
+        continue;
+      }
+
+      logger.info(String.format("identified annotation (event or entity) [%d-%d] \"%s\" [%s]",
+          current.getBegin(), current.getEnd(), current.getCoveredText(), current.getClass().getName()));
+
+      Collection<Sentence> coveringSentences = entityToSentenceMap.get(current);
+      if (coveringSentences == null || coveringSentences.isEmpty())
+      {
+        logger.info("no covering sentences found!!! continuing with next entity/event...");
+        continue;
+      }
+      logger.info(String.format("covering sentence count: %d", coveringSentences.size()));
+      Sentence firstCoveringSentence = coveringSentences.iterator().next();
+      logger.info(String.format(
+          "first covering sentence: [%d-%d] \"%s\" (%s)",
+          firstCoveringSentence.getBegin(), firstCoveringSentence.getEnd(),
+          firstCoveringSentence.getCoveredText(),
+          firstCoveringSentence.getClass().getName()));
+      // extractWithin receives the covering sentence as the bounds for the context windows.
+      List<Feature> cuePhraseFeatures =
+          cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence);
+      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
+      {
+        logger.info("### cue phrase features: " + cuePhraseFeatures);
+      }
+    }
+
+    logger.info("=====");
+    logger.info("finished");
+  }
+}

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest2.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest2.java?rev=1438676&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest2.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/CueWordTest2.java Fri Jan 25 19:59:20 2013
@@ -0,0 +1,182 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.ctakes.core.ae.DocumentIdPrinterAnalysisEngine;
+import org.apache.ctakes.core.cr.XMIReader;
+import org.apache.ctakes.core.util.CtakesFileNamer;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.NamingExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.uimafit.component.xwriter.XWriter;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.factory.JCasFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Scratch driver that loads one XMI document and, for each entity or event
+ * mention, logs the cue-phrase and token features extracted within its
+ * covering sentence.
+ */
+public class CueWordTest2
+{
+  /** XMI document analyzed when no path is given on the command line. */
+  private static final String DEFAULT_XMI_PATH =
+      "/work/medfacts/sharp/data/2013-01-11_cue_phrase_feature_test/ON03FP00037D00207__merged.txt.xmi";
+
+  Logger logger = Logger.getLogger(CueWordTest2.class.getName());
+
+  /**
+   * Runs the driver.
+   *
+   * @param args optional; {@code args[0]}, when present, is the XMI file to analyze
+   * @throws IOException propagated from {@link #execute(String)}
+   * @throws UIMAException propagated from {@link #execute(String)}
+   */
+  public static void main(String[] args) throws UIMAException, IOException
+  {
+    String filename = (args != null && args.length > 0) ? args[0] : DEFAULT_XMI_PATH;
+    new CueWordTest2().execute(filename);
+  }
+
+  /**
+   * Analyzes {@link #DEFAULT_XMI_PATH}; kept so existing no-argument callers still work.
+   *
+   * @throws IOException propagated from {@link #execute(String)}
+   * @throws UIMAException propagated from {@link #execute(String)}
+   */
+  public void execute() throws UIMAException, IOException
+  {
+    execute(DEFAULT_XMI_PATH);
+  }
+
+  /**
+   * Logs every token of the document, then, for each entity or event mention that
+   * has a covering sentence, logs the cue-phrase and token features extracted within it.
+   *
+   * @param filename path of the XMI file handed to {@code JCasFactory.createJCas}
+   * @throws IOException declared by {@code JCasFactory.createJCas}
+   * @throws UIMAException propagated from CAS creation or feature extraction
+   */
+  public void execute(String filename) throws UIMAException, IOException
+  {
+    logger.info("starting");
+
+    TypeSystemDescription typeSystemDescription = TypeSystemDescriptionFactory.createTypeSystemDescription();
+    JCas jcas = JCasFactory.createJCas(filename, typeSystemDescription);
+
+    logger.info("=====");
+
+    for (BaseToken currentToken : JCasUtil.select(jcas, BaseToken.class))
+    {
+      logger.info(String.format("token \"%s\" [%s]", currentToken.getCoveredText(), currentToken.getClass().getName()));
+    }
+
+    logger.info("=====");
+
+    Map<IdentifiedAnnotation, Collection<Sentence>> entityToSentenceMap =
+        JCasUtil.indexCovering(jcas, IdentifiedAnnotation.class, Sentence.class);
+
+    CombinedExtractor baseExtractorCuePhraseCategory =
+        new CombinedExtractor(
+            new CoveredTextExtractor(),
+            new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
+            new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
+            new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily"));
+
+    // Bag contexts over Preceding(5) and Following(5) AssertionCuePhraseAnnotations.
+    CleartkExtractor cuePhraseInWindowExtractor =
+        new CleartkExtractor(
+            AssertionCuePhraseAnnotation.class,
+            baseExtractorCuePhraseCategory,
+            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
+            new CleartkExtractor.Bag(new CleartkExtractor.Following(5)));
+
+    // Covered text of BaseTokens in positional contexts plus bag contexts of 10 tokens per side.
+    CleartkExtractor tokenExtraction1 =
+        new CleartkExtractor(
+            BaseToken.class,
+            new CoveredTextExtractor(),
+            new CleartkExtractor.LastCovered(2),
+            new CleartkExtractor.Preceding(5),
+            new CleartkExtractor.Following(4),
+            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
+            new CleartkExtractor.Bag(new CleartkExtractor.Following(10)));
+
+    for (IdentifiedAnnotation current : JCasUtil.select(jcas, IdentifiedAnnotation.class))
+    {
+      // Only entity and event mentions are of interest.
+      if (!(current instanceof EntityMention) && !(current instanceof EventMention))
+      {
+        continue;
+      }
+
+      logger.info(String.format("identified annotation (event or entity) [%d-%d] \"%s\" [%s]",
+          current.getBegin(), current.getEnd(), current.getCoveredText(), current.getClass().getName()));
+
+      Collection<Sentence> coveringSentences = entityToSentenceMap.get(current);
+      if (coveringSentences == null || coveringSentences.isEmpty())
+      {
+        logger.info("no covering sentences found!!! continuing with next entity/event...");
+        continue;
+      }
+      logger.info(String.format("covering sentence count: %d", coveringSentences.size()));
+      Sentence firstCoveringSentence = coveringSentences.iterator().next();
+
+      logger.info(String.format(
+          "first covering sentence: [%d-%d] \"%s\" (%s)",
+          firstCoveringSentence.getBegin(), firstCoveringSentence.getEnd(),
+          firstCoveringSentence.getCoveredText(),
+          firstCoveringSentence.getClass().getName()));
+
+      // extractWithin receives the covering sentence as the bounds for the context windows.
+      logFeatures("cue phrase",
+          cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence));
+      logFeatures("token phrase",
+          tokenExtraction1.extractWithin(jcas, current, firstCoveringSentence));
+    }
+
+    logger.info("=====");
+
+    logger.info("finished");
+  }
+
+  /**
+   * Logs a feature list under the given label; null or empty lists are skipped.
+   *
+   * @param label text placed between "### " and " features: " in the log line
+   * @param features features to log; may be null
+   */
+  private void logFeatures(String label, List<Feature> features)
+  {
+    if (features != null && !features.isEmpty())
+    {
+      logger.info("### " + label + " features: " + features);
+    }
+  }
+
+}



Mime
View raw message