ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1814594 [1/2] - in /ctakes/trunk: ctakes-core/src/main/java/org/apache/ctakes/core/patient/ ctakes-coreference/ ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/ ctakes-coreference/src/main/java/org/apache/ctakes/coreferen...
Date Wed, 08 Nov 2017 16:09:47 GMT
Author: tmill
Date: Wed Nov  8 16:09:47 2017
New Revision: 1814594

URL: http://svn.apache.org/viewvc?rev=1814594&view=rev
Log:
New patient collector model working for coreference evaluation.

Added:
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java
      - copied, changed from r1814586, ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableCacheRelationExtractor.java
Removed:
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MarkableHeadTreeCreator.java
    ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/struct/MapFactory.java
Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java
    ctakes/trunk/ctakes-coreference/pom.xml
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfEventCoreference.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/factory/CoreferenceAnnotatorFactory.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterMentionFetcher.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/MarkableUtilities.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/patient/AbstractPatientConsumer.java Wed Nov  8 16:09:47 2017
@@ -61,7 +61,7 @@ abstract public class AbstractPatientCon
     * {@inheritDoc}
     */
    @Override
-   final public void collectionProcessComplete() throws AnalysisEngineProcessException {
+   public void collectionProcessComplete() throws AnalysisEngineProcessException {
       super.collectionProcessComplete();
       final Collection<String> allPatientIds = PatientNoteStore.getInstance().getPatientIds();
       for ( String id : allPatientIds ) {

Modified: ctakes/trunk/ctakes-coreference/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/pom.xml?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/pom.xml (original)
+++ ctakes/trunk/ctakes-coreference/pom.xml Wed Nov  8 16:09:47 2017
@@ -52,6 +52,11 @@
 			<groupId>org.apache.ctakes</groupId>
 			<artifactId>ctakes-assertion</artifactId>
 		</dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-ml-liblinear</artifactId>
+			<version>2.0.0</version>
+		</dependency>
 	</dependencies>
 	<build>
 		<plugins>

Copied: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java (from r1814586, ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java?p2=ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java&p1=ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java&r1=1814586&r2=1814594&rev=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainCoNLLWriter.java Wed Nov  8 16:09:47 2017
@@ -34,78 +34,36 @@ import org.cleartk.util.ViewUriUtil;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Multiset;
 
-@PipeBitInfo(
-      name = "Coreference Score Writer",
-      description = "Writes scores of system coreference chains compared to chains in a Gold View.",
-      role = PipeBitInfo.Role.SPECIAL,
-      dependencies = { PipeBitInfo.TypeProduct.MARKABLE, PipeBitInfo.TypeProduct.COREFERENCE_RELATION }
-)
-public class CoreferenceChainScoringOutput extends JCasAnnotator_ImplBase{
-  @ConfigurationParameter(
-      name = ConfigParameterConstants.PARAM_OUTPUTDIR,
-      mandatory = true,
-      description = "Name of chain file in CoNLL format"
-      )
-  private String outputFilename;
+public class CoreferenceChainCoNLLWriter {
   private PrintWriter out = null;
   private PrintWriter icOut = null;
-  
-  public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
-  @ConfigurationParameter(
-      name = PARAM_GOLD_VIEW_NAME,
-      mandatory = false,
-      description = "Name of gold view in jcas"
-      )
-  private String goldViewName = null;
-  boolean isGold;
-  
-  private int docNum = 0;
-  
-  @Override
-  public void initialize(final UimaContext context) throws ResourceInitializationException{
-    super.initialize(context);
-    
-    try {
-      out = new PrintWriter(outputFilename);
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-      throw new ResourceInitializationException(e);
-    }
-    
-    if(goldViewName != null) isGold = true;
-    else{
-      isGold = false;
-      try {
-        icOut = new PrintWriter(outputFilename + ".icarus");
-      } catch (FileNotFoundException e) {
-        e.printStackTrace();
-        throw new ResourceInitializationException(e);
-      }
-    }
+  int docNum=0;
+
+  public CoreferenceChainCoNLLWriter(String outputFile) throws FileNotFoundException {
+    out = new PrintWriter(outputFile);
   }
   
-  @Override
-  public void process(JCas jCas) throws AnalysisEngineProcessException {
+  public void writeCas(JCas jCas) throws AnalysisEngineProcessException {
     String myView = jCas.getViewName();
     File filename = new File(ViewUriUtil.getURI(jCas));
-    JCas chainsCas = null;
-    try {
-       chainsCas = goldViewName != null ? jCas.getView(goldViewName) : jCas;
-    } catch (CASException e) {
-      e.printStackTrace();
-      throw new AnalysisEngineProcessException(e);
-    }
+//    JCas chainsCas = null;
+//    try {
+//       chainsCas = goldViewName != null ? jCas.getView(goldViewName) : jCas;
+//    } catch (CASException e) {
+//      e.printStackTrace();
+//      throw new AnalysisEngineProcessException(e);
+//    }
     int chainNum = 1;
     HashMap<Annotation, Integer> ent2chain = new HashMap<>();
     
-    if(isGold) System.out.println("\nGold chains:");
-    else{
-      icOut.println(String.format("#begin document (%s); part 000", filename.getPath()));
-      System.out.println("\nSystem chains:");
-    }
+//    if(isGold) System.out.println("\nGold chains:");
+//    else{
+//      icOut.println(String.format("#begin document (%s); part 000", filename.getPath()));
+//      System.out.println("\nChains:");
+//    }
     
     
-    Collection<CollectionTextRelation> rels = JCasUtil.select(chainsCas, CollectionTextRelation.class);
+    Collection<CollectionTextRelation> rels = JCasUtil.select(jCas, CollectionTextRelation.class);
     if(rels.size() == 0){
       return;
     }
@@ -133,7 +91,7 @@ public class CoreferenceChainScoringOutp
         members = ((NonEmptyFSList)members).getTail();
         System.out.print("Mention: " + mention.getCoveredText().replace("\n", "<CR>"));
         System.out.print(" (" + mention.getBegin() + ", " + mention.getEnd() + ")");
-        if(!isGold && !mention.getView().getViewName().equals(myView)){
+        if(!mention.getView().getViewName().equals(myView)){
           System.out.print("[DOC:" + mention.getView().getViewName() + "]");
         }
         System.out.print("  ----->    ");
@@ -173,7 +131,7 @@ public class CoreferenceChainScoringOutp
       if(token.getCoveredText().length() > 1 && token.getCoveredText().endsWith(".")){
         lastInd = token.getEnd()-1;
       }
-      List<Markable> markables = new ArrayList<>(JCasUtil.selectCovering(chainsCas, Markable.class, token.getBegin(), lastInd));
+      List<Markable> markables = new ArrayList<>(JCasUtil.selectCovering(jCas, Markable.class, token.getBegin(), lastInd));
       List<Annotation> startMention = new ArrayList<>();
       Multiset<Integer> endMention = HashMultiset.create();
       List<Integer> wholeMention = new ArrayList<>();
@@ -196,9 +154,9 @@ public class CoreferenceChainScoringOutp
             endMention.add(ent2chain.get(markable));
           }
           
-          if(!isGold){
-            icOut.println(String.format("%d-%d-%d\n", sentId, markable.getBegin(), markable.getEnd()));
-          }
+//          if(!isGold){
+//            icOut.println(String.format("%d-%d-%d\n", sentId, markable.getBegin(), markable.getEnd()));
+//          }
         }
       }
 
@@ -298,9 +256,9 @@ public class CoreferenceChainScoringOutp
         sentId++;
       }
     }
-    if(!isGold){
-      icOut.println("#end document");
-    }
+//    if(!isGold){
+//      icOut.println("#end document");
+//    }
     out.println("#end document " + filename.getPath());
     out.flush();
     docNum++;

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/CoreferenceChainScoringOutput.java Wed Nov  8 16:09:47 2017
@@ -11,6 +11,7 @@ import java.util.List;
 import org.apache.ctakes.constituency.parser.util.TreeUtils;
 import org.apache.ctakes.core.config.ConfigParameterConstants;
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
@@ -19,6 +20,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.CASRuntimeException;
 import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.util.JCasUtil;
@@ -87,13 +89,23 @@ public class CoreferenceChainScoringOutp
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
     String myView = jCas.getViewName();
-    File filename = new File(ViewUriUtil.getURI(jCas));
+    File filename = null;
+    try{
+      filename = new File(ViewUriUtil.getURI(jCas));
+    }catch(Exception e){
+      filename = new File(DocumentIDAnnotationUtil.getDocumentID(jCas));
+    }
+
     JCas chainsCas = null;
     try {
        chainsCas = goldViewName != null ? jCas.getView(goldViewName) : jCas;
-    } catch (CASException e) {
-      e.printStackTrace();
-      throw new AnalysisEngineProcessException(e);
+    } catch (CASRuntimeException|CASException e) {
+      try{
+        chainsCas = goldViewName != null ? jCas.getView(goldViewName + "_" + filename) : jCas;
+      } catch (CASException e2) {
+        e.printStackTrace();
+        throw new AnalysisEngineProcessException(e2);
+      }
     }
     int chainNum = 1;
     HashMap<Annotation, Integer> ent2chain = new HashMap<>();

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Wed Nov  8 16:09:47 2017
@@ -7,7 +7,9 @@ import org.apache.ctakes.core.util.ListF
 import org.apache.ctakes.coreference.ae.features.cluster.*;
 import org.apache.ctakes.coreference.ae.pairing.cluster.*;
 import org.apache.ctakes.coreference.util.ClusterMentionFetcher;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
 import org.apache.ctakes.coreference.util.MarkableUtilities;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
 import org.apache.ctakes.temporal.utils.PatientViewsUtil;
@@ -15,6 +17,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelationIdentifiedAnnotationRelation;
 import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.textsem.*;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.utils.struct.CounterMap;
@@ -49,69 +52,69 @@ import static org.apache.ctakes.corefere
 
 
 @PipeBitInfo(
-	      name = "Coreference (Clusters)",
-	      description = "Coreference annotator using mention-synchronous paradigm.",
-   	      dependencies = { BASE_TOKEN, SENTENCE, SECTION, IDENTIFIED_ANNOTATION, MARKABLE },
-   	      products = { COREFERENCE_RELATION }
-	)
+        name = "Coreference (Clusters)",
+        description = "Coreference annotator using mention-synchronous paradigm.",
+        dependencies = { BASE_TOKEN, SENTENCE, SECTION, IDENTIFIED_ANNOTATION, MARKABLE },
+        products = { COREFERENCE_RELATION }
+)
 public class MentionClusterCoreferenceAnnotator extends CleartkAnnotator<String> {
   static private final Logger LOGGER = Logger.getLogger( MentionClusterCoreferenceAnnotator.class.getSimpleName() );
 
   public static final String NO_RELATION_CATEGORY = "-NONE-";
   public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
-      "ProbabilityOfKeepingANegativeExample";
+          "ProbabilityOfKeepingANegativeExample";
   @ConfigurationParameter(
-      name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-      mandatory = false,
-      description = "probability that a negative example should be retained for training")
+          name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+          mandatory = false,
+          description = "probability that a negative example should be retained for training")
   protected double probabilityOfKeepingANegativeExample = 0.5;
 
   public static final String PARAM_USE_EXISTING_ENCODERS="UseExistingEncoders";
   @ConfigurationParameter(name = PARAM_USE_EXISTING_ENCODERS,
-      mandatory=false,
-      description = "Whether to use encoders in output directory during data writing; if we are making multiple calls")
+          mandatory=false,
+          description = "Whether to use encoders in output directory during data writing; if we are making multiple calls")
   private boolean useExistingEncoders=false;
 
   public static final String PARAM_SINGLE_DOCUMENT = "SingleDocument";
   @ConfigurationParameter(
-        name = PARAM_SINGLE_DOCUMENT,
-        mandatory = false,
-        description = "Specify that coreferences should be sought for a single document.",
-        defaultValue = "true" )
+          name = PARAM_SINGLE_DOCUMENT,
+          mandatory = false,
+          description = "Specify that coreferences should be sought for a single document.",
+          defaultValue = "true" )
   private boolean singleDocument;
 
   protected Random coin = new Random(0);
 
   boolean greedyFirst = true;
-  
+
   private static DataWriter<String> classDataWriter = null;
-  
+
   public static AnalysisEngineDescription createDataWriterDescription(
-      Class<? extends DataWriter<String>> dataWriterClass,
-      File outputDirectory,
-      float downsamplingRate) throws ResourceInitializationException {
+          Class<? extends DataWriter<String>> dataWriterClass,
+          File outputDirectory,
+          float downsamplingRate) throws ResourceInitializationException {
     return AnalysisEngineFactory.createEngineDescription(
-        MentionClusterCoreferenceAnnotator.class,
-        CleartkAnnotator.PARAM_IS_TRAINING,
-        true,
-        MentionClusterCoreferenceAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
-        downsamplingRate,
-        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
-        dataWriterClass,
-        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
-        outputDirectory,
-        MentionClusterCoreferenceAnnotator.PARAM_SINGLE_DOCUMENT,
-        false);
+            MentionClusterCoreferenceAnnotator.class,
+            CleartkAnnotator.PARAM_IS_TRAINING,
+            true,
+            MentionClusterCoreferenceAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+            downsamplingRate,
+            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+            dataWriterClass,
+            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+            outputDirectory,
+            MentionClusterCoreferenceAnnotator.PARAM_SINGLE_DOCUMENT,
+            false);
   }
 
   public static AnalysisEngineDescription createAnnotatorDescription(
-      String modelPath) throws ResourceInitializationException {
+          String modelPath) throws ResourceInitializationException {
     return AnalysisEngineFactory.createEngineDescription(
-        MentionClusterCoreferenceAnnotator.class,
-        CleartkAnnotator.PARAM_IS_TRAINING,
-        false,
-        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
-        modelPath);
+            MentionClusterCoreferenceAnnotator.class,
+            CleartkAnnotator.PARAM_IS_TRAINING,
+            false,
+            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+            modelPath);
   }
 
   public static AnalysisEngineDescription createMultidocAnnotatorDescription(
@@ -129,9 +132,9 @@ public class MentionClusterCoreferenceAn
   private List<RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>> relationExtractors = this.getFeatureExtractors();
   private List<FeatureExtractor1<Markable>> mentionExtractors = this.getMentionExtractors();
   private List<ClusterMentionPairer_ImplBase> pairExtractors = this.getPairExtractors();
-  
+
 //  private Set<String> markableStrings = null;
-  
+
   protected List<RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>> getFeatureExtractors() {
     List<RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>> extractors = new ArrayList<>();
     extractors.add(new MentionClusterAgreementFeaturesExtractor());
@@ -143,9 +146,9 @@ public class MentionClusterCoreferenceAn
     extractors.add(new MentionClusterSalienceFeaturesExtractor());
     extractors.add(new MentionClusterAttributeFeaturesExtractor());
 //    extractors.add(new MentionClusterAttributeVectorExtractor()); // does nothing yet
-    
+
 //    extractors.add(new MentionClusterDistanceFeaturesExtractor());
-    
+
     try {
 //      extractors.add(new MentionClusterDistSemExtractor("org/apache/ctakes/coreference/distsem/mimic_vectors.txt"));
 //      extractors.add(new MentionClusterDistSemExtractor("org/apache/ctakes/coreference/distsem/deps.words"));
@@ -153,10 +156,10 @@ public class MentionClusterCoreferenceAn
     } catch (IOException e) {
       e.printStackTrace();
     }
-    
+
     return extractors;
   }
-  
+
   protected List<FeatureExtractor1<Markable>> getMentionExtractors(){
     List<FeatureExtractor1<Markable>> extractors = new ArrayList<>();
     // mention features from pairwise system:
@@ -175,7 +178,7 @@ public class MentionClusterCoreferenceAn
 
     return extractors;
   }
-  
+
   protected List<ClusterMentionPairer_ImplBase> getPairExtractors(){
     List<ClusterMentionPairer_ImplBase> pairers = new ArrayList<>();
     int sentDist = 5;
@@ -186,24 +189,25 @@ public class MentionClusterCoreferenceAn
     pairers.add(new PreviousDocumentPairer());
     return pairers;
   }
-  
+
   protected Iterable<CollectionTextRelationIdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
-      JCas jcas,
-      Markable mention){
-    LinkedHashSet<CollectionTextRelationIdentifiedAnnotationPair> pairs = new LinkedHashSet<>();   
+          JCas jcas,
+          Markable mention){
+    LinkedHashSet<CollectionTextRelationIdentifiedAnnotationPair> pairs = new LinkedHashSet<>();
     for(ClusterMentionPairer_ImplBase pairer : this.pairExtractors){
       pairs.addAll(pairer.getPairs(jcas, mention));
     }
-   
+
     return pairs;
   }
-  
-  private void resetPairers(JCas jcas){
+
+  private void resetPairers(JCas jcas, Map<Markable,ConllDependencyNode> cache){
     for(ClusterMentionPairer_ImplBase pairer : this.pairExtractors){
       pairer.reset(jcas);
+      pairer.setCache(cache);
     }
   }
-   
+
   @Override
   public void initialize( final UimaContext context ) throws ResourceInitializationException {
     LOGGER.info( "Initializing ..." );
@@ -217,7 +221,7 @@ public class MentionClusterCoreferenceAn
     LOGGER.info( "Finished." );
   }
 
-  public void notYetProcess( final JCas jCas ) throws AnalysisEngineProcessException {
+  public void process( final JCas jCas ) throws AnalysisEngineProcessException {
     //this.dataWriter.write(new Instance<String>("#DEBUG " + ViewUriUtil.getURI(docCas)));
     LOGGER.info( "Finding Coreferences ..." );
 
@@ -239,63 +243,38 @@ public class MentionClusterCoreferenceAn
     LOGGER.info( "Finished." );
   }
 
-
-
-
-  @Override
-  public void process( final JCas jCas ) throws AnalysisEngineProcessException {
-
-    //this.dataWriter.write(new Instance<String>("#DEBUG " + ViewUriUtil.getURI(docCas)));
-    LOGGER.info( "Finding Coreferences ..." );
-
-    if ( singleDocument ) {
-      processDocument( jCas );
-      LOGGER.info( "Finished." );
-      return;
-    }
-
-    int numDocs;
-    try {
-      numDocs = Integer.valueOf( jCas.getView( PatientViewsUtil.NUM_DOCS_NAME ).getDocumentText() );
-    } catch ( NumberFormatException | CASException e ) {
-      // TODO remove stack trace when ready
-      e.printStackTrace();
-      throw new AnalysisEngineProcessException( e );
-    }
-    try ( DotLogger dotter = new DotLogger() ) {
-      for ( int docNum = 0; docNum < numDocs; docNum++ ) {
-        JCas docCas;
-        try {
-          docCas = jCas.getView( PatientViewsUtil.getViewName( docNum ) );
-        } catch ( CASException casE ) {
-          // TODO remove stack trace when ready
-          casE.printStackTrace();
-          throw new AnalysisEngineProcessException( casE );
-        }
-        processDocument( docCas );
-      }
-    } catch ( IOException ioE ) {
-      LOGGER.error( ioE.getMessage() );
-    }
-    LOGGER.info( "Finished." );
-  }
-
   private void processDocument( final JCas jCas ) throws AnalysisEngineProcessException {
     // lookup from pair of annotations to binary text relation
     // note: assumes that there will be at most one relation per pair
-    this.resetPairers( jCas );
     Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation>
-          relationLookup;
+            relationLookup;
     if ( this.isTraining() ) {
       relationLookup = ClusterMentionFetcher.getPairRelations( jCas );
     } else {
       relationLookup = new HashMap<>();
     }
 
-     final Map<Segment, Collection<Markable>> segmentMarkables = JCasUtil.indexCovered( jCas, Segment.class, Markable.class );
-     for ( Collection<Markable> markables : segmentMarkables.values() ) {
-        for ( Markable mention : markables ) {
-//System.out.println( "MCCA Markable: " + mention.getCoveredText() + " :" + mention.getBegin() + "," + mention.getEnd() );
+    Map<Markable,ConllDependencyNode> depHeadMap = new HashMap<>();
+    for(Markable m: JCasUtil.select(jCas, Markable.class)){
+      ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, m);
+      depHeadMap.put(m, headNode);
+    }
+    for(RelationFeaturesExtractor featEx : this.relationExtractors){
+      if(featEx instanceof MarkableCacheRelationExtractor){
+        ((MarkableCacheRelationExtractor)featEx).setCache(depHeadMap);
+      }
+    }
+    for(FeatureExtractor1 featEx : this.mentionExtractors){
+      if(featEx instanceof MarkableCacheRelationExtractor){
+        ((MarkableCacheRelationExtractor)featEx).setCache(depHeadMap);
+      }
+    }
+    this.resetPairers( jCas, depHeadMap );
+
+    final Map<Segment, Collection<Markable>> segmentMarkables = JCasUtil.indexCovered( jCas, Segment.class, Markable.class );
+    for ( Segment segment : JCasUtil.select(jCas, Segment.class) ) {
+      for ( Markable mention : segmentMarkables.get(segment) ) {
+//        System.out.println( "MCCA Markable: " + mention.getCoveredText() + " :" + mention.getBegin() + "," + mention.getEnd() );
         //        ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention);
         boolean singleton = true;
         double maxScore = 0.0;
@@ -306,7 +285,8 @@ public class MentionClusterCoreferenceAn
           CollectionTextRelation cluster = pair.getCluster();
           Markable firstElement = JCasUtil.select(cluster.getMembers(), Markable.class).iterator().next();
           String clusterHeadView = firstElement.getView().getViewName();
-//System.out.println( "   MCCA Pair Cluster: " + pair.getCluster().getCategory() );
+//          System.out.println( "   MCCA Pair Cluster: " + pair.getCluster().getCategory() );
+//          System.out.println("MCCA Cluster head: " + firstElement.getCoveredText() + " :" + firstElement.getBegin() + "," + firstElement.getEnd());
           // apply all the feature extractors to extract the list of features
           List<Feature> features = new ArrayList<>();
           for ( RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> extractor : this.relationExtractors ) {
@@ -406,21 +386,21 @@ public class MentionClusterCoreferenceAn
     createEventClusters( jCas );
 
   }
-  
- 
+
+
   /**
    * Looks up the arguments in the specified lookup table and converts the
    * relation into a label for classification
-   * 
+   *
    * @return If this category should not be processed for training return
    *         <i>null</i> otherwise it returns the label sent to the datawriter
    */
   protected String getRelationCategory(
-      Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation> relationLookup,
-      CollectionTextRelation cluster,
-      IdentifiedAnnotation mention) {
-    CollectionTextRelationIdentifiedAnnotationRelation relation = 
-        relationLookup.get(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention));
+          Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation> relationLookup,
+          CollectionTextRelation cluster,
+          IdentifiedAnnotation mention) {
+    CollectionTextRelationIdentifiedAnnotationRelation relation =
+            relationLookup.get(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention));
     String category;
     if (relation != null) {
       category = relation.getCategory();
@@ -436,7 +416,7 @@ public class MentionClusterCoreferenceAn
    * Predict an outcome given a set of features. By default, this simply
    * delegates to the object's <code>classifier</code>. Subclasses may override
    * this method to implement more complex classification procedures.
-   * 
+   *
    * @param features
    *          The features to be classified.
    * @return The predicted outcome (label) for the features.
@@ -449,7 +429,7 @@ public class MentionClusterCoreferenceAn
    * Create a UIMA relation type based on arguments and the relation label. This
    * allows subclasses to create/define their own types: e.g. coreference can
    * create CoreferenceRelation instead of BinaryTextRelation
-   * 
+   *
    * @param jCas
    *          - JCas object, needed to create new UIMA types
   //   * @param arg1
@@ -460,11 +440,11 @@ public class MentionClusterCoreferenceAn
    *          - Name of relation
    */
   protected void createRelation(
-      JCas jCas,
-      CollectionTextRelation cluster,
-      IdentifiedAnnotation mention,
-      String predictedCategory,
-      Double confidence) {
+          JCas jCas,
+          CollectionTextRelation cluster,
+          IdentifiedAnnotation mention,
+          String predictedCategory,
+          Double confidence) {
     // add the relation to the CAS
     CollectionTextRelationIdentifiedAnnotationRelation relation = new CollectionTextRelationIdentifiedAnnotationRelation(jCas);
     relation.setCluster(cluster);
@@ -472,25 +452,25 @@ public class MentionClusterCoreferenceAn
     relation.setCategory(predictedCategory);
     relation.setConfidence(confidence);
     relation.addToIndexes();
-    
+
 //    RelationArgument arg = new RelationArgument(jCas);
 //    arg.setArgument(mention);
-    ListFactory.append(jCas, cluster.getMembers(), mention);    
+    ListFactory.append(jCas, cluster.getMembers(), mention);
   }
 
   /**
    * Create the set of Event types for every chain we found in the document.
    * Event is a non-Annotation type (i.e., no span) that has its own attributes
    * but points to an FSArray of mentions which each have their own attributes.
-   * 
+   *
    * @param jCas
    *        - JCas object, needed to create UIMA types
-   * @throws AnalysisEngineProcessException 
+   * @throws AnalysisEngineProcessException
    */
   private static void createEventClusters(JCas jCas) throws AnalysisEngineProcessException{
     // First, find the largest span identified annotation that shares a headword with the markable
     // do that by finding the head of the markable, then finding the identifiedannotations that cover it:
-    
+
     Map<Markable, List<IdentifiedAnnotation>> markable2annotations = MarkableUtilities.indexCoveringUmlsAnnotations(jCas);
     for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
       CounterMap<Class<? extends IdentifiedAnnotation>> headCounts = new CounterMap<>();
@@ -515,10 +495,10 @@ public class MentionClusterCoreferenceAn
         element = new Event(jCas);
       }else{
         Class<? extends IdentifiedAnnotation> mostCommon = headCounts.entrySet().stream()
-            .sorted(Map.Entry.<Class<? extends IdentifiedAnnotation>,Integer>comparingByValue().reversed())
-            .limit(1)
-            .map(f -> f.getKey())
-            .collect(Collectors.toList()).get(0);
+                .sorted(Map.Entry.<Class<? extends IdentifiedAnnotation>,Integer>comparingByValue().reversed())
+                .limit(1)
+                .map(f -> f.getKey())
+                .collect(Collectors.toList()).get(0);
         if(mostCommon.equals(DiseaseDisorderMention.class)){
           element = new DiseaseDisorder(jCas);
         }else if(mostCommon.equals(ProcedureMention.class)){
@@ -541,19 +521,19 @@ public class MentionClusterCoreferenceAn
 
   private static void removeSingletonClusters(JCas jcas){
     List<CollectionTextRelation> toRemove = new ArrayList<>();
-    for(CollectionTextRelation rel : JCasUtil.select(jcas, CollectionTextRelation.class)){     
+    for(CollectionTextRelation rel : JCasUtil.select(jcas, CollectionTextRelation.class)){
       NonEmptyFSList head = (NonEmptyFSList) rel.getMembers();
       if(head.getTail() instanceof EmptyFSList){
         toRemove.add(rel);
       }
     }
-    
+
     for(CollectionTextRelation rel : toRemove){
       rel.removeFromIndexes();
     }
   }
-  
- 
+
+
 //  private static final boolean dominates(Annotation arg1, Annotation arg2) {
 //    return (arg1.getBegin() <= arg2.getBegin() && arg1.getEnd() >= arg2.getEnd());
 //  }
@@ -599,7 +579,7 @@ public class MentionClusterCoreferenceAn
     return bestEnts;
   }
   */
-  
+
   public Map<HashableArguments, Double> getMarkablePairScores(JCas jCas){
     Map<HashableArguments, Double> scoreMap = new HashMap<>();
     for(CoreferenceRelation reln : JCasUtil.select(jCas, CoreferenceRelation.class)){

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/PatientMentionClusterCoreferencer.java Wed Nov  8 16:09:47 2017
@@ -74,5 +74,9 @@ public class PatientMentionClusterCorefe
       _delegate.process( patientJcas );
    }
 
-
+   @Override
+   public void collectionProcessComplete() throws AnalysisEngineProcessException {
+      super.collectionProcessComplete();
+      _delegate.collectionProcessComplete();
+   }
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java Wed Nov  8 16:09:47 2017
@@ -1,24 +1,26 @@
 package org.apache.ctakes.coreference.ae.features;
 
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
-
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.typesystem.type.constants.CONST;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.utils.struct.MapFactory;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.ml.Feature;
 
-public class TokenFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> {
+public class TokenFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>, MarkableCacheRelationExtractor {
+
+	private Map<Markable,ConllDependencyNode> cache = null;
 
 	@Override
 	public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
@@ -36,7 +38,7 @@ public class TokenFeatureExtractor imple
 		feats.add(new Feature("TOKEN_DEF1", isDefinite(s1)));
 		feats.add(new Feature("TOKEN_DEF2", isDefinite(s2)));
 		feats.add(new Feature("TOKEN_NUMAGREE",
-				numberSingular(jCas, arg1, s1) == numberSingular(jCas, arg2, s2)));
+				numberSingular(jCas, arg1, s1, cache.get((Markable)arg1)) == numberSingular(jCas, arg2, s2, cache.get((Markable)arg2))));
 
 		String gen1 = getGender(s1);
 		String gen2 = getGender(s2);
@@ -102,8 +104,7 @@ public class TokenFeatureExtractor imple
 
 	// FYI - old code used treebanknode types and found head using head rules filled in by the parser
 	// not sure if there is an appreciable difference...
-	public static boolean numberSingular(JCas jcas, Annotation arg, String s1){
-    ConllDependencyNode head = MapFactory.get(getKey(jcas), arg);
+	public static boolean numberSingular(JCas jcas, Annotation arg, String s1, ConllDependencyNode head){
 //		List<BaseToken> tokens = new ArrayList<>(JCasUtil.selectCovered(BaseToken.class, arg));
 //		for (int i = tokens.size()-1; i >=0; i--){
 //			BaseToken t = tokens.get(i);
@@ -166,4 +167,9 @@ public class TokenFeatureExtractor imple
 	public static boolean isHistory(IdentifiedAnnotation mention){
 	  return mention.getHistoryOf() == CONST.NE_HISTORY_OF_PRESENT;
 	}
+
+	@Override
+	public void setCache(Map<Markable, ConllDependencyNode> cache) {
+		this.cache = cache;
+	}
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java Wed Nov  8 16:09:47 2017
@@ -7,10 +7,13 @@ import static org.apache.ctakes.corefere
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -19,17 +22,22 @@ import org.cleartk.ml.Feature;
 import org.cleartk.ml.feature.extractor.CleartkExtractorException;
 import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
-public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>, FeatureExtractor1<Markable>, MarkableCacheRelationExtractor {
+
+  private Map<Markable, ConllDependencyNode> cache = null;
 
   public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
       IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+    if(cache == null){
+      throw new RuntimeException("This extractor requires a call to setCache()");
+    }
     List<Feature> features = new ArrayList<>();
     
     String s = mention.getCoveredText().toLowerCase();
     boolean isDem = isDemonstrative(s);
     boolean isDef = isDefinite(s);
     String gender = getGender(s);
-    boolean singular = numberSingular(jCas, mention, s);
+    boolean singular = numberSingular(jCas, mention, s, cache.get(mention));
 
     boolean matchDem = false;
     boolean matchDef = false;
@@ -55,7 +63,7 @@ public class MentionClusterAgreementFeat
       if(!matchGender && getGender(m).equals(gender)){
         matchGender = true;
       }
-      if(!matchNumber && numberSingular(jCas, member, m) == singular){
+      if(!matchNumber && numberSingular(jCas, member, m, cache.get(member)) == singular){
         matchNumber = true;
       }
     }
@@ -70,6 +78,9 @@ public class MentionClusterAgreementFeat
 
   @Override
   public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+    if(cache == null){
+      throw new RuntimeException("This extractor requires a call to setCache()");
+    }
     List<Feature> features = new ArrayList<>();
 
     String s = mention.getCoveredText().toLowerCase();
@@ -82,9 +93,16 @@ public class MentionClusterAgreementFeat
     String gender = getGender(s);
     features.add(new Feature("MC_MENTION_GENDER", gender));
 
-    boolean singular = numberSingular(jCas, mention, s);
+    boolean singular = numberSingular(jCas, mention, s, cache.get(mention));
     features.add(new Feature("MC_MENTION_NUMBER", singular));
 
     return features;
   }
+
+  @Override
+  public void setCache(Map<Markable, ConllDependencyNode> cache) {
+    this.cache = cache;
+  }
+
+
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDepHeadExtractor.java Wed Nov  8 16:09:47 2017
@@ -1,20 +1,15 @@
 package org.apache.ctakes.coreference.ae.features.cluster;
 
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
 
 import org.apache.ctakes.core.util.ListIterable;
 import org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
-import org.apache.ctakes.utils.struct.MapFactory;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.ml.Feature;
@@ -22,20 +17,27 @@ import org.cleartk.ml.feature.extractor.
 import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
 public class MentionClusterDepHeadExtractor implements
-    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable>,
+        MarkableCacheRelationExtractor{
+
+  Map<Markable,ConllDependencyNode> cache = null;
 
   @Override
   public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
       IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
     List<Feature> feats = new ArrayList<>();
-    
-    ConllDependencyNode mentionHead = MapFactory.get(getKey(jCas), mention);
+
+    if(cache == null){
+      throw new RuntimeException("This extractor requires a cached Markable->ConllDependencyNode map to be set with setCache()");
+    }
+
+    ConllDependencyNode mentionHead = cache.get(mention);
     Set<String> memberHeads = new HashSet<>();
     Set<String> memberPaths = new HashSet<>();
     
     for(Markable member : new ListIterable<Markable>(cluster.getMembers())){
       if(member.getBegin() > mention.getEnd()) break;
-      ConllDependencyNode memberHead = MapFactory.get(getKey(jCas), member);
+      ConllDependencyNode memberHead = cache.get(member);
       if(memberHead != null){
         String headWord = memberHead.getCoveredText().toLowerCase();
         memberHeads.add(headWord);
@@ -64,8 +66,12 @@ public class MentionClusterDepHeadExtrac
 
   @Override
   public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
+    if(cache == null){
+      throw new RuntimeException("This extractor requires a cached Markable->ConllDependencyNode map to be set with setCache()");
+    }
+
     List<Feature> feats = new ArrayList<>();
-    ConllDependencyNode mentionHead = MapFactory.get(getKey(jCas), mention);
+    ConllDependencyNode mentionHead = cache.get(mention);
 
     if(mentionHead != null){
       feats.add(new Feature("MentionRel", mentionHead.getDeprel()));
@@ -74,4 +80,8 @@ public class MentionClusterDepHeadExtrac
     return feats;
   }
 
+  @Override
+  public void setCache(Map<Markable, ConllDependencyNode> cache) {
+    this.cache = cache;
+  }
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterStringFeaturesExtractor.java Wed Nov  8 16:09:47 2017
@@ -1,6 +1,5 @@
 package org.apache.ctakes.coreference.ae.features.cluster;
 
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
 import static org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor.contentWords;
 import static org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor.endMatch;
 import static org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor.soonMatch;
@@ -11,25 +10,33 @@ import static org.apache.ctakes.corefere
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.ctakes.core.util.ListIterable;
 import org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
 import org.apache.ctakes.utils.struct.CounterMap;
-import org.apache.ctakes.utils.struct.MapFactory;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.ml.Feature;
 
 public class MentionClusterStringFeaturesExtractor implements
-    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>,
+        MarkableCacheRelationExtractor{
+
+  private Map<Markable, ConllDependencyNode> cache = null;
 
   public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
       IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+    if(cache == null){
+      throw new RuntimeException("This extractor requires a cached Markable->ConllDependencyNode map to be set with setCache()");
+    }
     List<Feature> feats = new ArrayList<>();
     CounterMap<String> featCounts = new CounterMap<>();
     
@@ -38,7 +45,7 @@ public class MentionClusterStringFeature
     String m = mention.getCoveredText();
     Set<String> mentionWords = contentWords(mention);
     Set<String> nonHeadMentionWords = new HashSet<>(mentionWords);
-    ConllDependencyNode mentionHead = MapFactory.get(getKey(jCas), mention);
+    ConllDependencyNode mentionHead = cache.get(mention);
     
     String mentionHeadString = null;
     if(mentionHead != null){
@@ -62,7 +69,7 @@ public class MentionClusterStringFeature
         String s = member.getCoveredText();
         Set<String> memberWords = contentWords(member);
         Set<String> nonHeadMemberWords = new HashSet<>(memberWords);
-        ConllDependencyNode memberHead = MapFactory.get(getKey(jCas), member);
+        ConllDependencyNode memberHead = cache.get(member);
         String memberHeadString = null;
         if(memberHead != null){
           memberHeadString = memberHead.getCoveredText().toLowerCase();
@@ -110,4 +117,8 @@ public class MentionClusterStringFeature
     return count;
   }
 
+  @Override
+  public void setCache(Map<Markable, ConllDependencyNode> cache) {
+    this.cache = cache;
+  }
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterUMLSFeatureExtractor.java Wed Nov  8 16:09:47 2017
@@ -1,6 +1,5 @@
 package org.apache.ctakes.coreference.ae.features.cluster;
 
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
 import static org.apache.ctakes.coreference.ae.features.UMLSFeatureExtractor.alias;
 import static org.apache.ctakes.coreference.ae.features.UMLSFeatureExtractor.getDocId;
 
@@ -12,6 +11,8 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.coreference.util.HashableMarkable;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
@@ -20,7 +21,6 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
-import org.apache.ctakes.utils.struct.MapFactory;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
@@ -30,14 +30,21 @@ import org.cleartk.ml.feature.extractor.
 import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 
 public class MentionClusterUMLSFeatureExtractor implements
-    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable> {
+    RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation>, FeatureExtractor1<Markable>,
+        MarkableCacheRelationExtractor{
 
   String docId = null;
   Map<ConllDependencyNode,Collection<IdentifiedAnnotation>> coveringMap = null;
+  Map<Markable, ConllDependencyNode> cache = null;
 
   @Override
   public List<Feature> extract(JCas jCas, CollectionTextRelation cluster,
       IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
+
+    if(cache == null){
+      throw new RuntimeException("This extractor requires a Markable cache.");
+    }
+
     List<Feature> feats = new ArrayList<>();
     Set<String> trueFeats = new HashSet<>();
     
@@ -46,7 +53,7 @@ public class MentionClusterUMLSFeatureEx
       coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
     }
     
-    ConllDependencyNode head = MapFactory.get(getKey(jCas), mention);
+    ConllDependencyNode head = cache.get(mention);
     
     if(head != null){
       List<IdentifiedAnnotation> rmList = new ArrayList<>();
@@ -63,7 +70,7 @@ public class MentionClusterUMLSFeatureEx
       
       Set<IdentifiedAnnotation> clusterEnts = new HashSet<>();
       for(Markable member : new ListIterable<Markable>(cluster.getMembers())){
-        ConllDependencyNode memberHead = MapFactory.get(getKey(jCas), member);
+        ConllDependencyNode memberHead = cache.get(member);
         rmList.clear();
         // get the named entities covering this cluster member:
         List<IdentifiedAnnotation> ents2 = new ArrayList<>(coveringMap.get(memberHead)); //JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, head2.getBegin(), head2.getEnd());
@@ -189,7 +196,7 @@ public class MentionClusterUMLSFeatureEx
         coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
     }
     
-    ConllDependencyNode head = MapFactory.get(getKey(jCas), mention);
+    ConllDependencyNode head = cache.get(mention);
 
     List<IdentifiedAnnotation> rmList = new ArrayList<>();
     // get the entities covering this markable:
@@ -215,4 +222,8 @@ public class MentionClusterUMLSFeatureEx
     return feats;
   }
 
+  @Override
+  public void setCache(Map<Markable, ConllDependencyNode> cache) {
+    this.cache = cache;
+  }
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java?rev=1814594&r1=1814593&r2=1814594&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java Wed Nov  8 16:09:47 2017
@@ -2,25 +2,26 @@ package org.apache.ctakes.coreference.ae
 
 //import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
 import org.apache.ctakes.coreference.ae.pairing.AnnotationPairer;
+import org.apache.ctakes.coreference.util.MarkableCacheRelationExtractor;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
-import org.apache.ctakes.utils.struct.MapFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 
 import java.util.*;
 
-import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
 import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
 
 //import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 
-public abstract class ClusterMentionPairer_ImplBase implements AnnotationPairer<Markable, CollectionTextRelationIdentifiedAnnotationPair> {
+public abstract class ClusterMentionPairer_ImplBase implements AnnotationPairer<Markable, CollectionTextRelationIdentifiedAnnotationPair>, MarkableCacheRelationExtractor {
   public abstract List<CollectionTextRelationIdentifiedAnnotationPair> getPairs(JCas jcas, Markable m);
   private Map<ConllDependencyNode,Collection<IdentifiedAnnotation>> nodeEntMap = null;
+  private Map<Markable,ConllDependencyNode> cache = null;
 
   @Override
   public void reset(JCas jcas){
@@ -39,7 +40,7 @@ public abstract class ClusterMentionPair
     Set<String> bestEnts = new HashSet<>();
     IdentifiedAnnotation bestEnt = null;
     Set<IdentifiedAnnotation> otherBestEnts = new HashSet<>();
-    ConllDependencyNode head = MapFactory.get(getKey(jcas), markable);
+    ConllDependencyNode head = cache.get(markable);
     if ( head == null ) {
       return Collections.emptySet();
     }
@@ -47,7 +48,7 @@ public abstract class ClusterMentionPair
     Collection<IdentifiedAnnotation> coveringEnts = nodeEntMap.get(head);
     for(IdentifiedAnnotation ent : coveringEnts){
       if(ent.getOntologyConceptArr() == null) continue; // skip non-umls entities.
-      ConllDependencyNode entHead = MapFactory.get(getKey(jcas), ent);
+      ConllDependencyNode entHead = DependencyUtility.getNominalHeadNode(jcas, ent);
       if(entHead == head){
         if(bestEnt == null){
           bestEnt = ent;
@@ -74,4 +75,9 @@ public abstract class ClusterMentionPair
   protected static final boolean dominates(Annotation arg1, Annotation arg2) {
     return (arg1.getBegin() <= arg2.getBegin() && arg1.getEnd() >= arg2.getEnd());
   }
+
+  @Override
+  public void setCache(Map<Markable,ConllDependencyNode> cache){
+    this.cache = cache;
+  }
 }



Mime
View raw message