ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject svn commit: r1861144 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn: ae/WindowBasedAnnotator.java eval/EvaluationOfNeuralJointRelations.java
Date Wed, 12 Jun 2019 15:57:41 GMT
Author: clin
Date: Wed Jun 12 15:57:41 2019
New Revision: 1861144

URL: http://svn.apache.org/viewvc?rev=1861144&view=rev
Log:
add window-based contains relation annotator and evaluation code

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/WindowBasedAnnotator.java
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EvaluationOfNeuralJointRelations.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/WindowBasedAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/WindowBasedAnnotator.java?rev=1861144&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/WindowBasedAnnotator.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/ae/WindowBasedAnnotator.java
Wed Jun 12 15:57:41 2019
@@ -0,0 +1,352 @@
+package org.apache.ctakes.temporal.nn.ae;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor;
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.util.OntologyConceptUtil;
+import org.apache.ctakes.temporal.ae.TemporalRelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.temporal.nn.ae.EventTimeTokenBasedAnnotator.OutputMode;
+import org.apache.ctakes.temporal.nn.data.ArgContextProvider;
+import org.apache.ctakes.temporal.utils.TokenPreprocForWord2Vec;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.relation.TemporalTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Classifier;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.CleartkProcessingException;
+import org.cleartk.ml.DataWriter;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.util.ViewUriUtil;
+
+import com.google.common.collect.Lists;
+/**
+ * A joint annotator for annotating both event-time and event-event relations using neural models.
+ * @author chenlin
+ *
+ */
+public class WindowBasedAnnotator extends CleartkAnnotator<String> {
+
+	public static final String NO_RELATION_CATEGORY = "none"; // label used for candidate pairs that have no relation
+	public static final int WINDOW_SIZE = 60; // maximum number of BaseTokens allowed between two paired entities
+	public static List<Integer> tokenCount; // re-created by every constructor call; dumped by TokenStatisticWriter()
+
+	//output modes (how generateTimeTag renders a time expression):
+	//"TokenSeq": original token sequence
+	//"TokenTimeclass": original token sequence + timeClass tag
+	//"TokenTimeclassPosSeq": original token sequence + timeClass + pos seq tag
+	//"timeclass": <timex_timeClass>
+	//"timeclassPosSeq": <timex_timeClass_POStags>
+	//"posSeq": <timex_jj_nn>
+	//"singleTag": <timex>
+	//"indexTags": <timex_0>
+	public static OutputMode timexMode; // assigned in the constructor; read by generateTimeTag
+
+	private static FileWriter fstream; // NOTE(review): static writer handle; a method-local writer would be safer
+	private static BufferedWriter out; // NOTE(review): static writer handle; a method-local writer would be safer
+
+	/**
+	 * Creates the annotator and resets the class-wide state: a fresh, empty
+	 * {@code tokenCount} list and the {@code Timeclass} timex output mode.
+	 */
+	public WindowBasedAnnotator() {
+		tokenCount = new ArrayList<>();
+		timexMode = OutputMode.Timeclass; // change the timex rendering mode here
+	}
+
+
+	/**
+	 * Writes every collected token count to
+	 * {@code target/eval/thyme/train_and_test/tokenStatistic.txt}, one value per line.
+	 * The writer is opened in try-with-resources so the file handle is released even
+	 * when a write fails. A {@code tokenCount} list that was never initialised (no
+	 * annotator instance has been constructed yet) is treated as empty.
+	 *
+	 * @throws IOException if the file cannot be created or written
+	 */
+	public static void TokenStatisticWriter() throws IOException{
+		try (BufferedWriter writer = new BufferedWriter(
+				new FileWriter("target/eval/thyme/train_and_test/tokenStatistic.txt"))) {
+			if (tokenCount != null) {
+				for (Integer count : tokenCount) {
+					writer.write(count + "\n");
+				}
+			}
+		}
+	}
+	@Override
+	public void process(JCas jCas) throws AnalysisEngineProcessException {
+		// Pairs up EventMention/TimeMention annotations that share a segment and are at most WINDOW_SIZE base tokens apart,
+		// then writes one training instance per pair (training mode) or classifies the pair and adds a TemporalTextRelation.
+
+		// build the relation lookup keyed by (arg1, arg2); it is filled only in training mode
+		Map<List<Annotation>, BinaryTextRelation> relationLookup;
+		relationLookup = new HashMap<>();
+		if (this.isTraining()) {
+			relationLookup = new HashMap<>(); // NOTE(review): redundant, the map was already created just above
+			for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+				Annotation arg1 = relation.getArg1().getArgument();
+				Annotation arg2 = relation.getArg2().getArgument();
+				// The key is a list of args so we can do bi-directional lookup
+				//				int tokenNum = JCasUtil.selectCovered(jCas, BaseToken.class, arg1.getEnd(), arg2.getBegin()).size();
+				//				tokenCount.add(tokenNum);
+				List<Annotation> key = Arrays.asList(arg1, arg2);
+				if(relationLookup.containsKey(key)){ // the first relation stored for a pair wins; later ones are only reported
+					String reln = relationLookup.get(key).getCategory();
+					System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
+					System.err.println("Error! This attempted relation " + relation.getCategory() + 
+							" already has a relation " + reln + " at this span: " + 
+							arg1.getCoveredText() + " -- " + arg2.getCoveredText());
+				} else {
+					relationLookup.put(key, relation);
+				}
+			}
+		}
+
+		//		Boolean expandEvents = false;
+
+		Collection<Segment> segments = JCasUtil.select(jCas, Segment.class);
+		List<Segment> segList = Lists.newArrayList();
+		for(Segment seg: segments){
+			if (!seg.getId().equals("SIMPLE_SEGMENT")){//remove simple segment
+				segList.add(seg);
+			}
+		}
+
+		List<IdentifiedAnnotationPair> candidatePairs = Lists.newArrayList();
+		for(Segment segment : segList){
+			List<IdentifiedAnnotation> entities = JCasUtil.selectCovered(jCas, IdentifiedAnnotation.class,
segment);
+			//filter entities:
+			List<IdentifiedAnnotation> realEntities = new ArrayList<>();
+			//keep events and times only
+			for(IdentifiedAnnotation entity : entities){
+				// keep only annotations whose exact class is EventMention (subclasses are dropped), plus every TimeMention
+				if(entity instanceof EventMention && entity.getClass().equals(EventMention.class)){
+					realEntities.add(entity);
+				}else if( entity instanceof TimeMention){
+					realEntities.add(entity);
+				}
+			}
+			entities = realEntities;
+			int entityNum = entities.size();
+			for(int i=0; i< entityNum -1; i++){
+				IdentifiedAnnotation entityA = entities.get(i);
+				for(int j=i+1; j< entityNum; j++){
+					IdentifiedAnnotation entityB = entities.get(j);
+					//check if the two entities are too far from each other
+					int baseTokenNum = JCasUtil.selectCovered(jCas, BaseToken.class, entityA.getEnd(), entityB.getBegin()).size();
+					if(baseTokenNum > WINDOW_SIZE){
+						break; // stops pairing entityA with all later entities; presumably they are in document order - TODO confirm
+					}
+					if(entityA instanceof EventMention || entityB instanceof EventMention){//don't consider
Time-Time relations
+						candidatePairs.add(new IdentifiedAnnotationPair(entityA, entityB));
+					}
+				}
+			}
+		}
+
+		// walk through the pairs of annotations
+		for (IdentifiedAnnotationPair pair : candidatePairs) {
+			IdentifiedAnnotation arg1 = pair.getArg1();
+			IdentifiedAnnotation arg2 = pair.getArg2();
+
+			String context;
+			if(arg1 instanceof TimeMention){
+				context = getTokenContext(jCas, arg1, "time", arg2, "event");
+			}else if(arg2 instanceof TimeMention){
+				context = getTokenContext(jCas, arg1, "event", arg2, "time");
+			}else{
+				context = getTokenContext(jCas, arg1, "event a", arg2, "event b");
+			}
+
+			List<Feature> feats = new ArrayList<>();
+			String[] tokens = context.split(" "); // one Feature per space-separated piece of the context string
+			for (String token: tokens){
+				feats.add(new Feature(token));//.toLowerCase()
+			}
+
+			// during training, feed the features to the data writer
+			if(this.isTraining()) {
+				String category = getRelationCategory(relationLookup, arg1, arg2);
+
+				if(category == null) {
+					category = NO_RELATION_CATEGORY;
+				} else{
+					category = category.toLowerCase();
+				}
+				this.dataWriter.write(new Instance<>(category, feats));
+			} else {
+				String predictedCategory = this.classifier.classify(feats);
+
+				if (predictedCategory != null && !predictedCategory.equals(NO_RELATION_CATEGORY))
{
+
+					if (predictedCategory.endsWith("-1")) { // "-1" is the reversed-direction suffix added by getRelationCategory
+						predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+						IdentifiedAnnotation temp = arg1;
+						arg1 = arg2;
+						arg2 = temp;
+					}
+					createRelation(jCas, arg1, arg2, predictedCategory.toUpperCase(), 0.0); // confidence is always stored as 0.0
+				}
+			}
+		}
+
+
+		//
+		//		if(this.isTraining()){//in training time, output the token statistics for relations
+		//			try {
+		//				TokenStatisticWriter();
+		//			} catch (IOException e) {
+		//				// TODO Auto-generated catch block
+		//				e.printStackTrace();
+		//			}
+		//		}
+	}
+
+
+	/**
+	 * Renders the text window around two arguments as one space-separated string:
+	 * the base tokens selected before {@code arg1} (count 2), the marked-up {@code arg1},
+	 * every base token between the end of {@code arg1} and the start of {@code arg2},
+	 * the marked-up {@code arg2}, and the base tokens selected after it (count 2).
+	 * Carriage returns and line feeds left in the joined string are replaced by {@code newline}.
+	 */
+	private static String getTokenContext(JCas jCas, IdentifiedAnnotation arg1, String type1, IdentifiedAnnotation arg2,
+			String type2) {
+		List<String> pieces = new ArrayList<>();
+
+		appendBaseTokens(pieces, JCasUtil.selectPreceding(jCas, BaseToken.class, arg1, 2));
+		appendArgument(jCas, pieces, arg1, type1);
+		appendBaseTokens(pieces, JCasUtil.selectCovered(jCas, BaseToken.class, arg1.getEnd(), arg2.getBegin()));
+		appendArgument(jCas, pieces, arg2, type2);
+		appendBaseTokens(pieces, JCasUtil.selectFollowing(jCas, BaseToken.class, arg2, 2));
+
+		return String.join(" ", pieces).replaceAll("[\r\n]", "newline");
+	}
+
+	/** Appends the {@code TokenPreprocForWord2Vec.tokenToString} form of each base token, in iteration order. */
+	private static void appendBaseTokens(List<String> pieces, Iterable<BaseToken> baseTokens) {
+		for (BaseToken baseToken : baseTokens) {
+			pieces.add(TokenPreprocForWord2Vec.tokenToString(baseToken));
+		}
+	}
+
+	/**
+	 * Appends {@code type + " start"}, the rendering of the argument, and {@code type + " end"}.
+	 * A time mention is rendered through {@code generateTimeTag}; anything else as its
+	 * lower-cased covered text with each line-break character replaced by {@code " newline"}.
+	 */
+	private static void appendArgument(JCas jCas, List<String> pieces, IdentifiedAnnotation arg, String type) {
+		pieces.add(type + " start");
+		String rendered = arg instanceof TimeMention
+				? generateTimeTag(jCas, (TimeMention) arg)
+				: arg.getCoveredText().replaceAll("[\r\n]", " newline").toLowerCase();
+		pieces.add(rendered);
+		pieces.add(type + " end");
+	}
+
+
+	private static String generateTimeTag(JCas jCas, TimeMention timex) {
+		String timeTag = "<timex";
+		if(timexMode == OutputMode.IndexTags){
+			timeTag = timeTag+"_";
+			int idx = 0;
+			timeTag = timeTag+ idx+">";
+		}else if(timexMode == OutputMode.Timeclass){
+			//timeTag = "<timex_"+timex.getTimeClass()+">";
+			timeTag = timex.getTimeClass().toLowerCase();
+		}else if(timexMode == OutputMode.TimeclassPosSeq){
+			timeTag = "<timex_"+timex.getTimeClass();
+			for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, timex)){
+				timeTag = timeTag+"_"+token.getPartOfSpeech();
+			}
+			timeTag = timeTag+">";
+		}else if(timexMode == OutputMode.PosSeq){
+			timeTag = "<timex";
+			for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, timex)){
+				timeTag = timeTag+"_"+token.getPartOfSpeech();
+			}
+			timeTag = timeTag+">";
+		}else if(timexMode == OutputMode.SingleTag){
+			timeTag = timeTag+">";
+		}else if(timexMode == OutputMode.TokenTimeclass){
+			timeTag = timex.getCoveredText()+" <timex_"+timex.getTimeClass()+">";
+		}else if(timexMode == OutputMode.TokenTimeclassPosSeq){
+			timeTag = timex.getCoveredText()+" <timex_"+timex.getTimeClass();
+			for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, timex)){
+				timeTag = timeTag+"_"+token.getPartOfSpeech();
+			}
+			timeTag = timeTag+">";
+		}else if(timexMode == OutputMode.NoTag){
+			timeTag="";
+		}
+		return timeTag;
+	}
+
+
+	/**
+	 * Looks up the label of the relation recorded between two annotations.
+	 *
+	 * @param relationLookup relations keyed by their {@code [arg1, arg2]} argument list
+	 * @param arg1 first annotation of the candidate pair
+	 * @param arg2 second annotation of the candidate pair
+	 * @return the relation's category when it is stored as (arg1, arg2); the category with
+	 *         {@code "-1"} appended when it is only stored as (arg2, arg1); {@code null}
+	 *         when neither order is present
+	 */
+	protected String getRelationCategory(
+			Map<List<Annotation>, BinaryTextRelation> relationLookup,
+			IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2) {
+
+		BinaryTextRelation forward = relationLookup.get(Arrays.asList(arg1, arg2));
+		if (forward != null) {
+			return forward.getCategory();
+		}
+
+		// no relation in the given order: try the reversed pair and mark the label with "-1"
+		BinaryTextRelation backward = relationLookup.get(Arrays.asList(arg2, arg1));
+		return backward == null ? null : backward.getCategory() + "-1";
+	}
+
+	/**
+	 * Adds a {@code TemporalTextRelation} with the given category and confidence to the CAS
+	 * indexes. The two annotations are wrapped in {@code RelationArgument}s with the roles
+	 * "Arg1" and "Arg2", each of which is indexed before the relation itself.
+	 */
+	protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
+		RelationArgument first = indexedArgument(jCas, arg1, "Arg1");
+		RelationArgument second = indexedArgument(jCas, arg2, "Arg2");
+
+		TemporalTextRelation link = new TemporalTextRelation(jCas);
+		link.setArg1(first);
+		link.setArg2(second);
+		link.setCategory(predictedCategory);
+		link.setConfidence(confidence);
+		link.addToIndexes();
+	}
+
+	/** Creates a relation argument for the annotation with the given role and adds it to the indexes. */
+	private static RelationArgument indexedArgument(JCas jCas, IdentifiedAnnotation annotation, String role) {
+		RelationArgument argument = new RelationArgument(jCas);
+		argument.setArgument(annotation);
+		argument.setRole(role);
+		argument.addToIndexes();
+		return argument;
+	}
+}

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EvaluationOfNeuralJointRelations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EvaluationOfNeuralJointRelations.java?rev=1861144&r1=1861143&r2=1861144&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EvaluationOfNeuralJointRelations.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/eval/EvaluationOfNeuralJointRelations.java
Wed Jun 12 15:57:41 2019
@@ -34,12 +34,15 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
-import org.apache.ctakes.temporal.ae.DocTimeRelAnnotator;
-import org.apache.ctakes.temporal.ae.EventEventRelationAnnotator;
+import org.apache.ctakes.temporal.eval.EvaluationOfEventEventThymeRelations.AddEEPotentialRelations;
 import org.apache.ctakes.temporal.eval.EvaluationOfEventTimeRelations.ParameterSettings;
+import org.apache.ctakes.temporal.eval.EvaluationOfEventTimeRelations.AddPotentialRelations;
 import org.apache.ctakes.temporal.keras.KerasStringOutcomeDataWriter;
 import org.apache.ctakes.temporal.keras.ScriptStringFeatureDataWriter;
-import org.apache.ctakes.temporal.nn.ae.JointRelationTokenBasedAnnotator;
+//import org.apache.ctakes.temporal.nn.ae.JointRelationTokenBasedAnnotator;
+//import org.apache.ctakes.temporal.nn.ae.TwoSentenceTokenBasedAnnotator;
+import org.apache.ctakes.temporal.nn.ae.WindowBasedAnnotator;
+import org.apache.ctakes.temporal.nn.ae.WindowBasedCasedAnnotator;
 import org.apache.ctakes.temporal.eval.EvaluationOfTemporalRelations_ImplBase;
 import org.apache.ctakes.temporal.eval.Evaluation_ImplBase;
 import org.apache.ctakes.temporal.eval.I2B2Data;
@@ -77,7 +80,6 @@ import org.cleartk.ml.jar.DefaultDataWri
 import org.cleartk.ml.jar.DirectoryDataWriterFactory;
 import org.cleartk.ml.jar.GenericJarClassifierFactory;
 import org.cleartk.ml.jar.JarClassifierBuilder;
-import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
 //import org.cleartk.ml.libsvm.tk.TkLibSvmStringOutcomeDataWriter;
 //import org.cleartk.ml.libsvm.LIBSVMStringOutcomeDataWriter;
 //import org.cleartk.ml.tksvmlight.TKSVMlightStringOutcomeDataWriter;
@@ -170,7 +172,7 @@ EvaluationOfTemporalRelations_ImplBase{
 
 		//    for(ParameterSettings params : possibleParams){
 		try{
-			File workingDir = new File("/Volumes/chip-nlp/Public/THYME/eval/thyme/");//"target/eval/thyme/");
+			File workingDir = new File("/Users/chenlin/Projects/THYME/modelFile");///Volumes/chip-nlp/Public/THYME/eval/thyme/");//"/Users/chenlin/Projects/deepLearning/models/selfTrainModel");//"target/eval/thyme/");//"/Volumes/chip-nlp/Public/THYME/eval/thyme/");
 			if(!workingDir.exists()) workingDir.mkdirs();
 			if(options.getUseTmp()){
 				File tempModelDir = File.createTempFile("temporal", null, workingDir);
@@ -194,10 +196,10 @@ EvaluationOfTemporalRelations_ImplBase{
 					options.getKernelParams(),
 					params);
 			//			evaluation.prepareXMIsFor(patientSets);
-			if(options.getI2B2Output()!=null) evaluation.setI2B2Output(options.getI2B2Output() + "/temporal-relations/joint");
+			if(options.getI2B2Output()!=null) evaluation.setI2B2Output(options.getI2B2Output() + "/train_and_test/joint");//"/temporal-relations/joint");
 			if(options.getAnaforaOutput()!=null) evaluation.anaforaOutput = options.getAnaforaOutput();
 
-			List<Integer> training = trainItems;
+			List<Integer> training = trainItems; //change train, dev, test here
 			List<Integer> testing = null;
 			if(options.getTest()){
 				training.addAll(devItems);
@@ -295,7 +297,7 @@ EvaluationOfTemporalRelations_ImplBase{
 		if(!this.skipWrite){
 			AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
 			aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class,
BinaryTextRelation.class));
-			aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemoveCrossSentenceRelations.class));
+//			aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemoveCrossSentenceRelations.class));
 			if(!this.useGoldAttributes){
 				aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemoveGoldAttributes.class));
 			}
@@ -313,8 +315,8 @@ EvaluationOfTemporalRelations_ImplBase{
 			//		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonUMLSEvents.class));
 
 			//add unlabeled nearby system events as potential links: 
-			//		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(AddEEPotentialRelations.class));
-			//		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(AddPotentialRelations.class));

+//						aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(AddEEPotentialRelations.class));
+//						aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(AddPotentialRelations.class));

 
 			//		aggregateBuilder.add(
 			//				AnalysisEngineFactory.createEngineDescription(EventEventTokenBasedAnnotator.class,//EventEventTokenBasedAnnotator.class,EventEventPathsBasedAnnotator.class,
EventEventTokenAndPosBasedAnnotator, EventEventPathsBasedAnnotator
@@ -329,7 +331,7 @@ EvaluationOfTemporalRelations_ImplBase{
 			//						) );
 
 			aggregateBuilder.add(
-					AnalysisEngineFactory.createEngineDescription(JointRelationTokenBasedAnnotator.class,//EventTimeTokenAndPathBasedAnnotator.class,//
+					AnalysisEngineFactory.createEngineDescription(WindowBasedAnnotator.class,//WindowBasedAnnotator.class,//EventTimeTokenAndPathBasedAnnotator.class,//
 							CleartkAnnotator.PARAM_IS_TRAINING,
 							true,
 							DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
@@ -355,19 +357,19 @@ EvaluationOfTemporalRelations_ImplBase{
 
 		aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class));
 
-		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
-				RemoveCrossSentenceRelations.class,
-				RemoveCrossSentenceRelations.PARAM_SENTENCE_VIEW,
-				CAS.NAME_DEFAULT_SOFA,
-				RemoveCrossSentenceRelations.PARAM_RELATION_VIEW,
-				GOLD_VIEW_NAME));
-
-		if (!recallModeEvaluation && this.useClosure) { //closure for gold
-			aggregateBuilder.add(
-					AnalysisEngineFactory.createEngineDescription(AddClosure.class),//AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveContainsRelations.class),
-					CAS.NAME_DEFAULT_SOFA,
-					GOLD_VIEW_NAME);
-		}
+		aggregateBuilder.add(CopyFromSystem.getDescription(Sentence.class));
+//		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
+//				RemoveCrossSentenceRelations.class,
+//				RemoveCrossSentenceRelations.PARAM_SENTENCE_VIEW,
+//				CAS.NAME_DEFAULT_SOFA,
+//				RemoveCrossSentenceRelations.PARAM_RELATION_VIEW,
+//				GOLD_VIEW_NAME));
+
+		//closure for gold:
+//			aggregateBuilder.add(
+//					AnalysisEngineFactory.createEngineDescription(AddClosure.class),//AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveContainsRelations.class),
+//					CAS.NAME_DEFAULT_SOFA,
+//					GOLD_VIEW_NAME);
 
 		//keep event event tlinks, remove the other relations
 		//		aggregateBuilder.add(
@@ -384,7 +386,7 @@ EvaluationOfTemporalRelations_ImplBase{
 		//		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemoveNonUMLSEvents.class));
 
 
-
+		
 		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(RemoveNonContainsRelations.class),
 				CAS.NAME_DEFAULT_SOFA,
 				GOLD_VIEW_NAME);
@@ -398,15 +400,20 @@ EvaluationOfTemporalRelations_ImplBase{
 		//				new File(new File(directory,"event-event"), "model.jar").getPath());
 		//		aed = EventEventRelationAnnotator.createAnnotatorDescription((new File(directory,"event-event/model.jar")).getAbsolutePath());
 		//		aggregateBuilder.add(aed);
-		aed = AnalysisEngineFactory.createEngineDescription(JointRelationTokenBasedAnnotator.class,
+		aed = AnalysisEngineFactory.createEngineDescription(WindowBasedAnnotator.class,//WindowBasedAnnotator.class,
 				CleartkAnnotator.PARAM_IS_TRAINING,
 				false,
 				GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
 				new File(new File(directory,"joint"), "model.jar").getPath());
 		aggregateBuilder.add(aed);
+		
+		//closure for system:
+//		aggregateBuilder.add(
+//				AnalysisEngineFactory.createEngineDescription(AddClosure.class)//AnalysisEngineFactory.createPrimitiveDescription(AddTransitiveContainsRelations.class),
+//				);
 
-//		aed = DocTimeRelAnnotator.createAnnotatorDescription(new File("target/eval/event-properties/train_and_test/docTimeRel/model.jar").getAbsolutePath());
	
-//		aggregateBuilder.add(aed);
+		//		aed = DocTimeRelAnnotator.createAnnotatorDescription(new File("target/eval/event-properties/train_and_test/docTimeRel/model.jar").getAbsolutePath());
	
+		//		aggregateBuilder.add(aed);
 
 		//		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CrossSentenceTemporalRelationAnnotator.class));
 		//		aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(WithinSentenceBeforeRelationAnnotator.class));
@@ -423,15 +430,15 @@ EvaluationOfTemporalRelations_ImplBase{
 					GOLD_VIEW_NAME,
 					CAS.NAME_DEFAULT_SOFA
 					);
-			outf =  new File("target/eval/thyme/SystemError_eventEvent_recall_test.txt");
+			outf =  new File("target/brain_biLstm_recall_dev.txt");
 		}else if (!recallModeEvaluation && this.useClosure){
-			outf =  new File("target/eval/thyme/SystemError_eventEvent_precision_test.txt");
+			outf =  new File("target/brain_biLstm_precision_dev.txt");
 		}else{
-			outf =  new File("target/eval/thyme/SystemError_eventEvent_plain_test.txt");
+			outf =  new File("target/colon_bioBert_pmc_dev_closure.txt");
 		}
 
 		PrintWriter outDrop =null;
-//		outDrop = new PrintWriter(new BufferedWriter(new FileWriter(outf, false)));
+		outDrop = new PrintWriter(new BufferedWriter(new FileWriter(outf, false)));
 
 		Function<BinaryTextRelation, ?> getSpan = new Function<BinaryTextRelation, HashableArguments>()
{
 			public HashableArguments apply(BinaryTextRelation relation) {
@@ -440,6 +447,14 @@ EvaluationOfTemporalRelations_ImplBase{
 		};
 		Function<BinaryTextRelation, String> getOutcome = AnnotationStatistics.annotationToFeatureValue("category");
 
+		int withinSentRelations = 0;
+		int crossSentRelations = 0;
+		int withinSentCorrect = 0;
+		int crossSentCorrect = 0;
+		
+		int withinSentGolds = 0;
+		int crossSentGolds = 0;
+		
 		AnnotationStatistics<String> stats = new AnnotationStatistics<>();
 		JCasIterator jcasIter =new JCasIterator(collectionReader, aggregateBuilder.createAggregate());
 		JCas jCas = null;
@@ -447,6 +462,10 @@ EvaluationOfTemporalRelations_ImplBase{
 			jCas = jcasIter.next();
 			JCas goldView = jCas.getView(GOLD_VIEW_NAME);
 			JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+			Map<Annotation, Collection<Sentence>> sentCoveringMap = JCasUtil.indexCovering(systemView,
Annotation.class, Sentence.class);
+			Map<Annotation, Collection<Sentence>> goldSentCoveringMap = JCasUtil.indexCovering(goldView,
Annotation.class, Sentence.class);
+
+			
 			Collection<BinaryTextRelation> goldRelations = JCasUtil.select(
 					goldView,
 					BinaryTextRelation.class);
@@ -472,27 +491,91 @@ EvaluationOfTemporalRelations_ImplBase{
 				Set<HashableArguments> all = Sets.union(goldMap.keySet(), systemMap.keySet());
 				List<HashableArguments> sorted = Lists.newArrayList(all);
 				Collections.sort(sorted);
-				outDrop.println("Doc id: " + ViewUriUtil.getURI(jCas).toString());
-				for (HashableArguments key : sorted) {
-					BinaryTextRelation goldRelation = goldMap.get(key);
-					BinaryTextRelation systemRelation = systemMap.get(key);
-					if (goldRelation == null) {
-						outDrop.println("System added: " + formatRelation(systemRelation));
-					} else if (systemRelation == null) {
-						outDrop.println("System dropped: " + formatRelation(goldRelation));
-					} else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
-						String label = systemRelation.getCategory();
-						outDrop.printf("System labeled %s for %s\n", label, formatRelation(goldRelation));
-					} else{
-						outDrop.println("Nailed it! " + formatRelation(systemRelation));
+				if(jCas != null){
+					outDrop.println("Doc id: " + ViewUriUtil.getURI(jCas).toString());
+					for (HashableArguments key : sorted) {
+						BinaryTextRelation goldRelation = goldMap.get(key);
+						BinaryTextRelation systemRelation = systemMap.get(key);
+						if (goldRelation == null) {
+							//outDrop.println("System added: " + formatRelation(systemRelation));
+							if(checkArgumentsInTheSameSent(systemRelation, sentCoveringMap)){
+								withinSentRelations+=1;
+								outDrop.println("System added within-sent: " + formatRelation(systemRelation));
+							}else{
+								crossSentRelations+=1;
+								outDrop.println("System added cross-sent: " + formatRelation(systemRelation));
+							}
+						} else if (systemRelation == null) {
+							//outDrop.println("System dropped: " + formatRelation(goldRelation));
+							if(checkArgumentsInTheSameSent(goldRelation, goldSentCoveringMap)){
+								withinSentGolds+=1;
+								outDrop.println("System dropped within-sent: " + formatRelation(goldRelation));
+							}else{
+								crossSentGolds+=1;
+								outDrop.println("System dropped cross-sent: " + formatRelation(goldRelation));
+							}
+						} else if (!systemRelation.getCategory().equals(goldRelation.getCategory())) {
+							String label = systemRelation.getCategory();
+							//outDrop.printf("System labeled %s for %s\n", label, formatRelation(goldRelation));
+							if(checkArgumentsInTheSameSent(systemRelation, sentCoveringMap)){
+								withinSentRelations+=1;
+								outDrop.printf("System labeled within-sent %s for %s\n", label, formatRelation(goldRelation));
+							}else{
+								crossSentRelations+=1;
+								outDrop.printf("System labeled cross-sent %s for %s\n", label, formatRelation(goldRelation));
+							}
+							if(checkArgumentsInTheSameSent(goldRelation, goldSentCoveringMap)){
+								withinSentGolds+=1;
+							}else{
+								crossSentGolds+=1;
+							}
+						} else{
+							//outDrop.println("Nailed it! " + formatRelation(systemRelation));
+							if(checkArgumentsInTheSameSent(systemRelation, sentCoveringMap)){
+								withinSentRelations+=1;
+								withinSentCorrect +=1;
+								outDrop.println("Nailed it within-sent! " + formatRelation(systemRelation));
+							}else{
+								crossSentRelations+=1;
+								crossSentCorrect +=1;
+								outDrop.println("Nailed it cross-sent! " + formatRelation(systemRelation));
+							}
+							
+							if(checkArgumentsInTheSameSent(goldRelation, goldSentCoveringMap)){
+								withinSentGolds+=1;
+							}else{
+								crossSentGolds+=1;
+							}
+						}
 					}
 				}
 			}
 		}
-//		outDrop.close();
+		System.out.print("There are "+ withinSentRelations + " within Sentence Predictions; " +
withinSentCorrect+ " are correct predictions\n");
+		System.out.print("There are "+ crossSentRelations + " cross Sentence Predictions; " + crossSentCorrect+
" are correct predictions\n");
+		System.out.print("There are "+ crossSentGolds + " cross Sentence Gold Relations; " + withinSentGolds+
" are within-sent gold relations\n");
+		
+		outDrop.close();
 		return stats;
 	}
 
+	/**
+	 * Tells whether the two arguments of a relation share a covering sentence.
+	 *
+	 * @param systemRelation the relation whose arguments are inspected
+	 * @param sentCoveringMap for each annotation, the sentences recorded as covering it
+	 * @return {@code true} when some sentence object is listed for both arguments
+	 *         (compared by identity), {@code false} otherwise
+	 */
+	private static boolean checkArgumentsInTheSameSent(BinaryTextRelation systemRelation,
+			Map<Annotation, Collection<Sentence>> sentCoveringMap) {
+		Collection<Sentence> firstArgSentences = sentCoveringMap.get(systemRelation.getArg1().getArgument());
+		Collection<Sentence> secondArgSentences = sentCoveringMap.get(systemRelation.getArg2().getArgument());
+
+		for (Sentence candidate : firstArgSentences) {
+			for (Sentence other : secondArgSentences) {
+				if (candidate == other) {
+					return true;
+				}
+			}
+		}
+		return false;
+	}
+
 	public static class RemoveNonUMLSEvents extends org.apache.uima.fit.component.JCasAnnotator_ImplBase
{
 		public static final String PARAM_GOLD_VIEW = "GoldView";
 



Mime
View raw message