Author: dligach
Date: Thu Apr 3 20:32:58 2014
New Revision: 1584384
URL: http://svn.apache.org/r1584384
Log:
Moved some common code to Utils and modified the logic a bit.
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java?rev=1584384&r1=1584383&r2=1584384&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
Thu Apr 3 20:32:58 2014
@@ -41,8 +41,10 @@ public class DurationEventEventFeatureEx
throws AnalysisEngineProcessException {
List<Feature> features = new ArrayList<Feature>();
- String arg1text = arg1.getCoveredText().toLowerCase();
- String arg2text = arg2.getCoveredText().toLowerCase();
+
+ String arg1text = Utils.getText(jCas, arg1);
+ String arg2text = Utils.getText(jCas, arg2);
+
Float expectedDuration1;
Float expectedDuration2;
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java?rev=1584384&r1=1584383&r2=1584384&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
Thu Apr 3 20:32:58 2014
@@ -10,7 +10,6 @@ import org.apache.ctakes.typesystem.type
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.util.JCasUtil;
@@ -51,8 +50,8 @@ public class PreserveCertainEventEventRe
String event1Text;
String event2Text;
if(arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof
EventMention) {
- event1Text = getText(jCas, arg1.getArgument());
- event2Text = getText(jCas, arg2.getArgument());
+ event1Text = Utils.getText(jCas, arg1.getArgument());
+ event2Text = Utils.getText(jCas, arg2.getArgument());
} else {
// this is not an event-event relation
continue;
@@ -70,7 +69,7 @@ public class PreserveCertainEventEventRe
// remove events (that didn't participate in relations) that have no data
for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class)))
{
- String mentionText = getText(jCas, mention);
+ String mentionText = Utils.getText(jCas, mention);
if(textToDistribution.containsKey(mentionText)) {
// these are the kind we keep
continue;
@@ -79,41 +78,4 @@ public class PreserveCertainEventEventRe
mention.removeFromIndexes();
}
}
-
- /**
- * Lemmatize this annotation if this is a verb.
- * Otherwise return as is. Lowercase before returning.
- *
- * TODO: check if there's a covering UMLS concept before lemmatizing
- */
- public static String getText(JCas jCas, Annotation annotation)
- throws AnalysisEngineProcessException {
-
- JCas systemView;
- try {
- systemView = jCas.getView("_InitialView");
- } catch (CASException e) {
- throw new AnalysisEngineProcessException(e);
- }
-
- String pos = Utils.getPosTag(systemView, annotation);
- if(pos == null) {
- return annotation.getCoveredText().toLowerCase();
- }
-
- String text;
- if(pos.startsWith("V")) {
- try {
- text = Utils.lemmatize(annotation.getCoveredText(), pos);
- } catch (IOException e) {
- System.out.println("couldn't lemmatize: " + annotation.getCoveredText());
- e.printStackTrace();
- return annotation.getCoveredText().toLowerCase();
- }
- } else {
- text = annotation.getCoveredText();
- }
-
- return text.toLowerCase();
- }
}
\ No newline at end of file
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java?rev=1584384&r1=1584383&r2=1584384&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
Thu Apr 3 20:32:58 2014
@@ -20,6 +20,9 @@ import java.util.Map;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.temporal.ae.feature.duration.DurationEventTimeFeatureExtractor;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.threeten.bp.temporal.TemporalField;
@@ -221,6 +224,9 @@ public class Utils {
return joiner.join(distribution);
}
+ /**
+ * Lemmatize word using ClearNLP lemmatizer.
+ */
public static String lemmatize(String word, String pos) throws IOException {
final String ENG_LEMMATIZER_DATA_FILE = "org/apache/ctakes/dependency/parser/models/lemmatizer/dictionary-1.3.1.jar";
@@ -252,6 +258,54 @@ public class Utils {
}
/**
+ * Check if the annotation is a UMLS concept. If it is, return as is.
+ * Otherwise, lemmatize this annotation if this is a verb.
+ * Return as is if not verb.
+ * Lowercase before returning.
+ */
+ public static String getText(JCas jCas, Annotation annotation)
+ throws AnalysisEngineProcessException {
+
+ JCas systemView;
+ try {
+ systemView = jCas.getView("_InitialView");
+ } catch (CASException e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+
+ List<EventMention> coveringSystemEventMentions = JCasUtil.selectCovered(
+ systemView,
+ EventMention.class,
+ annotation.getBegin(),
+ annotation.getEnd());
+ for(EventMention systemEventMention : coveringSystemEventMentions) {
+ if(systemEventMention.getTypeID() != 0) {
+ return annotation.getCoveredText().toLowerCase();
+ }
+ }
+
+ String pos = Utils.getPosTag(systemView, annotation);
+ if(pos == null) {
+ return annotation.getCoveredText().toLowerCase();
+ }
+
+ String text;
+ if(pos.startsWith("V")) {
+ try {
+ text = Utils.lemmatize(annotation.getCoveredText().toLowerCase(), pos);
+ } catch (IOException e) {
+ System.out.println("couldn't lemmatize: " + annotation.getCoveredText());
+ e.printStackTrace();
+ return annotation.getCoveredText().toLowerCase();
+ }
+ } else {
+ text = annotation.getCoveredText();
+ }
+
+ return text.toLowerCase();
+ }
+
+ /**
* Read event duration distributions from file.
*/
public static class Callback implements LineProcessor <Map<String, Map<String,
Float>>> {
|