Author: dligach
Date: Fri Apr 4 15:45:34 2014
New Revision: 1584759
URL: http://svn.apache.org/r1584759
Log:
Updates due to changes in time normalization logic
Modified:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
Fri Apr 4 15:45:34 2014
@@ -42,8 +42,8 @@ public class DurationEventEventFeatureEx
List<Feature> features = new ArrayList<Feature>();
- String arg1text = Utils.getText(jCas, arg1);
- String arg2text = Utils.getText(jCas, arg2);
+ String arg1text = Utils.normalizeEventText(jCas, arg1);
+ String arg2text = Utils.normalizeEventText(jCas, arg2);
Float expectedDuration1;
Float expectedDuration2;
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
Fri Apr 4 15:45:34 2014
@@ -21,6 +21,7 @@ package org.apache.ctakes.temporal.ae.fe
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -30,9 +31,6 @@ import org.apache.ctakes.typesystem.type
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.cleartk.classifier.Feature;
-import org.threeten.bp.temporal.TemporalUnit;
-
-import scala.collection.immutable.Set;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
@@ -48,7 +46,7 @@ public class DurationEventTimeFeatureExt
List<Feature> features = new ArrayList<Feature>();
- String eventText = arg1.getCoveredText().toLowerCase(); // arg1 is an event
+ String eventText = Utils.normalizeEventText(jCas, arg1); // arg1 is an event
String timeText = arg2.getCoveredText().toLowerCase(); // arg2 is a time mention
File durationLookup = new File(Utils.durationDistributionPath);
@@ -63,15 +61,13 @@ public class DurationEventTimeFeatureExt
Map<String, Float> eventDistribution = textToDistribution.get(eventText);
float eventExpectedDuration = Utils.expectedDuration(eventDistribution);
- Set<TemporalUnit> units = Utils.runTimexParser(timeText);
- scala.collection.Iterator<TemporalUnit> iterator = units.iterator();
- while(iterator.hasNext()) {
- TemporalUnit unit = iterator.next();
- Map<String, Float> distribution = Utils.convertToDistribution(unit.getName());
- float timeExpectedDuration = Utils.expectedDuration(distribution);
- features.add(new Feature("expected_duration_difference", timeExpectedDuration - eventExpectedDuration));
- continue; // ignore multiple time units (almost never happens)
- }
+ HashSet<String> timeUnits = Utils.getTimeUnits(timeText);
+ for(String timeUnit : timeUnits) {
+ Map<String, Float> timeDistribution = Utils.convertToDistribution(timeUnit);
+ float timeExpectedDuration = Utils.expectedDuration(timeDistribution);
+ features.add(new Feature("duration_difference", timeExpectedDuration - eventExpectedDuration));
+ break; // for now only use first time unit
+ }
return features;
}
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
Fri Apr 4 15:45:34 2014
@@ -50,8 +50,8 @@ public class PreserveCertainEventEventRe
String event1Text;
String event2Text;
if(arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof
EventMention) {
- event1Text = Utils.getText(jCas, arg1.getArgument());
- event2Text = Utils.getText(jCas, arg2.getArgument());
+ event1Text = Utils.normalizeEventText(jCas, arg1.getArgument());
+ event2Text = Utils.normalizeEventText(jCas, arg2.getArgument());
} else {
// this is not an event-event relation
continue;
@@ -69,7 +69,7 @@ public class PreserveCertainEventEventRe
// remove events (that didn't participate in relations) that have no data
for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class)))
{
- String mentionText = Utils.getText(jCas, mention);
+ String mentionText = Utils.normalizeEventText(jCas, mention);
if(textToDistribution.containsKey(mentionText)) {
// these are the kind we keep
continue;
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java
Fri Apr 4 15:45:34 2014
@@ -2,9 +2,9 @@ package org.apache.ctakes.temporal.durat
import java.io.File;
import java.io.IOException;
+import java.util.HashSet;
import java.util.Map;
-import org.apache.ctakes.temporal.duration.Utils.Callback;
import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
@@ -12,12 +12,9 @@ import org.apache.ctakes.typesystem.type
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
-import org.threeten.bp.temporal.TemporalUnit;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.util.JCasUtil;
-import scala.collection.immutable.Set;
-
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
@@ -36,7 +33,7 @@ public class PreserveCertainEventTimeRel
File durationLookup = new File(Utils.durationDistributionPath);
Map<String, Map<String, Float>> textToDistribution = null;
try {
- textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
+ textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
} catch(IOException e) {
e.printStackTrace();
return;
@@ -58,21 +55,21 @@ public class PreserveCertainEventTimeRel
String timeText;
if(arg1.getArgument() instanceof TimeMention && arg2.getArgument() instanceof
EventMention) {
timeText = arg1.getArgument().getCoveredText().toLowerCase();
- eventText = arg2.getArgument().getCoveredText().toLowerCase();
+ eventText = Utils.normalizeEventText(jCas, arg2.getArgument());
} else if(arg1.getArgument() instanceof EventMention && arg2.getArgument()
instanceof TimeMention) {
- eventText = arg1.getArgument().getCoveredText().toLowerCase();
+ eventText = Utils.normalizeEventText(jCas, arg1.getArgument());
timeText = arg2.getArgument().getCoveredText().toLowerCase();
} else {
// this is not a event-time relation
continue;
}
- Set<TemporalUnit> units = Utils.runTimexParser(timeText);
- if(textToDistribution.containsKey(eventText) && units != null) {
+ HashSet<String> timeUnits = Utils.getTimeUnits(timeText);
+ if(textToDistribution.containsKey(eventText) && timeUnits.size() > 0) {
// there is duration information and we are able to get time units, so keep this
continue;
}
-
+
arg1.removeFromIndexes();
arg2.removeFromIndexes();
relation.removeFromIndexes();
@@ -80,7 +77,8 @@ public class PreserveCertainEventTimeRel
// remove events (that didn't participate in relations) that have no data
for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class)))
{
- if(textToDistribution.containsKey(mention.getCoveredText().toLowerCase())) {
+ String eventText = Utils.normalizeEventText(jCas, mention);
+ if(textToDistribution.containsKey(eventText)) {
// these are the kind we keep
continue;
}
@@ -89,10 +87,8 @@ public class PreserveCertainEventTimeRel
// finally remove time expressions (that didn't participate in relations) that have no
data
for(TimeMention mention : Lists.newArrayList(JCasUtil.select(goldView, TimeMention.class)))
{
- String timeText = mention.getCoveredText().toLowerCase();
- Set<TemporalUnit> units = Utils.runTimexParser(timeText);
- if(units != null) {
- // these are the kind we keep
+ HashSet<String> timeUnits = Utils.getTimeUnits(mention.getCoveredText().toLowerCase());
+ if(timeUnits.size() > 0) {
continue;
}
mention.removeFromIndexes();
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
Fri Apr 4 15:45:34 2014
@@ -52,9 +52,9 @@ public class Utils {
public static final String[] bins = {"second", "minute", "hour", "day", "week", "month",
"year", "decade"};
/**
- * Extract time unit(s) from a temporal expression.
- * Extracted time units should be a subset of the bins above.
- * Return empty set if time units couldnot be extracted.
+ * Extract time unit(s) from a temporal expression
+ * and put in one of the eight bins above.
+ * Return empty set if time units could not be extracted.
* E.g. July 5, 1984 -> day
*/
public static HashSet<String> getTimeUnits(String timex) {
@@ -258,12 +258,10 @@ public class Utils {
}
/**
- * Check if the annotation is a UMLS concept. If it is, return as is.
- * Otherwise, lemmatize this annotation if this is a verb.
- * Return as is if not verb.
+ * Keep UMLS concepts and non-verbs intact. Lemmatize verbs.
* Lowercase before returning.
*/
- public static String getText(JCas jCas, Annotation annotation)
+ public static String normalizeEventText(JCas jCas, Annotation annotation)
throws AnalysisEngineProcessException {
JCas systemView;
|