ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stevenbeth...@apache.org
Subject svn commit: r1397326 [1/2] - in /incubator/ctakes/trunk: ./ ctakes-temporal/ ctakes-temporal/.settings/ ctakes-temporal/src/ ctakes-temporal/src/main/ ctakes-temporal/src/main/java/ ctakes-temporal/src/main/java/org/ ctakes-temporal/src/main/java/org/a...
Date Thu, 11 Oct 2012 21:33:05 GMT
Author: stevenbethard
Date: Thu Oct 11 21:33:04 2012
New Revision: 1397326

URL: http://svn.apache.org/viewvc?rev=1397326&view=rev
Log:
Adds ctakes-temporal module

Added:
    incubator/ctakes/trunk/ctakes-temporal/   (with props)
    incubator/ctakes/trunk/ctakes-temporal/.classpath   (with props)
    incubator/ctakes/trunk/ctakes-temporal/.project   (with props)
    incubator/ctakes/trunk/ctakes-temporal/.settings/
    incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs
    incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs
    incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs
    incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs
    incubator/ctakes/trunk/ctakes-temporal/pom.xml   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/
    incubator/ctakes/trunk/ctakes-temporal/src/main/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java   (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/PrintRelations.java   (with props)
Modified:
    incubator/ctakes/trunk/pom.xml

Propchange: incubator/ctakes/trunk/ctakes-temporal/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Thu Oct 11 21:33:04 2012
@@ -0,0 +1 @@
+target

Added: incubator/ctakes/trunk/ctakes-temporal/.classpath
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.classpath?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.classpath (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.classpath Thu Oct 11 21:33:04 2012
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" output="target/classes" path="src/main/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>

Propchange: incubator/ctakes/trunk/ctakes-temporal/.classpath
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/ctakes/trunk/ctakes-temporal/.project
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.project?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.project (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.project Thu Oct 11 21:33:04 2012
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>ctakes-temporal</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>

Propchange: incubator/ctakes/trunk/ctakes-temporal/.project
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.core.resources.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,4 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/test/java=UTF-8
+encoding/<project>=UTF-8

Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.jdt.core.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.6

Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.ltk.core.refactoring.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,2 @@
+eclipse.preferences.version=1
+org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false

Added: incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs (added)
+++ incubator/ctakes/trunk/ctakes-temporal/.settings/org.eclipse.m2e.core.prefs Thu Oct 11 21:33:04 2012
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1

Added: incubator/ctakes/trunk/ctakes-temporal/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/pom.xml?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/pom.xml (added)
+++ incubator/ctakes/trunk/ctakes-temporal/pom.xml Thu Oct 11 21:33:04 2012
@@ -0,0 +1,78 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<artifactId>ctakes-temporal</artifactId>
+    <packaging>jar</packaging>
+    <name>Apache cTAKES Temporal Information Extraction</name>
+	<parent>
+		<groupId>org.apache.ctakes</groupId>
+		<artifactId>ctakes</artifactId>
+		<version>3.0.0-SNAPSHOT</version>
+	</parent>
+	<dependencies>
+		<dependency>
+			<groupId>org.jdom</groupId>
+			<artifactId>jdom2</artifactId>
+		</dependency>
+        <dependency>
+            <groupId>com.lexicalscope.jewelcli</groupId>
+            <artifactId>jewelcli</artifactId>
+        </dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-util</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-ml</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-ml-opennlp-maxent</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-ml-libsvm</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-eval</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.cleartk</groupId>
+			<artifactId>cleartk-timeml</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-type-system</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-core</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-context-tokenizer</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-pos-tagger</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-chunker</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-dictionary-lookup</artifactId>
+		</dependency>
+			<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-lvg</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.ctakes</groupId>
+			<artifactId>ctakes-dependency-parser</artifactId>
+		</dependency>
+	</dependencies>
+</project>
\ No newline at end of file

Propchange: incubator/ctakes/trunk/ctakes-temporal/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/pom.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.JarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+/** ClearTK annotator that reads or assigns the DocTimeRel property of each EventMention's event. */
+public class DocTimeRelAnnotator extends CleartkAnnotator<String> {
+  /** Describes this annotator in training mode, writing instances with dataWriterClass to outputDirectory. */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        DocTimeRelAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory);
+  }
+  /** Describes this annotator in prediction mode, loading the classifier from modelDirectory/model.jar. */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        DocTimeRelAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        JarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+  }
+  /** Feature extractor applied to every EventMention in process; built in initialize. */
+  private CleartkExtractor contextExtractor;
+  /** Builds contextExtractor from covered-text and partOfSpeech extractors over BaseToken contexts. */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+    CombinedExtractor baseExtractor = new CombinedExtractor(
+        new CoveredTextExtractor(),
+        new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+    this.contextExtractor = new CleartkExtractor(
+        BaseToken.class,
+        baseExtractor,
+        new Preceding(3), // presumably the 3 BaseTokens before the annotation — confirm against ClearTK
+        new Covered(),
+        new Following(3)); // presumably the 3 BaseTokens after the annotation — confirm against ClearTK
+  }
+  /** For each EventMention: writes a training instance, or stores the classifier's outcome as DocTimeRel. */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    for (EventMention eventMention : JCasUtil.select(jCas, EventMention.class)) {
+      List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
+      if (this.isTraining()) {
+        // NOTE(review): assumes getEvent() and getProperties() are non-null here and below — confirm
+        String outcome = eventMention.getEvent().getProperties().getDocTimeRel();
+        this.dataWriter.write(new Instance<String>(outcome, features));
+      } else {
+        String outcome = this.classifier.classify(features);
+        eventMention.getEvent().getProperties().setDocTimeRel(outcome);
+      }
+    }
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.chunking.BIOChunking;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.JarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Predicate;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.io.LineProcessor;
+/** ClearTK annotator that tags the BaseTokens of each Sentence with BIO outcomes for EventMentions. */
+public class EventAnnotator extends CleartkAnnotator<String> {
+  /** Describes this annotator in training mode, writing instances with dataWriterClass to outputDirectory. */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory);
+  }
+  /** Describes this annotator in prediction mode, loading the classifier from modelDirectory/model.jar. */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        EventAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        JarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+  }
+  /** Extractors applied to each token itself; populated in initialize. */
+  protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
+  /** Extractors applied around each token via extractWithin(jCas, token, sentence); populated in initialize. */
+  protected List<CleartkExtractor> contextFeatureExtractors;
+  /** Maps EntityMentions onto BaseToken BIO tags; constructed with the "typeID" feature name. */
+  private BIOChunking<BaseToken, EntityMention> entityChunking;
+  /** Converts between EventMentions and per-token BIO outcomes. */
+  private BIOChunking<BaseToken, EventMention> eventChunking;
+  /** Creates the entity and event chunkings plus the token-level and context-window feature extractors. */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+
+    // define chunkings: entities (constructed with "typeID") and events, both over BaseTokens
+    this.entityChunking = new BIOChunking<BaseToken, EntityMention>(
+        BaseToken.class,
+        EntityMention.class,
+        "typeID");
+    this.eventChunking = new BIOChunking<BaseToken, EventMention>(
+        BaseToken.class,
+        EventMention.class);
+
+    // token-level features: covered text, per-character category pattern and part of speech
+    this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+    // try {
+    this.tokenFeatureExtractors.addAll(Arrays.asList(
+        new CoveredTextExtractor(),
+        new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+        new TypePathExtractor(BaseToken.class, "partOfSpeech")));
+    // new SRLExtractor(),
+    // new CoveredTextToValuesExtractor("ACF", parseStringDoublesMap("/word_freq.lst")),
+    // new CoveredTextToValuesExtractor("PCA", parseStringDoublesMap("/word_pca.lst")),
+    // new CoveredTextToValuesExtractor("TimPCA", parseStringDoublesMap("/tim_word_pca.txt")),
+    // new PhraseExtractor()));
+    // } catch (IOException e) {
+    // throw new ResourceInitializationException(e);
+    // }
+
+    // context features: covered text and part of speech for Preceding(3) and Following(3) BaseTokens
+    CombinedExtractor subExtractor = new CombinedExtractor(
+        new CoveredTextExtractor(),
+        new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+    // new SRLExtractor(),
+    // new PhraseExtractor());
+    this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+    this.contextFeatureExtractors.add(new CleartkExtractor(
+        BaseToken.class,
+        subExtractor,
+        new Preceding(3),
+        new Following(3)));
+  }
+
+  // private static Map<String, double[]> parseStringDoublesMap(String resourcePath)
+  // throws IOException {
+  // StringToDoublesProcessor processor = new StringToDoublesProcessor();
+  // URL url = EventAnnotator.class.getResource(resourcePath);
+  // return Resources.readLines(url, Charsets.US_ASCII, processor);
+  // }
+  /** Classifies each sentence's tokens: writes instances when training, else adds predicted event chunks. */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    // classify tokens within each sentence
+    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+
+      // during training, the list of all outcomes for the tokens (derived from the sentence's EventMentions)
+      List<String> outcomes;
+      if (this.isTraining()) {
+        List<EventMention> events = JCasUtil.selectCovered(jCas, EventMention.class, sentence);
+        outcomes = this.eventChunking.createOutcomes(jCas, tokens, events);
+      }
+      // during prediction, the list of outcomes predicted so far (grows by one per token below)
+      else {
+        outcomes = new ArrayList<String>();
+      }
+
+      // get BIO entity tags for each entity type: one tag list per type ID, aligned with tokens
+      int[] entityTypeIDs = new int[] {
+          CONST.NE_TYPE_ID_ANATOMICAL_SITE,
+          CONST.NE_TYPE_ID_DISORDER,
+          CONST.NE_TYPE_ID_DRUG,
+          CONST.NE_TYPE_ID_FINDING,
+          CONST.NE_TYPE_ID_PROCEDURE,
+          CONST.NE_TYPE_ID_UNKNOWN };
+      List<EntityMention> entities = JCasUtil.selectCovered(jCas, EntityMention.class, sentence);
+      Map<Integer, List<String>> entityTagsByType = new HashMap<Integer, List<String>>();
+      for (int typeID : entityTypeIDs) {
+        Predicate<EntityMention> hasTypeID = hasEntityType(typeID);
+        List<EntityMention> subEntities = Lists.newArrayList(Iterables.filter(entities, hasTypeID));
+        entityTagsByType.put(typeID, this.entityChunking.createOutcomes(jCas, tokens, subEntities));
+      }
+
+      // extract features for all tokens
+      int tokenIndex = -1; // incremented at the top of each iteration, so the first token has index 0
+      int window = 2;
+      for (BaseToken token : tokens) {
+        ++tokenIndex;
+
+        List<Feature> features = new ArrayList<Feature>();
+        // features from token attributes
+        for (SimpleFeatureExtractor extractor : this.tokenFeatureExtractors) {
+          features.addAll(extractor.extract(jCas, token));
+        }
+        // features from surrounding tokens
+        for (CleartkExtractor extractor : this.contextFeatureExtractors) {
+          features.addAll(extractor.extractWithin(jCas, token, sentence));
+        }
+        // features from surrounding entities: tags at indices [tokenIndex - window, tokenIndex + window)
+        for (int typeID : entityTypeIDs) {
+          List<String> tokenEntityTags = entityTagsByType.get(typeID);
+          int begin = Math.max(tokenIndex - window, 0);
+          int end = Math.min(tokenIndex + window, tokenEntityTags.size()); // exclusive upper bound
+          for (int i = begin; i < end; ++i) { // NOTE(review): excludes index tokenIndex + window — confirm intent
+            String name = String.format("EntityTag_%d_%d", typeID, i - begin); // position relative to begin
+            features.add(new Feature(name, tokenEntityTags.get(i)));
+          }
+        }
+        // features from previous classifications: the two preceding outcomes, "O" before the sentence start
+        int nPreviousClassifications = 2;
+        for (int i = nPreviousClassifications; i > 0; --i) {
+          int index = tokenIndex - i;
+          String previousOutcome = index < 0 ? "O" : outcomes.get(index);
+          features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
+        }
+        // if training, write to data file
+        if (this.isTraining()) {
+          String outcome = outcomes.get(tokenIndex);
+          this.dataWriter.write(new Instance<String>(outcome, features));
+        }
+
+        // if predicting, add prediction to outcomes
+        else {
+          outcomes.add(this.classifier.classify(features));
+        }
+      }
+
+      // during prediction, convert chunk labels to events and add them to the CAS
+      if (!this.isTraining()) {
+        this.eventChunking.createChunks(jCas, tokens, outcomes);
+      }
+    }
+  }
+  /** Returns a predicate that accepts only EntityMentions whose getTypeID() equals typeID. */
+  private static Predicate<EntityMention> hasEntityType(final int typeID) {
+    return new Predicate<EntityMention>() {
+      public boolean apply(EntityMention mention) {
+        return mention.getTypeID() == typeID;
+      }
+    };
+  }
+  /** Parses lines "key,v1,v2,..." into key -> double[]; only referenced by commented-out code in this class. */
+  private static class StringToDoublesProcessor implements LineProcessor<Map<String, double[]>> {
+    private Logger logger = Logger.getLogger(this.getClass().getName());
+    /** Parsed values keyed by the first comma-separated field of each accepted line. */
+    private Map<String, double[]> result = new HashMap<String, double[]>();
+    /** Number of comma-separated parts expected per line; -1 until the first line has been seen. */
+    private int length = -1;
+    /** Returns the map accumulated by processLine so far. */
+    @Override
+    public Map<String, double[]> getResult() {
+      return this.result;
+    }
+    /** Parses one line; a line whose part count differs from the first line's is logged and skipped. */
+    @Override
+    public boolean processLine(String line) throws IOException {
+      String[] parts = line.trim().split(",");
+      String key = parts[0];
+      int partsOffset = 0; // NOTE(review): never changed from 0 in this method — looks vestigial
+      if (this.length == -1) {
+        this.length = parts.length;
+      } else if (parts.length != this.length) {
+        String message = "expected %d parts, found %d, skipping line '%s'";
+        this.logger.warning(String.format(message, this.length, parts.length, line));
+        return true; // keep reading: both exits of this method return true
+      }
+      double[] values = new double[parts.length - 1];
+      for (int i = 0; i < values.length; ++i) {
+        values[i] = Double.parseDouble(parts[i + 1 + partsOffset]); // parts[0] is the key, values follow it
+      }
+      this.result.put(key, values);
+      return true;
+    }
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.net.URI;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+
+/**
+ * Knowtator XML reader that locates the annotation file for a document inside a
+ * configured directory.
+ * <p>
+ * A document stored in a directory named {@code docN} is mapped to
+ * {@code <knowtatorXMLDirectory>/SetNN/<documentFileName>.knowtator.xml}, where {@code NN}
+ * is {@code N} zero-padded to at least two digits.
+ */
+public class THYMEKnowtatorXMLReader extends SHARPKnowtatorXMLReader {
+
+  public static final String PARAM_KNOWTATOR_XML_DIRECTORY = "knowtatorXMLDirectory";
+
+  // Compiled once instead of on every getKnowtatorXML call.
+  private static final Pattern DOC_DIRECTORY_PATTERN = Pattern.compile("^doc(\\d+)$");
+
+  @ConfigurationParameter(name = PARAM_KNOWTATOR_XML_DIRECTORY, mandatory = true)
+  protected File knowtatorXMLDirectory;
+
+  /**
+   * Creates an engine description for this reader.
+   *
+   * @param knowtatorXMLDirectory value for {@link #PARAM_KNOWTATOR_XML_DIRECTORY}
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription getDescription(File knowtatorXMLDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        THYMEKnowtatorXMLReader.class,
+        THYMEKnowtatorXMLReader.PARAM_KNOWTATOR_XML_DIRECTORY,
+        knowtatorXMLDirectory);
+  }
+
+  /**
+   * Derives the Knowtator XML location from the URI of the document in the CAS.
+   *
+   * @param jCas the CAS whose document URI is read via {@code ViewURIUtil}
+   * @return URI of {@code <knowtatorXMLDirectory>/SetNN/<documentFileName>.knowtator.xml}
+   * @throws IllegalArgumentException if the document has no parent directory or the
+   *           parent directory is not named {@code doc} followed by digits
+   */
+  @Override
+  protected URI getKnowtatorXML(JCas jCas) throws AnalysisEngineProcessException {
+    URI uri = ViewURIUtil.getURI(jCas);
+    File file = new File(uri.getPath());
+    File parent = file.getParentFile();
+    if (parent == null) {
+      // previously surfaced as a bare NullPointerException
+      throw new IllegalArgumentException("No parent directory for document URI: " + uri);
+    }
+    String subDir = parent.getName();
+    Matcher matcher = DOC_DIRECTORY_PATTERN.matcher(subDir);
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Unrecognized subdirectory naming: " + subDir);
+    }
+    subDir = String.format("Set%02d", Integer.parseInt(matcher.group(1)));
+    String fileName = file.getName() + ".knowtator.xml";
+    return new File(new File(this.knowtatorXMLDirectory, subDir), fileName).toURI();
+  }
+
+  /**
+   * @return the two annotator names used by this reader
+   */
+  @Override
+  protected String[] getAnnotatorNames() {
+    return new String[] { "consensus set annotator team", "consensus set_rel annotator team" };
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEKnowtatorXMLReader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.chunking.BIOChunking;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
+import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.JarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * ClearTK annotator that assigns a chunk label to every {@link BaseToken} of each
+ * {@link Sentence}. In training mode the labels derived from existing {@link TimeMention}
+ * annotations are written out as instances; otherwise the classifier's labels are turned
+ * into {@link TimeMention} chunks.
+ */
+public class TimeAnnotator extends CleartkAnnotator<String> {
+
+  // how many preceding outcomes are added as features for each token
+  private static final int N_PREVIOUS_OUTCOMES = 2;
+
+  /**
+   * Builds a description of this annotator configured for training.
+   *
+   * @param dataWriterClass the data writer class that receives the training instances
+   * @param outputDirectory the directory passed to the data writer factory
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      File outputDirectory) throws ResourceInitializationException {
+    AnalysisEngineDescription description = AnalysisEngineFactory.createPrimitiveDescription(
+        TimeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DefaultDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory);
+    return description;
+  }
+
+  /**
+   * Builds a description of this annotator configured for prediction.
+   *
+   * @param modelDirectory the directory that contains {@code model.jar}
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory)
+      throws ResourceInitializationException {
+    File modelJar = new File(modelDirectory, "model.jar");
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        TimeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        JarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        modelJar);
+  }
+
+  protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
+
+  protected List<CleartkExtractor> contextFeatureExtractors;
+
+  private BIOChunking<BaseToken, TimeMention> timeChunking;
+
+  /**
+   * Sets up the token-to-time chunking and the feature extractors.
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+
+    // tokens are the units that get labelled, time mentions are the chunks
+    this.timeChunking = new BIOChunking<BaseToken, TimeMention>(BaseToken.class, TimeMention.class);
+
+    // covered text, character-category pattern and part of speech of a token
+    CombinedExtractor perTokenExtractor = new CombinedExtractor(
+        new CoveredTextExtractor(),
+        new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED),
+        new TypePathExtractor(BaseToken.class, "partOfSpeech"));
+
+    this.tokenFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+    this.tokenFeatureExtractors.add(perTokenExtractor);
+
+    // the same features for the 3 tokens before and the 3 tokens after
+    CleartkExtractor windowExtractor = new CleartkExtractor(
+        BaseToken.class,
+        perTokenExtractor,
+        new Preceding(3),
+        new Following(3));
+    this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
+    this.contextFeatureExtractors.add(windowExtractor);
+  }
+
+  /**
+   * Labels the tokens of every sentence, one sentence at a time.
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+
+      // training: outcomes for all tokens are known up front;
+      // prediction: the list grows as tokens are classified
+      List<String> outcomes;
+      if (!this.isTraining()) {
+        outcomes = new ArrayList<String>();
+      } else {
+        List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
+        outcomes = this.timeChunking.createOutcomes(jCas, tokens, times);
+      }
+
+      int position = 0;
+      for (BaseToken token : tokens) {
+        List<Feature> features = this.extractFeatures(jCas, token, sentence, position, outcomes);
+        if (this.isTraining()) {
+          // write the instance to the data file
+          this.dataWriter.write(new Instance<String>(outcomes.get(position), features));
+        } else {
+          // remember the prediction so later tokens can use it as a feature
+          outcomes.add(this.classifier.classify(features));
+        }
+        ++position;
+      }
+
+      // prediction only: convert the chunk labels to times and add them to the CAS
+      if (!this.isTraining()) {
+        this.timeChunking.createChunks(jCas, tokens, outcomes);
+      }
+    }
+  }
+
+  /**
+   * Collects the features of one token: token attributes, surrounding tokens within the
+   * sentence, then the outcomes of the preceding tokens ("O" before the sentence start).
+   */
+  private List<Feature> extractFeatures(
+      JCas jCas,
+      BaseToken token,
+      Sentence sentence,
+      int position,
+      List<String> outcomes) throws AnalysisEngineProcessException {
+    List<Feature> features = new ArrayList<Feature>();
+    for (SimpleFeatureExtractor tokenExtractor : this.tokenFeatureExtractors) {
+      features.addAll(tokenExtractor.extract(jCas, token));
+    }
+    for (CleartkExtractor contextExtractor : this.contextFeatureExtractors) {
+      features.addAll(contextExtractor.extractWithin(jCas, token, sentence));
+    }
+    for (int distance = N_PREVIOUS_OUTCOMES; distance >= 1; --distance) {
+      int earlier = position - distance;
+      String previousOutcome;
+      if (earlier < 0) {
+        previousOutcome = "O";
+      } else {
+        previousOutcome = outcomes.get(earlier);
+      }
+      features.add(new Feature("PreviousOutcome_" + distance, previousOutcome));
+    }
+    return features;
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+
+/**
+ * Feature extractor that looks up the covered text of an annotation in a table of numeric
+ * vectors and emits one feature per vector component. Texts missing from the table get
+ * the component-wise mean over all table entries.
+ */
+public class CoveredTextToValuesExtractor implements SimpleFeatureExtractor {
+
+  private String name;
+
+  private Map<String, double[]> textDoublesMap;
+
+  private double[] meanValues;
+
+  /**
+   * @param name prefix used when naming the generated features
+   * @param textDoublesMap table from covered text to its values; must not be empty
+   * @throws IllegalArgumentException if the table has no entries
+   */
+  public CoveredTextToValuesExtractor(String name, Map<String, double[]> textDoublesMap) {
+    this.name = name;
+    this.textDoublesMap = textDoublesMap;
+    int entryCount = textDoublesMap.size();
+    if (entryCount == 0) {
+      throw new IllegalArgumentException("textDoublesMap cannot be empty");
+    }
+    // the first entry determines how many components the mean vector has
+    double[] sums = new double[textDoublesMap.values().iterator().next().length];
+    for (double[] row : textDoublesMap.values()) {
+      int column = 0;
+      for (double cell : row) {
+        sums[column] += cell;
+        ++column;
+      }
+    }
+    for (int column = 0; column < sums.length; ++column) {
+      sums[column] = sums[column] / entryCount;
+    }
+    this.meanValues = sums;
+  }
+
+  /**
+   * Emits one feature per value, named from the configured prefix and the value's index.
+   */
+  @Override
+  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException {
+    double[] found = this.textDoublesMap.get(annotation.getCoveredText());
+    double[] vector = (found != null) ? found : this.meanValues;
+    ArrayList<Feature> features = new ArrayList<Feature>();
+    int index = 0;
+    for (double component : vector) {
+      features.add(new Feature(Feature.createName(this.name, String.valueOf(index)), component));
+      ++index;
+    }
+    return features;
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/CoveredTextToValuesExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Feature extractor that reports whether a chunk of type {@code NP} or {@code VP} is
+ * found for the given annotation.
+ */
+public class PhraseExtractor implements SimpleFeatureExtractor {
+
+  /**
+   * Returns a single feature whose value is {@code "NP"} or {@code "VP"} for the first
+   * such chunk found, or {@code "NotNPVP"} when there is none.
+   *
+   * @param jCas the CAS to search for chunks
+   * @param token the annotation whose chunks are examined
+   * @return a one-element list holding the feature
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, Annotation token) throws CleartkExtractorException {
+    String featureValue = "NotNPVP";
+    // NOTE(review): selectCovered yields chunks lying inside the token's span; if chunks
+    // that contain the token were intended, a covering lookup is needed - confirm.
+    for (Chunk chunk : JCasUtil.selectCovered(jCas, Chunk.class, token)) {
+      String chunkType = chunk.getChunkType();
+      // constant-first comparison: a chunk without a type is ignored rather than
+      // causing a NullPointerException
+      if ("NP".equals(chunkType) || "VP".equals(chunkType)) {
+        featureValue = chunkType;
+        break;
+      }
+    }
+    return Collections.singletonList(new Feature(featureValue));
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/PhraseExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.Predicate;
+import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
+import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Feature extractor that reports the semantic role of an annotation: {@code "Predicate"}
+ * if it is a token of a predicate, the argument label if it is a token of a semantic
+ * argument, and {@code "NoRole"} otherwise.
+ */
+public class SRLExtractor implements SimpleFeatureExtractor {
+
+  /**
+   * Scans the predicates of the CAS in order and returns the role of the first predicate
+   * or argument that has {@code focusAnnotation} among its tokens.
+   *
+   * @param jCas the CAS holding the predicates and their relations
+   * @param focusAnnotation the annotation to look up; compared with {@code equals}
+   *          against the tokens of each predicate and argument
+   * @return a one-element list holding the role feature
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, Annotation focusAnnotation)
+      throws CleartkExtractorException {
+    // TODO: don't iterate over the entire CAS for each focusAnnotation; use JCasUtil.indexCovering
+    // and cache the results so that we only do this once per CAS
+
+    for (Predicate predicate : JCasUtil.select(jCas, Predicate.class)) {
+
+      for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, predicate)) {
+        if (token.equals(focusAnnotation)) {
+          return Collections.singletonList(new Feature("Predicate"));
+        }
+      }
+
+      // a predicate without a relations list has no arguments to inspect
+      if (predicate.getRelations() == null) {
+        continue;
+      }
+      for (SemanticRoleRelation relation : JCasUtil.select(
+          predicate.getRelations(),
+          SemanticRoleRelation.class)) {
+        SemanticArgument arg = relation.getArgument();
+        if (arg == null) {
+          // relation without an argument: nothing to match against
+          continue;
+        }
+        for (BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, arg)) {
+          if (token.equals(focusAnnotation)) {
+            return Collections.singletonList(new Feature(arg.getLabel()));
+          }
+        }
+      }
+    }
+
+    return Collections.singletonList(new Feature("NoRole"));
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasCopier;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Annotator that copies every feature structure of a configured class from one view of
+ * the CAS to another and adds the copies to the indexes.
+ */
+public class AnnotationCopier extends JCasAnnotator_ImplBase {
+
+  public static final String PARAM_SOURCE_VIEW = "SourceView";
+
+  public static final String PARAM_TARGET_VIEW = "TargetView";
+
+  public static final String PARAM_ANNOTATION_CLASS = "AnnotationClass";
+
+  @ConfigurationParameter(name = PARAM_SOURCE_VIEW, mandatory = true)
+  private String sourceView;
+
+  @ConfigurationParameter(name = PARAM_TARGET_VIEW, mandatory = true)
+  private String targetView;
+
+  @ConfigurationParameter(name = PARAM_ANNOTATION_CLASS, mandatory = true)
+  private Class<? extends TOP> annotationClass;
+
+  /**
+   * Builds a description of this annotator.
+   *
+   * @param sourceView name of the view to copy from
+   * @param targetView name of the view to copy into
+   * @param annotationClass the class of feature structures to copy
+   * @return the analysis engine description
+   * @throws ResourceInitializationException if the description cannot be created
+   */
+  public static AnalysisEngineDescription getDescription(
+      String sourceView,
+      String targetView,
+      Class<? extends TOP> annotationClass) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        AnnotationCopier.class,
+        PARAM_SOURCE_VIEW,
+        sourceView,
+        PARAM_TARGET_VIEW,
+        targetView,
+        PARAM_ANNOTATION_CLASS,
+        annotationClass);
+  }
+
+  /**
+   * Copies each selected feature structure into the target view.
+   *
+   * @throws AnalysisEngineProcessException if either view cannot be obtained
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    JCas from;
+    JCas to;
+    try {
+      from = jCas.getView(this.sourceView);
+      to = jCas.getView(this.targetView);
+    } catch (CASException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    CasCopier copier = new CasCopier(from.getCas(), to.getCas());
+    for (TOP original : JCasUtil.select(from, this.annotationClass)) {
+      TOP duplicate = (TOP) copier.copyFs(original);
+      // CasCopier leaves the sofa of the copy untouched; unless it is re-pointed at the
+      // target view, indexing fails with:
+      // the Annotation "..." is over view "GoldView" and cannot be added to indexes associated
+      // with the different view "_InitialView".
+      Feature sofa = duplicate.getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_SOFA);
+      if (sofa != null) {
+        duplicate.setFeatureValue(sofa, to.getSofa());
+      }
+      duplicate.addToIndexes();
+    }
+  }
+
+}
\ No newline at end of file

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Helpers for parsing command line option values.
+ */
+public class CommandLine {
+
+  /**
+   * A list of integers parsed from a comma-separated specification such as
+   * {@code "1-3, 7"}, where {@code a-b} stands for every integer from {@code a} to
+   * {@code b} inclusive.
+   */
+  public static class IntegerRanges {
+
+    // Compiled once rather than for every item of every specification.
+    private static final Pattern RANGE_PATTERN = Pattern.compile("(\\d+)-(\\d+)");
+
+    private List<Integer> items = new ArrayList<Integer>();
+
+    /**
+     * @return the parsed integers in specification order (the backing list, not a copy)
+     */
+    public List<Integer> getList() {
+      return this.items;
+    }
+
+    /**
+     * Parses the specification. Whitespace around the whole string and around commas is
+     * ignored. A range whose begin exceeds its end contributes no items.
+     *
+     * @param string comma-separated integers and {@code begin-end} ranges
+     * @throws NumberFormatException if an item is neither an integer nor a range
+     */
+    public IntegerRanges(String string) {
+      // trim first: the split only absorbs whitespace next to commas, so leading or
+      // trailing blanks would otherwise reach Integer.parseInt and fail
+      for (String part : string.trim().split("\\s*,\\s*")) {
+        Matcher matcher = RANGE_PATTERN.matcher(part);
+        if (matcher.matches()) {
+          int begin = Integer.parseInt(matcher.group(1));
+          int end = Integer.parseInt(matcher.group(2));
+          for (int i = begin; i <= end; ++i) {
+            this.items.add(i);
+          }
+        } else {
+          this.items.add(Integer.parseInt(part));
+        }
+      }
+    }
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/CommandLine.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.util.ViewURIUtil;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Ordering;
+
+/**
+ * Base class for evaluations that compare the annotations produced by a system against gold
+ * annotations and collect the result in an {@link AnnotationStatistics}. Annotations that
+ * appear on only one side (compared by begin/end offsets) are written to the logger at
+ * {@link Level#FINE} together with some surrounding text.
+ */
+public abstract class EvaluationOfAnnotationSpans_ImplBase extends
+    Evaluation_ImplBase<AnnotationStatistics<String>> {
+
+  private final Logger logger = Logger.getLogger(this.getClass().getName());
+
+  /**
+   * Sets the level of this evaluation's logger and attaches a file handler that writes each
+   * log message on its own line, with no other decoration.
+   *
+   * @param level
+   *          the level to set on the logger
+   * @param outputFile
+   *          the file to log to; its parent directory is created if it does not exist
+   * @throws IOException
+   *           if the file handler cannot be created
+   */
+  public void setLogging(Level level, File outputFile) throws IOException {
+    // a bare relative name such as "errors.log" has no parent, so there is nothing to create
+    File parentDirectory = outputFile.getParentFile();
+    if (parentDirectory != null && !parentDirectory.exists()) {
+      parentDirectory.mkdirs();
+    }
+    this.logger.setLevel(level);
+    FileHandler handler = new FileHandler(outputFile.getPath());
+    handler.setFormatter(new Formatter() {
+      @Override
+      public String format(LogRecord record) {
+        return record.getMessage() + '\n';
+      }
+    });
+    this.logger.addHandler(handler);
+  }
+
+  public EvaluationOfAnnotationSpans_ImplBase(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory,
+      List<Integer> patientSets,
+      Set<AnnotatorType> annotatorFlags) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, patientSets, annotatorFlags);
+  }
+
+  /**
+   * @param directory
+   *          the directory passed to {@link #train(CollectionReader, File)}
+   * @return the engine that is run after the training preprocessor during training
+   */
+  protected abstract AnalysisEngineDescription getDataWriterDescription(File directory)
+      throws ResourceInitializationException;
+
+  /**
+   * Called by {@link #train(CollectionReader, File)} once the training pipeline has finished.
+   *
+   * @param directory
+   *          the directory passed to {@link #train(CollectionReader, File)}
+   */
+  protected abstract void trainAndPackage(File directory) throws Exception;
+
+  /**
+   * Runs the training preprocessor followed by the data writer over the collection, then
+   * delegates to {@link #trainAndPackage(File)}.
+   */
+  @Override
+  protected void train(CollectionReader collectionReader, File directory) throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    aggregateBuilder.add(this.getDataWriterDescription(directory));
+    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+    this.trainAndPackage(directory);
+  }
+
+  /**
+   * @param directory
+   *          the directory passed to {@link #test(CollectionReader, File)}
+   * @return the engine that is run after the test preprocessor during testing
+   */
+  protected abstract AnalysisEngineDescription getAnnotatorDescription(File directory)
+      throws ResourceInitializationException;
+
+  /**
+   * @param jCas
+   *          the gold view of the document
+   * @return the annotations to treat as the reference
+   */
+  protected abstract Collection<? extends Annotation> getGoldAnnotations(JCas jCas);
+
+  /**
+   * @param jCas
+   *          the default (system) view of the document
+   * @return the annotations to score against the reference
+   */
+  protected abstract Collection<? extends Annotation> getSystemAnnotations(JCas jCas);
+
+  /**
+   * Runs the test preprocessor and the annotator over the collection, adds the gold and
+   * system annotations of every document to the returned statistics, and logs each annotation
+   * whose span appears on only one side.
+   *
+   * Log lines are prefixed with {@code DROPPED:} for spans found only in the gold annotations
+   * and {@code ADDED:} for spans found only in the system annotations, and show up to 50
+   * characters of context on either side of the span.
+   */
+  @Override
+  protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
+      throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    aggregateBuilder.add(this.getAnnotatorDescription(directory));
+
+    AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
+    // orders (and therefore identifies) annotations by their [begin, end] offsets only
+    Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
+        new Function<Annotation, List<Integer>>() {
+          @Override
+          public List<Integer> apply(Annotation annotation) {
+            return Arrays.asList(annotation.getBegin(), annotation.getEnd());
+          }
+        });
+    for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+      JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+      Collection<? extends Annotation> goldAnnotations = this.getGoldAnnotations(goldView);
+      Collection<? extends Annotation> systemAnnotations = this.getSystemAnnotations(systemView);
+      stats.add(goldAnnotations, systemAnnotations);
+
+      Set<Annotation> goldSet = new TreeSet<Annotation>(bySpans);
+      goldSet.addAll(goldAnnotations);
+      Set<Annotation> systemSet = new TreeSet<Annotation>(bySpans);
+      systemSet.addAll(systemAnnotations);
+
+      Set<Annotation> goldOnly = new TreeSet<Annotation>(bySpans);
+      goldOnly.addAll(goldSet);
+      goldOnly.removeAll(systemSet);
+
+      Set<Annotation> systemOnly = new TreeSet<Annotation>(bySpans);
+      systemOnly.addAll(systemSet);
+      systemOnly.removeAll(goldSet);
+
+      if (!goldOnly.isEmpty() || !systemOnly.isEmpty()) {
+        // line breaks become single spaces so each error stays on one log line while the
+        // annotation offsets still index into the same positions
+        String text = jCas.getDocumentText().replaceAll("[\r\n]", " ");
+        this.logger.fine("Errors in : " + ViewURIUtil.getURI(jCas).toString());
+        Set<Annotation> errors = new TreeSet<Annotation>(bySpans);
+        errors.addAll(goldOnly);
+        errors.addAll(systemOnly);
+        for (Annotation annotation : errors) {
+          int begin = annotation.getBegin();
+          int end = annotation.getEnd();
+          int windowBegin = Math.max(0, begin - 50);
+          int windowEnd = Math.min(text.length(), end + 50);
+          String label = goldOnly.contains(annotation) ? "DROPPED:" : "ADDED:  ";
+          this.logger.fine(String.format(
+              "%s  ...%s[!%s!]%s...",
+              label,
+              text.substring(windowBegin, begin),
+              text.substring(begin, end),
+              text.substring(end, windowEnd)));
+        }
+      }
+    }
+    return stats;
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.syntax.opennlp.ParserAnnotator;
+import org.cleartk.syntax.opennlp.PosTaggerAnnotator;
+import org.cleartk.syntax.opennlp.SentenceAnnotator;
+import org.cleartk.timeml.event.EventAnnotator;
+import org.cleartk.timeml.type.Event;
+import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
+import org.cleartk.token.tokenizer.TokenAnnotator;
+import org.uimafit.component.NoOpAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class EvaluationOfClearTKEventSpans extends EvaluationOfAnnotationSpans_ImplBase {
+
+  public static void main(String[] args) throws Exception {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    EvaluationOfClearTKEventSpans evaluation = new EvaluationOfClearTKEventSpans(
+        new File("target/eval"),
+        options.getRawTextDirectory(),
+        options.getKnowtatorXMLDirectory(),
+        options.getPatients().getList());
+    evaluation.setLogging(Level.FINE, new File("target/eval/cleartk-event-errors.log"));
+    List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation(4);
+    for (AnnotationStatistics<String> stats : foldStats) {
+      System.err.println(stats);
+    }
+    System.err.println("OVERALL");
+    System.err.println(AnnotationStatistics.addAll(foldStats));
+  }
+
+  public EvaluationOfClearTKEventSpans(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory,
+      List<Integer> patientSets) {
+    super(
+        baseDirectory,
+        rawTextDirectory,
+        knowtatorXMLDirectory,
+        patientSets,
+        EnumSet.noneOf(AnnotatorType.class));
+  }
+
+  @Override
+  protected AnalysisEngineDescription getDataWriterDescription(File directory)
+      throws ResourceInitializationException {
+    // not training a model - just using the ClearTK one
+    return AnalysisEngineFactory.createPrimitiveDescription(NoOpAnnotator.class);
+  }
+
+  @Override
+  protected void trainAndPackage(File directory) throws Exception {
+    // not training a model - just using the ClearTK one
+  }
+
+  @Override
+  protected AnalysisEngineDescription getAnnotatorDescription(File directory)
+      throws ResourceInitializationException {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(SentenceAnnotator.getDescription());
+    aggregateBuilder.add(TokenAnnotator.getDescription());
+    aggregateBuilder.add(PosTaggerAnnotator.getDescription());
+    aggregateBuilder.add(DefaultSnowballStemmer.getDescription("English"));
+    aggregateBuilder.add(ParserAnnotator.getDescription());
+    aggregateBuilder.add(EventAnnotator.FACTORY.getAnnotatorDescription());
+    return aggregateBuilder.createAggregateDescription();
+  }
+
+  @Override
+  protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, EventMention.class);
+  }
+
+  @Override
+  protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, Event.class);
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.logging.Level;
+
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.syntax.opennlp.PosTaggerAnnotator;
+import org.cleartk.syntax.opennlp.SentenceAnnotator;
+import org.cleartk.timeml.time.TimeAnnotator;
+import org.cleartk.timeml.type.Time;
+import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
+import org.cleartk.token.tokenizer.TokenAnnotator;
+import org.uimafit.component.NoOpAnnotator;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.util.JCasUtil;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class EvaluationOfClearTKTimeSpans extends EvaluationOfAnnotationSpans_ImplBase {
+
+  public static void main(String[] args) throws Exception {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    EvaluationOfClearTKTimeSpans evaluation = new EvaluationOfClearTKTimeSpans(
+        new File("target/eval"),
+        options.getRawTextDirectory(),
+        options.getKnowtatorXMLDirectory(),
+        options.getPatients().getList());
+    evaluation.setLogging(Level.FINE, new File("target/eval/cleartk-time-errors.log"));
+    List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation(4);
+    for (AnnotationStatistics<String> stats : foldStats) {
+      System.err.println(stats);
+    }
+    System.err.println("OVERALL");
+    System.err.println(AnnotationStatistics.addAll(foldStats));
+  }
+
+  public EvaluationOfClearTKTimeSpans(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory,
+      List<Integer> patientSets) {
+    super(
+        baseDirectory,
+        rawTextDirectory,
+        knowtatorXMLDirectory,
+        patientSets,
+        EnumSet.noneOf(AnnotatorType.class));
+  }
+
+  @Override
+  protected AnalysisEngineDescription getDataWriterDescription(File directory)
+      throws ResourceInitializationException {
+    // not training a model - just using the ClearTK one
+    return AnalysisEngineFactory.createPrimitiveDescription(NoOpAnnotator.class);
+  }
+
+  @Override
+  protected void trainAndPackage(File directory) throws Exception {
+    // not training a model - just using the ClearTK one
+  }
+
+  @Override
+  protected AnalysisEngineDescription getAnnotatorDescription(File directory)
+      throws ResourceInitializationException {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(SentenceAnnotator.getDescription());
+    aggregateBuilder.add(TokenAnnotator.getDescription());
+    aggregateBuilder.add(PosTaggerAnnotator.getDescription());
+    aggregateBuilder.add(DefaultSnowballStemmer.getDescription("English"));
+    aggregateBuilder.add(TimeAnnotator.FACTORY.getAnnotatorDescription());
+    return aggregateBuilder.createAggregateDescription();
+  }
+
+  @Override
+  protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, TimeMention.class);
+  }
+
+  @Override
+  protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
+    return JCasUtil.select(jCas, Time.class);
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java?rev=1397326&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java Thu Oct 11 21:33:04 2012
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.ae.DocTimeRelAnnotator;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.pipeline.JCasIterable;
+import org.uimafit.pipeline.SimplePipeline;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+/**
+ * Evaluation of event property classification. For each name in {@link #PROPERTY_NAMES} the
+ * value assigned by the system to an {@link EventMention} is compared with the value on the
+ * gold {@link EventMention} that has the same span.
+ */
+public class EvaluationOfEventProperties extends
+    Evaluation_ImplBase<Map<String, AnnotationStatistics<String>>> {
+
+  private static final String DOC_TIME_REL = "docTimeRel";
+
+  /** Names of the {@link EventProperties} features that are evaluated. */
+  private static final List<String> PROPERTY_NAMES = Arrays.asList(DOC_TIME_REL);
+
+  /**
+   * Runs a 4-fold cross-validation and prints, for every evaluated property, the per-fold
+   * and overall statistics to standard error.
+   */
+  public static void main(String[] args) throws Exception {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    EvaluationOfEventProperties evaluation = new EvaluationOfEventProperties(
+        new File("target/eval"),
+        options.getRawTextDirectory(),
+        options.getKnowtatorXMLDirectory(),
+        options.getPatients().getList());
+    List<Map<String, AnnotationStatistics<String>>> foldStats = evaluation.crossValidation(4);
+    Map<String, AnnotationStatistics<String>> overallStats = new HashMap<String, AnnotationStatistics<String>>();
+    for (String name : PROPERTY_NAMES) {
+      overallStats.put(name, new AnnotationStatistics<String>());
+    }
+    for (Map<String, AnnotationStatistics<String>> propertyStats : foldStats) {
+      for (Map.Entry<String, AnnotationStatistics<String>> entry : propertyStats.entrySet()) {
+        overallStats.get(entry.getKey()).addAll(entry.getValue());
+      }
+    }
+    for (String name : PROPERTY_NAMES) {
+      System.err.println("====================");
+      System.err.println(name);
+      for (int i = 0; i < foldStats.size(); ++i) {
+        System.err.println("--------------------");
+        System.err.println("Fold " + i);
+        System.err.println(foldStats.get(i).get(name));
+      }
+      System.err.println("--------------------");
+      System.err.println("Overall");
+      System.err.println(overallStats.get(name));
+    }
+  }
+
+  /**
+   * Creates the evaluation with the {@code PART_OF_SPEECH_TAGS} annotator flag set.
+   */
+  public EvaluationOfEventProperties(
+      File baseDirectory,
+      File rawTextDirectory,
+      File knowtatorXMLDirectory,
+      List<Integer> patientSets) {
+    super(
+        baseDirectory,
+        rawTextDirectory,
+        knowtatorXMLDirectory,
+        patientSets,
+        EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
+  }
+
+  /**
+   * @return the classes reported by the superclass, plus {@link EventMention}
+   */
+  @Override
+  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
+    List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
+    result.add(EventMention.class);
+    return result;
+  }
+
+  /**
+   * Runs the training preprocessor followed by the {@link DocTimeRelAnnotator} data writer
+   * over the collection, then trains and packages the classifier in {@code directory}.
+   */
+  @Override
+  protected void train(CollectionReader collectionReader, File directory) throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    aggregateBuilder.add(DocTimeRelAnnotator.createDataWriterDescription(
+        LIBSVMStringOutcomeDataWriter.class,
+        directory));
+    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+    // NOTE(review): "-c 1000" is presumably the LIBSVM cost parameter - confirm
+    JarClassifierBuilder.trainAndPackage(directory, "-c", "1000");
+  }
+
+  /**
+   * Runs the test preprocessor, {@link ClearEventProperties} and the
+   * {@link DocTimeRelAnnotator} over the collection and accumulates, per property name,
+   * statistics comparing the gold and system event mentions of every document.
+   *
+   * @return a map from each name in {@link #PROPERTY_NAMES} to its statistics
+   */
+  @Override
+  protected Map<String, AnnotationStatistics<String>> test(
+      CollectionReader collectionReader,
+      File directory) throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearEventProperties.class));
+    aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription(directory));
+
+    Function<EventMention, ?> eventMentionToSpan = AnnotationStatistics.annotationToSpan();
+    Map<String, Function<EventMention, String>> propertyGetters;
+    propertyGetters = new HashMap<String, Function<EventMention, String>>();
+    Map<String, AnnotationStatistics<String>> statsMap = new HashMap<String, AnnotationStatistics<String>>();
+    // seed both maps from PROPERTY_NAMES so every name looked up in the loop below is present
+    for (String name : PROPERTY_NAMES) {
+      propertyGetters.put(name, getPropertyGetter(name));
+      statsMap.put(name, new AnnotationStatistics<String>());
+    }
+
+    for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
+      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+      JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+      Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
+      Collection<EventMention> systemEvents = JCasUtil.select(systemView, EventMention.class);
+      for (String name : PROPERTY_NAMES) {
+        statsMap.get(name).add(
+            goldEvents,
+            systemEvents,
+            eventMentionToSpan,
+            propertyGetters.get(name));
+      }
+    }
+    return statsMap;
+  }
+
+  /**
+   * @param propertyName
+   *          base name of a feature of the {@link EventProperties} type
+   * @return a function that reads that feature from an event mention's properties and
+   *         returns its value as a string
+   */
+  private static Function<EventMention, String> getPropertyGetter(final String propertyName) {
+    return new Function<EventMention, String>() {
+      @Override
+      public String apply(EventMention eventMention) {
+        EventProperties eventProperties = eventMention.getEvent().getProperties();
+        Feature feature = eventProperties.getType().getFeatureByBaseName(propertyName);
+        return eventProperties.getFeatureValueAsString(feature);
+      }
+    };
+  }
+
+  /**
+   * Annotator that resets the fields of every {@link EventProperties} in the CAS (string
+   * fields to {@code null}, polarity to {@code 0}). It runs before the
+   * {@link DocTimeRelAnnotator} in the test pipeline.
+   */
+  public static class ClearEventProperties extends JCasAnnotator_ImplBase {
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      for (EventProperties eventProperties : JCasUtil.select(jCas, EventProperties.class)) {
+        eventProperties.setAspect(null);
+        eventProperties.setCategory(null);
+        eventProperties.setContextualAspect(null);
+        eventProperties.setContextualModality(null);
+        eventProperties.setDegree(null);
+        eventProperties.setDocTimeRel(null);
+        eventProperties.setPermanence(null);
+        eventProperties.setPolarity(0);
+      }
+    }
+
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message