ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stevenbeth...@apache.org
Subject svn commit: r1394261 - in /incubator/ctakes/trunk: ./ ctakes-core/ ctakes-core/src/main/java/org/apache/ctakes/core/ae/ ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/
Date Thu, 04 Oct 2012 20:55:06 GMT
Author: stevenbethard
Date: Thu Oct  4 20:55:06 2012
New Revision: 1394261

URL: http://svn.apache.org/viewvc?rev=1394261&view=rev
Log:
Adds first draft of Knowtator XML reader for SHARP annotations

Added:
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
  (with props)
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorAnnotation.java
  (with props)
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorXMLParser.java
  (with props)
Modified:
    incubator/ctakes/trunk/ctakes-core/pom.xml
    incubator/ctakes/trunk/pom.xml

Modified: incubator/ctakes/trunk/ctakes-core/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/pom.xml?rev=1394261&r1=1394260&r2=1394261&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/pom.xml (original)
+++ incubator/ctakes/trunk/ctakes-core/pom.xml Thu Oct  4 20:55:06 2012
@@ -24,6 +24,14 @@
 			<artifactId>jdom</artifactId>
 		</dependency>
 		<dependency>
+			<groupId>org.jdom</groupId>
+			<artifactId>jdom2</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>com.google.guava</groupId>
+			<artifactId>guava</artifactId>
+		</dependency>
+		<dependency>
 			<groupId>log4j</groupId>
 			<artifactId>log4j</artifactId>
 		</dependency>

Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java?rev=1394261&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java Thu Oct  4 20:55:06 2012
@@ -0,0 +1,532 @@
+package org.apache.ctakes.core.ae;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.core.knowtator.KnowtatorAnnotation;
+import org.apache.ctakes.core.knowtator.KnowtatorXMLParser;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.BodySide;
+import org.apache.ctakes.typesystem.type.refsem.Course;
+import org.apache.ctakes.typesystem.type.refsem.Event;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
+import org.apache.ctakes.typesystem.type.refsem.Severity;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.jdom2.JDOMException;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+
+public abstract class SHARPKnowtatorXMLReader extends JCasAnnotator_ImplBase {
+
+  /** Name of the configuration parameter bound to {@link #knowtatorXMLDirectory}. */
+  public static final String PARAM_KNOWTATOR_XML_DIRECTORY = "knowtatorXMLDirectory";
+
+  /**
+   * Value of the mandatory {@link #PARAM_KNOWTATOR_XML_DIRECTORY} parameter. It is not read
+   * anywhere in this class; presumably subclasses use it when implementing
+   * {@link #getKnowtatorXML(JCas)} - TODO confirm.
+   */
+  @ConfigurationParameter(name = PARAM_KNOWTATOR_XML_DIRECTORY, mandatory = true)
+  protected File knowtatorXMLDirectory;
+
+  /**
+   * Determines the URI of the Knowtator XML file that holds the annotations for the document
+   * in the given CAS.
+   *
+   * @param jCas
+   *          the CAS being processed
+   * @return the URI that {@link #process(JCas)} hands to the Knowtator XML parser
+   * @throws AnalysisEngineProcessException
+   *           if the URI cannot be determined
+   */
+  protected abstract URI getKnowtatorXML(JCas jCas) throws AnalysisEngineProcessException;
+
+  /**
+   * Returns the names of the annotators in the Knowtator files that represent the gold standard.
+   * The names are passed to the parser created in {@link #process(JCas)}.
+   */
+  protected abstract String[] getAnnotatorNames();
+
+  /**
+   * Parses the Knowtator XML file for the document in the CAS and creates a type-system object
+   * for each Knowtator annotation.
+   *
+   * Mentions and attribute objects are created in a first pass. Features and relations that point
+   * at other annotations are collected and only resolved after that pass, because the annotation
+   * they refer to may not have been created yet.
+   *
+   * @param jCas
+   *          the CAS to populate
+   * @throws AnalysisEngineProcessException
+   *           if the Knowtator XML file cannot be read or parsed
+   * @throws IllegalArgumentException
+   *           if an annotation has an unrecognized type or an EVENT has an invalid polarity
+   * @throws UnsupportedOperationException
+   *           if an annotation carries a slot that no branch consumed
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+    // determine Knowtator XML file from the CAS
+    URI knowtatorXML = this.getKnowtatorXML(jCas);
+
+    // parse the Knowtator XML file into annotation objects
+    KnowtatorXMLParser parser = new KnowtatorXMLParser(this.getAnnotatorNames());
+    Collection<KnowtatorAnnotation> annotations;
+    try {
+      annotations = parser.parse(knowtatorXML);
+    } catch (JDOMException e) {
+      throw new AnalysisEngineProcessException(e);
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    // the relation types
+    Set<String> entityRelationTypes = new HashSet<String>();
+    entityRelationTypes.add("location_of");
+    entityRelationTypes.add("degree_of");
+    Set<String> eventRelationTypes = new HashSet<String>();
+    eventRelationTypes.add("TLINK");
+    eventRelationTypes.add("ALINK");
+
+    // create a CAS object for each annotation
+    Map<String, Annotation> idAnnotationMap = new HashMap<String, Annotation>();
+    Map<String, TOP> idTopMap = new HashMap<String, TOP>();
+    List<DelayedRelation> delayedRelations = new ArrayList<DelayedRelation>();
+    List<DelayedFeature<?>> delayedFeatures = new ArrayList<DelayedFeature<?>>();
+    for (KnowtatorAnnotation annotation : annotations) {
+
+      // copy the slots so we can remove them as we use them
+      Map<String, String> stringSlots = new HashMap<String, String>(annotation.stringSlots);
+      Map<String, Boolean> booleanSlots = new HashMap<String, Boolean>(annotation.booleanSlots);
+      Map<String, KnowtatorAnnotation> annotationSlots = new HashMap<String, KnowtatorAnnotation>(
+          annotation.annotationSlots);
+      KnowtatorAnnotation.Span coveringSpan = annotation.getCoveringSpan();
+
+      if ("Anatomical_site".equals(annotation.type)) {
+        EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
+        addEntityMentionFeatures(
+            annotation,
+            entityMention,
+            jCas,
+            CONST.NE_TYPE_ID_ANATOMICAL_SITE,
+            stringSlots,
+            booleanSlots,
+            annotationSlots,
+            idAnnotationMap,
+            delayedFeatures);
+        KnowtatorAnnotation bodySide = annotationSlots.remove("body_side");
+        if (bodySide != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, bodySide) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setBodySide(...)
+            }
+          });
+        }
+        KnowtatorAnnotation bodyLaterality = annotationSlots.remove("body_laterality");
+        if (bodyLaterality != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, bodyLaterality) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setBodyLaterality(...)
+            }
+          });
+        }
+
+      } else if ("Disease_Disorder".equals(annotation.type)) {
+        EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
+        addEntityMentionFeatures(
+            annotation,
+            entityMention,
+            jCas,
+            CONST.NE_TYPE_ID_DISORDER,
+            stringSlots,
+            booleanSlots,
+            annotationSlots,
+            idAnnotationMap,
+            delayedFeatures);
+        KnowtatorAnnotation bodyLocation = annotationSlots.remove("body_location");
+        if (bodyLocation != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, bodyLocation) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setBodyLocation(...)
+            }
+          });
+        }
+        KnowtatorAnnotation severity = annotationSlots.remove("severity");
+        if (severity != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, severity) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setSeverity(...)
+            }
+          });
+        }
+
+      } else if ("Medications/Drugs".equals(annotation.type)) {
+        EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
+        addEntityMentionFeatures(
+            annotation,
+            entityMention,
+            jCas,
+            CONST.NE_TYPE_ID_DRUG,
+            stringSlots,
+            booleanSlots,
+            annotationSlots,
+            idAnnotationMap,
+            delayedFeatures);
+
+      } else if ("Procedure".equals(annotation.type)) {
+        EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
+        addEntityMentionFeatures(
+            annotation,
+            entityMention,
+            jCas,
+            CONST.NE_TYPE_ID_PROCEDURE,
+            stringSlots,
+            booleanSlots,
+            annotationSlots,
+            idAnnotationMap,
+            delayedFeatures);
+
+      } else if ("Sign_symptom".equals(annotation.type)) {
+        EntityMention entityMention = new EntityMention(jCas, coveringSpan.begin, coveringSpan.end);
+        addEntityMentionFeatures(
+            annotation,
+            entityMention,
+            jCas,
+            CONST.NE_TYPE_ID_FINDING,
+            stringSlots,
+            booleanSlots,
+            annotationSlots,
+            idAnnotationMap,
+            delayedFeatures);
+        KnowtatorAnnotation bodyLocation = annotationSlots.remove("body_location");
+        if (bodyLocation != null) {
+          delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, bodyLocation) {
+            @Override
+            protected void setValue(TOP valueAnnotation) {
+              // TODO: this.annotation.setBodyLocation(...)
+            }
+          });
+        }
+
+      } else if ("EVENT".equals(annotation.type)) {
+
+        // collect the event properties (setting defaults as necessary)
+        EventProperties eventProperties = new EventProperties(jCas);
+        eventProperties.setCategory(stringSlots.remove("type"));
+        if (eventProperties.getCategory() == null) {
+          eventProperties.setCategory("N/A");
+        }
+        eventProperties.setContextualModality(stringSlots.remove("contextualmoduality"));
+        if (eventProperties.getContextualModality() == null) {
+          eventProperties.setContextualModality("ACTUAL");
+        }
+        eventProperties.setContextualAspect(stringSlots.remove("contextualaspect"));
+        if (eventProperties.getContextualAspect() == null) {
+          eventProperties.setContextualAspect("N/A");
+        }
+        eventProperties.setDegree(stringSlots.remove("degree"));
+        if (eventProperties.getDegree() == null) {
+          eventProperties.setDegree("N/A");
+        }
+        eventProperties.setDocTimeRel(stringSlots.remove("DocTimeRel"));
+        if (eventProperties.getDocTimeRel() == null) {
+          // TODO: this should not be necessary - DocTimeRel should always be specified
+          eventProperties.setDocTimeRel("OVERLAP");
+        }
+        eventProperties.setPermanence(stringSlots.remove("permanence"));
+        if (eventProperties.getPermanence() == null) {
+          eventProperties.setPermanence("UNDETERMINED");
+        }
+        String polarityStr = stringSlots.remove("polarity");
+        int polarity;
+        if (polarityStr == null || polarityStr.equals("POS")) {
+          polarity = CONST.NE_POLARITY_NEGATION_ABSENT;
+        } else if (polarityStr.equals("NEG")) {
+          polarity = CONST.NE_POLARITY_NEGATION_PRESENT;
+        } else {
+          throw new IllegalArgumentException("Invalid polarity: " + polarityStr);
+        }
+        eventProperties.setPolarity(polarity);
+
+        // create the event object
+        Event event = new Event(jCas);
+        event.setConfidence(1.0f);
+        event.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
+
+        // create the event mention
+        EventMention eventMention = new EventMention(jCas, coveringSpan.begin, coveringSpan.end);
+        eventMention.setConfidence(1.0f);
+        eventMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
+
+        // add the links between event, mention and properties
+        event.setProperties(eventProperties);
+        event.setMentions(new FSArray(jCas, 1));
+        event.setMentions(0, eventMention);
+        eventMention.setEvent(event);
+
+        // add the annotations to the indexes
+        eventProperties.addToIndexes();
+        event.addToIndexes();
+        eventMention.addToIndexes();
+        idAnnotationMap.put(annotation.id, eventMention);
+
+      } else if ("DOCTIME".equals(annotation.type)) {
+        // TODO
+
+      } else if ("SECTIONTIME".equals(annotation.type)) {
+        // TODO
+
+      } else if ("TIMEX3".equals(annotation.type)) {
+        // TODO: the "class" value is consumed but not yet stored anywhere
+        stringSlots.remove("class");
+        TimeMention timeMention = new TimeMention(jCas, coveringSpan.begin, coveringSpan.end);
+        timeMention.addToIndexes();
+        idAnnotationMap.put(annotation.id, timeMention);
+
+      } else if ("generic_class".equals(annotation.type)) {
+        // TODO: there's currently no Generic in the type system
+        // (the slot is only consumed; not unboxing it avoids an NPE when it is absent)
+        booleanSlots.remove("generic_normalization");
+
+      } else if ("severity_class".equals(annotation.type)) {
+        // TODO: severity has a span, but it extends TOP
+        Severity severity = new Severity(jCas);
+        severity.setValue(stringSlots.remove("severity_normalization"));
+        severity.addToIndexes();
+        idTopMap.put(annotation.id, severity);
+
+      } else if ("conditional_class".equals(annotation.type)) {
+        // TODO: there's currently no Conditional in the type system
+        // (the slot is only consumed; not unboxing it avoids an NPE when it is absent)
+        booleanSlots.remove("conditional_normalization");
+
+      } else if ("course_class".equals(annotation.type)) {
+        // TODO: course has a span, but it extends TOP
+        Course course = new Course(jCas);
+        course.setValue(stringSlots.remove("course_normalization"));
+        course.addToIndexes();
+        idTopMap.put(annotation.id, course);
+
+      } else if ("uncertainty_indicator_class".equals(annotation.type)) {
+        // TODO: there's currently no Uncertainty in the type system
+        stringSlots.remove("uncertainty_indicator_normalization");
+
+      } else if ("distal_or_proximal".equals(annotation.type)) {
+        // TODO: there's currently no Distal or Proximal in the type system
+        stringSlots.remove("distal_or_proximal_normalization");
+
+      } else if ("Person".equals(annotation.type)) {
+        // TODO: there's currently no Subject in the type system
+        stringSlots.remove("subject_normalization_CU");
+
+      } else if ("body_side_class".equals(annotation.type)) {
+        // TODO: BodySide has a span, but it extends TOP
+        BodySide bodySide = new BodySide(jCas);
+        bodySide.setValue(stringSlots.remove("body_side_normalization"));
+        bodySide.addToIndexes();
+        idTopMap.put(annotation.id, bodySide);
+
+      } else if ("negation_indicator_class".equals(annotation.type)) {
+        // TODO: there's currently no Negation in the type system
+        stringSlots.remove("negation_indicator_normalization");
+
+      } else if (eventRelationTypes.contains(annotation.type)) {
+        // store the TLINK/ALINK information for later, once all annotations are in the CAS
+        DelayedRelation relation = new DelayedRelation();
+        relation.annotation = annotation;
+        relation.source = annotationSlots.remove("Event");
+        relation.target = annotationSlots.remove("related_to");
+        relation.type = stringSlots.remove("Relationtype");
+        delayedRelations.add(relation);
+
+      } else if (entityRelationTypes.contains(annotation.type)) {
+        // store the relation information for later, once all annotations are in the CAS
+        DelayedRelation relation = new DelayedRelation();
+        relation.annotation = annotation;
+        relation.source = annotationSlots.remove("Argument_CU");
+        relation.target = annotationSlots.remove("Related_to_CU");
+        relation.uncertainty = annotationSlots.remove("uncertainty_indicator_CU");
+        delayedRelations.add(relation);
+
+      } else {
+        throw new IllegalArgumentException("Unrecognized type: " + annotation.type);
+      }
+
+      // make sure all slots have been consumed
+      Map<String, Set<String>> slotGroups = new HashMap<String, Set<String>>();
+      slotGroups.put("stringSlots", stringSlots.keySet());
+      slotGroups.put("booleanSlots", booleanSlots.keySet());
+      slotGroups.put("annotationSlots", annotationSlots.keySet());
+      for (Map.Entry<String, Set<String>> entry : slotGroups.entrySet()) {
+        Set<String> remainingSlots = entry.getValue();
+        if (!remainingSlots.isEmpty()) {
+          String format = "%s has unprocessed %s: %s";
+          String message = String.format(format, annotation.type, entry.getKey(), remainingSlots);
+          throw new UnsupportedOperationException(message);
+        }
+      }
+    }
+
+    // add all annotations to the TOP map
+    idTopMap.putAll(idAnnotationMap);
+
+    // all mentions should be added, so add features that required other annotations;
+    // feature values such as Severity or BodySide extend TOP and are only present in
+    // idTopMap, so that map (a superset of idAnnotationMap) must be used for the lookup
+    for (DelayedFeature<?> delayedFeature : delayedFeatures) {
+      delayedFeature.setValueFrom(idTopMap);
+    }
+
+    // all mentions should be added, so add relations between annotations
+    for (DelayedRelation delayedRelation : delayedRelations) {
+      delayedRelation.addToIndexes(jCas, idAnnotationMap);
+    }
+  }
+
+  /**
+   * Sets the features common to every kind of entity mention, attaches a single ontology
+   * concept, adds the mention to the CAS indexes and registers it under the Knowtator id.
+   *
+   * Each slot that is handled here is removed from the slot maps passed in, so the caller can
+   * afterwards detect slots that nobody consumed.
+   *
+   * @param annotation
+   *          the Knowtator annotation the mention was created from
+   * @param entityMention
+   *          the mention to fill in
+   * @param jCas
+   *          the CAS in which the ontology concept is created
+   * @param typeID
+   *          the named-entity type id to set on the mention
+   * @param stringSlots
+   *          mutable copy of the annotation's string slots
+   * @param booleanSlots
+   *          mutable copy of the annotation's boolean slots
+   * @param annotationSlots
+   *          mutable copy of the annotation's annotation-valued slots
+   * @param idAnnotationMap
+   *          receives the mention, keyed by the Knowtator annotation id
+   * @param delayedFeatures
+   *          receives one entry for each linked annotation slot that is present
+   * @throws UnsupportedOperationException
+   *           if the "Status" slot holds a value other than the three known ones
+   */
+  private static void addEntityMentionFeatures(
+      KnowtatorAnnotation annotation,
+      EntityMention entityMention,
+      JCas jCas,
+      int typeID,
+      Map<String, String> stringSlots,
+      Map<String, Boolean> booleanSlots,
+      Map<String, KnowtatorAnnotation> annotationSlots,
+      Map<String, Annotation> idAnnotationMap,
+      List<DelayedFeature<?>> delayedFeatures) {
+    // these are gold annotations: fixed confidence and discovery technique
+    entityMention.setTypeID(typeID);
+    entityMention.setConfidence(1.0f);
+    entityMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
+
+    // an absent or false "Negation" slot both mean "not negated"
+    boolean negated = Boolean.TRUE.equals(booleanSlots.remove("Negation"));
+    if (negated) {
+      entityMention.setPolarity(CONST.NE_POLARITY_NEGATION_PRESENT);
+    } else {
+      entityMention.setPolarity(CONST.NE_POLARITY_NEGATION_ABSENT);
+    }
+
+    // negation indicator, conditional and subject point at other annotations, so they are
+    // queued until those annotations are present
+    String[] linkedSlotNames = { "negation_indicator_CU", "conditional_CU", "subject_CU" };
+    for (String linkedSlotName : linkedSlotNames) {
+      KnowtatorAnnotation linked = annotationSlots.remove(linkedSlotName);
+      if (linked == null) {
+        continue;
+      }
+      delayedFeatures.add(new DelayedFeature<EntityMention>(entityMention, linked) {
+        @Override
+        protected void setValue(TOP valueAnnotation) {
+          // TODO: this.annotation.setPolarity(...) / setConditional(...) / setSubject(...)
+        }
+      });
+    }
+
+    // status: the known values are accepted but not yet converted (TODO); others are rejected
+    String status = stringSlots.remove("Status");
+    boolean knownStatus = "HistoryOf".equals(status)
+        || "FamilyHistoryOf".equals(status)
+        || "Possible".equals(status);
+    if (status != null && !knownStatus) {
+      throw new UnsupportedOperationException("Unknown status: " + status);
+    }
+
+    // the code lives in one of two slot spellings; the second is only consulted (and removed)
+    // when the first is absent
+    String conceptCode = stringSlots.remove("AssociateCode");
+    if (conceptCode == null) {
+      conceptCode = stringSlots.remove("associatedCode");
+    }
+
+    // drugs get a plain ontology concept with a code, everything else a UMLS concept with a CUI
+    OntologyConcept concept;
+    if (entityMention.getTypeID() != CONST.NE_TYPE_ID_DRUG) {
+      UmlsConcept umls = new UmlsConcept(jCas);
+      umls.setCui(conceptCode);
+      concept = umls;
+    } else {
+      concept = new OntologyConcept(jCas);
+      concept.setCode(conceptCode);
+    }
+    concept.addToIndexes();
+    entityMention.setOntologyConceptArr(new FSArray(jCas, 1));
+    entityMention.setOntologyConceptArr(0, concept);
+
+    // index the mention and make it findable by its Knowtator id
+    entityMention.addToIndexes();
+    idAnnotationMap.put(annotation.id, entityMention);
+  }
+
+  /**
+   * A Knowtator relation whose creation is postponed until both of its argument mentions have
+   * been added to the CAS.
+   */
+  private static class DelayedRelation {
+    /** The Knowtator annotation that describes the relation itself. */
+    public KnowtatorAnnotation annotation;
+
+    /** The Knowtator annotation of the first argument. */
+    public KnowtatorAnnotation source;
+
+    /** The Knowtator annotation of the second argument. */
+    public KnowtatorAnnotation target;
+
+    /** Optional relation subtype; appended to the relation category when present. */
+    public String type;
+
+    /** Optional Knowtator annotation of an uncertainty indicator. */
+    public KnowtatorAnnotation uncertainty;
+
+    /**
+     * Creates the relation and its two arguments and adds them to the CAS indexes.
+     *
+     * @param jCas
+     *          the CAS in which the relation is created
+     * @param idAnnotationMap
+     *          the mentions created so far, keyed by Knowtator annotation id
+     * @throws UnsupportedOperationException
+     *           if the relation lacks a source or target, or if either one has no mention in
+     *           the map
+     */
+    public void addToIndexes(JCas jCas, Map<String, Annotation> idAnnotationMap) {
+      // a relation without both arguments cannot be represented; fail with a clear message
+      // instead of a NullPointerException on the id lookup below
+      if (this.source == null || this.target == null) {
+        String message = String.format(
+            "relation '%s' is missing its source or target",
+            this.annotation.id);
+        throw new UnsupportedOperationException(message);
+      }
+
+      // look up the relations in the map and issue an error if they're missing
+      Annotation sourceMention = idAnnotationMap.get(this.source.id);
+      Annotation targetMention = idAnnotationMap.get(this.target.id);
+      String badId = null;
+      if (sourceMention == null) {
+        badId = this.source.id;
+      } else if (targetMention == null) {
+        badId = this.target.id;
+      }
+      if (badId != null) {
+        String message = String.format("no annotation with id '%s'", badId);
+        throw new UnsupportedOperationException(message);
+      }
+
+      // get the uncertainty; the map is keyed by annotation id, so the lookup must use the id
+      // and not the KnowtatorAnnotation object (which would never match)
+      if (this.uncertainty != null) {
+        Annotation uncertaintyMention = idAnnotationMap.get(this.uncertainty.id);
+        // the indicator may have no mention in the map, so guard the debug output
+        if (uncertaintyMention != null) {
+          System.err.println(uncertaintyMention.getCoveredText());
+        }
+      }
+
+      // add the relation to the CAS
+      RelationArgument sourceRA = new RelationArgument(jCas);
+      sourceRA.setArgument(sourceMention);
+      sourceRA.addToIndexes();
+      RelationArgument targetRA = new RelationArgument(jCas);
+      targetRA.setArgument(targetMention);
+      targetRA.addToIndexes();
+      BinaryTextRelation relation = new BinaryTextRelation(jCas);
+      if (this.type != null) {
+        // TODO: do something better with knowtatorRelation.annotation.type
+        relation.setCategory(this.annotation.type + '_' + this.type);
+      } else {
+        relation.setCategory(this.annotation.type);
+      }
+      relation.setArg1(sourceRA);
+      relation.setArg2(targetRA);
+      relation.addToIndexes();
+    }
+  }
+
+  /**
+   * A feature assignment that has to wait until the annotation supplying its value exists.
+   * Subclasses implement {@link #setValue(TOP)} to copy the value onto {@link #annotation}.
+   */
+  private static abstract class DelayedFeature<ANNOTATION_TYPE extends TOP> {
+    /** The object whose feature will be set. */
+    protected final ANNOTATION_TYPE annotation;
+
+    /** Knowtator id of the annotation that supplies the feature value. */
+    private final String featureValueID;
+
+    /**
+     * @param annotation
+     *          the object whose feature will be set
+     * @param featureValue
+     *          the Knowtator annotation supplying the value; only its id is kept
+     */
+    public DelayedFeature(ANNOTATION_TYPE annotation, KnowtatorAnnotation featureValue) {
+      this.featureValueID = featureValue.id;
+      this.annotation = annotation;
+    }
+
+    /**
+     * Looks up the value object by its Knowtator id and hands it to {@link #setValue(TOP)}.
+     * The value passed on is <code>null</code> when the map has no entry for the id.
+     */
+    public void setValueFrom(Map<String, ? extends TOP> idAnnotationMap) {
+      TOP resolved = idAnnotationMap.get(this.featureValueID);
+      this.setValue(resolved);
+    }
+
+    /** Applies the resolved value (possibly <code>null</code>) to {@link #annotation}. */
+    protected abstract void setValue(TOP valueAnnotation);
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorAnnotation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorAnnotation.java?rev=1394261&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorAnnotation.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorAnnotation.java Thu Oct  4 20:55:06 2012
@@ -0,0 +1,165 @@
+package org.apache.ctakes.core.knowtator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.base.Objects;
+import com.google.common.base.Objects.ToStringHelper;
+
+/**
+ * Represents a Knowtator annotation: an id, a type, optional text spans and the string-,
+ * boolean- and annotation-valued slots attached to it.
+ *
+ * NOTE(review): {@link #hashCode()}, {@link #equals(Object)} and {@link #toString()} descend into
+ * {@link #annotationSlots}; if two annotations can link to each other this would recurse without
+ * end - confirm that the parser never produces such cycles.
+ */
+public class KnowtatorAnnotation {
+  /**
+   * The unique identifier assigned to this annotation by Knowtator
+   */
+  public String id;
+
+  /**
+   * The character offsets of this annotation (empty if not associated with a span of text).
+   */
+  public List<Span> spans = new ArrayList<Span>();
+
+  /**
+   * Get a span that approximates {@link #spans}, giving the earliest begin offset and the latest
+   * end offset.
+   *
+   * If {@link #spans} is empty, the result has <code>begin == Integer.MAX_VALUE</code> and
+   * <code>end == Integer.MIN_VALUE</code>, which is not a usable text span; callers must only
+   * use the offsets of annotations that actually have spans.
+   */
+  public Span getCoveringSpan() {
+    int begin = Integer.MAX_VALUE;
+    int end = Integer.MIN_VALUE;
+    for (KnowtatorAnnotation.Span span : this.spans) {
+      if (span.begin < begin) {
+        begin = span.begin;
+      }
+      if (span.end > end) {
+        end = span.end;
+      }
+    }
+    return new Span(begin, end);
+  }
+
+  /**
+   * Create a new span and add it to the list (not publicly available)
+   */
+  void addSpan(int begin, int end) {
+    this.spans.add(new Span(begin, end));
+  }
+
+  /**
+   * The text spanned by this annotation (<code>null</code> if not associated with a span of
+   * text).
+   */
+  public String spannedText;
+
+  /**
+   * The type (or "class") of annotation
+   */
+  public String type;
+
+  /**
+   * The string-valued annotation attributes
+   */
+  public Map<String, String> stringSlots = new HashMap<String, String>();
+
+  /**
+   * The boolean-valued annotation attributes
+   */
+  public Map<String, Boolean> booleanSlots = new HashMap<String, Boolean>();
+
+  /**
+   * The annotation-valued annotation attributes (i.e. links between annotations)
+   */
+  public Map<String, KnowtatorAnnotation> annotationSlots =
+      new HashMap<String, KnowtatorAnnotation>();
+
+  /**
+   * Construct a new KnowtatorAnnotation. (Not publicly available.)
+   */
+  KnowtatorAnnotation() {
+  }
+
+  /**
+   * Hashes every field, consistent with {@link #equals(Object)}.
+   */
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(
+        this.id,
+        this.spans,
+        this.spannedText,
+        this.type,
+        this.stringSlots,
+        this.booleanSlots,
+        this.annotationSlots);
+  }
+
+  /**
+   * Two annotations are equal when they are of exactly the same class and all fields are equal.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    // identity fast path; also avoids walking the slot maps for the trivial case
+    if (obj == this) {
+      return true;
+    }
+    if (obj == null || obj.getClass() != this.getClass()) {
+      return false;
+    }
+    KnowtatorAnnotation that = (KnowtatorAnnotation) obj;
+    return Objects.equal(this.id, that.id)
+        && Objects.equal(this.spans, that.spans)
+        && Objects.equal(this.spannedText, that.spannedText)
+        && Objects.equal(this.type, that.type)
+        && Objects.equal(this.stringSlots, that.stringSlots)
+        && Objects.equal(this.booleanSlots, that.booleanSlots)
+        && Objects.equal(this.annotationSlots, that.annotationSlots);
+  }
+
+  /**
+   * Lists every field under its own field name.
+   */
+  @Override
+  public String toString() {
+    ToStringHelper builder = Objects.toStringHelper(this);
+    builder.add("id", this.id);
+    builder.add("spans", this.spans);
+    builder.add("spannedText", this.spannedText);
+    builder.add("type", this.type);
+    builder.add("stringSlots", this.stringSlots);
+    builder.add("booleanSlots", this.booleanSlots);
+    // labelled with the actual field name (was "mentionSlots")
+    builder.add("annotationSlots", this.annotationSlots);
+    return builder.toString();
+  }
+
+  /**
+   * Represents the character offsets of a Knowtator annotation.
+   */
+  public static class Span {
+    /**
+     * The offset of the first character in the text span.
+     */
+    public int begin;
+
+    /**
+     * The offset immediately after the last character in the text span.
+     */
+    public int end;
+
+    /**
+     * Construct a new Span. (Not publicly available.)
+     */
+    Span(int begin, int end) {
+      this.begin = begin;
+      this.end = end;
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(this.begin, this.end);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (obj == this) {
+        return true;
+      }
+      if (obj == null || obj.getClass() != this.getClass()) {
+        return false;
+      }
+      Span that = (Span) obj;
+      return this.begin == that.begin && this.end == that.end;
+    }
+
+    @Override
+    public String toString() {
+      ToStringHelper builder = Objects.toStringHelper(this);
+      builder.add("begin", this.begin);
+      builder.add("end", this.end);
+      return builder.toString();
+    }
+
+  }
+
+}
\ No newline at end of file

Propchange: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorAnnotation.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorAnnotation.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorXMLParser.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorXMLParser.java?rev=1394261&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorXMLParser.java (added)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorXMLParser.java Thu Oct  4 20:55:06 2012
@@ -0,0 +1,235 @@
+package org.apache.ctakes.core.knowtator;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import org.jdom2.Element;
+import org.jdom2.JDOMException;
+import org.jdom2.input.SAXBuilder;
+import org.jdom2.output.XMLOutputter;
+
+/**
+ * Reads a Knowtator XML document and converts its {@code <annotation>}, slot-mention and
+ * {@code <classMention>} elements into {@link KnowtatorAnnotation} objects.
+ * <p>
+ * Only annotations whose {@code <annotator>} text is one of the configured annotator names are
+ * kept. A missing child element or attribute is logged as a warning and the affected element
+ * is skipped; it does not abort the parse.
+ */
+public class KnowtatorXMLParser {
+
+  private static final Logger LOGGER = Logger.getLogger(KnowtatorXMLParser.class.getName());
+
+  /** Used only to render elements into warning messages. */
+  private final XMLOutputter xmlOutputter = new XMLOutputter();
+
+  /** Annotator names whose annotations are retained by {@link #parse(URI)}. */
+  private final Set<String> annotatorNames;
+
+  /**
+   * Creates a parser that keeps annotations made by any of the given annotators.
+   *
+   * @param annotatorNames the accepted annotator names
+   */
+  public KnowtatorXMLParser(String... annotatorNames) {
+    this(new HashSet<String>(Arrays.asList(annotatorNames)));
+  }
+
+  /**
+   * Creates a parser that keeps annotations made by any of the given annotators.
+   *
+   * @param annotatorNames the accepted annotator names; the set is used as-is (not copied)
+   */
+  public KnowtatorXMLParser(Set<String> annotatorNames) {
+    this.annotatorNames = annotatorNames;
+  }
+
+  /**
+   * Parses the Knowtator XML document at the given location.
+   *
+   * @param knowtatorXML location of the Knowtator XML document
+   * @return the annotations made by the accepted annotators, with type and slots filled in
+   *         from the matching {@code <classMention>} elements
+   * @throws JDOMException if the document cannot be parsed
+   * @throws IOException if the document cannot be read
+   * @throws NumberFormatException if a {@code <span>} has a non-integer {@code start} or
+   *         {@code end} attribute
+   * @throws RuntimeException if a {@code <hasSlotMention>} of a retained annotation refers to
+   *         an id for which no string, boolean or complex slot was collected
+   */
+  public Collection<KnowtatorAnnotation> parse(URI knowtatorXML)
+      throws JDOMException, IOException {
+
+    // NOTE(review): the SAXBuilder is used with its default configuration; if documents can
+    // come from untrusted sources, consider disabling DOCTYPE/external entities -- confirm.
+    Element annotationsElem = new SAXBuilder().build(knowtatorXML.toURL()).getRootElement();
+
+    // parse <annotation> elements
+    Set<String> ignoredAnnotators = new HashSet<String>();
+    Map<String, KnowtatorAnnotation> annotations = new HashMap<String, KnowtatorAnnotation>();
+    for (Element annotationElem : annotationsElem.getChildren("annotation")) {
+      for (Element annotatorElem : this.getChild(annotationElem, "annotator")) {
+        String annotatorName = annotatorElem.getText();
+        if (!this.annotatorNames.contains(annotatorName)) {
+          ignoredAnnotators.add(annotatorName);
+          continue;
+        }
+        for (Element mentionElem : this.getChild(annotationElem, "mention")) {
+          for (String id : this.getAttributeValue(mentionElem, "id")) {
+            KnowtatorAnnotation annotation = new KnowtatorAnnotation();
+            annotation.id = id;
+            annotations.put(id, annotation);
+
+            // the spanned text is only looked up for annotations that have spans
+            List<Element> spanElems = annotationElem.getChildren("span");
+            if (spanElems.isEmpty()) {
+              continue;
+            }
+            for (Element spannedTextElem : this.getChild(annotationElem, "spannedText")) {
+              annotation.spannedText = spannedTextElem.getText();
+            }
+            for (Element spanElem : spanElems) {
+              for (String startStr : this.getAttributeValue(spanElem, "start")) {
+                for (String endStr : this.getAttributeValue(spanElem, "end")) {
+                  annotation.addSpan(Integer.parseInt(startStr), Integer.parseInt(endStr));
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    LOGGER.fine(String.format("Ignored annotators %s in %s", ignoredAnnotators, knowtatorXML));
+
+    // parse <stringSlotMention> elements
+    Map<String, Slot<String>> stringSlots = new HashMap<String, Slot<String>>();
+    for (Element slotMentionElem : annotationsElem.getChildren("stringSlotMention")) {
+      for (IdAndSlot<String> idAndSlot : this.parseSlotMention(
+          slotMentionElem,
+          "stringSlotMentionValue")) {
+        stringSlots.put(idAndSlot.id, idAndSlot.slot);
+      }
+    }
+
+    // parse <booleanSlotMention> elements
+    Map<String, Slot<Boolean>> booleanSlots = new HashMap<String, Slot<Boolean>>();
+    for (Element slotMentionElem : annotationsElem.getChildren("booleanSlotMention")) {
+      for (IdAndSlot<String> idAndSlot : this.parseSlotMention(
+          slotMentionElem,
+          "booleanSlotMentionValue")) {
+        Slot<String> slot = idAndSlot.slot;
+        Boolean value = Boolean.parseBoolean(slot.value);
+        booleanSlots.put(idAndSlot.id, new Slot<Boolean>(slot.name, value));
+      }
+    }
+
+    // parse <complexSlotMention> elements; a slot whose value is not the id of a retained
+    // annotation is dropped here
+    Map<String, Slot<KnowtatorAnnotation>> mentionSlots =
+        new HashMap<String, Slot<KnowtatorAnnotation>>();
+    for (Element slotMentionElem : annotationsElem.getChildren("complexSlotMention")) {
+      for (IdAndSlot<String> idAndSlot : this.parseSlotMention(
+          slotMentionElem,
+          "complexSlotMentionValue")) {
+        Slot<String> slot = idAndSlot.slot;
+        KnowtatorAnnotation mention = annotations.get(slot.value);
+        if (mention != null) {
+          mentionSlots.put(idAndSlot.id, new Slot<KnowtatorAnnotation>(slot.name, mention));
+        }
+      }
+    }
+
+    // parse <classMention> elements
+    for (Element classMentionElem : annotationsElem.getChildren("classMention")) {
+      for (String id : this.getAttributeValue(classMentionElem, "id")) {
+        KnowtatorAnnotation annotation = annotations.get(id);
+        if (annotation == null) {
+          // class mention of an annotation that was not retained
+          continue;
+        }
+        annotation.type = classMentionElem.getChildText("mentionClass");
+        for (Element hasSlotMentionElem : classMentionElem.getChildren("hasSlotMention")) {
+          for (String slotId : this.getAttributeValue(hasSlotMentionElem, "id")) {
+            // look the slot id up as a string slot, then boolean, then complex
+            Slot<String> stringSlot = stringSlots.get(slotId);
+            if (stringSlot != null) {
+              annotation.stringSlots.put(stringSlot.name, stringSlot.value);
+              continue;
+            }
+            Slot<Boolean> booleanSlot = booleanSlots.get(slotId);
+            if (booleanSlot != null) {
+              annotation.booleanSlots.put(booleanSlot.name, booleanSlot.value);
+              continue;
+            }
+            Slot<KnowtatorAnnotation> mentionSlot = mentionSlots.get(slotId);
+            if (mentionSlot == null) {
+              throw new RuntimeException("no slot for " + slotId);
+            }
+            annotation.annotationSlots.put(mentionSlot.name, mentionSlot.value);
+          }
+        }
+      }
+    }
+
+    return annotations.values();
+  }
+
+  /**
+   * Looks up the first child element with the given name, logging a warning when there is
+   * none.
+   *
+   * @return an {@link Option} holding the child, or an empty one if it is missing
+   */
+  private Option<Element> getChild(final Element element, final String cname) {
+    final Element child = element.getChild(cname);
+    if (child == null) {
+      String xml = this.xmlOutputter.outputString(element);
+      LOGGER.warning(String.format("no %s for %s", cname, xml));
+    }
+    return new Option<Element>(child);
+  }
+
+  /**
+   * Looks up the value of the given attribute, logging a warning when the attribute is
+   * absent.
+   *
+   * @return an {@link Option} holding the value, or an empty one if it is missing
+   */
+  private Option<String> getAttributeValue(final Element element, final String attname) {
+    final String value = element.getAttributeValue(attname);
+    if (value == null) {
+      String xml = this.xmlOutputter.outputString(element);
+      LOGGER.warning(String.format("no %s for %s", attname, xml));
+    }
+    return new Option<String>(value);
+  }
+
+  /**
+   * Extracts the slot id, slot name and raw slot value from a slot-mention element.
+   *
+   * @param slotMentionElem the {@code *SlotMention} element
+   * @param slotMentionValueElemName name of the child element carrying the {@code value}
+   *        attribute
+   * @return an {@link Option} holding the parsed slot, or an empty one if the id, the
+   *         {@code mentionSlot} child, its id, the value element or its value is missing
+   */
+  private Option<IdAndSlot<String>> parseSlotMention(
+      Element slotMentionElem,
+      String slotMentionValueElemName) {
+    IdAndSlot<String> result = null;
+    for (String slotId : this.getAttributeValue(slotMentionElem, "id")) {
+      for (Element mentionSlotElem : this.getChild(slotMentionElem, "mentionSlot")) {
+        for (String slotName : this.getAttributeValue(mentionSlotElem, "id")) {
+          for (Element slotMentionValueElem : this.getChild(
+              slotMentionElem,
+              slotMentionValueElemName)) {
+            for (String slotValue : this.getAttributeValue(slotMentionValueElem, "value")) {
+              result = new IdAndSlot<String>(slotId, new Slot<String>(slotName, slotValue));
+            }
+          }
+        }
+      }
+    }
+    return new Option<IdAndSlot<String>>(result);
+  }
+
+  /**
+   * A container of zero or one values. Iterating over it yields the value once if it is
+   * non-null and nothing otherwise, which lets callers write "if present" as a for-each loop.
+   */
+  private static class Option<T> implements Iterable<T> {
+    private final T value;
+
+    public Option(T value) {
+      this.value = value;
+    }
+
+    @Override
+    public Iterator<T> iterator() {
+      return new Iterator<T>() {
+        private T next = value;
+
+        @Override
+        public boolean hasNext() {
+          return this.next != null;
+        }
+
+        @Override
+        public T next() {
+          // honor the Iterator contract instead of handing out null once exhausted
+          if (this.next == null) {
+            throw new java.util.NoSuchElementException();
+          }
+          T result = this.next;
+          this.next = null;
+          return result;
+        }
+
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
+
+  /** A named slot value. */
+  private static class Slot<T> {
+    public final String name;
+
+    public final T value;
+
+    public Slot(String name, T value) {
+      this.name = name;
+      this.value = value;
+    }
+  }
+
+  /** A slot together with the id of the slot-mention element it was read from. */
+  private static class IdAndSlot<T> {
+    public final String id;
+
+    public final Slot<T> slot;
+
+    public IdAndSlot(String id, Slot<T> slot) {
+      this.id = id;
+      this.slot = slot;
+    }
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorXMLParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/knowtator/KnowtatorXMLParser.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/ctakes/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/pom.xml?rev=1394261&r1=1394260&r2=1394261&view=diff
==============================================================================
--- incubator/ctakes/trunk/pom.xml (original)
+++ incubator/ctakes/trunk/pom.xml Thu Oct  4 20:55:06 2012
@@ -160,6 +160,11 @@
 				<artifactId>jdom</artifactId>
 				<version>1.0</version>
 			</dependency>
+			<dependency>
+				<groupId>org.jdom</groupId>
+				<artifactId>jdom2</artifactId>
+				<version>2.0.3</version>
+			</dependency>
 			<!-- <dependency> <groupId>jama</groupId> <artifactId>jama</artifactId>

 				<version>1.0.2</version> </dependency> -->
 			<dependency>



Mime
View raw message