Author: clin
Date: Fri Sep 16 21:26:38 2016
New Revision: 1761094
URL: http://svn.apache.org/viewvc?rev=1761094&view=rev
Log:
write evaluation code to load deepPhe docTimeRel data
Added:
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java?rev=1761094&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java
Fri Sep 16 21:26:38 2016
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.Event;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.util.ViewUriUtil;
+import org.cleartk.util.cr.UriCollectionReader;
+import org.jdom2.Element;
+import org.jdom2.JDOMException;
+import org.jdom2.input.SAXBuilder;
+
+import com.google.common.collect.Lists;
+
+public class DeepPheAnaforaXMLReader extends JCasAnnotator_ImplBase {
+ /** Class-wide log4j logger; final so it cannot be swapped out after class initialization. */
+ private static final Logger LOGGER = Logger.getLogger(DeepPheAnaforaXMLReader.class);
+
+ /** Name of the configuration parameter that is injected into {@link #anaforaDirectory}. */
+ public static final String PARAM_ANAFORA_DIRECTORY = "anaforaDirectory";
+
+ @ConfigurationParameter(
+     name = PARAM_ANAFORA_DIRECTORY,
+     description = "root directory of the Anafora-annotated files, with one subdirectory for "
+         + "each annotated file")
+ private File anaforaDirectory;
+
+ /** Name of the configuration parameter that is injected into {@link #anaforaXMLSuffixes}. */
+ public static final String PARAM_ANAFORA_XML_SUFFIXES = "anaforaSuffixes";
+
+ @ConfigurationParameter(
+     name = PARAM_ANAFORA_XML_SUFFIXES,
+     mandatory = false,
+     description = "list of suffixes that might be added to a file name to identify the Anafora "
+         + "XML annotations file; only the first suffix corresponding to a file will be used")
+ private String[] anaforaXMLSuffixes = new String[] {
+     ".UmlsDeepPhe.dave.completed.xml"};
+
+ /**
+  * Creates an engine description for this reader without setting any configuration parameter.
+  *
+  * @return the uimaFIT description of {@link DeepPheAnaforaXMLReader}
+  * @throws ResourceInitializationException if uimaFIT cannot build the description
+  */
+ public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
+   final AnalysisEngineDescription readerDescription =
+       AnalysisEngineFactory.createEngineDescription(DeepPheAnaforaXMLReader.class);
+   return readerDescription;
+ }
+
+ /**
+  * Creates an engine description for this reader with {@link #PARAM_ANAFORA_DIRECTORY} set.
+  *
+  * @param anaforaDirectory value passed for the {@code anaforaDirectory} configuration parameter
+  * @return the uimaFIT description of {@link DeepPheAnaforaXMLReader}
+  * @throws ResourceInitializationException if uimaFIT cannot build the description
+  */
+ public static AnalysisEngineDescription getDescription(File anaforaDirectory)
+     throws ResourceInitializationException {
+   final Class<DeepPheAnaforaXMLReader> readerClass = DeepPheAnaforaXMLReader.class;
+   final AnalysisEngineDescription readerDescription = AnalysisEngineFactory.createEngineDescription(
+       readerClass,
+       DeepPheAnaforaXMLReader.PARAM_ANAFORA_DIRECTORY,
+       anaforaDirectory);
+   return readerDescription;
+ }
+
+ /**
+  * Locates the Anafora XML file that belongs to the document in {@code jCas} and loads its
+  * annotations into the CAS.
+  *
+  * <p>Candidate files are built by appending each configured suffix to the path of the source
+  * text file; the first candidate that exists is used.
+  *
+  * @param jCas the CAS whose URI (see {@link ViewUriUtil}) identifies the source text file
+  * @throws AnalysisEngineProcessException if the XML file cannot be read or parsed
+  * @throws IllegalArgumentException if suffixes are configured but none of the candidates exists
+  */
+ @Override
+ public void process(JCas jCas) throws AnalysisEngineProcessException {
+   // determine source text file
+   File textFile = new File(ViewUriUtil.getURI(jCas));
+   LOGGER.info("processing " + textFile);
+
+   // determine possible Anafora XML file names
+   // NOTE(review): the original code built the same path whether or not anaforaDirectory was
+   // set, so the directory parameter does not influence the lookup; that behavior is kept here.
+   List<File> possibleXMLFiles = Lists.newArrayList();
+   for (String anaforaXMLSuffix : this.anaforaXMLSuffixes) {
+     possibleXMLFiles.add(new File(textFile.getPath() + anaforaXMLSuffix));
+   }
+
+   // find an Anafora XML file that actually exists
+   File xmlFile = null;
+   for (File possibleXMLFile : possibleXMLFiles) {
+     if (possibleXMLFile.exists()) {
+       xmlFile = possibleXMLFile;
+       break;
+     }
+   }
+   if (this.anaforaXMLSuffixes.length > 0 && xmlFile == null) {
+     throw new IllegalArgumentException("no Anafora XML file found from " + possibleXMLFiles);
+   }
+
+   if (xmlFile != null) {
+     processXmlFile(jCas, xmlFile);
+   }
+
+   // The hard-coded file is the same path as the default-suffix candidate. Loading it a second
+   // time would add every gold event to the CAS twice, so it is only processed when it is a
+   // different file from the one already loaded (File.equals(null) is false).
+   File corefFile = new File(textFile.getPath() + ".UmlsDeepPhe.dave.completed.xml");
+   if (!corefFile.equals(xmlFile) && corefFile.exists()) {
+     processXmlFile(jCas, corefFile);
+   }
+ }
+
+ /**
+  * Parses one Anafora XML file and, for every qualifying {@code entity} element, adds an
+  * {@link EventMention} together with its {@link Event} and {@link EventProperties} to
+  * {@code jCas}.
+  *
+  * <p>An entity qualifies when its {@code parentsType} is {@code UMLSEntities} and its
+  * {@code type} is not {@code Anatomical_site}, or when its {@code parentsType} is
+  * {@code Metastasis_Entities}. Entities with missing mandatory children, a malformed span, an
+  * out-of-range span, or no {@code DocTimeRel} are reported through {@link #error} and skipped.
+  *
+  * @param jCas the CAS that receives the annotations
+  * @param xmlFile the Anafora XML file to load
+  * @throws AnalysisEngineProcessException if the file cannot be read or is not well-formed XML
+  */
+ private static void processXmlFile(JCas jCas, File xmlFile) throws AnalysisEngineProcessException {
+   // load the XML
+   Element dataElem;
+   try {
+     dataElem = new SAXBuilder().build(xmlFile.toURI().toURL()).getRootElement();
+   } catch (MalformedURLException e) {
+     throw new AnalysisEngineProcessException(e);
+   } catch (JDOMException e) {
+     throw new AnalysisEngineProcessException(e);
+   } catch (IOException e) {
+     throw new AnalysisEngineProcessException(e);
+   }
+
+   int curEventId = 1;
+   int docLen = jCas.getDocumentText().length();
+
+   for (Element annotationsElem : dataElem.getChildren("annotations")) {
+
+     for (Element entityElem : annotationsElem.getChildren("entity")) {
+       String id = removeSingleChildText(entityElem, "id", null);
+       Element spanElem = removeSingleChild(entityElem, "span", id);
+       String type = removeSingleChildText(entityElem, "type", id);
+       String parType = removeSingleChildText(entityElem, "parentsType", id);
+       Element propertiesElem = removeSingleChild(entityElem, "properties", id);
+
+       // The helpers above have already logged any missing child; skip the entity instead of
+       // failing later with a NullPointerException.
+       if (spanElem == null || type == null || parType == null || propertiesElem == null) {
+         continue;
+       }
+
+       // UIMA doesn't support disjoint spans, so take the span enclosing
+       // everything
+       int[] span = parseEnclosingSpan(spanElem.getText(), id);
+       if (span == null) {
+         continue;
+       }
+       int begin = span[0];
+       int end = span[1];
+       // end is an exclusive offset, so an annotation may end exactly at the document length
+       if (begin < 0 || end > docLen) {
+         error("Illegal begin or end boundary", id);
+         continue;
+       }
+
+       // Parenthesized exactly as Java parses the original unparenthesized condition.
+       boolean isUmlsEvent = !type.equals("Anatomical_site") && parType.equals("UMLSEntities");
+       boolean isMetastasis = parType.equals("Metastasis_Entities");
+       if (isUmlsEvent || isMetastasis) {
+         String docTimeRel = removeSingleChildText(propertiesElem, "DocTimeRel", id);
+         if (docTimeRel == null) {
+           // no default is assumed: the entity is dropped
+           error("no docTimeRel, skipping entity", id);
+           continue;
+         }
+         EventMention eventMention = new EventMention(jCas, begin, end);
+         Event event = new Event(jCas);
+         EventProperties eventProperties = new EventProperties(jCas);
+         eventProperties.setDocTimeRel(docTimeRel);
+         eventProperties.setCategory(type);
+         eventProperties.addToIndexes();
+         event.setConfidence(1.0f);
+         event.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
+         event.setProperties(eventProperties);
+         event.setMentions(new FSArray(jCas, 1));
+         event.setMentions(0, eventMention);
+         event.addToIndexes();
+         eventMention.setId(curEventId++);
+         eventMention.setConfidence(1.0f);
+         eventMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
+         eventMention.setEvent(event);
+         eventMention.addToIndexes();
+       }
+       // Other entity types are not converted into annotations by this reader.
+
+       // make sure all XML has been consumed ("parentsType" was already removed above, so it
+       // is not removed a second time; doing so only logged a spurious error per entity)
+       if (!propertiesElem.getChildren().isEmpty() || !entityElem.getChildren().isEmpty()) {
+         List<String> children = Lists.newArrayList();
+         for (Element child : propertiesElem.getChildren()) {
+           children.add(child.getName());
+         }
+         for (Element child : entityElem.getChildren()) {
+           children.add(child.getName());
+         }
+         error("unprocessed children " + children, id);
+       }
+     }
+   }
+ }
+
+ /**
+  * Parses an Anafora span string of the form {@code b1,e1;b2,e2;...} and returns the smallest
+  * span enclosing all parts.
+  *
+  * @param spanText the text of the {@code span} element
+  * @param id the annotation ID used in error messages
+  * @return a two-element array {@code {begin, end}}, or {@code null} (after logging an error)
+  *         when any part is not of the format {@code number,number}
+  */
+ private static int[] parseEnclosingSpan(String spanText, String id) {
+   int begin = Integer.MAX_VALUE;
+   int end = Integer.MIN_VALUE;
+   for (String spanString : spanText.split(";")) {
+     String[] beginEndStrings = spanString.split(",");
+     if (beginEndStrings.length != 2) {
+       error("span not of the format 'number,number'", id);
+       return null;
+     }
+     int spanBegin;
+     int spanEnd;
+     try {
+       spanBegin = Integer.parseInt(beginEndStrings[0]);
+       spanEnd = Integer.parseInt(beginEndStrings[1]);
+     } catch (NumberFormatException e) {
+       error("span not of the format 'number,number'", id);
+       return null;
+     }
+     if (spanBegin < begin) {
+       begin = spanBegin;
+     }
+     if (spanEnd > end) {
+       end = spanEnd;
+     }
+   }
+   return new int[] {begin, end};
+ }
+
+ private static Element getSingleChild(Element elem, String elemName, String causeID) {
+ List<Element> children = elem.getChildren(elemName);
+ if (children.size() != 1) {
+ error(String.format("not exactly one '%s' child", elemName), causeID);
+ }
+ return children.size() > 0 ? children.get(0) : null;
+ }
+
+ private static Element removeSingleChild(Element elem, String elemName, String causeID)
{
+ Element child = getSingleChild(elem, elemName, causeID);
+ elem.removeChildren(elemName);
+ return child;
+ }
+
+ private static String removeSingleChildText(Element elem, String elemName, String causeID)
{
+ Element child = getSingleChild(elem, elemName, causeID);
+ String text = null;
+ if(child != null){
+ text = child.getText();
+ }
+ if (text==null || text.isEmpty()) {
+ error(String.format("an empty '%s' child", elemName), causeID);
+ text = null;
+ }
+ elem.removeChildren(elemName);
+ return text;
+ }
+
+ private static void error(String found, String id) {
+ LOGGER.error(String.format("found %s in annotation with ID %s", found, id));
+ }
+
+ /**
+  * Runs this reader over the files given on the command line.
+  *
+  * @param args paths of the files handed to the collection reader
+  * @throws Exception if the reader or engine cannot be created or the pipeline fails
+  */
+ public static void main(String[] args) throws Exception {
+   final List<File> inputFiles = Lists.newArrayList();
+   for (int i = 0; i < args.length; i++) {
+     inputFiles.add(new File(args[i]));
+   }
+   final CollectionReader uriReader = UriCollectionReader.getCollectionReaderFromFiles(inputFiles);
+   final AnalysisEngine xmlReaderEngine =
+       AnalysisEngineFactory.createEngine(DeepPheAnaforaXMLReader.class);
+   SimplePipeline.runPipeline(uriReader, xmlReaderEngine);
+ }
+}
Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java?rev=1761094&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java
Fri Sep 16 21:26:38 2016
@@ -0,0 +1,500 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.FileHandler;
+import java.util.logging.Formatter;
+import java.util.logging.Level;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+
+import org.apache.ctakes.relationextractor.eval.SHARPXMI;
+import org.apache.ctakes.temporal.ae.DocTimeRelAnnotator;
+import org.apache.ctakes.temporal.eval.EvaluationOfEventTimeRelations.ParameterSettings;
+import org.apache.ctakes.typesystem.type.refsem.Event;
+import org.apache.ctakes.typesystem.type.refsem.EventProperties;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasCopier;
+import org.apache.uima.util.FileUtils;
+import org.cleartk.eval.AnnotationStatistics;
+import org.cleartk.ml.jar.JarClassifierBuilder;
+import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
+import org.cleartk.ml.tksvmlight.model.CompositeKernel.ComboOperator;
+import org.cleartk.util.ViewUriUtil;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.pipeline.JCasIterator;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.collect.Maps;
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+public class EvaluationOfEventDocTimeRelDeepPhe extends
+Evaluation_ImplBase<Map<String, AnnotationStatistics<String>>>{
+ /**
+  * Command-line options of this evaluation, on top of those declared by
+  * {@link Evaluation_ImplBase.Options}. Each getter is exposed as a flag through the
+  * {@code @Option} annotation.
+  */
+ static interface TempRelOptions extends Evaluation_ImplBase.Options {
+   /** Passed to the evaluation constructor as its {@code printErrors} argument. */
+   @Option
+   boolean getPrintFormattedRelations();
+
+   /** Declared for the command line; not read in this file. */
+   @Option
+   boolean getBaseline();
+
+   /** Passed to the evaluation constructor as its {@code useClosure} argument. */
+   @Option
+   boolean getClosure();
+
+   /** When set, {@code main} works in a fresh temporary directory and deletes it afterwards. */
+   @Option
+   boolean getUseTmp();
+
+   /** Passed to the evaluation constructor as its {@code useGoldAttributes} argument. */
+   @Option
+   boolean getUseGoldAttributes();
+
+   /** When set, XMI preparation for the training items and the training step are skipped. */
+   @Option
+   boolean getSkipTrain();
+ }
+
+ // protected static ParameterSettings flatParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS,
DEFAULT_DOWNSAMPLE, "linear",
+ // 10.0, 1.0, "linear", ComboOperator.VECTOR_ONLY, DEFAULT_TK, DEFAULT_LAMBDA);
+ // protected static ParameterSettings allBagsParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS,
DEFAULT_DOWNSAMPLE, "tk",
+ // 100.0, 0.1, "radial basis function", ComboOperator.SUM, 0.5, 0.5);
+ // protected static ParameterSettings ftParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS,
DEFAULT_DOWNSAMPLE, "tk",
+ // 1.0, 0.1, "radial basis function", ComboOperator.SUM, 0.5, 0.5);
+ // private static Boolean recallModeEvaluation = true;
+ // First two constructor arguments of the default ParameterSettings below.
+ protected static boolean DEFAULT_BOTH_DIRECTIONS = false;
+ protected static float DEFAULT_DOWNSAMPLE = 1.0f;
+ // Parameter settings that main() hands to the evaluation. train() reads svmCost, svmKernelIndex,
+ // svmGamma, secondKernelIndex, comboOperator, lambda and tkWeight from it.
+ // NOTE(review): the positional meaning of each argument is defined by ParameterSettings, which
+ // is not visible here - confirm before changing values.
+ protected static ParameterSettings allParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS,
DEFAULT_DOWNSAMPLE, "tk",
+ 10.0, 1.0, "polynomial", ComboOperator.SUM, 0.1, 0.5); // (0.3, 0.4 for tklibsvm)
+ // Single property evaluated by this class: used as the key of the statistics and logger maps,
+ // as the model sub-directory name, and as the EventProperties feature base name.
+ private static final String DOC_TIME_REL = "docTimeRel";
+ // Value written to discoveryTechnique to mark event mentions that correspond to gold events;
+ // mentions without this marker are removed from the system view by the nested annotators.
+ private static final int DISCOVERTY_TYPE = 100;
+
+ /**
+  * Command-line entry point: prepares XMIs, trains (unless skipped) and tests the DocTimeRel
+  * model on fixed training and test item lists, then prints the statistics to standard error.
+  *
+  * @param args command-line arguments parsed into {@link TempRelOptions}
+  * @throws Exception on any failure other than a {@link ResourceInitializationException},
+  *         which is reported on standard error instead
+  */
+ public static void main(String[] args) throws Exception {
+   final TempRelOptions options = CliFactory.parseArguments(TempRelOptions.class, args);
+   final List<Integer> trainItems = Arrays.asList(3, 11, 92, 93);
+   final List<Integer> testItems = Arrays.asList(2, 21);
+   final ParameterSettings params = allParams;
+
+   try {
+     File workingDir = new File("target/eval/event-properties");
+     if (!workingDir.exists()) {
+       workingDir.mkdirs();
+     }
+     if (options.getUseTmp()) {
+       // turn a freshly reserved temp-file name into a directory of the same name
+       File tempModelDir = File.createTempFile("temporal", null, workingDir);
+       tempModelDir.delete();
+       tempModelDir.mkdir();
+       workingDir = tempModelDir;
+     }
+
+     final EvaluationOfEventDocTimeRelDeepPhe evaluation = new EvaluationOfEventDocTimeRelDeepPhe(
+         workingDir,
+         options.getRawTextDirectory(),
+         options.getXMLDirectory(),
+         options.getXMLFormat(),
+         options.getSubcorpus(),
+         options.getXMIDirectory(),
+         options.getTreebankDirectory(),
+         options.getClosure(),
+         options.getPrintFormattedRelations(),
+         options.getUseGoldAttributes(),
+         params);
+
+     evaluation.logClassificationErrors(workingDir, "deepPhe-event-property-errors");
+
+     // XMIs for the training items are only needed when training actually runs
+     evaluation.skipTrain = options.getSkipTrain();
+     if (!evaluation.skipTrain) {
+       evaluation.prepareXMIsFor(trainItems);
+     }
+     evaluation.prepareXMIsFor(testItems);
+
+     final Map<String, AnnotationStatistics<String>> stats =
+         evaluation.trainAndTest(trainItems, testItems);
+
+     final String name = DOC_TIME_REL;
+     System.err.println("====================");
+     System.err.println(name);
+     System.err.println("--------------------");
+     System.err.println(stats.get(name));
+
+     if (options.getUseTmp()) {
+       // remove the temporary working directory created above
+       FileUtils.deleteRecursive(workingDir);
+     }
+   } catch (ResourceInitializationException e) {
+     System.err.println("Error with Initialization");
+     e.printStackTrace();
+   }
+ }
+
+ private ParameterSettings params;
+ protected boolean useClosure;
+ protected boolean useGoldAttributes;
+ protected boolean skipTrain=false;
+ private Map<String, Logger> loggers = Maps.newHashMap();
+ // protected boolean printRelations = false;
+
+ public EvaluationOfEventDocTimeRelDeepPhe(
+ File baseDirectory,
+ File rawTextDirectory,
+ File xmlDirectory,
+ XMLFormat xmlFormat,
+ Subcorpus subcorpus,
+ File xmiDirectory,
+ File treebankDirectory,
+ boolean useClosure,
+ boolean printErrors,
+ boolean useGoldAttributes,
+ ParameterSettings params
+ ){
+ super(
+ baseDirectory,
+ rawTextDirectory,
+ xmlDirectory,
+ xmlFormat,
+ subcorpus,
+ xmiDirectory,
+ treebankDirectory);
+ this.useClosure = useClosure;
+ this.printErrors = printErrors;
+ this.params = params;
+ this.useGoldAttributes = useGoldAttributes;
+ this.loggers.put(DOC_TIME_REL, Logger.getLogger(String.format("%s.%s", this.getClass().getName(),
DOC_TIME_REL)));
+ }
+
+ /**
+  * Writes DocTimeRel training data for the documents of {@code collectionReader} and trains and
+  * packages the classifier under {@code directory/docTimeRel}. Does nothing when
+  * {@link #skipTrain} is set.
+  *
+  * <p>Only the cost value (second entry of the option array) is passed to the trainer, as
+  * {@code -c <cost>}.
+  *
+  * @param collectionReader reader over the training documents
+  * @param directory parent directory of the model directory
+  * @throws Exception if the pipeline or the training fails
+  */
+ @Override
+ protected void train(CollectionReader collectionReader, File directory) throws Exception {
+   if(this.skipTrain) return;
+   AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+   aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(KeepEventMentionsCoveredByGoldMentions.class));
+   aggregateBuilder.add(DocTimeRelAnnotator.createDataWriterDescription(
+ //      LibSvmStringOutcomeDataWriter.class,
+       LibLinearStringOutcomeDataWriter.class,
+       new File(directory, DOC_TIME_REL)));
+   SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
+   String[] optArray;
+
+   if(this.kernelParams == null){
+     ArrayList<String> svmOptions = new ArrayList<>();
+     svmOptions.add("-c"); svmOptions.add(""+params.svmCost); // svm cost
+     svmOptions.add("-t"); svmOptions.add(""+params.svmKernelIndex); // kernel index
+     svmOptions.add("-d"); svmOptions.add("3"); // degree parameter for polynomial
+     svmOptions.add("-g"); svmOptions.add(""+params.svmGamma);
+     if(params.svmKernelIndex==ParameterSettings.SVM_KERNELS.indexOf("tk")){
+       svmOptions.add("-S"); svmOptions.add(""+params.secondKernelIndex); // second kernel index (similar to -t) for composite kernel
+       String comboFlag = (params.comboOperator == ComboOperator.SUM ? "+" : params.comboOperator == ComboOperator.PRODUCT ? "*" : params.comboOperator == ComboOperator.TREE_ONLY ? "T" : "V");
+       svmOptions.add("-C"); svmOptions.add(comboFlag);
+       svmOptions.add("-L"); svmOptions.add(""+params.lambda);
+       svmOptions.add("-T"); svmOptions.add(""+params.tkWeight);
+       svmOptions.add("-N"); svmOptions.add("3"); // normalize trees and features
+     }
+     optArray = svmOptions.toArray(new String[]{});
+   }else{
+     // Work on a copy: prefixing the flags in the shared kernelParams array itself would add
+     // another "-" to every flag each time train() is called.
+     optArray = this.kernelParams.clone();
+     for(int i = 0; i < optArray.length; i+=2){
+       optArray[i] = "-" + optArray[i];
+     }
+   }
+
+   // only the cost value is forwarded to the trainer
+   String[] weightArray=new String[2];
+   weightArray[0] = "-c";
+   weightArray[1] = optArray[1];
+   JarClassifierBuilder.trainAndPackage(new File(directory, DOC_TIME_REL),weightArray);
+ }
+
+ /**
+  * Runs the trained DocTimeRel model over the documents of {@code collectionReader} and
+  * compares the predicted DocTimeRel of every event with the gold value.
+  *
+  * <p>Besides collecting the statistics, every compared event is written to the
+  * {@code docTimeRel} logger at level FINE together with a text window of up to 100 characters
+  * on each side. That per-event log pairs gold and system events by list position.
+  *
+  * @param collectionReader reader over the test documents
+  * @param directory parent directory of the model directory
+  * @return a map with a single entry, keyed by {@code docTimeRel}
+  * @throws Exception if the pipeline fails
+  */
+ @SuppressWarnings("deprecation")
+ @Override
+ protected Map<String, AnnotationStatistics<String>> test(CollectionReader collectionReader, File directory)
+     throws Exception {
+   this.useClosure=false;//don't do closure for test
+   AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+ //  aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class));
+   aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ReplaceCTakesMentionsWithGoldMentions.class));
+
+ //  aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ClearEventProperties.class));
+
+   aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription(new File(directory, DOC_TIME_REL)));
+
+   aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyHeadEventDocTimeRel2GoldEvent.class));
+
+   Function<EventMention, ?> eventMentionToSpan = AnnotationStatistics.annotationToSpan();
+   Function<EventMention, String> getProperty = getPropertyGetter(DOC_TIME_REL);
+
+   String name = DOC_TIME_REL;
+   AnnotationStatistics<String> docTimeRelStats = new AnnotationStatistics<String>();
+   Map<String, AnnotationStatistics<String>> statsMap = new HashMap<>();
+   statsMap.put(name, docTimeRelStats);
+   Logger errorLogger = this.loggers.get(name);
+
+   for (Iterator<JCas> casIter = new JCasIterator(collectionReader, aggregateBuilder.createAggregate()); casIter.hasNext();){
+     JCas jCas = casIter.next();
+     JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+     JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+     String text = goldView.getDocumentText();
+
+     List<EventMention> goldEvents = new ArrayList<>(JCasUtil.select(goldView, EventMention.class));
+     List<EventMention> systemEvents = new ArrayList<>(JCasUtil.select(systemView, EventMention.class));
+     errorLogger.fine("Errors in : " + ViewUriUtil.getURI(jCas).toString());
+     docTimeRelStats.add(
+         goldEvents,
+         systemEvents,
+         eventMentionToSpan,
+         getProperty);
+
+     // The log below pairs events by position. Never index past the shorter list, and make a
+     // size mismatch visible instead of failing with an IndexOutOfBoundsException.
+     int pairCount = Math.min(goldEvents.size(), systemEvents.size());
+     if (goldEvents.size() != systemEvents.size()) {
+       errorLogger.fine(String.format(
+           "%d gold events but %d system events; only the first %d pairs are compared",
+           goldEvents.size(),
+           systemEvents.size(),
+           pairCount));
+     }
+     for (int i = 0; i < pairCount; ++i) {
+       EventMention event = goldEvents.get(i);
+       String goldOutcome = getProperty.apply(event);
+       String systemOutcome = getProperty.apply(systemEvents.get(i));
+       int begin = event.getBegin();
+       int end = event.getEnd();
+       int windowBegin = Math.max(0, begin - 100);
+       int windowEnd = Math.min(text.length(), end + 100);
+       // null-safe comparison: a missing DocTimeRel must not abort the evaluation
+       boolean sameOutcome = goldOutcome == null
+           ? systemOutcome == null
+           : goldOutcome.equals(systemOutcome);
+       if (!sameOutcome) {
+         errorLogger.fine(String.format(
+             "%s was %s but should be %s, in ...%s[!%s!:%d-%d]%s...",
+             name,
+             systemOutcome,
+             goldOutcome,
+             text.substring(windowBegin, begin).replaceAll("[\r\n]", " "),
+             text.substring(begin, end),
+             begin,
+             end,
+             text.substring(end, windowEnd).replaceAll("[\r\n]", " ")));
+       }else{//if gold outcome equals system outcome
+         errorLogger.fine(String.format(
+             "%s was correctly labeled as %s, in ...%s[!%s!:%d-%d]%s...",
+             name,
+             goldOutcome,
+             text.substring(windowBegin, begin).replaceAll("[\r\n]", " "),
+             text.substring(begin, end),
+             begin,
+             end,
+             text.substring(end, windowEnd).replaceAll("[\r\n]", " ")));
+       }
+     }
+   }
+   return statsMap;
+ }
+
+ /**
+  * Attaches a file handler to the {@code docTimeRel} logger so that its FINE-level messages
+  * are written, one message per line, to {@code <outputDir>/<outputFilePrefix>.docTimeRel.log}.
+  *
+  * @param outputDir directory of the log file; created when it does not exist
+  * @param outputFilePrefix first part of the log file name
+  * @throws IOException if the log file cannot be opened
+  */
+ public void logClassificationErrors(File outputDir, String outputFilePrefix) throws IOException {
+   if (!outputDir.exists()) {
+     outputDir.mkdirs();
+   }
+   final String property = DOC_TIME_REL;
+   final Logger propertyLogger = this.loggers.get(property);
+   propertyLogger.setLevel(Level.FINE);
+
+   final String logFileName = outputFilePrefix + "." + property + ".log";
+   final FileHandler fileHandler = new FileHandler(new File(outputDir, logFileName).getPath());
+   // write the bare message only, without timestamp or level
+   final Formatter messageOnly = new Formatter() {
+     @Override
+     public String format(LogRecord record) {
+       return record.getMessage() + "\n";
+     }
+   };
+   fileHandler.setFormatter(messageOnly);
+   propertyLogger.addHandler(fileHandler);
+ }
+
+ /**
+  * Builds a function that reads one feature of an event mention's {@link EventProperties} as
+  * a string.
+  *
+  * @param propertyName base name of the {@link EventProperties} feature to read
+  * @return a function mapping an event mention to the string value of that feature
+  */
+ private static Function<EventMention, String> getPropertyGetter(final String propertyName) {
+   return new Function<EventMention, String>() {
+     @Override
+     public String apply(EventMention eventMention) {
+       final EventProperties props = eventMention.getEvent().getProperties();
+       return props.getFeatureValueAsString(props.getType().getFeatureByBaseName(propertyName));
+     }
+   };
+ }
+
+ /**
+  * Annotator that resets every {@link EventProperties} in the CAS: all string-valued
+  * properties are set to {@code null} and the polarity to {@code 0}.
+  */
+ public static class ClearEventProperties extends org.apache.uima.fit.component.JCasAnnotator_ImplBase {
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     final Collection<EventProperties> allProperties = JCasUtil.select(jCas, EventProperties.class);
+     for (Iterator<EventProperties> it = allProperties.iterator(); it.hasNext();) {
+       final EventProperties props = it.next();
+       props.setAspect(null);
+       props.setCategory(null);
+       props.setContextualAspect(null);
+       props.setContextualModality(null);
+       props.setDegree(null);
+       props.setDocTimeRel(null);
+       props.setPermanence(null);
+       props.setPolarity(0);
+     }
+   }
+
+ }
+
+ /**
+  * Annotator that copies every gold-view {@link EventMention} into the system view and marks
+  * each copy with {@code DISCOVERTY_TYPE} as its discovery technique.
+  *
+  * <p>Despite the class name, event mentions already present in the system view are left in
+  * place by this annotator.
+  */
+ public static class ReplaceCTakesMentionsWithGoldMentions extends JCasAnnotator_ImplBase {
+
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     final JCas goldView;
+     final JCas systemView;
+     try {
+       goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
+       systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+     } catch (CASException e) {
+       throw new AnalysisEngineProcessException(e);
+     }
+
+     // snapshot the gold mentions before anything is added to the CAS
+     final List<EventMention> goldMentions =
+         new ArrayList<>(JCasUtil.select(goldView, EventMention.class));
+     final CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+     for (int i = 0; i < goldMentions.size(); i++) {
+       final EventMention copy = (EventMention) copier.copyFs(goldMentions.get(i));
+       // re-point the copy at the system view's sofa
+       copy.setFeatureValue(copy.getType().getFeatureByBaseName("sofa"), systemView.getSofa());
+       copy.setDiscoveryTechnique(DISCOVERTY_TYPE);//mark copied events
+       copy.addToIndexes();
+     }
+   }
+ }
+
+ /**
+  * Annotator used for training: reduces the system view to event mentions that correspond to
+  * gold events.
+  *
+  * <p>For each gold event mention, every system event mention covered by its span is marked
+  * with {@code DISCOVERTY_TYPE} and receives the gold DocTimeRel. When no system mention is
+  * covered, the gold mention itself is copied into the system view and marked. Finally all
+  * unmarked system event mentions are removed from the indexes.
+  */
+ public static class KeepEventMentionsCoveredByGoldMentions extends JCasAnnotator_ImplBase {
+
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     final JCas goldView;
+     final JCas systemView;
+     try {
+       goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
+       systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+     } catch (CASException e) {
+       throw new AnalysisEngineProcessException(e);
+     }
+
+     final List<EventMention> goldMentions =
+         new ArrayList<>(JCasUtil.select(goldView, EventMention.class));
+     final CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+     for (EventMention goldMention : goldMentions) {
+       final List<EventMention> coveredSystemEvents = JCasUtil.selectCovered(
+           systemView, EventMention.class, goldMention.getBegin(), goldMention.getEnd());
+
+       if (coveredSystemEvents.isEmpty()) {
+         // no system event inside the gold span: bring the gold event over instead
+         final EventMention copy = (EventMention) copier.copyFs(goldMention);
+         copy.setFeatureValue(copy.getType().getFeatureByBaseName("sofa"), systemView.getSofa());
+         copy.setDiscoveryTechnique(DISCOVERTY_TYPE);//mark copied events
+         copy.addToIndexes();
+         continue;
+       }
+
+       for (EventMention sysEvent : coveredSystemEvents) {
+         final String goldDocTimeRel = goldMention.getEvent().getProperties().getDocTimeRel();
+         sysEvent.setDiscoveryTechnique(DISCOVERTY_TYPE);//mark copied events
+         if (sysEvent.getEvent() != null) {
+           sysEvent.getEvent().getProperties().setDocTimeRel(goldDocTimeRel);
+         } else {
+           // the system mention has no event yet: create one to carry the gold label
+           final Event event = new Event(systemView);
+           final EventProperties props = new EventProperties(systemView);
+           props.setDocTimeRel(goldDocTimeRel);
+           event.setProperties(props);
+           sysEvent.setEvent(event);
+         }
+       }
+     }
+
+     //remove non-gold events (iterate over a snapshot, the index is modified while looping)
+     final List<EventMention> systemMentions =
+         new ArrayList<>(JCasUtil.select(systemView, EventMention.class));
+     for (EventMention mention : systemMentions) {
+       final boolean marked = mention.getDiscoveryTechnique() == DISCOVERTY_TYPE;
+       if (!marked) {
+         mention.removeFromIndexes();
+       }
+     }
+   }
+ }
+
+ /**
+  * Annotator run after classification: for every system-view event mention marked with
+  * {@code DISCOVERTY_TYPE}, copies the DocTimeRel of the first event mention it covers (if
+  * any) onto the marked mention's event, and then removes all unmarked event mentions from
+  * the system view.
+  */
+ public static class CopyHeadEventDocTimeRel2GoldEvent extends JCasAnnotator_ImplBase {
+
+   @Override
+   public void process(JCas jCas) throws AnalysisEngineProcessException {
+     final JCas systemView;
+     try {
+       systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+     } catch (CASException e) {
+       throw new AnalysisEngineProcessException(e);
+     }
+
+     //build an eventMention-eventMention covered map
+     final Map<EventMention, Collection<EventMention>> coveredMap =
+         JCasUtil.indexCovered(jCas, EventMention.class, EventMention.class);
+
+     // copy covered event's DocTimeRel to the gold event
+     for (EventMention candidate : JCasUtil.select(systemView, EventMention.class)) {
+       if (candidate.getDiscoveryTechnique() != DISCOVERTY_TYPE) {
+         continue; // not a gold event
+       }
+       // only the first covered event is consulted
+       final Iterator<EventMention> covered = coveredMap.get(candidate).iterator();
+       if (covered.hasNext()) {
+         final String covDocTimeRel = covered.next().getEvent().getProperties().getDocTimeRel();
+         candidate.getEvent().getProperties().setDocTimeRel(covDocTimeRel);
+       }
+     }
+
+     // drop everything that is not a gold event (iterate over a snapshot of the index)
+     final List<EventMention> systemMentions =
+         new ArrayList<>(JCasUtil.select(systemView, EventMention.class));
+     for (Iterator<EventMention> it = systemMentions.iterator(); it.hasNext();) {
+       final EventMention mention = it.next();
+       if (mention.getDiscoveryTechnique() != DISCOVERTY_TYPE) {
+         mention.removeFromIndexes();
+       }
+     }
+   }
+ }
+}
|