ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mattco...@apache.org
Subject svn commit: r1425059 - in /incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: eval/ medfacts/cleartk/ medfacts/cleartk/extractors/
Date Fri, 21 Dec 2012 17:56:03 GMT
Author: mattcoarr
Date: Fri Dec 21 17:56:03 2012
New Revision: 1425059

URL: http://svn.apache.org/viewvc?rev=1425059&view=rev
Log:
added latest work for ctakes-assertion including adding "zone" features

Added:
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionSampleFeatureGenerator.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/SurroundingExtractor.java
Modified:
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java?rev=1425059&r1=1425058&r2=1425059&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java Fri Dec 21 17:56:03 2012
@@ -21,6 +21,7 @@ package org.apache.ctakes.assertion.eval
 import java.io.File;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
+import java.net.URI;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -56,7 +57,9 @@ import org.cleartk.eval.Evaluation_ImplB
 import org.cleartk.util.Options_ImplBase;
 import org.kohsuke.args4j.Option;
 import org.kohsuke.args4j.spi.BooleanOptionHandler;
+import org.mitre.medfacts.uima.ZoneAnnotator;
 import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.AssertionComponents;
 import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
@@ -64,6 +67,7 @@ import org.apache.ctakes.assertion.medfa
 import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
 import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.component.xwriter.XWriter;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
@@ -115,6 +119,12 @@ public class AssertionEvalBasedOnModifie
     public File modelsDirectory;
     
     @Option(
+            name = "--evaluation-output-dir",
+            usage = "specify the directory where the evaluation output xmi files will go",
+            required = false)
+    public File evaluationOutputDirectory;
+        
+    @Option(
             name = "--run-polarity",
             usage = "specify whether polarity processing should be run (true or false). default: true",
             required = false)
@@ -153,6 +163,8 @@ public class AssertionEvalBasedOnModifie
   private Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass;
 
   private Class<? extends DataWriterFactory<String>> dataWriterFactoryClass;
+  
+  private File evaluationOutputDirectory;
 
   
   protected static Options options = new Options();
@@ -169,6 +181,7 @@ public class AssertionEvalBasedOnModifie
     List<File> trainFiles = Arrays.asList(options.trainDirectory.listFiles());
     //File modelsDir = new File("models/modifier");
     File modelsDir = options.modelsDirectory;
+    File evaluationOutputDirectory = options.evaluationOutputDirectory;
 
     // determine the type of classifier to be trained
     Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultMaxentDataWriterFactory.class;
@@ -193,6 +206,7 @@ public class AssertionEvalBasedOnModifie
     
     AssertionEvalBasedOnModifier evaluation = new AssertionEvalBasedOnModifier(
         modelsDir,
+        evaluationOutputDirectory,
         annotationTypes,
         annotatorClass,
         dataWriterFactoryClass
@@ -299,13 +313,14 @@ public static void printScore(Map<String
   private String[] trainingArguments;
 
   public AssertionEvalBasedOnModifier(
-      File directory,
+      File modelDirectory,
+      File evaluationOutputDirectory,
       ArrayList<String> annotationTypes,
       Class<? extends AssertionCleartkAnalysisEngine> classifierAnnotatorClass,
       Class<? extends DataWriterFactory<String>> dataWriterFactoryClass,
       String... trainingArguments
       ) {
-    super(directory);
+    super(modelDirectory);
     
     this.annotationTypes = annotationTypes;
 
@@ -313,6 +328,7 @@ public static void printScore(Map<String
     this.dataWriterFactoryClass = dataWriterFactoryClass;
 
     this.trainingArguments = trainingArguments;
+    this.evaluationOutputDirectory = evaluationOutputDirectory;
   }
 
   @Override
@@ -352,6 +368,29 @@ public static void printScore(Map<String
     AnalysisEngineDescription assertionAttributeClearerAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceAnnotationsSystemAssertionClearer.class);
     builder.add(assertionAttributeClearerAnnotator);
     
+    URI generalSectionRegexFileUri =
+        this.getClass().getClassLoader().getResource("org/mitre/medfacts/zoner/section_regex.xml").toURI();
+//      ExternalResourceDescription generalSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
+//          SectionRegexConfigurationResource.class, new File(generalSectionRegexFileUri));
+      AnalysisEngineDescription zonerAnnotator =
+          AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+              ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+              generalSectionRegexFileUri
+              );
+      builder.add(zonerAnnotator);
+
+      URI mayoSectionRegexFileUri =
+          this.getClass().getClassLoader().getResource("org/mitre/medfacts/zoner/mayo_sections.xml").toURI();
+//        ExternalResourceDescription mayoSectionRegexDescription = ExternalResourceFactory.createExternalResourceDescription(
+//            SectionRegexConfigurationResource.class, new File(mayoSectionRegexFileUri));
+      AnalysisEngineDescription mayoZonerAnnotator =
+          AnalysisEngineFactory.createPrimitiveDescription(ZoneAnnotator.class,
+              ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
+              mayoSectionRegexFileUri
+              );
+      builder.add(mayoZonerAnnotator);
+    
+    
     if (options.runPolarity)
     {
 	    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class);
//,  this.additionalParamemters);
@@ -533,6 +572,19 @@ public static void printScore(Map<String
 	    builder.add(genericAnnotator);
     }
     
+    if (evaluationOutputDirectory != null)
+    {
+        AnalysisEngineDescription xwriter =
+    		AnalysisEngineFactory.createPrimitiveDescription(
+	            XWriter.class,
+	            AssertionComponents.CTAKES_CTS_TYPE_SYSTEM_DESCRIPTION,
+	            XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
+	            evaluationOutputDirectory,
+	            XWriter.PARAM_XML_SCHEME_NAME,
+	            XWriter.XMI);
+        builder.add(xwriter);
+    }
+    
     //SimplePipeline.runPipeline(collectionReader,  builder.createAggregateDescription());
     AnalysisEngineDescription aggregateDescription = builder.createAggregateDescription();
     AnalysisEngine aggregate = builder.createAggregate();

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1425059&r1=1425058&r2=1425059&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Fri Dec 21 17:56:03 2012
@@ -32,12 +32,15 @@ import org.apache.uima.analysis_engine.A
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 //import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
 import org.cleartk.classifier.CleartkAnnotator;
 import org.cleartk.classifier.CleartkAnnotatorDescriptionFactory;
 import org.cleartk.classifier.CleartkSequenceAnnotator;
+import org.cleartk.classifier.Feature;
 import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
 import org.cleartk.classifier.feature.extractor.ContextExtractor;
 import org.cleartk.classifier.feature.extractor.ContextExtractor.Covered;
 import org.cleartk.classifier.feature.extractor.ContextExtractor.Preceding;
@@ -59,6 +62,8 @@ import org.uimafit.factory.AnalysisEngin
 import org.uimafit.factory.ConfigurationParameterFactory;
 import org.uimafit.util.JCasUtil;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.SurroundingExtractor;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
@@ -96,6 +101,7 @@ public abstract class AssertionCleartkAn
   private List<ContextExtractor<IdentifiedAnnotation>> contextFeatureExtractors;
   private List<ContextExtractor<BaseToken>> tokenContextFeatureExtractors;
   private List<SimpleFeatureExtractor> entityFeatureExtractors;
+  private List<SimpleFeatureExtractor> surroundingFeatureExtractors;
   
   public void initialize(UimaContext context) throws ResourceInitializationException {
     super.initialize(context);
@@ -167,6 +173,10 @@ public abstract class AssertionCleartkAn
         */
         );
     tokenContextFeatureExtractors.add(extractor2);
+    
+    this.surroundingFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+    SimpleFeatureExtractor surround1 = new SurroundingExtractor();
+    this.surroundingFeatureExtractors.add(surround1);
 
   }
 
@@ -272,6 +282,17 @@ public abstract class AssertionCleartkAn
         instance.addAll(extractor.extract(identifiedAnnotationView, entityMention));
       }
       
+      for (SimpleFeatureExtractor extractor : this.surroundingFeatureExtractors)
+      {
+    	  instance.addAll(extractor.extract(identifiedAnnotationView,  entityMention));
+      }
+      
+      logger.log(Level.INFO,  String.format("[%s] expected: ''; actual: ''; features: %s",
+    		  this.getClass().getSimpleName(),
+    		  instance.toString()
+    		  //StringUtils.join(instance.getFeatures(), ", ")
+    		  ));
+      
       setClassLabel(entityMention, instance);
       
     }

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionSampleFeatureGenerator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionSampleFeatureGenerator.java?rev=1425059&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionSampleFeatureGenerator.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionSampleFeatureGenerator.java Fri Dec 21 17:56:03 2012
@@ -0,0 +1,57 @@
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import java.util.Arrays;
+
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SpannedTextExtractor;
+import org.cleartk.classifier.feature.proliferate.CapitalTypeProliferator;
+import org.cleartk.classifier.feature.proliferate.CharacterNGramProliferator;
+import org.cleartk.classifier.feature.proliferate.LowerCaseProliferator;
+import org.cleartk.classifier.feature.proliferate.NumericTypeProliferator;
+import org.cleartk.classifier.feature.proliferate.ProliferatingExtractor;
+import org.cleartk.util.cr.XReader;
+import org.uimafit.factory.CollectionReaderFactory;
+
+/**
+ * Work-in-progress driver: creates an XReader (XMI scheme) over a training
+ * directory. Nothing consumes the reader yet, so no features are produced.
+ */
+public class AssertionSampleFeatureGenerator
+{
+	/** Root used by {@link #execute()} and by main when no argument is given. */
+	private static final String DEFAULT_ROOT =
+		"/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/clean_dirs/splits/official/train";
+
+	public AssertionSampleFeatureGenerator()
+	{
+	}
+
+	/**
+	 * Entry point; declared static so the JVM launcher can invoke it.
+	 *
+	 * @param args optional; args[0], when present, replaces the default root
+	 * @throws ResourceInitializationException propagated from reader creation
+	 */
+	public static void main(String[] args) throws ResourceInitializationException
+	{
+		String root = (args != null && args.length > 0) ? args[0] : DEFAULT_ROOT;
+		new AssertionSampleFeatureGenerator().execute(root);
+	}
+
+	/** Runs {@link #execute(String)} against the default root. */
+	public void execute() throws ResourceInitializationException
+	{
+		execute(DEFAULT_ROOT);
+	}
+
+	/** Creates the XMI reader for {@code root}; NOTE(review): result is not used yet. */
+	public void execute(String root) throws ResourceInitializationException
+	{
+		CollectionReaderFactory.createCollectionReader(
+			XReader.class,
+			XReader.PARAM_ROOT_FILE, root,
+			XReader.PARAM_XML_SCHEME, XReader.XMI);
+	}
+}

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/SurroundingExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/SurroundingExtractor.java?rev=1425059&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/SurroundingExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/SurroundingExtractor.java Fri Dec 21 17:56:03 2012
@@ -0,0 +1,64 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.assertion.zoner.types.Zone;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Feature extractor that emits one "zone" feature for each {@link Zone}
+ * indexed as covering the target annotation.
+ */
+public class SurroundingExtractor implements SimpleFeatureExtractor
+{
+	protected static final Logger logger = Logger.getLogger(SurroundingExtractor.class);
+
+	/**
+	 * Builds the "zone" features for a single annotation.
+	 *
+	 * @param jcas view whose EntityMention and Zone annotations are indexed
+	 * @param targetAnnotation annotation looked up among the indexed mentions
+	 * @return one feature per covering zone, valued with the zone label; an
+	 *         empty list, never null, when no covering zone is found
+	 * @throws CleartkExtractorException part of the signature; not thrown directly here
+	 */
+	@Override
+	public List<Feature> extract(JCas jcas, Annotation targetAnnotation)
+			throws CleartkExtractorException
+	{
+		logger.debug("SurroundingExtractor.extract() BEGIN");
+		// NOTE(review): the covering index is rebuilt on every call.
+		Map<EntityMention, Collection<Zone>> coveringMap =
+			JCasUtil.indexCovering(jcas, EntityMention.class, Zone.class);
+		// Map.get takes any Object, so the target needs no downcast.
+		Collection<Zone> zoneList = coveringMap.get(targetAnnotation);
+
+		List<Feature> featureList = new ArrayList<Feature>();
+		if (zoneList == null || zoneList.isEmpty())
+		{
+			// Return here: iterating a null zone collection would throw.
+			logger.debug("SurroundingExtractor.extract() early END (no zones)");
+			return featureList;
+		}
+		for (Zone zone : zoneList)
+		{
+			Feature currentFeature = new Feature("zone", zone.getLabel());
+			logger.debug(String.format("zone: %s", zone.getLabel()));
+			logger.debug(String.format("zone feature: %s", currentFeature.toString()));
+			featureList.add(currentFeature);
+		}
+		logger.debug("SurroundingExtractor.extract() END");
+		return featureList;
+	}
+}



Mime
View raw message