ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1442975 - /incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/
Date Wed, 06 Feb 2013 14:15:47 GMT
Author: tmill
Date: Wed Feb  6 14:15:47 2013
New Revision: 1442975

URL: http://svn.apache.org/viewvc?rev=1442975&view=rev
Log:
Addresses ctakes-143. Adds interface method getSortedLookupTokens to allow/ensure optimal
handling of sorted underlying types in dictionary lookup.
Implemented the new method in the other classes that implement the interface, but did not test
them since they are not used in ctakes as far as I can tell.
See discussion on ctakes-dev for further information (http://mail-archives.apache.org/mod_mbox/incubator-ctakes-dev/201302.mbox/%3C51102B9B.3000206%40childrens.harvard.edu%3E)


Modified:
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DirectLookupInitializerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java?rev=1442975&r1=1442974&r2=1442975&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java
(original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java
Wed Feb  6 14:15:47 2013
@@ -115,10 +115,8 @@ public class DictionaryLookupAnnotator e
 				while (windowItr.hasNext()) {
 
 					Annotation window = (Annotation) windowItr.next();
-					List lookupTokensInWindow = constrainToWindow(
-							window,
-							lInit.getLookupTokenIterator(jcas));
-
+					List lookupTokensInWindow = lInit.getSortedLookupTokens(jcas, window);
+											
 					Map ctxMap = lInit.getContextMap(
 							jcas,
 							window.getBegin(),

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DirectLookupInitializerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DirectLookupInitializerImpl.java?rev=1442975&r1=1442974&r2=1442975&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DirectLookupInitializerImpl.java
(original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/DirectLookupInitializerImpl.java
Wed Feb  6 14:15:47 2013
@@ -29,6 +29,7 @@ import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
 import org.apache.uima.jcas.JFSIndexRepository;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 
 
 import org.apache.ctakes.dictionary.lookup.DictionaryEngine;
@@ -44,6 +45,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
 import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.uimafit.util.JCasUtil;
 
 /**
  * @author Mayo Clinic
@@ -115,4 +117,37 @@ public class DirectLookupInitializerImpl
 		// not used for direct pass through algorithm, return empty map
 		return new HashMap();
 	}
-}
\ No newline at end of file
+
+	@Override
+	public List getSortedLookupTokens(JCas jcas, Annotation annotation) throws AnnotatorInitializationException {
+		List ltList = new ArrayList();
+
+		List<BaseToken> inList = JCasUtil.selectCovered(jcas, BaseToken.class, annotation);
+		
+		for(BaseToken bta : inList)
+		{
+			if (!((bta instanceof NewlineToken)
+					|| (bta instanceof PunctuationToken)
+					|| (bta instanceof ContractionToken)
+					|| (bta instanceof SymbolToken)))
+			{
+				LookupToken lt = new LookupAnnotationToJCasAdapter(bta);
+
+				if (bta instanceof WordToken)
+				{
+					WordToken wta = (WordToken) bta;
+					String canonicalForm = wta.getCanonicalForm();
+					if (canonicalForm != null)
+					{
+						lt.addStringAttribute(
+								CANONICAL_VARIANT_ATTR,
+								canonicalForm);
+					}
+				}
+
+				ltList.add(lt);
+			}
+		}
+		return ltList;
+	}
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java?rev=1442975&r1=1442974&r2=1442975&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java
(original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java
Wed Feb  6 14:15:47 2013
@@ -251,4 +251,48 @@ public class FirstTokenPermLookupInitial
 		}
 		return list;
 	}
+
+	@Override
+	public List getSortedLookupTokens(JCas jcas, Annotation covering) {
+		List ltList = new ArrayList();
+
+		List<BaseToken> inputList = org.uimafit.util.JCasUtil.selectCovered(jcas, BaseToken.class, covering);
+		for(BaseToken bta : inputList)
+		{
+			if (!((bta instanceof NewlineToken)
+					|| (bta instanceof PunctuationToken)
+					|| (bta instanceof ContractionToken)
+					|| (bta instanceof SymbolToken)))
+			{
+				LookupToken lt = new LookupAnnotationToJCasAdapter(bta);
+
+				// POS exclusion logic for first word lookup
+				if (isTagExcluded(bta.getPartOfSpeech()))
+				{
+					lt.addStringAttribute(
+							FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP,
+							"false");
+				}
+				else
+				{
+					lt.addStringAttribute(
+							FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP,
+							"true");
+				}
+
+				if (bta instanceof WordToken)
+				{
+					WordToken wta = (WordToken) bta;
+					String canonicalForm = wta.getCanonicalForm();
+					if (canonicalForm != null)
+					{
+						lt.addStringAttribute(CANONICAL_VARIANT_ATTR, canonicalForm);
+					}
+				}
+
+				ltList.add(lt);
+			}
+		}
+		return ltList;		
+	}
 }
\ No newline at end of file

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java?rev=1442975&r1=1442974&r2=1442975&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java
(original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java
Wed Feb  6 14:15:47 2013
@@ -19,12 +19,14 @@
 package org.apache.ctakes.dictionary.lookup.ae;
 
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.dictionary.lookup.DictionaryEngine;
 import org.apache.ctakes.dictionary.lookup.algorithms.LookupAlgorithm;
 import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 
 
 /**
@@ -68,6 +70,17 @@ public interface LookupInitializer
             throws AnnotatorInitializationException;
 
     /**
+     * Gets a list of tokens that we promise to return in sorted order that are constrained by the
+     * input annotation.  Puts the onus for sorting performance on the implementing methods since a sort is
+     * implicitly required at some point.
+     * @param jcas
+     * @param annotation
+     * @return List over tokens that are in the window specified.
+     * @throws AnnotatorInitializationException
+     */
+    public List getSortedLookupTokens(JCas jcas, Annotation annotation) throws AnnotatorInitializationException;
+    
+    /**
      * Gets the LookupAlgorithm to be used to perform the lookup operations.
      * Properties specified from the descriptor will be passed in to customize
      * the behavior of the algorithm.



Mime
View raw message