ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1801741 - in /ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae: ClearNLPDependencyParserAE.java shared/DependencySharedModel.java shared/LemmatizerSharedModel.java
Date Wed, 12 Jul 2017 14:52:22 GMT
Author: seanfinan
Date: Wed Jul 12 14:52:21 2017
New Revision: 1801741

URL: http://svn.apache.org/viewvc?rev=1801741&view=rev
Log:
backwards compatibility for dependency parser, minor refactoring

Modified:
    ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java
    ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/DependencySharedModel.java
    ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/LemmatizerSharedModel.java

Modified: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java?rev=1801741&r1=1801740&r2=1801741&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java
(original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/ClearNLPDependencyParserAE.java
Wed Jul 12 14:52:21 2017
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,13 +18,13 @@
  */
 package org.apache.ctakes.dependency.parser.ae;
 
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
+import com.googlecode.clearnlp.component.AbstractComponent;
+import com.googlecode.clearnlp.dependency.DEPFeat;
+import com.googlecode.clearnlp.dependency.DEPNode;
+import com.googlecode.clearnlp.dependency.DEPTree;
+import com.googlecode.clearnlp.morphology.AbstractMPAnalyzer;
+import com.googlecode.clearnlp.reader.AbstractReader;
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
-import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.dependency.parser.ae.shared.DependencySharedModel;
 import org.apache.ctakes.dependency.parser.ae.shared.LemmatizerSharedModel;
 import org.apache.ctakes.dependency.parser.util.ClearDependencyUtility;
@@ -48,18 +48,12 @@ import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ExternalResourceDescription;
 import org.apache.uima.resource.ResourceInitializationException;
 
-import com.googlecode.clearnlp.component.AbstractComponent;
-import com.googlecode.clearnlp.dependency.DEPFeat;
-import com.googlecode.clearnlp.dependency.DEPNode;
-import com.googlecode.clearnlp.dependency.DEPTree;
-import com.googlecode.clearnlp.engine.EngineGetter;
-import com.googlecode.clearnlp.morphology.AbstractMPAnalyzer;
-import com.googlecode.clearnlp.nlp.NLPLib;
-import com.googlecode.clearnlp.reader.AbstractReader;
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * <br>
-* This class provides a UIMA wrapper for the CLEAR dependency parser. This parser is available
here:
+ * This class provides a UIMA wrapper for the CLEAR dependency parser. This parser is available
here:
  * <p>
  * http://code.google.com/p/clearnlp
  * <p>
@@ -69,17 +63,17 @@ import com.googlecode.clearnlp.reader.Ab
  * that the output of the CLEAR parser is different than that of the Malt parser and so these
two
  * parsers may not be interchangeable (without some effort) for most use cases.
  * <p>
- * 
- * 
+ *
+ *
  */
 @TypeCapability(
-		inputs = { 
-				"org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech",
-				"org.apache.ctakes.typesystem.type.syntax.BaseToken:normalizedForm",
-				"org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber",
-				"org.apache.ctakes.typesystem.type.syntax.BaseToken:end",
-				"org.apache.ctakes.typesystem.type.syntax.BaseToken:begin"
-		})
+      inputs = {
+            "org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech",
+            "org.apache.ctakes.typesystem.type.syntax.BaseToken:normalizedForm",
+            "org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber",
+            "org.apache.ctakes.typesystem.type.syntax.BaseToken:end",
+            "org.apache.ctakes.typesystem.type.syntax.BaseToken:begin"
+      } )
 @PipeBitInfo(
       name = "ClearNLP Dependency Parser",
       description = "Analyses Sentence Structure, storing information in nodes.",
@@ -89,104 +83,137 @@ import com.googlecode.clearnlp.reader.Ab
 )
 public class ClearNLPDependencyParserAE extends JCasAnnotator_ImplBase {
 
-  final String language = AbstractReader.LANG_EN;
-  public Logger logger = Logger.getLogger(getClass().getName());
+   final String language = AbstractReader.LANG_EN;
+   //  public Logger logger = Logger.getLogger(getClass().getName());
+   static private final Logger LOGGER = Logger.getLogger( ClearNLPDependencyParserAE.class.getSimpleName()
);
+
+   // single class-based model:
+   private static ExternalResourceDescription defaultParserResource = ExternalResourceFactory.createExternalResourceDescription(
+         DependencySharedModel.class,
+         DependencySharedModel.DEFAULT_MODEL_FILE_NAME );
+   private static ExternalResourceDescription defaultLemmatizerResource = ExternalResourceFactory.createExternalResourceDescription(
+         LemmatizerSharedModel.class,
+         LemmatizerSharedModel.ENG_LEMMATIZER_DATA_FILE );
+
+   // Configuration Parameters
+   @Deprecated
+   public static final String PARAM_PARSER_MODEL_FILE_NAME = "ParserModelFileName";
+   @Deprecated
+   @ConfigurationParameter(
+         name = PARAM_PARSER_MODEL_FILE_NAME,
+         description = "This parameter provides the file name of the dependency parser model
required " +
+               "by the factory method provided by ClearNLPUtil.  If not specified, this "
+
+               "analysis engine will use a default model from the resources directory",
+         defaultValue = DependencySharedModel.DEFAULT_MODEL_FILE_NAME )
+   protected String parserModelPath;
+
+   @Deprecated
+   static private final String PARAM_LEMMATIZER_DATA_FILE = "LemmatizerDataFile";
+   @Deprecated
+   @ConfigurationParameter(
+         name = PARAM_LEMMATIZER_DATA_FILE,
+         description = "This parameter provides the data file required for the MorphEnAnalyzer.
If not "
+               + "specified, this analysis engine will use a default model from the resources
directory",
+         defaultValue = LemmatizerSharedModel.ENG_LEMMATIZER_DATA_FILE )
+   private String lemmaDataPath;
+
+
+   static private final String PARAM_USE_LEMMATIZER = "UseLemmatizer";
+   @ConfigurationParameter(
+         name = PARAM_USE_LEMMATIZER,
+         defaultValue = "true",
+         description = "If true, use the default ClearNLP lemmatizer, otherwise use lemmas
from the BaseToken normalizedToken field" )
+   private boolean useLemmatizer;
+
+   public static final String DEP_MODEL_KEY = "DepModel";
+   @ExternalResource( key = DEP_MODEL_KEY, mandatory = false )
+   private DependencySharedModel parserModel = null;
+
+   public static final String LEM_MODEL_KEY = "LemmatizerModel";
+   @ExternalResource( key = LEM_MODEL_KEY, mandatory = false )
+   private LemmatizerSharedModel lemmatizerModel = null;
+
+   protected AbstractComponent parser = null;
+   protected AbstractMPAnalyzer lemmatizer = null;
+
+   @Override
+   public void initialize( UimaContext context ) throws ResourceInitializationException {
+      super.initialize( context );
+      LOGGER.info( "Initializing ClearNLP dependency parser, using lemmatizer: " + useLemmatizer
);
+
+      if ( useLemmatizer ) {
+         if ( lemmatizerModel == null ) {
+//            try {
+//          this.lemmatizer = EngineGetter.getMPAnalyzer(language, FileLocator.getAsStream(LemmatizerSharedModel.ENG_LEMMATIZER_DATA_FILE));
+//            } catch ( FileNotFoundException e ) {
+//               e.printStackTrace();
+//               throw new ResourceInitializationException( e );
+//            }
+            logDeprecation( PARAM_LEMMATIZER_DATA_FILE, LEM_MODEL_KEY );
+            this.lemmatizer = LemmatizerSharedModel.getAnalyzer( lemmaDataPath, LemmatizerSharedModel.DEFAULT_LANGUAGE
);
+         } else {
+            // Note: If lemmatizer data file is not specified, then use lemmas from the BaseToken
normalizedToken field.
+            // Initialize lemmatizer
+            this.lemmatizer = lemmatizerModel.getLemmatizerModel();
+         }
+      }
+      if ( this.parserModel == null ) {
+//      this.parser = DependencySharedModel.getDefaultModel();
+         logDeprecation( PARAM_PARSER_MODEL_FILE_NAME, DEP_MODEL_KEY );
+         this.parser = DependencySharedModel.getModel( parserModelPath, DependencySharedModel.DEFAULT_LANGUAGE
);
+      } else {
+         this.parser = parserModel.getParser();
+      }
+   }
 
-  // single class-based model:
-  private static ExternalResourceDescription defaultParserResource = ExternalResourceFactory.createExternalResourceDescription(
-      DependencySharedModel.class,
-      DependencySharedModel.DEFAULT_MODEL_FILE_NAME);
-  private static ExternalResourceDescription defaultLemmatizerResource = ExternalResourceFactory.createExternalResourceDescription(
-      LemmatizerSharedModel.class, 
-      LemmatizerSharedModel.ENG_LEMMATIZER_DATA_FILE);
-  
-	public static final String PARAM_USE_LEMMATIZER = "UseLemmatizer";
-	@ConfigurationParameter(
-			name = PARAM_USE_LEMMATIZER,
-			defaultValue = "true",
-			description = "If true, use the default ClearNLP lemmatizer, otherwise use lemmas from
the BaseToken normalizedToken field")
-	protected boolean useLemmatizer;
-
-  public static final String DEP_MODEL_KEY = "DepModel";
-  @ExternalResource(key = DEP_MODEL_KEY, mandatory=false)
-  private DependencySharedModel parserModel=null;
-  
-  public static final String LEM_MODEL_KEY = "LemmatizerModel";
-  @ExternalResource(key = LEM_MODEL_KEY, mandatory=false)
-  private LemmatizerSharedModel lemmatizerModel=null;
-  
-	protected AbstractComponent parser=null;
-	protected AbstractMPAnalyzer lemmatizer=null;
-
-	@Override
-	public void initialize(UimaContext context) throws ResourceInitializationException {
-		super.initialize(context);
-		logger.info("Initializing ClearNLP dependency parser with lemmatizer=" + useLemmatizer);
-
-    if (useLemmatizer) {
-      if(lemmatizerModel == null){
-        try {
-          this.lemmatizer = EngineGetter.getMPAnalyzer(language, FileLocator.getAsStream(LemmatizerSharedModel.ENG_LEMMATIZER_DATA_FILE));
-        } catch (FileNotFoundException e) {
-          e.printStackTrace();
-          throw new ResourceInitializationException(e);
-        }
-      }else{
-        // Note: If lemmatizer data file is not specified, then use lemmas from the BaseToken
normalizedToken field.
-        // Initialize lemmatizer
-        this.lemmatizer = lemmatizerModel.getLemmatizerModel();
+   @Override
+   public synchronized void process( JCas jCas ) throws AnalysisEngineProcessException {
+      LOGGER.info( "Dependency parser starting with thread:" + Thread.currentThread().getName()
);
+      for ( Sentence sentence : JCasUtil.select( jCas, Sentence.class ) ) {
+         List<BaseToken> printableTokens = new ArrayList<>();
+         for ( BaseToken token : JCasUtil.selectCovered( jCas, BaseToken.class, sentence
) ) {
+            if ( token instanceof NewlineToken ) continue;
+            printableTokens.add( token );
+         }
+
+         if ( printableTokens.isEmpty() ) {
+            // If there are no printable tokens then #convert fails
+            continue;
+         }
+         DEPTree tree = new DEPTree();
+
+         // Convert CAS data into structures usable by ClearNLP
+         for ( int i = 0; i < printableTokens.size(); i++ ) {
+            BaseToken token = printableTokens.get( i );
+            String lemma = useLemmatizer ? lemmatizer.getLemma( token.getCoveredText(), token.getPartOfSpeech()
) : token.getNormalizedForm();
+            DEPNode node = new DEPNode( i + 1, token.getCoveredText(), lemma, token.getPartOfSpeech(),
new DEPFeat() );
+            tree.add( node );
+         }
+
+         // Run parser and convert output back to CAS friendly data types
+         parser.process( tree );
+         ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert( jCas,
tree, sentence, printableTokens );
+         DependencyUtility.addToIndexes( jCas, nodes );
       }
-		}
-    if(this.parserModel == null){
-      this.parser = DependencySharedModel.getDefaultModel();
-    }else{
-      this.parser = parserModel.getParser();		    
-    }
-	}
-
-	@Override
-	public synchronized void process(JCas jCas) throws AnalysisEngineProcessException {
-	  logger.info("Dependency parser starting with thread:" + Thread.currentThread().getName());
-		for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
-			List<BaseToken> printableTokens = new ArrayList<>();
-			for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sentence)){
-			  if(token instanceof NewlineToken) continue;
-			  printableTokens.add(token);
-			}
-
-			if ( printableTokens.isEmpty() ) {
-				// If there are no printable tokens then #convert fails
-				continue;
-			}
-			DEPTree tree = new DEPTree();
-
-			// Convert CAS data into structures usable by ClearNLP
-			for (int i = 0; i < printableTokens.size(); i++) {
-				BaseToken token = printableTokens.get(i);
-				String lemma = useLemmatizer ? lemmatizer.getLemma(token.getCoveredText(), token.getPartOfSpeech())
: token.getNormalizedForm();
-				DEPNode node = new DEPNode(i+1, token.getCoveredText(), lemma, token.getPartOfSpeech(),
new DEPFeat());
-				tree.add(node);
-			}
-
-			// Run parser and convert output back to CAS friendly data types
-			parser.process(tree);
-			ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert( jCas, tree,
sentence, printableTokens );
-			DependencyUtility.addToIndexes( jCas, nodes );
-		}
-    logger.info("Dependency parser ending with thread:" + Thread.currentThread().getName());
-	}
-	
-	// If someone calls this, they want the default model, lazy initialization of the external
resources:
-	public static synchronized AnalysisEngineDescription createAnnotatorDescription() throws
ResourceInitializationException{
-	  return createAnnotatorDescription(defaultParserResource, defaultLemmatizerResource);
-	}
-	
-	public static AnalysisEngineDescription createAnnotatorDescription(ExternalResourceDescription
parserDesc, ExternalResourceDescription lemmaDesc) throws ResourceInitializationException{
-	  return AnalysisEngineFactory.createEngineDescription(
-	      ClearNLPDependencyParserAE.class, 
-	      DEP_MODEL_KEY, 
-	      parserDesc, 
-	      LEM_MODEL_KEY, 
-	      lemmaDesc);
-	}
+      LOGGER.info( "Dependency parser ending with thread:" + Thread.currentThread().getName()
);
+   }
+
+   static private void logDeprecation( final String parameterName, final String resourceName
) {
+      LOGGER.warn( "Use of configuration parameter " + parameterName
+            + " may be deprecated in the future in favor of external resource " + resourceName
);
+   }
+
+   // If someone calls this, they want the default model, lazy initialization of the external
resources:
+   public static synchronized AnalysisEngineDescription createAnnotatorDescription() throws
ResourceInitializationException {
+      return createAnnotatorDescription( defaultParserResource, defaultLemmatizerResource
);
+   }
+
+   public static AnalysisEngineDescription createAnnotatorDescription( ExternalResourceDescription
parserDesc, ExternalResourceDescription lemmaDesc ) throws ResourceInitializationException
{
+      return AnalysisEngineFactory.createEngineDescription(
+            ClearNLPDependencyParserAE.class,
+            DEP_MODEL_KEY,
+            parserDesc,
+            LEM_MODEL_KEY,
+            lemmaDesc );
+   }
 }

Modified: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/DependencySharedModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/DependencySharedModel.java?rev=1801741&r1=1801740&r2=1801741&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/DependencySharedModel.java
(original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/DependencySharedModel.java
Wed Jul 12 14:52:21 2017
@@ -1,50 +1,66 @@
 package org.apache.ctakes.dependency.parser.ae.shared;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-
+import com.googlecode.clearnlp.component.AbstractComponent;
+import com.googlecode.clearnlp.engine.EngineGetter;
+import com.googlecode.clearnlp.nlp.NLPLib;
+import com.googlecode.clearnlp.reader.AbstractReader;
 import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.log4j.Logger;
 import org.apache.uima.resource.DataResource;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.SharedResourceObject;
 
-import com.googlecode.clearnlp.component.AbstractComponent;
-import com.googlecode.clearnlp.engine.EngineGetter;
-import com.googlecode.clearnlp.nlp.NLPLib;
-import com.googlecode.clearnlp.reader.AbstractReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
 
 public class DependencySharedModel implements SharedResourceObject {
 
-  private AbstractComponent parser;
-  public static final String DEFAULT_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/dependency/mayo-en-dep-1.3.0.jar";
-  final String language = AbstractReader.LANG_EN;
-  public Logger logger = Logger.getLogger(getClass().getName());
-
-  @Override
-  public void load(DataResource aData) throws ResourceInitializationException {
-    URI uri = aData.getUri();
-    try{
-      InputStream parserModel = (uri == null)
-          ? FileLocator.getAsStream(DEFAULT_MODEL_FILE_NAME)
-              : FileLocator.getAsStream(uri.getPath());
-
-          this.parser = EngineGetter.getComponent(parserModel, this.language, NLPLib.MODE_DEP);
-    }catch(IOException e){
-      throw new ResourceInitializationException(e);
-    }
-  }
-
-  public AbstractComponent getParser(){
-    return parser;
-  }
-  
-  public static AbstractComponent getDefaultModel() throws ResourceInitializationException
{
-    try{
-      return EngineGetter.getComponent(FileLocator.getAsStream(DEFAULT_MODEL_FILE_NAME),
AbstractReader.LANG_EN, NLPLib.MODE_DEP);
-    }catch(IOException e){
-      throw new ResourceInitializationException(e);
-    }
-  }
+   private AbstractComponent parser;
+   public static final String DEFAULT_MODEL_FILE_NAME = "org/apache/ctakes/dependency/parser/models/dependency/mayo-en-dep-1.3.0.jar";
+   static public final String DEFAULT_LANGUAGE = AbstractReader.LANG_EN;
+   // If this is final then why don't we just use a default such as above?  Future mutability?
+   final String language = DEFAULT_LANGUAGE;
+   public Logger logger = Logger.getLogger( getClass().getName() );
+
+   @Override
+   public void load( final DataResource aData ) throws ResourceInitializationException {
+      URI uri = aData.getUri();
+//    try{
+//      InputStream parserModel = (uri == null)
+//          ? FileLocator.getAsStream(DEFAULT_MODEL_FILE_NAME)
+//              : FileLocator.getAsStream(uri.getPath());
+//
+//          this.parser = EngineGetter.getComponent(parserModel, this.language, NLPLib.MODE_DEP);
+//    }catch(IOException e){
+//      throw new ResourceInitializationException(e);
+//    }
+      if ( uri != null ) {
+         this.parser = getModel( uri.getPath(), this.language );
+      } else {
+         this.parser = getDefaultModel();
+      }
+   }
+
+   public AbstractComponent getParser() {
+      return parser;
+   }
+
+   static public AbstractComponent getModel( final String modelPath, final String language
) throws ResourceInitializationException {
+      try {
+         final InputStream modelStream = FileLocator.getAsStream( modelPath );
+         return EngineGetter.getComponent( modelStream, language, NLPLib.MODE_DEP );
+      } catch ( IOException e ) {
+         throw new ResourceInitializationException( e );
+      }
+   }
+
+   public static AbstractComponent getDefaultModel() throws ResourceInitializationException
{
+//    try{
+//      return EngineGetter.getComponent(FileLocator.getAsStream(DEFAULT_MODEL_FILE_NAME),
AbstractReader.LANG_EN, NLPLib.MODE_DEP);
+//    }catch(IOException e){
+//      throw new ResourceInitializationException(e);
+//    }
+      return getModel( DEFAULT_MODEL_FILE_NAME, DEFAULT_LANGUAGE );
+   }
 }

Modified: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/LemmatizerSharedModel.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/LemmatizerSharedModel.java?rev=1801741&r1=1801740&r2=1801741&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/LemmatizerSharedModel.java
(original)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/ae/shared/LemmatizerSharedModel.java
Wed Jul 12 14:52:21 2017
@@ -1,43 +1,62 @@
 package org.apache.ctakes.dependency.parser.ae.shared;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-
+import com.googlecode.clearnlp.engine.EngineGetter;
+import com.googlecode.clearnlp.morphology.AbstractMPAnalyzer;
+import com.googlecode.clearnlp.reader.AbstractReader;
 import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.log4j.Logger;
 import org.apache.uima.resource.DataResource;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.SharedResourceObject;
 
-import com.googlecode.clearnlp.engine.EngineGetter;
-import com.googlecode.clearnlp.morphology.AbstractMPAnalyzer;
-import com.googlecode.clearnlp.reader.AbstractReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
 
 public class LemmatizerSharedModel implements SharedResourceObject {
 
-  public static final String ENG_LEMMATIZER_DATA_FILE = "org/apache/ctakes/dependency/parser/models/lemmatizer/dictionary-1.3.1.jar";
+   public static final String ENG_LEMMATIZER_DATA_FILE = "org/apache/ctakes/dependency/parser/models/lemmatizer/dictionary-1.3.1.jar";
 
-  private AbstractMPAnalyzer lemmatizer = null;
-  final String language = AbstractReader.LANG_EN;
-  public Logger logger = Logger.getLogger(getClass().getName());
-
-  @Override
-  public void load(DataResource aData) throws ResourceInitializationException {
-    URI modelUri = aData.getUri();
-    try{
-      InputStream lemmatizerModel = (modelUri == null)
-          ? FileLocator.getAsStream(ENG_LEMMATIZER_DATA_FILE)
-              : FileLocator.getAsStream(modelUri.getPath());
-          
-      this.lemmatizer = EngineGetter.getMPAnalyzer(language, lemmatizerModel);
-    }catch(IOException e){
-      throw new ResourceInitializationException(e);
-    }
-  }
-
-  public AbstractMPAnalyzer getLemmatizerModel(){
-    return this.lemmatizer;
-  }
+   private AbstractMPAnalyzer lemmatizer = null;
+   static public final String DEFAULT_LANGUAGE = AbstractReader.LANG_EN;
+   // If this is final then why don't we just use a default such as above?  Future mutability?
+   final String language = DEFAULT_LANGUAGE;
+   public Logger logger = Logger.getLogger( getClass().getName() );
+
+   @Override
+   public void load( final DataResource aData ) throws ResourceInitializationException {
+      URI modelUri = aData.getUri();
+//    try{
+//      InputStream lemmatizerModel = (modelUri == null)
+//          ? FileLocator.getAsStream(ENG_LEMMATIZER_DATA_FILE)
+//              : FileLocator.getAsStream(modelUri.getPath());
+//
+//      this.lemmatizer = EngineGetter.getMPAnalyzer(language, lemmatizerModel);
+//    }catch(IOException e){
+//      throw new ResourceInitializationException(e);
+//    }
+      if ( modelUri != null ) {
+         this.lemmatizer = getAnalyzer( modelUri.getPath(), this.language );
+      } else {
+         this.lemmatizer = getDefaultAnalyzer();
+      }
+   }
+
+   public AbstractMPAnalyzer getLemmatizerModel() {
+      return this.lemmatizer;
+   }
+
+   static public AbstractMPAnalyzer getAnalyzer( final String dataPath, final String language
) throws ResourceInitializationException {
+      try {
+         final InputStream dataStream = FileLocator.getAsStream( dataPath );
+         return EngineGetter.getMPAnalyzer( language, dataStream );
+      } catch ( IOException e ) {
+         throw new ResourceInitializationException( e );
+      }
+   }
+
+   static public AbstractMPAnalyzer getDefaultAnalyzer() throws ResourceInitializationException
{
+      return getAnalyzer( ENG_LEMMATIZER_DATA_FILE, DEFAULT_LANGUAGE );
+   }
 
 }



Mime
View raw message