ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1396711 [4/4] - in /incubator/ctakes/trunk: ctakes-chunker/src/test/ ctakes-chunker/src/test/data/ ctakes-chunker/src/test/data/output/ ctakes-chunker/src/test/data/text-files/ ctakes-chunker/src/test/desc/ ctakes-chunker/src/test/java/ ct...
Date Wed, 10 Oct 2012 17:59:45 GMT
Modified: incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/POS_taggerTests.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/POS_taggerTests.launch?rev=1396711&r1=1396710&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/POS_taggerTests.launch (original)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/POS_taggerTests.launch Wed Oct 10 17:59:40 2012
@@ -1,17 +1,19 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<launchConfiguration type="org.eclipse.jdt.junit.launchconfig">
-<stringAttribute key="bad_container_name" value="\ctakes-pos-tagger\resources\launc"/>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/ctakes-pos-tagger"/>
-</listAttribute>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
-<listEntry value="4"/>
-</listAttribute>
-<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
-<stringAttribute key="org.eclipse.jdt.junit.CONTAINER" value="=ctakes-pos-tagger"/>
-<booleanAttribute key="org.eclipse.jdt.junit.KEEPRUNNING_ATTR" value="false"/>
-<stringAttribute key="org.eclipse.jdt.junit.TESTNAME" value=""/>
-<stringAttribute key="org.eclipse.jdt.junit.TEST_KIND" value="org.eclipse.jdt.junit.loader.junit4"/>
-<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value=""/>
-<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
-</launchConfiguration>
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.junit.launchconfig">
+<stringAttribute key="bad_container_name" value="\ctakes-pos-tagger\resources\launc"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-pos-tagger"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="4"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<stringAttribute key="org.eclipse.jdt.junit.CONTAINER" value="=ctakes-pos-tagger"/>
+<booleanAttribute key="org.eclipse.jdt.junit.KEEPRUNNING_ATTR" value="false"/>
+<stringAttribute key="org.eclipse.jdt.junit.TESTNAME" value=""/>
+<stringAttribute key="org.eclipse.jdt.junit.TEST_KIND" value="org.eclipse.jdt.junit.loader.junit4"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value=""/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+</launchConfiguration>

Modified: incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator--Sample.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator--Sample.launch?rev=1396711&r1=1396710&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator--Sample.launch (original)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator--Sample.launch Wed Oct 10 17:59:40 2012
@@ -1,14 +1,16 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
-<stringAttribute key="bad_container_name" value="\ctakes-pos-tagger\resources\launc"/>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/ctakes-pos-tagger/src/java/edu/mayo/bmi/uima/pos_tagger/TagDictionaryCreator.java"/>
-</listAttribute>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
-<listEntry value="1"/>
-</listAttribute>
-<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
-<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.postagger.TagDictionaryCreator"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="data\pos\training\sample\sample-pos-training.txt   &#13;&#10;data\pos\training\sample\sample-tagdict.txt &#13;&#10;true"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
-</launchConfiguration>
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<stringAttribute key="bad_container_name" value="\ctakes-pos-tagger\resources\launc"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-pos-tagger/src/java/edu/mayo/bmi/uima/pos_tagger/TagDictionaryCreator.java"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.postagger.TagDictionaryCreator"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="data\pos\training\sample\sample-pos-training.txt   &#13;&#10;data\pos\training\sample\sample-tagdict.txt &#13;&#10;true"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+</launchConfiguration>

Modified: incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator.launch?rev=1396711&r1=1396710&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator.launch (original)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/PosTagDictionaryCreator.launch Wed Oct 10 17:59:40 2012
@@ -8,7 +8,9 @@
 <listEntry value="1"/>
 </listAttribute>
 <booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.postagger.TagDictionaryCreator"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="data\pos\training\ptb\ptb-pos-training.txt   &#13;&#10;data\pos\training\ptb\ptb-tagdict.txt &#13;&#10;true"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
 </launchConfiguration>

Modified: incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CPE_GUI--POS_tagger.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CPE_GUI--POS_tagger.launch?rev=1396711&r1=1396710&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CPE_GUI--POS_tagger.launch (original)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CPE_GUI--POS_tagger.launch Wed Oct 10 17:59:40 2012
@@ -1,13 +1,15 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/ctakes-pos-tagger"/>
-</listAttribute>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
-<listEntry value="4"/>
-</listAttribute>
-<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
-<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.cpm.CpmFrame"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
-<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx500M"/>
-</launchConfiguration>
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-pos-tagger"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="4"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.cpm.CpmFrame"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="-Xms500M -Xmx500M"/>
+</launchConfiguration>

Modified: incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CVD--POS_tagger.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CVD--POS_tagger.launch?rev=1396711&r1=1396710&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CVD--POS_tagger.launch (original)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/resources/launch/UIMA_CVD--POS_tagger.launch Wed Oct 10 17:59:40 2012
@@ -1,16 +1,18 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
-<stringAttribute key="bad_container_name" value="\ctakes-pos-tagger\resources\launch"/>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
-<listEntry value="/ctakes-pos-tagger"/>
-</listAttribute>
-<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
-<listEntry value="4"/>
-</listAttribute>
-<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
-<stringAttribute key="org.eclipse.debug.ui.target_debug_perspective" value="perspective_default"/>
-<stringAttribute key="org.eclipse.debug.ui.target_run_perspective" value="perspective_default"/>
-<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.annot_view.Gladis"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
-<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="&quot;-Djava.util.logging.config.file=${env_var:UIMA_HOME}/Logger.properties&quot;  -Xms500M -Xmx500M"/>
-</launchConfiguration>
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<stringAttribute key="bad_container_name" value="\ctakes-pos-tagger\resources\launch"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-pos-tagger"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="4"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<stringAttribute key="org.eclipse.debug.ui.target_debug_perspective" value="perspective_default"/>
+<stringAttribute key="org.eclipse.debug.ui.target_run_perspective" value="perspective_default"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.uima.tools.annot_view.Gladis"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-pos-tagger"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_ARGUMENTS" value="&quot;-Djava.util.logging.config.file=${env_var:UIMA_HOME}/Logger.properties&quot;  -Xms500M -Xmx500M"/>
+</launchConfiguration>

Added: incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/CpeTests.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/CpeTests.java?rev=1396711&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/CpeTests.java (added)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/CpeTests.java Wed Oct 10 17:59:40 2012
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.postagger.test;
+
+
+// from  org.apache.uima.examples.cpe.CpeTests.java in the %UIMA_HOME%\examples\src
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.List;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.collection.CollectionProcessingEngine;
+import org.apache.uima.collection.EntityProcessStatus;
+import org.apache.uima.collection.StatusCallbackListener;
+import org.apache.uima.collection.metadata.CpeDescription;
+import org.apache.uima.util.XMLInputSource;
+
+/**
+ * Main Class that runs a Collection Processing Engine (CPE). This class reads a CPE Descriptor as a
+ * command-line argument and instantiates the CPE. It also registers a callback listener with the
+ * CPE, which will print progress and statistics to System.out.
+ * NOTE(review): this class extends Thread but does not override run(), and main() never calls
+ * start(); all of the work happens inside the constructor - confirm the superclass is needed.
+ */
+public class CpeTests extends Thread {
+  /**
+   * The CPE instance.
+   */
+  private CollectionProcessingEngine mCPE;
+
+  /**
+   * Timestamp (ms) taken at the start of the constructor, before the CPE is created.
+   */
+  private long mStartTime;
+  
+  /**
+   * Timestamp (ms) recorded by the listener's initializationComplete(); used as the start of processing.
+   */
+  private long mInitCompleteTime;
+
+  /**
+   * Parses the CPE descriptor at args[0], starts the CPE, then reads System.in until "abort" is typed.
+   * Prints the usage message and exits the JVM with status 1 when no argument is supplied.
+   * @param args
+   *          command line arguments into the program - see class description
+   */
+  public CpeTests(String args[]) throws Exception {
+    mStartTime = System.currentTimeMillis();
+
+    // check command line args
+    if (args.length < 1) {
+      printUsageMessage();
+      System.exit(1);
+    }
+
+    // parse CPE descriptor
+    System.out.println("Parsing CPE Descriptor");
+    CpeDescription cpeDesc = UIMAFramework.getXMLParser().parseCpeDescription(
+            new XMLInputSource(args[0]));
+    // instantiate CPE
+    System.out.println("Instantiating CPE");
+    mCPE = UIMAFramework.produceCollectionProcessingEngine(cpeDesc);
+
+    // Create and register a Status Callback Listener
+    mCPE.addStatusCallbackListener(new StatusCallbackListenerImpl());
+
+    // Start Processing
+    System.out.println("Running CPE");
+    mCPE.process();
+
+    // Allow user to abort by typing "abort" and pressing Enter (a new reader is created for each line read)
+    System.out.println("To abort processing, type \"abort\" and press enter.");
+    while (true) { // NOTE(review): a null line (end of input) never equals "abort", so there is no EOF exit here
+      String line = new BufferedReader(new InputStreamReader(System.in)).readLine();
+      if ("abort".equals(line) && mCPE.isProcessing()) {
+        System.out.println("Aborting...");
+        mCPE.stop();
+        break;
+      }
+    }
+  }
+
+  /**
+   * Prints the expected command-line arguments to System.out.
+   */
+  private static void printUsageMessage() {
+    System.out.println(" Arguments to the program are as follows : \n"
+            + "args[0] : path to CPE descriptor file");
+  }
+
+  /**
+   * Program entry point: constructs a CpeTests, whose constructor runs the CPE.
+   * 
+   * @param args
+   *          Command line arguments - see class description
+   */
+  public static void main(String[] args) throws Exception {
+    new CpeTests(args);
+  }
+
+  /**
+   * Callback Listener. Receives event notifications from CPE.
+   * Keeps a count of entities completed without exceptions and the total length of their
+   * document text, and prints progress and timing information to System.out.
+   */
+  class StatusCallbackListenerImpl implements StatusCallbackListener {
+    int entityCount = 0; // entities completed without a reported exception
+
+    long size = 0; // summed length of the document text of counted entities
+
+    /**
+     * Called when the initialization is completed.
+     * 
+     * @see org.apache.uima.collection.StatusCallbackListener#initializationComplete()
+     */
+    public void initializationComplete() {      
+      System.out.println("CPM Initialization Complete");
+      mInitCompleteTime = System.currentTimeMillis();
+    }
+
+    /**
+     * Called when the batchProcessing is completed.
+     * Prints the running document/character totals and the time elapsed since construction.
+     * @see org.apache.uima.collection.StatusCallbackListener#batchProcessComplete()
+     * 
+     */
+    public void batchProcessComplete() {
+      System.out.print("Completed " + entityCount + " documents");
+      if (size > 0) {
+        System.out.print("; " + size + " characters");
+      }
+      System.out.println();
+      long elapsedTime = System.currentTimeMillis() - mStartTime;
+      System.out.println("Time Elapsed : " + elapsedTime + " ms ");
+    }
+
+    /**
+     * Called when the collection processing is completed.
+     * Prints totals, the timing breakdown and the CPE performance report, then exits the JVM.
+     * @see org.apache.uima.collection.StatusCallbackListener#collectionProcessComplete()
+     */
+    public void collectionProcessComplete() {
+      long time = System.currentTimeMillis();
+      System.out.print("Completed " + entityCount + " documents");
+      if (size > 0) {
+        System.out.print("; " + size + " characters");
+      }
+      System.out.println();
+      long initTime = mInitCompleteTime - mStartTime; 
+      long processingTime = time - mInitCompleteTime;
+      long elapsedTime = initTime + processingTime;
+      System.out.println("Total Time Elapsed: " + elapsedTime + " ms ");
+      System.out.println("Initialization Time: " + initTime + " ms");
+      System.out.println("Processing Time: " + processingTime + " ms");
+      
+      System.out.println("\n\n ------------------ PERFORMANCE REPORT ------------------\n");
+      System.out.println(mCPE.getPerformanceReport().toString());
+      // stop the JVM. Otherwise main thread will still be blocked waiting for
+      // user input on System.in.
+      System.exit(1); // NOTE(review): non-zero exit status although processing completed - confirm intended
+    }
+
+    /**
+     * Called when the CPM is paused.
+     * 
+     * @see org.apache.uima.collection.StatusCallbackListener#paused()
+     */
+    public void paused() {
+      System.out.println("Paused");
+    }
+
+    /**
+     * Called when the CPM is resumed after a pause.
+     * 
+     * @see org.apache.uima.collection.StatusCallbackListener#resumed()
+     */
+    public void resumed() {
+      System.out.println("Resumed");
+    }
+
+    /**
+     * Called when the CPM is stopped abruptly due to errors.
+     * 
+     * @see org.apache.uima.collection.StatusCallbackListener#aborted()
+     */
+    public void aborted() {
+      System.out.println("Aborted");
+      // stop the JVM. Otherwise main thread will still be blocked waiting for
+      // user input on System.in.
+      System.exit(1);
+    }
+
+    /**
+     * Called when the processing of a Document is completed. <br>
+     * When the status reports an exception, each exception's stack trace is printed and the
+     * entity is not counted; otherwise the entity count and character total are updated.
+     * @param aCas
+     *          CAS corresponding to the completed processing
+     * @param aStatus
+     *          EntityProcessStatus that holds the status of all the events for aEntity
+     */
+    public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
+      if (aStatus.isException()) {
+        List<?> exceptions = aStatus.getExceptions();
+        for (int i = 0; i < exceptions.size(); i++) {
+          ((Throwable) exceptions.get(i)).printStackTrace();
+        }
+        return;
+      }
+      entityCount++;
+      String docText = aCas.getDocumentText();
+      if (docText != null) {
+        size += docText.length();
+      }
+    }
+  }
+
+}

Added: incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/OpenNLPPOSCollectionReaderTests.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/OpenNLPPOSCollectionReaderTests.java?rev=1396711&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/OpenNLPPOSCollectionReaderTests.java (added)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/OpenNLPPOSCollectionReaderTests.java Wed Oct 10 17:59:40 2012
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.postagger.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.Test;
+
+import org.apache.ctakes.core.TestUtil;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+
+public class OpenNLPPOSCollectionReaderTests {
+	// Checks the BaseToken text and part-of-speech values produced by the collection readers built from the test descriptors.
+	@Test
+	public void testReader() throws ResourceInitializationException, IOException, CollectionException {
+		CollectionReader collectionReader = TestUtil.getCR(new File("test/desc/OpenNLPPOSCollectionReader.xml"));
+		AnalysisEngine analysisEngine = TestUtil.getAE(new File("test/desc/NullAnnotator.xml"));
+		JCas jCas = analysisEngine.newJCas();
+		collectionReader.getNext(jCas.getCas());
+		// first CAS: each token's covered text and part-of-speech value is checked in index order
+		BaseToken baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 0);
+		assertEquals(0, baseToken.getBegin());
+		assertEquals(1, baseToken.getEnd());
+		assertEquals("A", baseToken.getCoveredText());
+		assertEquals("A", baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 1);
+		assertEquals("farmer", baseToken.getCoveredText());
+		assertEquals("B", baseToken.getPartOfSpeech());
+		
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 2);
+		assertEquals("went", baseToken.getCoveredText());
+		assertEquals("CC", baseToken.getPartOfSpeech());
+		
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 3);
+		assertEquals("trotting", baseToken.getCoveredText());
+		assertEquals("DDD", baseToken.getPartOfSpeech());
+		
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 4);
+		assertEquals("upon_A", baseToken.getCoveredText());
+		assertEquals("E", baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 5);
+		assertEquals("his", baseToken.getCoveredText());
+		assertEquals("EE", baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 6);
+		assertEquals(".", baseToken.getCoveredText());
+		assertEquals(".", baseToken.getPartOfSpeech());
+		// the second getNext() call is expected to throw a CollectionException
+		CollectionException ce = null;
+		jCas = analysisEngine.newJCas();
+		try {
+			collectionReader.getNext(jCas.getCas());
+		} catch(CollectionException e) {
+			ce = e;
+		}
+		assertNotNull(ce);
+		// the reader is expected to remain usable after that exception: read one more CAS
+		jCas = analysisEngine.newJCas();
+		collectionReader.getNext(jCas.getCas());
+		// NOTE(review): underscore-heavy tokens/tags - presumably exercising word/tag separator parsing; confirm against the test data
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 0);
+		assertEquals("A_", baseToken.getCoveredText());
+		assertEquals("A", baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 1);
+		assertEquals("_", baseToken.getCoveredText());
+		assertEquals("B", baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 2);
+		assertEquals("_B_", baseToken.getCoveredText());
+		assertEquals("C", baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 3);
+		assertEquals("B", baseToken.getCoveredText());
+		assertEquals("_", baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 4);
+		assertEquals("__", baseToken.getCoveredText());
+		assertEquals("_", baseToken.getPartOfSpeech());
+	}
+	
+	@Test
+	public void testLoadWordsOnly() throws ResourceInitializationException, IOException, CollectionException {
+		CollectionReader collectionReader = TestUtil.getCR(new File("test/desc/OpenNLPPOSCollectionReader2.xml"));
+		AnalysisEngine analysisEngine = TestUtil.getAE(new File("test/desc/NullAnnotator.xml"));
+		JCas jCas = analysisEngine.newJCas();
+		collectionReader.getNext(jCas.getCas());
+		// same token texts as the first CAS in testReader(), but every part-of-speech is expected to be null
+		BaseToken baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 0);
+		assertEquals(0, baseToken.getBegin());
+		assertEquals(1, baseToken.getEnd());
+		assertEquals("A", baseToken.getCoveredText());
+		assertNull(baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 1);
+		assertEquals("farmer", baseToken.getCoveredText());
+		assertNull(baseToken.getPartOfSpeech());
+		
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 2);
+		assertEquals("went", baseToken.getCoveredText());
+		assertNull(baseToken.getPartOfSpeech());
+		
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 3);
+		assertEquals("trotting", baseToken.getCoveredText());
+		assertNull(baseToken.getPartOfSpeech());
+		
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 4);
+		assertEquals("upon_A", baseToken.getCoveredText());
+		assertNull(baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 5);
+		assertEquals("his", baseToken.getCoveredText());
+		assertNull(baseToken.getPartOfSpeech());
+
+		baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 6);
+		assertEquals(".", baseToken.getCoveredText());
+		assertNull(baseToken.getPartOfSpeech());
+		// the following getNext() call is expected to throw a CollectionException
+		CollectionException ce = null;
+		jCas = analysisEngine.newJCas();
+		try {
+			collectionReader.getNext(jCas.getCas());
+		} catch(CollectionException e) {
+			ce = e;
+		}
+		assertNotNull(ce);
+		
+	}
+
+}

Added: incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/POSTaggerTests.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/POSTaggerTests.java?rev=1396711&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/POSTaggerTests.java (added)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/POSTaggerTests.java Wed Oct 10 17:59:40 2012
@@ -0,0 +1,221 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.postagger.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.File;
+import java.util.Iterator;
+
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.Test;
+
+import org.apache.ctakes.core.TestUtil;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+
+/**
+ * Tests for the POS tagger aggregate descriptors found under test/desc.
+ */
+public class POSTaggerTests {
+
+	/**
+	 * Exercises the tagging model with no tag dictionary: checks the leading
+	 * tokens in detail and then requires a tag on every token.
+	 * @throws ResourceInitializationException
+	 */
+	@Test
+	public void testTagger() throws ResourceInitializationException {
+		AnalysisEngine engine = TestUtil.getAE(new File("test/desc/POSTaggerAggregate.xml"));
+		JCas cas = TestUtil.processAE(engine, "A farmer went trotting upon his gray mare, Bumpety, bumpety, bump, With his daughter behind him, so rosy and fair, Lumpety, lumpety, lump.");
+
+		// the first two tokens are additionally checked for their character offsets
+		BaseToken token = tokenAt(cas, 0);
+		assertEquals(0, token.getBegin());
+		assertEquals(1, token.getEnd());
+		assertEquals("A", token.getCoveredText());
+		assertEquals("DT", token.getPartOfSpeech());
+
+		token = tokenAt(cas, 1);
+		assertEquals(2, token.getBegin());
+		assertEquals(8, token.getEnd());
+		assertEquals("farmer", token.getCoveredText());
+		assertEquals("JJ", token.getPartOfSpeech());
+
+		assertToken(cas, 2, "went", "JJ");
+		assertToken(cas, 3, "trotting", "NN");
+
+		assertEveryTokenTagged(cas);
+	}
+
+	/*
+	 * Debugging aid: prints the covered text and the assigned tag of every BaseToken.
+	 */
+	private void printPosTags(JCas jCas) {
+		int index = 0;
+		while (index < TestUtil.getFeatureStructureSize(jCas, BaseToken.class)) {
+			BaseToken current = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, index);
+			System.out.println(current.getCoveredText() + " part of speech = " + current.getPartOfSpeech());
+			index++;
+		}
+	}
+
+	/**
+	 * Runs three scenarios against a tag dictionary:<br>
+	 * TEST1 uses the dictionary as case sensitive with input whose case matches it, so
+	 * every token is expected to be constrained to the tag "IN".<br>
+	 * TEST2 uses the same case-sensitive dictionary, but the case of the input words has
+	 * been modified, so the words are expected to get whatever the model chooses.<br>
+	 * TEST3 uses the dictionary as case *insensitive* with the TEST2 input, so the tags
+	 * are expected to be "IN" again - except for the first token "use", see the comment
+	 * inside the method.
+	 * @throws ResourceInitializationException
+	 */
+	@Test
+	public void testTagDictionary() throws ResourceInitializationException {
+
+		//TEST1
+		AnalysisEngine engine = TestUtil.getAE(new File("test/desc/POSTaggerAggregate2.xml"));
+		JCas cas = TestUtil.processAE(engine, "Use of new biologic markers in the ovulation induction.");
+
+		String[] words = { "Use", "of", "new", "biologic", "markers", "in", "the", "ovulation", "induction", "." };
+		for (int i = 0; i < words.length; i++) {
+			assertToken(cas, i, words[i], "IN");
+		}
+		assertEveryTokenTagged(cas);
+
+		//TEST2
+		engine = TestUtil.getAE(new File("test/desc/POSTaggerAggregate2.xml"));
+		cas = TestUtil.processAE(engine, "use Of neW Biologic Markers IN The oVULation inductiOn.");
+
+		// printPosTags(cas); // output all the tags so if one of the early ones fail, you still get to see the others
+		String[][] wordsAndTags = {
+				{ "use", "NN" },
+				{ "Of", "CD" }, // NN
+				{ "neW", "JJ" },
+				{ "Biologic", "NN" },
+				{ "Markers", "NNS" },
+				{ "IN", "VBP" }, // IN
+				{ "The", "DT" },
+				{ "oVULation", "NN" },
+				{ "inductiOn", "NN" },
+				{ ".", "IN" },
+		};
+		for (int i = 0; i < wordsAndTags.length; i++) {
+			assertToken(cas, i, wordsAndTags[i][0], wordsAndTags[i][1]);
+		}
+
+		//TEST3
+		engine = TestUtil.getAE(new File("test/desc/POSTaggerAggregate3.xml"));
+		cas = TestUtil.processAE(engine, "use Of neW Biologic Markers IN The oVULation inductiOn.");
+
+		// The first expectation should really be "IN" - but the POSDictionary class does not read in the
+		// dictionary in a case insensitive way.  The word "Use" is the only word in the tag dictionary
+		// that is not all lower case and so "use" effectively doesn't exist in the dictionary.
+		String[] tags = { "NN", "IN", "IN", "IN", "IN", "IN", "IN", "IN", "IN", "IN" };
+		for (int i = 0; i < tags.length; i++) {
+			assertEquals(tags[i], tokenAt(cas, i).getPartOfSpeech());
+		}
+	}
+
+	/*
+	 * Fetches the BaseToken at the given position through TestUtil.
+	 */
+	private static BaseToken tokenAt(JCas cas, int index) {
+		return TestUtil.getFeatureStructureAtIndex(cas, BaseToken.class, index);
+	}
+
+	/*
+	 * Asserts first the covered text and then the tag of the BaseToken at the given position.
+	 */
+	private static void assertToken(JCas cas, int index, String coveredText, String tag) {
+		BaseToken token = tokenAt(cas, index);
+		assertEquals(coveredText, token.getCoveredText());
+		assertEquals(tag, token.getPartOfSpeech());
+	}
+
+	/*
+	 * Walks the BaseToken annotation index and asserts that each token has a non-null tag.
+	 */
+	private static void assertEveryTokenTagged(JCas cas) {
+		Iterator<?> tokens = cas.getJFSIndexRepository().getAnnotationIndex(BaseToken.type).iterator();
+		while (tokens.hasNext()) {
+			BaseToken token = (BaseToken) tokens.next();
+			assertNotNull(token.getPartOfSpeech());
+		}
+	}
+
+}

Added: incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/TagDictionaryCreatorTests.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/TagDictionaryCreatorTests.java?rev=1396711&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/TagDictionaryCreatorTests.java (added)
+++ incubator/ctakes/trunk/ctakes-pos-tagger/src/test/java/org/apache/ctakes/postagger/test/TagDictionaryCreatorTests.java Wed Oct 10 17:59:40 2012
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.postagger.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Set;
+
+import org.junit.Test;
+
+import org.apache.ctakes.postagger.TagDictionaryCreator;
+
+/**
+ * Tests for TagDictionaryCreator, run against the two-line training data file under test/data.
+ */
+public class TagDictionaryCreatorTests {
+
+	private static final String TRAINING_DATA = "test/data/unit-test-2lines-training-data.txt";
+	private static final String DICTIONARY_OUTPUT = "test/data/output/unit-test-2lines-tag-dictionary.txt";
+
+	/**
+	 * Builds the dictionary twice from the same training data, once with the boolean
+	 * argument set to true and once set to false, and checks the tags recorded for
+	 * "IL-2" / "il-2" and "surface".
+	 * NOTE(review): the flag looks like a case-sensitivity switch (the key is looked up
+	 * as "IL-2" for true and "il-2" for false) - confirm against TagDictionaryCreator.
+	 */
+	@Test
+	public void testCreateTagDictionary() throws FileNotFoundException, IOException {
+		HashMap<String, Set<String>> tagDictionaryData = createTagDictionary(true);
+		assertIsNnCcIn(tagDictionaryData.get("IL-2"));
+
+		tagDictionaryData = createTagDictionary(false);
+		assertIsNnCcIn(tagDictionaryData.get("il-2"));
+
+		Set<String> tags = tagDictionaryData.get("surface");
+		assertEquals("NN", tags.iterator().next());
+	}
+
+	/**
+	 * Runs TagDictionaryCreator.main and checks the dictionary file it writes: the
+	 * lines for "IL-2" and "requires" and the total number of lines.
+	 */
+	@Test
+	public void testMain() throws FileNotFoundException, IOException{
+		String[] args = new String[] { TRAINING_DATA, DICTIONARY_OUTPUT, "true"};
+		TagDictionaryCreator.main(args);
+
+		int lines = 0;
+		BufferedReader input = new BufferedReader(new FileReader(DICTIONARY_OUTPUT));
+		try {
+			String line;
+			while((line = input.readLine())!= null) {
+				lines++;
+				if(line.startsWith("IL-2 ")) {
+					assertEquals("IL-2 CC IN NN", line);
+				}
+				if(line.startsWith("requires ")) {
+					assertEquals("requires VBZ", line);
+				}
+			}
+		} finally {
+			// release the file handle even when an assertion above fails
+			input.close();
+		}
+		assertEquals(36, lines);
+	}
+
+	/*
+	 * Reads the training data through TagDictionaryCreator.createTagDictionary and
+	 * always closes the reader afterwards.
+	 */
+	private static HashMap<String, Set<String>> createTagDictionary(boolean flag) throws IOException {
+		BufferedReader input = new BufferedReader(new FileReader(TRAINING_DATA));
+		try {
+			return TagDictionaryCreator.createTagDictionary(input, flag);
+		} finally {
+			input.close();
+		}
+	}
+
+	/*
+	 * Asserts that the tag set holds exactly three tags and that NN, CC and IN are among them.
+	 */
+	private static void assertIsNnCcIn(Set<String> tags) {
+		assertEquals(3, tags.size());
+		assertTrue(tags.contains("NN"));
+		assertTrue(tags.contains("CC"));
+		assertTrue(tags.contains("IN"));
+	}
+
+}

Added: incubator/ctakes/trunk/ctakes-preprocessor/src/test/java/preprocessor/ClinicalNotePreProcessorTest.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-preprocessor/src/test/java/preprocessor/ClinicalNotePreProcessorTest.java?rev=1396711&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-preprocessor/src/test/java/preprocessor/ClinicalNotePreProcessorTest.java (added)
+++ incubator/ctakes/trunk/ctakes-preprocessor/src/test/java/preprocessor/ClinicalNotePreProcessorTest.java Wed Oct 10 17:59:40 2012
@@ -0,0 +1,142 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.preprocessor;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.URLDecoder;
+import java.util.Map;
+
+import org.apache.ctakes.preprocessor.ClinicalNotePreProcessor;
+import org.apache.ctakes.preprocessor.DocumentMetaData;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the ClinicalNotePreProcessor class.  These test the class
+ * against clinical note XML data and determine whether the preprocessor
+ * successfully parsed out the correct info.
+ * 
+ * @author Mayo Clinic
+ */
+public class ClinicalNotePreProcessorTest extends TestCase
+{
+    private ClinicalNotePreProcessor iv_cnotePreProcessor;
+    private String iv_cnoteXML;
+
+    /**
+     * Constructor for ClinicalNotePreProcessorTest.
+     * @param arg0 passed through unchanged to the TestCase constructor
+     */
+    public ClinicalNotePreProcessorTest(String arg0)
+    {
+        super(arg0);
+    }
+
+    /*
+     * Creates the preprocessor from the DTD and loads the sample clinical note
+     * into iv_cnoteXML.
+     *
+     * @see TestCase#setUp()
+     */
+    protected void setUp() throws Exception
+    {
+        super.setUp();
+
+        String dtdLocation = "resources/cda/NotesIIST_RTF.DTD";
+        File dtd = new File(dtdLocation);
+        iv_cnotePreProcessor = new ClinicalNotePreProcessor(dtd, false);
+
+        //String cnoteLocationOnCp = "/test/data/testpatient_cn_1.xml";
+        String cnoteLocationOnCp = "../../../../../data/testpatient_cn_1.xml";
+
+        // getResource() returns null when the resource is missing, so the URL
+        // itself has to be checked before it is dereferenced; checking the
+        // decoded path afterwards could never fail.
+        java.net.URL cnoteUrl = getClass().getResource(cnoteLocationOnCp);
+        if (cnoteUrl == null) {
+            throw new FileNotFoundException("Unable to find: " + cnoteLocationOnCp);
+        }
+
+        // decode with an explicit encoding; the one-argument decode() is
+        // deprecated because it depends on the platform default
+        String cnoteLocation = URLDecoder.decode(cnoteUrl.getPath(), "UTF-8");
+        iv_cnoteXML = load(cnoteLocation);
+    }
+
+    /*
+     * @see TestCase#tearDown()
+     */
+    protected void tearDown() throws Exception
+    {
+        super.tearDown();
+    }
+
+    /**
+     * Tests the process method: the document id and the service code found in
+     * the returned meta data must match the values expected for the sample note.
+     */
+    public void testProcess()
+    {
+        try
+        {
+            DocumentMetaData dmd = iv_cnotePreProcessor.process(iv_cnoteXML);
+
+            // validate document properties
+            String docID = "000000000";
+            String serviceCode = "MNT";
+            Map<?, ?> docProperties = dmd.getMetaData();
+            String cnote_docID =
+                (String) docProperties.get(
+                    ClinicalNotePreProcessor.MD_KEY_DOC_ID);
+            String cnote_serviceCode =
+                (String) docProperties.get(
+                    ClinicalNotePreProcessor.MD_KEY_SERVICE_CODE);
+            assertEquals(docID, cnote_docID);
+            assertEquals(serviceCode, cnote_serviceCode);
+
+            // validate each section
+            // TODO Consider validating each section
+        }
+        catch (Exception e)
+        {
+            e.printStackTrace(System.err);
+            fail(e.getMessage());
+        }
+    }
+
+    /**
+     * Loads text from a file, terminating every line that was read with "\n".
+     * @param filename path of the file to read
+     * @return the file content; an empty string for an empty file
+     * @throws FileNotFoundException if the file cannot be opened
+     * @throws IOException if reading or closing the file fails
+     */
+    private String load(String filename)
+        throws FileNotFoundException, IOException
+    {
+        StringBuilder msg = new StringBuilder();
+        BufferedReader br = new BufferedReader(new FileReader(new File(filename)));
+        try
+        {
+            String line = br.readLine();
+            while (line != null)
+            {
+                msg.append(line).append('\n');
+                line = br.readLine();
+            }
+        }
+        finally
+        {
+            // close the reader even when readLine() throws
+            br.close();
+        }
+
+        return msg.toString();
+    }
+}

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/NamedEntityFeaturesExtractorTest.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/NamedEntityFeaturesExtractorTest.java?rev=1396711&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/NamedEntityFeaturesExtractorTest.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/NamedEntityFeaturesExtractorTest.java Wed Oct 10 17:59:40 2012
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.ae;
+
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.ctakes.relationextractor.ae.features.NamedEntityFeaturesExtractor;
+import org.cleartk.classifier.Feature;
+import org.junit.Test;
+import org.uimafit.factory.JCasFactory;
+
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+
+/**
+ * Checks the features that NamedEntityFeaturesExtractor returns for a pair of
+ * EntityMention annotations.
+ */
+public class NamedEntityFeaturesExtractorTest {
+
+  @Test
+  public void test() throws Exception {
+    // a JCas over a four-word document
+    JCas cas = JCasFactory.createJCasFromPath("../ctakes-type-system/desc/common_type_system.xml");
+    cas.setDocumentText("aaa bbb ccc ddd");
+
+    // first argument: covers "aaa", type id 42
+    EntityMention first = new EntityMention(cas, 0, 3);
+    first.setTypeID(42);
+    first.addToIndexes();
+
+    // second argument: covers "ccc", type id 1
+    EntityMention second = new EntityMention(cas, 8, 11);
+    second.setTypeID(1);
+    second.addToIndexes();
+
+    // a mention without a type id covering "bbb", located between the two arguments
+    EntityMention middle = new EntityMention(cas, 4, 7);
+    middle.addToIndexes();
+
+    // extract the features for the (first, second) pair
+    List<Feature> extracted = new NamedEntityFeaturesExtractor().extract(cas, first, second);
+
+    // every expected feature has to be present in the result
+    assertTrue(extracted.contains(new Feature("mention1_TypeID", "42")));
+    assertTrue(extracted.contains(new Feature("mention2_TypeID", "1")));
+    assertTrue(extracted.contains(new Feature("Distance_EntityMention", 1)));
+    assertTrue(extracted.contains(new Feature("type1type2", "42_1")));
+    assertTrue(extracted.contains(new Feature("mention1InMention2", false)));
+    assertTrue(extracted.contains(new Feature("mention2InMention1", false)));
+  }
+}

Propchange: incubator/ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/NamedEntityFeaturesExtractorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/NamedEntityFeaturesExtractorTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Compare.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Compare.java?rev=1396711&r1=1395758&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Compare.java (original)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Compare.java Wed Oct 10 17:59:40 2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package edu.mayo.bmi.utils.xcas_comparison;
+package org.apache.ctakes.utils.xcas_comparison;
 import java.io.*;
 
 /**

Modified: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Const.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Const.java?rev=1396711&r1=1395758&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Const.java (original)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/Const.java Wed Oct 10 17:59:40 2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package edu.mayo.bmi.utils.xcas_comparison;
+package org.apache.ctakes.utils.xcas_comparison;
 
 import java.util.HashSet;
 import java.util.Properties;

Modified: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/ConvertAndCompare.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/ConvertAndCompare.java?rev=1396711&r1=1395758&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/ConvertAndCompare.java (original)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/ConvertAndCompare.java Wed Oct 10 17:59:40 2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package edu.mayo.bmi.utils.xcas_comparison;
+package org.apache.ctakes.utils.xcas_comparison;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;

Modified: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasAnnotation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasAnnotation.java?rev=1396711&r1=1395758&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasAnnotation.java (original)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasAnnotation.java Wed Oct 10 17:59:40 2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package edu.mayo.bmi.utils.xcas_comparison;
+package org.apache.ctakes.utils.xcas_comparison;
 import java.util.Collection;
 import java.util.Vector;
 import java.util.Hashtable;

Modified: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasDiff.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasDiff.java?rev=1396711&r1=1395758&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasDiff.java (original)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasDiff.java Wed Oct 10 17:59:40 2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package edu.mayo.bmi.utils.xcas_comparison;
+package org.apache.ctakes.utils.xcas_comparison;
 import java.util.LinkedList;
 import java.util.Collections;
 import java.io.Writer;

Modified: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasFile.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasFile.java?rev=1396711&r1=1395758&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasFile.java (original)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasFile.java Wed Oct 10 17:59:40 2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package edu.mayo.bmi.utils.xcas_comparison;
+package org.apache.ctakes.utils.xcas_comparison;
 import java.util.Hashtable;
 import java.util.LinkedList;
 import java.util.Vector;

Modified: incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasProcessor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasProcessor.java?rev=1396711&r1=1395758&r2=1396711&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasProcessor.java (original)
+++ incubator/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/xcas_comparison/XcasProcessor.java Wed Oct 10 17:59:40 2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package edu.mayo.bmi.utils.xcas_comparison;
+package org.apache.ctakes.utils.xcas_comparison;
 import java.util.Hashtable;
 import java.util.HashMap;
 import java.util.LinkedList;



Mime
View raw message