ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1444257 - in /incubator/ctakes/trunk: ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/treekernel/ ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/ ctakes-coreference/src/ma...
Date Fri, 08 Feb 2013 22:04:06 GMT
Author: tmill
Date: Fri Feb  8 22:04:06 2013
New Revision: 1444257

URL: http://svn.apache.org/r1444257
Log:
Addresses ctakes-153.  Makes tree structure mirror cleartk tree structure. Fixed downstream
code that uses it.

Modified:
    incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/treekernel/TreeExtractor.java
    incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/AnnotationTreeUtils.java
    incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
    incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnaphoricityAttributeCalculator.java
    incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java
    incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AttributeCalculator.java
    incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/HobbsTreeNavigator.java

Modified: incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/treekernel/TreeExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/treekernel/TreeExtractor.java?rev=1444257&r1=1444256&r2=1444257&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/treekernel/TreeExtractor.java (original)
+++ incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/treekernel/TreeExtractor.java Fri Feb  8 22:04:06 2013
@@ -179,7 +179,9 @@ public class TreeExtractor {
 
 	private static SimpleTree buildSimpleClonePET(TreebankNode lca, TreebankNode t1, TreebankNode t2){
 		SimpleTree t = new SimpleTree(lca.getNodeType());
-		if(!(lca instanceof TerminalTreebankNode)){
+		if(lca instanceof TerminalTreebankNode){
+			t.addChild(new SimpleTree(lca.getNodeValue()));
+		}else{
 			for(int i = 0; i < lca.getChildren().size(); i++){
 				TreebankNode tn = lca.getChildren(i);
 				if(tn.getEnd() > t1.getBegin() && tn.getBegin() < t2.getEnd()){
@@ -200,7 +202,7 @@ public class TreeExtractor {
 		}
 		
 		TreebankNode lca = t2;
-		while(lca != null && lca.getBegin() > t1.getBegin()){
+		while(lca != null && (lca.getBegin() > t1.getBegin() || lca.getEnd() < t1.getEnd())){
 			lca = lca.getParent();
 		}
 		return lca;
@@ -287,7 +289,6 @@ public class TreeExtractor {
 
 	public static SimpleTree getSurroundingTree(TreebankNode node){
 		SimpleTree tree = null;
-		TreebankNode top = node;
 		while(node.getParent() != null){
 			node = node.getParent();
 		}
@@ -297,7 +298,9 @@ public class TreeExtractor {
 
 	public static SimpleTree getSimpleClone(TreebankNode node) {
 		SimpleTree t = new SimpleTree(node.getNodeType());
-		if(!(node instanceof TerminalTreebankNode)){
+		if(node instanceof TerminalTreebankNode){
+			t.addChild(new SimpleTree(node.getNodeValue()));
+		}else{
 			for(int i = 0; i < node.getChildren().size(); i++){
 				t.addChild(getSimpleClone(node.getChildren(i)));
 			}

Modified: incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/AnnotationTreeUtils.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/AnnotationTreeUtils.java?rev=1444257&r1=1444256&r2=1444257&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/AnnotationTreeUtils.java (original)
+++ incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/AnnotationTreeUtils.java Fri Feb  8 22:04:06 2013
@@ -73,6 +73,7 @@ public class AnnotationTreeUtils {
 			}
 		}
 		copy.setNodeType(orig.getNodeType());
+		copy.setNodeValue(orig.getNodeValue());
 		copy.setBegin(orig.getBegin());
 		copy.setEnd(orig.getEnd());
 		return copy;
@@ -144,7 +145,8 @@ public class AnnotationTreeUtils {
 		do{
 			lastTree = tree;
 			// only continue downward traversal if we are not at a POS node...
-			if(tree.getChildren().size() > 1 || tree.getChildren(0).getChildren() != null){
+//			if(tree.getChildren().size() > 1 || tree.getChildren(0).getChildren() != null){
+			if(tree.getLeaf()){
 				for(int i = 0; i < tree.getChildren().size(); i++){
 					TreebankNode child = tree.getChildren(i);
 					if(child.getBegin() <= arg1.getBegin() && child.getEnd() >= arg1.getEnd()){
@@ -162,13 +164,18 @@ public class AnnotationTreeUtils {
 			}
 			// matches a node in tree, just insert one above it
 			newTree = new TreebankNode(jcas, tree.getBegin(), tree.getEnd());
-			newTree.setNodeType(tree.getNodeType());
-			newTree.setChildren(tree.getChildren());
-			newTree.setParent(tree);
-			tree.setNodeType(nodeType);
-			tree.setChildren(new FSArray(jcas, 1));
-			tree.setChildren(0,newTree);
-			newTree = tree;
+			newTree.setNodeType(nodeType);
+			newTree.setChildren(new FSArray(jcas, 1));
+			newTree.setChildren(0, tree);
+			newTree.setParent(tree.getParent());
+			TreeUtils.replaceChild(tree.getParent(), tree, newTree);
+//			newTree.setNodeType(tree.getNodeType());
+//			newTree.setChildren(tree.getChildren());
+//			newTree.setParent(tree);
+//			tree.setNodeType(nodeType);
+//			tree.setChildren(new FSArray(jcas, 1));
+//			tree.setChildren(0,newTree);
+//			newTree = tree;
 		}else{
 			// mismatch
 
@@ -208,13 +215,18 @@ public class AnnotationTreeUtils {
 			}else{
 				// just put above here...
 				newTree = new TreebankNode(jcas, tree.getBegin(), tree.getEnd());
-				newTree.setNodeType(tree.getNodeType());
-				newTree.setChildren(tree.getChildren());
-				newTree.setParent(tree);
-				tree.setNodeType(nodeType);
-				tree.setChildren(new FSArray(jcas, 1));
-				tree.setChildren(0,newTree);
-				newTree = tree;
+				newTree.setNodeType(nodeType);
+				newTree.setChildren(new FSArray(jcas, 1));
+				newTree.setChildren(0, tree);
+				newTree.setParent(tree.getParent());
+				TreeUtils.replaceChild(tree.getParent(), tree, newTree);
+//				newTree.setNodeType(tree.getNodeType());
+//				newTree.setChildren(tree.getChildren());
+//				newTree.setParent(tree);
+//				tree.setNodeType(nodeType);
+//				tree.setChildren(new FSArray(jcas, 1));
+//				tree.setChildren(0,newTree);
+//				newTree = tree;
 			}
 		}
 		return newTree;
@@ -232,7 +244,6 @@ public class AnnotationTreeUtils {
 				continue;
 			}else if(child.getBegin() > annot.getEnd()){
 				// child is to the right of annotation completely -- remove it and all to the right
-				// TODO
 				FSArray newChildren = new FSArray(jcas, i);
 				for(int j = 0; j < i; j++){
 					newChildren.set(j, node.getChildren(j));
@@ -244,6 +255,72 @@ public class AnnotationTreeUtils {
 			}
 		}
 	}
+
+	public static void removeLeftOfAnnotation(JCas jcas, TreebankNode node, Annotation annot) {
+		if(node.getEnd() <= annot.getBegin() || node.getLeaf()) return;
+
+		// go through tree and create a list of children that are overalpping or to the right of the concept node:
+		for(int i = 0; i < node.getChildren().size(); i++){
+			TreebankNode child = node.getChildren(i);
+			if(child.getEnd() < annot.getBegin()){
+				// ignore for now but this will be removed later
+				continue;
+			}else if(child.getEnd() > annot.getBegin()){
+				// if it has substructure to the left of the concept we have to recurse
+				if(child.getBegin() < annot.getBegin()){
+					removeLeftOfAnnotation(jcas, child, annot);
+				}
+				
+				if(i > 0){
+					// if we're leaving some out we need to rebuild the whole children array
+					// now create a child array of children partially or completely to the right
+					FSArray newChildren = new FSArray(jcas, node.getChildren().size()-i);
+					for(int j = i; j < node.getChildren().size(); j++){
+						newChildren.set(j-i, node.getChildren(j));
+					}
+					node.setChildren(newChildren);
+				}
+				break;
+			}
+		}
+	}
+
+	public static TreebankNode getCommonAncestor(TreebankNode node1,
+			TreebankNode node2) {
+		// check for easy cases:
+		// 1 - an argument is null
+		if(node1 == null || node2 == null){
+			return null;
+		}
+		
+		// 1 - one completely dominates the other...
+		if(dominates(node1, node2)){
+			return node1;
+		}else if(dominates(node2, node1)){
+			return node2;
+		}
+		
+		// they were entered in the wrong order...
+		TreebankNode temp;
+		if(node1.getBegin() > node2.getBegin()){
+			temp = node1;
+			node1 = node2;
+			node2 = temp;
+		}
+		
+		TreebankNode ancestor = node2;
+		
+		while(true){
+			if(ancestor == null || ancestor.getBegin() <= node1.getBegin()){
+				break;
+			}
+			ancestor = ancestor.getParent();
+		}
+		
+		return ancestor;
+	}
 	
-	
+	public static final boolean dominates(TreebankNode node1, TreebankNode node2){
+		return(node1.getBegin() <= node2.getBegin() && node1.getEnd() >= node2.getEnd());
+	}
 }

Modified: incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java?rev=1444257&r1=1444256&r2=1444257&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java (original)
+++ incubator/ctakes/trunk/ctakes-constituency-parser/src/main/java/org/apache/ctakes/constituency/parser/util/TreeUtils.java Fri Feb  8 22:04:06 2013
@@ -206,20 +206,19 @@ public class TreeUtils {
 			if(w instanceof PunctuationToken){
 				String tokStr = w.getCoveredText();
 				if(tokStr.equals("(") || tokStr.equals("[")){
-					ttn.setNodeType("-LRB-");
+					ttn.setNodeValue("-LRB-");
 				}else if(tokStr.equals(")") || tokStr.equals("]")){
-					ttn.setNodeType("-RRB-");
+					ttn.setNodeValue("-RRB-");
 				}else if(tokStr.equals("{")){
-					ttn.setNodeType("-LCB-");
+					ttn.setNodeValue("-LCB-");
 				}else if(tokStr.equals("}")){
-					ttn.setNodeType("-RCB-");
+					ttn.setNodeValue("-RCB-");
 				}else{
-					ttn.setNodeType(w.getCoveredText());
+					ttn.setNodeValue(w.getCoveredText());
 				}
 			}else{
-				ttn.setNodeType(w.getCoveredText());
+				ttn.setNodeValue(w.getCoveredText());
 			}
-			ttn.setNodeValue(ttn.getNodeType());
 			ttn.addToIndexes();
 			terms.set(i, ttn);
 		}
@@ -233,7 +232,7 @@ public class TreeUtils {
 		
 		for(int i = 0; i < termArray.size(); i++){
 			TerminalTreebankNode ttn = (TerminalTreebankNode) termArray.get(i);
-			String word = ttn.getNodeType();
+			String word = ttn.getNodeValue();
 			word = word.replaceAll("\\s", "");
 			if(i == 0) offset = ttn.getBegin();
 			else if(word.length() == 0) continue;
@@ -253,7 +252,7 @@ public class TreeUtils {
 			typeParts = parse.getType().split("-");
 		}
 		parent.setNodeType(typeParts[0]);
-		parent.setNodeValue(typeParts[0]);
+		parent.setNodeValue(null);
 		parent.setLeaf(parse.getChildCount() == 0);
 		StringArray tags = new StringArray(jcas, typeParts.length-1);
 		for(int i = 1; i < typeParts.length; i++){
@@ -267,7 +266,13 @@ public class TreeUtils {
 		
 		for(int i = 0; i < subtrees.length; i++){
 			Parse subtree = subtrees[i];
-			if(subtree.getChildCount() > 0){
+			if(subtree.getChildCount() == 1 && subtree.getChildren()[0].getChildCount() == 0){
+				// pre-terminal case - now we can set the type (POS tag) and point the parent in the right direction
+				TerminalTreebankNode term = root.getTerminals(subtree.getHeadIndex());
+				term.setNodeType(subtree.getType());
+				children.set(i,term);
+				term.setParent(parent);				
+			}else{
 				try{
 					TreebankNode child = new TreebankNode(jcas);
 					child.setParent(parent);
@@ -277,10 +282,6 @@ public class TreeUtils {
 					System.err.println("MaxentParserWrapper Error: " + e);
 					throw new AnalysisEngineProcessException();
 				}
-			}else{
-				TerminalTreebankNode term = root.getTerminals(subtree.getHeadIndex());
-				children.set(i,term);
-				term.setParent(parent);
 			}
 		}
 		// after we've built up all the children we can fill in the span of the parent.
@@ -289,5 +290,14 @@ public class TreeUtils {
 		parent.setChildren(children);
 //		parent.addToIndexes();
 	}
+
+	public static void replaceChild(TreebankNode parent, TreebankNode oldTree,
+			TreebankNode newTree) {
+		for(int i = 0; i < parent.getChildren().size(); i++){
+			if(parent.getChildren(i) == oldTree){
+				parent.setChildren(i, newTree);
+			}
+		}
+	}
 }
 

Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnaphoricityAttributeCalculator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnaphoricityAttributeCalculator.java?rev=1444257&r1=1444256&r2=1444257&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnaphoricityAttributeCalculator.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnaphoricityAttributeCalculator.java Fri Feb  8 22:04:06 2013
@@ -53,8 +53,7 @@ public class AnaphoricityAttributeCalcul
 			TreebankNode node = MarkableTreeUtils.markableNode(jcas, m.getBegin(), m.getEnd());
 //			TerminalTreebankNode wordNode = (TerminalTreebankNode) node.getRoot().getTerminals().get(node.getHeadIndex());
 			TerminalTreebankNode wordNode = MarkableTreeUtils.getHead(node);
-			String pos = wordNode.getParent().getNodeType();
-			return pos;
+			return wordNode.getNodeType();
 		}catch(Exception e){
 
 			return null;

Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java?rev=1444257&r1=1444256&r2=1444257&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java Fri Feb  8 22:04:06 2013
@@ -117,8 +117,8 @@ public class AnnotationSelector {
 			HashSet<String> modalAdj, HashSet<String> cogved, HashSet<String> othervb) {
 		if (!ttn.getCoveredText().equalsIgnoreCase("it")) return false;
 
-		if (ttn.getParent().getNodeType().equals("PRP")) {
-			TreebankNode tn = ttn.getParent().getParent();
+		if (ttn.getNodeType().equals("PRP")) {
+			TreebankNode tn = ttn.getParent();
 			while (tn.getNodeType().startsWith("NP"))
 				tn = tn.getParent();
 			if (tn.getNodeType().equals("S")) {
@@ -171,7 +171,7 @@ public class AnnotationSelector {
 	}
 
 	private static boolean isBe (TreebankNode n) {
-		String phCat = n.getNodeValue();
+		String phCat = n.getNodeType();
 		String txt = n.getCoveredText();
 		if ((phCat.equals("VB") ||
 				phCat.equals("VBZ") ||

Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AttributeCalculator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AttributeCalculator.java?rev=1444257&r1=1444256&r2=1444257&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AttributeCalculator.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AttributeCalculator.java Fri Feb  8 22:04:06 2013
@@ -23,14 +23,14 @@ import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.LinkedList;
 
+import org.apache.ctakes.coreference.type.Markable;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 //import org.apache.ctakes.typesystem.type.NamedEntity;
-import org.apache.ctakes.coreference.type.Markable;
 
 public class AttributeCalculator {
 
@@ -90,15 +90,14 @@ public class AttributeCalculator {
 		TreebankNode node = MarkableTreeUtils.markableNode(jcas, m.getBegin(), m.getEnd());
 		if(node == null) return basicNumber(m);
 		try{
-			TreebankNode wordNode = MarkableTreeUtils.getHead(node);
-			TreebankNode posNode = wordNode.getParent();
-			String pos = posNode.getNodeType();
+			TerminalTreebankNode termNode = MarkableTreeUtils.getHead(node);
+			String pos = termNode.getNodeType();
 			if(pos.equals("NN") || pos.equals("NNP")) return "S";
 			else if(pos.equals("NNS") || pos.equals("NNPS")) return "P";
 			else{
 				// obviously there are many other pronouns but we don't cover personal pronouns and so
 				// these are all we need.
-				String word = wordNode.getCoveredText();
+				String word = termNode.getCoveredText();
 				if(word.equalsIgnoreCase("it")) return "S";
 				else if(word.equalsIgnoreCase("its")) return "S";
 				else if(word.equalsIgnoreCase("they")) return "P";

Modified: incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/HobbsTreeNavigator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/HobbsTreeNavigator.java?rev=1444257&r1=1444256&r2=1444257&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/HobbsTreeNavigator.java (original)
+++ incubator/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/HobbsTreeNavigator.java Fri Feb  8 22:04:06 2013
@@ -56,9 +56,12 @@ public class HobbsTreeNavigator {
 			for(int i = 0; i < cur.getChildren().size(); i++){
 				TreebankNode n = cur.getChildren(i);
 				if(n == node) break;
-				if(!(n instanceof TerminalTreebankNode)){
-					q.add(cur.getChildren(i));
-				}
+				// BEFORE: did this because terminal node was word.  Now terminal is POS tag, which can go here,
+				// shouldn't need special case to avoid adding terminals...
+//				if(!(n instanceof TerminalTreebankNode)){
+//					q.add(n);
+//				}
+				q.add(n);
 				if(path.contains(n)) break;
 			}
 		}



Mime
View raw message