From commits-return-4466-apmail-ctakes-commits-archive=ctakes.apache.org@ctakes.apache.org Wed Sep 28 22:39:32 2016 Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 8A16419C6F for ; Wed, 28 Sep 2016 22:39:32 +0000 (UTC) Received: (qmail 61405 invoked by uid 500); 28 Sep 2016 22:39:32 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 61364 invoked by uid 500); 28 Sep 2016 22:39:32 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 61355 invoked by uid 99); 28 Sep 2016 22:39:32 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 28 Sep 2016 22:39:32 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id E0C5D1A0088 for ; Wed, 28 Sep 2016 22:39:31 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -1.199 X-Spam-Level: X-Spam-Status: No, score=-1.199 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RP_MATCHES_RCVD=-2.999] autolearn=disabled Received: from mx2-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id jUpdRB38cqTK for ; Wed, 28 Sep 2016 22:39:30 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx2-lw-eu.apache.org (ASF Mail Server at mx2-lw-eu.apache.org) with ESMTP id 9FE6F5FB5F for ; Wed, 28 Sep 2016 22:39:29 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 62841E03EE for ; Wed, 28 Sep 2016 22:39:27 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id CD2813A05B6 for ; Wed, 28 Sep 2016 22:39:27 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1762725 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core: cc/pretty/plaintext/ cc/pretty/row/ cc/property/plaintext/ util/ Date: Wed, 28 Sep 2016 22:39:26 -0000 To: commits@ctakes.apache.org From: seanfinan@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20160928223927.CD2813A05B6@svn01-us-west.apache.org> Author: seanfinan Date: Wed Sep 28 22:39:26 2016 New Revision: 1762725 URL: http://svn.apache.org/viewvc?rev=1762725&view=rev Log: -just in case- catches in PrettyTextWriter for string size mismatches Print all sentences in PropertyTextWriter, including those w/o annotations Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java?rev=1762725&r1=1762724&r2=1762725&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java Wed Sep 28 22:39:26 2016 @@ -115,9 +115,7 @@ final public class PrettyTextWriter { // Create annotation rows with shorter spans on top final Collection itemRows = new ArrayList<>(); final ItemRow baseItemRow = new DefaultItemRow(); - for ( ItemCell itemCell : baseItemMap.values() ) { - baseItemRow.addItemCell( itemCell ); - } + baseItemMap.values().forEach( baseItemRow::addItemCell ); itemRows.add( baseItemRow ); itemRows.addAll( createItemRows( coveringItemMap ) ); // Create map of all text span offsets to adjusted offsets @@ -408,9 +406,12 @@ final public class PrettyTextWriter { } final Map> semanticCuis = new HashMap<>(); for ( UmlsConcept umlsConcept : umlsConcepts ) { - final String cui = umlsConcept.getCui(); + final String cui = trimTo8( umlsConcept.getCui() ); final String tui = umlsConcept.getTui(); - final String semanticName = SemanticGroup.getSemanticName( tui ); + String semanticName = SemanticGroup.getSemanticName( tui ); + if ( semanticName.equals( "Unknown" ) ) { + semanticName = trimTo8( identifiedAnnotation.getClass().getSimpleName() ); + } Collection cuis = semanticCuis.get( semanticName ); if ( cuis == null ) { cuis = new HashSet<>(); @@ -421,4 +422,12 @@ final public class PrettyTextWriter { return semanticCuis; } + + static private String trimTo8( final String text ) { + if ( text.length() <= 8 ) { + return text; + } + return "<" + text.substring( text.length() - 7, text.length() ); + } + } Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java?rev=1762725&r1=1762724&r2=1762725&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java Wed Sep 28 22:39:26 2016 @@ -76,7 +76,13 @@ public final class DefaultItemRow implem sb.replace( begin, begin + width, new String( fill_chars ) ); } else { final int paddedOffset = getPaddedOffset( annotationText, width ); - sb.replace( begin + paddedOffset, begin + paddedOffset + annotationText.length(), annotationText ); + final int b = (begin + paddedOffset < 0) ? begin : begin + paddedOffset; + final int e = Math.min( b + annotationText.length(), rowWidth - b ); + if ( e - b < annotationText.length() ) { + sb.replace( b, e, annotationText.substring( 0, e - b ) ); + } else { + sb.replace( b, e, annotationText ); + } } } return sb.toString(); Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java?rev=1762725&r1=1762724&r2=1762725&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java Wed Sep 28 22:39:26 2016 @@ -105,9 +105,6 @@ public class PropertyTextWriter { } final Collection identifiedAnnotations = JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, sentence ); - if ( identifiedAnnotations.isEmpty() ) { - return; - } writer.write( sentenceText ); writer.newLine(); for ( IdentifiedAnnotation annotation : identifiedAnnotations ) { Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java?rev=1762725&r1=1762724&r2=1762725&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/DocumentIDAnnotationUtil.java Wed Sep 28 22:39:26 2016 @@ -27,6 +27,8 @@ import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.JFSIndexRepository; import org.apache.uima.jcas.cas.TOP; +import java.util.regex.Pattern; + /** * Utility class for fetching document id */ @@ -37,13 +39,14 @@ final public class DocumentIDAnnotationU static private final Logger LOGGER = Logger.getLogger( "DocumentIDAnnotationUtil" ); + static private final Pattern FILE_FIX_PATTERN = Pattern.compile( "[^A-Za-z0-9\\.]" ); // Utility classes should be final and have only a private constructor private DocumentIDAnnotationUtil() { } /** - * Check the jcas for a document id. Unlike {@link #getDocumentID(org.apache.uima.jcas.JCas)}, + * Check the jcas for a document id. Unlike {@link #getDeepDocumentId(JCas)}, * this method does not progress into deeper jcas layers/views. * * @param jcas ye olde ... @@ -127,7 +130,7 @@ final public class DocumentIDAnnotationU if ( docId == null || docId.isEmpty() ) { docId = "Unknown_" + System.currentTimeMillis(); } - return docId.replaceAll( "[^A-Za-z0-9\\.]", "_" ); + return FILE_FIX_PATTERN.matcher( docId ).replaceAll( "_" ); } } Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java?rev=1762725&r1=1762724&r2=1762725&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java Wed Sep 28 22:39:26 2016 @@ -364,7 +364,7 @@ final public class OntologyConceptUtil { /** * @param jcas - - * @param lookupWindow + * @param lookupWindow - * @param cui cui of interest * @return all IdentifiedAnnotations that have the given cui */ @@ -376,7 +376,7 @@ final public class OntologyConceptUtil { /** * @param jcas - - * @param lookupWindow + * @param lookupWindow - * @param tui tui of interest * @return all IdentifiedAnnotations that have the given tui */ @@ -388,7 +388,7 @@ final public class OntologyConceptUtil { /** * @param jcas - - * @param lookupWindow + * @param lookupWindow - * @param code code of interest * @return all IdentifiedAnnotations that have the given code */