ctakes-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Zakir Saifi <zakir.sa...@raxa.com>
Subject Re: Making Ctakes Faster after Changing default lookup span value [EXTERNAL]
Date Thu, 21 Feb 2019 06:54:00 GMT
Thanks, Sean, for the quick reply.

Here are the contents of the files you asked about:

*1. tinyDictSpec.xml*

============

<?xml version="1.0" encoding="UTF-8"?>
<!--
    tinyDictSpec.xml: dictionary lookup specification.
    Declares one JDBC-backed dictionary, one JDBC-backed concept factory,
    the pairing that ties them together, and the term consumer.
    Attribute values are kept on a single line: a line break inside a quoted
    value is normalized to a space by the XML parser and would become part
    of the value (for example a URL with a leading space).
-->
<lookupSpecification>

    <!-- Term source: reads the "rareword" table of the "aiunstructured" MySQL database. -->
    <dictionaries>
        <dictionary>
            <name>LabAnnotatorTestDict</name>
            <implementationName>org.apache.ctakes.dictionary.lookup2.dictionary.UmlsJdbcRareWordDictionary</implementationName>
            <properties>
                <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
                <property key="jdbcUrl"
                          value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
                <!-- NOTE(review): root user with an empty password; presumably a local test database. Confirm before deploying. -->
                <property key="jdbcUser" value="root"/>
                <property key="jdbcPass" value=""/>
                <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
                <property key="umlsVendor" value="NLM-6515182895"/>
                <property key="umlsUser" value=""/>
                <property key="umlsPass" value=""/>
                <property key="rareWordTable" value="rareword"/>
            </properties>
        </dictionary>
    </dictionaries>

    <!-- Concept source: same database, reads the "tui" table. -->
    <conceptFactories>
        <conceptFactory>
            <name>LabAnnotatorTestConcepts</name>
            <implementationName>org.apache.ctakes.dictionary.lookup2.concept.UmlsJdbcConceptFactory</implementationName>
            <properties>
                <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
                <property key="jdbcUrl"
                          value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
                <property key="jdbcUser" value="root"/>
                <property key="jdbcPass" value=""/>
                <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
                <property key="umlsVendor" value="NLM-6515182895"/>
                <property key="umlsUser" value=""/>
                <property key="umlsPass" value=""/>
                <property key="tuiTable" value="tui"/>
            </properties>
        </conceptFactory>
    </conceptFactories>

    <!-- Links the dictionary and the concept factory declared above by name. -->
    <dictionaryConceptPairs>
        <dictionaryConceptPair>
            <name>LabAnnotatorPair</name>
            <dictionaryName>LabAnnotatorTestDict</dictionaryName>
            <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
        </dictionaryConceptPair>
    </dictionaryConceptPairs>

    <rareWordConsumer>
        <name>Term Consumer</name>
        <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
        <properties>
            <property key="codingScheme" value="custom"/>
        </properties>
    </rareWordConsumer>

</lookupSpecification>

===========

*2.  drugConcept.xml*
<?xml version="1.0" encoding="UTF-8"?>
<!--
    drugConcept.xml: dictionary lookup specification for drug terms.
    Declares one JDBC-backed dictionary, one JDBC-backed concept factory,
    the pairing that ties them together, and the term consumer.
    Attribute values and class names are kept on a single line: a line break
    inside a quoted value is normalized to a space by the XML parser, and a
    line break inside element text is delivered to the application as part
    of the text.
-->
<lookupSpecification>

    <!-- Term source: reads the "drug" table of the "aiunstructured" MySQL database. -->
    <dictionaries>
        <dictionary>
            <name>LabAnnotatorTestDict</name>
            <!-- NOTE(review): "Dictonary" is presumably the actual spelling of the class name; verify against the class before changing it. -->
            <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.dictionary.UmlsJdbcDrugTermsDictonary</implementationName>
            <properties>
                <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
                <property key="jdbcUrl"
                          value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
                <!-- NOTE(review): root user with an empty password; presumably a local test database. Confirm before deploying. -->
                <property key="jdbcUser" value="root"/>
                <property key="jdbcPass" value=""/>
                <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
                <property key="umlsVendor" value="NLM-6515182895"/>
                <property key="umlsUser" value=""/>
                <property key="umlsPass" value=""/>
                <property key="rareWordTable" value="drug"/>
            </properties>
        </dictionary>
    </dictionaries>

    <!-- Concept source: same database, reads the "tui" table. -->
    <conceptFactories>
        <conceptFactory>
            <name>LabAnnotatorTestConcepts</name>
            <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.concept.UmlsJdbcDrugNameConceptFactory</implementationName>
            <properties>
                <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
                <property key="jdbcUrl"
                          value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
                <property key="jdbcUser" value="root"/>
                <property key="jdbcPass" value=""/>
                <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
                <property key="umlsVendor" value="NLM-6515182895"/>
                <property key="umlsUser" value=""/>
                <property key="umlsPass" value=""/>
                <property key="tuiTable" value="tui"/>
            </properties>
        </conceptFactory>
    </conceptFactories>

    <!-- Links the dictionary and the concept factory declared above by name. -->
    <dictionaryConceptPairs>
        <dictionaryConceptPair>
            <name>LabAnnotatorPair</name>
            <dictionaryName>LabAnnotatorTestDict</dictionaryName>
            <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
        </dictionaryConceptPair>
    </dictionaryConceptPairs>

    <rareWordConsumer>
        <name>Term Consumer</name>
        <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
        <properties>
            <property key="codingScheme" value="custom"/>
        </properties>
    </rareWordConsumer>

</lookupSpecification>

*=======*

*3. personName.xml*

<?xml version="1.0" encoding="UTF-8"?>
<!--
    personName.xml: dictionary lookup specification for person names.
    Declares one JDBC-backed dictionary, one JDBC-backed concept factory,
    the pairing that ties them together, and the term consumer.
    Attribute values are kept on a single line: a line break inside a quoted
    value is normalized to a space by the XML parser and would become part
    of the value (for example a URL with a leading space).
-->
<lookupSpecification>

    <!-- Term source: reads the "person_name" table of the "aiunstructured" MySQL database. -->
    <dictionaries>
        <dictionary>
            <name>LabAnnotatorTestDict</name>
            <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.dictionary.UmlsJdbcPersonDictionary</implementationName>
            <properties>
                <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
                <property key="jdbcUrl"
                          value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
                <!-- NOTE(review): root user with an empty password; presumably a local test database. Confirm before deploying. -->
                <property key="jdbcUser" value="root"/>
                <property key="jdbcPass" value=""/>
                <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
                <property key="umlsVendor" value="NLM-6515182895"/>
                <property key="umlsUser" value=""/>
                <property key="umlsPass" value=""/>
                <property key="rareWordTable" value="person_name"/>
            </properties>
        </dictionary>
    </dictionaries>

    <!-- Concept source: same database, reads the "tui" table. -->
    <conceptFactories>
        <conceptFactory>
            <name>LabAnnotatorTestConcepts</name>
            <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.concept.UmlsJdbcPersonNameConceptFactory</implementationName>
            <properties>
                <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
                <property key="jdbcUrl"
                          value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
                <property key="jdbcUser" value="root"/>
                <property key="jdbcPass" value=""/>
                <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
                <property key="umlsVendor" value="NLM-6515182895"/>
                <property key="umlsUser" value=""/>
                <property key="umlsPass" value=""/>
                <property key="tuiTable" value="tui"/>
            </properties>
        </conceptFactory>
    </conceptFactories>

    <!-- Links the dictionary and the concept factory declared above by name. -->
    <dictionaryConceptPairs>
        <dictionaryConceptPair>
            <name>LabAnnotatorPair</name>
            <dictionaryName>LabAnnotatorTestDict</dictionaryName>
            <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
        </dictionaryConceptPair>
    </dictionaryConceptPairs>

    <rareWordConsumer>
        <name>Term Consumer</name>
        <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
        <properties>
            <property key="codingScheme" value="custom"/>
        </properties>
    </rareWordConsumer>

</lookupSpecification>


*RaxaDefaultJCasTermAnnotator* is similar to
org.apache.ctakes.dictionary.lookup2.ae.*DefaultJCasTermAnnotator*; I have
only changed the value of the _minimumLookupSpan variable of
AbstractJCasTermAnnotator (to 1).

On Thu, Feb 21, 2019 at 11:41 AM Finan, Sean <
Sean.Finan@childrens.harvard.edu> wrote:

> Hi Zakir,
>
> In order for me to help you, I need to know more about:
> Your primary dictionary:
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/tinyDictSpec.xml
>
> Your custom dictionary lookup #1:
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/drugConcept.xml
>
> Your custom dictionary lookup #2:
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/personName.xml
>
>
> As for your metrics,
> >For lookup span
> value of 3 (default), rest call was taking less than 2s for text like (
> Systolic blood pressure 180 ) is now taking around 5s.
>
> Does this mean that a document containing such text took 2 seconds, or
> that averaging over discovered annotations per took 2 seconds?
>
> I realize that moving from 3 characters to 1 means that every "a" "to"
> "in" "of" "an" "1" "2" ... is used for lookup.  However, that should not
> multiply the processing time *2.5
>
>
> I have to wonder if the non-ctakes
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae
> .RaxaDefaultJCasTermAnnotator
> is doing something suspect.
>
>
> Sean
>
>
> ________________________________________
> From: Zakir Saifi <zakir.saifi@raxa.com>
> Sent: Thursday, February 21, 2019 12:18 AM
> To: dev@ctakes.apache.org
> Subject: Making Ctakes Faster after Changing default lookup span value
> [EXTERNAL]
>
> Hi Everyone,
>
> I am using Ctakes for Structuring some clinical Text. In my clinical text,
> there are single characters word like *P 90 (Pulse 90) *etc. I want Ctakes
> to detect those. Since the default minimum span detected by Ctakes is 3.
> I was not able to detect these concepts. Therefore I have changed the Value
> of the _minimumLookupSpan to 1. Now I am able to detect the one character
> word using Ctakes after adding them to my Custom Dictionary.
>
> My Problem is that after changing the value of _minimumLookupSpan, ctakes
> has become slow.
> I am using Ctakes-web-Rest (Rest Service using Ctakes). For lookup span
> value of 3 (default), rest call was taking less than 2s for text like (
> Systolic blood pressure 180 ) is now taking around 5s.
>
> How can I make Ctakes faster?. Any configuration which helps to improve the
> performance without losing the current detection rate.
>
> Here is the content of my current Piper file.
>
> load DefaultFastPipeline
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae
> .RaxaDefaultJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/tinyDictSpec.xml
> add LabValueFinder
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/drugConcept.xml
> add org.apache.ctakes.drugner.ae.DrugMentionAnnotator
>
> STATUS_BOUNDARY_ANN_TYPE="org.apache.ctakes.typesystem.type.textsem.MedicationMention"
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/personName.xml
> add org.apache.ctakes.raxactakes.core.ae.PersonNameFinder
>
> addDescription EventAnnotator
> addLogged BackwardsTimeAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/timeannotator/model.jar
> addLogged DocTimeRelAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/doctimerel/model.jar
> addLogged EventTimeRelationAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/eventtime/model.jar
> addLogged EventEventRelationAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/eventevent/model.jar
> addLogged ContextualModalityAnnotator
>
> classifierJarPath=/org/apache/ctakes/temporal/ae/contextualmodality/model.jar
> addLogged EventAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/eventannotator/model.jar
>
> --
> Regards
> Zakir Saifi
> (Software Developer at Raxa)
>


-- 
Regards
Zakir Saifi
(Software Developer at Raxa)

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message