lucene-java-user mailing list archives

From "Bauer, Herbert S. (Scott)" <Bauer.Sc...@mayo.edu>
Subject PerFieldAnalyzerWrapper does not seem to allow use of a custom analyzer
Date Fri, 07 Aug 2015 18:56:38 GMT
I can't detect any issue with the final custom analyzer declared in the code snippet below (the one that uses a PatternTokenizer and is assigned to sa), but it never seems to be hit when I run my indexing code, even though it is in the map. The fields do get indexed in the end, but I assume they are simply falling back to the StandardAnalyzer I've declared as the default in the PerFieldAnalyzerWrapper. The other anonymous Analyzer declarations work fine, and I can see them hit a breakpoint. There are no errors on standard out or in my logs. Is there something obviously wrong with the createComponents() initialization?

Map<String, Analyzer> analyzerPerField = new HashMap<>();

analyzerPerField.put(LITERAL_PROPERTY_VALUE_FIELD, literalAnalyzer);
analyzerPerField.put(LITERAL_AND_REVERSE_PROPERTY_VALUE_FIELD, literalAnalyzer);

if (doubleMetaphoneEnabled_) {
    // StandardTokenizer -> standard filter -> lowercase -> stop words -> double metaphone
    Analyzer temp = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final StandardTokenizer source = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
            source.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
            TokenStream filter = new StandardFilter(source);
            filter = new LowerCaseFilter(filter);
            filter = new StopFilter(filter, StandardAnalyzer.STOP_WORDS_SET);
            filter = new DoubleMetaphoneFilter(filter, 4, true);
            return new TokenStreamComponents(source, filter);
        }
    };
    analyzerPerField.put(DOUBLE_METAPHONE_PROPERTY_VALUE_FIELD, temp);
}

if (normEnabled_) {
    try {
        Analyzer temp = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        analyzerPerField.put(NORM_PROPERTY_VALUE_FIELD, temp);
    } catch (NoClassDefFoundError e) {
        // ignored
    }
}

if (stemmingEnabled_) {
    // Same chain as the double-metaphone analyzer, but ending in an English Snowball stemmer
    Analyzer temp = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final StandardTokenizer source = new StandardTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
            source.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
            TokenStream filter = new StandardFilter(source);
            filter = new LowerCaseFilter(filter);
            filter = new StopFilter(filter, StandardAnalyzer.STOP_WORDS_SET);
            filter = new SnowballFilter(filter, "English");
            return new TokenStreamComponents(source, filter);
        }
    };
    analyzerPerField.put(STEMMING_PROPERTY_VALUE_FIELD, temp);
}

// The analyzer that never seems to be selected: tokenizes on a regex pattern
Analyzer sa = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Pattern pattern = Pattern.compile(STRING_TOKEINZER_TOKEN);
        // group -1 tells PatternTokenizer to split on the pattern rather than match it
        final PatternTokenizer source = new PatternTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, pattern, -1);
        TokenStream filter = new StandardFilter(source);
        System.out.println("In pattern matching analyzer");
        return new TokenStreamComponents(source, filter);
    }
};
analyzerPerField.put("sources", sa);
analyzerPerField.put("usageContexts", sa);
analyzerPerField.put("qualifiers", sa);

PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
        new StandardAnalyzer(CharArraySet.EMPTY_SET), analyzerPerField);
return analyzer;
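For context, the indexing side consumes the returned wrapper roughly like this. This is only a sketch: the index path and the sample value are placeholders, not my real setup, and the real code uses our own field constants. The point is that the "sources" field is added as an analyzed TextField:

import java.nio.file.Paths;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// "/tmp/test-index" and "src1|src2" are placeholder values
Directory dir = FSDirectory.open(Paths.get("/tmp/test-index"));
try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
    Document doc = new Document();
    // TextField is run through the analyzer; a StringField would skip analysis entirely
    doc.add(new TextField("sources", "src1|src2", Field.Store.YES));
    writer.addDocument(doc);
}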

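And here is a minimal standalone check I can run against the wrapper to see which analyzer is actually selected for the "sources" field: feed a sample value through Analyzer.tokenStream() and print the emitted terms. The sample value "src1|src2" assumes STRING_TOKEINZER_TOKEN matches the '|' character, which may not be exact:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// If the sa analyzer is selected, this should print "In pattern matching
// analyzer" followed by the regex-split terms; if the terms come out
// StandardAnalyzer-style instead, the wrapper fell back to the default.
try (TokenStream ts = analyzer.tokenStream("sources", "src1|src2")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(term.toString());
    }
    ts.end();
}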