lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Naaser <naa...@gmail.com>
Subject Highlighter issue for Exact Phrase search
Date Fri, 31 May 2019 18:51:40 GMT
Hello, 

I am searching for an exact phrase, say "Jane Doe", and there are two instances
of it in the text. My highlighter only outputs the first instance and not
the second one. Can someone please help me understand the issue and how to
fix it? Any help would be highly appreciated. Part of my code is below:

// Analyzer with the default stop words
Analyzer analyzer = new StandardAnalyzer();

// Query parser over the "contents" field; every clause is required (AND),
// and a phrase slop of 0 means phrase terms must be adjacent and in order.
QueryParser qp = new QueryParser("contents", analyzer);
qp.setDefaultOperator(QueryParser.Operator.AND);
qp.setPhraseSlop(0);

Query query = qp.parse("\"Jane Doe\"");

// Search the Lucene documents (top 10 hits)
TopDocs hits = searcher.search(query, 10);

/* ---- Highlighter code start ---- */

// Uses the HTML <B></B> tag to highlight the searched terms
Formatter formatter = new SimpleHTMLFormatter();

// Scores text fragments by the number of unique query terms found
// (basically the matching score, in layman's terms)
QueryScorer scorer = new QueryScorer(query, "contents");

// Marks up highlighted terms found in the best sections of a text.
// The scorer is passed to the constructor, so no separate
// setFragmentScorer(scorer) call is needed.
Highlighter highlighter = new Highlighter(formatter, scorer);

// Breaks the text up into same-size fragments (2000 chars) without splitting spans
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 2000);
highlighter.setTextFragmenter(fragmenter);

// NOTE(review): Highlighter only analyzes a limited number of characters per
// document by default. If the second occurrence sits beyond that limit it will
// never be highlighted; consider highlighter.setMaxDocCharsToAnalyze(...)
// -- TODO confirm against the actual document length.

// Iterate over found results
for (ScoreDoc hit : hits.scoreDocs) {
    int docid = hit.doc;
    Document doc = searcher.doc(docid);

    // Get stored text from the found document
    String text = doc.get("contents");

    // Term vectors for this document (may be null if none were indexed)
    Fields vectors = reader.getTermVectors(docid);

    // Create the token stream: built from the term vectors when they carry
    // offsets, otherwise by re-analyzing the stored text. -1 = no offset limit.
    TokenStream stream = TokenSources.getTokenStream("contents", vectors, text, analyzer, -1);

    // Alternative: highlighter.getBestFragments(stream, text, 3, "...");
    TextFragment[] frag = highlighter.getBestTextFragments(stream, text, false, 2000);
    for (TextFragment fragment : frag) {
        // Only print fragments that actually contain a match
        if (fragment != null && fragment.getScore() > 0) {
            System.out.println("=======================");
            System.out.println(fragment.toString());
        }
    }
}




--
Sent from: http://lucene.472066.n3.nabble.com/Lucene-Java-Users-f532864.html

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message