lucene-general mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Chris Hostetter <hossman_luc...@fucit.org>
Subject Re: Escaping special character doesn't return result
Date Fri, 03 Apr 2020 17:50:16 GMT

1) this sort of question would be best sent to java-user@lucene.apache.org
2) please don't try to "bold" or otherwise emphasize parts of your email 
using "*" characters, especially when your question is about using "*" 
characters for wildcard searching -- makes it kind of confusing to 
understand what you're asking.

: Example I have indexed the word *temp/hello*

I'm going to assume you mean the string literal "Temp/Hello" (per what i 
see in the code you posted) and those "*" characters were just an attempt 
to "bold" your input.

: Now I want to search the word with wildcard query *te*/hello*

Since i've already been forced to assume that you're occasionally using 
"*" characters to bold things, i'm going to assume that the actual input 
you are giving to the query parser is "te*/hello"

: I get the error* : *Exception in thread "main"
: *org.apache.lucene.queryparser.classic.ParseException*: Cannot parse
: 'te*/hello': Lexical error at line 1, column 10.  Encountered: <EOF> after
: : "/hello"

this has nothing to do with the "*" character in your query string -- it 
has everything to do with the "/" character in your query string, which 
indicates to the query parser that you wish to do a regex search...  

https://lucene.apache.org/core/8_5_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Regexp_Searches

...It's a "start delimiter" character, but you never have the 
corresponding "end delimiter" character telling it when the regex ends, 
which is why you get a parse error

: When I escape the query with QueryParser.escape method it doesn’t yield any
: results when special characters are part of query

that's because QueryParser.escape will escape ALL of the meta-characters 
that are significant to the query parser, including both "*" and "/" in 
your input, treating them as literals, which means your "*" won't be used 
to indicate a wildcard search, it will be treated like any other character 
and then either kept or removed by your analyzer -- kept it looks like in 
this particular case -- and your query will fail to match your original 
document because the indexed (lowercased) value does not contain the 
literal sequence of characters "te*/hello"

In short: if you want to use the query parser and specify meta-characters 
for things like wildcard queries, you have to be responsible for escaping 
any meta-characters (like "/") that you want treated as literals for that 
particular query.

you can use QueryParser.escape() to help -- but you can't escape the whole 
query, just the parts of the query you want treated as string literals...

String q = QueryParser.escape("te") + "*" + QueryParser.escape("/hello");

 
: 
: 
: *Can someone suggest the right way for indexing and searching words with
: special characters?*
: 
: 
: 
: Here’s my simple program
: 
: 
: 
: *import* java.io.BufferedReader;
: 
: *import* java.io.IOException;
: 
: *import* java.io.InputStreamReader;
: 
: *import* java.nio.file.Paths;
: 
: 
: 
: *import* org.apache.lucene.analysis.Analyzer;
: 
: *import* org.apache.lucene.analysis.custom.CustomAnalyzer;
: 
: *import** org.apache.lucene.analysis.standard.StandardAnalyzer;*
: 
: *import* org.apache.lucene.document.Document;
: 
: *import* org.apache.lucene.document.Field;
: 
: *import* org.apache.lucene.document.TextField;
: 
: *import* org.apache.lucene.index.DirectoryReader;
: 
: *import* org.apache.lucene.index.IndexReader;
: 
: *import* org.apache.lucene.index.IndexWriter;
: 
: *import* org.apache.lucene.index.IndexWriterConfig;
: 
: *import* org.apache.lucene.index.IndexWriterConfig.OpenMode;
: 
: *import* org.apache.lucene.queryparser.classic.ParseException;
: 
: *import* org.apache.lucene.queryparser.classic.QueryParser;
: 
: *import* org.apache.lucene.search.IndexSearcher;
: 
: *import* org.apache.lucene.search.Query;
: 
: *import* org.apache.lucene.search.ScoreDoc;
: 
: *import* org.apache.lucene.search.TopDocs;
: 
: *import* org.apache.lucene.store.Directory;
: 
: *import* org.apache.lucene.store.MMapDirectory;
: 
: 
: 
: *public* *class* HelloLucene {
: 
:     *private* *static* Analyzer buildAnalyzer() *throws* IOException {
: 
:         *return* CustomAnalyzer.*builder*()
: 
:                 .withTokenizer("keyWord")
: 
:                 .addTokenFilter("lowercase")
: 
:                 .build();
: 
: 
: 
:     }
: 
: 
: 
: 
: 
:     *public* *static* *void* main(String[] args) *throws* IOException,
: ParseException {
: 
:         Analyzer analyzer = *buildAnalyzer*();
: 
:         // 1. create the index
: 
:         Directory index = *new* MMapDirectory(Paths.*get*("c:\\temp\\index"
: ));
: 
: 
: 
:         IndexWriterConfig config = *new* IndexWriterConfig(analyzer);
: 
: 
: 
:         String indexType = "create";
: 
:         *if* ("create".equals(indexType)) {
: 
:             config.setOpenMode(OpenMode.*CREATE*);
: 
:         } *else* {
: 
:             config.setOpenMode(OpenMode.*CREATE_OR_APPEND*);
: 
:         }
: 
:         IndexWriter w = *new* IndexWriter(index, config);
: 
: 
: 
: 
: 
:         *long* start = System.*currentTimeMillis*();
: 
:         *addDoc*(w, "Temp/Hello", "Artifact");
: 
: 
: 
:         *long* end = System.*currentTimeMillis*();
: 
:         w.close();
: 
: 
: 
:         *for* (*int* i = 0; i < 100; i++) {
: 
: 
: 
:             // 2. query
: 
:             BufferedReader input = *new* BufferedReader(*new*
: InputStreamReader(System.*in*));
: 
:             String query = input.readLine();
: 
: 
: 
:             //Prefix Search
: 
: 
: 
:             QueryParser queryParser = *new* QueryParser("Name",analyzer);
: 
:             queryParser.setAllowLeadingWildcard(*true*);
: 
:             Query q = queryParser.parse(QueryParser.*escape*(query));
: 
: 
: 
: 
: 
:             // 3. search
: 
: 
: 
:             *int* hitsPerPage = 10;
: 
:             IndexReader reader = DirectoryReader.*open*(index);
: 
:             IndexSearcher searcher = *new* IndexSearcher(reader);
: 
:             TopDocs docs = searcher.search(q, hitsPerPage);
: 
:             ScoreDoc[] hits = docs.scoreDocs;
: 
: 
: 
:             // 4. display results
: 
:             *System.**out*.println("Found " + hits.length + " hits.");
: 
:             *for* (*int* j = 0; j < hits.length; ++j) {
: 
:                 *int* docId = hits[j].doc;
: 
:                 Document d = searcher.doc(docId);
: 
:                 *System.**out*.println((j + 1) + ". " + d.get("Name") + "\t"
: + d.get("Type"));
: 
:             }
: 
: 
: 
:             reader.close();
: 
:         }
: 
: 
: 
:     }
: 
: 
: 
:     *private* *static* *void* addDoc(IndexWriter w, String name, String type)
: *throws* IOException {
: 
:         Document doc = *new* Document();
: 
:         doc.add(*new* TextField("Name", name, Field.Store.*YES*));
: 
: 
: 
:         // use a string field for *isbn* because we don't want it
: *tokenized*
: 
:         doc.add(*new* TextField("Type", type, Field.Store.*YES*));
: 
:         w.addDocument(doc);
: 
:     }
: 
: }
: 

-Hoss
http://www.lucidworks.com/
Mime
  • Unnamed multipart/mixed (inline, None, 0 bytes)
View raw message