lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Brian Whitman <brian.whit...@variogr.am>
Subject Re: UTF-8 problem with Resin
Date Mon, 07 May 2007 15:36:28 GMT
On May 7, 2007, at 11:05 AM, Brian Whitman wrote:
> Using Resin 3.0.23 with a trunk Solr WAR, I am having a problem  
> adding documents with UTF-8 characters, including the utf8-example  
> in exampledocs.
>
> The document simply doesn't get added to Solr. Plain ASCII documents  
> work fine, as do all non-update requests.
>
> To reproduce:
> install resin 3 and set up solr according to the wiki for resin.
> ./post.sh utf8-example.xml
>
> I also have a real world document that doesn't work (from our nutch  
> crawls):
> wget http://variogr.am/badfile.txt
> ./post.sh badfile.txt


A Solr rock star advised me to try SOLR-214, which fixes the problem.  
Perhaps he'll enlighten us as to the reasons! But for now, be careful  
with Resin.


-Brian







> I get this in my resin logs.
>
> [10:53:10.834] java.io.CharConversionException: illegal utf8  
> encoding at 0xc3, a
> [10:53:10.834]  at com.caucho.vfs.i18n.UTF8Reader.read 
> (UTF8Reader.java:97)
> [10:53:10.834]  at com.caucho.vfs.i18n.UTF8Reader.read 
> (UTF8Reader.java:178)
> [10:53:10.834]  at com.caucho.vfs.ReadStream.read(ReadStream.java:499)
> [10:53:10.834]  at com.caucho.vfs.BufferedReaderAdapter.read 
> (BufferedReaderAdapter.java:64)
> [10:53:10.834]  at org.xmlpull.mxp1.MXParser.fillBuf(MXParser.java: 
> 2972)
> [10:53:10.834]  at org.xmlpull.mxp1.MXParser.more(MXParser.java:3026)
> [10:53:10.834]  at org.xmlpull.mxp1.MXParser.parseProlog 
> (MXParser.java:1410)
> [10:53:10.834]  at org.xmlpull.mxp1.MXParser.nextImpl(MXParser.java: 
> 1395)
> [10:53:10.834]  at org.xmlpull.mxp1.MXParser.next(MXParser.java:1093)
> [10:53:10.834]  at org.xmlpull.mxp1.MXParser.nextTag(MXParser.java: 
> 1078)
> [10:53:10.834]  at  
> org.apache.solr.handler.XmlUpdateRequestHandler.update 
> (XmlUpdateRequestHandler.java:111)
> [10:53:10.834]  at  
> org.apache.solr.handler.XmlUpdateRequestHandler.handleRequestBody 
> (XmlUpdateRequestHandler.java:84)
> [10:53:10.834]  at  
> org.apache.solr.handler.RequestHandlerBase.handleRequest 
> (RequestHandlerBase.java:77)
> [10:53:10.834]  at org.apache.solr.core.SolrCore.execute 
> (SolrCore.java:671)
> [10:53:10.834]  at  
> org.apache.solr.servlet.SolrDispatchFilter.execute 
> (SolrDispatchFilter.java:188)
> [10:53:10.834]  at  
> org.apache.solr.servlet.SolrDispatchFilter.doFilter 
> (SolrDispatchFilter.java:156)
> [10:53:10.834]  at  
> com.caucho.server.dispatch.FilterFilterChain.doFilter 
> (FilterFilterChain.java:70)
> [10:53:10.834]  at  
> com.caucho.server.webapp.WebAppFilterChain.doFilter 
> (WebAppFilterChain.java:173)
> [10:53:10.834]  at  
> com.caucho.server.dispatch.ServletInvocation.service 
> (ServletInvocation.java:229)
> [10:53:10.834]  at com.caucho.server.http.HttpRequest.handleRequest 
> (HttpRequest.java:274)
> [10:53:10.834]  at com.caucho.server.port.TcpConnection.run 
> (TcpConnection.java:511)
> [10:53:10.834]  at com.caucho.util.ThreadPool.runTasks 
> (ThreadPool.java:520)
> [10:53:10.834]  at com.caucho.util.ThreadPool.run(ThreadPool.java:442)
> [10:53:10.834]  at java.lang.Thread.run(Thread.java:619)
> [10:53:10.834]
> [10:53:10.835] /update  0 2
> [10:53:10.836] java.io.CharConversionException: illegal utf8  
> encoding at 0xc3, a
> [10:53:10.836]  at com.caucho.vfs.i18n.UTF8Reader.read 
> (UTF8Reader.java:97)
> [10:53:10.836]  at com.caucho.vfs.i18n.UTF8Reader.read 
> (UTF8Reader.java:178)
> [10:53:10.836]  at com.caucho.vfs.ReadStream.read(ReadStream.java:499)
> [10:53:10.836]  at com.caucho.vfs.BufferedReaderAdapter.read 
> (BufferedReaderAdapter.java:64)
> [10:53:10.836]  at org.xmlpull.mxp1.MXParser.fillBuf(MXParser.java: 
> 2972)
> [10:53:10.836]  at org.xmlpull.mxp1.MXParser.more(MXParser.java:3026)
> [10:53:10.836]  at org.xmlpull.mxp1.MXParser.parseProlog 
> (MXParser.java:1410)
> [10:53:10.836]  at org.xmlpull.mxp1.MXParser.nextImpl(MXParser.java: 
> 1395)
> [10:53:10.836]  at org.xmlpull.mxp1.MXParser.next(MXParser.java:1093)
> [10:53:10.836]  at org.xmlpull.mxp1.MXParser.nextTag(MXParser.java: 
> 1078)
> [10:53:10.836]  at  
> org.apache.solr.handler.XmlUpdateRequestHandler.update 
> (XmlUpdateRequestHandler.java:111)
> [10:53:10.836]  at  
> org.apache.solr.handler.XmlUpdateRequestHandler.handleRequestBody 
> (XmlUpdateRequestHandler.java:84)
> [10:53:10.836]  at  
> org.apache.solr.handler.RequestHandlerBase.handleRequest 
> (RequestHandlerBase.java:77)
> [10:53:10.836]  at org.apache.solr.core.SolrCore.execute 
> (SolrCore.java:671)
> [10:53:10.836]  at  
> org.apache.solr.servlet.SolrDispatchFilter.execute 
> (SolrDispatchFilter.java:188)
> [10:53:10.836]  at  
> org.apache.solr.servlet.SolrDispatchFilter.doFilter 
> (SolrDispatchFilter.java:156)
> [10:53:10.836]  at  
> com.caucho.server.dispatch.FilterFilterChain.doFilter 
> (FilterFilterChain.java:70)
> [10:53:10.836]  at  
> com.caucho.server.webapp.WebAppFilterChain.doFilter 
> (WebAppFilterChain.java:173)
> [10:53:10.836]  at  
> com.caucho.server.dispatch.ServletInvocation.service 
> (ServletInvocation.java:229)
> [10:53:10.836]  at com.caucho.server.http.HttpRequest.handleRequest 
> (HttpRequest.java:274)
> [10:53:10.836]  at com.caucho.server.port.TcpConnection.run 
> (TcpConnection.java:511)
> [10:53:10.836]  at com.caucho.util.ThreadPool.runTasks 
> (ThreadPool.java:520)
> [10:53:10.836]  at com.caucho.util.ThreadPool.run(ThreadPool.java:442)
> [10:53:10.836]  at java.lang.Thread.run(Thread.java:619)
> [10:53:10.836]
> [10:53:10.837] [2] HTTP/1.1 500 illegal utf8 encoding at 0xc3, a
> [10:53:10.837]
> [10:53:10.837] java.io.CharConversionException: illegal utf8  
> encoding at 0xc3, a
> [10:53:10.837]  at com.caucho.vfs.i18n.UTF8Reader.read 
> (UTF8Reader.java:97)
> [10:53:10.837]  at com.caucho.vfs.i18n.UTF8Reader.read 
> (UTF8Reader.java:178)
> [10:53:10.837]  at com.caucho.vfs.ReadStream.read(ReadStream.java:499)
> [10:53:10.837]  at com.caucho.vfs.BufferedReaderAdapter.read 
> (BufferedReaderAdapter.java:64)
> [10:53:10.837]  at org.xmlpull.mxp1.MXParser.fillBuf(MXParser.java: 
> 2972)
> [10:53:10.837]  at org.xmlpull.mxp1.MXParser.more(MXParser.java:3026)
> [10:53:10.837]  at org.xmlpull.mxp1.MXParser.parseProlog 
> (MXParser.java:1410)
> [10:53:10.837]  at org.xmlpull.mxp1.MXParser.nextImpl(MXParser.java: 
> 1395)
> [10:53:10.837]  at org.xmlpull.mxp1.MXParser.next(MXParser.java:1093)
> [10:53:10.837]  at org.xmlpull.mxp1.MXParser.nextTag(MXParser.java: 
> 1078)
> [10:53:10.837]  at  
> org.apache.solr.handler.XmlUpdateRequestHandler.update 
> (XmlUpdateRequestHandler.java:111)
> [10:53:10.837]  at  
> org.apache.solr.handler.XmlUpdateRequestHandler.handleRequestBody 
> (XmlUpdateRequestHandler.java:84)
> [10:53:10.837]  at  
> org.apache.solr.handler.RequestHandlerBase.handleRequest 
> (RequestHandlerBase.java:77)
> [10:53:10.837]  at org.apache.solr.core.SolrCore.execute 
> (SolrCore.java:671)
> [10:53:10.837]  at  
> org.apache.solr.servlet.SolrDispatchFilter.execute 
> (SolrDispatchFilter.java:188)
> [10:53:10.837]  at  
> org.apache.solr.servlet.SolrDispatchFilter.doFilter 
> (SolrDispatchFilter.java:156)
> [10:53:10.837]  at  
> com.caucho.server.dispatch.FilterFilterChain.doFilter 
> (FilterFilterChain.java:70)
> [10:53:10.837]  at  
> com.caucho.server.webapp.WebAppFilterChain.doFilter 
> (WebAppFilterChain.java:173)
> [10:53:10.837]  at  
> com.caucho.server.dispatch.ServletInvocation.service 
> (ServletInvocation.java:229)
> [10:53:10.837]  at com.caucho.server.http.HttpRequest.handleRequest 
> (HttpRequest.java:274)
> [10:53:10.837]  at com.caucho.server.port.TcpConnection.run 
> (TcpConnection.java:511)
> [10:53:10.837]  at com.caucho.util.ThreadPool.runTasks 
> (ThreadPool.java:520)
> [10:53:10.837]  at com.caucho.util.ThreadPool.run(ThreadPool.java:442)
> [10:53:10.837]  at java.lang.Thread.run(Thread.java:619)
> [10:53:10.837]
>
>
>
> --
> http://variogr.am/
> brian.whitman@variogr.am
>
>
>


Mime
View raw message