lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Croci Francesco Luigi (ID SWS)" <fcr...@id.ethz.ch>
Subject Unsupported ContentType: application/pdf Not in: [application/xml,​ text/csv,​ text/json,​ application/csv,​ application/javabin,​ text/xml,​ application/json]
Date Wed, 20 Aug 2014 13:34:09 GMT
Hallo,

I am running Solr 4.9.0 and I get the error shown in the subject line when I try to index
a PDF document through the Solr web interface.

Here are my schema.xml and solrconfig.xml. Am I missing something?

<?xml version="1.0" encoding="UTF-8" ?>
<schema name="simple" version="1.1">
                <types>
                    <!-- Plain string type; its postings are written with the SimpleText format. -->
                    <fieldtype name="string" class="solr.StrField" postingsFormat="SimpleText"/>

                    <!-- Type referenced by the ignored_* dynamic field; no analyzer is configured. -->
                    <fieldtype name="ignored" class="solr.TextField"/>

                    <!--
                        Tokenized text type used by the fullText field. The index-time and
                        query-time chains apply the same four filters, in a different order.
                    -->
                    <fieldtype name="text" class="solr.TextField" postingsFormat="SimpleText">
                        <analyzer type="index">
                            <tokenizer class="solr.StandardTokenizerFactory"/>
                            <!-- Lowercase the letters of every token. -->
                            <filter class="solr.LowerCaseFilterFactory"/>
                            <!-- Strip whitespace from both ends of each token. -->
                            <filter class="solr.TrimFilterFactory"/>
                            <!-- Drop the common words listed in stopwords.txt, ignoring case. -->
                            <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
                            <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
                        </analyzer>
                        <analyzer type="query">
                            <tokenizer class="solr.StandardTokenizerFactory"/>
                            <!-- Stop words are removed first here; ignoreCase makes the order harmless. -->
                            <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
                            <filter class="solr.LowerCaseFilterFactory"/>
                            <filter class="solr.TrimFilterFactory"/>
                            <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
                        </analyzer>
                    </fieldtype>
                </types>

                <fields>
                    <!-- Document key; declared as the uniqueKey of this schema. -->
                    <field name="id" type="string" indexed="true" stored="true" multiValued="false"/>

                    <!-- Target of the signatureField setting in the "deduplication" update chain. -->
                    <field name="signatureField" type="string" indexed="true" stored="true" multiValued="false"/>

                    <!-- Searchable text; /update/extract maps the extracted "content" field here. -->
                    <field name="fullText" type="text" indexed="true" multiValued="true"/>

                    <!-- Catch-all for ignored_* fields: neither indexed nor stored. -->
                    <dynamicField name="ignored_*" type="ignored" indexed="false" stored="false" multiValued="true"/>
                </fields>

                <!-- Field that uniquely identifies a document. -->
                <uniqueKey>id</uniqueKey>

                <!-- Queries that name no field search fullText; terms are combined with OR by default. -->
                <defaultSearchField>fullText</defaultSearchField>
                <solrQueryParser defaultOperator="OR"/>
</schema>



<?xml version="1.0" encoding="UTF-8" ?>
<config>
                <!--
                    NOTE(review): the accompanying mail says the server runs Solr 4.9.0 while
                    this is pinned to LUCENE_45; confirm that the older setting is intentional.
                -->
                <luceneMatchVersion>LUCENE_45</luceneMatchVersion>

                <!-- Index files are accessed through the memory-mapped directory implementation. -->
                <directoryFactory name="DirectoryFactory" class="solr.MMapDirectoryFactory"/>

                <!-- Schema-aware codec factory; the schema sets postingsFormat on its field types. -->
                <codecFactory name="CodecFactory" class="solr.SchemaCodecFactory"/>

                <!-- Solr Cell (extraction) jar plus every jar under contrib/extraction/lib. -->
                <lib dir="${solr.core.instanceDir}/dist/" regex="solr-cell-\d.*\.jar"/>
                <lib dir="${solr.core.instanceDir}/contrib/extraction/lib" regex=".*\.jar"/>

                <!--
                    Currently disabled:
                    <lib dir='${solr.core.instanceDir}/lib' />
                    <lib dir="${solr.core.instanceDir}/dist/" regex="solr-langid-.*\.jar" />
                    <lib dir="${solr.core.instanceDir}/contrib/langid/lib/" />
                -->

                <!-- Handler marked as the default one. -->
                <requestHandler name="standard"
                                class="solr.StandardRequestHandler"
                                default="true"/>

                <!-- Administrative handlers registered under /admin/. -->
                <requestHandler name="/admin/"
                                class="org.apache.solr.handler.admin.AdminHandlers"/>

                <!-- Luke handler, registered explicitly at /admin/luke. -->
                <requestHandler name="/admin/luke"
                                class="org.apache.solr.handler.admin.LukeRequestHandler"/>

                <!--
                    Update handler; every request runs through the "deduplication" chain.
                    NOTE(review): the content types listed in the reported error (XML, CSV,
                    JSON, javabin) look like the formats this handler accepts. PDF uploads
                    presumably have to be sent to /update/extract instead; confirm which
                    request handler the web interface is posting to.
                -->
                <requestHandler name="/update" class="solr.UpdateRequestHandler">
                    <lst name="defaults">
                        <str name="update.chain">deduplication</str>
                    </lst>
                </requestHandler>

                <!-- Extraction handler for rich documents; also runs the "deduplication" chain. -->
                <requestHandler name="/update/extract" class="solr.extraction.ExtractingRequestHandler">
                    <lst name="defaults">
                        <!-- Field mapping: extracted text goes to fullText, "a" goes to "link". -->
                        <str name="fmap.content">fullText</str>
                        <str name="fmap.a">link</str>
                        <!-- Prefix that routes fields missing from the schema to the ignored_* dynamic field. -->
                        <str name="uprefix">ignored_</str>

                        <!-- Extraction behaviour. -->
                        <str name="captureAttr">true</str>
                        <str name="lowernames">false</str>
                        <str name="literalsOverride">true</str>
                        <str name="overwrite">false</str>

                        <!-- These settings could also be useful for tests. -->
                        <str name="update.chain">deduplication</str>
                    </lst>
                </requestHandler>

                <!--
                    Computes a text-profile signature for each incoming document and stores
                    it in signatureField; existing duplicates are not overwritten.
                -->
                <updateRequestProcessorChain name="deduplication">
                    <processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
                        <bool name="enabled">true</bool>
                        <bool name="overwriteDupes">false</bool>
                        <str name="signatureField">signatureField</str>
                        <!--
                            Must name a field that exists on the document when the chain runs.
                            /update/extract renames "content" to "fullText" (fmap.content) and
                            the schema defines no "content" field, so the signature has to be
                            computed over fullText; with "content" it never saw the text.
                        -->
                        <str name="fields">fullText</str>
                        <str name="signatureClass">solr.update.processor.TextProfileSignature</str>
                        <!-- Tuning parameters handed to the signature class. -->
                        <str name="minTokenLen">10</str>
                        <str name="quantRate">.2</str>
                    </processor>
                    <processor class="solr.LogUpdateProcessorFactory"/>
                    <!-- RunUpdateProcessorFactory stays last in the chain. -->
                    <processor class="solr.RunUpdateProcessorFactory"/>
                </updateRequestProcessorChain>

                <!-- Search handler with an empty defaults list. -->
                <requestHandler name="/selectAdmin" class="solr.SearchHandler">
                    <lst name="defaults"/>
                </requestHandler>

                <!--
                    Main search handler. Request defaults must sit inside the "defaults"
                    list: parameters placed directly under <requestHandler> are not applied
                    to incoming requests.
                -->
                <requestHandler name="/select" class="solr.SearchHandler">
                    <lst name="defaults">
                        <str name="echoParams">explicit</str>
                        <int name="rows">10</int>
                    </lst>
                </requestHandler>

                <!--
                    Index locking is switched off.
                    NOTE(review): Solr 4.x example configurations place lockType inside
                    <indexConfig>; confirm that this top-level element is actually read,
                    and that running without a lock is intended.
                -->
                <lockType>none</lockType>

                <!-- Default query for the admin interface: match all documents. -->
                <admin>
                    <defaultQuery>*:*</defaultQuery>
                </admin>

</config>
Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message