lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bennis <bennis.s...@hotmail.fr>
Subject solr index data from hdfs with error
Date Fri, 20 Dec 2019 15:17:32 GMT
Hello
I am new to Solr and I need your help.
I have data on HDFS that I need to index with Solr.

I) My data looks like this; it is saved on HDFS:
ID_METIER_PCS_ESE,CD_PCS_ESE_1,LB_PCS_ESE_1,CD_PCS_ESE_2,LB_PCS_ESE_2,CD_PCS_ESE_3,LB_PCS_ESE_3,DT_DEB,DT_FIN,TS_TEC_INSERT,TS_TEC_UPDATE
37,3,Cadres et professions intellectuelles supérieures,35,Professions de
l'information, des arts et des spectacles,353a,Directeurs de journaux,
administrateurs de presse, directeurs d'éditions (littéraire, musicale,
audiovisuelle et multimédia),01/01/70,31/12/99,08/01/19 18:13:42,274272000,

It is located here:
${GEOBI_NAMENODE}/user/bdatadev2/work/tmp/tmp_TD_METIER_PCS_ESE

II) I created the following solr-morphline.conf:

*
SOLR_LOCATOR : {
  # Name of solr collection
  collection : oracle_table_test_DEV2 

  # ZooKeeper ensemble
  zkHost : "eufrtopbdt003.randstaddta.gis:2182/solr"
}

morphlines : [
  {
    id : morphline1
    importCommands : ["org.kitesdk.**"]

    commands : [
      {
        readCSV {
          separator : ","
          # These columns should match the ones configured in Solr and are
          # expected in this position inside the CSV
          columns :
[ID_METIER_PCS_ESE,CD_PCS_ESE_1,LB_PCS_ESE_1,CD_PCS_ESE_2,LB_PCS_ESE_2,CD_PCS_ESE_3,LB_PCS_ESE_3,DT_DEB,DT_FIN,TS_TEC_INSERT,TS_TEC_UPDATE]
          ignoreFirstLine : true
          commentPrefix : ""
          trim : true
          charset : UTF-8
        }
      }

      {
        sanitizeUnknownSolrFields {
          # Location from which to fetch Solr schema
          solrLocator : ${SOLR_LOCATOR}
        }
      }

      # log the record at DEBUG level to SLF4J
      { logDebug { format : "output record: {}", args : ["@{}"] } }

      # load the record into a Solr server or MapReduce Reducer
      {
        loadSolr {
          solrLocator : ${SOLR_LOCATOR}
        }
      }

    ]
  }
]

*


III) Finally, my schema.xml is the following; I modified only the part that
defines the fields:
*
<?xml version="1.0" encoding="UTF-8" ?>

<schema name="example" version="1.5">
 <fields>
 
    <field name="ID_METIER_PCS_ESE" type="string" indexed="true"
stored="true" required="true"  docValues="false"/>
   <field name="CD_PCS_ESE_1"      type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="LB_PCS_ESE_1"      type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="CD_PCS_ESE_2"      type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="LB_PCS_ESE_2"      type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="CD_PCS_ESE_3"      type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="LB_PCS_ESE_3"      type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="DT_DEB"            type="string"   indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="DT_FIN"            type="string"   indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="TS_TEC_INSERT"     type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="TS_TEC_UPDATE"     type="string" indexed="true"
stored="true" required="false" docValues="false"/>
   <field name="_expire_at_" type="date" indexed="true" stored="true"
required="false"/>
   
   

   

   

   
   
   
   <field name="_version_" type="long" indexed="true" stored="true"/>

   

   
   
   <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
   <dynamicField name="*_is" type="int"    indexed="true"  stored="true" 
multiValued="true"/>
   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true" />
   <dynamicField name="*_ss" type="string"  indexed="true"  stored="true"
multiValued="true"/>
   <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
   <dynamicField name="*_ls" type="long"   indexed="true"  stored="true" 
multiValued="true"/>
   <dynamicField name="*_t"  type="text_general"    indexed="true" 
stored="true"/>
   <dynamicField name="*_txt" type="text_general"   indexed="true" 
stored="true" multiValued="true"/>
   <dynamicField name="*_en"  type="text_en"    indexed="true" 
stored="true" multiValued="true"/>
   <dynamicField name="*_b"  type="boolean" indexed="true" stored="true"/>
   <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" 
multiValued="true"/>
   <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
   <dynamicField name="*_fs" type="float"  indexed="true"  stored="true" 
multiValued="true"/>
   <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
   <dynamicField name="*_ds" type="double" indexed="true"  stored="true" 
multiValued="true"/>

   
   <dynamicField name="*_coordinate"  type="tdouble" indexed="true" 
stored="false" />

   <dynamicField name="*_dt"  type="date"    indexed="true"  stored="true"/>
   <dynamicField name="*_dts" type="date"    indexed="true"  stored="true"
multiValued="true"/>
   <dynamicField name="*_p"  type="location" indexed="true" stored="true"/>

   
   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>

   <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
   <dynamicField name="*_c"   type="currency" indexed="true" 
stored="true"/>

   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
   <dynamicField name="attr_*" type="text_general" indexed="true"
stored="true" multiValued="true"/>

   <dynamicField name="random_*" type="random" />

    
   
   
 </fields>


 
 <uniqueKey>ID_METIER_PCS_ESE</uniqueKey> 

 
 
 <types>
  
      -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />

    
    <fieldType name="boolean" class="solr.BoolField"
sortMissingLast="true"/>

    <fieldType name="int" class="solr.TrieIntField" precisionStep="0"
positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
positionIncrementGap="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0"
positionIncrementGap="0"/>

    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8"
positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8"
positionIncrementGap="0"/>

    <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
positionIncrementGap="0"/>

    
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6"
positionIncrementGap="0"/>


    
    <fieldtype name="binary" class="solr.BinaryField"/>

    <fieldType name="pint" class="solr.IntField"/>
    <fieldType name="plong" class="solr.LongField"/>
    <fieldType name="pfloat" class="solr.FloatField"/>
    <fieldType name="pdouble" class="solr.DoubleField"/>
    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>

   <fieldType name="random" class="solr.RandomSortField" indexed="true" />

   
    <fieldType name="text_ws" class="solr.TextField"
positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_general" class="solr.TextField"
positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" />
        
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>
    </fieldType>
<fieldType class="solr.TextField" name="text_auto">
 <analyzer>
  <tokenizer class="solr.KeywordTokenizerFactory"/>
  <filter class="solr.LowerCaseFilterFactory"/>
 </analyzer>
</fieldType>

    <fieldType name="text_en" class="solr.TextField"
positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>

        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
	<filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
	
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
	<filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
	
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="text_en_splitting" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        
        
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
        <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
        <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

    
    <fieldType name="text_en_splitting_tight" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="false"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt"/>
        <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
        
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    
    <fieldType name="text_general_rev" class="solr.TextField"
positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ReversedWildcardFilterFactory"
withOriginal="true"
           maxPosAsterisk="3" maxPosQuestion="2"
maxFractionAsterisk="0.33"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" />
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>


    <fieldType name="alphaOnlySort" class="solr.TextField"
sortMissingLast="true" omitNorms="true">
      <analyzer>
        
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        
        <filter class="solr.LowerCaseFilterFactory" />
        
        <filter class="solr.TrimFilterFactory" />

        <filter class="solr.PatternReplaceFilterFactory"
                pattern="([^a-z])" replacement="" replace="all"
        />
      </analyzer>
    </fieldType>
    
    <fieldtype name="phonetic" stored="false" indexed="true"
class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
      </analyzer>
    </fieldtype>

    <fieldtype name="payloads" stored="false" indexed="true"
class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>

        <filter class="solr.DelimitedPayloadTokenFilterFactory"
encoder="float"/>
      </analyzer>
    </fieldtype>

    
    <fieldType name="lowercase" class="solr.TextField"
positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>

    
    <fieldType name="descendent_path" class="solr.TextField">
      <analyzer type="index">
	<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
      </analyzer>
      <analyzer type="query">
	<tokenizer class="solr.KeywordTokenizerFactory" />
      </analyzer>
    </fieldType>
    
    <fieldType name="ancestor_path" class="solr.TextField">
      <analyzer type="index">
	<tokenizer class="solr.KeywordTokenizerFactory" />
      </analyzer>
      <analyzer type="query">
	<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
      </analyzer>
    </fieldType>

     
    <fieldtype name="ignored" stored="false" indexed="false"
multiValued="true" class="solr.StrField" />


    <fieldType name="point" class="solr.PointType" dimension="2"
subFieldSuffix="_d"/>

    
    <fieldType name="location" class="solr.LatLonType"
subFieldSuffix="_coordinate"/>

    
    <fieldType name="location_rpt"
class="solr.SpatialRecursivePrefixTreeFieldType"
        geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees"
/>

    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8"
defaultCurrency="USD" currencyConfig="currency.xml" />
             


   

    
    <fieldType name="text_ar" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ar.txt" />
        
        <filter class="solr.ArabicNormalizationFilterFactory"/>
        <filter class="solr.ArabicStemFilterFactory"/>
      </analyzer>
    </fieldType>

    
    <fieldType name="text_bg" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/> 
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_bg.txt" /> 
        <filter class="solr.BulgarianStemFilterFactory"/>       
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_ca" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.ElisionFilterFactory" ignoreCase="true"
articles="lang/contractions_ca.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ca.txt" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Catalan"/>       
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_cjk" class="solr.TextField"
positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.CJKWidthFilterFactory"/>
        
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.CJKBigramFilterFactory"/>
      </analyzer>
    </fieldType>

    
    <fieldType name="text_cz" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_cz.txt" />
        <filter class="solr.CzechStemFilterFactory"/>       
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_da" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_da.txt" format="snowball" />
        <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>       
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_de" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_de.txt" format="snowball" />
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.GermanLightStemFilterFactory"/>
        
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_el" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="false"
words="lang/stopwords_el.txt" />
        <filter class="solr.GreekStemFilterFactory"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_es" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_es.txt" format="snowball" />
        <filter class="solr.SpanishLightStemFilterFactory"/>
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_eu" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_eu.txt" />
        <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_fa" class="solr.TextField"
positionIncrementGap="100">
      <analyzer>
        
        <charFilter class="solr.PersianCharFilterFactory"/>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ArabicNormalizationFilterFactory"/>
        <filter class="solr.PersianNormalizationFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_fa.txt" />
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_fi" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_fi.txt" format="snowball" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Finnish"/>
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_fr" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.ElisionFilterFactory" ignoreCase="true"
articles="lang/contractions_fr.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_fr.txt" format="snowball" />
        <filter class="solr.FrenchLightStemFilterFactory"/>
        
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_ga" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.ElisionFilterFactory" ignoreCase="true"
articles="lang/contractions_ga.txt"/>
        
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/hyphenations_ga.txt"/>
        <filter class="solr.IrishLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ga.txt"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_gl" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_gl.txt" />
        <filter class="solr.GalicianStemFilterFactory"/>
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_hi" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        
        <filter class="solr.IndicNormalizationFilterFactory"/>
        
        <filter class="solr.HindiNormalizationFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_hi.txt" />
        <filter class="solr.HindiStemFilterFactory"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_hu" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_hu.txt" format="snowball" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Hungarian"/>
           
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_hy" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_hy.txt" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Armenian"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_id" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_id.txt" />
        
        <filter class="solr.IndonesianStemFilterFactory"
stemDerivational="true"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_it" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.ElisionFilterFactory" ignoreCase="true"
articles="lang/contractions_it.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_it.txt" format="snowball" />
        <filter class="solr.ItalianLightStemFilterFactory"/>
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_ja" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="false">
      <analyzer>
      
        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
        
        
        <filter class="solr.JapaneseBaseFormFilterFactory"/>
        
        <filter class="solr.JapanesePartOfSpeechStopFilterFactory"
tags="lang/stoptags_ja.txt" />
        
        <filter class="solr.CJKWidthFilterFactory"/>
        
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ja.txt" />
        
        <filter class="solr.JapaneseKatakanaStemFilterFactory"
minimumLength="4"/>
        
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_lv" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_lv.txt" />
        <filter class="solr.LatvianStemFilterFactory"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_nl" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_nl.txt" format="snowball" />
        <filter class="solr.StemmerOverrideFilterFactory"
dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_no" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_no.txt" format="snowball" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Norwegian"/>
        
        
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_pt" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_pt.txt" format="snowball" />
        <filter class="solr.PortugueseLightStemFilterFactory"/>
        
        
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_ro" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ro.txt" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Romanian"/>
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_ru" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_ru.txt" format="snowball" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Russian"/>
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_sv" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_sv.txt" format="snowball" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Swedish"/>
        
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_th" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ThaiWordFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_th.txt" />
      </analyzer>
    </fieldType>
    
    
    <fieldType name="text_tr" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.TurkishLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="false"
words="lang/stopwords_tr.txt" />
        <filter class="solr.SnowballPorterFilterFactory"
language="Turkish"/>
      </analyzer>
    </fieldType>

 </types>
  
  
  

</schema>
*


When I execute my program with:
hadoop jar /opt/cloudera/parcels/CDH/jars/search-mr-*-job.jar \
org.apache.solr.hadoop.MapReduceIndexerTool \
--output-dir  ${GEOBI_NAMENODE}/solr/oracle_table_test_DEV2/  \
--zk-host ${GEOBI_ZK_HOST}:${GEOBI_ZK_PORT}/solr \
--collection oracle_table_test_DEV2   \
--morphline-file ${GEOBI_HOME}/international/PTG/scripts/oracle_table_test_DEV2_CONF/conf/solr-morphline.conf \
--solr-home-dir ${GEOBI_HOME}/international/PTG/scripts/oracle_table_test_DEV2_CONF/  \
--go-live \
${GEOBI_NAMENODE}/user/bdatadev2/work/tmp/tmp_TD_METIER_PCS_ESE



*I get the following error in the reducer:*
Error: java.io.IOException: Batch Write Failure at
org.apache.solr.hadoop.BatchWriter.throwIf(BatchWriter.java:239) at
org.apache.solr.hadoop.BatchWriter.queueBatch(BatchWriter.java:181) at
org.apache.solr.hadoop.SolrRecordWriter.close(SolrRecordWriter.java:275) at
org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.close(ReduceTask.java:550)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:629) at
org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) at
org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) Caused by:
org.apache.solr.common.SolrException: ERROR: [doc=108] unknown field 'id' at
org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
at
org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
at
org.apache.solr.update.DirectUpdateHandler2.updateDocument(DirectUpdateHandler2.java:814)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:235)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
at
org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:946)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1101)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:702)
at
org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
at
org.apache.solr.update.processor.FieldMutatingUpdateProcessor.processAdd(FieldMutatingUpdateProcessor.java:117)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
at
org.apache.solr.update.processor.DocExpirationUpdateProcessorFactory$TTLUpdateProcessor.processAdd(DocExpirationUpdateProcessorFactory.java:347)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
at
org.apache.solr.update.processor.AbstractDefaultValueUpdateProcessorFactory$DefaultValueUpdateProcessor.processAdd(AbstractDefaultValueUpdateProcessorFactory.java:94)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
at
org.apache.solr.update.processor.AbstractDefaultValueUpdateProcessorFactory$DefaultValueUpdateProcessor.processAdd(AbstractDefaultValueUpdateProcessorFactory.java:94)
at
org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
at org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174) at
org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
at
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
at
org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2259) at
org.apache.solr.client.solrj.embedded.EmbeddedSolrServer.request(EmbeddedSolrServer.java:150)
at
org.apache.solr.client.solrj.request.AbstractUpdateRequest.process(AbstractUpdateRequest.java:124)
at org.apache.solr.client.solrj.SolrServer.add(SolrServer.java:68) at
org.apache.solr.client.solrj.SolrServer.add(SolrServer.java:54) at
org.apache.solr.hadoop.BatchWriter.runUpdate(BatchWriter.java:135) at
org.apache.solr.hadoop.BatchWriter$Batch.run(BatchWriter.java:90) at
org.apache.solr.hadoop.BatchWriter.queueBatch(BatchWriter.java:180)


Do you know what the issue is? I don't have a field named "id" in my schema.



--
Sent from: https://lucene.472066.n3.nabble.com/Solr-User-f472068.html

Mime
View raw message