lucenenet-commits mailing list archives

From pnas...@apache.org
Subject [Lucene.Net] svn commit: r1230919 [5/22] - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g: ./ build/scripts/ build/vs2010/contrib/ build/vs2010/core/ build/vs2010/demo/ build/vs2010/test/ src/contrib/Analyzers/ src/contrib/Analyzers/CJK/ src/contrib/Analyzers/...
Date Fri, 13 Jan 2012 08:42:38 GMT
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj Fri Jan 13 08:42:34 2012
@@ -1,130 +1,151 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <PropertyGroup>
-    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
-    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
-    <ProductVersion>9.0.21022</ProductVersion>
-    <SchemaVersion>2.0</SchemaVersion>
-    <ProjectGuid>{4286E961-9143-4821-B46D-3D39D3736386}</ProjectGuid>
-    <OutputType>Library</OutputType>
-    <AppDesignerFolder>Properties</AppDesignerFolder>
-    <RootNamespace>Lucene.Net.Analysis</RootNamespace>
-    <AssemblyName>Lucene.Net.Analyzers</AssemblyName>
-    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
-    <FileAlignment>512</FileAlignment>
-    <FileUpgradeFlags>
-    </FileUpgradeFlags>
-    <OldToolsVersion>3.5</OldToolsVersion>
-    <UpgradeBackupLocation />
-    <TargetFrameworkProfile />
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
-    <DebugSymbols>true</DebugSymbols>
-    <DebugType>full</DebugType>
-    <Optimize>false</Optimize>
-    <OutputPath>..\..\..\build\bin\contrib\Analyzers\Debug\</OutputPath>
-    <DefineConstants>DEBUG;TRACE</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-    <NoWarn>618</NoWarn>
-  </PropertyGroup>
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
-    <DebugType>none</DebugType>
-    <Optimize>true</Optimize>
-    <OutputPath>..\..\..\build\bin\contrib\Analyzers\Release\</OutputPath>
-    <DefineConstants>TRACE</DefineConstants>
-    <ErrorReport>prompt</ErrorReport>
-    <WarningLevel>4</WarningLevel>
-    <DocumentationFile>..\..\..\build\bin\contrib\Analyzers\Release\Lucene.Net.Analyzers.XML</DocumentationFile>
-    <NoWarn>618</NoWarn>
-  </PropertyGroup>
-  <PropertyGroup>
-    <SignAssembly>true</SignAssembly>
-  </PropertyGroup>
-  <PropertyGroup>
-    <AssemblyOriginatorKeyFile>Lucene.Net.snk</AssemblyOriginatorKeyFile>
-  </PropertyGroup>
-  <ItemGroup>
-    <Reference Include="System" />
-  </ItemGroup>
-  <ItemGroup>
-    <Compile Include="AR\ArabicAnalyzer.cs" />
-    <Compile Include="AR\ArabicLetterTokenizer.cs" />
-    <Compile Include="AR\ArabicNormalizationFilter.cs" />
-    <Compile Include="AR\ArabicNormalizer.cs" />
-    <Compile Include="AR\ArabicStemFilter.cs" />
-    <Compile Include="AR\ArabicStemmer.cs" />
-    <Compile Include="BR\BrazilianAnalyzer.cs" />
-    <Compile Include="BR\BrazilianStemFilter.cs" />
-    <Compile Include="BR\BrazilianStemmer.cs" />
-    <Compile Include="CJK\CJKAnalyzer.cs" />
-    <Compile Include="CJK\CJKTokenizer.cs" />
-    <Compile Include="Cn\ChineseAnalyzer.cs" />
-    <Compile Include="Cn\ChineseFilter.cs" />
-    <Compile Include="Cn\ChineseTokenizer.cs" />
-    <Compile Include="Cz\CzechAnalyzer.cs" />
-    <Compile Include="De\GermanAnalyzer.cs" />
-    <Compile Include="De\GermanStemFilter.cs" />
-    <Compile Include="De\GermanStemmer.cs" />
-    <Compile Include="De\WordlistLoader.cs" />
-    <Compile Include="Miscellaneous\ChainedFilter.cs" />
-    <Compile Include="Fr\FrenchAnalyzer.cs" />
-    <Compile Include="Fr\FrenchStemFilter.cs" />
-    <Compile Include="Fr\FrenchStemmer.cs" />
-    <Compile Include="Miscellaneous\EmptyTokenStream.cs" />
-    <Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
-    <Compile Include="Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />
-    <Compile Include="Miscellaneous\PrefixAwareTokenStream.cs" />
-    <Compile Include="Miscellaneous\SingleTokenTokenStream.cs" />
-    <Compile Include="NGram\EdgeNGramTokenFilter.cs" />
-    <Compile Include="NGram\EdgeNGramTokenizer.cs" />
-    <Compile Include="NGram\NGramTokenFilter.cs" />
-    <Compile Include="NGram\NGramTokenizer.cs" />
-    <Compile Include="Nl\DutchAnalyzer.cs" />
-    <Compile Include="Nl\DutchStemFilter.cs" />
-    <Compile Include="Nl\DutchStemmer.cs" />
-    <Compile Include="Nl\WordlistLoader.cs" />
-    <Compile Include="Payloads\PayloadHelper.cs" />
-    <Compile Include="Ru\RussianAnalyzer.cs" />
-    <Compile Include="Ru\RussianCharsets.cs" />
-    <Compile Include="Ru\RussianLetterTokenizer.cs" />
-    <Compile Include="Ru\RussianLowerCaseFilter.cs" />
-    <Compile Include="Ru\RussianStemFilter.cs" />
-    <Compile Include="Ru\RussianStemmer.cs" />
-    <Compile Include="Properties\AssemblyInfo.cs" />
-    <Compile Include="Shingle\Codec\OneDimensionalNonWeightedTokenSettingsCodec.cs" />
-    <Compile Include="Shingle\Codec\SimpleThreeDimensionalTokenSettingsCodec.cs" />
-    <Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
-    <Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
-    <Compile Include="Shingle\Matrix\Column.cs" />
-    <Compile Include="Shingle\Matrix\Matrix.cs" />
-    <Compile Include="Shingle\Matrix\MatrixPermutationIterator.cs" />
-    <Compile Include="Shingle\Matrix\Row.cs" />
-    <Compile Include="Shingle\ShingleAnalyzerWrapper.cs" />
-    <Compile Include="Shingle\ShingleFilter.cs" />
-    <Compile Include="Shingle\ShingleMatrixFilter.cs" />
-    <Compile Include="Shingle\TokenPositioner.cs" />
-    <Compile Include="WordlistLoader.cs" />
-  </ItemGroup>
-  <ItemGroup>
-    <EmbeddedResource Include="AR\ArabicStopWords.txt" />
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="..\..\core\Lucene.Net.csproj">
-      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
-      <Name>Lucene.Net</Name>
-    </ProjectReference>
-  </ItemGroup>
-  <ItemGroup>
-    <None Include="Lucene.Net.snk" />
-  </ItemGroup>
-  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
-  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
-       Other similar extension points exist, see Microsoft.Common.targets.
-  <Target Name="BeforeBuild">
-  </Target>
-  <Target Name="AfterBuild">
-  </Target>
-  -->
-</Project>
\ No newline at end of file
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+
+<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProductVersion>9.0.21022</ProductVersion>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{4286E961-9143-4821-B46D-3D39D3736386}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Analysis</RootNamespace>
+    <AssemblyName>Lucene.Net.Analyzers</AssemblyName>
+    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <FileUpgradeFlags>
+    </FileUpgradeFlags>
+    <OldToolsVersion>3.5</OldToolsVersion>
+    <UpgradeBackupLocation />
+    <TargetFrameworkProfile />
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>..\..\..\build\bin\contrib\Analyzers\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <NoWarn>618</NoWarn>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>none</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>..\..\..\build\bin\contrib\Analyzers\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+    <DocumentationFile>..\..\..\build\bin\contrib\Analyzers\Release\Lucene.Net.Analyzers.XML</DocumentationFile>
+    <NoWarn>618</NoWarn>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SignAssembly>true</SignAssembly>
+  </PropertyGroup>
+  <PropertyGroup>
+    <AssemblyOriginatorKeyFile>Lucene.Net.snk</AssemblyOriginatorKeyFile>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="AR\ArabicAnalyzer.cs" />
+    <Compile Include="AR\ArabicLetterTokenizer.cs" />
+    <Compile Include="AR\ArabicNormalizationFilter.cs" />
+    <Compile Include="AR\ArabicNormalizer.cs" />
+    <Compile Include="AR\ArabicStemFilter.cs" />
+    <Compile Include="AR\ArabicStemmer.cs" />
+    <Compile Include="BR\BrazilianAnalyzer.cs" />
+    <Compile Include="BR\BrazilianStemFilter.cs" />
+    <Compile Include="BR\BrazilianStemmer.cs" />
+    <Compile Include="CJK\CJKAnalyzer.cs" />
+    <Compile Include="CJK\CJKTokenizer.cs" />
+    <Compile Include="Cn\ChineseAnalyzer.cs" />
+    <Compile Include="Cn\ChineseFilter.cs" />
+    <Compile Include="Cn\ChineseTokenizer.cs" />
+    <Compile Include="Cz\CzechAnalyzer.cs" />
+    <Compile Include="De\GermanAnalyzer.cs" />
+    <Compile Include="De\GermanStemFilter.cs" />
+    <Compile Include="De\GermanStemmer.cs" />
+    <Compile Include="De\WordlistLoader.cs" />
+    <Compile Include="Miscellaneous\ChainedFilter.cs" />
+    <Compile Include="Fr\FrenchAnalyzer.cs" />
+    <Compile Include="Fr\FrenchStemFilter.cs" />
+    <Compile Include="Fr\FrenchStemmer.cs" />
+    <Compile Include="Miscellaneous\EmptyTokenStream.cs" />
+    <Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
+    <Compile Include="Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />
+    <Compile Include="Miscellaneous\PrefixAwareTokenStream.cs" />
+    <Compile Include="Miscellaneous\SingleTokenTokenStream.cs" />
+    <Compile Include="NGram\EdgeNGramTokenFilter.cs" />
+    <Compile Include="NGram\EdgeNGramTokenizer.cs" />
+    <Compile Include="NGram\NGramTokenFilter.cs" />
+    <Compile Include="NGram\NGramTokenizer.cs" />
+    <Compile Include="Nl\DutchAnalyzer.cs" />
+    <Compile Include="Nl\DutchStemFilter.cs" />
+    <Compile Include="Nl\DutchStemmer.cs" />
+    <Compile Include="Nl\WordlistLoader.cs" />
+    <Compile Include="Payloads\PayloadHelper.cs" />
+    <Compile Include="Ru\RussianAnalyzer.cs" />
+    <Compile Include="Ru\RussianCharsets.cs" />
+    <Compile Include="Ru\RussianLetterTokenizer.cs" />
+    <Compile Include="Ru\RussianLowerCaseFilter.cs" />
+    <Compile Include="Ru\RussianStemFilter.cs" />
+    <Compile Include="Ru\RussianStemmer.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Shingle\Codec\OneDimensionalNonWeightedTokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Codec\SimpleThreeDimensionalTokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Matrix\Column.cs" />
+    <Compile Include="Shingle\Matrix\Matrix.cs" />
+    <Compile Include="Shingle\Matrix\MatrixPermutationIterator.cs" />
+    <Compile Include="Shingle\Matrix\Row.cs" />
+    <Compile Include="Shingle\ShingleAnalyzerWrapper.cs" />
+    <Compile Include="Shingle\ShingleFilter.cs" />
+    <Compile Include="Shingle\ShingleMatrixFilter.cs" />
+    <Compile Include="Shingle\TokenPositioner.cs" />
+    <Compile Include="WordlistLoader.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="AR\ArabicStopWords.txt" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\core\Lucene.Net.csproj">
+      <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="Lucene.Net.snk" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>

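For reference, the project file above can be built on its own; a minimal sketch, assuming MSBuild for .NET 4.0 is on the PATH (output lands under build\bin\contrib\Analyzers\ as configured in the OutputPath properties):

    msbuild Contrib.Analyzers.csproj /p:Configuration=Release
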
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs Fri Jan 13 08:42:34 2012
@@ -1,191 +1,212 @@
-using System;
-using System.IO;
-using System.Text;
-using System.Collections;
-using System.Collections.Generic;
-
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.De;
-using Lucene.Net.Analysis.Standard;
-
-namespace Lucene.Net.Analysis.Cz
-{
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2004 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// Analyzer for Czech language. Supports an external list of stopwords (words that
-	/// will not be indexed at all).
-	/// A default set of stopwords is used unless an alternative list is specified, the
-	/// exclusion list is empty by default.
-	/// 
-	/// <author>Lukas Zapletal [lzap@root.cz]</author>
-	/// <version>$Id: CzechAnalyzer.java,v 1.2 2003/01/22 20:54:47 ehatcher Exp $</version>
-	/// </summary>
-	public sealed class CzechAnalyzer : Analyzer 
-	{
-		/// <summary>
-		/// List of typical stopwords.
-		/// </summary>
-		public static String[] STOP_WORDS = 
-				 {
-					 "a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
-					 "byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
-					 "jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
-					 "mi","n\u00e1m","tom","tomuto","m\u00edt","nic","proto","kterou","byla",
-					 "toho","proto\u017ee","asi","ho","na\u0161i","napi\u0161te","re","co\u017e","t\u00edm",
-					 "tak\u017ee","sv\u00fdch","jej\u00ed","sv\u00fdmi","jste","aj","tu","tedy","teto",
-					 "bylo","kde","ke","prav\u00e9","ji","nad","nejsou","\u010di","pod","t\u00e9ma",
-					 "mezi","p\u0159es","ty","pak","v\u00e1m","ani","kdy\u017e","v\u0161ak","neg","jsem",
-					 "tento","\u010dl\u00e1nku","\u010dl\u00e1nky","aby","jsme","p\u0159ed","pta","jejich",
-					 "byl","je\u0161t\u011b","a\u017e","bez","tak\u00e9","pouze","prvn\u00ed","va\u0161e","kter\u00e1",
-					 "n\u00e1s","nov\u00fd","tipy","pokud","m\u016f\u017ee","strana","jeho","sv\u00e9","jin\u00e9",
-					 "zpr\u00e1vy","nov\u00e9","nen\u00ed","v\u00e1s","jen","podle","zde","u\u017e","b\u00fdt","v\u00edce",
-					 "bude","ji\u017e","ne\u017e","kter\u00fd","by","kter\u00e9","co","nebo","ten","tak",
-					 "m\u00e1","p\u0159i","od","po","jsou","jak","dal\u0161\u00ed","ale","si","se","ve",
-					 "to","jako","za","zp\u011bt","ze","do","pro","je","na","atd","atp",
-					 "jakmile","p\u0159i\u010dem\u017e","j\u00e1","on","ona","ono","oni","ony","my","vy",
-					 "j\u00ed","ji","m\u011b","mne","jemu","tomu","t\u011bm","t\u011bmu","n\u011bmu","n\u011bmu\u017e",
-					 "jeho\u017e","j\u00ed\u017e","jeliko\u017e","je\u017e","jako\u017e","na\u010de\u017e",
-		};
-
-		/// <summary>
-		/// Contains the stopwords used with the StopFilter.
-		/// </summary>
-        private ICollection<string> stoptable = new List<string>();
-
-		/// <summary>
-		/// Builds an analyzer.
-		/// </summary>
-		public CzechAnalyzer() 
-		{
-			stoptable = StopFilter.MakeStopSet( STOP_WORDS );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public CzechAnalyzer( String[] stopwords ) 
-		{
-			stoptable = StopFilter.MakeStopSet( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-        public CzechAnalyzer(ICollection<string> stopwords) 
-		{
-			stoptable = stopwords;
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public CzechAnalyzer( FileInfo stopwords ) 
-		{
-			stoptable = WordlistLoader.GetWordtable( stopwords );
-		}
-
-		/// <summary>
-		/// Loads stopwords hash from resource stream (file, database...).
-		/// </summary>
-		/// <param name="wordfile">File containing the wordlist</param>
-		/// <param name="encoding">Encoding used (win-1250, iso-8859-2, ...}, null for default system encoding</param>
-		public void LoadStopWords( Stream wordfile, String encoding ) 
-		{
-			if ( wordfile == null ) 
-			{
-				stoptable = new List<string>();
-				return;
-			}
-			try 
-			{
-				// clear any previous table (if present)
-				stoptable = new List<string>();
-
-				StreamReader isr;
-				if (encoding == null)
-					isr = new StreamReader(wordfile);
-				else
-					isr = new StreamReader(wordfile, Encoding.GetEncoding(encoding));
-
-				String word;
-				while ( ( word = isr.ReadLine() ) != null ) 
-				{
-					stoptable.Add(word);
-				}
-
-			} 
-			catch ( IOException ) 
-			{
-				stoptable = null;
-			}
-		}
-
-		/// <summary>
-		/// Creates a TokenStream which tokenizes all the text in the provided Reader.
-		/// <returns>
-		/// A TokenStream build from a StandardTokenizer filtered with
-		/// StandardFilter, StopFilter, GermanStemFilter and LowerCaseFilter
-		/// </returns>
-		public override TokenStream TokenStream( String fieldName, TextReader reader ) 
-		{
-			TokenStream result = new StandardTokenizer( reader );
-			result = new StandardFilter( result );
-			result = new LowerCaseFilter( result );
-			result = new StopFilter( result, stoptable );
-			return result;
-		}
-	}
-}
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Text;
+using System.Collections;
+using System.Collections.Generic;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.De;
+using Lucene.Net.Analysis.Standard;
+
+namespace Lucene.Net.Analysis.Cz
+{
+	/* ====================================================================
+	 * The Apache Software License, Version 1.1
+	 *
+	 * Copyright (c) 2004 The Apache Software Foundation.  All rights
+	 * reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without
+	 * modification, are permitted provided that the following conditions
+	 * are met:
+	 *
+	 * 1. Redistributions of source code must retain the above copyright
+	 *    notice, this list of conditions and the following disclaimer.
+	 *
+	 * 2. Redistributions in binary form must reproduce the above copyright
+	 *    notice, this list of conditions and the following disclaimer in
+	 *    the documentation and/or other materials provided with the
+	 *    distribution.
+	 *
+	 * 3. The end-user documentation included with the redistribution,
+	 *    if any, must include the following acknowledgment:
+	 *       "This product includes software developed by the
+	 *        Apache Software Foundation (http://www.apache.org/)."
+	 *    Alternately, this acknowledgment may appear in the software itself,
+	 *    if and wherever such third-party acknowledgments normally appear.
+	 *
+	 * 4. The names "Apache" and "Apache Software Foundation" and
+	 *    "Apache Lucene" must not be used to endorse or promote products
+	 *    derived from this software without prior written permission. For
+	 *    written permission, please contact apache@apache.org.
+	 *
+	 * 5. Products derived from this software may not be called "Apache",
+	 *    "Apache Lucene", nor may "Apache" appear in their name, without
+	 *    prior written permission of the Apache Software Foundation.
+	 *
+	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+	 * SUCH DAMAGE.
+	 * ====================================================================
+	 *
+	 * This software consists of voluntary contributions made by many
+	 * individuals on behalf of the Apache Software Foundation.  For more
+	 * information on the Apache Software Foundation, please see
+	 * <http://www.apache.org/>.
+	 */
+
+	/// <summary>
+	/// Analyzer for Czech language. Supports an external list of stopwords (words that
+	/// will not be indexed at all).
+	/// A default set of stopwords is used unless an alternative list is specified, the
+	/// exclusion list is empty by default.
+	/// 
+	/// <author>Lukas Zapletal [lzap@root.cz]</author>
+	/// <version>$Id: CzechAnalyzer.java,v 1.2 2003/01/22 20:54:47 ehatcher Exp $</version>
+	/// </summary>
+	public sealed class CzechAnalyzer : Analyzer 
+	{
+		/// <summary>
+		/// List of typical stopwords.
+		/// </summary>
+		public static String[] STOP_WORDS = 
+				 {
+					 "a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
+					 "byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
+					 "jej","zda","pro\u010d","m\u00e1te","tato","kam","tohoto","kdo","kte\u0159\u00ed",
+					 "mi","n\u00e1m","tom","tomuto","m\u00edt","nic","proto","kterou","byla",
+					 "toho","proto\u017ee","asi","ho","na\u0161i","napi\u0161te","re","co\u017e","t\u00edm",
+					 "tak\u017ee","sv\u00fdch","jej\u00ed","sv\u00fdmi","jste","aj","tu","tedy","teto",
+					 "bylo","kde","ke","prav\u00e9","ji","nad","nejsou","\u010di","pod","t\u00e9ma",
+					 "mezi","p\u0159es","ty","pak","v\u00e1m","ani","kdy\u017e","v\u0161ak","neg","jsem",
+					 "tento","\u010dl\u00e1nku","\u010dl\u00e1nky","aby","jsme","p\u0159ed","pta","jejich",
+					 "byl","je\u0161t\u011b","a\u017e","bez","tak\u00e9","pouze","prvn\u00ed","va\u0161e","kter\u00e1",
+					 "n\u00e1s","nov\u00fd","tipy","pokud","m\u016f\u017ee","strana","jeho","sv\u00e9","jin\u00e9",
+					 "zpr\u00e1vy","nov\u00e9","nen\u00ed","v\u00e1s","jen","podle","zde","u\u017e","b\u00fdt","v\u00edce",
+					 "bude","ji\u017e","ne\u017e","kter\u00fd","by","kter\u00e9","co","nebo","ten","tak",
+					 "m\u00e1","p\u0159i","od","po","jsou","jak","dal\u0161\u00ed","ale","si","se","ve",
+					 "to","jako","za","zp\u011bt","ze","do","pro","je","na","atd","atp",
+					 "jakmile","p\u0159i\u010dem\u017e","j\u00e1","on","ona","ono","oni","ony","my","vy",
+					 "j\u00ed","ji","m\u011b","mne","jemu","tomu","t\u011bm","t\u011bmu","n\u011bmu","n\u011bmu\u017e",
+					 "jeho\u017e","j\u00ed\u017e","jeliko\u017e","je\u017e","jako\u017e","na\u010de\u017e",
+		};
+
+		/// <summary>
+		/// Contains the stopwords used with the StopFilter.
+		/// </summary>
+        private ICollection<string> stoptable = new List<string>();
+
+		/// <summary>
+		/// Builds an analyzer.
+		/// </summary>
+		public CzechAnalyzer() 
+		{
+			stoptable = StopFilter.MakeStopSet( STOP_WORDS );
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+		public CzechAnalyzer( String[] stopwords ) 
+		{
+			stoptable = StopFilter.MakeStopSet( stopwords );
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+        public CzechAnalyzer(ICollection<string> stopwords) 
+		{
+			stoptable = stopwords;
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+		public CzechAnalyzer( FileInfo stopwords ) 
+		{
+			stoptable = WordlistLoader.GetWordtable( stopwords );
+		}
+
+		/// <summary>
+		/// Loads stopwords hash from resource stream (file, database...).
+		/// </summary>
+		/// <param name="wordfile">File containing the wordlist</param>
+		/// <param name="encoding">Encoding used (win-1250, iso-8859-2, ...}, null for default system encoding</param>
+		public void LoadStopWords( Stream wordfile, String encoding ) 
+		{
+			if ( wordfile == null ) 
+			{
+				stoptable = new List<string>();
+				return;
+			}
+			try 
+			{
+				// clear any previous table (if present)
+				stoptable = new List<string>();
+
+				StreamReader isr;
+				if (encoding == null)
+					isr = new StreamReader(wordfile);
+				else
+					isr = new StreamReader(wordfile, Encoding.GetEncoding(encoding));
+
+				String word;
+				while ( ( word = isr.ReadLine() ) != null ) 
+				{
+					stoptable.Add(word);
+				}
+
+			} 
+			catch ( IOException ) 
+			{
+				stoptable = null;
+			}
+		}
+
+		/// <summary>
+		/// Creates a TokenStream which tokenizes all the text in the provided Reader.
+		/// <returns>
+		/// A TokenStream build from a StandardTokenizer filtered with
+		/// StandardFilter, StopFilter, GermanStemFilter and LowerCaseFilter
+		/// </returns>
+		public override TokenStream TokenStream( String fieldName, TextReader reader ) 
+		{
+			TokenStream result = new StandardTokenizer( reader );
+			result = new StandardFilter( result );
+			result = new LowerCaseFilter( result );
+			result = new StopFilter( result, stoptable );
+			return result;
+		}
+	}
+}

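For illustration, a minimal usage sketch of the CzechAnalyzer committed above, using the Token-based Next()/TermText() API that this branch still exposes. The field name, sample text, and class name are hypothetical:

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Cz;

    class CzechAnalyzerDemo
    {
        static void Main()
        {
            // Stopwords come from the default STOP_WORDS set unless another list is supplied.
            Analyzer analyzer = new CzechAnalyzer();
            TokenStream stream = analyzer.TokenStream("content",
                new StringReader("Lucene je knihovna pro fulltextove vyhledavani"));

            Token token;
            while ((token = stream.Next()) != null)
            {
                // Tokens are lower-cased and stopwords removed by the chain built in TokenStream().
                Console.WriteLine(token.TermText());
            }
        }
    }
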
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs Fri Jan 13 08:42:34 2012
@@ -1,126 +1,147 @@
-using System;
-using System.IO;
-using System.Collections;
-using System.Collections.Generic;
-using Lucene.Net.Analysis.Standard;
-using Lucene.Net.Analysis;
-
-namespace Lucene.Net.Analysis.De
-{
-	/// <summary>
-	/// Analyzer for German language. Supports an external list of stopwords (words that
-	/// will not be indexed at all) and an external list of exclusions (word that will
-	/// not be stemmed, but indexed).
-	/// A default set of stopwords is used unless an alternative list is specified, the
-	/// exclusion list is empty by default.
-	/// </summary>
-	public class GermanAnalyzer : Analyzer
-	{
-		/// <summary>
-		/// List of typical german stopwords.
-		/// </summary>
-		private String[] GERMAN_STOP_WORDS = 
-		{
-			"einer", "eine", "eines", "einem", "einen",
-			"der", "die", "das", "dass", "daß",
-			"du", "er", "sie", "es",
-			"was", "wer", "wie", "wir",
-			"und", "oder", "ohne", "mit",
-			"am", "im", "in", "aus", "auf",
-			"ist", "sein", "war", "wird",
-			"ihr", "ihre", "ihres",
-			"als", "für", "von",
-			"dich", "dir", "mich", "mir",
-			"mein", "kein",
-			"durch", "wegen"
-		};
-
-		/// <summary>
-		/// Contains the stopwords used with the StopFilter. 
-		/// </summary>
-        private ICollection<string> stoptable = new List<string>();
-
-		/// <summary>
-		/// Contains words that should be indexed but not stemmed. 
-		/// </summary>
-        private ICollection<string> excltable = new List<string>();
-
-		/// <summary>
-		/// Builds an analyzer. 
-		/// </summary>
-		public GermanAnalyzer()
-		{
-			stoptable = StopFilter.MakeStopSet( GERMAN_STOP_WORDS );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words. 
-		/// </summary>
-		/// <param name="stopwords"></param>
-		public GermanAnalyzer( String[] stopwords )
-		{
-			stoptable = StopFilter.MakeStopSet( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words. 
-		/// </summary>
-		/// <param name="stopwords"></param>
-        public GermanAnalyzer(ICollection<string> stopwords)
-		{
-			stoptable = stopwords;
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words. 
-		/// </summary>
-		/// <param name="stopwords"></param>
-		public GermanAnalyzer( FileInfo stopwords )
-		{
-			stoptable = WordlistLoader.GetWordtable( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from an array of Strings. 
-		/// </summary>
-		/// <param name="exclusionlist"></param>
-		public void SetStemExclusionTable( String[] exclusionlist )
-		{
-			excltable = StopFilter.MakeStopSet( exclusionlist );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from a Hashtable. 
-		/// </summary>
-		/// <param name="exclusionlist"></param>
-        public void SetStemExclusionTable(ICollection<string> exclusionlist)
-		{
-			excltable = exclusionlist;
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from the words contained in the given file. 
-		/// </summary>
-		/// <param name="exclusionlist"></param>
-		public void SetStemExclusionTable(FileInfo exclusionlist)
-		{
-			excltable = WordlistLoader.GetWordtable(exclusionlist);
-		}
-
-		/// <summary>
-		/// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
-		/// </summary>
-		/// <param name="fieldName"></param>
-		/// <param name="reader"></param>
-		/// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
-		public override TokenStream TokenStream(String fieldName, TextReader reader)
-		{
-			TokenStream result = new StandardTokenizer( reader );
-			result = new StandardFilter( result );
-			result = new LowerCaseFilter(result);
-			result = new StopFilter( result, stoptable );
-			result = new GermanStemFilter( result, excltable );
-			return result;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.De
+{
+	/// <summary>
+	/// Analyzer for German language. Supports an external list of stopwords (words that
+	/// will not be indexed at all) and an external list of exclusions (word that will
+	/// not be stemmed, but indexed).
+	/// A default set of stopwords is used unless an alternative list is specified, the
+	/// exclusion list is empty by default.
+	/// </summary>
+	public class GermanAnalyzer : Analyzer
+	{
+		/// <summary>
+		/// List of typical german stopwords.
+		/// </summary>
+		private String[] GERMAN_STOP_WORDS = 
+		{
+			"einer", "eine", "eines", "einem", "einen",
+			"der", "die", "das", "dass", "daß",
+			"du", "er", "sie", "es",
+			"was", "wer", "wie", "wir",
+			"und", "oder", "ohne", "mit",
+			"am", "im", "in", "aus", "auf",
+			"ist", "sein", "war", "wird",
+			"ihr", "ihre", "ihres",
+			"als", "für", "von",
+			"dich", "dir", "mich", "mir",
+			"mein", "kein",
+			"durch", "wegen"
+		};
+
+		/// <summary>
+		/// Contains the stopwords used with the StopFilter. 
+		/// </summary>
+        private ICollection<string> stoptable = new List<string>();
+
+		/// <summary>
+		/// Contains words that should be indexed but not stemmed. 
+		/// </summary>
+        private ICollection<string> excltable = new List<string>();
+
+		/// <summary>
+		/// Builds an analyzer. 
+		/// </summary>
+		public GermanAnalyzer()
+		{
+			stoptable = StopFilter.MakeStopSet( GERMAN_STOP_WORDS );
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words. 
+		/// </summary>
+		/// <param name="stopwords"></param>
+		public GermanAnalyzer( String[] stopwords )
+		{
+			stoptable = StopFilter.MakeStopSet( stopwords );
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words. 
+		/// </summary>
+		/// <param name="stopwords"></param>
+        public GermanAnalyzer(ICollection<string> stopwords)
+		{
+			stoptable = stopwords;
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words. 
+		/// </summary>
+		/// <param name="stopwords"></param>
+		public GermanAnalyzer( FileInfo stopwords )
+		{
+			stoptable = WordlistLoader.GetWordtable( stopwords );
+		}
+
+		/// <summary>
+		/// Builds an exclusionlist from an array of Strings. 
+		/// </summary>
+		/// <param name="exclusionlist"></param>
+		public void SetStemExclusionTable( String[] exclusionlist )
+		{
+			excltable = StopFilter.MakeStopSet( exclusionlist );
+		}
+
+		/// <summary>
+		/// Builds an exclusionlist from a Hashtable. 
+		/// </summary>
+		/// <param name="exclusionlist"></param>
+        public void SetStemExclusionTable(ICollection<string> exclusionlist)
+		{
+			excltable = exclusionlist;
+		}
+
+		/// <summary>
+		/// Builds an exclusionlist from the words contained in the given file. 
+		/// </summary>
+		/// <param name="exclusionlist"></param>
+		public void SetStemExclusionTable(FileInfo exclusionlist)
+		{
+			excltable = WordlistLoader.GetWordtable(exclusionlist);
+		}
+
+		/// <summary>
+		/// Creates a TokenStream which tokenizes all the text in the provided TextReader. 
+		/// </summary>
+		/// <param name="fieldName"></param>
+		/// <param name="reader"></param>
+		/// <returns>A TokenStream build from a StandardTokenizer filtered with StandardFilter, StopFilter, GermanStemFilter</returns>
+		public override TokenStream TokenStream(String fieldName, TextReader reader)
+		{
+			TokenStream result = new StandardTokenizer( reader );
+			result = new StandardFilter( result );
+			result = new LowerCaseFilter(result);
+			result = new StopFilter( result, stoptable );
+			result = new GermanStemFilter( result, excltable );
+			return result;
+		}
+	}
+}

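A similar hedged sketch for the GermanAnalyzer above, showing the stem-exclusion hook; the field name and sample words are illustrative only:

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.De;

    class GermanAnalyzerDemo
    {
        static void Main()
        {
            GermanAnalyzer analyzer = new GermanAnalyzer();
            // Illustrative exclusion: "rechte" is indexed but not stemmed
            // (the exclusion check runs after LowerCaseFilter, so use lower case).
            analyzer.SetStemExclusionTable(new String[] { "rechte" });

            TokenStream stream = analyzer.TokenStream("body",
                new StringReader("Die Rechte der Buerger"));

            Token token;
            while ((token = stream.Next()) != null)
                Console.WriteLine(token.TermText());
        }
    }
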
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs Fri Jan 13 08:42:34 2012
@@ -1,86 +1,107 @@
-using System;
-using System.IO;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.De
-{
-	/// <summary>
-	/// A filter that stems German words. It supports a table of words that should
-	/// not be stemmed at all. The stemmer used can be changed at runtime after the
-	/// filter object is created (as long as it is a GermanStemmer).
-	/// </summary>
-	public sealed class GermanStemFilter : TokenFilter
-	{
-		/// <summary>
-		/// The actual token in the input stream.
-		/// </summary>
-		private Token token = null;
-		private GermanStemmer stemmer = null;
-        private ICollection<string> exclusions = null;
-    
-		public GermanStemFilter( TokenStream _in ) : base(_in)
-		{
-			stemmer = new GermanStemmer();
-		}
-    
-		/// <summary>
-		/// Builds a GermanStemFilter that uses an exclusiontable. 
-		/// </summary>
-		/// <param name="_in"></param>
-		/// <param name="exclusiontable"></param>
-        public GermanStemFilter(TokenStream _in, ICollection<string> exclusiontable) : this(_in)
-		{
-			exclusions = exclusiontable;
-		}
-    
-		/// <summary>
-		/// </summary>
-		/// <returns>Returns the next token in the stream, or null at EOS</returns>
-		public override Token Next()
-	
-		{
-			if ( ( token = input.Next() ) == null ) 
-			{
-				return null;
-			}
-				// Check the exclusiontable
-			else if ( exclusions != null && exclusions.Contains( token.TermText() ) ) 
-			{
-				return token;
-			}
-			else 
-			{
-				String s = stemmer.Stem( token.TermText() );
-				// If not stemmed, dont waste the time creating a new token
-				if ( !s.Equals( token.TermText() ) ) 
-				{
-					return new Token( s, token.StartOffset(),
-						token.EndOffset(), token.Type() );
-				}
-				return token;
-			}
-		}
-
-		/// <summary>
-		/// Set a alternative/custom GermanStemmer for this filter. 
-		/// </summary>
-		/// <param name="stemmer"></param>
-		public void SetStemmer( GermanStemmer stemmer )
-		{
-			if ( stemmer != null ) 
-			{
-				this.stemmer = stemmer;
-			}
-		}
-
-		/// <summary>
-		/// Set an alternative exclusion list for this filter. 
-		/// </summary>
-		/// <param name="exclusiontable"></param>
-        public void SetExclusionTable(ICollection<string> exclusiontable)
-		{
-			exclusions = exclusiontable;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.De
+{
+	/// <summary>
+	/// A filter that stems German words. It supports a table of words that should
+	/// not be stemmed at all. The stemmer used can be changed at runtime after the
+	/// filter object is created (as long as it is a GermanStemmer).
+	/// </summary>
+	public sealed class GermanStemFilter : TokenFilter
+	{
+		/// <summary>
+		/// The actual token in the input stream.
+		/// </summary>
+		private Token token = null;
+		private GermanStemmer stemmer = null;
+        private ICollection<string> exclusions = null;
+    
+		public GermanStemFilter( TokenStream _in ) : base(_in)
+		{
+			stemmer = new GermanStemmer();
+		}
+    
+		/// <summary>
+		/// Builds a GermanStemFilter that uses an exclusiontable. 
+		/// </summary>
+		/// <param name="_in"></param>
+		/// <param name="exclusiontable"></param>
+        public GermanStemFilter(TokenStream _in, ICollection<string> exclusiontable) : this(_in)
+		{
+			exclusions = exclusiontable;
+		}
+    
+		/// <summary>
+		/// </summary>
+		/// <returns>Returns the next token in the stream, or null at EOS</returns>
+		public override Token Next()
+	
+		{
+			if ( ( token = input.Next() ) == null ) 
+			{
+				return null;
+			}
+				// Check the exclusiontable
+			else if ( exclusions != null && exclusions.Contains( token.TermText() ) ) 
+			{
+				return token;
+			}
+			else 
+			{
+				String s = stemmer.Stem( token.TermText() );
+				// If not stemmed, dont waste the time creating a new token
+				if ( !s.Equals( token.TermText() ) ) 
+				{
+					return new Token( s, token.StartOffset(),
+						token.EndOffset(), token.Type() );
+				}
+				return token;
+			}
+		}
+
+		/// <summary>
+		/// Set a alternative/custom GermanStemmer for this filter. 
+		/// </summary>
+		/// <param name="stemmer"></param>
+		public void SetStemmer( GermanStemmer stemmer )
+		{
+			if ( stemmer != null ) 
+			{
+				this.stemmer = stemmer;
+			}
+		}
+
+		/// <summary>
+		/// Set an alternative exclusion list for this filter. 
+		/// </summary>
+		/// <param name="exclusiontable"></param>
+        public void SetExclusionTable(ICollection<string> exclusiontable)
+		{
+			exclusions = exclusiontable;
+		}
+	}
+}

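GermanStemFilter can also be composed into a custom chain without the analyzer; a minimal sketch, assuming the same StandardTokenizer/StandardFilter/LowerCaseFilter constructors used elsewhere in this commit. The sample text and exclusion word are hypothetical:

    using System;
    using System.Collections.Generic;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.De;
    using Lucene.Net.Analysis.Standard;

    class GermanStemFilterDemo
    {
        static void Main()
        {
            TextReader reader = new StringReader("haeuser laufen gelaufen");
            TokenStream stream = new StandardTokenizer(reader);
            stream = new StandardFilter(stream);
            stream = new LowerCaseFilter(stream);

            // Terms in the exclusion collection pass through the stemmer untouched.
            ICollection<string> exclusions = new List<string> { "laufen" };
            stream = new GermanStemFilter(stream, exclusions);

            Token token;
            while ((token = stream.Next()) != null)
                Console.WriteLine(token.TermText());
        }
    }
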
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemmer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemmer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemmer.cs Fri Jan 13 08:42:34 2012
@@ -1,287 +1,308 @@
-using System;
-using System.IO;
-using System.Text;
-using System.Collections;
-
-namespace Lucene.Net.Analysis.De
-{
-	/// <summary>
-	/// A stemmer for German words. The algorithm is based on the report
-	/// "A Fast and Simple Stemming Algorithm for German Words" by Jörg
-	/// Caumanns (joerg.caumanns@isst.fhg.de).
-	/// </summary>
-	public class GermanStemmer
-	{
-		/// <summary>
-		/// Buffer for the terms while stemming them. 
-		/// </summary>
-		private StringBuilder sb = new StringBuilder();
-
-		/// <summary>
-		/// Amount of characters that are removed with <tt>Substitute()</tt> while stemming.
-		/// </summary>
-		private int substCount = 0;
-
-		/// <summary>
-		/// Stemms the given term to an unique <tt>discriminator</tt>.
-		/// </summary>
-		/// <param name="term">The term that should be stemmed.</param>
-		/// <returns>Discriminator for <tt>term</tt></returns>
-		internal String Stem( String term )
-		{
-			// Use lowercase for medium stemming.
-			term = term.ToLower();
-			if ( !IsStemmable( term ) )
-				return term;
-			// Reset the StringBuilder.
-			sb.Remove(0, sb.Length);
-			sb.Insert(0, term);
-			// Stemming starts here...
-			Substitute( sb );
-			Strip( sb );
-			Optimize( sb );
-			Resubstitute( sb );
-			RemoveParticleDenotion( sb );
-			return sb.ToString();
-		}
-
-		/// <summary>
-		/// Checks if a term could be stemmed.
-		/// </summary>
-		/// <param name="term"></param>
-		/// <returns>true if, and only if, the given term consists in letters.</returns>
-		private bool IsStemmable( String term )
-		{
-			for ( int c = 0; c < term.Length; c++ ) 
-			{
-				if ( !Char.IsLetter(term[c])) return false;
-			}
-			return true;
-		}
-
-		/// <summary>
-		/// Suffix stripping (stemming) on the current term. The stripping is reduced
-		/// to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and * "nd",
-		/// from which all regular suffixes are build of. The simplification causes
-		/// some overstemming, and way more irregular stems, but still provides unique.
-		/// discriminators in the most of those cases.
-		/// The algorithm is context free, except of the length restrictions.
-		/// </summary>
-		/// <param name="buffer"></param>
-		private void Strip( StringBuilder buffer )
-		{
-			bool doMore = true;
-			while ( doMore && buffer.Length > 3 ) 
-			{
-				if ( ( buffer.Length + substCount > 5 ) &&
-					buffer.ToString().Substring(buffer.Length - 2, 2).Equals( "nd" ) )
-				{
-					buffer.Remove( buffer.Length - 2, 2 );
-				}
-				else if ( ( buffer.Length + substCount > 4 ) &&
-					buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "em" ) ) 
-				{
-					buffer.Remove( buffer.Length - 2, 2 );
-				}
-				else if ( ( buffer.Length + substCount > 4 ) &&
-					buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "er" ) ) 
-				{
-					buffer.Remove( buffer.Length - 2, 2 );
-				}
-				else if ( buffer[buffer.Length - 1] == 'e' ) 
-				{
-					buffer.Remove(buffer.Length - 1, 1);
-				}
-				else if ( buffer[buffer.Length - 1] == 's' ) 
-				{
-					buffer.Remove(buffer.Length - 1, 1);
-				}
-				else if ( buffer[buffer.Length - 1] == 'n' ) 
-				{
-					buffer.Remove(buffer.Length - 1, 1);
-				}
-					// "t" occurs only as suffix of verbs.
-				else if ( buffer[buffer.Length - 1] == 't') 
-				{
-					buffer.Remove(buffer.Length - 1, 1);
-				}
-				else 
-				{
-					doMore = false;
-				}
-			}
-		}
-
-		/// <summary>
-		/// Does some optimizations on the term. This optimisations are contextual.
-		/// </summary>
-		/// <param name="buffer"></param>
-		private void Optimize( StringBuilder buffer )
-		{
-			// Additional step for female plurals of professions and inhabitants.
-			if ( buffer.Length > 5 && buffer.ToString().Substring(buffer.Length - 5, 5).Equals( "erin*" )) 
-			{
-				buffer.Remove(buffer.Length - 1, 1);
-				Strip(buffer);
-			}
-			// Additional step for irregular plural nouns like "Matrizen -> Matrix".
-			if ( buffer[buffer.Length - 1] == ('z') ) 
-			{
-				buffer[buffer.Length - 1] = 'x';
-			}
-		}
-
-		/// <summary>
-		/// Removes a particle denotion ("ge") from a term.
-		/// </summary>
-		/// <param name="buffer"></param>
-		private void RemoveParticleDenotion( StringBuilder buffer )
-		{
-			if ( buffer.Length > 4 ) 
-			{
-				for ( int c = 0; c < buffer.Length - 3; c++ ) 
-				{
-					if ( buffer.ToString().Substring( c, 4 ).Equals( "gege" ) ) 
-					{
-						buffer.Remove(c, 2);
-						return;
-					}
-				}
-			}
-		}
-
-		/// <summary>
-		/// Do some substitutions for the term to reduce overstemming:
-		///
-		/// - Substitute Umlauts with their corresponding vowel: äöü -> aou,
-		///   "ß" is substituted by "ss"
-		/// - Substitute a second char of a pair of equal characters with
-		/// an asterisk: ?? -> ?*
-		/// - Substitute some common character combinations with a token:
-		///   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
-		/// </summary>
-		private void Substitute( StringBuilder buffer )
-		{
-			substCount = 0;
-			for ( int c = 0; c < buffer.Length; c++ ) 
-			{
-				// Replace the second char of a pair of the equal characters with an asterisk
-				if ( c > 0 && buffer[c] == buffer[c - 1]) 
-				{
-					buffer[c] = '*';
-				}
-					// Substitute Umlauts.
-				else if ( buffer[c] == 'ä' ) 
-				{
-					buffer[c] = 'a';
-				}
-				else if ( buffer[c] == 'ö' ) 
-				{
-					buffer[c] = 'o';
-				}
-				else if ( buffer[c] == 'ü' ) 
-				{
-					buffer[c] = 'u';
-				}
-				// Fix bug so that 'ß' at the end of a word is replaced.
-				else if ( buffer[c] == 'ß' ) 
-				{
-				
-					buffer[c] = 's';
-					buffer.Insert(c + 1, 's');
-					substCount++;
-				}
-				// Take care that at least one character is left left side from the current one
-				if ( c < buffer.Length - 1 ) 
-				{
-					// Masking several common character combinations with an token
-					if ( ( c < buffer.Length - 2 ) && buffer[c] == 's' &&
-						buffer[c + 1] == 'c' && buffer[c + 2] == 'h' )
-					{
-						buffer[c] = '$';
-						buffer.Remove(c + 1, 2);
-						substCount =+ 2;
-					}
-					else if ( buffer[c] == 'c' && buffer[c + 1] == 'h' ) 
-					{
-						buffer[c] = '§';
-						buffer.Remove(c + 1, 1);
-						substCount++;
-					}
-					else if ( buffer[c] == 'e' && buffer[c + 1] == 'i' ) 
-					{
-						buffer[c] = '%';
-						buffer.Remove(c + 1, 1);
-						substCount++;
-					}
-					else if ( buffer[c] == 'i' && buffer[c + 1] == 'e' ) 
-					{
-						buffer[c] = '&';
-						buffer.Remove(c + 1, 1);
-						substCount++;
-					}
-					else if ( buffer[c] == 'i' && buffer[c + 1] == 'g' ) 
-					{
-						buffer[c] = '#';
-						buffer.Remove(c + 1, 1);
-						substCount++;
-					}
-					else if ( buffer[c] == 's' && buffer[c + 1] == 't' ) 
-					{
-						buffer[c] = '!';
-						buffer.Remove(c + 1, 1);
-						substCount++;
-					}
-				}
-			}
-		}
-
-		/// <summary>
-		/// Undoes the changes made by Substitute(). That are character pairs and
-		/// character combinations. Umlauts will remain as their corresponding vowel,
-		/// as "ß" remains as "ss".
-		/// </summary>
-		/// <param name="buffer"></param>
-		private void Resubstitute( StringBuilder buffer )
-		{
-			for ( int c = 0; c < buffer.Length; c++ ) 
-			{
-				if ( buffer[c] == '*' ) 
-				{
-					char x = buffer[c - 1];
-					buffer[c] = x;
-				}
-				else if ( buffer[c] == '$' ) 
-				{
-					buffer[c] = 's';
-					buffer.Insert( c + 1, new char[]{'c', 'h'}, 0, 2);
-				}
-				else if ( buffer[c] == '§' ) 
-				{
-					buffer[c] = 'c';
-					buffer.Insert( c + 1, 'h' );
-				}
-				else if ( buffer[c] == '%' ) 
-				{
-					buffer[c] = 'e';
-					buffer.Insert( c + 1, 'i' );
-				}
-				else if ( buffer[c] == '&' ) 
-				{
-					buffer[c] = 'i';
-					buffer.Insert( c + 1, 'e' );
-				}
-				else if ( buffer[c] == '#' ) 
-				{
-					buffer[c] = 'i';
-					buffer.Insert( c + 1, 'g' );
-				}
-				else if ( buffer[c] == '!' ) 
-				{
-					buffer[c] = 's';
-					buffer.Insert( c + 1, 't' );
-				}
-			}
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Text;
+using System.Collections;
+
+namespace Lucene.Net.Analysis.De
+{
+	/// <summary>
+	/// A stemmer for German words. The algorithm is based on the report
+	/// "A Fast and Simple Stemming Algorithm for German Words" by Jörg
+	/// Caumanns (joerg.caumanns@isst.fhg.de).
+	/// </summary>
+	public class GermanStemmer
+	{
+		/// <summary>
+		/// Buffer for the terms while stemming them. 
+		/// </summary>
+		private StringBuilder sb = new StringBuilder();
+
+		/// <summary>
+		/// Amount of characters that are removed with <tt>Substitute()</tt> while stemming.
+		/// </summary>
+		private int substCount = 0;
+
+		/// <summary>
+		/// Stemms the given term to an unique <tt>discriminator</tt>.
+		/// </summary>
+		/// <param name="term">The term that should be stemmed.</param>
+		/// <returns>Discriminator for <tt>term</tt></returns>
+		internal String Stem( String term )
+		{
+			// Use lowercase for medium stemming.
+			term = term.ToLower();
+			if ( !IsStemmable( term ) )
+				return term;
+			// Reset the StringBuilder.
+			sb.Remove(0, sb.Length);
+			sb.Insert(0, term);
+			// Stemming starts here...
+			Substitute( sb );
+			Strip( sb );
+			Optimize( sb );
+			Resubstitute( sb );
+			RemoveParticleDenotion( sb );
+			return sb.ToString();
+		}
+
+		/// <summary>
+		/// Checks if a term could be stemmed.
+		/// </summary>
+		/// <param name="term"></param>
+		/// <returns>true if, and only if, the given term consists only of letters.</returns>
+		private bool IsStemmable( String term )
+		{
+			for ( int c = 0; c < term.Length; c++ ) 
+			{
+				if ( !Char.IsLetter(term[c])) return false;
+			}
+			return true;
+		}
+
+		/// <summary>
+		/// Suffix stripping (stemming) on the current term. The stripping is reduced
+		/// to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and "nd",
+		/// of which all regular suffixes are built. This simplification causes
+		/// some overstemming and many more irregular stems, but still provides unique
+		/// discriminators in most of those cases.
+		/// The algorithm is context-free, except for the length restrictions.
+		/// </summary>
+		/// <param name="buffer"></param>
+		private void Strip( StringBuilder buffer )
+		{
+			bool doMore = true;
+			while ( doMore && buffer.Length > 3 ) 
+			{
+				if ( ( buffer.Length + substCount > 5 ) &&
+					buffer.ToString().Substring(buffer.Length - 2, 2).Equals( "nd" ) )
+				{
+					buffer.Remove( buffer.Length - 2, 2 );
+				}
+				else if ( ( buffer.Length + substCount > 4 ) &&
+					buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "em" ) ) 
+				{
+					buffer.Remove( buffer.Length - 2, 2 );
+				}
+				else if ( ( buffer.Length + substCount > 4 ) &&
+					buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "er" ) ) 
+				{
+					buffer.Remove( buffer.Length - 2, 2 );
+				}
+				else if ( buffer[buffer.Length - 1] == 'e' ) 
+				{
+					buffer.Remove(buffer.Length - 1, 1);
+				}
+				else if ( buffer[buffer.Length - 1] == 's' ) 
+				{
+					buffer.Remove(buffer.Length - 1, 1);
+				}
+				else if ( buffer[buffer.Length - 1] == 'n' ) 
+				{
+					buffer.Remove(buffer.Length - 1, 1);
+				}
+					// "t" occurs only as a suffix of verbs.
+				else if ( buffer[buffer.Length - 1] == 't') 
+				{
+					buffer.Remove(buffer.Length - 1, 1);
+				}
+				else 
+				{
+					doMore = false;
+				}
+			}
+		}
+
+		/// <summary>
+		/// Performs some optimizations on the term. These optimizations are contextual.
+		/// </summary>
+		/// <param name="buffer"></param>
+		private void Optimize( StringBuilder buffer )
+		{
+			// Additional step for female plurals of professions and inhabitants.
+			if ( buffer.Length > 5 && buffer.ToString().Substring(buffer.Length - 5, 5).Equals( "erin*" )) 
+			{
+				buffer.Remove(buffer.Length - 1, 1);
+				Strip(buffer);
+			}
+			// Additional step for irregular plural nouns like "Matrizen -> Matrix".
+			if ( buffer[buffer.Length - 1] == ('z') ) 
+			{
+				buffer[buffer.Length - 1] = 'x';
+			}
+		}
+
+		/// <summary>
+		/// Removes a particle denotation ("ge") from a term.
+		/// </summary>
+		/// <param name="buffer"></param>
+		private void RemoveParticleDenotion( StringBuilder buffer )
+		{
+			if ( buffer.Length > 4 ) 
+			{
+				for ( int c = 0; c < buffer.Length - 3; c++ ) 
+				{
+					if ( buffer.ToString().Substring( c, 4 ).Equals( "gege" ) ) 
+					{
+						buffer.Remove(c, 2);
+						return;
+					}
+				}
+			}
+		}
+
+		/// <summary>
+		/// Does some substitutions for the term to reduce overstemming:
+		///
+		/// - Substitute Umlauts with their corresponding vowel: äöü -> aou,
+		///   "ß" is substituted by "ss"
+		/// - Substitute the second char of a pair of equal characters with
+		///   an asterisk: ?? -> ?*
+		/// - Substitute some common character combinations with a token:
+		///   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
+		/// </summary>
+		private void Substitute( StringBuilder buffer )
+		{
+			substCount = 0;
+			for ( int c = 0; c < buffer.Length; c++ ) 
+			{
+				// Replace the second char of a pair of equal characters with an asterisk
+				if ( c > 0 && buffer[c] == buffer[c - 1]) 
+				{
+					buffer[c] = '*';
+				}
+					// Substitute Umlauts.
+				else if ( buffer[c] == 'ä' ) 
+				{
+					buffer[c] = 'a';
+				}
+				else if ( buffer[c] == 'ö' ) 
+				{
+					buffer[c] = 'o';
+				}
+				else if ( buffer[c] == 'ü' ) 
+				{
+					buffer[c] = 'u';
+				}
+				// Fix bug so that 'ß' at the end of a word is replaced.
+				else if ( buffer[c] == 'ß' ) 
+				{
+				
+					buffer[c] = 's';
+					buffer.Insert(c + 1, 's');
+					substCount++;
+				}
+				// Take care that at least one character is left after the current one
+				if ( c < buffer.Length - 1 ) 
+				{
+					// Mask several common character combinations with a token
+					if ( ( c < buffer.Length - 2 ) && buffer[c] == 's' &&
+						buffer[c + 1] == 'c' && buffer[c + 2] == 'h' )
+					{
+						buffer[c] = '$';
+						buffer.Remove(c + 1, 2);
+						substCount += 2;
+					}
+					else if ( buffer[c] == 'c' && buffer[c + 1] == 'h' ) 
+					{
+						buffer[c] = '§';
+						buffer.Remove(c + 1, 1);
+						substCount++;
+					}
+					else if ( buffer[c] == 'e' && buffer[c + 1] == 'i' ) 
+					{
+						buffer[c] = '%';
+						buffer.Remove(c + 1, 1);
+						substCount++;
+					}
+					else if ( buffer[c] == 'i' && buffer[c + 1] == 'e' ) 
+					{
+						buffer[c] = '&';
+						buffer.Remove(c + 1, 1);
+						substCount++;
+					}
+					else if ( buffer[c] == 'i' && buffer[c + 1] == 'g' ) 
+					{
+						buffer[c] = '#';
+						buffer.Remove(c + 1, 1);
+						substCount++;
+					}
+					else if ( buffer[c] == 's' && buffer[c + 1] == 't' ) 
+					{
+						buffer[c] = '!';
+						buffer.Remove(c + 1, 1);
+						substCount++;
+					}
+				}
+			}
+		}
+
+		/// <summary>
+		/// Undoes the changes made by Substitute(), that is, character pairs and
+		/// character combinations. Umlauts will remain as their corresponding vowel,
+		/// and "ß" remains as "ss".
+		/// </summary>
+		/// <param name="buffer"></param>
+		private void Resubstitute( StringBuilder buffer )
+		{
+			for ( int c = 0; c < buffer.Length; c++ ) 
+			{
+				if ( buffer[c] == '*' ) 
+				{
+					char x = buffer[c - 1];
+					buffer[c] = x;
+				}
+				else if ( buffer[c] == '$' ) 
+				{
+					buffer[c] = 's';
+					buffer.Insert( c + 1, new char[]{'c', 'h'}, 0, 2);
+				}
+				else if ( buffer[c] == '§' ) 
+				{
+					buffer[c] = 'c';
+					buffer.Insert( c + 1, 'h' );
+				}
+				else if ( buffer[c] == '%' ) 
+				{
+					buffer[c] = 'e';
+					buffer.Insert( c + 1, 'i' );
+				}
+				else if ( buffer[c] == '&' ) 
+				{
+					buffer[c] = 'i';
+					buffer.Insert( c + 1, 'e' );
+				}
+				else if ( buffer[c] == '#' ) 
+				{
+					buffer[c] = 'i';
+					buffer.Insert( c + 1, 'g' );
+				}
+				else if ( buffer[c] == '!' ) 
+				{
+					buffer[c] = 's';
+					buffer.Insert( c + 1, 't' );
+				}
+			}
+		}
+	}
+}
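
For illustration only (not part of the committed sources), a minimal sketch of how the stemmer is driven. Stem() is internal, so the hypothetical helper below assumes it is compiled into the Lucene.Net.Analyzers assembly, much as a stem filter in the same assembly would use it; the class name and sample term are invented for the example.

using System;
using Lucene.Net.Analysis.De;

// Hypothetical helper, assumed to live inside the Lucene.Net.Analyzers
// assembly because GermanStemmer.Stem() has internal visibility.
internal static class GermanStemmerSketch
{
	internal static String StemOnce(String term)
	{
		GermanStemmer stemmer = new GermanStemmer();
		// Stem() lowercases the term and then runs the five phases in order:
		// Substitute, Strip, Optimize, Resubstitute, RemoveParticleDenotion.
		return stemmer.Stem(term);
	}
}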

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs Fri Jan 13 08:42:34 2012
@@ -1,98 +1,119 @@
-using System;
-using System.IO;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.De
-{
-	/// <summary>
-	/// Loads a text file and adds every line as an entry to a Hashtable. Every line
-	/// should contain only one word. If the file is not found or on any error, an
-	/// empty table is returned.
-	/// </summary>
-	public class WordlistLoader
-	{
-		/// <summary>
-		/// </summary>
-		/// <param name="path">Path to the wordlist</param>
-		/// <param name="wordfile">Name of the wordlist</param>
-		/// <returns></returns>
-        public static ICollection<string> GetWordtable(String path, String wordfile) 
-		{
-			if ( path == null || wordfile == null ) 
-			{
-				return new List<string>();
-			}
-			return GetWordtable(new FileInfo(path + "\\" + wordfile));
-		}
-
-		/// <summary>
-		/// </summary>
-		/// <param name="wordfile">Complete path to the wordlist</param>
-		/// <returns></returns>
-        public static ICollection<string> GetWordtable(String wordfile) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new List<string>();
-			}
-			return GetWordtable( new FileInfo( wordfile ) );
-		}
-
-		/// <summary>
-		/// 
-		/// </summary>
-		/// <param name="wordfile">File containing the wordlist</param>
-		/// <returns></returns>
-        public static ICollection<string> GetWordtable(FileInfo wordfile) 
-		{
-			if ( wordfile == null ) 
-			{
-				return new List<string>();
-			}
-
-            ICollection<string> result = null;
-			try 
-			{
-				StreamReader lnr = new StreamReader(wordfile.FullName);
-				String word = null;
-				String[] stopwords = new String[100];
-				int wordcount = 0;
-				while ( ( word = lnr.ReadLine() ) != null ) 
-				{
-					wordcount++;
-					if ( wordcount == stopwords.Length ) 
-					{
-						String[] tmp = new String[stopwords.Length + 50];
-						Array.Copy( stopwords, 0, tmp, 0, wordcount );
-						stopwords = tmp;
-					}
-					stopwords[wordcount-1] = word;
-				}
-				result = MakeWordTable( stopwords, wordcount );
-			}
-				// On error, use an empty table
-			catch (IOException) 
-			{
-				result = new List<string>();
-			}
-			return result;
-		}
-
-		/// <summary>
-		/// Builds the wordlist table.
-		/// </summary>
-		/// <param name="words">Word that where read</param>
-		/// <param name="length">Amount of words that where read into <tt>words</tt></param>
-		/// <returns></returns>
-        private static ICollection<string> MakeWordTable(String[] words, int length) 
-		{
-			List<string> table = new List<string>( length );
-			for ( int i = 0; i < length; i++ ) 
-			{
-				table.Add(words[i]);
-			}
-			return table;
-		}
-	}
-}
\ No newline at end of file
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.De
+{
+	/// <summary>
+	/// Loads a text file and adds every line as an entry to a word list. Every line
+	/// should contain only one word. If the file is not found, or any other error
+	/// occurs, an empty list is returned.
+	/// </summary>
+	public class WordlistLoader
+	{
+		/// <summary>
+		/// Loads the wordlist with the given file name from the given path.
+		/// </summary>
+		/// <param name="path">Path to the wordlist</param>
+		/// <param name="wordfile">Name of the wordlist</param>
+		/// <returns></returns>
+        public static ICollection<string> GetWordtable(String path, String wordfile) 
+		{
+			if ( path == null || wordfile == null ) 
+			{
+				return new List<string>();
+			}
+			return GetWordtable(new FileInfo(path + "\\" + wordfile));
+		}
+
+		/// <summary>
+		/// Loads the wordlist at the given complete path.
+		/// </summary>
+		/// <param name="wordfile">Complete path to the wordlist</param>
+		/// <returns></returns>
+        public static ICollection<string> GetWordtable(String wordfile) 
+		{
+			if ( wordfile == null ) 
+			{
+				return new List<string>();
+			}
+			return GetWordtable( new FileInfo( wordfile ) );
+		}
+
+		/// <summary>
+		/// Loads the wordlist from the given file.
+		/// </summary>
+		/// <param name="wordfile">File containing the wordlist</param>
+		/// <returns></returns>
+        public static ICollection<string> GetWordtable(FileInfo wordfile) 
+		{
+			if ( wordfile == null ) 
+			{
+				return new List<string>();
+			}
+
+            ICollection<string> result = null;
+			try 
+			{
+				StreamReader lnr = new StreamReader(wordfile.FullName);
+				String word = null;
+				String[] stopwords = new String[100];
+				int wordcount = 0;
+				while ( ( word = lnr.ReadLine() ) != null ) 
+				{
+					wordcount++;
+					if ( wordcount == stopwords.Length ) 
+					{
+						String[] tmp = new String[stopwords.Length + 50];
+						Array.Copy( stopwords, 0, tmp, 0, wordcount );
+						stopwords = tmp;
+					}
+					stopwords[wordcount-1] = word;
+				}
+				result = MakeWordTable( stopwords, wordcount );
+			}
+				// On error, use an empty table
+			catch (IOException) 
+			{
+				result = new List<string>();
+			}
+			return result;
+		}
+
+		/// <summary>
+		/// Builds the wordlist table.
+		/// </summary>
+		/// <param name="words">Words that were read</param>
+		/// <param name="length">Number of words that were read into <tt>words</tt></param>
+		/// <returns></returns>
+        private static ICollection<string> MakeWordTable(String[] words, int length) 
+		{
+			List<string> table = new List<string>( length );
+			for ( int i = 0; i < length; i++ ) 
+			{
+				table.Add(words[i]);
+			}
+			return table;
+		}
+	}
+}
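
For illustration only (not part of the committed sources), a minimal sketch of loading a wordlist with the class above. The file name "german_stop.txt" is an invented example; the file is expected to contain one word per line.

using System;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Analysis.De;

public static class WordlistLoaderSketch
{
	public static void Main()
	{
		// Any of the three GetWordtable overloads can be used; FileInfo is shown here.
		ICollection<string> stopwords =
			WordlistLoader.GetWordtable(new FileInfo("german_stop.txt"));

		// A missing file or I/O error yields an empty collection rather than an exception.
		Console.WriteLine("Loaded {0} stopwords.", stopwords.Count);
	}
}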

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs?rev=1230919&r1=1230918&r2=1230919&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs Fri Jan 13 08:42:34 2012
@@ -1,198 +1,219 @@
-using System;
-using System.IO;
-using System.Text;
-using System.Collections;
-using System.Collections.Generic;
-
-using Lucene.Net.Analysis;
-using Lucene.Net.Analysis.De;
-using Lucene.Net.Analysis.Standard;
-
-namespace Lucene.Net.Analysis.Fr
-{
-	/* ====================================================================
-	 * The Apache Software License, Version 1.1
-	 *
-	 * Copyright (c) 2004 The Apache Software Foundation.  All rights
-	 * reserved.
-	 *
-	 * Redistribution and use in source and binary forms, with or without
-	 * modification, are permitted provided that the following conditions
-	 * are met:
-	 *
-	 * 1. Redistributions of source code must retain the above copyright
-	 *    notice, this list of conditions and the following disclaimer.
-	 *
-	 * 2. Redistributions in binary form must reproduce the above copyright
-	 *    notice, this list of conditions and the following disclaimer in
-	 *    the documentation and/or other materials provided with the
-	 *    distribution.
-	 *
-	 * 3. The end-user documentation included with the redistribution,
-	 *    if any, must include the following acknowledgment:
-	 *       "This product includes software developed by the
-	 *        Apache Software Foundation (http://www.apache.org/)."
-	 *    Alternately, this acknowledgment may appear in the software itself,
-	 *    if and wherever such third-party acknowledgments normally appear.
-	 *
-	 * 4. The names "Apache" and "Apache Software Foundation" and
-	 *    "Apache Lucene" must not be used to endorse or promote products
-	 *    derived from this software without prior written permission. For
-	 *    written permission, please contact apache@apache.org.
-	 *
-	 * 5. Products derived from this software may not be called "Apache",
-	 *    "Apache Lucene", nor may "Apache" appear in their name, without
-	 *    prior written permission of the Apache Software Foundation.
-	 *
-	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
-	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
-	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-	 * SUCH DAMAGE.
-	 * ====================================================================
-	 *
-	 * This software consists of voluntary contributions made by many
-	 * individuals on behalf of the Apache Software Foundation.  For more
-	 * information on the Apache Software Foundation, please see
-	 * <http://www.apache.org/>.
-	 */
-
-	/// <summary>
-	/// Analyzer for french language. Supports an external list of stopwords (words that
-	/// will not be indexed at all) and an external list of exclusions (word that will
-	/// not be stemmed, but indexed).
-	/// A default set of stopwords is used unless an other list is specified, the
-	/// exclusionlist is empty by default.
-	/// 
-	/// <author>Patrick Talbot (based on Gerhard Schwarz work for German)</author>
-	/// <version>$Id: FrenchAnalyzer.java,v 1.9 2004/10/17 11:41:40 dnaber Exp $</version>
-	/// </summary>
-	public sealed class FrenchAnalyzer : Analyzer 
-	{
-
-		/// <summary>
-		/// Extended list of typical french stopwords.
-		/// </summary>
-		public static String[] FRENCH_STOP_WORDS = 
-				 {
-					 "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
-					 "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
-					 "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
-					 "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci",
-					 "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout",
-					 "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles",
-					 "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse",
-					 "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles",
-					 "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis",
-					 "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle",
-					 "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là",
-					 "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi",
-					 "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre",
-					 "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi",
-					 "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi",
-					 "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels",
-					 "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon",
-					 "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit",
-					 "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes",
-					 "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers",
-					 "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès",
-					 "été", "être", "ô"
-				 };
-
-		/// <summary>
-		/// Contains the stopwords used with the StopFilter.
-		/// </summary>
-        private ICollection<string> stoptable = new List<string>();
-
-		/// <summary>
-		/// Contains words that should be indexed but not stemmed.
-		/// </summary>
-        private ICollection<string> excltable = new List<string>();
-
-		/// <summary>
-		/// Builds an analyzer.
-		/// </summary>
-		public FrenchAnalyzer() 
-		{
-			stoptable = StopFilter.MakeStopSet( FRENCH_STOP_WORDS );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public FrenchAnalyzer( String[] stopwords ) 
-		{
-			stoptable = StopFilter.MakeStopSet( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-        public FrenchAnalyzer(ICollection<string> stopwords) 
-		{
-			stoptable = stopwords;
-		}
-
-		/// <summary>
-		/// Builds an analyzer with the given stop words.
-		/// </summary>
-		public FrenchAnalyzer( FileInfo stopwords ) 
-		{
-			stoptable = WordlistLoader.GetWordtable( stopwords );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from an array of Strings.
-		/// </summary>
-		public void SetStemExclusionTable( String[] exclusionlist ) 
-		{
-			excltable = StopFilter.MakeStopSet( exclusionlist );
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from a Hashtable.
-		/// </summary>
-        public void SetStemExclusionTable(ICollection<string> exclusionlist) 
-		{
-			excltable = exclusionlist;
-		}
-
-		/// <summary>
-		/// Builds an exclusionlist from the words contained in the given file.
-		/// </summary>
-		public void SetStemExclusionTable( FileInfo exclusionlist ) 
-		{
-			excltable = WordlistLoader.GetWordtable( exclusionlist );
-		}
-
-		/// <summary>
-		/// Creates a TokenStream which tokenizes all the text in the provided Reader.
-		/// </summary>
-		/// <returns>
-		/// A TokenStream build from a StandardTokenizer filtered with
-		/// 	StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
-		/// </returns>
-		public override TokenStream TokenStream( String fieldName, TextReader reader ) 
-		{
-		
-			if (fieldName==null) throw new ArgumentException("fieldName must not be null");
-			if (reader==null) throw new ArgumentException("readermust not be null");
-				
-			TokenStream result = new StandardTokenizer( reader );
-			result = new StandardFilter( result );
-			result = new StopFilter( result, stoptable );
-			result = new FrenchStemFilter( result, excltable );
-			// Convert to lowercase after stemming!
-			result = new LowerCaseFilter( result );
-			return result;
-		}
-	}
-
-}
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using System.Text;
+using System.Collections;
+using System.Collections.Generic;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.De;
+using Lucene.Net.Analysis.Standard;
+
+namespace Lucene.Net.Analysis.Fr
+{
+	/* ====================================================================
+	 * The Apache Software License, Version 1.1
+	 *
+	 * Copyright (c) 2004 The Apache Software Foundation.  All rights
+	 * reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without
+	 * modification, are permitted provided that the following conditions
+	 * are met:
+	 *
+	 * 1. Redistributions of source code must retain the above copyright
+	 *    notice, this list of conditions and the following disclaimer.
+	 *
+	 * 2. Redistributions in binary form must reproduce the above copyright
+	 *    notice, this list of conditions and the following disclaimer in
+	 *    the documentation and/or other materials provided with the
+	 *    distribution.
+	 *
+	 * 3. The end-user documentation included with the redistribution,
+	 *    if any, must include the following acknowledgment:
+	 *       "This product includes software developed by the
+	 *        Apache Software Foundation (http://www.apache.org/)."
+	 *    Alternately, this acknowledgment may appear in the software itself,
+	 *    if and wherever such third-party acknowledgments normally appear.
+	 *
+	 * 4. The names "Apache" and "Apache Software Foundation" and
+	 *    "Apache Lucene" must not be used to endorse or promote products
+	 *    derived from this software without prior written permission. For
+	 *    written permission, please contact apache@apache.org.
+	 *
+	 * 5. Products derived from this software may not be called "Apache",
+	 *    "Apache Lucene", nor may "Apache" appear in their name, without
+	 *    prior written permission of the Apache Software Foundation.
+	 *
+	 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+	 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+	 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+	 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+	 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+	 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+	 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+	 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+	 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+	 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+	 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+	 * SUCH DAMAGE.
+	 * ====================================================================
+	 *
+	 * This software consists of voluntary contributions made by many
+	 * individuals on behalf of the Apache Software Foundation.  For more
+	 * information on the Apache Software Foundation, please see
+	 * <http://www.apache.org/>.
+	 */
+
+	/// <summary>
+	/// Analyzer for the French language. Supports an external list of stopwords (words that
+	/// will not be indexed at all) and an external list of exclusions (words that will
+	/// not be stemmed, but indexed).
+	/// A default set of stopwords is used unless another list is specified; the
+	/// exclusion list is empty by default.
+	/// 
+	/// <author>Patrick Talbot (based on Gerhard Schwarz work for German)</author>
+	/// <version>$Id: FrenchAnalyzer.java,v 1.9 2004/10/17 11:41:40 dnaber Exp $</version>
+	/// </summary>
+	public sealed class FrenchAnalyzer : Analyzer 
+	{
+
+		/// <summary>
+		/// Extended list of typical French stopwords.
+		/// </summary>
+		public static String[] FRENCH_STOP_WORDS = 
+				 {
+					 "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
+					 "autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
+					 "c", "car", "ce", "ceci", "cela", "celle", "celles", "celui", "cependant", "certain",
+					 "certaine", "certaines", "certains", "ces", "cet", "cette", "ceux", "chez", "ci",
+					 "combien", "comme", "comment", "concernant", "contre", "d", "dans", "de", "debout",
+					 "dedans", "dehors", "delà", "depuis", "derrière", "des", "désormais", "desquelles",
+					 "desquels", "dessous", "dessus", "devant", "devers", "devra", "divers", "diverse",
+					 "diverses", "doit", "donc", "dont", "du", "duquel", "durant", "dès", "elle", "elles",
+					 "en", "entre", "environ", "est", "et", "etc", "etre", "eu", "eux", "excepté", "hormis",
+					 "hors", "hélas", "hui", "il", "ils", "j", "je", "jusqu", "jusque", "l", "la", "laquelle",
+					 "le", "lequel", "les", "lesquelles", "lesquels", "leur", "leurs", "lorsque", "lui", "là",
+					 "ma", "mais", "malgré", "me", "merci", "mes", "mien", "mienne", "miennes", "miens", "moi",
+					 "moins", "mon", "moyennant", "même", "mêmes", "n", "ne", "ni", "non", "nos", "notre",
+					 "nous", "néanmoins", "nôtre", "nôtres", "on", "ont", "ou", "outre", "où", "par", "parmi",
+					 "partant", "pas", "passé", "pendant", "plein", "plus", "plusieurs", "pour", "pourquoi",
+					 "proche", "près", "puisque", "qu", "quand", "que", "quel", "quelle", "quelles", "quels",
+					 "qui", "quoi", "quoique", "revoici", "revoilà", "s", "sa", "sans", "sauf", "se", "selon",
+					 "seront", "ses", "si", "sien", "sienne", "siennes", "siens", "sinon", "soi", "soit",
+					 "son", "sont", "sous", "suivant", "sur", "ta", "te", "tes", "tien", "tienne", "tiennes",
+					 "tiens", "toi", "ton", "tous", "tout", "toute", "toutes", "tu", "un", "une", "va", "vers",
+					 "voici", "voilà", "vos", "votre", "vous", "vu", "vôtre", "vôtres", "y", "à", "ça", "ès",
+					 "été", "être", "ô"
+				 };
+
+		/// <summary>
+		/// Contains the stopwords used with the StopFilter.
+		/// </summary>
+        private ICollection<string> stoptable = new List<string>();
+
+		/// <summary>
+		/// Contains words that should be indexed but not stemmed.
+		/// </summary>
+        private ICollection<string> excltable = new List<string>();
+
+		/// <summary>
+		/// Builds an analyzer.
+		/// </summary>
+		public FrenchAnalyzer() 
+		{
+			stoptable = StopFilter.MakeStopSet( FRENCH_STOP_WORDS );
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+		public FrenchAnalyzer( String[] stopwords ) 
+		{
+			stoptable = StopFilter.MakeStopSet( stopwords );
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+        public FrenchAnalyzer(ICollection<string> stopwords) 
+		{
+			stoptable = stopwords;
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words.
+		/// </summary>
+		public FrenchAnalyzer( FileInfo stopwords ) 
+		{
+			stoptable = WordlistLoader.GetWordtable( stopwords );
+		}
+
+		/// <summary>
+		/// Builds an exclusion list from an array of Strings.
+		/// </summary>
+		public void SetStemExclusionTable( String[] exclusionlist ) 
+		{
+			excltable = StopFilter.MakeStopSet( exclusionlist );
+		}
+
+		/// <summary>
+		/// Builds an exclusion list from a collection of words.
+		/// </summary>
+        public void SetStemExclusionTable(ICollection<string> exclusionlist) 
+		{
+			excltable = exclusionlist;
+		}
+
+		/// <summary>
+		/// Builds an exclusion list from the words contained in the given file.
+		/// </summary>
+		public void SetStemExclusionTable( FileInfo exclusionlist ) 
+		{
+			excltable = WordlistLoader.GetWordtable( exclusionlist );
+		}
+
+		/// <summary>
+		/// Creates a TokenStream which tokenizes all the text in the provided Reader.
+		/// </summary>
+		/// <returns>
+		/// A TokenStream built from a StandardTokenizer filtered with
+		/// 	StandardFilter, StopFilter, FrenchStemFilter and LowerCaseFilter
+		/// </returns>
+		public override TokenStream TokenStream( String fieldName, TextReader reader ) 
+		{
+		
+			if (fieldName==null) throw new ArgumentException("fieldName must not be null");
+			if (reader==null) throw new ArgumentException("reader must not be null");
+				
+			TokenStream result = new StandardTokenizer( reader );
+			result = new StandardFilter( result );
+			result = new StopFilter( result, stoptable );
+			result = new FrenchStemFilter( result, excltable );
+			// Convert to lowercase after stemming!
+			result = new LowerCaseFilter( result );
+			return result;
+		}
+	}
+
+}
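
For illustration only (not part of the committed sources), a minimal sketch of building the analyzer above and requesting a token stream. The field name, sample text and exclusion word are invented, and token consumption is omitted because the token/attribute API differs between Lucene.Net versions.

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Fr;

public static class FrenchAnalyzerSketch
{
	public static void Main()
	{
		FrenchAnalyzer analyzer = new FrenchAnalyzer();
		// Words in the exclusion table are indexed but not stemmed ("lucene" is arbitrary).
		analyzer.SetStemExclusionTable(new String[] { "lucene" });

		// StandardTokenizer -> StandardFilter -> StopFilter -> FrenchStemFilter -> LowerCaseFilter
		TokenStream stream = analyzer.TokenStream(
			"contents", new StringReader("les moteurs de recherche"));
		// Iterate the stream with the TokenStream API of the Lucene.Net version in use.
	}
}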


