lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject [Lucene.Net] svn commit: r1143702 - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g: src/contrib/Analyzers/ src/contrib/Analyzers/Filters/ test/contrib/Analyzers/ test/contrib/Analyzers/Filters/
Date Thu, 07 Jul 2011 08:06:46 GMT
Author: digy
Date: Thu Jul  7 08:06:45 2011
New Revision: 1143702

URL: http://svn.apache.org/viewvc?rev=1143702&view=rev
Log:
[LUCENENET-430] Contrib.ChainedFilter

Added:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/ChainedFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Filters/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Filters/ChainedFilterTest.cs
Modified:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1143702&r1=1143701&r2=1143702&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj Thu Jul  7 08:06:45 2011
@@ -60,6 +60,7 @@
     <Compile Include="De\GermanStemFilter.cs" />
     <Compile Include="De\GermanStemmer.cs" />
     <Compile Include="De\WordlistLoader.cs" />
+    <Compile Include="Filters\ChainedFilter.cs" />
     <Compile Include="Fr\FrenchAnalyzer.cs" />
     <Compile Include="Fr\FrenchStemFilter.cs" />
     <Compile Include="Fr\FrenchStemmer.cs" />

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/ChainedFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/ChainedFilter.cs?rev=1143702&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/ChainedFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/ChainedFilter.cs Thu Jul  7 08:06:45 2011
@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+using Lucene.Net.Search;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+
+    /// <summary>
+    /// <para>
+    /// Allows multiple <see cref="Filter"/>s to be chained.
+    /// Logical operations such as <b>NOT</b> and <b>XOR</b>
+    /// are applied between filters. One operation can be used
+    /// for all filters, or a specific operation can be declared
+    /// for each filter.
+    /// </para>
+    /// <para>
+    /// The order in which filters are called depends on
+    /// the position of the filter in the chain. It's probably
+    /// more efficient to place the most restrictive /
+    /// least computationally-intensive filters first.
+    /// </para>
+    /// </summary>
+    public class ChainedFilter : Filter
+    {
+        /// <summary>Logical operations that can be applied between filters.</summary>
+        public enum Logic
+        {
+            /// <summary>No operation declared.</summary>
+            NONE = -1,
+            OR = 0,
+            AND = 1,
+            ANDNOT = 2,
+            XOR = 3
+        }
+
+        /// <summary>Logical operation when none is declared. Defaults to OR.</summary>
+        public const Logic DEFAULT = Logic.OR;
+
+        /// <summary>The filter chain.</summary>
+        private readonly Filter[] chain;
+
+        /// <summary>Per-filter operations; null when no array was supplied.</summary>
+        private readonly Logic[] logicArray;
+
+        /// <summary>Single operation for the whole chain; NONE when not supplied.</summary>
+        private readonly Logic logic = Logic.NONE;
+
+        /// <summary>Creates a chain that combines its filters with <see cref="DEFAULT"/>.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <exception cref="ArgumentNullException"><paramref name="chain"/> is null.</exception>
+        public ChainedFilter(Filter[] chain)
+        {
+            if (chain == null)
+            {
+                throw new ArgumentNullException("chain");
+            }
+            this.chain = chain;
+        }
+
+        /// <summary>Creates a chain with one logical operation per filter.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logicArray">
+        /// Logical operations to apply between filters. May be null, in which case
+        /// <see cref="DEFAULT"/> is used. Otherwise it must have as many elements as
+        /// <paramref name="chain"/>; this is checked when the filter is evaluated.
+        /// </param>
+        /// <exception cref="ArgumentNullException"><paramref name="chain"/> is null.</exception>
+        public ChainedFilter(Filter[] chain, Logic[] logicArray)
+        {
+            if (chain == null)
+            {
+                throw new ArgumentNullException("chain");
+            }
+            this.chain = chain;
+            this.logicArray = logicArray;
+        }
+
+        /// <summary>Creates a chain that applies the same logical operation to all filters.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logic">Logical operation to apply to ALL filters</param>
+        /// <exception cref="ArgumentNullException"><paramref name="chain"/> is null.</exception>
+        public ChainedFilter(Filter[] chain, Logic logic)
+        {
+            if (chain == null)
+            {
+                throw new ArgumentNullException("chain");
+            }
+            this.chain = chain;
+            this.logic = logic;
+        }
+
+        /// <summary>
+        /// Evaluates the chain against <paramref name="reader"/>. A single operation
+        /// (if one was given) takes precedence over a logic array; with neither,
+        /// <see cref="DEFAULT"/> is used.
+        /// </summary>
+        /// <seealso cref="Filter.GetDocIdSet(IndexReader)"/>
+        /// <exception cref="ArgumentException">
+        /// A logic array was supplied whose length differs from the chain length.
+        /// </exception>
+        public override DocIdSet GetDocIdSet(IndexReader reader)
+        {
+            // The chain position is tracked in locals of the helpers below rather than
+            // in an instance field, so concurrent calls do not share state.
+            if (logic != Logic.NONE)
+            {
+                return GetDocIdSet(reader, logic);
+            }
+            if (logicArray != null)
+            {
+                return GetDocIdSet(reader, logicArray);
+            }
+            return GetDocIdSet(reader, DEFAULT);
+        }
+
+        /// <summary>
+        /// Returns an iterator for <paramref name="docIdSet"/>, substituting the empty
+        /// set's iterator when the set or its iterator is null.
+        /// </summary>
+        private static DocIdSetIterator IteratorOrEmpty(DocIdSet docIdSet)
+        {
+            DocIdSetIterator iter = (docIdSet == null) ? null : docIdSet.Iterator();
+            return iter ?? DocIdSet.EMPTY_DOCIDSET.Iterator();
+        }
+
+        /// <summary>Null-safe iterator over the documents accepted by <paramref name="filter"/>.</summary>
+        private DocIdSetIterator GetDISI(Filter filter, IndexReader reader)
+        {
+            return IteratorOrEmpty(filter.GetDocIdSet(reader));
+        }
+
+        /// <summary>
+        /// Builds the bit set the chain starts from.
+        /// </summary>
+        /// <param name="reader">Reader the filters are evaluated against</param>
+        /// <param name="logic">Operation declared for the first filter</param>
+        /// <param name="nextIndex">
+        /// Receives the index of the first filter that still has to be applied:
+        /// 1 when the first filter was consumed here, otherwise 0.
+        /// </param>
+        private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, out int nextIndex)
+        {
+            nextIndex = 0;
+            int maxDoc = reader.MaxDoc();
+
+            // An AND (or ANDNOT) against a completely false bit set would always
+            // yield zero results, so for those operations the result is seeded from
+            // the first filter instead. An empty chain has nothing to seed from.
+            bool seedFromFirstFilter = (logic == Logic.AND || logic == Logic.ANDNOT) && chain.Length > 0;
+            if (!seedFromFirstFilter)
+            {
+                return new OpenBitSetDISI(maxDoc);
+            }
+
+            OpenBitSetDISI result = new OpenBitSetDISI(GetDISI(chain[0], reader), maxDoc);
+            if (logic == Logic.ANDNOT)
+            {
+                result.Flip(0, maxDoc); // NOTE: may set bits for deleted docs.
+            }
+            nextIndex = 1;
+            return result;
+        }
+
+        /// <summary>
+        /// Returns <paramref name="result"/> unchanged.
+        /// Deprecated: originally meant to provide a SortedVIntList when it is
+        /// definitely smaller than an OpenBitSet. Either use CachingWrapperFilter, or
+        /// switch to a different DocIdSet implementation yourself.
+        /// This method will be removed in Lucene 4.0.
+        /// </summary>
+        protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
+        {
+            return result;
+        }
+
+        /// <summary>Delegates to each filter in the chain, using one operation throughout.</summary>
+        /// <param name="reader">IndexReader</param>
+        /// <param name="logic">Logical operation</param>
+        /// <returns>DocIdSet</returns>
+        private DocIdSet GetDocIdSet(IndexReader reader, Logic logic)
+        {
+            int i;
+            OpenBitSetDISI result = InitialResult(reader, logic, out i);
+            for (; i < chain.Length; i++)
+            {
+                DoChain(result, logic, chain[i].GetDocIdSet(reader));
+            }
+            return FinalResult(result, reader.MaxDoc());
+        }
+
+        /// <summary>Delegates to each filter in the chain, using the operation declared for it.</summary>
+        /// <param name="reader">IndexReader</param>
+        /// <param name="logic">Logical operations, one per filter</param>
+        /// <returns>DocIdSet</returns>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="logic"/> does not have one element per filter.
+        /// </exception>
+        private DocIdSet GetDocIdSet(IndexReader reader, Logic[] logic)
+        {
+            if (logic.Length != chain.Length)
+            {
+                throw new ArgumentException("Invalid number of elements in logic array");
+            }
+
+            // Both arrays may legitimately be empty; there is no logic[0] to read then.
+            Logic first = (logic.Length > 0) ? logic[0] : DEFAULT;
+
+            int i;
+            OpenBitSetDISI result = InitialResult(reader, first, out i);
+            for (; i < chain.Length; i++)
+            {
+                DoChain(result, logic[i], chain[i].GetDocIdSet(reader));
+            }
+            return FinalResult(result, reader.MaxDoc());
+        }
+
+        /// <summary>Lists the chained filters, each followed by a space, inside brackets.</summary>
+        public override String ToString()
+        {
+            StringBuilder sb = new StringBuilder("ChainedFilter: [");
+            foreach (Filter filter in chain)
+            {
+                sb.Append(filter);
+                sb.Append(' ');
+            }
+            sb.Append(']');
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Combines <paramref name="dis"/> into <paramref name="result"/> in place.
+        /// </summary>
+        /// <param name="result">Accumulated result; modified by this call</param>
+        /// <param name="logic">
+        /// Operation to apply; any value other than OR, AND, ANDNOT or XOR
+        /// (for example NONE) is treated as <see cref="DEFAULT"/>
+        /// </param>
+        /// <param name="dis">Documents accepted by the next filter; null means none</param>
+        private void DoChain(OpenBitSetDISI result, Logic logic, DocIdSet dis)
+        {
+            if (logic != Logic.OR && logic != Logic.AND && logic != Logic.ANDNOT && logic != Logic.XOR)
+            {
+                logic = DEFAULT;
+            }
+
+            OpenBitSet bits = dis as OpenBitSet;
+            if (bits != null)
+            {
+                // optimized case for OpenBitSets
+                switch (logic)
+                {
+                    case Logic.OR:
+                        result.Or(bits);
+                        break;
+                    case Logic.AND:
+                        result.And(bits);
+                        break;
+                    case Logic.ANDNOT:
+                        result.AndNot(bits);
+                        break;
+                    case Logic.XOR:
+                        result.Xor(bits);
+                        break;
+                }
+                return;
+            }
+
+            DocIdSetIterator disi = IteratorOrEmpty(dis);
+            switch (logic)
+            {
+                case Logic.OR:
+                    result.InPlaceOr(disi);
+                    break;
+                case Logic.AND:
+                    result.InPlaceAnd(disi);
+                    break;
+                case Logic.ANDNOT:
+                    result.InPlaceNot(disi);
+                    break;
+                case Logic.XOR:
+                    result.InPlaceXor(disi);
+                    break;
+            }
+        }
+
+    }
+
+}
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj?rev=1143702&r1=1143701&r2=1143702&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj Thu Jul  7 08:06:45 2011
@@ -59,6 +59,7 @@
     <Compile Include="AR\TestArabicAnalyzer.cs" />
     <Compile Include="AR\TestArabicNormalizationFilter.cs" />
     <Compile Include="AR\TestArabicStemFilter.cs" />
+    <Compile Include="Filters\ChainedFilterTest.cs" />
     <Compile Include="NGram\TestEdgeNGramTokenFilter.cs" />
     <Compile Include="NGram\TestEdgeNGramTokenizer.cs" />
     <Compile Include="NGram\TestNGramTokenFilter.cs" />

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Filters/ChainedFilterTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Filters/ChainedFilterTest.cs?rev=1143702&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Filters/ChainedFilterTest.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Filters/ChainedFilterTest.cs Thu Jul  7 08:06:45 2011
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis
+{
+    /// <summary>
+    /// Tests for <see cref="ChainedFilter"/>: a single filter, and OR / AND / XOR /
+    /// ANDNOT combinations over an index of <see cref="MAX"/> documents, half owned
+    /// by "bob" and half by "sue".
+    /// </summary>
+    [TestFixture]
+    public class ChainedFilterTest : Lucene.Net.TestCase
+    {
+        public static int MAX = 500;
+
+        private RAMDirectory directory;
+        private IndexSearcher searcher;
+        private Query query;
+        // DateFilter was deprecated and removed; a TermRangeFilter stands in for it.
+        private TermRangeFilter dateFilter;
+        private QueryWrapperFilter bobFilter;
+        private QueryWrapperFilter sueFilter;
+
+        /// <summary>Builds the index, the match-everything query and the filters used by the tests.</summary>
+        [SetUp]
+        public void SetUp()
+        {
+            directory = new RAMDirectory();
+            IndexWriter writer =
+               new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+
+            // 2003 January 01, in milliseconds since the Unix epoch. Kept as a plain
+            // counter: DateTime is immutable, so calling AddMilliseconds without using
+            // its return value never advanced the date.
+            long dateMillis = 1041397200000L;
+
+            for (int i = 0; i < MAX; i++)
+            {
+                string key = (i + 1).ToString(System.Globalization.CultureInfo.InvariantCulture);
+                string owner = (i < MAX / 2) ? "bob" : "sue";
+                string date = dateMillis.ToString(System.Globalization.CultureInfo.InvariantCulture);
+
+                Document doc = new Document();
+                doc.Add(new Field("key", key, Field.Store.YES, Field.Index.NOT_ANALYZED));
+                doc.Add(new Field("owner", owner, Field.Store.YES, Field.Index.NOT_ANALYZED));
+                doc.Add(new Field("date", date, Field.Store.YES, Field.Index.NOT_ANALYZED));
+                writer.AddDocument(doc);
+
+                dateMillis++;
+            }
+
+            writer.Close();
+
+            searcher = new IndexSearcher(directory, true);
+
+            // query for everything to make life easier
+            BooleanQuery bq = new BooleanQuery();
+            bq.Add(new TermQuery(new Term("owner", "bob")), BooleanClause.Occur.SHOULD);
+            bq.Add(new TermQuery(new Term("owner", "sue")), BooleanClause.Occur.SHOULD);
+            query = bq;
+
+            // The date filter matches everything too: dates are treated as strings
+            // and the whole range is selected.
+            dateFilter = new TermRangeFilter("date", "", "ZZZZ", true, true);
+
+            bobFilter = new QueryWrapperFilter(
+                new TermQuery(new Term("owner", "bob")));
+            sueFilter = new QueryWrapperFilter(
+                new TermQuery(new Term("owner", "sue")));
+        }
+
+        /// <summary>Closes the searcher and directory opened by <see cref="SetUp"/>.</summary>
+        [TearDown]
+        public void CloseIndex()
+        {
+            if (searcher != null)
+            {
+                searcher.Close();
+                searcher = null;
+            }
+            if (directory != null)
+            {
+                directory.Close();
+                directory = null;
+            }
+        }
+
+        /// <summary>
+        /// Creates a chain with per-filter logic, or with the default logic when
+        /// <paramref name="logic"/> is null.
+        /// </summary>
+        private ChainedFilter GetChainedFilter(Filter[] chain, ChainedFilter.Logic[] logic)
+        {
+            if (logic == null)
+            {
+                return new ChainedFilter(chain);
+            }
+            return new ChainedFilter(chain, logic);
+        }
+
+        /// <summary>Creates a chain that applies one operation to all filters.</summary>
+        private ChainedFilter GetChainedFilter(Filter[] chain, ChainedFilter.Logic logic)
+        {
+            return new ChainedFilter(chain, logic);
+        }
+
+
+        [Test]
+        public void TestSingleFilter()
+        {
+            ChainedFilter chain = GetChainedFilter(new Filter[] { dateFilter }, null);
+
+            int numHits = searcher.Search(query, chain, 1000).TotalHits;
+            Assert.AreEqual(MAX, numHits);
+
+            chain = new ChainedFilter(new Filter[] { bobFilter });
+            numHits = searcher.Search(query, chain, 1000).TotalHits;
+            Assert.AreEqual(MAX / 2, numHits);
+
+            chain = GetChainedFilter(
+                new Filter[] { bobFilter },
+                new ChainedFilter.Logic[] { ChainedFilter.Logic.AND });
+            TopDocs hits = searcher.Search(query, chain, 1000);
+            numHits = hits.TotalHits;
+            Assert.AreEqual(MAX / 2, numHits);
+            Assert.AreEqual("bob", searcher.Doc(hits.ScoreDocs[0].doc).Get("owner"));
+
+            chain = GetChainedFilter(
+                new Filter[] { bobFilter },
+                new ChainedFilter.Logic[] { ChainedFilter.Logic.ANDNOT });
+            hits = searcher.Search(query, chain, 1000);
+            numHits = hits.TotalHits;
+            Assert.AreEqual(MAX / 2, numHits);
+            Assert.AreEqual("sue", searcher.Doc(hits.ScoreDocs[0].doc).Get("owner"));
+        }
+
+        [Test]
+        public void TestOR()
+        {
+            ChainedFilter chain = GetChainedFilter(
+              new Filter[] { sueFilter, bobFilter }, null);
+
+            int numHits = searcher.Search(query, chain, 1000).TotalHits;
+            Assert.AreEqual(MAX, numHits, "OR matches all");
+        }
+
+        [Test]
+        public void TestAND()
+        {
+            ChainedFilter chain = GetChainedFilter(
+              new Filter[] { dateFilter, bobFilter }, ChainedFilter.Logic.AND);
+
+            TopDocs hits = searcher.Search(query, chain, 1000);
+            Assert.AreEqual(MAX / 2, hits.TotalHits, "AND matches just bob");
+            Assert.AreEqual("bob", searcher.Doc(hits.ScoreDocs[0].doc).Get("owner"));
+        }
+
+        [Test]
+        public void TestXOR()
+        {
+            ChainedFilter chain = GetChainedFilter(
+              new Filter[] { dateFilter, bobFilter }, ChainedFilter.Logic.XOR);
+
+            TopDocs hits = searcher.Search(query, chain, 1000);
+            Assert.AreEqual(MAX / 2, hits.TotalHits, "XOR matches sue");
+            Assert.AreEqual("sue", searcher.Doc(hits.ScoreDocs[0].doc).Get("owner"));
+        }
+
+        [Test]
+        public void TestANDNOT()
+        {
+            ChainedFilter chain = GetChainedFilter(
+                new Filter[] { dateFilter, sueFilter },
+                new ChainedFilter.Logic[] { ChainedFilter.Logic.AND, ChainedFilter.Logic.ANDNOT });
+
+            TopDocs hits = searcher.Search(query, chain, 1000);
+            Assert.AreEqual(MAX / 2, hits.TotalHits, "ANDNOT matches just bob");
+            Assert.AreEqual("bob", searcher.Doc(hits.ScoreDocs[0].doc).Get("owner"));
+
+            chain = GetChainedFilter(
+                new Filter[] { bobFilter, bobFilter },
+                new ChainedFilter.Logic[] { ChainedFilter.Logic.ANDNOT, ChainedFilter.Logic.ANDNOT });
+
+            hits = searcher.Search(query, chain, 1000);
+            Assert.AreEqual(MAX / 2, hits.TotalHits, "ANDNOT bob ANDNOT bob matches all sues");
+            Assert.AreEqual("sue", searcher.Doc(hits.ScoreDocs[0].doc).Get("owner"));
+        }
+
+        /// <summary>
+        /// Chains two CachingWrapperFilters over an empty index. The test passes when
+        /// the final search completes without throwing.
+        /// </summary>
+        [Test]
+        public void TestWithCachingFilter()
+        {
+            Directory dir = new RAMDirectory();
+            Analyzer analyzer = new WhitespaceAnalyzer();
+
+            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+            writer.Close();
+
+            // Named differently from the fixture fields so they are not shadowed.
+            Searcher emptySearcher = new IndexSearcher(dir, true);
+            try
+            {
+                Query noneQuery = new TermQuery(new Term("none", "none"));
+
+                QueryWrapperFilter queryFilter = new QueryWrapperFilter(noneQuery);
+                CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter);
+
+                emptySearcher.Search(noneQuery, cachingFilter, 1);
+
+                CachingWrapperFilter cachingFilter2 = new CachingWrapperFilter(queryFilter);
+                Filter[] chain = new Filter[2];
+                chain[0] = cachingFilter;
+                chain[1] = cachingFilter2;
+                ChainedFilter cf = new ChainedFilter(chain);
+
+                // In the Java original this search threw java.lang.ClassCastException:
+                // org.apache.lucene.util.OpenBitSet cannot be cast to java.util.BitSet
+                emptySearcher.Search(new MatchAllDocsQuery(), cf, 1);
+            }
+            finally
+            {
+                emptySearcher.Close();
+                dir.Close();
+            }
+        }
+
+    }
+}
\ No newline at end of file



Mime
View raw message