lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ccurr...@apache.org
Subject svn commit: r1310635 [3/8] - in /incubator/lucene.net/trunk: build/vs2010/contrib/ build/vs2010/test/ src/contrib/FastVectorHighlighter/ src/contrib/Highlighter/ src/contrib/Memory/ src/contrib/Memory/Properties/ src/contrib/Queries/ src/contrib/Querie...
Date Fri, 06 Apr 2012 23:37:54 GMT
Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/TokenGroup.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/TokenGroup.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/TokenGroup.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/TokenGroup.cs Fri Apr  6 23:37:48 2012
@@ -16,123 +16,137 @@
  */
 
 using System;
-using Token = Lucene.Net.Analysis.Token;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
 
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
 {
-	
-	/// <summary> One, or several overlapping tokens, along with the score(s) and the
-	/// scope of the original text
-	/// </summary>
-	/// <author>  MAHarwood
-	/// </author>
-	public class TokenGroup
-	{
-		
-		private const int MAX_NUM_TOKENS_PER_GROUP = 50;
-		internal Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
-		internal float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
-		internal int numTokens = 0;
-		internal int startOffset = 0;
-		internal int endOffset = 0;
-		internal float tot;
-		
-		internal int matchStartOffset, matchEndOffset;
-		
-		
-		internal virtual void  AddToken(Token token, float score)
-		{
-			if (numTokens < MAX_NUM_TOKENS_PER_GROUP)
-			{
-				if (numTokens == 0)
-				{
-					startOffset = matchStartOffset = token.StartOffset;
-					endOffset = matchEndOffset = token.EndOffset;
-					tot += score;
-				}
-				else
-				{
-					startOffset = Math.Min(startOffset, token.StartOffset);
-					endOffset = Math.Max(endOffset, token.EndOffset);
-					if (score > 0)
-					{
-						if (tot == 0)
-						{
-							matchStartOffset = token.StartOffset;
-							matchEndOffset = token.EndOffset;
-						}
-						else
-						{
-							matchStartOffset = Math.Min(matchStartOffset, token.StartOffset);
-							matchEndOffset = Math.Max(matchEndOffset, token.EndOffset);
-						}
-						tot += score;
-					}
-				}
-				tokens[numTokens] = token;
-				scores[numTokens] = score;
-				numTokens++;
-			}
-		}
-		
-		internal virtual bool IsDistinct(Token token)
-		{
-			return token.StartOffset >= endOffset;
-		}
-		
-		
-		internal virtual void  Clear()
-		{
-			numTokens = 0;
-			tot = 0;
-		}
-		
-		/// <summary> </summary>
-		/// <param name="index">a value between 0 and numTokens -1
-		/// </param>
-		/// <returns> the "n"th token
-		/// </returns>
-		public virtual Token GetToken(int index)
-		{
-			return tokens[index];
-		}
-		
-		/// <summary> </summary>
-		/// <param name="index">a value between 0 and numTokens -1
-		/// </param>
-		/// <returns> the "n"th score
-		/// </returns>
-		public virtual float GetScore(int index)
-		{
-			return scores[index];
-		}
-		
-		/// <returns> the end position in the original text
-		/// </returns>
-		public virtual int GetEndOffset()
-		{
-			return endOffset;
-		}
-		
-		/// <returns> the number of tokens in this group
-		/// </returns>
-		public virtual int GetNumTokens()
-		{
-			return numTokens;
-		}
-		
-		/// <returns> the start position in the original text
-		/// </returns>
-		public virtual int GetStartOffset()
-		{
-			return startOffset;
-		}
-		
-		/// <returns> all tokens' scores summed up
-		/// </returns>
-		public virtual float GetTotalScore()
-		{
-			return tot;
-		}
-	}
+    /// <summary> One, or several overlapping tokens, along with the score(s) and the
+    /// scope of the original text
+    /// </summary>
+    public class TokenGroup
+    {
+        private static readonly int MAX_NUM_TOKENS_PER_GROUP = 50;
+
+        private Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
+        private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
+        private int startOffset = 0;
+        private int endOffset = 0;
+        private float tot;
+
+        public int MatchStartOffset { get; private set; }
+        public int MatchEndOffset { get; private set; }
+        public int NumTokens { get; private set; }
+
+        private OffsetAttribute offsetAtt;
+        private TermAttribute termAtt;
+
+        public TokenGroup(TokenStream tokenStream)
+        {
+            NumTokens = 0;
+            offsetAtt = tokenStream.AddAttribute<OffsetAttribute>();
+            termAtt = tokenStream.AddAttribute<TermAttribute>();
+        }
+
+        protected internal void AddToken(float score)
+        {
+            if (NumTokens < MAX_NUM_TOKENS_PER_GROUP)
+            {
+                int termStartOffset = offsetAtt.StartOffset;
+                int termEndOffset = offsetAtt.EndOffset;
+                if (NumTokens == 0)
+                {
+                    startOffset = MatchStartOffset = termStartOffset;
+                    endOffset = MatchEndOffset = termEndOffset;
+                    tot += score;
+                }
+                else
+                {
+                    startOffset = Math.Min(startOffset, termStartOffset);
+                    endOffset = Math.Max(endOffset, termEndOffset);
+                    if (score > 0)
+                    {
+                        if (tot == 0)
+                        {
+                            MatchStartOffset = offsetAtt.StartOffset;
+                            MatchEndOffset = offsetAtt.EndOffset;
+                        }
+                        else
+                        {
+                            MatchStartOffset = Math.Min(MatchStartOffset, termStartOffset);
+                            MatchEndOffset = Math.Max(MatchEndOffset, termEndOffset);
+                        }
+                        tot += score;
+                    }
+                }
+                Token token = new Token(termStartOffset, termEndOffset);
+                token.SetTermBuffer(termAtt.Term());
+                tokens[NumTokens] = token;
+                scores[NumTokens] = score;
+                NumTokens++;
+            }
+        }
+
+        protected internal bool IsDistinct()
+        {
+            return offsetAtt.StartOffset >= endOffset;
+        }
+
+        protected internal void Clear()
+        {
+            NumTokens = 0;
+            tot = 0;
+        }
+
+
+        /// <summary>
+        /// the "n"th token
+        /// </summary>
+        /// <param name="index">a value between 0 and numTokens -1</param>
+        public Token GetToken(int index)
+        {
+            return tokens[index];
+        }
+
+        /// <summary>
+        /// the "n"th score
+        /// </summary>
+        /// <param name="index">a value between 0 and numTokens -1</param>
+        public float GetScore(int index)
+        {
+            return scores[index];
+        }
+
+        /// <summary>
+        /// the end position in the original text
+        /// </summary>
+        public int GetEndOffset()
+        {
+            return endOffset;
+        }
+
+        /// <summary>
+        /// The number of tokens in this group
+        /// </summary>
+        public int GetNumTokens()
+        {
+            return NumTokens;
+        }
+
+        /// <summary>
+        /// The start position in the original text
+        /// </summary>
+        public int GetStartOffset()
+        {
+            return startOffset;
+        }
+
+        /// <summary>
+        /// All tokens' scores summed up
+        /// </summary>
+        public float GetTotalScore()
+        {
+            return tot;
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/TokenSources.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/TokenSources.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/TokenSources.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/TokenSources.cs Fri Apr  6 23:37:48 2012
@@ -18,43 +18,41 @@
 /*
 * Created on 28-Oct-2004
 */
+
 using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Documents;
 using Lucene.Net.Index;
-using Analyzer = Lucene.Net.Analysis.Analyzer;
-using Token = Lucene.Net.Analysis.Token;
-using TokenStream = Lucene.Net.Analysis.TokenStream;
-using Document = Lucene.Net.Documents.Document;
-using IndexReader = Lucene.Net.Index.IndexReader;
-using TermPositionVector = Lucene.Net.Index.TermPositionVector;
-using TermVectorOffsetInfo = Lucene.Net.Index.TermVectorOffsetInfo;
 
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
 {
-	
-	/// <summary> Hides implementation issues associated with obtaining a TokenStream for use with
-	/// the higlighter - can obtain from TermFreqVectors with offsets and (optionally) positions or
-	/// from Analyzer class reparsing the stored content. 
-	/// </summary>
-	/// <author>  maharwood
-	/// </author>
-	public class TokenSources
-	{
-		public class StoredTokenStream : TokenStream
-		{
-			internal Token[] tokens;
-			internal int currentToken = 0;
-            TermAttribute termAtt;
-            OffsetAttribute offsetAtt;
-
-			internal StoredTokenStream(Token[] tokens)
-			{
-				this.tokens = tokens;
+
+    /// <summary> Hides implementation issues associated with obtaining a TokenStream for use with
+    /// the higlighter - can obtain from TermFreqVectors with offsets and (optionally) positions or
+    /// from Analyzer class reparsing the stored content. 
+    /// </summary>
+    public class TokenSources
+    {
+        public class StoredTokenStream : TokenStream
+        {
+            protected internal Token[] tokens;
+            protected internal int currentToken = 0;
+            protected internal TermAttribute termAtt;
+            protected internal OffsetAttribute offsetAtt;
+
+            protected internal StoredTokenStream(Token[] tokens)
+            {
+                this.tokens = tokens;
                 termAtt = AddAttribute<TermAttribute>();
                 offsetAtt = AddAttribute<OffsetAttribute>();
-			}
-			public override bool  IncrementToken()
-			{
+            }
+
+            public override bool IncrementToken()
+            {
                 if (currentToken >= tokens.Length)
                 {
                     return false;
@@ -64,175 +62,206 @@ namespace Lucene.Net.Highlight
                 termAtt.SetTermBuffer(token.Term());
                 offsetAtt.SetOffset(token.StartOffset, token.EndOffset);
                 return true;
-			}
+            }
+
+            protected override void Dispose(bool disposing)
+            {
+                // do nothing
+            }
+        }
+
+        /// <summary>
+        /// A convenience method that tries to first get a TermPositionVector for the specified docId, then, falls back to
+        /// using the passed in {@link org.apache.lucene.document.Document} to retrieve the TokenStream.  This is useful when
+        /// you already have the document, but would prefer to use the vector first.
+        /// </summary>
+        /// <param name="reader">The <see cref="IndexReader"/> to use to try and get the vector from</param>
+        /// <param name="docId">The docId to retrieve.</param>
+        /// <param name="field">The field to retrieve on the document</param>
+        /// <param name="doc">The document to fall back on</param>
+        /// <param name="analyzer">The analyzer to use for creating the TokenStream if the vector doesn't exist</param>
+        /// <returns>The <see cref="TokenStream"/> for the <see cref="IFieldable"/> on the <see cref="Document"/></returns>
+        /// <exception cref="IOException">if there was an error loading</exception>
+        public static TokenStream GetAnyTokenStream(IndexReader reader, int docId, String field, Document doc,
+                                                    Analyzer analyzer)
+        {
+            TokenStream ts = null;
+
+            var tfv = reader.GetTermFreqVector(docId, field);
+            if (tfv != null)
+            {
+                var termPositionVector = tfv as TermPositionVector;
+                if (termPositionVector != null)
+                {
+                    ts = GetTokenStream(termPositionVector);
+                }
+            }
+            //No token info stored so fall back to analyzing raw content
+            return ts ?? GetTokenStream(doc, field, analyzer);
+        }
+
+        /// <summary>
+        /// A convenience method that tries a number of approaches to getting a token stream.
+        /// The cost of finding there are no termVectors in the index is minimal (1000 invocations still 
+        /// registers 0 ms). So this "lazy" (flexible?) approach to coding is probably acceptable
+        /// </summary>
+        /// <returns>null if field not stored correctly</returns>
+        public static TokenStream GetAnyTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
+        {
+            TokenStream ts = null;
+
+            var tfv = reader.GetTermFreqVector(docId, field);
+            if (tfv != null)
+            {
+                var termPositionVector = tfv as TermPositionVector;
+                if (termPositionVector != null)
+                {
+                    ts = GetTokenStream(termPositionVector);
+                }
+            }
+            //No token info stored so fall back to analyzing raw content
+            return ts ?? GetTokenStream(reader, docId, field, analyzer);
+        }
+
+        public static TokenStream GetTokenStream(TermPositionVector tpv)
+        {
+            //assumes the worst and makes no assumptions about token position sequences.
+            return GetTokenStream(tpv, false);
+        }
+
+        /// <summary>
+        /// Low level api.
+        /// Returns a token stream or null if no offset info available in index.
+        /// This can be used to feed the highlighter with a pre-parsed token stream 
+        /// 
+        /// In my tests the speeds to recreate 1000 token streams using this method are:
+        /// - with TermVector offset only data stored - 420  milliseconds 
+        /// - with TermVector offset AND position data stored - 271 milliseconds
+        ///  (nb timings for TermVector with position data are based on a tokenizer with contiguous
+        ///  positions - no overlaps or gaps)
+        /// The cost of not using TermPositionVector to store
+        /// pre-parsed content and using an analyzer to re-parse the original content: 
+        /// - reanalyzing the original content - 980 milliseconds
+        /// 
+        /// The re-analyze timings will typically vary depending on -
+        /// 	1) The complexity of the analyzer code (timings above were using a 
+        /// 	   stemmer/lowercaser/stopword combo)
+        ///  2) The  number of other fields (Lucene reads ALL fields off the disk 
+        ///     when accessing just one document field - can cost dear!)
+        ///  3) Use of compression on field storage - could be faster due to compression (less disk IO)
+        ///     or slower (more CPU burn) depending on the content.
+        /// </summary>
+        /// <param name="tpv"/>
+        /// <param name="tokenPositionsGuaranteedContiguous">true if the token position numbers have no overlaps or gaps. If looking
+        /// to eke out the last drops of performance, set to true. If in doubt, set to false.</param>
+        public static TokenStream GetTokenStream(TermPositionVector tpv, bool tokenPositionsGuaranteedContiguous)
+        {
+            //code to reconstruct the original sequence of Tokens
+            String[] terms = tpv.GetTerms();
+            int[] freq = tpv.GetTermFrequencies();
+
+            int totalTokens = freq.Sum();
+
+            var tokensInOriginalOrder = new Token[totalTokens];
+            List<Token> unsortedTokens = null;
+            for (int t = 0; t < freq.Length; t++)
+            {
+                TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);
+                if (offsets == null)
+                {
+                    return null;
+                }
+
+                int[] pos = null;
+                if (tokenPositionsGuaranteedContiguous)
+                {
+                    //try get the token position info to speed up assembly of tokens into sorted sequence
+                    pos = tpv.GetTermPositions(t);
+                }
+                if (pos == null)
+                {
+                    //tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
+                    if (unsortedTokens == null)
+                    {
+                        unsortedTokens = new List<Token>();
+                    }
+
+                    foreach (TermVectorOffsetInfo t1 in offsets)
+                    {
+                        var token = new Token(t1.StartOffset, t1.EndOffset);
+                        token.SetTermBuffer(terms[t]);
+                        unsortedTokens.Add(token);
+                    }
+                }
+                else
+                {
+                    //We have positions stored and a guarantee that the token position information is contiguous
+
+                    // This may be fast BUT wont work if Tokenizers used which create >1 token in same position or
+                    // creates jumps in position numbers - this code would fail under those circumstances
+
+                    //tokens stored with positions - can use this to index straight into sorted array
+                    for (int tp = 0; tp < pos.Length; tp++)
+                    {
+                        var token = new Token(terms[t], offsets[tp].StartOffset, offsets[tp].EndOffset);
+                        tokensInOriginalOrder[pos[tp]] = token;
+                    }
+                }
+            }
+            //If the field has been stored without position data we must perform a sort        
+            if (unsortedTokens != null)
+            {
+                tokensInOriginalOrder = unsortedTokens.ToArray();
+                Array.Sort(tokensInOriginalOrder, (t1, t2) =>
+                                                      {
+                                                          if (t1.StartOffset > t2.StartOffset)
+                                                              return 1;
+                                                          if (t1.StartOffset < t2.StartOffset)
+                                                              return -1;
+                                                          return 0;
+                                                      });
+            }
+            return new StoredTokenStream(tokensInOriginalOrder);
+        }
+
+        public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field)
+        {
+            var tfv = reader.GetTermFreqVector(docId, field);
+            if (tfv == null)
+            {
+                throw new ArgumentException(field + " in doc #" + docId
+                                            + " does not have any term position data stored");
+            }
+            if (tfv is TermPositionVector)
+            {
+                var tpv = (TermPositionVector) reader.GetTermFreqVector(docId, field);
+                return GetTokenStream(tpv);
+            }
+            throw new ArgumentException(field + " in doc #" + docId
+                                        + " does not have any term position data stored");
+        }
+
+        //convenience method
+        public static TokenStream GetTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
+        {
+            Document doc = reader.Document(docId);
+            return GetTokenStream(doc, field, analyzer);
+        }
+
+        public static TokenStream GetTokenStream(Document doc, String field, Analyzer analyzer)
+        {
+            String contents = doc.Get(field);
+            if (contents == null)
+            {
+                throw new ArgumentException("Field " + field + " in document is not stored and cannot be analyzed");
+            }
+            return GetTokenStream(field, contents, analyzer);
+        }
 
-		    protected override void Dispose(bool disposing)
-		    {
-		        // do nothing
-		    }
-		}
-		private class AnonymousClassComparator : System.Collections.IComparer
-		{
-			public virtual int Compare(System.Object o1, System.Object o2)
-			{
-				Token t1 = (Token) o1;
-				Token t2 = (Token) o2;
-				if (t1.StartOffset > t2.StartOffset)
-					return 1;
-				if (t1.StartOffset < t2.StartOffset)
-					return - 1;
-				return 0;
-			}
-		}
-		/// <summary> A convenience method that tries a number of approaches to getting a token stream.
-		/// The cost of finding there are no termVectors in the index is minimal (1000 invocations still 
-		/// registers 0 ms). So this "lazy" (flexible?) approach to coding is probably acceptable
-		/// </summary>
-        /// <param name="reader"> </param>
-        /// <param name="docId"> </param>
-        /// <param name="field"> </param>
-        /// <param name="analyzer"> </param>
-		/// <returns> null if field not stored correctly </returns>
-		/// <throws>  IOException </throws>
-		public static TokenStream GetAnyTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
-		{
-			TokenStream ts = null;
-			
-			ITermFreqVector tfv = (ITermFreqVector) reader.GetTermFreqVector(docId, field);
-			if (tfv != null)
-			{
-				if (tfv is TermPositionVector)
-				{
-					ts = GetTokenStream((TermPositionVector) tfv);
-				}
-			}
-			//No token info stored so fall back to analyzing raw content
-			if (ts == null)
-			{
-				ts = GetTokenStream(reader, docId, field, analyzer);
-			}
-			return ts;
-		}
-		
-		
-		public static TokenStream GetTokenStream(TermPositionVector tpv)
-		{
-			//assumes the worst and makes no assumptions about token position sequences.
-			return GetTokenStream(tpv, false);
-		}
-		/// <summary> Low level api.
-		/// Returns a token stream or null if no offset info available in index.
-		/// This can be used to feed the highlighter with a pre-parsed token stream 
-		/// 
-		/// In my tests the speeds to recreate 1000 token streams using this method are:
-		/// - with TermVector offset only data stored - 420  milliseconds 
-		/// - with TermVector offset AND position data stored - 271 milliseconds
-		/// (nb timings for TermVector with position data are based on a tokenizer with contiguous
-		/// positions - no overlaps or gaps)
-		/// The cost of not using TermPositionVector to store
-		/// pre-parsed content and using an analyzer to re-parse the original content: 
-		/// - reanalyzing the original content - 980 milliseconds
-		/// 
-		/// The re-analyze timings will typically vary depending on -
-		/// 1) The complexity of the analyzer code (timings above were using a 
-		/// stemmer/lowercaser/stopword combo)
-		/// 2) The  number of other fields (Lucene reads ALL fields off the disk 
-		/// when accessing just one document field - can cost dear!)
-		/// 3) Use of compression on field storage - could be faster cos of compression (less disk IO)
-		/// or slower (more CPU burn) depending on the content.
-		/// 
-		/// </summary>
-        /// <param name="tpv"></param>
-		/// <param name="tokenPositionsGuaranteedContiguous">true if the token position numbers have no overlaps or gaps. If looking
-		/// to eek out the last drops of performance, set to true. If in doubt, set to false.
-		/// </param>
-		public static TokenStream GetTokenStream(TermPositionVector tpv, bool tokenPositionsGuaranteedContiguous)
-		{
-			//an object used to iterate across an array of tokens
-			//code to reconstruct the original sequence of Tokens
-			System.String[] terms = tpv.GetTerms();
-			int[] freq = tpv.GetTermFrequencies();
-			int totalTokens = 0;
-			for (int t = 0; t < freq.Length; t++)
-			{
-				totalTokens += freq[t];
-			}
-			Token[] tokensInOriginalOrder = new Token[totalTokens];
-			System.Collections.ArrayList unsortedTokens = null;
-			for (int t = 0; t < freq.Length; t++)
-			{
-				TermVectorOffsetInfo[] offsets = tpv.GetOffsets(t);
-				if (offsets == null)
-				{
-					return null;
-				}
-				
-				int[] pos = null;
-				if (tokenPositionsGuaranteedContiguous)
-				{
-					//try get the token position info to speed up assembly of tokens into sorted sequence
-					pos = tpv.GetTermPositions(t);
-				}
-				if (pos == null)
-				{
-					//tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
-					if (unsortedTokens == null)
-					{
-						unsortedTokens = new System.Collections.ArrayList();
-					}
-					for (int tp = 0; tp < offsets.Length; tp++)
-					{
-						unsortedTokens.Add(new Token(terms[t], offsets[tp].StartOffset, offsets[tp].EndOffset));
-					}
-				}
-				else
-				{
-					//We have positions stored and a guarantee that the token position information is contiguous
-					
-					// This may be fast BUT wont work if Tokenizers used which create >1 token in same position or
-					// creates jumps in position numbers - this code would fail under those circumstances
-					
-					//tokens stored with positions - can use this to index straight into sorted array
-					for (int tp = 0; tp < pos.Length; tp++)
-					{
-						tokensInOriginalOrder[pos[tp]] = new Token(terms[t], offsets[tp].StartOffset, offsets[tp].EndOffset);
-					}
-				}
-			}
-			//If the field has been stored without position data we must perform a sort        
-			if (unsortedTokens != null)
-			{
-				tokensInOriginalOrder = (Token[]) unsortedTokens.ToArray(typeof(Token));
-				Array.Sort(tokensInOriginalOrder, new AnonymousClassComparator());
-			}
-			return new StoredTokenStream(tokensInOriginalOrder);
-		}
-		
-		public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field)
-		{
-			ITermFreqVector tfv = (ITermFreqVector) reader.GetTermFreqVector(docId, field);
-			if (tfv == null)
-			{
-				throw new System.ArgumentException(field + " in doc #" + docId + "does not have any term position data stored");
-			}
-			if (tfv is TermPositionVector)
-			{
-				TermPositionVector tpv = (TermPositionVector) reader.GetTermFreqVector(docId, field);
-				return GetTokenStream(tpv);
-			}
-			throw new System.ArgumentException(field + " in doc #" + docId + "does not have any term position data stored");
-		}
-		
-		//convenience method
-		public static TokenStream GetTokenStream(IndexReader reader, int docId, System.String field, Analyzer analyzer)
-		{
-			Document doc = reader.Document(docId);
-			System.String contents = doc.Get(field);
-			if (contents == null)
-			{
-				throw new System.ArgumentException("Field " + field + " in document #" + docId + " is not stored and cannot be analyzed");
-			}
-			return analyzer.TokenStream(field, new System.IO.StringReader(contents));
-		}
-	}
+        //convenience method
+        public static TokenStream GetTokenStream(String field, String contents, Analyzer analyzer)
+        {
+            return analyzer.TokenStream(field, new StringReader(contents));
+        }
+    }
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTerm.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTerm.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTerm.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTerm.cs Fri Apr  6 23:37:48 2012
@@ -19,61 +19,44 @@ using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
-using Lucene.Net.Highlight;
 
 namespace Lucene.Net.Search.Highlight
 {
-    /**
-     * Lightweight class to hold term, weight, and positions used for scoring this
-     * term.
-     */
-
+    /// <summary>
+    /// Lightweight class to hold term, Weight, and positions used for scoring this term.
+    /// </summary>
     public class WeightedSpanTerm : WeightedTerm
     {
-        private bool positionSensitive;
-        private List<PositionSpan> positionSpans = new List<PositionSpan>();
-
-        /**
-         * @param weight
-         * @param term
-         */
+        private bool _positionSensitive;
+        private readonly List<PositionSpan> _positionSpans = new List<PositionSpan>();
 
         public WeightedSpanTerm(float weight, String term)
             : base(weight, term)
         {
 
-            this.positionSpans = new List<PositionSpan>();
+            this._positionSpans = new List<PositionSpan>();
         }
 
-        /**
-         * @param weight
-         * @param term
-         * @param positionSensitive
-         */
-
         public WeightedSpanTerm(float weight, String term, bool positionSensitive)
             : base(weight, term)
         {
 
-            this.positionSensitive = positionSensitive;
+            this._positionSensitive = positionSensitive;
         }
 
-        /**
-         * Checks to see if this term is valid at <code>position</code>.
-         *
-         * @param position
-         *            to check against valid term postions
-         * @return true iff this term is a hit at this position
-         */
-
-        public bool checkPosition(int position)
+        /// <summary>
+        /// Checks to see if this term is valid at <c>position</c>.
+        /// </summary>
+        /// <param name="position">to check against valid term positions</param>
+        /// <returns>true iff this term is a hit at this position</returns>
+        public bool CheckPosition(int position)
         {
             // There would probably be a slight speed improvement if PositionSpans
             // where kept in some sort of priority queue - that way this method
             // could
             // bail early without checking each PositionSpan.
 
-            foreach (var positionSpan in positionSpans)
+            foreach (var positionSpan in _positionSpans)
             {
                 if (((position >= positionSpan.Start) && (position <= positionSpan.End)))
                 {
@@ -84,24 +67,24 @@ namespace Lucene.Net.Search.Highlight
             return false;
         }
 
-        public void addPositionSpans(List<PositionSpan> positionSpans)
+        public void AddPositionSpans(List<PositionSpan> positionSpans)
         {
-            this.positionSpans.AddRange(positionSpans);
+            this._positionSpans.AddRange(positionSpans);
         }
 
-        public bool isPositionSensitive()
+        public bool IsPositionSensitive()
         {
-            return positionSensitive;
+            return _positionSensitive;
         }
 
-        public void setPositionSensitive(bool positionSensitive)
+        public void SetPositionSensitive(bool positionSensitive)
         {
-            this.positionSensitive = positionSensitive;
+            this._positionSensitive = positionSensitive;
         }
 
-        public List<PositionSpan> getPositionSpans()
+        public List<PositionSpan> GetPositionSpans()
         {
-            return positionSpans;
+            return _positionSpans;
         }
     }
 

Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTermExtractor.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTermExtractor.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTermExtractor.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedSpanTermExtractor.cs Fri Apr  6 23:37:48 2012
@@ -5,6 +5,7 @@ using System.Linq;
 using System.Text;
 using Lucene.Net.Analysis;
 using Lucene.Net.Index;
+using Lucene.Net.Index.Memory;
 using Lucene.Net.Search.Spans;
 using Lucene.Net.Store;
 using Lucene.Net.Support;
@@ -12,14 +13,12 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Search.Highlight
 {
-    /**
-     * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether 
-     * {@link Term}s from the {@link Query} are contained in a supplied {@link TokenStream}.
-     */
-
+    /// <summary>
+    /// Class used to extract <see cref="WeightedSpanTerm"/>s from a <see cref="Query"/> based on whether 
+    /// <see cref="Term"/>s from the <see cref="Query"/> are contained in a supplied <see cref="TokenStream"/>.
+    /// </summary>
     public class WeightedSpanTermExtractor
     {
-
         private String fieldName;
         private TokenStream tokenStream;
         private IDictionary<String, IndexReader> readers = new HashMap<String, IndexReader>(10);
@@ -40,7 +39,7 @@ namespace Lucene.Net.Search.Highlight
             }
         }
 
-        private void closeReaders()
+        private void CloseReaders()
         {
             ICollection<IndexReader> readerSet = readers.Values;
 
@@ -57,17 +56,12 @@ namespace Lucene.Net.Search.Highlight
             }
         }
 
-        /**
-         * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>.
-         * 
-         * @param query
-         *          Query to extract Terms from
-         * @param terms
-         *          Map to place created WeightedSpanTerms in
-         * @throws IOException
-         */
-
-        private void extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
+        /// <summary>
+        /// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <c>Query</c>.
+        /// </summary>
+        /// <param name="query">Query to extract Terms from</param>
+        /// <param name="terms">Map to place created WeightedSpanTerms in</param>
+        private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
         {
             if (query is BooleanQuery)
             {
@@ -75,9 +69,9 @@ namespace Lucene.Net.Search.Highlight
 
                 for (int i = 0; i < queryClauses.Length; i++)
                 {
-                    if (!queryClauses[i].Prohibited)
+                    if (!queryClauses[i].IsProhibited)
                     {
-                        extract(queryClauses[i].Query, terms);
+                        Extract(queryClauses[i].Query, terms);
                     }
                 }
             }
@@ -118,25 +112,25 @@ namespace Lucene.Net.Search.Highlight
 
                 SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
                 sp.Boost = query.Boost;
-                extractWeightedSpanTerms(terms, sp);
+                ExtractWeightedSpanTerms(terms, sp);
             }
             else if (query is TermQuery)
             {
-                extractWeightedTerms(terms, query);
+                ExtractWeightedTerms(terms, query);
             }
             else if (query is SpanQuery)
             {
-                extractWeightedSpanTerms(terms, (SpanQuery) query);
+                ExtractWeightedSpanTerms(terms, (SpanQuery) query);
             }
             else if (query is FilteredQuery)
             {
-                extract(((FilteredQuery) query).Query, terms);
+                Extract(((FilteredQuery) query).Query, terms);
             }
             else if (query is DisjunctionMaxQuery)
             {
                 foreach (var q in ((DisjunctionMaxQuery) query))
                 {
-                    extract(q, terms);
+                    Extract(q, terms);
                 }
             }
             else if (query is MultiTermQuery && expandMultiTermQuery)
@@ -152,8 +146,8 @@ namespace Lucene.Net.Search.Highlight
                 MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fReader, mtq);
                 if (fReader.Field != null)
                 {
-                    IndexReader ir = getReaderForField(fReader.Field);
-                    extract(query.Rewrite(ir), terms);
+                    IndexReader ir = GetReaderForField(fReader.Field);
+                    Extract(query.Rewrite(ir), terms);
                 }
             }
             else if (query is MultiPhraseQuery)
@@ -173,13 +167,13 @@ namespace Lucene.Net.Search.Highlight
                         }
                     }
 
-                    var disjunctLists = new IList<SpanQuery>[maxPosition + 1];
+                    var disjunctLists = new List<SpanQuery>[maxPosition + 1];
                     int distinctPositions = 0;
 
                     for (int i = 0; i < termArrays.Count; ++i)
                     {
                         Term[] termArray = termArrays[i];
-                        IList<SpanQuery> disjuncts = disjunctLists[positions[i]];
+                        List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                         if (disjuncts == null)
                         {
                             disjuncts = (disjunctLists[positions[i]] = new List<SpanQuery>(termArray.Length));
@@ -193,10 +187,10 @@ namespace Lucene.Net.Search.Highlight
 
                     int positionGaps = 0;
                     int position = 0;
-                    var clauses = new SpanQuery[distinctPositions];
+                    SpanQuery[] clauses = new SpanQuery[distinctPositions];
                     for (int i = 0; i < disjunctLists.Length; ++i)
                     {
-                        IList<SpanQuery> disjuncts = disjunctLists[i];
+                        List<SpanQuery> disjuncts = disjunctLists[i];
                         if (disjuncts != null)
                         {
                             clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
@@ -212,33 +206,29 @@ namespace Lucene.Net.Search.Highlight
 
                     SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                     sp.Boost = query.Boost;
-                    extractWeightedSpanTerms(terms, sp);
+                    ExtractWeightedSpanTerms(terms, sp);
                 }
             }
         }
 
-        /**
-         * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>SpanQuery</code>.
-         * 
-         * @param terms
-         *          Map to place created WeightedSpanTerms in
-         * @param spanQuery
-         *          SpanQuery to extract Terms from
-         * @throws IOException
-         */
-
-        private void extractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
+        /// <summary>
+        /// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <c>SpanQuery</c>.
+        /// </summary>
+        /// <param name="terms">Map to place created WeightedSpanTerms in</param>
+        /// <param name="spanQuery">SpanQuery to extract Terms from</param>
+        private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
         {
-            ISet<String> fieldNames;
+            HashSet<String> fieldNames;
 
             if (fieldName == null)
             {
                 fieldNames = new HashSet<String>();
-                collectSpanQueryFields(spanQuery, fieldNames);
+                CollectSpanQueryFields(spanQuery, fieldNames);
             }
             else
             {
-                fieldNames = new HashSet<String> {fieldName};
+                fieldNames = new HashSet<String>();
+                fieldNames.Add(fieldName);
             }
             // To support the use of the default field name
             if (defaultField != null)
@@ -246,15 +236,15 @@ namespace Lucene.Net.Search.Highlight
                 fieldNames.Add(defaultField);
             }
 
-            HashMap<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
+            IDictionary<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
 
-            ISet<Term> nonWeightedTerms = new HashSet<Term>();
-            bool mrq = mustRewriteQuery(spanQuery);
-            if (mrq)
+            HashSet<Term> nonWeightedTerms = new HashSet<Term>();
+            bool mustRewriteQuery = MustRewriteQuery(spanQuery);
+            if (mustRewriteQuery)
             {
-                foreach (var field in fieldNames)
+                foreach (String field in fieldNames)
                 {
-                    SpanQuery rewrittenQuery = (SpanQuery) spanQuery.Rewrite(getReaderForField(field));
+                    SpanQuery rewrittenQuery = (SpanQuery) spanQuery.Rewrite(GetReaderForField(field));
                     queries[field] = rewrittenQuery;
                     rewrittenQuery.ExtractTerms(nonWeightedTerms);
                 }
@@ -269,8 +259,16 @@ namespace Lucene.Net.Search.Highlight
             foreach (String field in fieldNames)
             {
 
-                IndexReader reader = getReaderForField(field);
-                Spans.Spans spans = mrq ? queries[field].GetSpans(reader) : spanQuery.GetSpans(reader);
+                IndexReader reader = GetReaderForField(field);
+                Spans.Spans spans;
+                if (mustRewriteQuery)
+                {
+                    spans = queries[field].GetSpans(reader);
+                }
+                else
+                {
+                    spans = spanQuery.GetSpans(reader);
+                }
 
 
                 // collect span positions
@@ -290,47 +288,42 @@ namespace Lucene.Net.Search.Highlight
             foreach (Term queryTerm in nonWeightedTerms)
             {
 
-                if (fieldNameComparator(queryTerm.Field))
+                if (FieldNameComparator(queryTerm.Field))
                 {
                     WeightedSpanTerm weightedSpanTerm = terms[queryTerm.Text];
 
                     if (weightedSpanTerm == null)
                     {
                         weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
-                        weightedSpanTerm.addPositionSpans(spanPositions);
-                        weightedSpanTerm.setPositionSensitive(true);
+                        weightedSpanTerm.AddPositionSpans(spanPositions);
+                        weightedSpanTerm.SetPositionSensitive(true);
                         terms[queryTerm.Text] = weightedSpanTerm;
                     }
                     else
                     {
                         if (spanPositions.Count > 0)
                         {
-                            weightedSpanTerm.addPositionSpans(spanPositions);
+                            weightedSpanTerm.AddPositionSpans(spanPositions);
                         }
                     }
                 }
             }
         }
 
-        /**
-         * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>.
-         * 
-         * @param terms
-         *          Map to place created WeightedSpanTerms in
-         * @param query
-         *          Query to extract Terms from
-         * @throws IOException
-         */
-
-        private void extractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms, Query query)
+        /// <summary>
+        /// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using the terms from the supplied <c>Query</c>.
+        /// </summary>
+        /// <param name="terms"></param>
+        /// <param name="query"></param>
+        private void ExtractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms, Query query)
         {
-            ISet<Term> nonWeightedTerms = new HashSet<Term>();
+            HashSet<Term> nonWeightedTerms = new HashSet<Term>();
             query.ExtractTerms(nonWeightedTerms);
 
             foreach (Term queryTerm in nonWeightedTerms)
             {
 
-                if (fieldNameComparator(queryTerm.Field))
+                if (FieldNameComparator(queryTerm.Field))
                 {
                     WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.Boost, queryTerm.Text);
                     terms[queryTerm.Text] = weightedSpanTerm;
@@ -338,18 +331,17 @@ namespace Lucene.Net.Search.Highlight
             }
         }
 
-        /**
-         * Necessary to implement matches for queries against <code>defaultField</code>
-         */
-
-        private bool fieldNameComparator(String fieldNameToCheck)
+        /// <summary>
+        /// Necessary to implement matches for queries against <code>defaultField</code>
+        /// </summary>
+        private bool FieldNameComparator(String fieldNameToCheck)
         {
             bool rv = fieldName == null || fieldNameToCheck == fieldName
                       || fieldNameToCheck == defaultField;
             return rv;
         }
 
-        private IndexReader getReaderForField(String field)
+        private IndexReader GetReaderForField(String field)
         {
             if (wrapToCaching && !cachedTokenStream && !(tokenStream is CachingTokenFilter))
             {
@@ -359,52 +351,38 @@ namespace Lucene.Net.Search.Highlight
             IndexReader reader = readers[field];
             if (reader == null)
             {
-                //MemoryIndex indexer = new MemoryIndex();
-                //indexer.AddField(field, tokenStream);
-                //tokenStream.Reset();
-                //IndexSearcher searcher = indexer.CreateSearcher();
-                //reader = searcher.IndexReader;
-                //readers[field] = reader;
+                MemoryIndex indexer = new MemoryIndex();
+                indexer.AddField(field, tokenStream);
+                tokenStream.Reset();
+                IndexSearcher searcher = indexer.CreateSearcher();
+                reader = searcher.IndexReader;
+                readers[field] = reader;
             }
 
             return reader;
         }
 
-        /**
-         * Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>.
-         * 
-         * <p>
-         * 
-         * @param query
-         *          that caused hit
-         * @param tokenStream
-         *          of text to be highlighted
-         * @return Map containing WeightedSpanTerms
-         * @throws IOException
-         */
-
-        public HashMap<String, WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream)
-        {
-            return getWeightedSpanTerms(query, tokenStream, null);
-        }
-
-        /**
-         * Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>.
-         * 
-         * <p>
-         * 
-         * @param query
-         *          that caused hit
-         * @param tokenStream
-         *          of text to be highlighted
-         * @param fieldName
-         *          restricts Term's used based on field name
-         * @return Map containing WeightedSpanTerms
-         * @throws IOException
-         */
+        /// <summary>
+        /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>.
+        /// </summary>
+        /// <param name="query">query that caused hit</param>
+        /// <param name="tokenStream">TokenStream of text to be highlighted</param>
+        /// <returns>Map containing WeightedSpanTerms</returns>
+        public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream)
+        {
+            return GetWeightedSpanTerms(query, tokenStream, null);
+        }
 
-        public HashMap<String, WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream,
-                                                                      String fieldName)
+
+        /// <summary>
+        /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>.
+        /// </summary>
+        /// <param name="query">query that caused hit</param>
+        /// <param name="tokenStream">tokenStream of text to be highlighted</param>
+        /// <param name="fieldName">restricts Term's used based on field name</param>
+        /// <returns>Map containing WeightedSpanTerms</returns>
+        public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream,
+                                                                          String fieldName)
         {
             if (fieldName != null)
             {
@@ -415,41 +393,31 @@ namespace Lucene.Net.Search.Highlight
                 this.fieldName = null;
             }
 
-            HashMap<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();
+            IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();
             this.tokenStream = tokenStream;
             try
             {
-                extract(query, terms);
+                Extract(query, terms);
             }
             finally
             {
-                closeReaders();
+                CloseReaders();
             }
 
             return terms;
         }
 
-        /**
-         * Creates a Map of <code>WeightedSpanTerms</code> from the given <code>Query</code> and <code>TokenStream</code>. Uses a supplied
-         * <code>IndexReader</code> to properly weight terms (for gradient highlighting).
-         * 
-         * <p>
-         * 
-         * @param query
-         *          that caused hit
-         * @param tokenStream
-         *          of text to be highlighted
-         * @param fieldName
-         *          restricts Term's used based on field name
-         * @param reader
-         *          to use for scoring
-         * @return Map of WeightedSpanTerms with quasi tf/idf scores
-         * @throws IOException
-         */
-
-        public HashMap<String, WeightedSpanTerm> getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream,
-                                                                                String fieldName,
-                                                                                IndexReader reader)
+        /// <summary>
+        /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>. Uses a supplied
+        /// <c>IndexReader</c> to properly Weight terms (for gradient highlighting).
+        /// </summary>
+        /// <param name="query">Query that caused hit</param>
+        /// <param name="tokenStream">Tokenstream of text to be highlighted</param>
+        /// <param name="fieldName">restricts Term's used based on field name</param>
+        /// <param name="reader">to use for scoring</param>
+        /// <returns>Map of WeightedSpanTerms with quasi tf/idf scores</returns>
+        public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream,
+                                                                                    String fieldName, IndexReader reader)
         {
             if (fieldName != null)
             {
@@ -461,18 +429,18 @@ namespace Lucene.Net.Search.Highlight
             }
             this.tokenStream = tokenStream;
 
-            HashMap<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();
-            extract(query, terms);
+            IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();
+            Extract(query, terms);
 
             int totalNumDocs = reader.NumDocs();
             var weightedTerms = terms.Keys;
 
             try
             {
-                foreach (var term in weightedTerms)
+                foreach (var wt in weightedTerms)
                 {
-                    WeightedSpanTerm weightedSpanTerm = terms[term];
-                    int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.term));
+                    WeightedSpanTerm weightedSpanTerm = terms[wt];
+                    int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));
                     // docFreq counts deletes
                     if (totalNumDocs < docFreq)
                     {
@@ -480,45 +448,44 @@ namespace Lucene.Net.Search.Highlight
                     }
                     // IDF algorithm taken from DefaultSimilarity class
                     float idf = (float) (Math.Log((float) totalNumDocs/(double) (docFreq + 1)) + 1.0);
-                    weightedSpanTerm.weight *= idf;
+                    weightedSpanTerm.Weight *= idf;
                 }
-
             }
             finally
             {
 
-                closeReaders();
+                CloseReaders();
             }
 
             return terms;
         }
 
-        private void collectSpanQueryFields(SpanQuery spanQuery, ISet<String> fieldNames)
+        private void CollectSpanQueryFields(SpanQuery spanQuery, HashSet<String> fieldNames)
         {
             if (spanQuery is FieldMaskingSpanQuery)
             {
-                collectSpanQueryFields(((FieldMaskingSpanQuery) spanQuery).MaskedQuery, fieldNames);
+                CollectSpanQueryFields(((FieldMaskingSpanQuery) spanQuery).MaskedQuery, fieldNames);
             }
             else if (spanQuery is SpanFirstQuery)
             {
-                collectSpanQueryFields(((SpanFirstQuery) spanQuery).Match, fieldNames);
+                CollectSpanQueryFields(((SpanFirstQuery) spanQuery).Match, fieldNames);
             }
             else if (spanQuery is SpanNearQuery)
             {
                 foreach (SpanQuery clause in ((SpanNearQuery) spanQuery).GetClauses())
                 {
-                    collectSpanQueryFields(clause, fieldNames);
+                    CollectSpanQueryFields(clause, fieldNames);
                 }
             }
             else if (spanQuery is SpanNotQuery)
             {
-                collectSpanQueryFields(((SpanNotQuery) spanQuery).Include, fieldNames);
+                CollectSpanQueryFields(((SpanNotQuery) spanQuery).Include, fieldNames);
             }
             else if (spanQuery is SpanOrQuery)
             {
                 foreach (SpanQuery clause in ((SpanOrQuery) spanQuery).GetClauses())
                 {
-                    collectSpanQueryFields(clause, fieldNames);
+                    CollectSpanQueryFields(clause, fieldNames);
                 }
             }
             else
@@ -527,7 +494,7 @@ namespace Lucene.Net.Search.Highlight
             }
         }
 
-        private bool mustRewriteQuery(SpanQuery spanQuery)
+        private bool MustRewriteQuery(SpanQuery spanQuery)
         {
             if (!expandMultiTermQuery)
             {
@@ -535,17 +502,17 @@ namespace Lucene.Net.Search.Highlight
             }
             else if (spanQuery is FieldMaskingSpanQuery)
             {
-                return mustRewriteQuery(((FieldMaskingSpanQuery) spanQuery).MaskedQuery);
+                return MustRewriteQuery(((FieldMaskingSpanQuery)spanQuery).MaskedQuery);
             }
             else if (spanQuery is SpanFirstQuery)
             {
-                return mustRewriteQuery(((SpanFirstQuery) spanQuery).Match);
+                return MustRewriteQuery(((SpanFirstQuery)spanQuery).Match);
             }
             else if (spanQuery is SpanNearQuery)
             {
                 foreach (SpanQuery clause in ((SpanNearQuery) spanQuery).GetClauses())
                 {
-                    if (mustRewriteQuery(clause))
+                    if (MustRewriteQuery(clause))
                     {
                         return true;
                     }
@@ -555,13 +522,13 @@ namespace Lucene.Net.Search.Highlight
             else if (spanQuery is SpanNotQuery)
             {
                 SpanNotQuery spanNotQuery = (SpanNotQuery) spanQuery;
-                return mustRewriteQuery(spanNotQuery.Include) || mustRewriteQuery(spanNotQuery.Exclude);
+                return MustRewriteQuery(spanNotQuery.Include) || MustRewriteQuery(spanNotQuery.Exclude);
             }
             else if (spanQuery is SpanOrQuery)
             {
                 foreach (SpanQuery clause in ((SpanOrQuery) spanQuery).GetClauses())
                 {
-                    if (mustRewriteQuery(clause))
+                    if (MustRewriteQuery(clause))
                     {
                         return true;
                     }
@@ -578,88 +545,96 @@ namespace Lucene.Net.Search.Highlight
             }
         }
 
-        /**
-         * This class makes sure that if both position sensitive and insensitive
-         * versions of the same term are added, the position insensitive one wins.
-         */
-
+        
+        /// <summary>
+        /// This class makes sure that if both position sensitive and insensitive
+        /// versions of the same term are added, the position insensitive one wins.
+        /// </summary>
+        /// <typeparam name="K"></typeparam>
         private class PositionCheckingMap<K> : HashMap<K, WeightedSpanTerm>
         {
+            public PositionCheckingMap()
+            {
+
+            }
 
-            public void PutAll(IDictionary<K, WeightedSpanTerm> m)
+            public PositionCheckingMap(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
+            {
+                PutAll(m);
+            }
+
+            public void PutAll(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
             {
                 foreach (var entry in m)
                 {
-                    this[entry.Key] = entry.Value;
+                    Add(entry.Key, entry.Value);
                 }
             }
 
             public override void Add(K key, WeightedSpanTerm value)
             {
-                WeightedSpanTerm prev = base[key] = value;
+                base.Add(key, value);
+                WeightedSpanTerm prev = this[key];
+
+                if (prev == null) return;
 
                 WeightedSpanTerm prevTerm = prev;
                 WeightedSpanTerm newTerm = value;
-                if (!prevTerm.isPositionSensitive())
+                if (!prevTerm.IsPositionSensitive())
                 {
-                    newTerm.setPositionSensitive(false);
+                    newTerm.SetPositionSensitive(false);
                 }
             }
 
         }
 
-        public bool getExpandMultiTermQuery()
+        public bool GetExpandMultiTermQuery()
         {
             return expandMultiTermQuery;
         }
 
-        public void setExpandMultiTermQuery(bool expandMultiTermQuery)
+        public void SetExpandMultiTermQuery(bool expandMultiTermQuery)
         {
             this.expandMultiTermQuery = expandMultiTermQuery;
         }
 
-        public bool isCachedTokenStream()
+        public bool IsCachedTokenStream()
         {
             return cachedTokenStream;
         }
 
-        public TokenStream getTokenStream()
+        public TokenStream GetTokenStream()
         {
             return tokenStream;
         }
 
-        /**
-         * By default, {@link TokenStream}s that are not of the type
-         * {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
-         * ensure an efficient reset - if you are already using a different caching
-         * {@link TokenStream} impl and you don't want it to be wrapped, set this to
-         * false.
-         * 
-         * @param wrap
-         */
 
-        public void setWrapIfNotCachingTokenFilter(bool wrap)
+        /// <summary>
+        /// By default, <see cref="TokenStream"/>s that are not of the type
+        /// <see cref="CachingTokenFilter"/> are wrapped in a <see cref="CachingTokenFilter"/> to
+        /// <see cref="TokenStream"/> impl and you don't want it to be wrapped, set this to
+        /// false.
+        /// </summary>
+        public void SetWrapIfNotCachingTokenFilter(bool wrap)
         {
             this.wrapToCaching = wrap;
         }
 
-        /**
-         * 
-         * A fake IndexReader class to extract the field from a MultiTermQuery
-         * 
-         */
-        private class FakeReader : FilterIndexReader
-        {
-            //See if this will work.
-            private static IndexReader EMPTY_MEMORY_INDEX_READER = IndexReader.Open(new RAMDirectory());
-            //private static IndexReader EMPTY_MEMORY_INDEX_READER = new MemoryIndex().createSearcher().getIndexReader();
+        /// <summary>
+        /// A fake IndexReader class to extract the field from a MultiTermQuery
+        /// </summary>
+        protected internal sealed class FakeReader : FilterIndexReader
+        {
+
+            private static IndexReader EMPTY_MEMORY_INDEX_READER = new MemoryIndex().CreateSearcher().IndexReader;
 
-            public FakeReader()
+            public String Field { get; private set; }
+
+            protected internal FakeReader()
                 : base(EMPTY_MEMORY_INDEX_READER)
             {
-            }
 
-            public string Field { get; set; }
+            }
 
             public override TermEnum Terms(Term t)
             {
@@ -668,6 +643,8 @@ namespace Lucene.Net.Search.Highlight
                     Field = t.Field;
                 return base.Terms(t);
             }
+
+
         }
     }
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedTerm.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedTerm.cs?rev=1310635&r1=1310634&r2=1310635&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedTerm.cs (original)
+++ incubator/lucene.net/trunk/src/contrib/Highlighter/WeightedTerm.cs Fri Apr  6 23:37:48 2012
@@ -17,49 +17,28 @@
 
 using System;
 
-namespace Lucene.Net.Highlight
+namespace Lucene.Net.Search.Highlight
 {
-	/// <summary>Lightweight class to hold term and a weight value used for scoring this term </summary>
-	/// <author>  Mark Harwood
-	/// </author>
-	public class WeightedTerm
-	{
-		internal float weight; // multiplier
-		internal System.String term; //stemmed form
+    /// <summary>
+    /// Lightweight class to hold term and a Weight value used for scoring this term
+    /// </summary>
+    public class WeightedTerm
+    {
+        public WeightedTerm(float weight, String term)
+        {
+            this.Weight = weight;
+            this.Term = term;
+        }
 
-		public WeightedTerm(float weight, System.String term)
-		{
-			this.weight = weight;
-			this.term = term;
-		}
-		
-		
-		/// <returns> the term value (stemmed)
-		/// </returns>
-		public virtual System.String GetTerm()
-		{
-			return term;
-		}
-		
-		/// <returns> the weight associated with this term
-		/// </returns>
-		public virtual float GetWeight()
-		{
-			return weight;
-		}
-		
-		/// <param name="term">the term value (stemmed)
-		/// </param>
-		public virtual void  SetTerm(System.String term)
-		{
-			this.term = term;
-		}
-		
-		/// <param name="weight">the weight associated with this term
-		/// </param>
-		public virtual void  SetWeight(float weight)
-		{
-			this.weight = weight;
-		}
-	}
+        /// <summary>
+        /// the term value (stemmed)
+        /// </summary>
+        public string Term { get; set; }
+
+        /// <summary>
+        /// the Weight associated with this term
+        /// </summary>
+        /// <value> </value>
+        public float Weight { get; set; }
+    }
 }
\ No newline at end of file

Added: incubator/lucene.net/trunk/src/contrib/Memory/CollectionsHelper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Memory/CollectionsHelper.cs?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Memory/CollectionsHelper.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Memory/CollectionsHelper.cs Fri Apr  6 23:37:48 2012
@@ -0,0 +1,85 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Index.Memory
+{
+    internal static class CollectionsHelper<T>
+    {
+        private static readonly T[] EmptyArray = new T[0];
+
+        /// <summary>
+        /// Returns an empty list of type T
+        /// </summary>
+        public static IList<T> EmptyList()
+        {
+            return EmptyArray;
+        }
+    }
+
+    public static class CollectionsExtensions
+    {
+        public static ICollection<T> AsReadOnly<T>(this ICollection<T> collection)
+        {
+            return new ReadOnlyCollection<T>(collection);
+        }
+
+        private sealed class ReadOnlyCollection<T> : ICollection<T>
+        {
+            private readonly ICollection<T> _other;
+
+            public ReadOnlyCollection(ICollection<T> other)
+            {
+                _other = other;
+            }
+
+            public IEnumerator<T> GetEnumerator()
+            {
+                return _other.GetEnumerator();
+            }
+
+            IEnumerator IEnumerable.GetEnumerator()
+            {
+                return GetEnumerator();
+            }
+
+            public void Add(T item)
+            {
+                throw new NotSupportedException("Collection is read only!");
+            }
+
+            public void Clear()
+            {
+                throw new NotSupportedException("Collection is read only!");
+            }
+
+            public bool Contains(T item)
+            {
+                return _other.Contains(item);
+            }
+
+            public void CopyTo(T[] array, int arrayIndex)
+            {
+                _other.CopyTo(array, arrayIndex);
+            }
+
+            public bool Remove(T item)
+            {
+                throw new NotSupportedException("Collection is read only!");
+            }
+
+            public int Count
+            {
+                get { return _other.Count; }
+            }
+
+            public bool IsReadOnly
+            {
+                get { return true; }
+            }
+        }
+    }
+}

Added: incubator/lucene.net/trunk/src/contrib/Memory/Contrib.Memory.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Memory/Contrib.Memory.csproj?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Memory/Contrib.Memory.csproj (added)
+++ incubator/lucene.net/trunk/src/contrib/Memory/Contrib.Memory.csproj Fri Apr  6 23:37:48 2012
@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{112B9A7C-29CC-4539-8F5A-45669C07CD4D}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Index.Memory</RootNamespace>
+    <AssemblyName>Lucene.Net.Index.MemoryIndex</AssemblyName>
+    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <TargetFrameworkProfile />
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup>
+    <SignAssembly>true</SignAssembly>
+  </PropertyGroup>
+  <PropertyGroup>
+    <AssemblyOriginatorKeyFile>Lucene.Net.snk</AssemblyOriginatorKeyFile>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="CollectionsHelper.cs" />
+    <Compile Include="EmptyCollector.cs" />
+    <Compile Include="KeywordTokenStream.cs" />
+    <Compile Include="MemoryIndex.cs" />
+    <Compile Include="MemoryTermPositionVector.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="TermComparer.cs" />
+    <Compile Include="MemoryTermEnum.cs" />
+    <Compile Include="MemoryTermPositions.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\core\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="Lucene.Net.snk" />
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

Added: incubator/lucene.net/trunk/src/contrib/Memory/EmptyCollector.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Memory/EmptyCollector.cs?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Memory/EmptyCollector.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Memory/EmptyCollector.cs Fri Apr  6 23:37:48 2012
@@ -0,0 +1,45 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Index.Memory
+{
    public partial class MemoryIndex
    {
        /// <summary>
        /// Fills the given float array with the values
        /// as the collector scores the search
        /// </summary>
        private sealed class FillingCollector : Collector
        {
            // Destination slot for the collected score; owned by the caller.
            private readonly float[] _scores;
            // Supplied by the search machinery via SetScorer before Collect runs.
            private Scorer _scorer;

            public FillingCollector(float[] scores)
            {
                _scores = scores;
            }

            public override void SetScorer(Scorer scorer)
            {
                _scorer = scorer;
            }

            public override void Collect(int doc)
            {
                // NOTE(review): always writes slot 0, so a later hit would overwrite an
                // earlier one -- presumably fine because a MemoryIndex holds a single
                // document; confirm against the calling search code.
                _scores[0] = _scorer.Score();
            }

            // No per-reader state to track; the doc id passed to Collect is unused anyway.
            public override void SetNextReader(IndexReader reader, int docBase)
            { }

            // Order does not matter since every hit lands in the same slot.
            public override bool AcceptsDocsOutOfOrder
            {
                get { return true; }
            }
        }
    }
+}

Added: incubator/lucene.net/trunk/src/contrib/Memory/KeywordTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Memory/KeywordTokenStream.cs?rev=1310635&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Memory/KeywordTokenStream.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Memory/KeywordTokenStream.cs Fri Apr  6 23:37:48 2012
@@ -0,0 +1,48 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Index.Memory
+{
+    public partial class MemoryIndex
+    {
+        private sealed class KeywordTokenStream<T> : TokenStream
+        {
+            private IEnumerator<T> iter;
+            private int start = 0;
+            private TermAttribute termAtt;
+            private OffsetAttribute offsetAtt;
+
+            public KeywordTokenStream(IEnumerable<T> keywords)
+            {
+                iter = keywords.GetEnumerator();
+                termAtt = AddAttribute<TermAttribute>();
+                offsetAtt = AddAttribute<OffsetAttribute>();
+            }
+
+            public override bool IncrementToken()
+            {
+                if (!iter.MoveNext()) return false;
+
+                T obj = iter.Current;
+                if (obj == null)
+                    throw new ArgumentException("keyword must not be null");
+
+                String term = obj.ToString();
+                ClearAttributes();
+                termAtt.SetTermBuffer(term);
+                offsetAtt.SetOffset(start, start + termAtt.TermLength());
+                start += term.Length + 1; // separate words by 1 (blank) character
+                return true;
+            }
+
+            protected override void Dispose(bool disposing)
+            {
+            }
+        }
+    }
+}

Added: incubator/lucene.net/trunk/src/contrib/Memory/Lucene.Net.snk
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Memory/Lucene.Net.snk?rev=1310635&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/lucene.net/trunk/src/contrib/Memory/Lucene.Net.snk
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



Mime
View raw message