lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mhern...@apache.org
Subject [12/16] git commit: Started porting QueryParsers.Flexible.Standard
Date Sun, 06 Oct 2013 23:48:00 GMT
Started porting QueryParsers.Flexible.Standard


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f66837d2
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f66837d2
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f66837d2

Branch: refs/heads/branch_4x
Commit: f66837d249e43a23258cc630442c85507801e84c
Parents: 539a6de
Author: Paul Irwin <paulirwin@gmail.com>
Authored: Fri Oct 4 14:05:50 2013 -0400
Committer: Paul Irwin <paulirwin@gmail.com>
Committed: Sat Oct 5 16:37:29 2013 -0400

----------------------------------------------------------------------
 .../QueryParsers/Contrib.QueryParsers.csproj    |   4 +
 .../Standard/Builders/AnyQueryNodeBuilder.cs    |  58 +++-
 .../Builders/BooleanQueryNodeBuilder.cs         |  81 +++++
 .../Standard/Builders/IStandardQueryBuilder.cs  |   2 +-
 .../Standard/Parser/EscapeQuerySyntaxImpl.cs    | 297 +++++++++++++++++++
 .../Flexible/Standard/Parser/ICharStream.cs     |  37 +++
 .../QueryParsers/Support/StringExtensions.cs    |  16 +
 7 files changed, 492 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Contrib.QueryParsers.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/QueryParsers/Contrib.QueryParsers.csproj b/src/contrib/QueryParsers/Contrib.QueryParsers.csproj
index 59815b4..b69ba58 100644
--- a/src/contrib/QueryParsers/Contrib.QueryParsers.csproj
+++ b/src/contrib/QueryParsers/Contrib.QueryParsers.csproj
@@ -110,9 +110,13 @@
     <Compile Include="Flexible\Messages\Message.cs" />
     <Compile Include="Flexible\Messages\NLS.cs" />
     <Compile Include="Flexible\Standard\Builders\AnyQueryNodeBuilder.cs" />
+    <Compile Include="Flexible\Standard\Builders\BooleanQueryNodeBuilder.cs" />
     <Compile Include="Flexible\Standard\Builders\IStandardQueryBuilder.cs" />
     <Compile Include="Flexible\Standard\ICommonQueryParserConfiguration.cs" />
+    <Compile Include="Flexible\Standard\Parser\EscapeQuerySyntaxImpl.cs" />
+    <Compile Include="Flexible\Standard\Parser\ICharStream.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Support\StringExtensions.cs" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\..\core\Lucene.Net.csproj">

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs b/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs
index f3f0789..30e60a4 100644
--- a/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs
+++ b/src/contrib/QueryParsers/Flexible/Standard/Builders/AnyQueryNodeBuilder.cs
@@ -1,4 +1,10 @@
-using System;
+using Lucene.Net.QueryParsers.Flexible.Core;
+using Lucene.Net.QueryParsers.Flexible.Core.Builders;
+using Lucene.Net.QueryParsers.Flexible.Core.Messages;
+using Lucene.Net.QueryParsers.Flexible.Core.Nodes;
+using Lucene.Net.QueryParsers.Flexible.Messages;
+using Lucene.Net.Search;
+using System;
 using System.Collections.Generic;
 using System.Linq;
 using System.Text;
@@ -6,7 +12,55 @@ using System.Threading.Tasks;
 
 namespace Lucene.Net.QueryParsers.Flexible.Standard.Builders
 {
-    class AnyQueryNodeBuilder
+    public class AnyQueryNodeBuilder : IStandardQueryBuilder
     {
+        public AnyQueryNodeBuilder()
+        {
+            // empty constructor
+        }
+
+        public Query Build(IQueryNode queryNode)
+        {
+            AnyQueryNode andNode = (AnyQueryNode)queryNode;
+
+            BooleanQuery bQuery = new BooleanQuery();
+            IList<IQueryNode> children = andNode.Children;
+
+            if (children != null)
+            {
+                foreach (IQueryNode child in children)
+                {
+                    Object obj = child.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
+
+                    if (obj != null)
+                    {
+                        Query query = (Query)obj;
+
+                        try
+                        {
+                            bQuery.Add(query, Occur.SHOULD);
+                        }
+                        catch (BooleanQuery.TooManyClauses ex)
+                        {
+                            throw new QueryNodeException(new Message(
+                                /*
+                                 * IQQQ.Q0028E_TOO_MANY_BOOLEAN_CLAUSES,
+                                 * BooleanQuery.getMaxClauseCount()
+                                 */
+                            QueryParserMessages.EMPTY_MESSAGE), ex);
+                        }
+                    }
+                }
+            }
+
+            bQuery.MinimumNumberShouldMatch = andNode.MinimumMatchingElements;
+
+            return bQuery;
+        }
+
+        object IQueryBuilder.Build(IQueryNode queryNode)
+        {
+            return Build(queryNode);
+        }
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs b/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs
new file mode 100644
index 0000000..97b580f
--- /dev/null
+++ b/src/contrib/QueryParsers/Flexible/Standard/Builders/BooleanQueryNodeBuilder.cs
@@ -0,0 +1,81 @@
+using Lucene.Net.QueryParsers.Flexible.Core;
+using Lucene.Net.QueryParsers.Flexible.Core.Builders;
+using Lucene.Net.QueryParsers.Flexible.Core.Messages;
+using Lucene.Net.QueryParsers.Flexible.Core.Nodes;
+using Lucene.Net.QueryParsers.Flexible.Messages;
+using Lucene.Net.QueryParsers.Flexible.Standard.Parser;
+using Lucene.Net.Search;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.QueryParsers.Flexible.Standard.Builders
+{
+    public class BooleanQueryNodeBuilder : IStandardQueryBuilder
+    {
+        public BooleanQueryNodeBuilder()
+        {
+            // empty constructor
+        }
+
+        public Query Build(IQueryNode queryNode)
+        {
+            BooleanQueryNode booleanNode = (BooleanQueryNode)queryNode;
+
+            BooleanQuery bQuery = new BooleanQuery();
+            IList<IQueryNode> children = booleanNode.Children;
+
+            if (children != null)
+            {
+                foreach (IQueryNode child in children)
+                {
+                    Object obj = child.GetTag(QueryTreeBuilder.QUERY_TREE_BUILDER_TAGID);
+
+                    if (obj != null)
+                    {
+                        Query query = (Query)obj;
+
+                        try
+                        {
+                            bQuery.Add(query, GetModifierValue(child));
+                        }
+                        catch (BooleanQuery.TooManyClauses ex)
+                        {
+                            throw new QueryNodeException(new Message(
+                                QueryParserMessages.TOO_MANY_BOOLEAN_CLAUSES, BooleanQuery.MaxClauseCount, queryNode.ToQueryString(new EscapeQuerySyntaxImpl())), ex);
+
+                        }
+                    }
+                }
+            }
+
+            return bQuery;
+        }
+
+        object IQueryBuilder.Build(IQueryNode queryNode)
+        {
+            return Build(queryNode);
+        }
+
+        private static Occur GetModifierValue(IQueryNode node)
+        {
+            if (node is ModifierQueryNode)
+            {
+                ModifierQueryNode mNode = ((ModifierQueryNode)node);
+                switch (mNode.ModifierValue)
+                {
+                    case ModifierQueryNode.Modifier.MOD_REQ:
+                        return Occur.MUST;
+                    case ModifierQueryNode.Modifier.MOD_NOT:
+                        return Occur.MUST_NOT;
+                    case ModifierQueryNode.Modifier.MOD_NONE:
+                        return Occur.SHOULD;
+                }
+            }
+
+            return Occur.SHOULD;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs b/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs
index 366568f..cca4a1a 100644
--- a/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs
+++ b/src/contrib/QueryParsers/Flexible/Standard/Builders/IStandardQueryBuilder.cs
@@ -11,6 +11,6 @@ namespace Lucene.Net.QueryParsers.Flexible.Standard.Builders
 {
     public interface IStandardQueryBuilder : IQueryBuilder
     {
-        Query Build(IQueryNode queryNode);
+        new Query Build(IQueryNode queryNode);
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs
----------------------------------------------------------------------
diff --git a/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs b/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs
new file mode 100644
index 0000000..6c524a0
--- /dev/null
+++ b/src/contrib/QueryParsers/Flexible/Standard/Parser/EscapeQuerySyntaxImpl.cs
@@ -0,0 +1,297 @@
+using Lucene.Net.QueryParsers.Classic;
+using Lucene.Net.QueryParsers.Flexible.Core.Messages;
+using Lucene.Net.QueryParsers.Flexible.Core.Parser;
+using Lucene.Net.QueryParsers.Flexible.Core.Util;
+using Lucene.Net.QueryParsers.Flexible.Messages;
+using Lucene.Net.QueryParsers.Support;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.QueryParsers.Flexible.Standard.Parser
+{
+    public class EscapeQuerySyntaxImpl : IEscapeQuerySyntax
+    {
+        private static readonly char[] wildcardChars = { '*', '?' };
+
+        private static readonly string[] escapableTermExtraFirstChars = { "+", "-", "@" };
+
+        private static readonly string[] escapableTermChars = { "\"", "<", ">", "=",
+            "!", "(", ")", "^", "[", "{", ":", "]", "}", "~", "/" };
+
+        // TODO: check what to do with these "*", "?", "\\"
+        private static readonly string[] escapableQuotedChars = { "\"" };
+        private static readonly string[] escapableWhiteChars = { " ", "\t", "\n", "\r",
+            "\f", "\b", "\u3000" };
+        private static readonly string[] escapableWordTokens = { "AND", "OR", "NOT",
+            "TO", "WITHIN", "SENTENCE", "PARAGRAPH", "INORDER" };
+
+        private static ICharSequence EscapeChar(ICharSequence str, CultureInfo locale)
+        {
+            if (str == null || str.Length == 0)
+                return str;
+
+            ICharSequence buffer = str;
+
+            // regular escapable Char for terms
+            for (int i = 0; i < escapableTermChars.Length; i++)
+            {
+                buffer = ReplaceIgnoreCase(buffer, escapableTermChars[i].ToLower(locale),
+                    "\\", locale);
+            }
+
+            // First Character of a term as more escaping chars
+            for (int i = 0; i < escapableTermExtraFirstChars.Length; i++)
+            {
+                if (buffer.CharAt(0) == escapableTermExtraFirstChars[i][0])
+                {
+                    buffer = new StringCharSequenceWrapper("\\" + buffer.CharAt(0)
+                        + buffer.SubSequence(1, buffer.Length));
+                    break;
+                }
+            }
+
+            return buffer;
+        }
+
+        private ICharSequence EscapeQuoted(ICharSequence str, CultureInfo locale)
+        {
+            if (str == null || str.Length == 0)
+                return str;
+
+            ICharSequence buffer = str;
+
+            for (int i = 0; i < escapableQuotedChars.Length; i++)
+            {
+                buffer = ReplaceIgnoreCase(buffer, escapableTermChars[i].ToLower(locale), "\\", locale);
+            }
+
+            return buffer;
+        }
+
+        private static ICharSequence EscapeTerm(ICharSequence term, CultureInfo locale)
+        {
+            if (term == null)
+                return term;
+
+            // Escape single Chars
+            term = EscapeChar(term, locale);
+            term = EscapeWhiteChar(term, locale);
+
+            // Escape Parser Words
+            for (int i = 0; i < escapableWordTokens.Length; i++)
+            {
+                if (escapableWordTokens[i].EqualsIgnoreCase(term.ToString()))
+                    return new StringCharSequenceWrapper("\\" + term);
+            }
+            return term;
+        }
+
+        private static ICharSequence ReplaceIgnoreCase(ICharSequence str, string sequence1, string escapeChar, CultureInfo locale)
+        {
+            if (escapeChar == null || sequence1 == null || str == null)
+                throw new NullReferenceException();
+
+            // empty string case
+            int count = str.Length;
+            int sequence1Length = sequence1.Length;
+            if (sequence1Length == 0)
+            {
+                StringBuilder result = new StringBuilder((count + 1)
+                    * escapeChar.Length);
+                result.Append(escapeChar);
+                for (int i = 0; i < count; i++)
+                {
+                    result.Append(str.CharAt(i));
+                    result.Append(escapeChar);
+                }
+                return new StringCharSequenceWrapper(result.ToString());
+            }
+
+            // normal case
+            StringBuilder result2 = new StringBuilder();
+            char first = sequence1[0];
+            int start = 0, copyStart = 0, firstIndex;
+            while (start < count)
+            {
+                if ((firstIndex = str.ToString().ToLower(locale).IndexOf(first, start)) == -1)
+                    break;
+                bool found = true;
+                if (sequence1.Length > 1)
+                {
+                    if (firstIndex + sequence1Length > count)
+                        break;
+                    for (int i = 1; i < sequence1Length; i++)
+                    {
+                        if (str.ToString().ToLower(locale)[firstIndex + i] != sequence1[i])
+                        {
+                            found = false;
+                            break;
+                        }
+                    }
+                }
+                if (found)
+                {
+                    result2.Append(str.ToString().Substring(copyStart, firstIndex));
+                    result2.Append(escapeChar);
+                    result2.Append(str.ToString().Substring(firstIndex,
+                        firstIndex + sequence1Length));
+                    copyStart = start = firstIndex + sequence1Length;
+                }
+                else
+                {
+                    start = firstIndex + 1;
+                }
+            }
+            if (result2.Length == 0 && copyStart == 0)
+                return str;
+            result2.Append(str.ToString().Substring(copyStart));
+            return new StringCharSequenceWrapper(result2.ToString());
+        }
+
+        private static ICharSequence EscapeWhiteChar(ICharSequence str, CultureInfo locale)
+        {
+            if (str == null || str.Length == 0)
+                return str;
+
+            ICharSequence buffer = str;
+
+            for (int i = 0; i < escapableWhiteChars.Length; i++)
+            {
+                buffer = ReplaceIgnoreCase(buffer, escapableWhiteChars[i].ToLower(locale), "\\", locale);
+            }
+            return buffer;
+        }
+
+        public ICharSequence Escape(ICharSequence text, CultureInfo locale, EscapeQuerySyntax.Type type)
+        {
+            if (text == null || text.Length == 0)
+                return text;
+
+            // escape wildcards and the escape char (this has to be perform before
+            // anything else)
+            // since we need to preserve the UnescapedCharSequence and escape the
+            // original escape chars
+            if (text is UnescapedCharSequence)
+            {
+                text = new StringCharSequenceWrapper(((UnescapedCharSequence)text).ToStringEscaped(wildcardChars));
+            }
+            else
+            {
+                text = new StringCharSequenceWrapper(new UnescapedCharSequence(text).ToStringEscaped(wildcardChars));
+            }
+
+            if (type == EscapeQuerySyntax.Type.STRING)
+            {
+                return EscapeQuoted(text, locale);
+            }
+            else
+            {
+                return EscapeTerm(text, locale);
+            }
+        }
+
+        public static UnescapedCharSequence DiscardEscapeChar(ICharSequence input)
+        {
+            // Create char array to hold unescaped char sequence
+            char[] output = new char[input.Length];
+            bool[] wasEscaped = new bool[input.Length];
+
+            // The length of the output can be less than the input
+            // due to discarded escape chars. This variable holds
+            // the actual length of the output
+            int length = 0;
+
+            // We remember whether the last processed character was
+            // an escape character
+            bool lastCharWasEscapeChar = false;
+
+            // The multiplier the current unicode digit must be multiplied with.
+            // E. g. the first digit must be multiplied with 16^3, the second with
+            // 16^2...
+            int codePointMultiplier = 0;
+
+            // Used to calculate the codepoint of the escaped unicode character
+            int codePoint = 0;
+
+            for (int i = 0; i < input.Length; i++)
+            {
+                char curChar = input.CharAt(i);
+                if (codePointMultiplier > 0)
+                {
+                    codePoint += HexToInt(curChar) * codePointMultiplier;
+                    codePointMultiplier = Number.URShift(codePointMultiplier, 4);
+                    if (codePointMultiplier == 0)
+                    {
+                        output[length++] = (char)codePoint;
+                        codePoint = 0;
+                    }
+                }
+                else if (lastCharWasEscapeChar)
+                {
+                    if (curChar == 'u')
+                    {
+                        // found an escaped unicode character
+                        codePointMultiplier = 16 * 16 * 16;
+                    }
+                    else
+                    {
+                        // this character was escaped
+                        output[length] = curChar;
+                        wasEscaped[length] = true;
+                        length++;
+                    }
+                    lastCharWasEscapeChar = false;
+                }
+                else
+                {
+                    if (curChar == '\\')
+                    {
+                        lastCharWasEscapeChar = true;
+                    }
+                    else
+                    {
+                        output[length] = curChar;
+                        length++;
+                    }
+                }
+            }
+
+            if (codePointMultiplier > 0)
+            {
+                throw new ParseException(new Message(QueryParserMessages.INVALID_SYNTAX_ESCAPE_UNICODE_TRUNCATION).ToString());
+            }
+
+            if (lastCharWasEscapeChar)
+            {
+                throw new ParseException(new Message(QueryParserMessages.INVALID_SYNTAX_ESCAPE_CHARACTER).ToString());
+            }
+
+            return new UnescapedCharSequence(output, wasEscaped, 0, length);
+        }
+
+        private static int HexToInt(char c)
+        {
+            if ('0' <= c && c <= '9')
+            {
+                return c - '0';
+            }
+            else if ('a' <= c && c <= 'f')
+            {
+                return c - 'a' + 10;
+            }
+            else if ('A' <= c && c <= 'F')
+            {
+                return c - 'A' + 10;
+            }
+            else
+            {
+                throw new ParseException(new Message(QueryParserMessages.INVALID_SYNTAX_ESCAPE_NONE_HEX_UNICODE, c).ToString());
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs b/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs
new file mode 100644
index 0000000..3a43414
--- /dev/null
+++ b/src/contrib/QueryParsers/Flexible/Standard/Parser/ICharStream.cs
@@ -0,0 +1,37 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.QueryParsers.Flexible.Standard.Parser
+{
+    public interface ICharStream
+    {
+        char ReadChar();
+
+        [Obsolete]
+        int Column { get; }
+
+        [Obsolete]
+        int Line { get; }
+
+        int EndColumn { get; }
+
+        int EndLine { get; }
+
+        int BeginColumn { get; }
+
+        int BeginLine { get; }
+
+        void Backup(int amount);
+
+        char BeginToken();
+
+        string Image { get; }
+
+        char[] GetSuffix(int len);
+
+        void Done();
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f66837d2/src/contrib/QueryParsers/Support/StringExtensions.cs
----------------------------------------------------------------------
diff --git a/src/contrib/QueryParsers/Support/StringExtensions.cs b/src/contrib/QueryParsers/Support/StringExtensions.cs
new file mode 100644
index 0000000..c688e67
--- /dev/null
+++ b/src/contrib/QueryParsers/Support/StringExtensions.cs
@@ -0,0 +1,16 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.QueryParsers.Support
+{
+    public static class StringExtensions
+    {
+        public static bool EqualsIgnoreCase(this string value, string other)
+        {
+            return string.Equals(value, other, StringComparison.OrdinalIgnoreCase);
+        }
+    }
+}


Mime
View raw message