lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [66/72] [abbrv] lucenenet git commit: Lucene.Net.TestFramework: Renamed Util\automaton\ to Util\Automaton\
Date Sun, 26 Feb 2017 23:37:54 GMT
Lucene.Net.TestFramework: Renamed Util\automaton\ to Util\Automaton\


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6a55c21f
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6a55c21f
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6a55c21f

Branch: refs/heads/api-work
Commit: 6a55c21f8ea386a68935ca98dc64a60ce03002c3
Parents: 49a0460
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Sun Feb 26 03:39:03 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Mon Feb 27 06:18:00 2017 +0700

----------------------------------------------------------------------
 .../Lucene.Net.TestFramework.csproj             |   2 +-
 .../Util/Automaton/AutomatonTestUtil.cs         | 575 +++++++++++++++++++
 .../Util/automaton/AutomatonTestUtil.cs         | 575 -------------------
 3 files changed, 576 insertions(+), 576 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6a55c21f/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
index 351f632..788338e 100644
--- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
+++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj
@@ -434,7 +434,7 @@
       <SubType>Code</SubType>
     </Compile>
     <Compile Include="Util\ApiScanTestBase.cs" />
-    <Compile Include="Util\automaton\AutomatonTestUtil.cs">
+    <Compile Include="Util\Automaton\AutomatonTestUtil.cs">
       <SubType>Code</SubType>
     </Compile>
     <Compile Include="Util\BaseDocIdSetTestCase.cs">

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6a55c21f/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs b/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs
new file mode 100644
index 0000000..2c0e1f9
--- /dev/null
+++ b/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs
@@ -0,0 +1,575 @@
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using NUnit.Framework;
+
+namespace Lucene.Net.Util.Automaton
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using Lucene.Net.Randomized.Generators;
+
+    /// <summary>
+    /// Utilities for testing automata.
+    /// <p>
+    /// Capable of generating random regular expressions,
+    /// and automata, and also provides a number of very
+    /// basic unoptimized implementations (*slow) for testing.
+    /// </summary>
+    public class AutomatonTestUtil
+    {
+        /// <summary>
+        /// Returns random string, including full unicode range. </summary>
+        public static string RandomRegexp(Random r)
+        {
+            while (true)
+            {
+                string regexp = RandomRegexpString(r);
+                // we will also generate some undefined unicode queries
+                if (!UnicodeUtil.ValidUTF16String(regexp.ToCharArray()))
+                {
+                    continue;
+                }
+                try
+                {
+                    new RegExp(regexp, RegExp.NONE);
+                    return regexp;
+                }
+#pragma warning disable 168
+                catch (Exception e)
+#pragma warning restore 168
+                {
+                }
+            }
+        }
+
+        private static string RandomRegexpString(Random r)
+        {
+            int end = r.Next(20);
+            if (end == 0)
+            {
+                // allow 0 length
+                return "";
+            }
+            char[] buffer = new char[end];
+            for (int i = 0; i < end; i++)
+            {
+                int t = r.Next(15);
+                if (0 == t && i < end - 1)
+                {
+                    // Make a surrogate pair
+                    // High surrogate
+                    buffer[i++] = (char)TestUtil.NextInt(r, 0xd800, 0xdbff);
+                    // Low surrogate
+                    buffer[i] = (char)TestUtil.NextInt(r, 0xdc00, 0xdfff);
+                }
+                else if (t <= 1)
+                {
+                    buffer[i] = (char)r.Next(0x80);
+                }
+                else if (2 == t)
+                {
+                    buffer[i] = (char)TestUtil.NextInt(r, 0x80, 0x800);
+                }
+                else if (3 == t)
+                {
+                    buffer[i] = (char)TestUtil.NextInt(r, 0x800, 0xd7ff);
+                }
+                else if (4 == t)
+                {
+                    buffer[i] = (char)TestUtil.NextInt(r, 0xe000, 0xffff);
+                }
+                else if (5 == t)
+                {
+                    buffer[i] = '.';
+                }
+                else if (6 == t)
+                {
+                    buffer[i] = '?';
+                }
+                else if (7 == t)
+                {
+                    buffer[i] = '*';
+                }
+                else if (8 == t)
+                {
+                    buffer[i] = '+';
+                }
+                else if (9 == t)
+                {
+                    buffer[i] = '(';
+                }
+                else if (10 == t)
+                {
+                    buffer[i] = ')';
+                }
+                else if (11 == t)
+                {
+                    buffer[i] = '-';
+                }
+                else if (12 == t)
+                {
+                    buffer[i] = '[';
+                }
+                else if (13 == t)
+                {
+                    buffer[i] = ']';
+                }
+                else if (14 == t)
+                {
+                    buffer[i] = '|';
+                }
+            }
+            return new string(buffer, 0, end);
+        }
+
+        /// <summary>
+        /// picks a random int code point, avoiding surrogates;
+        /// throws IllegalArgumentException if this transition only
+        /// accepts surrogates
+        /// </summary>
+        private static int GetRandomCodePoint(Random r, Transition t)
+        {
+            int code;
+            if (t.Max < UnicodeUtil.UNI_SUR_HIGH_START || t.Min > UnicodeUtil.UNI_SUR_HIGH_END)
+            {
+                // easy: entire range is before or after surrogates
+                code = t.Min + r.Next(t.Max - t.Min + 1);
+            }
+            else if (t.Min >= UnicodeUtil.UNI_SUR_HIGH_START)
+            {
+                if (t.Max > UnicodeUtil.UNI_SUR_LOW_END)
+                {
+                    // after surrogates
+                    code = 1 + UnicodeUtil.UNI_SUR_LOW_END + r.Next(t.Max - UnicodeUtil.UNI_SUR_LOW_END);
+                }
+                else
+                {
+                    throw new System.ArgumentException("transition accepts only surrogates:
" + t);
+                }
+            }
+            else if (t.Max <= UnicodeUtil.UNI_SUR_LOW_END)
+            {
+                if (t.Min < UnicodeUtil.UNI_SUR_HIGH_START)
+                {
+                    // before surrogates
+                    code = t.Min + r.Next(UnicodeUtil.UNI_SUR_HIGH_START - t.Min);
+                }
+                else
+                {
+                    throw new System.ArgumentException("transition accepts only surrogates:
" + t);
+                }
+            }
+            else
+            {
+                // range includes all surrogates
+                int gap1 = UnicodeUtil.UNI_SUR_HIGH_START - t.Min;
+                int gap2 = t.Max - UnicodeUtil.UNI_SUR_LOW_END;
+                int c = r.Next(gap1 + gap2);
+                if (c < gap1)
+                {
+                    code = t.Min + c;
+                }
+                else
+                {
+                    code = UnicodeUtil.UNI_SUR_LOW_END + c - gap1 + 1;
+                }
+            }
+
+            Assert.True(code >= t.Min && code <= t.Max && (code <
UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END), "code=" + code +
" min=" + t.Min + " max=" + t.Max);
+            return code;
+        }
+
+        /// <summary>
+        /// Lets you retrieve random strings accepted
+        /// by an Automaton.
+        /// <p>
+        /// Once created, call <seealso cref="#getRandomAcceptedString(Random)"/>
+        /// to get a new string (in UTF-32 codepoints).
+        /// </summary>
+        public class RandomAcceptedStrings
+        {
+            internal readonly IDictionary<Transition, bool?> LeadsToAccept;
+            internal readonly Automaton a;
+
+            private class ArrivingTransition
+            {
+                internal readonly State From;
+                internal readonly Transition t;
+
+                public ArrivingTransition(State from, Transition t)
+                {
+                    this.From = from;
+                    this.t = t;
+                }
+            }
+
+            public RandomAcceptedStrings(Automaton a)
+            {
+                this.a = a;
+                if (!String.IsNullOrEmpty(a.Singleton))
+                {
+                    LeadsToAccept = null;
+                    return;
+                }
+
+                // must use IdentityHashmap because two Transitions w/
+                // different start nodes can be considered the same
+                LeadsToAccept = new IdentityHashMap<Transition, bool?>();
+                IDictionary<State, IList<ArrivingTransition>> allArriving = new
Dictionary<State, IList<ArrivingTransition>>();
+
+                LinkedList<State> q = new LinkedList<State>();
+                HashSet<State> seen = new HashSet<State>();
+
+                // reverse map the transitions, so we can quickly look
+                // up all arriving transitions to a given state
+                foreach (State s in a.GetNumberedStates())
+                {
+                    for (int i = 0; i < s.numTransitions; i++)
+                    {
+                        Transition t = s.TransitionsArray[i];
+                        IList<ArrivingTransition> tl;
+                        allArriving.TryGetValue(t.Dest, out tl);
+                        if (tl == null)
+                        {
+                            tl = new List<ArrivingTransition>();
+                            allArriving[t.Dest] = tl;
+                        }
+                        tl.Add(new ArrivingTransition(s, t));
+                    }
+                    if (s.Accept)
+                    {
+                        q.AddLast(s);
+                        seen.Add(s);
+                    }
+                }
+
+                // Breadth-first search, from accept states,
+                // backwards:
+                while (q.Count > 0)
+                {
+                    State s = q.First.Value;
+                    q.Remove(s);
+                    IList<ArrivingTransition> arriving;
+                    allArriving.TryGetValue(s, out arriving);
+                    if (arriving != null)
+                    {
+                        foreach (ArrivingTransition at in arriving)
+                        {
+                            State from = at.From;
+                            if (!seen.Contains(from))
+                            {
+                                q.AddLast(from);
+                                seen.Add(from);
+                                LeadsToAccept[at.t] = true;
+                            }
+                        }
+                    }
+                }
+            }
+
+            public int[] GetRandomAcceptedString(Random r)
+            {
+                IList<int?> soFar = new List<int?>();
+                if (a.IsSingleton)
+                {
+                    // accepts only one
+                    var s = a.Singleton;
+
+                    int charUpto = 0;
+                    while (charUpto < s.Length)
+                    {
+                        int cp = Character.CodePointAt(s, charUpto);
+                        charUpto += Character.CharCount(cp);
+                        soFar.Add(cp);
+                    }
+                }
+                else
+                {
+                    var s = a.GetInitialState();
+
+                    while (true)
+                    {
+                        if (s.Accept)
+                        {
+                            if (s.numTransitions == 0)
+                            {
+                                // stop now
+                                break;
+                            }
+                            else
+                            {
+                                if (r.NextBoolean())
+                                {
+                                    break;
+                                }
+                            }
+                        }
+
+                        if (s.numTransitions == 0)
+                        {
+                            throw new Exception("this automaton has dead states");
+                        }
+
+                        bool cheat = r.NextBoolean();
+
+                        Transition t;
+                        if (cheat)
+                        {
+                            // pick a transition that we know is the fastest
+                            // path to an accept state
+                            IList<Transition> toAccept = new List<Transition>();
+                            for (int i = 0; i < s.numTransitions; i++)
+                            {
+                                Transition t0 = s.TransitionsArray[i];
+                                if (LeadsToAccept.ContainsKey(t0))
+                                {
+                                    toAccept.Add(t0);
+                                }
+                            }
+                            if (toAccept.Count == 0)
+                            {
+                                // this is OK -- it means we jumped into a cycle
+                                t = s.TransitionsArray[r.Next(s.numTransitions)];
+                            }
+                            else
+                            {
+                                t = toAccept[r.Next(toAccept.Count)];
+                            }
+                        }
+                        else
+                        {
+                            t = s.TransitionsArray[r.Next(s.numTransitions)];
+                        }
+                        soFar.Add(GetRandomCodePoint(r, t));
+                        s = t.Dest;
+                    }
+                }
+
+                return ArrayUtil.ToInt32Array(soFar);
+            }
+        }
+
+        /// <summary>
+        /// return a random NFA/DFA for testing </summary>
+        public static Automaton RandomAutomaton(Random random)
+        {
+            // get two random Automata from regexps
+            Automaton a1 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton();
+            if (random.NextBoolean())
+            {
+                a1 = BasicOperations.Complement(a1);
+            }
+
+            Automaton a2 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton();
+            if (random.NextBoolean())
+            {
+                a2 = BasicOperations.Complement(a2);
+            }
+
+            // combine them in random ways
+            switch (random.Next(4))
+            {
+                case 0:
+                    return BasicOperations.Concatenate(a1, a2);
+
+                case 1:
+                    return BasicOperations.Union(a1, a2);
+
+                case 2:
+                    return BasicOperations.Intersection(a1, a2);
+
+                default:
+                    return BasicOperations.Minus(a1, a2);
+            }
+        }
+
+        /// <summary>
+        /// below are original, unoptimized implementations of DFA operations for testing.
+        /// These are from brics automaton, full license (BSD) below:
+        /// </summary>
+
+        /*
+         * dk.brics.automaton
+         *
+         * Copyright (c) 2001-2009 Anders Moeller
+         * All rights reserved.
+         *
+         * Redistribution and use in source and binary forms, with or without
+         * modification, are permitted provided that the following conditions
+         * are met:
+         * 1. Redistributions of source code must retain the above copyright
+         *    notice, this list of conditions and the following disclaimer.
+         * 2. Redistributions in binary form must reproduce the above copyright
+         *    notice, this list of conditions and the following disclaimer in the
+         *    documentation and/or other materials provided with the distribution.
+         * 3. The name of the author may not be used to endorse or promote products
+         *    derived from this software without specific prior written permission.
+         *
+         * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+         * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+         * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+         * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+         * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+         * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+         * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+         * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+         * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+         * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+         */
+
+        /// <summary>
+        /// Simple, original brics implementation of Brzozowski minimize()
+        /// </summary>
+        public static void MinimizeSimple(Automaton a)
+        {
+            if (!String.IsNullOrEmpty(a.Singleton))
+            {
+                return;
+            }
+            DeterminizeSimple(a, SpecialOperations.Reverse(a));
+            DeterminizeSimple(a, SpecialOperations.Reverse(a));
+        }
+
+        /// <summary>
+        /// Simple, original brics implementation of determinize()
+        /// </summary>
+        public static void DeterminizeSimple(Automaton a)
+        {
+            if (a.IsDeterministic || a.IsSingleton)
+            {
+                return;
+            }
+            HashSet<State> initialset = new ValueHashSet<State>();
+            initialset.Add(a.initial);
+            DeterminizeSimple(a, initialset);
+        }
+
+        /// <summary>
+        /// Simple, original brics implementation of determinize()
+        /// Determinizes the given automaton using the given set of initial states.
+        /// </summary>
+        public static void DeterminizeSimple(Automaton a, ISet<State> initialset)
+        {
+            int[] points = a.GetStartPoints();
+            // subset construction
+            IDictionary<ISet<State>, ISet<State>> sets = new Dictionary<ISet<State>,
ISet<State>>();
+            LinkedList<ISet<State>> worklist = new LinkedList<ISet<State>>();
+            IDictionary<ISet<State>, State> newstate = new Dictionary<ISet<State>,
State>();
+            sets[initialset] = initialset;
+            worklist.AddLast(initialset);
+            a.initial = new State();
+            newstate[initialset] = a.initial;
+            while (worklist.Count > 0)
+            {
+                ISet<State> s = worklist.First.Value;
+                worklist.Remove(s);
+                State r = newstate[s];
+                foreach (State q in s)
+                {
+                    if (q.Accept)
+                    {
+                        r.Accept = true;
+                        break;
+                    }
+                }
+                for (int n = 0; n < points.Length; n++)
+                {
+                    ISet<State> p = new ValueHashSet<State>();
+                    foreach (State q in s)
+                    {
+                        foreach (Transition t in q.GetTransitions())
+                        {
+                            if (t.Min <= points[n] && points[n] <= t.Max)
+                            {
+                                p.Add(t.to);
+                            }
+                        }
+                    }
+                    if (!sets.ContainsKey(p))
+                    {
+                        sets[p] = p;
+                        worklist.AddLast(p);
+                        newstate[p] = new State();
+                    }
+                    State q_ = newstate[p];
+                    int min = points[n];
+                    int max;
+                    if (n + 1 < points.Length)
+                    {
+                        max = points[n + 1] - 1;
+                    }
+                    else
+                    {
+                        max = Character.MAX_CODE_POINT;
+                    }
+                    r.AddTransition(new Transition(min, max, q_));
+                }
+            }
+            a.IsDeterministic = true;
+            a.ClearNumberedStates();
+            a.RemoveDeadTransitions();
+        }
+
+        /// <summary>
+        /// Returns true if the language of this automaton is finite.
+        /// <p>
+        /// WARNING: this method is slow, it will blow up if the automaton is large.
+        /// this is only used to test the correctness of our faster implementation.
+        /// </summary>
+        public static bool IsFiniteSlow(Automaton a)
+        {
+            if (!String.IsNullOrEmpty(a.Singleton))
+            {
+                return true;
+            }
+            return IsFiniteSlow(a.GetInitialState(), new HashSet<State>());
+        }
+
+        /// <summary>
+        /// Checks whether there is a loop containing s. (this is sufficient since
+        /// there are never transitions to dead states.)
+        /// </summary>
+        // TODO: not great that this is recursive... in theory a
+        // large automata could exceed java's stack
+        private static bool IsFiniteSlow(State s, HashSet<State> path)
+        {
+            path.Add(s);
+            foreach (Transition t in s.GetTransitions())
+            {
+                if (path.Contains(t.Dest) || !IsFiniteSlow(t.Dest, path))
+                {
+                    return false;
+                }
+            }
+            path.Remove(s);
+            return true;
+        }
+
+        /// <summary>
+        /// Checks that an automaton has no detached states that are unreachable
+        /// from the initial state.
+        /// </summary>
+        public static void AssertNoDetachedStates(Automaton a)
+        {
+            int numStates = a.GetNumberOfStates();
+            a.ClearNumberedStates(); // force recomputation of cached numbered states
+            Assert.True(numStates == a.GetNumberOfStates(), "automaton has " + (numStates
- a.GetNumberOfStates()) + " detached states");
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6a55c21f/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs b/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs
deleted file mode 100644
index 2c0e1f9..0000000
--- a/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs
+++ /dev/null
@@ -1,575 +0,0 @@
-using Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using NUnit.Framework;
-
-namespace Lucene.Net.Util.Automaton
-{
-    /*
-     * Licensed to the Apache Software Foundation (ASF) under one or more
-     * contributor license agreements.  See the NOTICE file distributed with
-     * this work for additional information regarding copyright ownership.
-     * The ASF licenses this file to You under the Apache License, Version 2.0
-     * (the "License"); you may not use this file except in compliance with
-     * the License.  You may obtain a copy of the License at
-     *
-     *     http://www.apache.org/licenses/LICENSE-2.0
-     *
-     * Unless required by applicable law or agreed to in writing, software
-     * distributed under the License is distributed on an "AS IS" BASIS,
-     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-     * See the License for the specific language governing permissions and
-     * limitations under the License.
-     */
-
-    using Lucene.Net.Randomized.Generators;
-
-    /// <summary>
-    /// Utilities for testing automata.
-    /// <p>
-    /// Capable of generating random regular expressions,
-    /// and automata, and also provides a number of very
-    /// basic unoptimized implementations (*slow) for testing.
-    /// </summary>
-    public class AutomatonTestUtil
-    {
-        /// <summary>
-        /// Returns random string, including full unicode range. </summary>
-        public static string RandomRegexp(Random r)
-        {
-            while (true)
-            {
-                string regexp = RandomRegexpString(r);
-                // we will also generate some undefined unicode queries
-                if (!UnicodeUtil.ValidUTF16String(regexp.ToCharArray()))
-                {
-                    continue;
-                }
-                try
-                {
-                    new RegExp(regexp, RegExp.NONE);
-                    return regexp;
-                }
-#pragma warning disable 168
-                catch (Exception e)
-#pragma warning restore 168
-                {
-                }
-            }
-        }
-
-        private static string RandomRegexpString(Random r)
-        {
-            int end = r.Next(20);
-            if (end == 0)
-            {
-                // allow 0 length
-                return "";
-            }
-            char[] buffer = new char[end];
-            for (int i = 0; i < end; i++)
-            {
-                int t = r.Next(15);
-                if (0 == t && i < end - 1)
-                {
-                    // Make a surrogate pair
-                    // High surrogate
-                    buffer[i++] = (char)TestUtil.NextInt(r, 0xd800, 0xdbff);
-                    // Low surrogate
-                    buffer[i] = (char)TestUtil.NextInt(r, 0xdc00, 0xdfff);
-                }
-                else if (t <= 1)
-                {
-                    buffer[i] = (char)r.Next(0x80);
-                }
-                else if (2 == t)
-                {
-                    buffer[i] = (char)TestUtil.NextInt(r, 0x80, 0x800);
-                }
-                else if (3 == t)
-                {
-                    buffer[i] = (char)TestUtil.NextInt(r, 0x800, 0xd7ff);
-                }
-                else if (4 == t)
-                {
-                    buffer[i] = (char)TestUtil.NextInt(r, 0xe000, 0xffff);
-                }
-                else if (5 == t)
-                {
-                    buffer[i] = '.';
-                }
-                else if (6 == t)
-                {
-                    buffer[i] = '?';
-                }
-                else if (7 == t)
-                {
-                    buffer[i] = '*';
-                }
-                else if (8 == t)
-                {
-                    buffer[i] = '+';
-                }
-                else if (9 == t)
-                {
-                    buffer[i] = '(';
-                }
-                else if (10 == t)
-                {
-                    buffer[i] = ')';
-                }
-                else if (11 == t)
-                {
-                    buffer[i] = '-';
-                }
-                else if (12 == t)
-                {
-                    buffer[i] = '[';
-                }
-                else if (13 == t)
-                {
-                    buffer[i] = ']';
-                }
-                else if (14 == t)
-                {
-                    buffer[i] = '|';
-                }
-            }
-            return new string(buffer, 0, end);
-        }
-
-        /// <summary>
-        /// picks a random int code point, avoiding surrogates;
-        /// throws IllegalArgumentException if this transition only
-        /// accepts surrogates
-        /// </summary>
-        private static int GetRandomCodePoint(Random r, Transition t)
-        {
-            int code;
-            if (t.Max < UnicodeUtil.UNI_SUR_HIGH_START || t.Min > UnicodeUtil.UNI_SUR_HIGH_END)
-            {
-                // easy: entire range is before or after surrogates
-                code = t.Min + r.Next(t.Max - t.Min + 1);
-            }
-            else if (t.Min >= UnicodeUtil.UNI_SUR_HIGH_START)
-            {
-                if (t.Max > UnicodeUtil.UNI_SUR_LOW_END)
-                {
-                    // after surrogates
-                    code = 1 + UnicodeUtil.UNI_SUR_LOW_END + r.Next(t.Max - UnicodeUtil.UNI_SUR_LOW_END);
-                }
-                else
-                {
-                    throw new System.ArgumentException("transition accepts only surrogates:
" + t);
-                }
-            }
-            else if (t.Max <= UnicodeUtil.UNI_SUR_LOW_END)
-            {
-                if (t.Min < UnicodeUtil.UNI_SUR_HIGH_START)
-                {
-                    // before surrogates
-                    code = t.Min + r.Next(UnicodeUtil.UNI_SUR_HIGH_START - t.Min);
-                }
-                else
-                {
-                    throw new System.ArgumentException("transition accepts only surrogates:
" + t);
-                }
-            }
-            else
-            {
-                // range includes all surrogates
-                int gap1 = UnicodeUtil.UNI_SUR_HIGH_START - t.Min;
-                int gap2 = t.Max - UnicodeUtil.UNI_SUR_LOW_END;
-                int c = r.Next(gap1 + gap2);
-                if (c < gap1)
-                {
-                    code = t.Min + c;
-                }
-                else
-                {
-                    code = UnicodeUtil.UNI_SUR_LOW_END + c - gap1 + 1;
-                }
-            }
-
-            Assert.True(code >= t.Min && code <= t.Max && (code <
UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END), "code=" + code +
" min=" + t.Min + " max=" + t.Max);
-            return code;
-        }
-
-        /// <summary>
-        /// Lets you retrieve random strings accepted
-        /// by an Automaton.
-        /// <p>
-        /// Once created, call <seealso cref="#getRandomAcceptedString(Random)"/>
-        /// to get a new string (in UTF-32 codepoints).
-        /// </summary>
-        public class RandomAcceptedStrings
-        {
-            internal readonly IDictionary<Transition, bool?> LeadsToAccept;
-            internal readonly Automaton a;
-
-            private class ArrivingTransition
-            {
-                internal readonly State From;
-                internal readonly Transition t;
-
-                public ArrivingTransition(State from, Transition t)
-                {
-                    this.From = from;
-                    this.t = t;
-                }
-            }
-
-            public RandomAcceptedStrings(Automaton a)
-            {
-                this.a = a;
-                if (!String.IsNullOrEmpty(a.Singleton))
-                {
-                    LeadsToAccept = null;
-                    return;
-                }
-
-                // must use IdentityHashmap because two Transitions w/
-                // different start nodes can be considered the same
-                LeadsToAccept = new IdentityHashMap<Transition, bool?>();
-                IDictionary<State, IList<ArrivingTransition>> allArriving = new
Dictionary<State, IList<ArrivingTransition>>();
-
-                LinkedList<State> q = new LinkedList<State>();
-                HashSet<State> seen = new HashSet<State>();
-
-                // reverse map the transitions, so we can quickly look
-                // up all arriving transitions to a given state
-                foreach (State s in a.GetNumberedStates())
-                {
-                    for (int i = 0; i < s.numTransitions; i++)
-                    {
-                        Transition t = s.TransitionsArray[i];
-                        IList<ArrivingTransition> tl;
-                        allArriving.TryGetValue(t.Dest, out tl);
-                        if (tl == null)
-                        {
-                            tl = new List<ArrivingTransition>();
-                            allArriving[t.Dest] = tl;
-                        }
-                        tl.Add(new ArrivingTransition(s, t));
-                    }
-                    if (s.Accept)
-                    {
-                        q.AddLast(s);
-                        seen.Add(s);
-                    }
-                }
-
-                // Breadth-first search, from accept states,
-                // backwards:
-                while (q.Count > 0)
-                {
-                    State s = q.First.Value;
-                    q.Remove(s);
-                    IList<ArrivingTransition> arriving;
-                    allArriving.TryGetValue(s, out arriving);
-                    if (arriving != null)
-                    {
-                        foreach (ArrivingTransition at in arriving)
-                        {
-                            State from = at.From;
-                            if (!seen.Contains(from))
-                            {
-                                q.AddLast(from);
-                                seen.Add(from);
-                                LeadsToAccept[at.t] = true;
-                            }
-                        }
-                    }
-                }
-            }
-
-            public int[] GetRandomAcceptedString(Random r)
-            {
-                IList<int?> soFar = new List<int?>();
-                if (a.IsSingleton)
-                {
-                    // accepts only one
-                    var s = a.Singleton;
-
-                    int charUpto = 0;
-                    while (charUpto < s.Length)
-                    {
-                        int cp = Character.CodePointAt(s, charUpto);
-                        charUpto += Character.CharCount(cp);
-                        soFar.Add(cp);
-                    }
-                }
-                else
-                {
-                    var s = a.GetInitialState();
-
-                    while (true)
-                    {
-                        if (s.Accept)
-                        {
-                            if (s.numTransitions == 0)
-                            {
-                                // stop now
-                                break;
-                            }
-                            else
-                            {
-                                if (r.NextBoolean())
-                                {
-                                    break;
-                                }
-                            }
-                        }
-
-                        if (s.numTransitions == 0)
-                        {
-                            throw new Exception("this automaton has dead states");
-                        }
-
-                        bool cheat = r.NextBoolean();
-
-                        Transition t;
-                        if (cheat)
-                        {
-                            // pick a transition that we know is the fastest
-                            // path to an accept state
-                            IList<Transition> toAccept = new List<Transition>();
-                            for (int i = 0; i < s.numTransitions; i++)
-                            {
-                                Transition t0 = s.TransitionsArray[i];
-                                if (LeadsToAccept.ContainsKey(t0))
-                                {
-                                    toAccept.Add(t0);
-                                }
-                            }
-                            if (toAccept.Count == 0)
-                            {
-                                // this is OK -- it means we jumped into a cycle
-                                t = s.TransitionsArray[r.Next(s.numTransitions)];
-                            }
-                            else
-                            {
-                                t = toAccept[r.Next(toAccept.Count)];
-                            }
-                        }
-                        else
-                        {
-                            t = s.TransitionsArray[r.Next(s.numTransitions)];
-                        }
-                        soFar.Add(GetRandomCodePoint(r, t));
-                        s = t.Dest;
-                    }
-                }
-
-                return ArrayUtil.ToInt32Array(soFar);
-            }
-        }
-
-        /// <summary>
-        /// return a random NFA/DFA for testing </summary>
-        public static Automaton RandomAutomaton(Random random)
-        {
-            // get two random Automata from regexps
-            Automaton a1 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton();
-            if (random.NextBoolean())
-            {
-                a1 = BasicOperations.Complement(a1);
-            }
-
-            Automaton a2 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton();
-            if (random.NextBoolean())
-            {
-                a2 = BasicOperations.Complement(a2);
-            }
-
-            // combine them in random ways
-            switch (random.Next(4))
-            {
-                case 0:
-                    return BasicOperations.Concatenate(a1, a2);
-
-                case 1:
-                    return BasicOperations.Union(a1, a2);
-
-                case 2:
-                    return BasicOperations.Intersection(a1, a2);
-
-                default:
-                    return BasicOperations.Minus(a1, a2);
-            }
-        }
-
-        /// <summary>
-        /// below are original, unoptimized implementations of DFA operations for testing.
-        /// These are from brics automaton, full license (BSD) below:
-        /// </summary>
-
-        /*
-         * dk.brics.automaton
-         *
-         * Copyright (c) 2001-2009 Anders Moeller
-         * All rights reserved.
-         *
-         * Redistribution and use in source and binary forms, with or without
-         * modification, are permitted provided that the following conditions
-         * are met:
-         * 1. Redistributions of source code must retain the above copyright
-         *    notice, this list of conditions and the following disclaimer.
-         * 2. Redistributions in binary form must reproduce the above copyright
-         *    notice, this list of conditions and the following disclaimer in the
-         *    documentation and/or other materials provided with the distribution.
-         * 3. The name of the author may not be used to endorse or promote products
-         *    derived from this software without specific prior written permission.
-         *
-         * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-         * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-         * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-         * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
-         * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-         * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-         * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-         * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-         * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-         * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-         */
-
-        /// <summary>
-        /// Simple, original brics implementation of Brzozowski minimize()
-        /// </summary>
-        public static void MinimizeSimple(Automaton a)
-        {
-            if (!String.IsNullOrEmpty(a.Singleton))
-            {
-                return;
-            }
-            DeterminizeSimple(a, SpecialOperations.Reverse(a));
-            DeterminizeSimple(a, SpecialOperations.Reverse(a));
-        }
-
-        /// <summary>
-        /// Simple, original brics implementation of determinize()
-        /// </summary>
-        public static void DeterminizeSimple(Automaton a)
-        {
-            if (a.IsDeterministic || a.IsSingleton)
-            {
-                return;
-            }
-            HashSet<State> initialset = new ValueHashSet<State>();
-            initialset.Add(a.initial);
-            DeterminizeSimple(a, initialset);
-        }
-
-        /// <summary>
-        /// Simple, original brics implementation of determinize()
-        /// Determinizes the given automaton using the given set of initial states.
-        /// </summary>
-        public static void DeterminizeSimple(Automaton a, ISet<State> initialset)
-        {
-            int[] points = a.GetStartPoints();
-            // subset construction
-            IDictionary<ISet<State>, ISet<State>> sets = new Dictionary<ISet<State>,
ISet<State>>();
-            LinkedList<ISet<State>> worklist = new LinkedList<ISet<State>>();
-            IDictionary<ISet<State>, State> newstate = new Dictionary<ISet<State>,
State>();
-            sets[initialset] = initialset;
-            worklist.AddLast(initialset);
-            a.initial = new State();
-            newstate[initialset] = a.initial;
-            while (worklist.Count > 0)
-            {
-                ISet<State> s = worklist.First.Value;
-                worklist.Remove(s);
-                State r = newstate[s];
-                foreach (State q in s)
-                {
-                    if (q.Accept)
-                    {
-                        r.Accept = true;
-                        break;
-                    }
-                }
-                for (int n = 0; n < points.Length; n++)
-                {
-                    ISet<State> p = new ValueHashSet<State>();
-                    foreach (State q in s)
-                    {
-                        foreach (Transition t in q.GetTransitions())
-                        {
-                            if (t.Min <= points[n] && points[n] <= t.Max)
-                            {
-                                p.Add(t.to);
-                            }
-                        }
-                    }
-                    if (!sets.ContainsKey(p))
-                    {
-                        sets[p] = p;
-                        worklist.AddLast(p);
-                        newstate[p] = new State();
-                    }
-                    State q_ = newstate[p];
-                    int min = points[n];
-                    int max;
-                    if (n + 1 < points.Length)
-                    {
-                        max = points[n + 1] - 1;
-                    }
-                    else
-                    {
-                        max = Character.MAX_CODE_POINT;
-                    }
-                    r.AddTransition(new Transition(min, max, q_));
-                }
-            }
-            a.IsDeterministic = true;
-            a.ClearNumberedStates();
-            a.RemoveDeadTransitions();
-        }
-
-        /// <summary>
-        /// Returns true if the language of this automaton is finite.
-        /// <p>
-        /// WARNING: this method is slow, it will blow up if the automaton is large.
-        /// this is only used to test the correctness of our faster implementation.
-        /// </summary>
-        public static bool IsFiniteSlow(Automaton a)
-        {
-            if (!String.IsNullOrEmpty(a.Singleton))
-            {
-                return true;
-            }
-            return IsFiniteSlow(a.GetInitialState(), new HashSet<State>());
-        }
-
-        /// <summary>
-        /// Checks whether there is a loop containing s. (this is sufficient since
-        /// there are never transitions to dead states.)
-        /// </summary>
-        // TODO: not great that this is recursive... in theory a
-        // large automata could exceed java's stack
-        private static bool IsFiniteSlow(State s, HashSet<State> path)
-        {
-            path.Add(s);
-            foreach (Transition t in s.GetTransitions())
-            {
-                if (path.Contains(t.Dest) || !IsFiniteSlow(t.Dest, path))
-                {
-                    return false;
-                }
-            }
-            path.Remove(s);
-            return true;
-        }
-
-        /// <summary>
-        /// Checks that an automaton has no detached states that are unreachable
-        /// from the initial state.
-        /// </summary>
-        public static void AssertNoDetachedStates(Automaton a)
-        {
-            int numStates = a.GetNumberOfStates();
-            a.ClearNumberedStates(); // force recomputation of cached numbered states
-            Assert.True(numStates == a.GetNumberOfStates(), "automaton has " + (numStates
- a.GetNumberOfStates()) + " detached states");
-        }
-    }
-}
\ No newline at end of file


Mime
View raw message