allura-commits mailing list archives

From kentontay...@apache.org
Subject [allura] 09/17: [#8325] upgrade to beautifulsoup4
Date Wed, 04 Sep 2019 19:15:14 GMT
This is an automated email from the ASF dual-hosted git repository.

kentontaylor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/allura.git

commit 6abc5020e15a604738d2cbf71e766ccdcae18c5f
Author: Dave Brondsema <dave@brondsema.net>
AuthorDate: Tue Aug 20 18:57:37 2019 -0400

    [#8325] upgrade to beautifulsoup4
---
 Allura/allura/lib/macro.py                              |  4 ++--
 Allura/allura/scripts/trac_export.py                    |  2 +-
 Allura/allura/tests/functional/test_trovecategory.py    | 10 +++++-----
 ForgeImporters/forgeimporters/base.py                   |  2 +-
 ForgeImporters/forgeimporters/github/tests/test_wiki.py | 10 +++++++---
 ForgeImporters/forgeimporters/github/wiki.py            | 15 ++++++++-------
 ForgeTracker/forgetracker/tests/functional/test_root.py |  2 +-
 ForgeWiki/forgewiki/converters.py                       |  4 ++--
 requirements.in                                         |  3 +--
 requirements.txt                                        |  6 +++---
 10 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/Allura/allura/lib/macro.py b/Allura/allura/lib/macro.py
index 3996826..6746a48 100644
--- a/Allura/allura/lib/macro.py
+++ b/Allura/allura/lib/macro.py
@@ -29,7 +29,7 @@ import pymongo
 from tg import tmpl_context as c, app_globals as g
 from tg import request
 from paste.deploy.converters import asint
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 
 from . import helpers as h
 from . import security
@@ -463,7 +463,7 @@ def embed(url=None):
 
         # convert iframe src from http to https, to avoid mixed security blocking when used on an https page
         # and convert to youtube-nocookie.com
-        html = BeautifulSoup(html)
+        html = BeautifulSoup(html, 'html.parser')
         embed_url = html.find('iframe').get('src')
         if embed_url:
             embed_url = urlparse(embed_url)
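
Note on the hunk above: BeautifulSoup 3's constructor took only the markup, while bs4 picks a parser automatically and warns when none is named, and the parser it picks can differ between machines depending on what is installed. Passing 'html.parser' pins the behavior. A minimal sketch of the difference (the iframe URL is made up):

    # bs4: name the parser explicitly so output is stable across environments.
    from bs4 import BeautifulSoup

    html = '<iframe src="http://www.youtube.com/embed/abc123"></iframe>'
    soup = BeautifulSoup(html, 'html.parser')
    assert soup.find('iframe').get('src') == 'http://www.youtube.com/embed/abc123'
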
diff --git a/Allura/allura/scripts/trac_export.py b/Allura/allura/scripts/trac_export.py
index 8b6419d..86b90bb 100644
--- a/Allura/allura/scripts/trac_export.py
+++ b/Allura/allura/scripts/trac_export.py
@@ -28,7 +28,7 @@ import re
 from optparse import OptionParser
 from itertools import islice
 
-from BeautifulSoup import BeautifulSoup, NavigableString
+from bs4 import BeautifulSoup, NavigableString
 import dateutil.parser
 import pytz
 
diff --git a/Allura/allura/tests/functional/test_trovecategory.py b/Allura/allura/tests/functional/test_trovecategory.py
index 3b1076d..4bb6eed 100644
--- a/Allura/allura/tests/functional/test_trovecategory.py
+++ b/Allura/allura/tests/functional/test_trovecategory.py
@@ -14,7 +14,7 @@
 #       KIND, either express or implied.  See the License for the
 #       specific language governing permissions and limitations
 #       under the License.
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 import mock
 
 from tg import config
@@ -121,8 +121,8 @@ class TestTroveCategoryController(TestController):
                 <li>CategoryB</li>
             </ul>
         </ul>
-        """.strip())
-        assert str(expected) == str(rendered_tree)
+        """.strip(), 'html.parser')
+        assert_equals(str(expected), str(rendered_tree))
 
     @td.with_tool('test2', 'admin_main', 'admin')
     def test_trove_empty_hierarchy(self):
@@ -131,5 +131,5 @@ class TestTroveCategoryController(TestController):
         expected = BeautifulSoup("""
         <ul>
         </ul>
-        """.strip())
-        assert str(expected) == str(rendered_tree)
+        """.strip(), 'html.parser')
+        assert_equals(str(expected), str(rendered_tree))
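
The assertions above compare str() serializations rather than raw markup strings; str() on a bs4 tree returns the re-serialized document, so simple fragments round-trip and can be compared directly. A rough sketch of the idea (markup is made up):

    from bs4 import BeautifulSoup

    expected = BeautifulSoup('<ul><li>CategoryA</li></ul>', 'html.parser')
    # str() on a bs4 tree re-serializes it, so small fragments round-trip
    # unchanged and can be compared as plain strings.
    assert str(expected) == '<ul><li>CategoryA</li></ul>'
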
diff --git a/ForgeImporters/forgeimporters/base.py b/ForgeImporters/forgeimporters/base.py
index e6a64b9..d7ea454 100644
--- a/ForgeImporters/forgeimporters/base.py
+++ b/ForgeImporters/forgeimporters/base.py
@@ -29,7 +29,7 @@ try:
 except ImportError:
     from StringIO import StringIO
 
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 from tg import expose, validate, flash, redirect, config
 from tg.decorators import with_trailing_slash
 from tg import app_globals as g
diff --git a/ForgeImporters/forgeimporters/github/tests/test_wiki.py b/ForgeImporters/forgeimporters/github/tests/test_wiki.py
index ea85212..f99ecc4 100644
--- a/ForgeImporters/forgeimporters/github/tests/test_wiki.py
+++ b/ForgeImporters/forgeimporters/github/tests/test_wiki.py
@@ -365,7 +365,7 @@ Our website is [[http://domain.net]].
         result = u'''<p>Look at [[this page|Some Page]]</p>
 <p>More info at: [[MoreInfo]] [[Even More Info]]</p>
 <p>Our website is [[http://domain.net]].</p>
-<p>&#8216;[[Escaped Tag]]</p>
+<p>\u2018[[Escaped Tag]]</p>
 <p>[External link to the wiki page](https://github.com/a/b/wiki/Page)</p>
 <p>[External link](https://github.com/a/b/issues/1)</p>'''
 
@@ -386,12 +386,16 @@ Our website is [[http://domain.net]].
         assert_equal(
             f(u'<a href="https://github/a/b/issues/1" class="1"></a>',
               prefix, new),
-            u'<a href="https://github/a/b/issues/1" class="1"></a>')
+            u'<a class="1" href="https://github/a/b/issues/1"></a>')
         assert_equal(
             f(u'<a href="https://github/a/b/wiki/Test Page">https://github/a/b/wiki/Test Page</a>',
               prefix, new),
             u'<a href="/p/test/wiki/Test Page">/p/test/wiki/Test Page</a>')
         assert_equal(
+            f(u'<a href="https://github/a/b/wiki/Test Page">Test blah blah</a>',
+              prefix, new),
+            u'<a href="/p/test/wiki/Test Page">Test blah blah</a>')
+        assert_equal(
             f(u'<a href="https://github/a/b/wiki/Test Page">Test <b>Page</b></a>',
               prefix, new),
             u'<a href="/p/test/wiki/Test Page">Test <b>Page</b></a>')
@@ -497,7 +501,7 @@ some text and *[[Tips n' Tricks]]*
 '''
         result = u'''**[this checklist](Troubleshooting)**
 
-some text and **[Tips n' Tricks]**
+some text and **[Tips n\u2019 Tricks]**
 
 **[link](http://otherlink.com)**
 '''
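
If I read the expectation changes above correctly, they track two bs4 behaviors: character references such as &#8216; are decoded into the corresponding Unicode character in the parsed text, and the default formatter writes a tag's attributes in sorted order, which is why the expected <a> now lists class before href. A small hedged illustration (markup is made up):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(u'<p>&#8216;hi</p>', 'html.parser')
    assert soup.p.get_text() == u'\u2018hi'             # entity decoded to Unicode

    soup = BeautifulSoup(u'<a href="x" class="1"></a>', 'html.parser')
    assert str(soup) == '<a class="1" href="x"></a>'    # attributes sorted on output
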
diff --git a/ForgeImporters/forgeimporters/github/wiki.py b/ForgeImporters/forgeimporters/github/wiki.py
index 91c9594..4875501 100644
--- a/ForgeImporters/forgeimporters/github/wiki.py
+++ b/ForgeImporters/forgeimporters/github/wiki.py
@@ -21,9 +21,10 @@ from datetime import datetime
 from tempfile import mkdtemp
 from shutil import rmtree
 
+import six
 from paste.deploy.converters import aslist
 
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 import git
 from tg import app_globals as g
 from tg import tmpl_context as c
@@ -327,7 +328,7 @@ class GitHubWikiImporter(ToolImporter):
         elif ext and ext in self.textile_exts:
             text = self._prepare_textile_text(text)
 
-            text = h.render_any_markup(filename, text)
+            text = six.text_type(h.render_any_markup(filename, text))
             text = self.rewrite_links(text, self.github_wiki_url, self.app.url)
             if html2text:
                 text = html2text.html2text(text)
@@ -417,16 +418,16 @@ class GitHubWikiImporter(ToolImporter):
             prefix += '/'
         if not new_prefix.endswith('/'):
             new_prefix += '/'
-        soup = BeautifulSoup(html)
+        soup = BeautifulSoup(html, 'html.parser')
         for a in soup.findAll('a'):
             if a.get('href').startswith(prefix):
                 page = a['href'].replace(prefix, '')
                 new_page = self._convert_page_name(page)
                 a['href'] = new_prefix + new_page
-                if a.text == page:
-                    a.setString(new_page)
-                elif a.text == prefix + page:
-                    a.setString(new_prefix + new_page)
+                if a.string == page:
+                    a.string = new_page
+                elif a.string == prefix + page:
+                    a.string = new_prefix + new_page
         return unicode(soup)
 
     def _prepare_textile_text(self, text):
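
The hunk above is the main API change in this commit: the BeautifulSoup 3 setString() helper is gone in bs4, where the link text is replaced by assigning to the tag's .string property, while findAll() survives as an alias of find_all(). A rough sketch of the rewrite pattern, with made-up link values:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<a href="old/Page">old/Page</a>', 'html.parser')
    for a in soup.findAll('a'):          # findAll is kept as an alias of find_all
        a['href'] = a['href'].replace('old/', 'new/')
        a.string = a['href']             # bs4 replacement for BS3's setString()
    assert str(soup) == '<a href="new/Page">new/Page</a>'
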
diff --git a/ForgeTracker/forgetracker/tests/functional/test_root.py b/ForgeTracker/forgetracker/tests/functional/test_root.py
index 95a468f..9975c3b 100644
--- a/ForgeTracker/forgetracker/tests/functional/test_root.py
+++ b/ForgeTracker/forgetracker/tests/functional/test_root.py
@@ -26,7 +26,7 @@ import allura
 import mock
 
 import PIL
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 from mock import patch
 from nose.tools import (
     assert_true,
diff --git a/ForgeWiki/forgewiki/converters.py b/ForgeWiki/forgewiki/converters.py
index 95b88f7..2793967 100644
--- a/ForgeWiki/forgewiki/converters.py
+++ b/ForgeWiki/forgewiki/converters.py
@@ -17,7 +17,7 @@
 
 #-*- python -*-
 import re
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 
 _inline_img = re.compile(r'\[\[(File|Image):([^\]|]+)[^]]*\]\]', re.UNICODE)
 _inline_img_markdown = r'[[img src=\2]]'
@@ -50,7 +50,7 @@ def _internal_link_markdown(match):
 
 def _convert_toc(wiki_html):
     """Convert Table of Contents from mediawiki to markdown"""
-    soup = BeautifulSoup(wiki_html)
+    soup = BeautifulSoup(wiki_html, 'html.parser')
     for toc_div in soup.findAll('div', id='toc'):
         toc_div.replaceWith('[TOC]')
     return unicode(soup)
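
Only the constructor call needed to change here: findAll() and replaceWith() are BeautifulSoup 3-era names that bs4 still accepts as aliases of find_all() and replace_with(). A hedged sketch of the same TOC substitution on a made-up fragment:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<div id="toc">old toc</div><p>body</p>', 'html.parser')
    for toc_div in soup.findAll('div', id='toc'):   # alias of find_all
        toc_div.replaceWith('[TOC]')                # alias of replace_with
    assert str(soup) == '[TOC]<p>body</p>'
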
diff --git a/requirements.in b/requirements.in
index 7c77ca6..d316140 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,6 +1,5 @@
 ActivityStream==0.2.2
-BeautifulSoup==3.2.0
-beautifulsoup4==4.6.1
+beautifulsoup4
 Beaker
 backlash==0.1.4
 chardet
diff --git a/requirements.txt b/requirements.txt
index 02eb7ee..7731c01 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,11 +9,10 @@ appnope==0.1.0            # via ipython
 asn1crypto==0.24.0        # via cryptography
 astroid==1.6.6            # via pylint
 backlash==0.1.4
-backports.functools-lru-cache==1.5  # via astroid, isort, pylint
+backports.functools-lru-cache==1.5  # via astroid, isort, pylint, soupsieve
 backports.shutil-get-terminal-size==1.0.0  # via ipython
 beaker==1.10.1
-beautifulsoup4==4.6.1
-beautifulsoup==3.2.0
+beautifulsoup4==4.8.0
 bleach==3.1.0             # via pypeline
 certifi==2019.6.16        # via requests
 cffi==1.12.2              # via cryptography
@@ -89,6 +88,7 @@ simplegeneric==0.8.1      # via ipython
 singledispatch==3.4.0.3   # via astroid, pylint
 six==1.12.0
 smmap2==2.0.4             # via gitdb2
+soupsieve==1.9.3          # via beautifulsoup4
 testfixtures==6.10.0
 textile==3.0.4            # via pypeline
 timermiddleware==0.5.0
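
soupsieve shows up above as a new transitive dependency ("# via beautifulsoup4"): beautifulsoup4 4.7 and later delegate CSS selector support to it, reached through bs4's select()/select_one(). A small example (markup is made up):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<div id="toc"><a class="x">t</a></div>', 'html.parser')
    # select() compiles the CSS selector with soupsieve under the hood.
    assert len(soup.select('div#toc a.x')) == 1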

