nutch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mar...@apache.org
Subject [nutch] branch master updated: NUTCH-2687 Regex for reading title from Content-Disposition is wrong
Date Fri, 18 Jan 2019 10:38:08 GMT
This is an automated email from the ASF dual-hosted git repository.

markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 9cc076f  NUTCH-2687 Regex for reading title from Content-Disposition is wrong
9cc076f is described below

commit 9cc076f33746c34acfdeef8b3007bb5b0dec736d
Author: Markus Jelsma <markus@apache.org>
AuthorDate: Fri Jan 18 11:36:49 2019 +0100

    NUTCH-2687 Regex for reading title from Content-Disposition is wrong
---
 .../src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java b/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
index c16d233..8c4a2d6 100644
--- a/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
+++ b/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
@@ -273,7 +273,7 @@ public class MoreIndexingFilter implements IndexingFilter {
   static {
     try {
       // order here is important
-      patterns[0] = Pattern.compile("\\bfilename=['\"](.+)['\"]");
+      patterns[0] = Pattern.compile("\\bfilename=['\"]([^\"]+)");
       patterns[1] = Pattern.compile("\\bfilename=(\\S+)\\b");
     } catch (PatternSyntaxException e) {
       // just ignore


Mime
View raw message