tika-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (TIKA-2762) Capture short fields (<150 chars) in EnviParserHeader Metadata
Date Wed, 31 Oct 2018 01:07:00 GMT

    [ https://issues.apache.org/jira/browse/TIKA-2762?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16669473#comment-16669473
] 

ASF GitHub Bot commented on TIKA-2762:
--------------------------------------

tballison closed pull request #251: TIKA-2762 Capture short fields (<150 chars) in EnviParserHeader
Metadata
URL: https://github.com/apache/tika/pull/251
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index f7aea79fd..2aabda4db 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -17,7 +17,6 @@
 package org.apache.tika.config;
 
 import javax.imageio.spi.ServiceRegistry;
-import javax.xml.parsers.DocumentBuilder;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -60,7 +59,6 @@
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
-import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.multiple.AbstractMultipleParser;
diff --git a/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java b/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
index 3b9d373f4..c2ddb2fd0 100644
--- a/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
+++ b/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.junit.Test;
@@ -191,7 +192,20 @@ public void testDetectStreamReadProblems() throws Exception {
         InputStream stream = new RestrictiveInputStream(data);
         assertEquals(testMT, detector.detect(stream, new Metadata()));
     }
-    
+
+    @Test
+    public void testDetectApplicationEnviHdr() throws Exception {
+        InputStream iStream = MagicDetectorTest.class.getResourceAsStream(
+              "/test-documents/ang20150420t182050_corr_v1e_img.hdr");
+        byte[] data = IOUtils.toByteArray(iStream);
+        MediaType testMT = new MediaType("application", "envi.hdr");
+        Detector detector = new MagicDetector(testMT, data, null, false, 0, 0);
+        // Deliberately prevent InputStream.read(...) from reading the entire
+        // buffer in one go
+        InputStream stream = new RestrictiveInputStream(data);
+        assertEquals(testMT, detector.detect(stream, new Metadata()));
+    }
+
     @Test
     public void testDetectString() throws Exception {
         String data = "abcdEFGhijklmnoPQRstuvwxyz0123456789";
diff --git a/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java b/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
index 7e4839b07..f13346594 100644
--- a/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
+++ b/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
@@ -44,6 +44,7 @@ public void setUp() {
                 Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE),
                 MediaType.TEXT_PLAIN);
         patterns.put(Pattern.compile("README"), MediaType.TEXT_PLAIN);
+        patterns.put(Pattern.compile(".*\\.hdr"), MediaType.application("envi.hdr"));
         detector = new NameDetector(patterns);
     }
 
@@ -83,6 +84,8 @@ public void testDetect() {
         assertDetect(MediaType.TEXT_PLAIN, "See README.txt"); // even this!
         assertDetect(MediaType.OCTET_STREAM, "See README");   // but not this
 
+        assertDetect(MediaType.application("envi.hdr"), "ang20150420t182050_corr_v1e_img.hdr");
+
         // test also the zero input cases
         assertDetect(MediaType.OCTET_STREAM, "");
         assertDetect(MediaType.OCTET_STREAM, null);
@@ -104,5 +107,4 @@ private void assertDetect(MediaType type, String name){
             fail("NameDetector should never throw an IOException");
         }
     }
-
 }
diff --git a/tika-core/src/test/resources/test-documents/ang20150420t182050_corr_v1e_img.hdr
b/tika-core/src/test/resources/test-documents/ang20150420t182050_corr_v1e_img.hdr
new file mode 100644
index 000000000..ba44396cc
--- /dev/null
+++ b/tika-core/src/test/resources/test-documents/ang20150420t182050_corr_v1e_img.hdr
@@ -0,0 +1,20 @@
+ENVI
+description = {
+  Georeferenced Image built from input GLT. [Wed Jun 10 04:37:54 2015] [Wed
+  Jun 10 04:48:52 2015]}
+samples = 739
+lines = 14674
+bands = 432
+header offset = 0
+file type = ENVI Standard
+data type = 4
+interleave = bil
+sensor type = Unknown
+byte order = 0
+map info = { UTM , 1.000 , 1.000 , 724522.127 , 4074620.759 , 1.1000000000e+00 , 1.1000000000e+00
, 12 , North , WGS-84 , units=Meters , rotation=75.00000000 }
+wavelength units = Nanometers
+correction factors = { 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.880586 , 1.741631
, 1.524632 , 1.564904 , 1.390035 , 1.342501 , 1.264768 , 1.200357 , 1.131471 , 1.06512 , 1.105211
, 1.081234 , 1.064999 , 1.056798 , 1.042573 , 1.009621 , 1.009031 , 1.002107 , 0.997332 ,
0.976807 , 0.963755 , 0.969028 , 0.962823 , 0.949522 , 0.960568 , 0.957813 , 0.934766 , 0.944994
, 0.937726 , 0.935257 , 0.932706 , 0.932568 , 0.933217 , 0.928705 , 0.929294 , 0.936669 ,
0.935498 , 0.94823 , 0.949846 , 0.945885 , 0.935468 , 0.930084 , 0.934473 , 0.935378 , 0.939193
, 0.935081 , 0.937398 , 0.943396 , 0.947133 , 0.950645 , 0.945531 , 0.940295 , 0.933129 ,
0.930664 , 0.92736 , 0.931786 , 0.928217 , 0.928205 , 0.926481 , 0.928583 , 0.930504 , 0.93648
, 0.930731 , 0.931265 , 0.935063 , 0.93434 , 0.926983 , 0.932689 , 0.936477 , 0.939647 , 0.940155
, 0.937519 , 0.939448 , 0.942124 , 0.93653 , 0.9435 , 0.959204 , 0.942566 , 0.940873 , 0.939414
, 0.939822 , 0.940174 , 0.941372 , 0.939347 , 0.942108 , 0.942664 , 0.934811 , 0.934567 ,
0.937712 , 0.940611 , 0.944809 , 0.939877 , 0.943376 , 0.939189 , 0.943619 , 0.946268 , 0.940166
, 0.953752 , 0.958975 , 0.954512 , 0.954103 , 0.958978 , 0.953247 , 0.952199 , 0.956082 ,
0.957846 , 0.970078 , 0.973704 , 0.980014 , 0.928845 , 0.922973 , 0.954414 , 0.95521 , 0.961276
, 0.964513 , 0.965296 , 0.964644 , 0.954999 , 0.951133 , 0.956216 , 0.951977 , 0.948547 ,
0.949499 , 0.952685 , 0.950158 , 0.944263 , 0.936946 , 0.938394 , 0.941325 , 0.94116 , 0.941397
, 0.940811 , 0.942695 , 0.945228 , 0.953929 , 0.962457 , 0.968728 , 0.963947 , 0.961222 ,
0.963003 , 0.967658 , 0.969773 , 0.970294 , 0.963456 , 0.970497 , 0.976972 , 0.961611 , 0.953081
, 0.945668 , 0.993867 , 1.019915 , 0.997013 , 0.977643 , 0.998022 , 1.007041 , 1.003881 ,
0.991335 , 0.976202 , 0.967636 , 0.969294 , 0.965331 , 0.968705 , 0.965705 , 0.973601 , 0.97282
, 0.970848 , 0.970687 , 0.969394 , 0.972263 , 0.969286 , 0.970327 , 0.97754 , 0.984703 , 0.993916
, 1.02186 , 1.054704 , 1.061183 , 1.036962 , 1.012519 , 0.991209 , 0.975974 , 0.965446 , 0.958801
, 0.951519 , 0.960628 , 0.957276 , 0.96061 , 0.95689 , 0.956666 , 0.965567 , 0.982251 , 1.038526
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.131643 , 1.150333 , 1.112466 , 1.06255 , 1.033182 , 1.03018 , 1.006512 , 0.993285
, 1.003557 , 1.014441 , 0.998586 , 0.994426 , 0.987975 , 0.984681 , 0.981863 , 0.964106 ,
0.956432 , 0.954467 , 0.956429 , 0.950668 , 0.947217 , 0.944635 , 0.942942 , 0.941078 , 0.943502
, 0.950408 , 0.957625 , 0.965073 , 0.976012 , 0.976683 , 0.975625 , 0.968011 , 0.968843 ,
0.970632 , 0.960977 , 0.960505 , 0.955015 , 0.953597 , 0.951119 , 0.945679 , 0.949988 , 0.951236
, 0.947813 , 0.948004 , 0.950015 , 0.939258 , 0.945863 , 0.953927 , 0.953145 , 0.945291 ,
0.942319 , 0.947022 , 0.948264 , 0.947112 , 0.942092 , 0.943128 , 0.948068 , 0.944432 , 0.950396
, 0.964006 , 0.961019 , 0.951786 , 0.957457 , 0.950327 , 0.954375 , 0.9608 , 0.965864 , 0.982396
, 1.011334 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 ,
1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 ,
1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.289798 , 1.126375 , 1.074621 , 1.069365 , 1.051364
, 1.009432 , 0.984578 , 0.991187 , 1.016306 , 1.083681 , 1.076539 , 1.104657 , 1.114005 ,
1.081102 , 1.039196 , 1.009524 , 0.966515 , 0.953688 , 0.966274 , 0.973232 , 0.966402 , 0.946801
, 0.951952 , 0.965679 , 0.96977 , 0.946541 , 0.941678 , 0.937528 , 0.922351 , 0.914192 , 0.92879
, 0.932284 , 0.933182 , 0.922322 , 0.91851 , 0.919591 , 0.925027 , 0.924611 , 0.932288 , 0.933352
, 0.930517 , 0.931666 , 0.931763 , 0.932655 , 0.928945 , 0.933308 , 0.932392 , 0.932943 ,
0.935328 , 0.947019 , 0.954093 , 0.95156 , 0.939591 , 0.942808 , 0.944862 , 0.944004 , 0.949161
, 0.950992 , 0.956738 , 0.951184 , 0.953545 , 0.958836 , 0.966134 , 0.956752 , 0.951961 ,
0.958667 , 0.9579 , 0.968531 , 0.973792 , 0.969238 , 0.970838 , 0.954552 , 0.968166 , 0.989176
, 0.974784 , 0.970674 , 0.9733 , 0.990576 , 1.0062 , 1.010295 , 0.99378 , 0.986109 , 1.007054
, 1.005377 , 1.010013 , 1.014671 , 1.021618 , 1.021229 , 1.021003 , 1.020866 , 1.029358 ,
1.042136 , 1.030482 , 1.019556 , 1.036656 , 1.05348 , 1.015947 , 1.07263 , 1.092879 , 1.053624
, 1.086491 , 1.139334 , 1.163645 , 1.162487 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 }
+wavelength = { 346.2995778 , 351.3082478 , 356.3169178 , 361.3255878 , 366.3342578 , 371.3429278
, 376.3515978 , 381.3602678 , 386.3689378 , 391.3776078 , 396.3862778 , 401.3949478 , 406.4036178
, 411.4122878 , 416.4209578 , 421.4296278 , 426.4382978 , 431.4469678 , 436.4556378 , 441.4643078
, 446.4729778 , 451.4816478 , 456.4903178 , 461.4989878 , 466.5076578 , 471.5163278 , 476.5249978
, 481.5336678 , 486.5423378 , 491.5510078 , 496.5596778 , 501.5683478 , 506.5770178 , 511.5856878
, 516.5943578 , 521.6030278 , 526.6116978 , 531.6203678 , 536.6290378 , 541.6377078 , 546.6463778
, 551.6550478 , 556.6637178 , 561.6723878 , 566.6810578 , 571.6897278 , 576.6983978 , 581.7070678
, 586.7157378 , 591.7244078 , 596.7330778 , 601.7417478 , 606.7504178 , 611.7590878 , 616.7677578
, 621.7764278 , 626.7850978 , 631.7937678 , 636.8024378 , 641.8111078 , 646.8197778 , 651.8284478
, 656.8371178 , 661.8457878 , 666.8544578 , 671.8631278 , 676.8717978 , 681.8804678 , 686.8891378
, 691.8978078 , 696.9064778 , 701.9151478 , 706.9238178 , 711.9324878 , 716.9411578 , 721.9498278
, 726.9584978 , 731.9671678 , 736.9758378 , 741.9845078 , 746.9931778 , 752.0018478 , 757.0105178
, 762.0191878 , 767.0278578 , 772.0365278 , 777.0451978 , 782.0538678 , 787.0625378 , 792.0712078
, 797.0798778 , 802.0885478 , 807.0972178 , 812.1058878 , 817.1145578 , 822.1232278 , 827.1318978
, 832.1405678 , 837.1492378 , 842.1579078 , 847.1665778 , 852.1752478 , 857.1839178 , 862.1925878
, 867.2012578 , 872.2099278 , 877.2185978 , 882.2272678 , 887.2359378 , 892.2446078 , 897.2532778
, 902.2619478 , 907.2706178 , 912.2792878 , 917.2879578 , 922.2966278 , 927.3052978 , 932.3139678
, 937.3226378 , 942.3313078 , 947.3399778 , 952.3486478 , 957.3573178 , 962.3659878 , 967.3746578
, 972.3833278 , 977.3919978 , 982.4006678 , 987.4093378 , 992.4180078 , 997.4266778 , 1002.4353478
, 1007.4440178 , 1012.4526878 , 1017.4613578 , 1022.4700278 , 1027.4786978 , 1032.4873678
, 1037.4960378 , 1042.5047078 , 1047.5133778 , 1052.5220478 , 1057.5307178 , 1062.5393878
, 1067.5480578 , 1072.5567278 , 1077.5653978 , 1082.5740778 , 1087.5827478 , 1092.5914078
, 1097.6000778 , 1102.6087478 , 1107.6174278 , 1112.6260878 , 1117.6347678 , 1122.6434278
, 1127.6520978 , 1132.6607678 , 1137.6694478 , 1142.6781078 , 1147.6867778 , 1152.6954478
, 1157.7041278 , 1162.7127878 , 1167.7214678 , 1172.7301378 , 1177.7388078 , 1182.7474778
, 1187.7561478 , 1192.7648178 , 1197.7734878 , 1202.7821578 , 1207.7908278 , 1212.7994978
, 1217.8081678 , 1222.8168378 , 1227.8255078 , 1232.8341778 , 1237.8428478 , 1242.8515178
, 1247.8601878 , 1252.8688578 , 1257.8775278 , 1262.8861978 , 1267.8948678 , 1272.9035378
, 1277.9122078 , 1282.9208778 , 1287.9295478 , 1292.9382178 , 1297.9468878 , 1302.9555578
, 1307.9642278 , 1312.9728978 , 1317.9815678 , 1322.9902378 , 1327.9989078 , 1333.0075778
, 1338.0162478 , 1343.0249178 , 1348.0335878 , 1353.0422578 , 1358.0509278 , 1363.0595978
, 1368.0682678 , 1373.0769378 , 1378.0856078 , 1383.0942778 , 1388.1029478 , 1393.1116178
, 1398.1202878 , 1403.1289578 , 1408.1376278 , 1413.1462978 , 1418.1549678 , 1423.1636378
, 1428.1723078 , 1433.1809778 , 1438.1896478 , 1443.1983178 , 1448.2069878 , 1453.2156578
, 1458.2243278 , 1463.2329978 , 1468.2416678 , 1473.2503378 , 1478.2590078 , 1483.2676778
, 1488.2763478 , 1493.2850178 , 1498.2936878 , 1503.3023578 , 1508.3110278 , 1513.3196978
, 1518.3283678 , 1523.3370378 , 1528.3457078 , 1533.3543778 , 1538.3630478 , 1543.3717178
, 1548.3803878 , 1553.3890578 , 1558.3977278 , 1563.4063978 , 1568.4150678 , 1573.4237378
, 1578.4324078 , 1583.4410778 , 1588.4497478 , 1593.4584178 , 1598.4670878 , 1603.4757578
, 1608.4844278 , 1613.4930978 , 1618.5017678 , 1623.5104378 , 1628.5191078 , 1633.5277778
, 1638.5364478 , 1643.5451178 , 1648.5537878 , 1653.5624578 , 1658.5711278 , 1663.5797978
, 1668.5884678 , 1673.5971378 , 1678.6058078 , 1683.6144778 , 1688.6231478 , 1693.6318178
, 1698.6404878 , 1703.6491578 , 1708.6578278 , 1713.6664978 , 1718.6751678 , 1723.6838378
, 1728.6925078 , 1733.7011778 , 1738.7098478 , 1743.7185178 , 1748.7271878 , 1753.7358578
, 1758.7445278 , 1763.7531978 , 1768.7618678 , 1773.7705378 , 1778.7792078 , 1783.7878778
, 1788.7965478 , 1793.8052178 , 1798.8138878 , 1803.8225578 , 1808.8312278 , 1813.8398978
, 1818.8485678 , 1823.8572378 , 1828.8659078 , 1833.8745778 , 1838.8832478 , 1843.8919178
, 1848.9005878 , 1853.9092578 , 1858.9179278 , 1863.9265978 , 1868.9352678 , 1873.9439378
, 1878.9526078 , 1883.9612778 , 1888.9699478 , 1893.9786178 , 1898.9872878 , 1903.9959578
, 1909.0046278 , 1914.0132978 , 1919.0219778 , 1924.0306378 , 1929.0393178 , 1934.0479778
, 1939.0566478 , 1944.0653278 , 1949.0739978 , 1954.0826578 , 1959.0913278 , 1964.1000078
, 1969.1086778 , 1974.1173378 , 1979.1260078 , 1984.1346778 , 1989.1433578 , 1994.1520178
, 1999.1606878 , 2004.1693678 , 2009.1780378 , 2014.1867078 , 2019.1953778 , 2024.2040478
, 2029.2127178 , 2034.2213878 , 2039.2300578 , 2044.2387278 , 2049.2473978 , 2054.2560678
, 2059.2647378 , 2064.2734078 , 2069.2820778 , 2074.2907478 , 2079.2994178 , 2084.3080878
, 2089.3167578 , 2094.3254278 , 2099.3340978 , 2104.3427678 , 2109.3514378 , 2114.3601078
, 2119.3687778 , 2124.3774478 , 2129.3861178 , 2134.3947878 , 2139.4034578 , 2144.4121278
, 2149.4207978 , 2154.4294678 , 2159.4381378 , 2164.4468078 , 2169.4554778 , 2174.4641478
, 2179.4728178 , 2184.4814878 , 2189.4901578 , 2194.4988278 , 2199.5074978 , 2204.5161678
, 2209.5248378 , 2214.5335078 , 2219.5421778 , 2224.5508478 , 2229.5595178 , 2234.5681878
, 2239.5768578 , 2244.5855278 , 2249.5941978 , 2254.6028678 , 2259.6115378 , 2264.6202078
, 2269.6288778 , 2274.6375478 , 2279.6462178 , 2284.6548878 , 2289.6635578 , 2294.6722278
, 2299.6808978 , 2304.6895678 , 2309.6982378 , 2314.7069078 , 2319.7155778 , 2324.7242478
, 2329.7329178 , 2334.7415878 , 2339.7502578 , 2344.7589278 , 2349.7675978 , 2354.7762678
, 2359.7849378 , 2364.7936078 , 2369.8022778 , 2374.8109478 , 2379.8196178 , 2384.8282878
, 2389.8369578 , 2394.8456278 , 2399.8542978 , 2404.8629678 , 2409.8716378 , 2414.8803078
, 2419.8889778 , 2424.8976478 , 2429.9063178 , 2434.9149878 , 2439.9236578 , 2444.9323278
, 2449.9409978 , 2454.9496678 , 2459.9583378 , 2464.9670078 , 2469.9756778 , 2474.9843478
, 2479.9930178 , 2485.0016878 , 2490.0103578 , 2495.0190278 , 2500.0276978 , 2505.0363678
}
+fwhm = { 5.55165 , 5.55524 , 5.55879 , 5.5623 , 5.56577 , 5.5692 , 5.5726 , 5.57595 , 5.57927
, 5.58255 , 5.58579 , 5.58899 , 5.59216 , 5.59529 , 5.59839 , 5.60144 , 5.60446 , 5.60745
, 5.6104 , 5.61332 , 5.61619 , 5.61904 , 5.62185 , 5.62463 , 5.62737 , 5.63008 , 5.63275 ,
5.6354 , 5.63801 , 5.64058 , 5.64313 , 5.64564 , 5.64812 , 5.65057 , 5.65299 , 5.65537 , 5.65773
, 5.66005 , 5.66235 , 5.66461 , 5.66684 , 5.66905 , 5.67122 , 5.67337 , 5.67549 , 5.67758
, 5.67964 , 5.68167 , 5.68367 , 5.68565 , 5.6876 , 5.68952 , 5.69141 , 5.69328 , 5.69512 ,
5.69694 , 5.69873 , 5.70049 , 5.70223 , 5.70394 , 5.70563 , 5.70729 , 5.70893 , 5.71055 ,
5.71214 , 5.7137 , 5.71524 , 5.71676 , 5.71826 , 5.71973 , 5.72118 , 5.72261 , 5.72401 , 5.72539
, 5.72676 , 5.72809 , 5.72941 , 5.73071 , 5.73198 , 5.73324 , 5.73447 , 5.73569 , 5.73688
, 5.73806 , 5.73921 , 5.74034 , 5.74146 , 5.74256 , 5.74363 , 5.74469 , 5.74573 , 5.74676
, 5.74776 , 5.74875 , 5.74972 , 5.75067 , 5.7516 , 5.75252 , 5.75342 , 5.75431 , 5.75518 ,
5.75603 , 5.75687 , 5.75769 , 5.75849 , 5.75928 , 5.76006 , 5.76082 , 5.76156 , 5.76229 ,
5.76301 , 5.76371 , 5.7644 , 5.76507 , 5.76573 , 5.76638 , 5.76702 , 5.76764 , 5.76825 , 5.76884
, 5.76943 , 5.77 , 5.77056 , 5.77111 , 5.77164 , 5.77217 , 5.77268 , 5.77319 , 5.77368 , 5.77416
, 5.77463 , 5.77509 , 5.77554 , 5.77598 , 5.77641 , 5.77683 , 5.77725 , 5.77765 , 5.77804
, 5.77843 , 5.7788 , 5.77917 , 5.77953 , 5.77988 , 5.78022 , 5.78056 , 5.78088 , 5.7812 ,
5.78151 , 5.78182 , 5.78212 , 5.78241 , 5.78269 , 5.78297 , 5.78324 , 5.78351 , 5.78377 ,
5.78402 , 5.78427 , 5.78452 , 5.78475 , 5.78499 , 5.78521 , 5.78543 , 5.78565 , 5.78587 ,
5.78607 , 5.78628 , 5.78648 , 5.78668 , 5.78687 , 5.78706 , 5.78724 , 5.78743 , 5.78761 ,
5.78778 , 5.78796 , 5.78813 , 5.7883 , 5.78846 , 5.78862 , 5.78879 , 5.78895 , 5.7891 , 5.78926
, 5.78941 , 5.78957 , 5.78972 , 5.78987 , 5.79002 , 5.79017 , 5.79032 , 5.79046 , 5.79061
, 5.79076 , 5.79091 , 5.79105 , 5.7912 , 5.79135 , 5.7915 , 5.79164 , 5.79179 , 5.79194 ,
5.79209 , 5.79224 , 5.7924 , 5.79255 , 5.79271 , 5.79286 , 5.79302 , 5.79318 , 5.79335 , 5.79351
, 5.79368 , 5.79385 , 5.79402 , 5.79419 , 5.79437 , 5.79455 , 5.79473 , 5.79492 , 5.79511
, 5.7953 , 5.7955 , 5.7957 , 5.7959 , 5.7961 , 5.79631 , 5.79653 , 5.79675 , 5.79697 , 5.7972
, 5.79743 , 5.79766 , 5.7979 , 5.79815 , 5.7984 , 5.79865 , 5.79891 , 5.79918 , 5.79945 ,
5.79972 , 5.80001 , 5.80029 , 5.80058 , 5.80088 , 5.80119 , 5.8015 , 5.80181 , 5.80213 , 5.80246
, 5.8028 , 5.80314 , 5.80349 , 5.80384 , 5.8042 , 5.80457 , 5.80494 , 5.80532 , 5.80571 ,
5.80611 , 5.80651 , 5.80692 , 5.80734 , 5.80777 , 5.8082 , 5.80864 , 5.80909 , 5.80955 , 5.81001
, 5.81048 , 5.81096 , 5.81145 , 5.81195 , 5.81246 , 5.81297 , 5.81349 , 5.81402 , 5.81456
, 5.81511 , 5.81567 , 5.81623 , 5.81681 , 5.81739 , 5.81799 , 5.81859 , 5.8192 , 5.81982 ,
5.82045 , 5.82109 , 5.82174 , 5.8224 , 5.82307 , 5.82374 , 5.82443 , 5.82513 , 5.82584 , 5.82656
, 5.82728 , 5.82802 , 5.82877 , 5.82953 , 5.8303 , 5.83108 , 5.83186 , 5.83266 , 5.83347 ,
5.8343 , 5.83513 , 5.83597 , 5.83682 , 5.83769 , 5.83856 , 5.83944 , 5.84034 , 5.84125 , 5.84217
, 5.8431 , 5.84404 , 5.84499 , 5.84595 , 5.84692 , 5.84791 , 5.84891 , 5.84992 , 5.85093 ,
5.85197 , 5.85301 , 5.85406 , 5.85513 , 5.85621 , 5.85729 , 5.8584 , 5.85951 , 5.86063 , 5.86177
, 5.86292 , 5.86408 , 5.86525 , 5.86643 , 5.86763 , 5.86884 , 5.87005 , 5.87129 , 5.87253
, 5.87379 , 5.87506 , 5.87634 , 5.87763 , 5.87893 , 5.88025 , 5.88158 , 5.88292 , 5.88428
, 5.88564 , 5.88702 , 5.88841 , 5.88982 , 5.89123 , 5.89266 , 5.8941 , 5.89556 , 5.89702 ,
5.8985 , 5.89999 , 5.90149 , 5.90301 , 5.90454 , 5.90608 , 5.90763 , 5.9092 , 5.91078 , 5.91237
, 5.91398 , 5.91559 , 5.91722 , 5.91886 , 5.92052 , 5.92219 , 5.92387 , 5.92556 , 5.92727
, 5.92898 , 5.93071 , 5.93246 , 5.93421 , 5.93598 , 5.93776 , 5.93956 , 5.94137 , 5.94319
, 5.94502 , 5.94686 , 5.94872 , 5.95059 , 5.95247 , 5.95437 , 5.95628 , 5.9582 , 5.96013 ,
5.96207 , 5.96403 , 5.966 , 5.96799 , 5.96998 , 5.97199 , 5.97401 , 5.97604 , 5.97809 , 5.98015
, 5.98222 , 5.9843 , 5.98639 , 5.9885 , 5.99062 , 5.99275 , 5.9949 , 5.99706 , 5.99922 , 6.00141
, 6.0036 , 6.0058 , 6.00802 , 6.01025 , 6.01249 , 6.01475 , 6.01701 , 6.01929 , 6.02158 ,
6.02388 , 6.02619 , 6.02852 }
+bbl = { 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0
, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0
, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0
, 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 }
+smoothing factors = { 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
, 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0 , 1.0
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
b/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
index 6884a7ef0..10d8845e7 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
@@ -89,7 +89,7 @@ public void parse(InputStream stream, ContentHandler handler,
                 metadata.set(Metadata.CONTENT_ENCODING, charset.name());
                 xhtml = new XHTMLContentHandler(handler, metadata);
                 xhtml.startDocument();
-                readLines(reader);
+                readLines(reader, metadata);
                 xhtml.endDocument();
         } catch (IOException | TikaException e) {
           LOG.error("Error reading input data stream.", e);
@@ -97,25 +97,31 @@ public void parse(InputStream stream, ContentHandler handler,
 
     }
 
-    private void readLines(AutoDetectReader reader) throws IOException, SAXException {
-      // text contents of the xhtml
-      String line;
-      while ((line = reader.readLine()) != null) {
-          if (line.contains("{") && !line.endsWith("}") || line.startsWith(" "))
{
-              String completeField = parseMultiLineFieldValue(line);
-              if (completeField != null) {
-                  writeParagraph(completeField);
-              }
-          } else {
-              writeParagraph(line);
+    private void readLines(AutoDetectReader reader, Metadata metadata) throws IOException,
SAXException {
+        // text contents of the xhtml
+        String line;
+        while ((line = reader.readLine()) != null) {
+            if (line.contains("{") && !line.endsWith("}") || line.startsWith(" "))
{
+                String completeField = parseMultiLineFieldValue(line);
+                if (completeField != null) {
+                    writeParagraphAndSetMetadata(completeField, metadata);
+                }
+            } else {
+                writeParagraphAndSetMetadata(line, metadata);
           }
-      }
+        }
     }
 
     /*
-     * Simple write a line to the XHTMLContentHandler
+     * Write a line to the XHTMLContentHandler and populate the key, value into the Metadata
      */
-    private void writeParagraph(String line) throws SAXException {
+    private void writeParagraphAndSetMetadata(String line, Metadata metadata) throws SAXException
{
+        if(line.length() < 150) {
+            String[] keyValue = line.split("=");
+            if(keyValue.length != 1) {
+                metadata.set("envi." + keyValue[0].trim().replace(" ", "."), keyValue[1].trim());
+            }
+        }
         xhtml.startElement("p");
         xhtml.characters(line);
         xhtml.endElement("p");


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> Capture short fields (<150 chars) in EnviParserHeader Metadata
> --------------------------------------------------------------
>
>                 Key: TIKA-2762
>                 URL: https://issues.apache.org/jira/browse/TIKA-2762
>             Project: Tika
>          Issue Type: Improvement
>          Components: parser
>    Affects Versions: 1.19.1
>            Reporter: Lewis John McGibbney
>            Assignee: Lewis John McGibbney
>            Priority: Major
>             Fix For: 1.20
>
>
> I have always wanted to capture more metadata for the EnviHeader files. Right now everything
is shoved into the record's content and I think we could improve it.
> I've implemented a rudimentary parser improvement which essentially captures any reasonably
sized line items (<150 chars), which can then be populated up to the Metadata level, making
faceted search over ENVI .hdr documents a much easier task.
> PR coming up. 



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message