tika-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (TIKA-2309) New Detector and Parser classes for Time Stamped Data Envelope file format
Date Tue, 04 Apr 2017 13:48:41 GMT

    [ https://issues.apache.org/jira/browse/TIKA-2309?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15955152#comment-15955152
] 

ASF GitHub Bot commented on TIKA-2309:
--------------------------------------

Shinobi75 commented on a change in pull request #161: fix for TIKA-2309 contributed by Shinobi@75
URL: https://github.com/apache/tika/pull/161#discussion_r109666158
 
 

 ##########
 File path: tika-parsers/src/main/java/org/apache/tika/parser/crypto/TSDParser.java
 ##########
 @@ -0,0 +1,333 @@
+package org.apache.tika.parser.crypto;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.math.BigInteger;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.security.NoSuchProviderException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.TimeZone;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.bouncycastle.asn1.cryptopro.CryptoProObjectIdentifiers;
+import org.bouncycastle.asn1.nist.NISTObjectIdentifiers;
+import org.bouncycastle.asn1.oiw.OIWObjectIdentifiers;
+import org.bouncycastle.asn1.pkcs.PKCSObjectIdentifiers;
+import org.bouncycastle.asn1.teletrust.TeleTrusTObjectIdentifiers;
+import org.bouncycastle.asn1.x509.GeneralName;
+import org.bouncycastle.asn1.x509.X509ObjectIdentifiers;
+import org.bouncycastle.asn1.x9.X9ObjectIdentifiers;
+import org.bouncycastle.cms.CMSSignedDataGenerator;
+import org.bouncycastle.tsp.TimeStampToken;
+import org.bouncycastle.tsp.cms.CMSTimeStampedDataParser;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/*
+
+Format name:             Time Stamped Data Envelope
+
+Mime Type:               application/timestamped-data
+
+Extension:               .tsd
+
+*/
+public class TSDParser extends AbstractParser {
+    
+    /**
+     * 
+     */
+    private static final long serialVersionUID = 6139181424595882376L;
+    
+    private final String TSD_LOOP_LABEL = "Time-Stamp-n.";
+    private final String TSD_DESCRIPTION_VALUE = "Time Stamped Data Envelope";
+    private final String TSD_PARSED_LABEL = "File-Parsed";
+    private final String TSD_PARSED_DATE = "File-Parsed-DateTime";
+    private final String TSD_DATE = "Time-Stamp-DateTime";
+    private final String TSD_DATE_FORMAT = "UTC";
+    private final String TSD_POLICY_ID = "Policy-Id";
+    private final String TSD_SERIAL_NUMBER = "Serial-Number";
+    private final String TSD_TSA = "TSA";
+    private final String TSD_ALGORITHM = "Algorithm";
+    
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("timestamped-data"));
+    public static final String TSD_MIME_TYPE = "application/timestamped-data";
+    
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+    
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws IOException, SAXException,
TikaException {
+                
+        //Try to parse TSD File
+        List<TSDMetas> tsdMetasList = this.buildMetas(stream);
+        
+        Integer count = 1;
+        
+        for(TSDMetas tsdm: tsdMetasList) {
+            metadata.set(TSD_LOOP_LABEL + count + " - " + Metadata.CONTENT_TYPE, TSD_MIME_TYPE);
+            metadata.set(TSD_LOOP_LABEL + count + " - " + Metadata.DESCRIPTION, TSD_DESCRIPTION_VALUE);
+            metadata.set(TSD_LOOP_LABEL + count + " - " + this.TSD_PARSED_LABEL, tsdm.getParseBuiltStr());
+            metadata.set(TSD_LOOP_LABEL + count + " - " + this.TSD_PARSED_DATE, tsdm.getParsedDateStr()
+ " " + this.TSD_DATE_FORMAT);
+            metadata.set(TSD_LOOP_LABEL + count + " - " + this.TSD_DATE, tsdm.getEmitDateStr()
+ " " + this.TSD_DATE_FORMAT);
+            metadata.set(TSD_LOOP_LABEL + count + " - " + this.TSD_POLICY_ID, tsdm.getPolicyId());
+            metadata.set(TSD_LOOP_LABEL + count + " - " + this.TSD_SERIAL_NUMBER, tsdm.getSerialNumberFormatted());
+            metadata.set(TSD_LOOP_LABEL + count + " - " + this.TSD_TSA, tsdm.getTSAstr());
+            metadata.set(TSD_LOOP_LABEL + count + " - " + this.TSD_ALGORITHM, tsdm.getAlgorithmName());
+            count++;
+        }
+                
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+        
+     }
+    
+    private List<TSDMetas> buildMetas(InputStream stream) {
+        
+        List<TSDMetas> tsdMetasList = new ArrayList<TSDMetas>();
+        
+        try {
+             
+             CMSTimeStampedDataParser cmsTimeStampedData = new CMSTimeStampedDataParser(stream);
+             
+             TimeStampToken[] tokens = cmsTimeStampedData.getTimeStampTokens();
+             
+             for (int i=0; i < tokens.length; i++) {
+                 
+                 TSDMetas tsdMetas = new TSDMetas(true,
+                                                   tokens[i].getTimeStampInfo().getGenTime(),
+                                                   tokens[i].getTimeStampInfo().getPolicy().getId(),
+                                                   tokens[i].getTimeStampInfo().getSerialNumber(),
+                                                   tokens[i].getTimeStampInfo().getTsa(),
+                                                   tokens[i].getTimeStampInfo().getHashAlgorithm().getAlgorithm().getId());
+                 
+                 tsdMetasList.add(tsdMetas);
+             }
+             
+        } catch (Exception ex) {
 
 Review comment:
   done on commit db345ac0e06fe0d530758277ad8370019594d64d
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> New Detector and Parser classes for Time Stamped Data Envelope file format
> --------------------------------------------------------------------------
>
>                 Key: TIKA-2309
>                 URL: https://issues.apache.org/jira/browse/TIKA-2309
>             Project: Tika
>          Issue Type: Improvement
>          Components: detector, parser
>    Affects Versions: 1.13, 1.14
>            Reporter: Fabio
>            Priority: Minor
>         Attachments: MANIFEST.XML.TSD
>
>
> Hello,
> I'm Fabio Evangelista from Rome. I'm working for an Italian Public Administration company
and I'm using Apache Tika in my Java applications to detect and parse a broad range of file
formats. During that activity, after following your good guide on the Tika project page, I've
successfully created new Detector and Parser classes for a particular crypto timestamp
type with these characteristics:
> Format name:               Time Stamped Data Envelope
> Mime Type:                   application/timestamped-data
> File extension:              .tsd
> TSD file hex magic code at the start of the file:   30 80 06 0B 2A 86 48 86 F7
> I've successfully integrated and tested those new classes with my applications in Tika
1.13 tika-core.jar and tika-parsers.jar. What should I do to submit my new classes to you?
Should I push them to a particular git branch, or is there a particular process to follow
to submit my classes?
> Thank you for your patience and best regards.
> Fabio.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Mime
View raw message