ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1799919 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java
Date Mon, 26 Jun 2017 13:45:35 GMT
Author: tmill
Date: Mon Jun 26 13:45:34 2017
New Revision: 1799919

URL: http://svn.apache.org/viewvc?rev=1799919&view=rev
Log:
Update custom data writer to also write feature map file.

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java?rev=1799919&r1=1799918&r2=1799919&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/EncoderReusingDataWriter.java Mon Jun 26 13:45:34 2017
@@ -1,15 +1,21 @@
 package org.apache.ctakes.assertion.medfacts.cleartk;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.ObjectInputStream;
+import java.io.*;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Scanner;
+import java.util.Set;
 
+import com.google.common.collect.Lists;
+import de.bwaldvogel.liblinear.FeatureNode;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.encoder.CleartkEncoderException;
+import org.cleartk.ml.encoder.features.FeaturesEncoder;
 import org.cleartk.ml.encoder.outcome.StringToIntegerOutcomeEncoder;
 import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
 import org.cleartk.ml.liblinear.encoder.FeatureNodeArrayEncoder;
+import org.cleartk.util.collection.GenericStringMapper;
+import org.cleartk.util.collection.Writable;
 
 public class EncoderReusingDataWriter extends LibLinearStringOutcomeDataWriter {
 
@@ -20,12 +26,14 @@ public class EncoderReusingDataWriter ex
     if(encoderFile.exists()){
       try {
         ObjectInputStream ois = new ObjectInputStream(new FileInputStream(encoderFile));
-        this.setFeaturesEncoder((FeatureNodeArrayEncoder) ois.readObject());
+        this.setFeaturesEncoder(new WritingFeatureNodeArrayEncoder((FeatureNodeArrayEncoder) ois.readObject()));
         ois.close();
       } catch (ClassNotFoundException | IOException e) {
         e.printStackTrace();
         throw new FileNotFoundException("Problem loading encoder from encoders.ser");
       }
+    }else{
+      this.setFeaturesEncoder(new WritingFeatureNodeArrayEncoder());
     }
     
     File outputEncoderFile = new File(outputDirectory, "outcome-lookup.txt");
@@ -43,4 +51,51 @@ public class EncoderReusingDataWriter ex
     }
   }
 
+  public static class WritingFeatureNodeArrayEncoder implements FeaturesEncoder<FeatureNode[]> {
+    private FeatureNodeArrayEncoder encoder = null;
+    private Set<String> featureNames = null;
+    public static final String LOOKUP_FILE_NAME = "features-lookup.txt";
+
+    public WritingFeatureNodeArrayEncoder() {
+      encoder = new FeatureNodeArrayEncoder();
+      featureNames = new HashSet<>();
+    }
+
+    public WritingFeatureNodeArrayEncoder(FeatureNodeArrayEncoder encoder){
+      this.encoder = encoder;
+      featureNames = new HashSet<>();
+    }
+
+    @Override
+    public FeatureNode[] encodeAll(Iterable<Feature> features) throws CleartkEncoderException {
+      FeatureNode[] encoded = encoder.encodeAll(features);
+      for(Feature feature : features){
+        String name;
+        if(feature.getValue() instanceof Number) {
+          name = feature.getName();
+        } else {
+          name = Feature.createName(new String[]{feature.getName(), feature.getValue().toString()});
+        }
+        featureNames.add(name);
+      }
+      return encoded;
+    }
+
+    @Override
+    public void finalizeFeatureSet(File file) throws IOException {
+      encoder.finalizeFeatureSet(file);
+
+      File outFile = new File(file.getPath(), this.LOOKUP_FILE_NAME);
+      PrintWriter out = new PrintWriter(new FileWriter(outFile));
+      for(String featName : featureNames){
+        List<Feature> feat = Lists.newArrayList(new Feature(featName, 1.0));
+        try {
+          FeatureNode encodedNode = encoder.encodeAll(feat)[1]; // index 0 is the bias feature
+          out.println(String.format("%s : %d", featName, encodedNode.getIndex()));
+        }catch(CleartkEncoderException e){
+          throw new IOException(e);
+        }
+      }
+    }
+  }
 }



Mime
View raw message