Author: mostarda
Date: Tue Apr 3 09:40:03 2012
New Revision: 1308786
URL: http://svn.apache.org/viewvc?rev=1308786&view=rev
Log:
Improved CLI support by introducing JCommander. The command-line parsing logic, as well as the production of help and version messages, has been moved into ToolRunner. This commit includes a set of minor fixes to the patch provided by stripodi (Simone Tripodi) and is related to issue #ANY23-71.
Removed:
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Version.java
incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/VersionTest.java
Modified:
incubator/any23/trunk/core/pom.xml
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Tool.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ToolRunner.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/WriterRegistry.java
incubator/any23/trunk/core/src/main/resources/default-configuration.properties
incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/RoverTest.java
incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolTestBase.java
incubator/any23/trunk/plugins/basic-crawler/pom.xml
incubator/any23/trunk/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
incubator/any23/trunk/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java
incubator/any23/trunk/pom.xml
Modified: incubator/any23/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/pom.xml?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/pom.xml (original)
+++ incubator/any23/trunk/core/pom.xml Tue Apr 3 09:40:03 2012
@@ -23,7 +23,7 @@
<groupId>org.apache.any23</groupId>
<artifactId>any23-parent</artifactId>
<version>0.7.0-incubating-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
+ <relativePath>../</relativePath>
</parent>
<artifactId>any23-core</artifactId>
@@ -44,13 +44,13 @@
<artifactId>commons-codec</artifactId>
</dependency>
<dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- </dependency>
- <dependency>
<groupId>net.sourceforge.nekohtml</groupId>
<artifactId>nekohtml</artifactId>
</dependency>
+ <dependency>
+ <groupId>com.beust</groupId>
+ <artifactId>jcommander</artifactId>
+ </dependency>
<!-- BEGIN: Tika -->
<dependency>
@@ -173,12 +173,8 @@
<configuration>
<programs>
<program>
- <mainClass>org.apache.any23.cli.Rover</mainClass>
- <name>any23</name>
- </program>
- <program>
<mainClass>org.apache.any23.cli.ToolRunner</mainClass>
- <name>any23tools</name>
+ <name>any23</name>
</program>
</programs>
</configuration>
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java Tue Apr 3 09:40:03 2012
@@ -17,122 +17,63 @@
package org.apache.any23.cli;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
import org.apache.any23.extractor.ExampleInputOutput;
import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.ExtractorRegistry;
-import org.apache.any23.util.LogUtils;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.Extractor.BlindExtractor;
import org.apache.any23.extractor.Extractor.ContentExtractor;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.ExtractorRegistry;
import org.kohsuke.MetaInfServices;
import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
/**
* This class provides some command-line documentation
* about available extractors and their usage.
*/
@MetaInfServices
-@ToolRunner.Description("Utility for obtaining documentation about metadata extractors.")
+@Parameters( commandNames = { "extractor" }, commandDescription= "Utility for obtaining documentation about metadata extractors.")
public class ExtractorDocumentation implements Tool {
- /**
- * Main method to access the class functionality.
- *
- * Usage:
- * ExtractorDocumentation -list
- * shows the names of all available extractors
- *
- * ExtractorDocumentation -i extractor-name
- * shows example input for the given extractor
- *
- * ExtractorDocumentation -o extractor-name
- * shows example output for the given extractor
- *
- * ExtractorDocumentation -all
- * shows a report about all available extractors
- *
- * @param args allowed arguments
- * @throws ExtractionException
- * @throws IOException
- */
- public static void main(String[] args) throws ExtractionException, IOException {
- System.exit( new ExtractorDocumentation().run(args) );
- }
+ @Parameter( names = { "-l", "--list" }, description = "shows the names of all available extractors" )
+ private boolean showList;
- public int run(String[] args) {
- LogUtils.setDefaultLogging();
- try {
- if (args.length == 0) {
- printUsage();
- return 1;
- }
+ @Parameter( names = { "-i", "--input" }, description = "shows example input for the given extractor" )
+ private boolean showInput;
- final String option = args[0];
- if ("-list".equals(option)) {
- if (args.length > 1) {
- printUsage();
- return 2;
- }
- printExtractorList();
- }
- else if ("-i".equals(option)) {
- if (args.length > 2) {
- printUsage();
- return 3;
- }
- if (args.length < 2) {
- printError("Required argument for -i: extractor name");
- return 4;
- }
- printExampleInput(args[1]);
- }
- else if ("-o".equals(option)) {
- if (args.length > 2) {
- printUsage();
- return 5;
- }
- if (args.length < 2) {
- printError("Required argument for -o: extractor name");
- return 6;
- }
- printExampleOutput(args[1]);
+ @Parameter( names = { "-o", "--output" }, description = "shows example output for the given extractor" )
+ private boolean showOutput;
+
+ @Parameter( names = { "-a", "--all" }, description = "shows a report about all available extractors" )
+ private boolean showAll;
+
+ @Parameter( arity = 1, description = "Extractor name" )
+ private List<String> extractor = new LinkedList<String>();
+
+ public void run() throws Exception {
+ if (showList) {
+ printExtractorList();
+ } else if (showInput) {
+ if (extractor.isEmpty()) {
+ throw new IllegalArgumentException("Required argument for -i: extractor name");
}
- else if ("-all".equals(option)) {
- if (args.length > 1) {
- printUsage();
- return 7;
- }
- printReport();
- } else {
- printUsage();
+
+ printExampleInput(extractor.get(0));
+ } else if (showOutput) {
+ if (extractor.isEmpty()) {
+ throw new IllegalArgumentException("Required argument for -o: extractor name");
}
- } catch (Exception e) {
- e.printStackTrace(System.err);
- return 8;
- }
- return 0;
- }
- /**
- * Prints the command line usage help.
- */
- public void printUsage() {
- System.out.println("Usage:");
- System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -list");
- System.out.println(" shows the names of all available extractors");
- System.out.println();
- System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -i extractor-name");
- System.out.println(" shows example input for the given extractor");
- System.out.println();
- System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -o extractor-name");
- System.out.println(" shows example output for the given extractor");
- System.out.println();
- System.out.println(" " + ExtractorDocumentation.class.getSimpleName() + " -all");
- System.out.println(" shows a report about all available extractors");
- System.out.println();
+ printExampleOutput(extractor.get(0));
+ } else if (showAll) {
+ printReport();
+ }
}
/**
@@ -148,7 +89,7 @@ public class ExtractorDocumentation impl
* Prints the list of all the available extractors.
*/
public void printExtractorList() {
- for(ExtractorFactory factory : ExtractorRegistry.getInstance().getExtractorGroup()) {
+ for (ExtractorFactory factory : ExtractorRegistry.getInstance().getExtractorGroup()) {
System.out.println( String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorType()));
}
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MicrodataParser.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MicrodataParser.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MicrodataParser.java Tue Apr 3 09:40:03 2012
@@ -17,6 +17,10 @@
package org.apache.any23.cli;
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.beust.jcommander.Parameters;
import org.apache.any23.extractor.html.TagSoupParser;
import org.apache.any23.http.DefaultHTTPClient;
import org.apache.any23.source.DocumentSource;
@@ -26,9 +30,10 @@ import org.apache.any23.util.StreamUtils
import org.kohsuke.MetaInfServices;
import java.io.File;
-import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
+import java.util.LinkedList;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -40,50 +45,57 @@ import java.util.regex.Pattern;
* @author Michele Mostarda (mostarda@fbk.eu)
*/
@MetaInfServices
-@ToolRunner.Description("Commandline Tool for extracting Microdata from file/HTTP source.")
+@Parameters( commandNames = { "microdata" }, commandDescription = "Commandline Tool for extracting Microdata from file/HTTP source.")
public class MicrodataParser implements Tool {
- private static final String HTTP_DOCUMENT_SOURCE = "^https?://.*";
- private static final String FILE_DOCUMENT_SOURCE = "^file:(.*)$";
+ private static final Pattern HTTP_DOCUMENT_PATTERN = Pattern.compile("^https?://.*");
- public static void main(String[] args) throws URISyntaxException, IOException {
- System.exit( new MicrodataParser().run(args) );
- }
+ private static final Pattern FILE_DOCUMENT_PATTERN = Pattern.compile("^file:(.*)$");
- public int run(String[] args) {
- if(args.length != 1) {
- System.err.println("USAGE: {http://path/to/resource.html|file:/path/to/local.file}");
- return 1;
+ @Parameter(
+ arity = 1,
+ description = "Input document URL, {http://path/to/resource.html|file:/path/to/local.file}",
+ converter = MicrodataParserDocumentSourceConverter.class
+ )
+ private List<DocumentSource> document = new LinkedList<DocumentSource>();
+
+ public void run() throws Exception {
+ if (document.isEmpty()) {
+ throw new IllegalArgumentException("No input document URL specified");
}
InputStream documentInputInputStream = null;
try {
- final DocumentSource documentSource = getDocumentSource(args[0]);
+ final DocumentSource documentSource = document.get(0);
documentInputInputStream = documentSource.openInputStream();
final TagSoupParser tagSoupParser = new TagSoupParser(
documentInputInputStream,
documentSource.getDocumentURI()
);
org.apache.any23.extractor.microdata.MicrodataParser.getMicrodataAsJSON(tagSoupParser.getDOM(), System.out);
- } catch (Exception e) {
- System.err.println("***ERROR: " + e.getMessage());
- e.printStackTrace();
- return 1;
} finally {
- if(documentInputInputStream != null) StreamUtils.closeGracefully(documentInputInputStream);
+ if (documentInputInputStream != null) StreamUtils.closeGracefully(documentInputInputStream);
}
- return 0;
}
- private DocumentSource getDocumentSource(String source) throws URISyntaxException {
- final Matcher httpMatcher = Pattern.compile(HTTP_DOCUMENT_SOURCE).matcher(source);
- if(httpMatcher.find()) {
- return new HTTPDocumentSource(new DefaultHTTPClient(), source);
- }
- final Matcher fileMatcher = Pattern.compile(FILE_DOCUMENT_SOURCE).matcher(source);
- if(fileMatcher.find()) {
- return new FileDocumentSource( new File( fileMatcher.group(1) ) );
+ public static final class MicrodataParserDocumentSourceConverter implements IStringConverter<DocumentSource> {
+
+ @Override
+ public DocumentSource convert( String value ) {
+ final Matcher httpMatcher = HTTP_DOCUMENT_PATTERN.matcher(value);
+ if (httpMatcher.find()) {
+ try {
+ return new HTTPDocumentSource(new DefaultHTTPClient(), value);
+ } catch ( URISyntaxException e ) {
+ throw new ParameterException("Invalid source URI: '" + value + "'");
+ }
+ }
+ final Matcher fileMatcher = FILE_DOCUMENT_PATTERN.matcher(value);
+ if (fileMatcher.find()) {
+ return new FileDocumentSource( new File( fileMatcher.group(1) ) );
+ }
+ throw new ParameterException("Invalid source protocol: '" + value + "'");
}
- throw new IllegalArgumentException("Invalid source protocol: '" + source + "'");
+
}
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/MimeDetector.java Tue Apr 3 09:40:03 2012
@@ -17,6 +17,9 @@
package org.apache.any23.cli;
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.http.DefaultHTTPClient;
import org.apache.any23.http.HTTPClient;
@@ -32,6 +35,8 @@ import org.kohsuke.MetaInfServices;
import java.io.File;
import java.net.URISyntaxException;
+import java.util.LinkedList;
+import java.util.List;
/**
* Commandline tool to detect <b>MIME Type</b>s from
@@ -41,76 +46,73 @@ import java.net.URISyntaxException;
* @author Michele Mostarda (mostarda@fbk.eu)
*/
@MetaInfServices
-@ToolRunner.Description("MIME Type Detector Tool.")
+@Parameters(commandNames = { "mimes" }, commandDescription = "MIME Type Detector Tool.")
public class MimeDetector implements Tool{
public static final String FILE_DOCUMENT_PREFIX = "file://";
+
public static final String INLINE_DOCUMENT_PREFIX = "inline://";
- public static final String URL_DOCUMENT_RE = "^https?://.*";
- public static void main(String[] args) {
- System.exit( new MimeDetector().run(args) );
- }
+ public static final String URL_DOCUMENT_RE = "^https?://.*";
- @Override
- public int run(String[] args) {
- if(args.length != 1) {
- System.err.println("USAGE: {http://path/to/resource.html|file:///path/to/local.file|inline:// some inline content}");
- return 1;
+ @Parameter(
+ arity = 1,
+ description = "Input document URL, {http://path/to/resource.html|file:///path/to/local.file|inline:// some inline content}",
+ converter = MimeDetectorDocumentSourceConverter.class
+ )
+ private List<DocumentSource> document = new LinkedList<DocumentSource>();
+
+ public void run() throws Exception {
+ if (document.isEmpty()) {
+ throw new IllegalArgumentException("No input document URL specified");
}
- final String document = args[0];
- try {
- final DocumentSource documentSource = createDocumentSource(document);
- final MIMETypeDetector detector = new TikaMIMETypeDetector();
- final MIMEType mimeType = detector.guessMIMEType(
- documentSource.getDocumentURI(),
- documentSource.openInputStream(),
- MIMEType.parse(documentSource.getContentType())
- );
- System.out.println(mimeType);
- return 0;
- } catch (Exception e) {
- System.err.print("Error while detecting MIME Type.");
- e.printStackTrace(System.err);
- return 1;
- }
+ final DocumentSource documentSource = document.get(0);
+ final MIMETypeDetector detector = new TikaMIMETypeDetector();
+ final MIMEType mimeType = detector.guessMIMEType(
+ documentSource.getDocumentURI(),
+ documentSource.openInputStream(),
+ MIMEType.parse(documentSource.getContentType())
+ );
+ System.out.println(mimeType);
}
- private DocumentSource createDocumentSource(String document) throws URISyntaxException {
- if(document.startsWith(FILE_DOCUMENT_PREFIX)) {
- return new FileDocumentSource(
- new File(
- document.substring(FILE_DOCUMENT_PREFIX.length())
- )
- );
- }
- if(document.startsWith(INLINE_DOCUMENT_PREFIX)) {
- return new StringDocumentSource(
- document.substring(INLINE_DOCUMENT_PREFIX.length()),
- ""
- );
- }
- if(document.matches(URL_DOCUMENT_RE)) {
- final HTTPClient client = new DefaultHTTPClient();
- // TODO: anonymous config class also used in Any23. centralize.
- client.init(new HTTPClientConfiguration() {
- public String getUserAgent() {
- return DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");
- }
- public String getAcceptHeader() {
- return "";
- }
- public int getDefaultTimeout() {
- return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout");
- }
- public int getMaxConnections() {
- return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections");
+ public static final class MimeDetectorDocumentSourceConverter implements IStringConverter<DocumentSource> {
+
+ @Override
+ public DocumentSource convert( String document ) {
+ if (document.startsWith(FILE_DOCUMENT_PREFIX)) {
+ return new FileDocumentSource( new File( document.substring(FILE_DOCUMENT_PREFIX.length()) ) );
+ }
+ if (document.startsWith(INLINE_DOCUMENT_PREFIX)) {
+ return new StringDocumentSource( document.substring(INLINE_DOCUMENT_PREFIX.length()), "" );
+ }
+ if (document.matches(URL_DOCUMENT_RE)) {
+ final HTTPClient client = new DefaultHTTPClient();
+ // TODO: anonymous config class also used in Any23. centralize.
+ client.init(new HTTPClientConfiguration() {
+ public String getUserAgent() {
+ return DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");
+ }
+ public String getAcceptHeader() {
+ return "";
+ }
+ public int getDefaultTimeout() {
+ return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.timeout");
+ }
+ public int getMaxConnections() {
+ return DefaultConfiguration.singleton().getPropertyIntOrFail("any23.http.client.max.connections");
+ }
+ });
+ try {
+ return new HTTPDocumentSource(client, document);
+ } catch ( URISyntaxException e ) {
+ throw new IllegalArgumentException("Invalid source URI: '" + document + "'");
}
- });
- return new HTTPDocumentSource(client, document);
+ }
+ throw new IllegalArgumentException("Unsupported protocol for document " + document);
}
- throw new IllegalArgumentException("Unsupported protocol for document " + document);
+
}
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java Tue Apr 3 09:40:03 2012
@@ -17,12 +17,9 @@
package org.apache.any23.cli;
-import java.io.File;
-import java.io.PrintStream;
-import java.net.MalformedURLException;
-import java.util.Collection;
-import java.util.Iterator;
-
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
+import com.beust.jcommander.converters.FileConverter;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.mime.MIMEType;
import org.apache.any23.plugin.Any23PluginManager;
@@ -30,6 +27,13 @@ import org.apache.any23.plugin.Author;
import org.apache.any23.plugin.ExtractorPlugin;
import org.kohsuke.MetaInfServices;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
/**
* Commandline utility to verify the <b>Any23</b> plugins
* and extract basic information.
@@ -37,66 +41,49 @@ import org.kohsuke.MetaInfServices;
* @author Michele Mostarda (mostarda@fbk.eu)
*/
@MetaInfServices
-@ToolRunner.Description("Utility for plugin management verification.")
+@Parameters(commandNames = { "verify" }, commandDescription = "Utility for plugin management verification.")
public class PluginVerifier implements Tool {
private Any23PluginManager pluginManager = Any23PluginManager.getInstance();
- public static void main(String[] args) throws MalformedURLException {
- System.exit( new PluginVerifier().run(args) );
- }
+ @Parameter(
+ description = "plugins-dir",
+ converter = FileConverter.class
+ )
+ private List<File> pluginsDirs = new LinkedList<File>();
- public int run(String[] args) {
- if(args.length != 1) {
- printHelp("Invalid argument.");
- return 1;
+ public void run() throws Exception {
+ if (pluginsDirs.isEmpty()) {
+ throw new IllegalArgumentException("No plugin directory specified.");
}
- final File pluginsDir = new File(args[0]);
- if(!pluginsDir.isDirectory()) {
- printHelp("<plugins-dir> must be a valid dir.");
- return 2;
+ final File pluginsDir = pluginsDirs.get(0);
+ if (!pluginsDir.isDirectory()) {
+ throw new IllegalArgumentException("<plugins-dir> must be a valid dir.");
}
- final Iterator<ExtractorPlugin> plugins;
- try{
- pluginManager.loadJARDir(pluginsDir);
- plugins = pluginManager.getExtractors();
- } catch (Exception e) {
- e.printStackTrace(System.err);
- return 3;
- }
+ pluginManager.loadJARDir(pluginsDir);
+
+ final Iterator<ExtractorPlugin> plugins = pluginManager.getExtractors();
+
while (plugins.hasNext()) {
- System.out.println("-----------------------------");
- printPluginData(plugins.next().getClass(), System.out);
- System.out.println("-----------------------------");
+ printPluginData(plugins.next(), System.out);
+ System.out.println("------------------------------------------------------------------------");
}
- return 0;
- }
-
- private void printHelp(String msg) {
- System.err.println("***ERROR: " + msg);
- System.err.println("Usage: " + this.getClass().getSimpleName() + " <plugins-dir>");
}
private String getMimeTypesStr(Collection<MIMEType> mimeTypes) {
final StringBuilder sb = new StringBuilder();
- for(MIMEType mt : mimeTypes) {
+ for (MIMEType mt : mimeTypes) {
sb.append(mt).append(' ');
}
return sb.toString();
}
- private void printPluginData(Class<? extends ExtractorPlugin> extractorPlugin, PrintStream ps) {
- final Author authorAnnotation = extractorPlugin.getAnnotation(Author.class);
- final ExtractorPlugin instance;
- try {
- instance = extractorPlugin.newInstance();
- } catch (Exception e) {
- throw new IllegalStateException("Error while instantiating plugin.", e);
- }
+ private void printPluginData(ExtractorPlugin instance, PrintStream ps) {
+ final Author authorAnnotation = instance.getClass().getAnnotation(Author.class);
final ExtractorFactory<?> extractorFactory = instance.getExtractorFactory();
- ps.printf("Plugin class : %s\n", extractorPlugin.getClass());
+ ps.printf("Plugin class : %s\n", instance.getClass());
ps.printf("Plugin author : %s\n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name());
ps.printf("Plugin factory : %s\n", extractorFactory.getClass());
ps.printf("Plugin mime-types: %s\n", getMimeTypesStr( extractorFactory.getSupportedMIMETypes() ));
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Rover.java Tue Apr 3 09:40:03 2012
@@ -17,43 +17,41 @@
package org.apache.any23.cli;
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.beust.jcommander.Parameters;
+import com.beust.jcommander.converters.FileConverter;
import org.apache.any23.Any23;
import org.apache.any23.configuration.Configuration;
import org.apache.any23.configuration.DefaultConfiguration;
-import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionParameters.ValidationMode;
import org.apache.any23.extractor.SingleDocumentExtraction;
import org.apache.any23.filter.IgnoreAccidentalRDFa;
import org.apache.any23.filter.IgnoreTitlesOfEmptyDocuments;
import org.apache.any23.source.DocumentSource;
-import org.apache.any23.util.LogUtils;
import org.apache.any23.writer.BenchmarkTripleHandler;
import org.apache.any23.writer.LoggingTripleHandler;
import org.apache.any23.writer.ReportingTripleHandler;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.any23.writer.WriterRegistry;
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.PosixParser;
import org.kohsuke.MetaInfServices;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
-import java.net.URISyntaxException;
import java.net.URL;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
-import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
+import static java.lang.String.format;
/**
* A default rover implementation. Goes and fetches a URL using an hint
@@ -64,360 +62,208 @@ import static org.apache.any23.extractor
* @author Gabriele Renzi
*/
@MetaInfServices
-@ToolRunner.Description("Any23 Command Line Tool.")
+@Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.")
public class Rover implements Tool {
private static final String[] FORMATS = WriterRegistry.getInstance().getIdentifiers();
+
private static final int DEFAULT_FORMAT_INDEX = 0;
private static final Logger logger = LoggerFactory.getLogger(Rover.class);
- private Options options;
-
- private CommandLine commandLine;
+ @Parameter(
+ names = { "-o", "--output" },
+ description = "Specify Output file (defaults to standard output)",
+ converter = PrintStreamConverter.class
+ )
+ private PrintStream outputStream = System.out;
- private boolean verbose = false;
+ @Parameter(description = "input URIs {<url>|<file>}+", converter = ArgumentToURIConverter.class)
+ protected List<String> inputURIs = new LinkedList<String>();
- private PrintStream outputStream;
- private TripleHandler tripleHandler;
- private ReportingTripleHandler reportingTripleHandler;
- private BenchmarkTripleHandler benchmarkTripleHandler;
-
- private ExtractionParameters eps;
- private Any23 any23;
+ @Parameter(names = { "-e", "--extractors" }, description = "a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle")
+ private List<String> extractors = new LinkedList<String>();
- protected boolean isVerbose() {
- return verbose;
- }
+ @Parameter(names = { "-f", "--format" }, description = "the output format")
+ private String format = FORMATS[DEFAULT_FORMAT_INDEX];
- public static void main(String[] args) {
- System.exit( new Rover().run(args) );
- }
+ @Parameter(
+ names = { "-l", "--log" },
+ description = "Produce log within a file.",
+ converter = FileConverter.class
+ )
+ private File logFile = null;
- public int run(String[] args) {
- try {
- final String[] uris = configure(args);
- performExtraction(uris);
- return 0;
- } catch (Exception e) {
- System.err.println( e.getMessage() );
- final int exitCode = e instanceof ExitCodeException ? ((ExitCodeException) e).exitCode : 1;
- if(verbose) e.printStackTrace(System.err);
- return exitCode;
- }
- }
+ @Parameter(names = { "-s", "--stats" }, description = "Print out extraction statistics.")
+ private boolean statistics;
- protected CommandLine getCommandLine() {
- if(commandLine == null) throw new IllegalStateException("Rover must be configured first.");
- return commandLine;
- }
+ @Parameter(names = { "-t", "--notrivial" }, description = "Filter trivial statements (e.g. CSS related ones).")
+ private boolean noTrivial;
- protected String[] configure(String[] args) throws Exception {
- final CommandLineParser parser = new PosixParser();
- options = createOptions();
- commandLine = parser.parse(options, args);
+ @Parameter(names = { "-p", "--pedantic" }, description = "Validate and fixes HTML content detecting commons issues.")
+ private boolean pedantic;
- if (commandLine.hasOption("h")) {
- printHelp();
- throw new ExitCodeException(0);
- }
+ @Parameter(names = { "-n", "--nesting" }, description = "Disable production of nesting triples.")
+ private boolean nestingDisabled;
- if (commandLine.hasOption('v')) {
- verbose = true;
- LogUtils.setVerboseLogging();
- } else {
- LogUtils.setDefaultLogging();
- }
+ @Parameter(names = { "-d", "--defaultns" }, description = "Override the default namespace used to produce statements.")
+ private String defaultns;
- if (commandLine.getArgs().length < 1) {
- printHelp();
- throw new IllegalArgumentException("Expected at least 1 argument.");
- }
+ // non parameters
- final String[] inputURIs = argumentsToURIs(commandLine.getArgs());
- final String[] extractorNames = getExtractors(commandLine);
+ private TripleHandler tripleHandler;
- try {
- outputStream = getOutputStream(commandLine);
- tripleHandler = getTripleHandler(commandLine, outputStream);
- tripleHandler = decorateWithLogHandler(commandLine, tripleHandler);
- tripleHandler = decorateWithStatisticsHandler(commandLine, tripleHandler);
+ private ReportingTripleHandler reportingTripleHandler;
- benchmarkTripleHandler =
- tripleHandler instanceof BenchmarkTripleHandler ? (BenchmarkTripleHandler) tripleHandler : null;
+ private BenchmarkTripleHandler benchmarkTripleHandler;
- tripleHandler = decorateWithAccidentalTriplesFilter(commandLine, tripleHandler);
+ private Any23 any23;
- reportingTripleHandler = new ReportingTripleHandler(tripleHandler);
- eps = getExtractionParameters(commandLine);
- any23 = createAny23(extractorNames);
+ private ExtractionParameters extractionParameters;
- return inputURIs;
+ protected void configure() {
+ try {
+ tripleHandler = WriterRegistry.getInstance().getWriterInstanceByIdentifier(format, outputStream);
} catch (Exception e) {
- closeStreams();
- throw e;
+ throw new NullPointerException(
+ format("Invalid output format '%s', admitted values: %s",
+ format,
+ Arrays.toString(FORMATS)
+ )
+ );
}
- }
- protected Options createOptions() {
- final Options options = new Options();
- options.addOption(
- new Option("v", "verbose", false, "Show debug and progress information.")
- );
- options.addOption(
- new Option("h", "help", false, "Print this help.")
- );
- options.addOption(
- new Option("e", true, "Specify a comma-separated list of extractors, e.g. rdf-xml,rdf-turtle.")
- );
- options.addOption(
- new Option("o", "output", true, "Specify Output file (defaults to standard output).")
- );
- options.addOption(
- new Option(
- "f",
- "Output format",
- true,
- "[" + printFormats(FORMATS, DEFAULT_FORMAT_INDEX) + "]"
- )
- );
- options.addOption(
- new Option("t", "notrivial", false, "Filter trivial statements (e.g. CSS related ones).")
- );
- options.addOption(
- new Option("s", "stats", false, "Print out extraction statistics.")
- );
- options.addOption(
- new Option("l", "log", true, "Produce log within a file.")
- );
- options.addOption(
- new Option("p", "pedantic", false, "Validate and fixes HTML content detecting commons issues.")
- );
- options.addOption(
- new Option("n", "nesting", false, "Disable production of nesting triples.")
- );
- options.addOption(
- new Option("d", "defaultns", true, "Override the default namespace used to produce statements.")
- );
- return options;
- }
+ if (logFile != null) {
+ try {
+ tripleHandler = new LoggingTripleHandler(tripleHandler, new PrintWriter(logFile));
+ } catch (FileNotFoundException fnfe) {
+ throw new IllegalArgumentException( format("Can not write to log file [%s]", logFile), fnfe );
+ }
+ }
- protected void performExtraction(DocumentSource documentSource) {
- performExtraction(any23, eps, documentSource, reportingTripleHandler);
- }
+ if (statistics) {
+ benchmarkTripleHandler = new BenchmarkTripleHandler(tripleHandler);
+ tripleHandler = benchmarkTripleHandler;
+ }
- protected void performExtraction(String[] inputURIs) throws URISyntaxException, IOException {
- try {
- final long start = System.currentTimeMillis();
- for (String inputURI : inputURIs) {
- performExtraction( any23.createDocumentSource(inputURI) );
- }
- final long elapsed = System.currentTimeMillis() - start;
+ if (noTrivial) {
+ tripleHandler = new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(tripleHandler),
+ true // suppress stylesheet triples.
+ );
+ }
- if (benchmarkTripleHandler != null) {
- System.err.println(benchmarkTripleHandler.report());
- }
+ reportingTripleHandler = new ReportingTripleHandler(tripleHandler);
- logger.info("Extractors used: " + reportingTripleHandler.getExtractorNames());
- logger.info(reportingTripleHandler.getTotalTriples() + " triples, " + elapsed + "ms");
- } finally {
- closeStreams();
+ final Configuration configuration = DefaultConfiguration.singleton();
+ extractionParameters =
+ pedantic
+ ?
+ new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)
+ :
+ new ExtractionParameters(configuration, ValidationMode.None , nestingDisabled);
+ if (defaultns != null) {
+ extractionParameters.setProperty(SingleDocumentExtraction.EXTRACTION_CONTEXT_URI_PROPERTY,
+ defaultns);
}
+
+ any23 = (extractors.isEmpty()) ? new Any23()
+ : new Any23(extractors.toArray(new String[extractors.size()]));
+ any23.setHTTPUserAgent(Any23.DEFAULT_HTTP_CLIENT_USER_AGENT + "/" + Any23.VERSION);
}
protected String printReports() {
final StringBuilder sb = new StringBuilder();
- if(benchmarkTripleHandler != null) sb.append( benchmarkTripleHandler.report() ).append('\n');
- if(reportingTripleHandler != null) sb.append( reportingTripleHandler.printReport() ).append('\n');
+ if (benchmarkTripleHandler != null) sb.append( benchmarkTripleHandler.report() ).append('\n');
+ if (reportingTripleHandler != null) sb.append( reportingTripleHandler.printReport() ).append('\n');
return sb.toString();
}
- private void printHelp() {
- HelpFormatter formatter = new HelpFormatter();
- formatter.printHelp("[{<url>|<file>}]+", options, true);
- }
-
- private String printFormats(String[] formats, int defaultIndex) {
- final StringBuilder sb = new StringBuilder();
- for (int i = 0; i < formats.length; i++) {
- sb.append(formats[i]);
- if(i == defaultIndex) sb.append(" (default)");
- if(i < formats.length - 1) sb.append(", ");
+ protected void performExtraction(DocumentSource documentSource) throws Exception {
+ if (!any23.extract(extractionParameters, documentSource, tripleHandler).hasMatchingExtractors()) {
+ throw new IllegalStateException(format("No suitable extractors found for source %s", documentSource));
}
- return sb.toString();
}
- private String argumentToURI(String uri) {
- uri = uri.trim();
- if (uri.toLowerCase().startsWith("http:") || uri.toLowerCase().startsWith("https:")) {
+ /**
+ * Releases the resources held by this tool: closes the triple handler (when one is set) and
+ * then the output stream, unless that stream is {@code System.out}.
+ * The stream is closed in a {@code finally} block so that it is released even when closing
+ * the handler fails.
+ *
+ * @throws RuntimeException wrapping the {@code TripleHandlerException} raised while closing the handler.
+ */
+ protected void close() {
+ try {
+ if (tripleHandler != null) {
try {
- return new URL(uri).toString();
- } catch (MalformedURLException murle) {
- throw new IllegalArgumentException(String.format("Invalid URI: '%s'", uri), murle);
+ tripleHandler.close();
+ } catch (TripleHandlerException the) {
+ throw new RuntimeException("Error while closing TripleHandler", the);
}
}
- final File f = new File(uri);
- if (!f.exists()) {
- throw new IllegalArgumentException(String.format("No such file: [%s]", f.getAbsolutePath()));
+ } finally {
+ // Runs even if the handler could not be closed, so the output stream is not leaked.
+ if (outputStream != null && outputStream != System.out) { // TODO: low - find better solution to avoid closing system out.
+ outputStream.close();
}
- if (f.isDirectory()) {
- throw new IllegalArgumentException(String.format("Found a directory: [%s]", f.getAbsolutePath()));
- }
- return f.toURI().toString();
+ }
}
- protected String[] argumentsToURIs(String[] args) {
- final String[] uris = new String[args.length];
- for(int i = 0; i < args.length; i++) {
- uris[i] = argumentToURI(args[i]);
+ public void run() throws Exception {
+ if (inputURIs.isEmpty()) {
+ throw new IllegalArgumentException("Expected at least 1 argument.");
}
- return uris;
- }
-
- private String[] getExtractors(CommandLine cl) {
- if (cl.hasOption('e')) {
- return cl.getOptionValue('e').split(",");
- }
- return null;
- }
- private PrintStream openPrintStream(String fileName) {
- final File file = new File(fileName);
- try {
- return new PrintStream(file);
- } catch (FileNotFoundException fnfe) {
- throw new IllegalArgumentException("Cannot open file '" + file.getAbsolutePath() + "'", fnfe);
- }
- }
+ configure();
- private PrintStream getOutputStream(CommandLine cl) {
- if (cl.hasOption("o")) {
- final String fileName = cl.getOptionValue("o");
- return openPrintStream(fileName);
- } else {
- return System.out;
- }
- }
+ // perform conversions
- private TripleHandler getTripleHandler(CommandLine cl, OutputStream os) {
- final String FORMAT_OPTION = "f";
- String format = FORMATS[DEFAULT_FORMAT_INDEX];
- if (cl.hasOption(FORMAT_OPTION)) {
- format = cl.getOptionValue(FORMAT_OPTION).toLowerCase();
- }
try {
- return WriterRegistry.getInstance().getWriterInstanceByIdentifier(format, os);
- } catch (Exception e) {
- throw new IllegalArgumentException(
- String.format("Invalid option value '%s' for option %s", format, FORMAT_OPTION)
- );
- }
- }
-
- private TripleHandler decorateWithAccidentalTriplesFilter(CommandLine cl, TripleHandler in) {
- if (cl.hasOption('t')) {
- return new IgnoreAccidentalRDFa(
- new IgnoreTitlesOfEmptyDocuments(in),
- true // suppress stylesheet triples.
- );
- }
- return in;
- }
+ final long start = System.currentTimeMillis();
+ for (String inputURI : inputURIs) {
+ DocumentSource source = any23.createDocumentSource(inputURI);
- private TripleHandler decorateWithStatisticsHandler(CommandLine cl, TripleHandler in) {
- if (cl.hasOption('s')) {
- return new BenchmarkTripleHandler(in);
- }
- return in;
- }
+ performExtraction( source );
+ }
+ final long elapsed = System.currentTimeMillis() - start;
- private TripleHandler decorateWithLogHandler(CommandLine cl, TripleHandler in) {
- if (cl.hasOption('l')) {
- File logFile = new File(cl.getOptionValue('l'));
- try {
- return new LoggingTripleHandler(in, new PrintWriter(logFile));
- } catch (FileNotFoundException fnfe) {
- throw new IllegalArgumentException( String.format("Could not write to log file [%s]", logFile), fnfe );
+ if (benchmarkTripleHandler != null) {
+ System.err.println(benchmarkTripleHandler.report());
}
- }
- return in;
- }
- private ExtractionParameters getExtractionParameters(CommandLine cl) {
- final boolean nestingDisabled = ! cl.hasOption('n');
- final Configuration configuration = DefaultConfiguration.singleton();
- final ExtractionParameters extractionParameters =
- cl.hasOption('p')
- ?
- new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)
- :
- new ExtractionParameters(configuration, ValidationMode.None , nestingDisabled);
- if( cl.hasOption('d') ) {
- extractionParameters.setProperty(
- SingleDocumentExtraction.EXTRACTION_CONTEXT_URI_PROPERTY,
- cl.getOptionValue('d')
- );
+ logger.info("Extractors used: " + reportingTripleHandler.getExtractorNames());
+ logger.info(reportingTripleHandler.getTotalTriples() + " triples, " + elapsed + "ms");
+ } finally {
+ close();
}
- return extractionParameters;
}
- private Any23 createAny23(String[] extractorNames) {
- Any23 any23 = (extractorNames == null || extractorNames.length == 0)
- ? new Any23()
- : new Any23(extractorNames);
- any23.setHTTPUserAgent(Any23.DEFAULT_HTTP_CLIENT_USER_AGENT + "/" + Any23.VERSION);
- return any23;
- }
+ public static final class ArgumentToURIConverter implements IStringConverter<String> {
- private void performExtraction(
- Any23 any23, ExtractionParameters eps, DocumentSource documentSource, TripleHandler th
- ) {
- try {
- if (! any23.extract(eps, documentSource, th).hasMatchingExtractors()) {
- throw new ExitCodeException("No suitable extractors found.", 2);
+ @Override
+ public String convert(String uri) {
+ uri = uri.trim();
+ if (uri.toLowerCase().startsWith("http:") || uri.toLowerCase().startsWith("https:")) {
+ try {
+ return new URL(uri).toString();
+ } catch (MalformedURLException murle) {
+ throw new ParameterException(format("Invalid URI: '%s': %s", uri, murle.getMessage()));
+ }
}
- } catch (ExtractionException ex) {
- throw new ExitCodeException("Exception while extracting metadata.", ex, 3);
- } catch (IOException ex) {
- throw new ExitCodeException("Exception while producing output.", ex, 4);
- }
- }
- private void closeHandler() {
- if(tripleHandler == null) return;
- try {
- tripleHandler.close();
- } catch (TripleHandlerException the) {
- throw new ExitCodeException("Error while closing TripleHandler", the, 5);
+ final File f = new File(uri);
+ if (!f.exists()) {
+ throw new ParameterException(format("No such file: [%s]", f.getAbsolutePath()));
+ }
+ if (f.isDirectory()) {
+ throw new ParameterException(format("Found a directory: [%s]", f.getAbsolutePath()));
+ }
+ return f.toURI().toString();
}
- }
- private void closeStreams() {
- closeHandler();
- if(outputStream != null) outputStream.close();
}
- protected class ExitCodeException extends RuntimeException {
-
- private final int exitCode;
+ public static final class PrintStreamConverter implements IStringConverter<PrintStream> {
- public ExitCodeException(String message, Throwable cause, int exitCode) {
- super(message, cause);
- this.exitCode = exitCode;
- }
- public ExitCodeException(String message, int exitCode) {
- super(message);
- this.exitCode = exitCode;
- }
- public ExitCodeException(int exitCode) {
- super();
- this.exitCode = exitCode;
+ @Override
+ public PrintStream convert( String value ) {
+ final File file = new File(value);
+ try {
+ return new PrintStream(file);
+ } catch (FileNotFoundException fnfe) {
+ throw new ParameterException(format("Cannot open file '%s': %s", file, fnfe.getMessage()));
+ }
}
- protected int getExitCode() {
- return exitCode;
- }
}
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Tool.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Tool.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Tool.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/Tool.java Tue Apr 3 09:40:03 2012
@@ -27,9 +27,8 @@ public interface Tool {
/**
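- * Runs the tool and retrieves the exit code.
+ * Runs the tool. The method returns nothing; a failure is signalled by throwing an exception.
*
- * @param args command line arguments.
- * @return exit code.
+ * @throws Exception if the tool execution fails.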
*/
- int run(String[] args);
+ void run() throws Exception;
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ToolRunner.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ToolRunner.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ToolRunner.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/ToolRunner.java Tue Apr 3 09:40:03 2012
@@ -17,15 +17,24 @@
package org.apache.any23.cli;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.converters.FileConverter;
+import org.apache.any23.Any23;
+import org.apache.any23.plugin.Any23PluginManager;
+import org.apache.any23.util.LogUtils;
+
import java.io.File;
import java.io.IOException;
-import java.lang.annotation.ElementType;
-import java.lang.annotation.Retention;
-import java.lang.annotation.RetentionPolicy;
-import java.lang.annotation.Target;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.Date;
import java.util.Iterator;
+import java.util.Map;
+import java.util.Properties;
-import org.apache.any23.plugin.Any23PluginManager;
+import static java.lang.System.currentTimeMillis;
+import static java.lang.System.exit;
/**
* This class is the main class responsible to provide a uniform command-line
@@ -34,95 +43,181 @@ import org.apache.any23.plugin.Any23Plug
* @see ExtractorDocumentation
* @see Rover
*/
-public class ToolRunner {
+public final class ToolRunner {
+
+ private static final PrintStream infoStream = System.err;
+
+ @Parameter( names = { "-h", "--help" }, description = "Display help information." )
+ private boolean printHelp;
+
+ @Parameter( names = { "-v", "--version" }, description = "Display version information." )
+ private boolean showVersion;
+
+ @Parameter( names = { "-X", "--verbose" }, description = "Produce execution verbose output." )
+ private boolean verbose;
+
+ @Parameter( names = { "-p", "--plugins-dir" }, description = "The Any23 plugins directory.", converter = FileConverter.class )
+ private File pluginsDir = new File(new File(System.getProperty("user.home")), ".any23/plugins");
+
+ /**
+ * Command-line entry point: runs a new {@link ToolRunner} on the given arguments and
+ * terminates the JVM with the exit code it returns.
+ *
+ * @param args the raw command-line arguments.
+ * @throws Exception any exception propagated by {@link #execute(String...)}.
+ */
+ public static void main( String[] args ) throws Exception {
+ final ToolRunner runner = new ToolRunner();
+ final int exitCode = runner.execute( args );
+ exit( exitCode );
+ }
- public static final File HOME_PLUGIN_DIR = new File(
- new File(System.getProperty("user.home")),
- ".any23/plugins"
- );
-
- private static final String USAGE = String.format(
- "Usage: %s <utility> [options...]",
- ToolRunner.class.getSimpleName()
- );
+ /**
+ * Parses the command line, then either prints help or version information or runs the
+ * selected command, reporting the outcome on {@code infoStream}.
+ * <p>
+ * Option precedence: an explicit help request is served first, then a version request
+ * (which needs no command), and only then is a missing command reported as an error.
+ *
+ * @param args the command-line arguments to parse.
+ * @return {@code 0} when the version was printed or the command completed; {@code 1} when
+ * usage was printed or the command threw.
+ * @throws Exception anything raised while loading the tools or parsing the arguments
+ * propagates to the caller; failures of the command itself are caught and reported.
+ */
+ public int execute(String...args) throws Exception {
+ JCommander commander = new JCommander(this);
+ commander.setProgramName(System.getProperty("app.name"));
- public static void main(String[] args) throws IOException {
- //Generate automatically the cli.
+ // add all plugins first
final Iterator<Tool> tools = getToolsInClasspath();
- try {
- if (args.length < 1) {
- usage(null, tools);
- }
+ while (tools.hasNext()) {
+ Tool tool = tools.next();
+ commander.addCommand(tool);
+ }
+
+ commander.parse(args);
+
+ Map<String, JCommander> commands = commander.getCommands();
+ String parsedCommand = commander.getParsedCommand();
+
+ // An explicit help request takes precedence over every other option.
+ if (printHelp) {
+ commander.usage();
+ return 1;
+ }
- final String toolName = args[0];
+ // Version information does not depend on a command, so it must be served
+ // before a missing command is treated as an error.
+ if (showVersion) {
+ printVersionInfo();
+ return 0;
+ }
+
+ if (parsedCommand == null) {
+ infoStream.println("A command must be specified.");
+ commander.usage();
+ return 1;
+ }
+
+ if (verbose) {
+ LogUtils.setVerboseLogging();
+ } else {
+ LogUtils.setDefaultLogging();
+ }
+
+ long start = currentTimeMillis();
+ int exit = 0;
+
+ Throwable error = null;
- while (tools.hasNext()) {
- Tool tool = tools.next();
- if (tool.getClass().getSimpleName().equals(toolName)) {
- String[] mainArgs = new String[args.length - 1];
- System.arraycopy(args, 1, mainArgs, 0, mainArgs.length);
+ // execute the parsed command
+ infoStream.println();
+ infoStream.println( "------------------------------------------------------------------------" );
+ infoStream.printf( "Apache Any23 :: %s%n", parsedCommand );
+ infoStream.println( "------------------------------------------------------------------------" );
+ infoStream.println();
- System.exit(tool.run(mainArgs));
+ try {
+ Tool.class.cast( commands.get( parsedCommand ).getObjects().get( 0 ) ).run();
+ } catch (Throwable t) {
+ // Any failure of the tool is turned into a non-zero exit code and reported below.
+ exit = 1;
+ error = t;
+ } finally {
+ infoStream.println();
+ infoStream.println( "------------------------------------------------------------------------" );
+ infoStream.printf( "Apache Any23 %s%n", ( exit != 0 ) ? "FAILURE" : "SUCCESS" );
+
+ if (exit != 0) {
+ infoStream.println();
+
+ if (verbose) {
+ infoStream.println( "Execution terminated with errors:" );
+ error.printStackTrace(infoStream);
+ } else {
+ infoStream.printf( "Execution terminated with errors: %s%n", error.getMessage() );
}
+
+ infoStream.println();
}
- usage( String.format("[%s] is not a valid tool name.", toolName), tools);
- throw new IllegalStateException();
- } catch (Throwable e) {
- e.printStackTrace();
- Throwable cause = e.getCause();
- if(cause != null) cause.printStackTrace();
- usage(e.toString(), null);
+ infoStream.printf( "Total time: %ss%n", ( ( currentTimeMillis() - start ) / 1000 ) );
+ infoStream.printf( "Finished at: %s%n", new Date() );
+
+ final Runtime runtime = Runtime.getRuntime();
+ final int megaUnit = 1024 * 1024;
+ infoStream.printf( "Final Memory: %sM/%sM%n", ( runtime.totalMemory() - runtime.freeMemory() ) / megaUnit,
+ runtime.totalMemory() / megaUnit );
+
+ infoStream.println( "------------------------------------------------------------------------" );
}
+
+ return exit;
}
- public static Iterator<Tool> getToolsInClasspath() throws IOException {
+ /**
+ * Returns the tools known to the plugin manager, after loading the JARs found in
+ * {@code pluginsDir} when that path exists and is a directory.
+ * NOTE(review): {@code execute} calls this method before {@code commander.parse(args)}, so a
+ * {@code -p/--plugins-dir} value given on the command line has not been applied yet at that
+ * point and the default directory is used - confirm whether that is intended.
+ *
+ * @return an iterator over the tools exposed by the plugin manager.
+ * @throws IOException declared by this method; presumably raised while loading the JARs - confirm.
+ */
+ Iterator<Tool> getToolsInClasspath() throws IOException {
final Any23PluginManager pluginManager = Any23PluginManager.getInstance();
- if(HOME_PLUGIN_DIR.exists()) {
- pluginManager.loadJARDir(HOME_PLUGIN_DIR);
+ if (pluginsDir.exists() && pluginsDir.isDirectory()) {
+ pluginManager.loadJARDir(pluginsDir);
}
return pluginManager.getTools();
}
- private static String padLeft(String s, int n) {
- return String.format("%1$#" + n + "s", s);
- }
-
- private static String getUtilitiesMessage(Iterator<Tool> toolClasses) {
- StringBuffer sb = new StringBuffer();
- sb.append(" where <utility> is one of:\n");
- Description description;
- String utilityName;
- int padding;
- while (toolClasses.hasNext()) {
- Class<?> toolClass = toolClasses.next().getClass();
- utilityName = toolClass.getSimpleName();
- sb.append("\t").append(utilityName);
- description = toolClass.getAnnotation(Description.class);
- padding = 100 - utilityName.length();
- if (description != null) {
- sb.append( padLeft( description.value(), padding >= 0 ? padding : 0) );
+ /**
+ * Prints version and environment details to {@code infoStream}: the Any23 version, the Java
+ * version, vendor and home, the default locale and platform encoding, and the OS name,
+ * version, architecture and family.
+ * NOTE(review): the {@code pom.properties} resource is loaded into {@code properties}, but
+ * nothing in this method reads {@code properties} afterwards - confirm whether its content
+ * is meant to be printed or the loading can be dropped.
+ */
+ private static void printVersionInfo() {
+ Properties properties = new Properties();
+ InputStream input = ToolRunner.class.getClassLoader().getResourceAsStream( "META-INF/maven/org.apache.any23/any23-core/pom.properties" );
+
+ // The resource is optional: a missing file simply leaves the properties empty.
+ if ( input != null ) {
+ try {
+ properties.load( input );
+ } catch ( IOException e ) {
+ // ignore, just don't load the properties
+ } finally {
+ try {
+ input.close();
+ } catch (IOException e) {
+ // close quietly
+ }
}
- sb.append('\n');
}
- return sb.toString();
+
+ infoStream.printf( "Apache Any23 %s%n", Any23.VERSION );
+ infoStream.printf( "Java version: %s, vendor: %s%n",
+ System.getProperty( "java.version" ),
+ System.getProperty( "java.vendor" ) );
+ infoStream.printf( "Java home: %s%n", System.getProperty( "java.home" ) );
+ infoStream.printf( "Default locale: %s_%s, platform encoding: %s%n",
+ System.getProperty( "user.language" ),
+ System.getProperty( "user.country" ),
+ System.getProperty( "sun.jnu.encoding" ) );
+ infoStream.printf( "OS name: \"%s\", version: \"%s\", arch: \"%s\", family: \"%s\"%n",
+ System.getProperty( "os.name" ),
+ System.getProperty( "os.version" ),
+ System.getProperty( "os.arch" ),
+ getOsFamily() );
+ }
- private static void usage(String msg, Iterator<Tool> utilities) {
- if (msg != null) {
- System.err.println("*** ERROR: " + msg);
- System.err.println();
- }
- System.err.println(USAGE);
- if (utilities != null) {
- System.err.println(getUtilitiesMessage(utilities));
+ /**
+ * Maps the {@code os.name} and {@code path.separator} system properties to a short OS family label.
+ * The checks run in the order written and the first match wins, so the order is part of the
+ * result: for example a {@code ";"} path separator yields "dos" before the "mac" test is reached.
+ * NOTE(review): {@code toLowerCase()} is called without an explicit locale - confirm that is acceptable.
+ *
+ * @return one of "windows", "os/2", "z/os", "os/400", "dos", "mac", "unix", "tandem",
+ * "openvms" or "undefined" when no rule matches.
+ */
+ private static final String getOsFamily() {
+ String osName = System.getProperty( "os.name" ).toLowerCase();
+ String pathSep = System.getProperty( "path.separator" );
+
+ if (osName.contains("windows")) {
+ return "windows";
+ } else if (osName.contains("os/2")) {
+ return "os/2";
+ } else if (osName.contains("z/os") || osName.contains("os/390")) {
+ return "z/os";
+ } else if (osName.contains("os/400")) {
+ return "os/400";
+ } else if (pathSep.equals( ";" )) {
+ return "dos";
+ } else if (osName.contains("mac")) {
+ // A name containing "mac" is reported as "mac" only when it ends in "x";
+ // any other such name is reported as "unix".
+ if (osName.endsWith("x")) {
+ return "mac"; // MACOSX
+ }
+ return "unix";
+ } else if (osName.contains("nonstop_kernel")) {
+ return "tandem";
+ } else if (osName.contains("openvms")) {
+ return "openvms";
+ } else if (pathSep.equals(":")) {
+ return "unix";
}
- System.exit(1);
- }
- @Retention(RetentionPolicy.RUNTIME)
- @Target(ElementType.TYPE)
- public @interface Description { String value(); }
+ // No rule matched.
+ return "undefined";
+ }
}
-
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/VocabPrinter.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/VocabPrinter.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/cli/VocabPrinter.java Tue Apr 3 09:40:03 2012
@@ -18,17 +18,12 @@
package org.apache.any23.cli;
import org.apache.any23.vocab.RDFSchemaUtils;
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.CommandLineParser;
-import org.apache.commons.cli.HelpFormatter;
-import org.apache.commons.cli.Option;
-import org.apache.commons.cli.Options;
-import org.apache.commons.cli.PosixParser;
+import org.apache.any23.vocab.RDFSchemaUtils.VocabularyFormat;
import org.kohsuke.MetaInfServices;
-import java.io.BufferedOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
+import com.beust.jcommander.IStringConverter;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
/**
* Prints out the vocabulary <i>RDFSchema</i> as <i>NQuads</i>.
@@ -36,67 +31,31 @@ import java.util.Arrays;
* @author Michele Mostarda (mostarda@fbk.eu)
*/
@MetaInfServices
-@ToolRunner.Description("Prints out the RDF Schema of the vocabularies used by Any23.")
+@Parameters(commandNames = { "vocab" }, commandDescription = "Prints out the RDF Schema of the vocabularies used by Any23.")
public class VocabPrinter implements Tool {
- public static void main(String[] args) throws IOException {
- System.exit( new VocabPrinter().run(args) );
+ @Parameter(
+ names = { "-f", "--format" },
+ description = "Vocabulary output format",
+ converter = VocabularyFormatConverter.class
+ )
+ private VocabularyFormat format = RDFSchemaUtils.VocabularyFormat.NQuads;
+
+ /**
+ * Serializes the vocabularies to {@code System.out} in the configured {@code format}
+ * (default {@code NQuads}, overridable with {@code -f/--format}).
+ *
+ * @throws Exception whatever the serialization call raises is propagated unchanged.
+ */
+ public void run() throws Exception {
+ RDFSchemaUtils.serializeVocabularies(format, System.out);
+ }
- public int run(String[] args) {
- final CommandLineParser parser = new PosixParser();
- final CommandLine commandLine;
- final RDFSchemaUtils.VocabularyFormat format;
- try {
- final Options options = new Options();
- options.addOption(
- new Option("h", "help", false, "Print this help.")
- );
- options.addOption(
- new Option(
- "f", "format",
- true,
- "Vocabulary output format, supported values are: " +
- Arrays.toString(RDFSchemaUtils.VocabularyFormat.values())
- )
- );
- commandLine = parser.parse(options, args);
- if (commandLine.hasOption("h")) {
- printHelp(options);
- return 0;
- }
- try {
- format = RDFSchemaUtils.VocabularyFormat.valueOf(
- commandLine.getOptionValue("f", RDFSchemaUtils.VocabularyFormat.NQuads.name())
- );
- } catch (IllegalArgumentException iae) {
- throw new IllegalArgumentException("Unknown format [" + commandLine.getOptionValue("f") + "'");
- }
- } catch (Exception e) {
- e.printStackTrace(System.err);
- return 1;
- }
+ /**
+ * Converts the value of the {@code -f/--format} option into a {@link VocabularyFormat}
+ * by looking it up with {@code VocabularyFormat.valueOf}.
+ */
+ public static final class VocabularyFormatConverter implements IStringConverter<RDFSchemaUtils.VocabularyFormat> {
- final BufferedOutputStream bos = new BufferedOutputStream(System.out);
- try {
- RDFSchemaUtils.serializeVocabularies(format, System.out);
- } catch (Exception e) {
- e.printStackTrace(System.err);
- return 1;
- } finally {
+ /**
+ * @param value the format name given on the command line.
+ * @return the matching vocabulary format.
+ * @throws IllegalArgumentException if {@code value} does not name a known format; the
+ * original failure is kept as the cause.
+ */
+ @Override
+ public VocabularyFormat convert(String value) {
try {
- bos.flush();
- } catch (IOException ioe) {
- ioe.printStackTrace(System.err);
+ return RDFSchemaUtils.VocabularyFormat.valueOf( value );
+ } catch (RuntimeException e) {
+ // Only lookup failures are translated; JVM errors are left to propagate untouched.
+ throw new IllegalArgumentException("Unknown format [" + value + "]", e);
}
- System.out.println();
}
- return 0;
- }
- private void printHelp(Options options) {
- HelpFormatter formatter = new HelpFormatter();
- formatter.printHelp(this.getClass().getSimpleName(), options, true);
}
}
Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/WriterRegistry.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/WriterRegistry.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/WriterRegistry.java (original)
+++ incubator/any23/trunk/core/src/main/java/org/apache/any23/writer/WriterRegistry.java Tue Apr 3 09:40:03 2012
@@ -229,7 +229,7 @@ public class WriterRegistry {
try {
return clazz.getConstructor(OutputStream.class).newInstance(os);
} catch (Exception e) {
- throw new IllegalArgumentException("Error while initializing format writer " + clazz + " .");
+ throw new IllegalArgumentException("Error while initializing format writer " + clazz + " .", e);
}
}
Modified: incubator/any23/trunk/core/src/main/resources/default-configuration.properties
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/resources/default-configuration.properties?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/main/resources/default-configuration.properties (original)
+++ incubator/any23/trunk/core/src/main/resources/default-configuration.properties Tue Apr 3 09:40:03 2012
@@ -1,5 +1,5 @@
# Any23 Core Version
-any23.core.version=${project.version}
+any23.core.version=${project.version} (${implementation.build.tstamp})
# HTTP Client Configuration.
# ---- Default HTTP User Agent if not specified.
Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java Tue Apr 3 09:40:03 2012
@@ -34,12 +34,12 @@ public class ExtractorDocumentationTest
@Test
public void tesList() throws Exception {
- runToolCheckExit0("-list");
+ runToolCheckExit0("--list");
}
@Test
public void testAll() throws Exception {
- runToolCheckExit0("-all");
+ runToolCheckExit0("--all");
}
//@Ignore("no available example")
Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/RoverTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/RoverTest.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/RoverTest.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/RoverTest.java Tue Apr 3 09:40:03 2012
@@ -60,7 +60,7 @@ public class RoverTest extends ToolTestB
final File outFile = File.createTempFile("rover-test", "out");
final int exitCode = runTool(
String.format(
- "-v -o %s -f nquads -p -n %s -d %s",
+ "-o %s -f nquads -p -n %s -d %s",
outFile.getAbsolutePath(),
"src/test/resources/cli/rover-test1.nq",
DEFAULT_GRAPH
@@ -101,7 +101,7 @@ public class RoverTest extends ToolTestB
final int exitCode = runTool(
String.format(
- "-v -o %s -f nquads -l %s -p -n %s",
+ "-o %s -f nquads -l %s -p -n %s",
outFile.getAbsolutePath(),
logFile.getAbsolutePath(),
StringUtils.join(" ", targets)
Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolRunnerTest.java Tue Apr 3 09:40:03 2012
@@ -17,15 +17,14 @@
package org.apache.any23.cli;
-import static org.junit.Assert.assertTrue;
+import org.junit.Test;
import java.io.IOException;
-import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
/**
* Test case for {@link ToolRunner}.
@@ -40,13 +39,12 @@ public class ToolRunnerTest {
add(MimeDetector.class);
add(PluginVerifier.class);
add(Rover.class);
- add(Version.class);
add(VocabPrinter.class);
}};
@Test
public void testGetToolsInClasspath() throws IOException {
- Iterator<Tool> tools = ToolRunner.getToolsInClasspath();
+ Iterator<Tool> tools = new ToolRunner().getToolsInClasspath();
while (tools.hasNext()) {
assertTrue("Some core tools have not been detected.", coreTools.contains(tools.next().getClass()));
}
Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolTestBase.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolTestBase.java?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolTestBase.java (original)
+++ incubator/any23/trunk/core/src/test/java/org/apache/any23/cli/ToolTestBase.java Tue Apr 3 09:40:03 2012
@@ -17,12 +17,14 @@
package org.apache.any23.cli;
+import com.beust.jcommander.Parameters;
import org.apache.any23.Any23OnlineTestBase;
-import org.junit.Assert;
-import java.lang.reflect.Method;
import java.util.Arrays;
+import static java.lang.String.format;
+import static org.junit.Assert.assertEquals;
+
/**
* Base class for <i>CLI</i> related tests.
*
@@ -36,7 +38,7 @@ public abstract class ToolTestBase exten
private final Class<? extends Tool> toolClazz;
protected ToolTestBase(Class<? extends Tool> tool) {
- if(tool == null) throw new NullPointerException();
+ if (tool == null) throw new NullPointerException();
toolClazz = tool;
}
@@ -47,11 +49,14 @@ public abstract class ToolTestBase exten
* @return the tool exit code.
* @throws Exception
*/
- protected int runTool(String... args)
- throws Exception {
- final Object instance = toolClazz.newInstance();
- final Method mainMethod = toolClazz.getMethod(TOOL_RUN_METHOD, String[].class);
- return (Integer) mainMethod.invoke(instance, (Object) args);
+ protected int runTool(String... args) throws Exception {
+ // The first command name declared on the tool class selects the command to run.
+ final Parameters toolParameters = toolClazz.getAnnotation( Parameters.class );
+ final String commandName = toolParameters.commandNames()[0];
+
+ // Command line handed to the runner: the command name followed by the caller's
+ // arguments in their original order.
+ final String[] commandLine = new String[args.length + 1];
+ commandLine[0] = commandName;
+ for (int i = 0; i < args.length; i++) {
+ commandLine[i + 1] = args[i];
+ }
+
+ final ToolRunner runner = new ToolRunner();
+ return runner.execute( commandLine );
+ }
/**
@@ -72,8 +77,8 @@ public abstract class ToolTestBase exten
* @throws Exception
*/
protected void runToolCheckExit0(String... args) throws Exception {
- Assert.assertEquals(
- String.format(
+ assertEquals(
+ format(
"Unexpected exit code for tool [%s] invoked with %s",
toolClazz.getSimpleName(),
Arrays.asList(args)
Modified: incubator/any23/trunk/plugins/basic-crawler/pom.xml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/plugins/basic-crawler/pom.xml?rev=1308786&r1=1308785&r2=1308786&view=diff
==============================================================================
--- incubator/any23/trunk/plugins/basic-crawler/pom.xml (original)
+++ incubator/any23/trunk/plugins/basic-crawler/pom.xml Tue Apr 3 09:40:03 2012
@@ -57,6 +57,13 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
+
+ <!-- The CLI interfaces -->
+ <dependency>
+ <groupId>com.beust</groupId>
+ <artifactId>jcommander</artifactId>
+ <scope>provided</scope>
+ </dependency>
</dependencies>
<build>
|