flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mxm <...@git.apache.org>
Subject [GitHub] flink pull request #2618: Refactoring the Continuous File Monitoring Functio...
Date Wed, 12 Oct 2016 14:54:10 GMT
Github user mxm commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2618#discussion_r83016929
  
    --- Diff: flink-fs-tests/src/test/java/org/apache/flink/hdfstests/ContinuousFileProcessingTests.java
---
    @@ -336,237 +348,294 @@ public int compare(String o1, String o2) {
     			Assert.assertEquals(expectedFileContents.get(fileIdx), cntntStr.toString());
     		}
     
    -		for(org.apache.hadoop.fs.Path file: filesCreated) {
    +		for (org.apache.hadoop.fs.Path file: filesCreated) {
     			hdfs.delete(file, false);
     		}
     	}
     
    -	private static class PathFilter extends FilePathFilter {
    -
    -		@Override
    -		public boolean filterPath(Path filePath) {
    -			return filePath.getName().startsWith("**");
    -		}
    -	}
    +	////				Monitoring Function Tests				//////
     
     	@Test
     	public void testFilePathFiltering() throws Exception {
    -		Set<String> uniqFilesFound = new HashSet<>();
     		Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    +		Set<String> filesKept = new TreeSet<>();
     
     		// create the files to be discarded
     		for (int i = 0; i < NO_OF_FILES; i++) {
    -			Tuple2<org.apache.hadoop.fs.Path, String> file = fillWithData(hdfsURI, "**file",
i, "This is test line.");
    +			Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(hdfsURI,
"**file", i, "This is test line.");
     			filesCreated.add(file.f0);
     		}
     
     		// create the files to be kept
     		for (int i = 0; i < NO_OF_FILES; i++) {
    -			Tuple2<org.apache.hadoop.fs.Path, String> file = fillWithData(hdfsURI, "file",
i, "This is test line.");
    +			Tuple2<org.apache.hadoop.fs.Path, String> file =
    +				createFileAndFillWithData(hdfsURI, "file", i, "This is test line.");
     			filesCreated.add(file.f0);
    +			filesKept.add(file.f0.getName());
     		}
     
     		TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
     		format.setFilesFilter(new PathFilter());
    +
     		ContinuousFileMonitoringFunction<String> monitoringFunction =
     			new ContinuousFileMonitoringFunction<>(format, hdfsURI,
     				FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
     
    +		final FileVerifyingSourceContext context =
    +			new FileVerifyingSourceContext(new OneShotLatch(), monitoringFunction, 0, -1);
    +
     		monitoringFunction.open(new Configuration());
    -		monitoringFunction.run(new TestingSourceContext(monitoringFunction, uniqFilesFound));
    +		monitoringFunction.run(context);
     
    -		Assert.assertEquals(NO_OF_FILES, uniqFilesFound.size());
    -		for(int i = 0; i < NO_OF_FILES; i++) {
    -			org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(hdfsURI + "/file" +
i);
    -			Assert.assertTrue(uniqFilesFound.contains(file.toString()));
    -		}
    +		Assert.assertArrayEquals(filesKept.toArray(), context.getSeenFiles().toArray());
     
    -		for(org.apache.hadoop.fs.Path file: filesCreated) {
    +		// finally delete the files created for the test.
    +		for (org.apache.hadoop.fs.Path file: filesCreated) {
     			hdfs.delete(file, false);
     		}
     	}
     
    +	private static class PathFilter extends FilePathFilter {
    +		@Override
    +		public boolean filterPath(Path filePath) {
    +			return filePath.getName().startsWith("**");
    +		}
    +	}
    +
     	@Test
    -	public void testFileSplitMonitoringReprocessWithAppended() throws Exception {
    -		final Set<String> uniqFilesFound = new HashSet<>();
    +	public void testSortingOnModTime() throws Exception {
    +		final long[] modTimes = new long[NO_OF_FILES];
    +		final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
    +
    +		// create some files
    +		for (int i = 0; i < NO_OF_FILES; i++) {
    +			Tuple2<org.apache.hadoop.fs.Path, String> file =
    +				createFileAndFillWithData(hdfsURI, "file", i, "This is test line.");
    +			Thread.sleep(10);
    +
    +			filesCreated[i] = file.f0;
    +			modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime();
    +		}
    +
    +		TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
    +		format.setFilesFilter(FilePathFilter.createDefaultFilter());
     
    -		FileCreator fc = new FileCreator(INTERVAL, NO_OF_FILES);
    -		fc.start();
    +		// this is just to verify that all splits have been forwarded later.
    +		FileInputSplit[] splits = format.createInputSplits(1);
     
    -		Thread t = new Thread(new Runnable() {
    +		ContinuousFileMonitoringFunction<String> monitoringFunction =
    +			new ContinuousFileMonitoringFunction<>(format, hdfsURI,
    +				FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
    +
    +		ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes);
    +
    +		monitoringFunction.open(new Configuration());
    +		monitoringFunction.run(context);
    +		Assert.assertEquals(splits.length, context.getCounter());
    +
    +		// delete the created files.
    +		for (int i = 0; i < NO_OF_FILES; i++) {
    +			hdfs.delete(filesCreated[i], false);
    +		}
    +	}
    +
    +	@Test
    +	public void testProcessOnce() throws Exception {
    +		final OneShotLatch latch = new OneShotLatch();
    +
    +		// create a single file in the directory
    +		Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
    +			createFileAndFillWithData(hdfsURI, "file", NO_OF_FILES + 1, "This is test line.");
    +		Assert.assertTrue(hdfs.exists(bootstrap.f0));
    +
    +		// the source is supposed to read only this file.
    +		final Set<String> filesToBeRead = new TreeSet<>();
    +		filesToBeRead.add(bootstrap.f0.getName());
    +
    +		TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
    +		format.setFilesFilter(FilePathFilter.createDefaultFilter());
    +
    +		final ContinuousFileMonitoringFunction<String> monitoringFunction =
    +			new ContinuousFileMonitoringFunction<>(format, hdfsURI,
    +				FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
    +
    +		final FileVerifyingSourceContext context =
    +			new FileVerifyingSourceContext(latch, monitoringFunction, 1, -1);
    +
    +		final Thread t = new Thread() {
     			@Override
     			public void run() {
    -				TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
    -				format.setFilesFilter(FilePathFilter.createDefaultFilter());
    -				ContinuousFileMonitoringFunction<String> monitoringFunction =
    -					new ContinuousFileMonitoringFunction<>(format, hdfsURI,
    -						FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);
    -
     				try {
     					monitoringFunction.open(new Configuration());
    -					monitoringFunction.run(new TestingSourceContext(monitoringFunction, uniqFilesFound));
    +					monitoringFunction.run(context);
     				} catch (Exception e) {
    -					// do nothing as we interrupted the thread.
    +					Assert.fail(e.getMessage());
     				}
     			}
    -		});
    +		};
     		t.start();
     
    -		// wait until the sink also sees all the splits.
    -		synchronized (uniqFilesFound) {
    -			uniqFilesFound.wait();
    +		if (!latch.isTriggered()) {
    +			latch.await();
     		}
    -		t.interrupt();
    -		fc.join();
     
    -		Assert.assertEquals(NO_OF_FILES, fc.getFilesCreated().size());
    -		Assert.assertEquals(NO_OF_FILES, uniqFilesFound.size());
    -
    -		Set<org.apache.hadoop.fs.Path> filesCreated = fc.getFilesCreated();
    -		Set<String> fileNamesCreated = new HashSet<>();
    -		for (org.apache.hadoop.fs.Path path: fc.getFilesCreated()) {
    -			fileNamesCreated.add(path.toString());
    +		// create some additional files that would be processed in the case of PROCESS_CONTINUOUSLY
    +		final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
    +		for (int i = 0; i < NO_OF_FILES; i++) {
    +			Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile =
    +				createFileAndFillWithData(hdfsURI, "file", i, "This is test line.");
    +			filesCreated[i] = ignoredFile.f0;
     		}
     
    -		for(String file: uniqFilesFound) {
    -			Assert.assertTrue(fileNamesCreated.contains(file));
    -		}
    +		// wait until the monitoring thread exits
    +		t.join();
     
    -		for(org.apache.hadoop.fs.Path file: filesCreated) {
    -			hdfs.delete(file, false);
    +		Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());
    +
    +		// finally delete the files created for the test.
    +		hdfs.delete(bootstrap.f0, false);
    +		for (org.apache.hadoop.fs.Path path: filesCreated) {
    +			hdfs.delete(path, false);
     		}
     	}
     
     	@Test
    -	public void testFileSplitMonitoringProcessOnce() throws Exception {
    -		Set<String> uniqFilesFound = new HashSet<>();
    -
    -		FileCreator fc = new FileCreator(INTERVAL, 1);
    -		Set<org.apache.hadoop.fs.Path> filesCreated = fc.getFilesCreated();
    -		fc.start();
    -
    -		// to make sure that at least one file is created
    -		if (filesCreated.size() == 0) {
    -			synchronized (filesCreated) {
    -				if (filesCreated.size() == 0) {
    -					filesCreated.wait();
    -				}
    -			}
    -		}
    -		Assert.assertTrue(fc.getFilesCreated().size() >= 1);
    +	public void testProcessContinuously() throws Exception {
    +		final OneShotLatch latch = new OneShotLatch();
    +
    +		// create a single file in the directory
    +		Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
    +			createFileAndFillWithData(hdfsURI, "file", NO_OF_FILES + 1, "This is test line.");
    +		Assert.assertTrue(hdfs.exists(bootstrap.f0));
    +
    +		// the source is supposed to read only this file.
    --- End diff --
    
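    I think this comment is a copy-paste leftover from `testProcessOnce`: in this test case (`testProcessContinuously`) the source is supposed to read more files than just this one :)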


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message