mirror of
https://github.com/torproject/collector.git
synced 2025-02-20 17:42:14 +00:00
Start parsing bridge snapshot tarballs.
This commit is contained in:
parent
a3be87b103
commit
cdcdcf02de
@ -10,7 +10,7 @@ public class ArchiveReader {
|
||||
ExtraInfoParser eip, String archivesDir, Set<String> directoryKeys)
|
||||
throws IOException {
|
||||
System.out.print("Parsing all files in directory " + archivesDir
|
||||
+ "/...");
|
||||
+ "/... ");
|
||||
Stack<File> filesInInputDir = new Stack<File>();
|
||||
filesInInputDir.add(new File(archivesDir));
|
||||
while (!filesInInputDir.isEmpty()) {
|
||||
|
66
src/BridgeSnapshotReader.java
Normal file
66
src/BridgeSnapshotReader.java
Normal file
@ -0,0 +1,66 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import org.apache.commons.compress.compressors.gzip.*;
|
||||
import org.apache.commons.compress.archivers.tar.*;
|
||||
|
||||
/**
|
||||
* Reads the half-hourly snapshots of bridge descriptors from Tonga.
|
||||
*/
|
||||
public class BridgeSnapshotReader {
|
||||
public BridgeSnapshotReader(BridgeStatsFileHandler bsfh,
|
||||
String bridgeDirectoriesDir, String parsedBridgeDirectories,
|
||||
Set<String> countries) throws IOException {
|
||||
SortedSet<String> parsed = new TreeSet<String>();
|
||||
File pbdFile = new File(parsedBridgeDirectories);
|
||||
if (pbdFile.exists()) {
|
||||
System.out.print("Reading existing file " + parsedBridgeDirectories
|
||||
+ "... ");
|
||||
BufferedReader br = new BufferedReader(new FileReader(pbdFile));
|
||||
String line = null;
|
||||
while ((line = br.readLine()) != null) {
|
||||
parsed.add(line);
|
||||
}
|
||||
br.close();
|
||||
System.out.println("done");
|
||||
}
|
||||
File bdDir = new File(bridgeDirectoriesDir);
|
||||
if (bdDir.exists()) {
|
||||
System.out.print("Parsing all files in directory "
|
||||
+ bridgeDirectoriesDir + "/... ");
|
||||
Stack<File> filesInInputDir = new Stack<File>();
|
||||
filesInInputDir.add(bdDir);
|
||||
while (!filesInInputDir.isEmpty()) {
|
||||
File pop = filesInInputDir.pop();
|
||||
if (pop.isDirectory()) {
|
||||
for (File f : pop.listFiles()) {
|
||||
filesInInputDir.add(f);
|
||||
}
|
||||
} else if (!parsed.contains(pop.getName())) {
|
||||
FileInputStream in = new FileInputStream(pop);
|
||||
GzipCompressorInputStream gcis =
|
||||
new GzipCompressorInputStream(in);
|
||||
TarArchiveInputStream tais = new TarArchiveInputStream(gcis);
|
||||
InputStreamReader isr = new InputStreamReader(tais);
|
||||
BufferedReader br = new BufferedReader(isr);
|
||||
TarArchiveEntry en = null;
|
||||
String line = null;
|
||||
while ((en = tais.getNextTarEntry()) != null) {
|
||||
while ((line = br.readLine()) != null) {
|
||||
; // TODO do all the hard work
|
||||
}
|
||||
}
|
||||
br.close();
|
||||
parsed.add(pop.getName());
|
||||
}
|
||||
}
|
||||
System.out.print("done\nWriting file " + pbdFile + "... ");
|
||||
BufferedWriter bw = new BufferedWriter(new FileWriter(pbdFile));
|
||||
for (String f : parsed) {
|
||||
bw.append(f + "\n");
|
||||
}
|
||||
bw.close();
|
||||
System.out.println("done");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -61,6 +61,9 @@ public class Main {
|
||||
directories.keySet());
|
||||
SanitizedBridgesReader sbr = new SanitizedBridgesReader(csfh, bsfh,
|
||||
"bridges", countries);
|
||||
BridgeSnapshotReader bsr = new BridgeSnapshotReader(bsfh,
|
||||
"bridge-directories", "stats/parsed-bridge-directories",
|
||||
countries);
|
||||
}
|
||||
|
||||
// Download current descriptors
|
||||
|
@ -8,7 +8,7 @@ public class SanitizedBridgesReader {
|
||||
BridgeStatsFileHandler bsfh, String bridgesDir,
|
||||
SortedSet<String> countries) throws IOException, ParseException {
|
||||
System.out.print("Parsing all files in directory " + bridgesDir
|
||||
+ "/...");
|
||||
+ "/... ");
|
||||
SimpleDateFormat timeFormat = new SimpleDateFormat(
|
||||
"yyyy-MM-dd HH:mm:ss");
|
||||
timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
|
Loading…
x
Reference in New Issue
Block a user