Archive OnionPerf analysis .json files.

Implements #34072.
Karsten Loesing 2020-04-30 17:29:35 +02:00
parent a87ce0d02f
commit 0f5536ed68
5 changed files with 195 additions and 39 deletions

View File

@@ -1,5 +1,10 @@
# Changes in version 1.1?.? - 2020-0?-??
* Medium changes
- Update to metrics-lib 2.12.1.
- Download OnionPerf analysis .json files in addition to .tpf
files.
* Minor changes
- Simplify logging configuration.
- Set default locale `US` and default time zone `UTC` at the

View File

@@ -12,7 +12,7 @@
<property name="release.version" value="1.14.1-dev" />
<property name="project-main-class" value="org.torproject.metrics.collector.Main" />
<property name="name" value="collector"/>
<property name="metricslibversion" value="2.10.0" />
<property name="metricslibversion" value="2.12.1" />
<property name="jarincludes" value="collector.properties logback.xml" />
<patternset id="runtime" >

View File

@@ -13,6 +13,7 @@ import org.torproject.metrics.collector.conf.Key;
import org.torproject.metrics.collector.cron.CollecTorMain;
import org.torproject.metrics.collector.downloader.Downloader;
import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -32,14 +33,16 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.Stack;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** Download download .tpf files from OnionPerf hosts. */
/** Download OnionPerf files from OnionPerf hosts. */
public class OnionPerfDownloader extends CollecTorMain {
private static final Logger logger = LoggerFactory.getLogger(
@@ -47,6 +50,8 @@ public class OnionPerfDownloader extends CollecTorMain {
private static final String TORPERF = "torperf";
private static final String ONIONPERF = "onionperf";
/** Instantiate the OnionPerf module using the given configuration. */
public OnionPerfDownloader(Configuration config) {
super(config);
@@ -54,21 +59,25 @@ public class OnionPerfDownloader extends CollecTorMain {
}
/** File containing the download history, which is necessary, because
* OnionPerf does not delete older .tpf files, but which enables us to do
* so. */
* OnionPerf does not delete older files, but which enables us to do so. */
private File onionPerfDownloadedFile;
/** Full URLs of .tpf files downloaded in the current or in past
* executions. */
private SortedSet<String> downloadedTpfFiles = new TreeSet<>();
/** Full URLs of files downloaded in the current or in past executions. */
private SortedSet<String> downloadedFiles = new TreeSet<>();
/** Base URLs of configured OnionPerf hosts. */
private URL[] onionPerfHosts = null;
/** Directory for storing archived .tpf files. */
/** Relative URLs of available .tpf files by base URL. */
private Map<URL, List<String>> tpfFileUrls = new HashMap<>();
/** Relative URLs of available OnionPerf analysis files by base URL. */
private Map<URL, List<String>> onionPerfAnalysisFileUrls = new HashMap<>();
/** Directory for storing archived files. */
private File archiveDirectory = null;
/** Directory for storing recent .tpf files. */
/** Directory for storing recent files. */
private File recentDirectory = null;
@Override
@@ -87,19 +96,17 @@ public class OnionPerfDownloader extends CollecTorMain {
new File(config.getPath(Key.StatsPath).toFile(),
"onionperf-downloaded");
this.onionPerfHosts = config.getUrlArray(Key.OnionPerfHosts);
this.readDownloadedOnionPerfTpfFiles();
this.archiveDirectory = new File(config.getPath(Key.OutputPath).toFile(),
TORPERF);
this.recentDirectory = new File(config.getPath(Key.RecentPath).toFile(),
TORPERF);
this.readDownloadedOnionPerfFiles();
this.archiveDirectory = config.getPath(Key.OutputPath).toFile();
this.recentDirectory = config.getPath(Key.RecentPath).toFile();
for (URL baseUrl : this.onionPerfHosts) {
this.downloadFromOnionPerfHost(baseUrl);
}
this.writeDownloadedOnionPerfTpfFiles();
this.writeDownloadedOnionPerfFiles();
this.cleanUpRsyncDirectory();
}
private void readDownloadedOnionPerfTpfFiles() {
private void readDownloadedOnionPerfFiles() {
if (!this.onionPerfDownloadedFile.exists()) {
return;
}
@@ -107,47 +114,69 @@ public class OnionPerfDownloader extends CollecTorMain {
this.onionPerfDownloadedFile))) {
String line;
while ((line = br.readLine()) != null) {
this.downloadedTpfFiles.add(line);
this.downloadedFiles.add(line);
}
} catch (IOException e) {
logger.info("Unable to read download history file '{}'. Ignoring "
+ "download history and downloading all available .tpf files.",
+ "download history and downloading all available files.",
this.onionPerfDownloadedFile.getAbsolutePath());
this.downloadedTpfFiles.clear();
this.downloadedFiles.clear();
}
}
private void downloadFromOnionPerfHost(URL baseUrl) {
logger.info("Downloading from OnionPerf host {}", baseUrl);
List<String> tpfFileNames =
this.downloadOnionPerfDirectoryListing(baseUrl);
this.downloadOnionPerfDirectoryListing(baseUrl);
String source = baseUrl.getHost().split("\\.")[0];
for (String tpfFileName : tpfFileNames) {
this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName);
if (this.tpfFileUrls.containsKey(baseUrl)) {
for (String tpfFileName : this.tpfFileUrls.get(baseUrl)) {
this.downloadAndParseOnionPerfTpfFile(baseUrl, source, tpfFileName);
}
}
if (this.onionPerfAnalysisFileUrls.containsKey(baseUrl)) {
for (String onionPerfAnalysisFileName
: this.onionPerfAnalysisFileUrls.get(baseUrl)) {
this.downloadAndParseOnionPerfAnalysisFile(baseUrl, source,
onionPerfAnalysisFileName);
}
}
}
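
For illustration only (not part of this change): the source identifier handed to the two download-and-parse methods is simply the first label of the OnionPerf host name, as in this minimal sketch; the host name is invented.

public class SourceNameDemo {
  public static void main(String[] args) {
    // Hypothetical OnionPerf host name; only the first label is used as source.
    String host = "op-ab.onionperf.torproject.net";
    String source = host.split("\\.")[0];
    System.out.println(source); // prints "op-ab"
  }
}
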
/** Pattern for links contained in directory listings. */
/** Patterns for links contained in directory listings. */
private static final Pattern TPF_FILE_URL_PATTERN =
Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*");
private List<String> downloadOnionPerfDirectoryListing(URL baseUrl) {
List<String> tpfFileUrls = new ArrayList<>();
private static final Pattern ONIONPERF_ANALYSIS_FILE_URL_PATTERN =
Pattern.compile(
".*<a href=\"([0-9-]{10}\\.onionperf\\.analysis\\.json\\.xz)\">.*");
private void downloadOnionPerfDirectoryListing(URL baseUrl) {
try (BufferedReader br = new BufferedReader(new InputStreamReader(
baseUrl.openStream()))) {
String line;
while ((line = br.readLine()) != null) {
Matcher matcher = TPF_FILE_URL_PATTERN.matcher(line);
if (matcher.matches() && !matcher.group(1).startsWith("/")) {
tpfFileUrls.add(matcher.group(1));
Matcher tpfFileMatcher = TPF_FILE_URL_PATTERN.matcher(line);
if (tpfFileMatcher.matches()
&& !tpfFileMatcher.group(1).startsWith("/")) {
this.tpfFileUrls.putIfAbsent(baseUrl, new ArrayList<>());
this.tpfFileUrls.get(baseUrl).add(tpfFileMatcher.group(1));
}
Matcher onionPerfAnalysisFileMatcher
= ONIONPERF_ANALYSIS_FILE_URL_PATTERN.matcher(line);
if (onionPerfAnalysisFileMatcher.matches()
&& !onionPerfAnalysisFileMatcher.group(1).startsWith("/")) {
this.onionPerfAnalysisFileUrls.putIfAbsent(baseUrl,
new ArrayList<>());
this.onionPerfAnalysisFileUrls.get(baseUrl)
.add(onionPerfAnalysisFileMatcher.group(1));
}
}
} catch (IOException e) {
logger.warn("Unable to download directory listing from '{}'. Skipping "
+ "this OnionPerf host.", baseUrl);
tpfFileUrls.clear();
this.tpfFileUrls.remove(baseUrl);
this.onionPerfAnalysisFileUrls.remove(baseUrl);
}
return tpfFileUrls;
}
private static final DateFormat DATE_FORMAT;
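
For illustration only (not part of this change): the two patterns above pick relative links out of an OnionPerf host's directory listing, one for legacy .tpf files and one for analysis .json.xz files. This minimal sketch shows what each pattern extracts; the listing lines are invented.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class LinkPatternDemo {
  private static final Pattern TPF_FILE_URL_PATTERN =
      Pattern.compile(".*<a href=\"([^\"]+\\.tpf)\">.*");
  private static final Pattern ONIONPERF_ANALYSIS_FILE_URL_PATTERN =
      Pattern.compile(
          ".*<a href=\"([0-9-]{10}\\.onionperf\\.analysis\\.json\\.xz)\">.*");

  public static void main(String[] args) {
    // Invented directory-listing lines as an OnionPerf host might serve them.
    String tpfLine = "<a href=\"op-ab-51200-2020-04-29.tpf\">download</a>";
    String analysisLine =
        "<a href=\"2020-04-29.onionperf.analysis.json.xz\">download</a>";
    Matcher tpfMatcher = TPF_FILE_URL_PATTERN.matcher(tpfLine);
    if (tpfMatcher.matches()) {
      System.out.println(tpfMatcher.group(1)); // op-ab-51200-2020-04-29.tpf
    }
    Matcher analysisMatcher =
        ONIONPERF_ANALYSIS_FILE_URL_PATTERN.matcher(analysisLine);
    if (analysisMatcher.matches()) {
      // prints 2020-04-29.onionperf.analysis.json.xz
      System.out.println(analysisMatcher.group(1));
    }
  }
}
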
@@ -169,7 +198,7 @@ public class OnionPerfDownloader extends CollecTorMain {
}
/* Skip if we successfully downloaded this file before. */
if (this.downloadedTpfFiles.contains(tpfFileUrl.toString())) {
if (this.downloadedFiles.contains(tpfFileUrl.toString())) {
return;
}
@@ -197,7 +226,8 @@ public class OnionPerfDownloader extends CollecTorMain {
}
/* Download file contents to temporary file. */
File tempFile = new File(this.recentDirectory, "." + tpfFileName);
File tempFile = new File(this.recentDirectory,
TORPERF + "/." + tpfFileName);
byte[] downloadedBytes;
try {
downloadedBytes = Downloader.downloadFromHttpServer(
@@ -263,7 +293,7 @@ public class OnionPerfDownloader extends CollecTorMain {
/* Copy/move files in place. */
File archiveFile = new File(this.archiveDirectory,
date.replaceAll("-", "/") + "/" + tpfFileName);
TORPERF + "/" + date.replaceAll("-", "/") + "/" + tpfFileName);
archiveFile.getParentFile().mkdirs();
try {
Files.copy(tempFile.toPath(), archiveFile.toPath(),
@@ -274,18 +304,132 @@ public class OnionPerfDownloader extends CollecTorMain {
tempFile.delete();
return;
}
File recentFile = new File(this.recentDirectory, tpfFileName);
File recentFile = new File(this.recentDirectory,
TORPERF + "/" + tpfFileName);
tempFile.renameTo(recentFile);
/* Add to download history to avoid downloading it again. */
this.downloadedTpfFiles.add(baseUrl + tpfFileName);
this.downloadedFiles.add(baseUrl + tpfFileName);
}
private void writeDownloadedOnionPerfTpfFiles() {
private void downloadAndParseOnionPerfAnalysisFile(URL baseUrl, String source,
String onionPerfAnalysisFileName) {
URL onionPerfAnalysisFileUrl;
try {
onionPerfAnalysisFileUrl = new URL(baseUrl, onionPerfAnalysisFileName);
} catch (MalformedURLException e1) {
logger.warn("Unable to put together base URL '{}' and file path '{}' to "
+ "a URL. Skipping.", baseUrl, onionPerfAnalysisFileName);
return;
}
/* Skip if we successfully downloaded this file before. */
if (this.downloadedFiles.contains(onionPerfAnalysisFileUrl.toString())) {
return;
}
/* Parse date from file name: yyyy-MM-dd.onionperf.analysis.json.xz */
String date;
try {
date = onionPerfAnalysisFileName.substring(0, 10);
DATE_FORMAT.parse(date);
} catch (NumberFormatException | ParseException e) {
logger.warn("Invalid file name '{}{}'. Skipping.", baseUrl,
onionPerfAnalysisFileName, e);
return;
}
/* Download file contents to temporary file. */
File tempFile = new File(this.recentDirectory,
ONIONPERF + "/." + onionPerfAnalysisFileName);
byte[] downloadedBytes;
try {
downloadedBytes = Downloader.downloadFromHttpServer(
new URL(baseUrl + onionPerfAnalysisFileName));
} catch (IOException e) {
logger.warn("Unable to download '{}{}'. Skipping.", baseUrl,
onionPerfAnalysisFileName, e);
return;
}
if (null == downloadedBytes) {
logger.warn("Unable to download '{}{}'. Skipping.", baseUrl,
onionPerfAnalysisFileName);
return;
}
tempFile.getParentFile().mkdirs();
try {
Files.write(tempFile.toPath(), downloadedBytes);
} catch (IOException e) {
logger.warn("Unable to write previously downloaded '{}{}' to temporary "
+ "file '{}'. Skipping.", baseUrl, onionPerfAnalysisFileName,
tempFile, e);
return;
}
/* Validate contained descriptors. */
DescriptorParser descriptorParser =
DescriptorSourceFactory.createDescriptorParser();
byte[] rawDescriptorBytes;
try {
rawDescriptorBytes = IOUtils.toByteArray(
Files.newInputStream(tempFile.toPath()));
} catch (IOException e) {
logger.warn("OnionPerf file '{}{}' could not be read. Skipping.", baseUrl,
onionPerfAnalysisFileName, e);
tempFile.delete();
return;
}
Iterable<Descriptor> descriptors = descriptorParser.parseDescriptors(
rawDescriptorBytes, null, onionPerfAnalysisFileName);
String message = null;
for (Descriptor descriptor : descriptors) {
if (!(descriptor instanceof TorperfResult)) {
message = "File contains descriptors other than an OnionPerf analysis "
+ "document: " + descriptor.getClass();
break;
}
TorperfResult torperf = (TorperfResult) descriptor;
if (!source.equals(torperf.getSource())) {
message = "File contains transfer from another source: "
+ torperf.getSource();
break;
}
}
if (null != message) {
logger.warn("OnionPerf file '{}{}' was found to be invalid: {}. "
+ "Skipping.", baseUrl, onionPerfAnalysisFileName, message);
tempFile.delete();
return;
}
/* Copy/move files in place. */
File archiveFile = new File(this.archiveDirectory,
ONIONPERF + "/" + date.replaceAll("-", "/") + "/" + date + "." + source
+ ".onionperf.analysis.json.xz");
archiveFile.getParentFile().mkdirs();
try {
Files.copy(tempFile.toPath(), archiveFile.toPath(),
StandardCopyOption.REPLACE_EXISTING);
} catch (IOException e) {
logger.warn("Unable to copy OnionPerf file {} to {}. Skipping.",
tempFile, archiveFile, e);
tempFile.delete();
return;
}
File recentFile = new File(this.recentDirectory,
ONIONPERF + "/" + date + "." + source + ".onionperf.analysis.json.xz");
tempFile.renameTo(recentFile);
/* Add to download history to avoid downloading it again. */
this.downloadedFiles.add(baseUrl + onionPerfAnalysisFileName);
}
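
For illustration only (not part of this change): for one successful download, the method above stores a copy under the archive directory (by date) and a copy under the recent directory. This minimal sketch reproduces the two path constructions; the date, source, and base directories are invented stand-ins for the configured OutputPath and RecentPath.

import java.io.File;

public class OnionPerfPathDemo {
  public static void main(String[] args) {
    String date = "2020-04-29";   // parsed from the downloaded file name
    String source = "op-ab";      // first label of the OnionPerf host name
    File archiveDirectory = new File("out");      // stand-in for OutputPath
    File recentDirectory = new File("recent");    // stand-in for RecentPath
    File archiveFile = new File(archiveDirectory,
        "onionperf/" + date.replaceAll("-", "/") + "/" + date + "." + source
        + ".onionperf.analysis.json.xz");
    File recentFile = new File(recentDirectory,
        "onionperf/" + date + "." + source + ".onionperf.analysis.json.xz");
    // on a Unix-like system prints:
    // out/onionperf/2020/04/29/2020-04-29.op-ab.onionperf.analysis.json.xz
    System.out.println(archiveFile.getPath());
    // recent/onionperf/2020-04-29.op-ab.onionperf.analysis.json.xz
    System.out.println(recentFile.getPath());
  }
}
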
private void writeDownloadedOnionPerfFiles() {
this.onionPerfDownloadedFile.getParentFile().mkdirs();
try (BufferedWriter bw = new BufferedWriter(new FileWriter(
this.onionPerfDownloadedFile))) {
for (String line : this.downloadedTpfFiles) {
for (String line : this.downloadedFiles) {
bw.write(line);
bw.newLine();
}
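
For illustration only (not part of this change): the download history written above is plain text with one full URL per line, now mixing .tpf and analysis file URLs; the host name in this excerpt is hypothetical.

https://op-ab.onionperf.torproject.net/2020-04-29.onionperf.analysis.json.xz
https://op-ab.onionperf.torproject.net/op-ab-51200-2020-04-29.tpf
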

View File

@@ -175,7 +175,7 @@ ExitlistUrl = https://check.torproject.org/exit-addresses
######## OnionPerf downloader ########
#
## Define descriptor sources
# possible values: Remote,Sync
# possible values: Remote,Sync (.tpf files only!)
OnionPerfSources = Remote
# Retrieve files from the following CollecTor instances.
# List of URLs separated by comma.
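
For illustration only (not part of this change): with the Remote source, OnionPerfDownloader contacts the hosts configured under the OnionPerfHosts key read in the code above; the host URL in this hypothetical snippet is invented.

OnionPerfSources = Remote
OnionPerfHosts = https://op-ab.onionperf.torproject.net/
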

View File

@@ -40,6 +40,8 @@ TARBALLS=(
exit-list-$YEARTWO-$MONTHTWO
torperf-$YEARONE-$MONTHONE
torperf-$YEARTWO-$MONTHTWO
onionperf-$YEARONE-$MONTHONE
onionperf-$YEARTWO-$MONTHTWO
certs
microdescs-$YEARONE-$MONTHONE
microdescs-$YEARTWO-$MONTHTWO
@@ -73,6 +75,8 @@ DIRECTORIES=(
$OUTDIR/exit-lists/$YEARTWO/$MONTHTWO/
$OUTDIR/torperf/$YEARONE/$MONTHONE/
$OUTDIR/torperf/$YEARTWO/$MONTHTWO/
$OUTDIR/onionperf/$YEARONE/$MONTHONE/
$OUTDIR/onionperf/$YEARTWO/$MONTHTWO/
$OUTDIR/relay-descriptors/certs/
$OUTDIR/relay-descriptors/microdesc/$YEARONE/$MONTHONE
$OUTDIR/relay-descriptors/microdesc/$YEARTWO/$MONTHTWO
@@ -178,6 +182,9 @@ ln -f -s -t $ARCHIVEDIR/relay-descriptors/bandwidths/ $TARBALLTARGETDIR/bandwidt
mkdir -p $ARCHIVEDIR/torperf/
ln -f -s -t $ARCHIVEDIR/torperf/ $TARBALLTARGETDIR/torperf-20??-??.tar.xz
mkdir -p $ARCHIVEDIR/onionperf/
ln -f -s -t $ARCHIVEDIR/onionperf/ $TARBALLTARGETDIR/onionperf-20??-??.tar.xz
mkdir -p $ARCHIVEDIR/webstats/
ln -f -s -t $ARCHIVEDIR/webstats/ $TARBALLTARGETDIR/webstats-20??-??.tar
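
For illustration only (not part of this change), with invented dates: a monthly tarball onionperf-2020-04.tar.xz built from $OUTDIR/onionperf/2020/04/ would be linked into the archive by the new lines above as:

$ARCHIVEDIR/onionperf/onionperf-2020-04.tar.xz -> $TARBALLTARGETDIR/onionperf-2020-04.tar.xz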