Remove GetTor statistics processing code.

This commit is contained in:
Karsten Loesing 2012-08-07 12:33:09 +02:00
parent f0c1411185
commit 27869c4922
10 changed files with 1 additions and 288 deletions

25
README
View File

@ -20,8 +20,7 @@ or only the database and the graphing engine, if desired.
The metrics database contains data about the Tor Network coming from
different sources, including the Tor directory authorities, Torperf
performance measurement installations, the GetTor software package
delivery service, and others.
performance measurement installations, and others.
1.1. Preparing the operating system
@ -289,28 +288,6 @@ Run the database import:
$ ./run.sh
1.10. Importing GetTor statistics
=================================
WARNING: The GetTor statistics are not available for download yet, so
this section applies only to the official metrics website.
GetTor is a software distribution service that allows users to fetch the
Tor software via email. GetTor produces daily statistics of requested
packages that can be imported into the metrics database.
Put the GetTor statistics file into /srv/metrics-web/gettor/ .
Edit /srv/metrics-web/config to contain the following options:
ProcessGetTorStats 1
GetTorDirectory gettor/
Run the database import:
$ ./run.sh
2. Installing the graphing engine
=================================

View File

@ -45,12 +45,6 @@
## Relative path to directory to import torperf results from
#TorperfDirectory in/torperf/
#
## Process GetTor stats and import them into the database
#ProcessGetTorStats 0
#
## Relative path to directory where to find GetTor stats
#GetTorDirectory in/gettor/
#
## JDBC string for ExoneraTor database
#ExoneraTorDatabaseJdbc jdbc:postgresql://localhost/exonerator?user=metrics&password=password
#

View File

@ -899,15 +899,6 @@ CREATE TABLE torperf_stats (
CONSTRAINT torperf_stats_pkey PRIMARY KEY("date", source)
);
-- TABLE gettor_stats
-- Packages requested from GetTor.
-- Holds one row per ("date", bundle) pair, as enforced by the primary key.
CREATE TABLE gettor_stats (
-- Day the download count belongs to.
"date" DATE NOT NULL,
-- Name of the requested bundle (at most 64 characters).
bundle CHARACTER VARYING(64) NOT NULL,
-- Number of requests recorded for this bundle on this day.
downloads INTEGER NOT NULL,
CONSTRAINT gettor_stats_pkey PRIMARY KEY("date", bundle)
);
-- Refresh all statistics in the database.
CREATE OR REPLACE FUNCTION refresh_all() RETURNS INTEGER AS $$
BEGIN

View File

@ -182,41 +182,6 @@ export_bridge_users <- function(path) {
write.csv(bridgeusers, path, quote = FALSE, row.names = FALSE)
}
# Export daily GetTor package request counts to a CSV file.
#
# Reads all gettor_stats rows dated before yesterday and sums the
# downloads per day for four groups: all bundles except "none" (written
# as "total"), and the bundles whose name ends in "_en", "_zh_cn", or
# "_fa".  The result has one row per date and one column per group.
#
# path: name of the CSV file to write.
export_gettor <- function(path) {
  drv <- dbDriver("PostgreSQL")
  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
  # Release the connection and the driver even if the query or any later
  # step fails; disconnect first, then unload the driver.
  on.exit({
    dbDisconnect(con)
    dbUnloadDriver(drv)
  }, add = TRUE)
  q <- paste("SELECT date, bundle, downloads FROM gettor_stats",
      "WHERE date < current_date - 1")
  rs <- dbSendQuery(con, q)
  stats <- fetch(rs, n = -1)
  dbClearResult(rs)

  # Sum the downloads of the given rows per date and label the result.
  sum_by_date <- function(rows, label) {
    sums <- aggregate(rows$downloads, by = list(date = rows$date), sum)
    data.frame(date = sums$date, bundle = label, downloads = sums$x)
  }

  # Select bundles by language suffix.  The pattern is anchored to the
  # end of the bundle name so that it agrees with the "LIKE '%_<lang>'"
  # filter used when plotting; an unanchored pattern would also match
  # bundles that merely contain the suffix somewhere in their name.
  has_suffix <- function(suffix) {
    grepl(paste(suffix, "$", sep = ""), stats$bundle)
  }

  downloads <- rbind(
    sum_by_date(stats[stats$bundle != "none", ], "total"),
    sum_by_date(stats[has_suffix("_en"), ], "en"),
    sum_by_date(stats[has_suffix("_zh_cn"), ], "zh_cn"),
    sum_by_date(stats[has_suffix("_fa"), ], "fa"))
  downloads <- cast(downloads, date ~ bundle, value = "downloads")
  downloads <- downloads[order(downloads$date), ]
  write.csv(downloads, path, quote = FALSE, row.names = FALSE)
}
export_torperf <- function(path) {
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)

View File

@ -781,45 +781,6 @@ plot_bridge_users <- function(start, end, country, path, dpi) {
ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi))
}
# Plot the number of packages requested from GetTor per day.
#
# start, end: first and last day to plot, in YYYY-MM-DD format.
# language:   "all" for every bundle except "none", or a language code
#             that is matched against the end of the bundle name.
# path:       file name the graph is saved to.
# dpi:        resolution of the saved graph; converted with as.numeric().
#
# Days in the requested interval without data are plotted as gaps.
plot_gettor <- function(start, end, language, path, dpi) {
  # The parameters end up in a hand-built SQL string, so only validated
  # values are pasted into it.  as.Date() may accept trailing characters
  # after a valid date, which is why the re-formatted dates are used in
  # the query rather than the raw strings.
  start_date <- as.Date(start, "%Y-%m-%d")
  end_date <- as.Date(end, "%Y-%m-%d")
  if (is.na(start_date) || is.na(end_date)) {
    stop("start and end must be dates in YYYY-MM-DD format")
  }
  if (!grepl("^[A-Za-z0-9_-]+$", language)) {
    stop("language must consist of letters, digits, '_' or '-' only")
  }
  if (language == "all") {
    condition <- "<> 'none'"
  } else {
    condition <- paste("LIKE '%_", tolower(language), "'", sep = "")
  }

  drv <- dbDriver("PostgreSQL")
  con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)
  # Release the connection and the driver even if the query or the
  # plotting code fails; disconnect first, then unload the driver.
  on.exit({
    dbDisconnect(con)
    dbUnloadDriver(drv)
  }, add = TRUE)
  q <- paste("SELECT date, SUM(downloads) AS downloads ",
      "FROM gettor_stats WHERE bundle ", condition, " AND date >= '",
      format(start_date), "' AND date <= '", format(end_date),
      "' AND date < current_date - 1 GROUP BY date", sep = "")
  rs <- dbSendQuery(con, q)
  downloads <- fetch(rs, n = -1)
  dbClearResult(rs)

  # Add NA rows for days without data so that the line shows a gap
  # instead of connecting the neighboring days.
  dates <- seq(from = start_date, to = end_date, by = "1 day")
  missing <- setdiff(dates, downloads$date)
  if (length(missing) > 0) {
    # setdiff() drops the Date class, hence the explicit origin.
    downloads <- rbind(downloads,
        data.frame(date = as.Date(missing, origin = "1970-01-01"),
        downloads = NA))
  }

  if (language == "all") {
    title <- "Total packages requested from GetTor per day\n"
  } else {
    title <- paste(languagename(language), " (", language,
        ") packages requested from GetTor per day\n", sep = "")
  }
  # Named "breaks" rather than "date_breaks" to avoid shadowing the
  # helper function of that name.
  breaks <- date_breaks(
      as.numeric(max(as.Date(downloads$date, "%Y-%m-%d")) -
      min(as.Date(downloads$date, "%Y-%m-%d"))))
  # Including 0 keeps the upper limit finite when there is no data at
  # all; it does not change the limit for non-negative counts.
  y_max <- max(0, downloads$downloads, na.rm = TRUE)
  ggplot(downloads, aes(x = as.Date(date, "%Y-%m-%d"), y = downloads)) +
    geom_line(size = 1) +
    scale_x_date(name = paste("\nThe Tor Project - ",
        "https://metrics.torproject.org/", sep = ""),
        format = breaks$format, major = breaks$major,
        minor = breaks$minor) +
    scale_y_continuous(name = "", limits = c(0, y_max)) +
    opts(title = title)
  ggsave(filename = path, width = 8, height = 5, dpi = as.numeric(dpi))
}
plot_torperf <- function(start, end, source, filesize, path, dpi) {
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, user = dbuser, password = dbpassword, dbname = db)

View File

@ -30,8 +30,6 @@ public class Configuration {
private boolean writeBridgeStats = false;
private boolean importWriteTorperfStats = false;
private String torperfDirectory = "in/torperf/";
private boolean processGetTorStats = false;
private String getTorDirectory = "in/gettor/";
private String exoneraTorDatabaseJdbc = "jdbc:postgresql:"
+ "//localhost/exonerator?user=metrics&password=password";
private String exoneraTorImportDirectory = "exonerator-import/";
@ -87,11 +85,6 @@ public class Configuration {
line.split(" ")[1]) != 0;
} else if (line.startsWith("TorperfDirectory")) {
this.torperfDirectory = line.split(" ")[1];
} else if (line.startsWith("ProcessGetTorStats")) {
this.processGetTorStats = Integer.parseInt(
line.split(" ")[1]) != 0;
} else if (line.startsWith("GetTorDirectory")) {
this.getTorDirectory = line.split(" ")[1];
} else if (line.startsWith("ExoneraTorDatabaseJdbc")) {
this.exoneraTorDatabaseJdbc = line.split(" ")[1];
} else if (line.startsWith("ExoneraTorImportDirectory")) {
@ -160,12 +153,6 @@ public class Configuration {
/**
 * Returns the directory to import torperf results from.  This is the
 * value of the TorperfDirectory option if the config file sets it, and
 * "in/torperf/" otherwise.
 */
public String getTorperfDirectory() {
return this.torperfDirectory;
}
/**
 * Returns whether GetTor stats should be processed.  This is true if
 * the ProcessGetTorStats option is set to a non-zero integer, and false
 * if the config file does not set the option.
 */
public boolean getProcessGetTorStats() {
return this.processGetTorStats;
}
/**
 * Returns the directory where GetTor stats are expected.  This is the
 * value of the GetTorDirectory option if the config file sets it, and
 * "in/gettor/" otherwise.
 */
public String getGetTorDirectory() {
return this.getTorDirectory;
}
/**
 * Returns the JDBC string for the ExoneraTor database.  This is the
 * value of the ExoneraTorDatabaseJdbc option if the config file sets
 * it, and a built-in localhost default otherwise.
 */
public String getExoneraTorDatabaseJdbc() {
return this.exoneraTorDatabaseJdbc;
}

View File

@ -1,152 +0,0 @@
/* Copyright 2011, 2012 The Tor Project
* See LICENSE for licensing information */
package org.torproject.ernie.cron;
import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.torproject.descriptor.Descriptor;
import org.torproject.descriptor.DescriptorFile;
import org.torproject.descriptor.DescriptorReader;
import org.torproject.descriptor.DescriptorSourceFactory;
import org.torproject.descriptor.GetTorStatistics;
/**
 * Imports statistics on packages requested from GetTor into the
 * gettor_stats database table.  All work happens in the constructor.
 */
public class GetTorProcessor {

  /**
   * Parses the GetTor stats found in the given directory and, if a
   * connection URL is given, writes them to the database.
   *
   * Returns without doing anything but logging a warning if the
   * directory does not contain a regular file gettor_stats.txt.
   * Database errors are logged as warnings and not rethrown.
   *
   * @param getTorDirectory Directory containing the GetTor stats.
   * @param connectionURL JDBC connection string of the database to
   *     write to, or null to skip the database import.
   */
  public GetTorProcessor(File getTorDirectory, String connectionURL) {

    Logger logger = Logger.getLogger(GetTorProcessor.class.getName());

    /* Bail out early if the expected stats file is missing. */
    File getTorFile = new File(getTorDirectory, "gettor_stats.txt");
    if (!getTorFile.exists() || getTorFile.isDirectory()) {
      logger.warning("Could not read GetTor stats");
      return;
    }

    /* Parse stats file. */
    logger.fine("Importing GetTor stats files in directory "
        + getTorDirectory + "/...");
    SortedMap<String, Map<String, Integer>> data =
        parseStats(getTorDirectory, logger);

    /* Write results to database. */
    if (connectionURL != null) {
      try {
        writeToDatabase(data, connectionURL);
      } catch (SQLException e) {
        logger.log(Level.WARNING, "Failed to add GetTor stats to "
            + "database.", e);
      }
    }

    logger.info("Finished processing statistics on Tor packages "
        + "delivered by GetTor.\nLast date in statistics is "
        + (data.isEmpty() ? "(null)" : data.lastKey()) + ".");
  }

  /**
   * Reads all descriptors from the given directory and returns the
   * contained GetTor statistics as a map from date (yyyy-MM-dd) to a
   * map from lower-cased package name to download count.  Files that
   * cannot be parsed are logged and skipped.  If two descriptors have
   * the same date, the one read last wins.
   */
  private static SortedMap<String, Map<String, Integer>> parseStats(
      File getTorDirectory, Logger logger) {
    /* NOTE(review): this formatter uses the JVM's default time zone;
     * confirm that this matches how getDateMillis() is defined. */
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
    SortedMap<String, Map<String, Integer>> data =
        new TreeMap<String, Map<String, Integer>>();
    DescriptorReader reader =
        DescriptorSourceFactory.createDescriptorReader();
    reader.addDirectory(getTorDirectory);
    Iterator<DescriptorFile> descriptorFiles = reader.readDescriptors();
    while (descriptorFiles.hasNext()) {
      DescriptorFile descriptorFile = descriptorFiles.next();
      if (descriptorFile.getException() != null) {
        logger.log(Level.WARNING, "Could not parse descriptor file '"
            + descriptorFile.getFileName() + "'. Skipping.",
            descriptorFile.getException());
        continue;
      }
      if (descriptorFile.getDescriptors() == null) {
        continue;
      }
      for (Descriptor descriptor : descriptorFile.getDescriptors()) {
        if (!(descriptor instanceof GetTorStatistics)) {
          continue;
        }
        GetTorStatistics stats = (GetTorStatistics) descriptor;
        String date = dateFormat.format(stats.getDateMillis());
        Map<String, Integer> obs = new HashMap<String, Integer>();
        for (Map.Entry<String, Integer> e :
            stats.getDownloadedPackages().entrySet()) {
          obs.put(e.getKey().toLowerCase(), e.getValue());
        }
        data.put(date, obs);
      }
    }
    return data;
  }

  /**
   * Writes the parsed statistics to the gettor_stats table in a single
   * transaction: rows that exist with a different count are updated,
   * rows that do not exist yet are inserted, and unchanged rows are
   * left alone.  The transaction is rolled back and the connection is
   * closed if anything fails.
   */
  private static void writeToDatabase(
      SortedMap<String, Map<String, Integer>> data, String connectionURL)
      throws SQLException {

    /* Start by assuming that every parsed value needs to be inserted;
     * keys have the form "date,bundle". */
    Map<String, Integer> updateRows = new HashMap<String, Integer>(),
        insertRows = new HashMap<String, Integer>();
    for (Map.Entry<String, Map<String, Integer>> e : data.entrySet()) {
      String date = e.getKey();
      for (Map.Entry<String, Integer> obs : e.getValue().entrySet()) {
        insertRows.put(date + "," + obs.getKey(), obs.getValue());
      }
    }

    Connection conn = DriverManager.getConnection(connectionURL);
    boolean committed = false;
    try {
      conn.setAutoCommit(false);

      /* Compare with what is already in the database: rows that exist
       * are either unchanged (drop them) or need an update. */
      Statement statement = conn.createStatement();
      ResultSet rs = statement.executeQuery(
          "SELECT date, bundle, downloads FROM gettor_stats");
      while (rs.next()) {
        String date = rs.getDate(1).toString();
        String bundle = rs.getString(2);
        String key = date + "," + bundle;
        if (insertRows.containsKey(key)) {
          int insertRow = insertRows.remove(key);
          int oldCount = rs.getInt(3);
          if (insertRow != oldCount) {
            updateRows.put(key, insertRow);
          }
        }
      }
      rs.close();
      statement.close();

      PreparedStatement psU = conn.prepareStatement(
          "UPDATE gettor_stats SET downloads = ? "
          + "WHERE date = ? AND bundle = ?");
      for (Map.Entry<String, Integer> e : updateRows.entrySet()) {
        String[] keyParts = e.getKey().split(",");
        java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
        String bundle = keyParts[1];
        int downloads = e.getValue();
        psU.clearParameters();
        psU.setLong(1, downloads);
        psU.setDate(2, date);
        psU.setString(3, bundle);
        psU.executeUpdate();
      }
      psU.close();

      PreparedStatement psI = conn.prepareStatement(
          "INSERT INTO gettor_stats (downloads, date, bundle) "
          + "VALUES (?, ?, ?)");
      for (Map.Entry<String, Integer> e : insertRows.entrySet()) {
        String[] keyParts = e.getKey().split(",");
        java.sql.Date date = java.sql.Date.valueOf(keyParts[0]);
        String bundle = keyParts[1];
        int downloads = e.getValue();
        psI.clearParameters();
        psI.setLong(1, downloads);
        psI.setDate(2, date);
        psI.setString(3, bundle);
        psI.executeUpdate();
      }
      psI.close();

      conn.commit();
      committed = true;
    } finally {
      /* Never leave a half-written transaction or an open connection
       * behind, whatever went wrong above. */
      try {
        if (!committed) {
          conn.rollback();
        }
      } finally {
        conn.close();
      }
    }
  }
}

View File

@ -84,13 +84,6 @@ public class Main {
statsDirectory, config.getRelayDescriptorDatabaseJDBC());
}
// Download and process GetTor stats
if (config.getProcessGetTorStats()) {
new GetTorProcessor(
new File(config.getGetTorDirectory()),
config.getRelayDescriptorDatabaseJDBC());
}
// Remove lock file
lf.releaseLock();

View File

@ -68,7 +68,6 @@ public class GraphDataServlet extends HttpServlet {
"direct-users");
this.availableGraphDataFiles.put("bridge-users-by-country",
"bridge-users");
this.availableGraphDataFiles.put("gettor", "gettor");
this.availableGraphDataFiles.put("torperf", "torperf");
/* Initialize map of graphs with specific variable columns. */

View File

@ -61,7 +61,6 @@ public class RObjectGenerator implements ServletContextListener {
this.availableCsvFiles.add("direct-users");
this.availableCsvFiles.add("dirreq-stats");
this.availableCsvFiles.add("dirbytes");
this.availableCsvFiles.add("gettor");
this.availableCsvFiles.add("monthly-users-average");
this.availableCsvFiles.add("monthly-users-peak");
this.availableCsvFiles.add("networksize");
@ -95,7 +94,6 @@ public class RObjectGenerator implements ServletContextListener {
"start,end,country,events,filename,nocutoff,dpi");
this.availableGraphs.put("bridge-users",
"start,end,country,filename,dpi");
this.availableGraphs.put("gettor", "start,end,language,filename,dpi");
this.availableGraphs.put("torperf",
"start,end,source,filesize,filename,dpi");
this.availableGraphs.put("torperf-failures",