mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-02 07:05:24 +00:00
144 lines
5.1 KiB
Perl
Executable File
144 lines
5.1 KiB
Perl
Executable File
#!/usr/bin/perl -w

# This script is designed to be used with the Mozilla Uninstall Survey.
# It connects to the survey database and pulls results into one .csv per
# product ("Mozilla Firefox 1.5", etc.).  The .csv's are saved to the specified
# directory and, once created, updated incrementally (only rows with an id
# greater than the largest id already in the file are appended).
# There is no output if everything goes well (this will probably be a cron script)
#
# @author Wil Clouser <wclouser@mozilla.com>

# Fill in the correct values below
my $output_dir = '.';

my $dsn  = 'dbi:mysql:survey:localhost:3306';
my $user = '';
my $pass = '';

# DBD::CSV handle rooted at $output_dir; table files are resolved relative to it.
my $csv_dsn = "dbi:CSV:f_dir=$output_dir;csv_eol=\n;";

# If this is true phone numbers and email addresses will be obscured (you probably
# want this)
my $privacy = 1;

# ---- End of configuration ----
# ------------------------------

use strict;
use DBI;

# Do some initial tests to make sure things don't break further on
if (! -d $output_dir) {
    die "Destination is not a directory: $output_dir\n";
}

if (! -w $output_dir) {
    die "Cannot write to directory: $output_dir\n";
}

my $dbh = DBI->connect($dsn, $user, $pass)
    or die "Can't connect to the db: $DBI::errstr\n";

my $csvh = DBI->connect($csv_dsn)
    or die "Can't connect to the db: $DBI::errstr\n";

# Setup some variables for use in the main loop
my $applications = get_current_applications();

# Placeholders, in order: application name, application version, and the
# highest result id already exported (only newer rows are returned).
my $results_query = $dbh->prepare("
    SELECT
        `results`.`id`,
        `results`.`created`,
        `choices`.`description` as `intention`,
        `choices_results`.`other` as `intention_other`,
        `results`.`comments`
    FROM `results`
    LEFT JOIN `choices_results` ON `results`.`id`=`choices_results`.`result_id`
    INNER JOIN `choices` ON `choices_results`.`choice_id` = `choices`.`id`
    INNER JOIN `applications` ON `applications`.`id` = `results`.`application_id`
    WHERE
        `applications`.`name` LIKE ?
    AND
        `applications`.`version` LIKE ?
    AND
        `results`.`id` > ?
    AND
        `choices`.`type` = 'intention'
    ORDER BY
        `results`.`created` ASC");

# table name is arbitrary; it is mapped to a real file inside the main loop
my $csv = $csvh->prepare("INSERT INTO csv VALUES(?,?,?,?,?)");

# Main Loop
foreach my $apps ( @$applications ) {
    # Clean spaces from names.  In the future if there are other strange
    # characters, we'll probably want to replace them too.
    my $application_name = $apps->{name};
    $application_name =~ tr/ /_/;
    my $application_version = $apps->{version};
    $application_version =~ tr/ /_/;

    my $filename = "export-$application_name"."_"."$application_version.csv";

    # The CSV driver resolves 'file' relative to f_dir ($output_dir), so every
    # direct filesystem access below must use the same full path.
    my $csv_path = "$output_dir/$filename";

    # Used for incremental additions.  Default to adding rows starting at zero
    my $maxid = 0;

    $csvh->{'csv_tables'}->{'csv'} = {
        'file'      => $filename,
        'col_names' => ['id','created','intention','intention_other','comments'],
    };

    if (! -f $csv_path) {
        # First export for this product: create an empty file for the driver.
        open(my $csv_fh, '>', $csv_path)
            or die "ERROR: Could not open file: $csv_path! ($!)";
        close($csv_fh)
            or die "ERROR: Could not close file: $csv_path! ($!)";
    } else {
        # Pull out the max ID for an incremental update
        ($maxid) = $csvh->selectrow_array("SELECT MAX(id) FROM csv");

        # If the CSV is empty (but exists), this will be undefined.  In this
        # case, start from zero
        $maxid = (defined $maxid) ? $maxid : 0;
    }

    $results_query->execute($apps->{name}, $apps->{version}, $maxid);

    # grab results from the db and send to the csv
    while ( my @row = $results_query->fetchrow_array ) {
        if ($privacy) {
            # Columns 3 (intention_other) and 4 (comments) are the ones
            # scrubbed.  The loop variable aliases the slice elements, so the
            # substitutions modify @row in place.
            for my $field (@row[3, 4]) {
                next unless $field;

                # Phone numbers: keep the first three digits, mask the rest.
                $field =~ s/([0-9]{3})[ .-]?[0-9]{4}/$1-****/g;

                # Email addresses: mask what sits between '@' and the last
                # '.' or ','.  The captures are optional, hence the defined()
                # guards to stay quiet under -w.
                # NOTE(review): '.+' is greedy, so on a line containing '@'
                # the match can span more than just the address - confirm this
                # is the intended amount of scrubbing.
                $field =~ s/\ ?(.+)?@(.+)?[.,](.+)?\ ?/(defined $1 ? $1 : '').'@****.'.(defined $3 ? $3 : '')/ge;
            }
        }
        $csv->execute(@row);
    }
}

# Finish Up
$csv->finish();
$results_query->finish();
$csvh->disconnect();
$dbh->disconnect();

# ---- Sub-routines ----
# ----------------------

sub get_current_applications {
    # Fetch every visible application row, ordered by id, using the
    # file-level $dbh handle.  Returns what selectall_arrayref produces with
    # an empty-hash Slice: a reference to an array of hash references, one per
    # row, keyed by column name.
    #
    # Pulling only visible rows is purely a speed consideration - feel free
    # to remove it
    my $sql  = "SELECT * FROM applications WHERE visible=1 ORDER BY id";
    my %attr = ( Slice => {} );

    return $dbh->selectall_arrayref($sql, \%attr);
}

sub get_max_id {
    # Query the largest id among visible applications using the file-level
    # $dbh handle.  The result comes back in selectall_arrayref's
    # hash-slice form (a reference to an array of hash references), not as a
    # bare number; the key is presumably the driver's column label for
    # MAX(id) - verify before relying on it.
    #
    # NOTE(review): nothing in the visible part of this script calls this sub.
    my $sql  = "SELECT MAX(id) FROM applications WHERE visible=1 ORDER BY id";
    my %attr = ( Slice => {} );

    return $dbh->selectall_arrayref($sql, \%attr);
}