mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-01 22:55:23 +00:00
Bug 304550: Bugzilla should always store data in MySQL as UTF-8
Patch By Max Kanat-Alexander <mkanat@bugzilla.org> r=LpSolit, a=justdave
This commit is contained in:
parent
ac35462909
commit
f5aba59a0f
@ -48,7 +48,7 @@ use base qw(Exporter);
|
||||
check_opsys check_shadowdb check_urlbase check_webdotbase
|
||||
check_netmask check_user_verify_class check_image_converter
|
||||
check_languages check_mail_delivery_method check_notification
|
||||
check_timezone
|
||||
check_timezone check_utf8
|
||||
);
|
||||
|
||||
# Checking functions for the various values
|
||||
@ -114,6 +114,18 @@ sub check_sslbase {
|
||||
return "";
|
||||
}
|
||||
|
||||
sub check_utf8 {
|
||||
my $utf8 = shift;
|
||||
# You cannot turn off the UTF-8 parameter if you've already converted
|
||||
# your tables to utf-8.
|
||||
my $dbh = Bugzilla->dbh;
|
||||
if ($dbh->isa('Bugzilla::DB::Mysql') && $dbh->bz_db_is_utf8 && !$utf8) {
|
||||
return "You cannot disable UTF-8 support, because your MySQL database"
|
||||
. " is encoded in UTF-8";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
sub check_priority {
|
||||
my ($value) = (@_);
|
||||
my $legal_priorities = get_legal_field_values('priority');
|
||||
|
@ -98,6 +98,7 @@ sub get_param_list {
|
||||
name => 'utf8',
|
||||
type => 'b',
|
||||
default => '0',
|
||||
checker => \&check_utf8
|
||||
},
|
||||
|
||||
{
|
||||
|
@ -338,7 +338,7 @@ use constant ERROR_MODE_DIE_SOAP_FAULT => 2;
|
||||
|
||||
# Data about what we require for different databases.
|
||||
use constant DB_MODULE => {
|
||||
'mysql' => {db => 'Bugzilla::DB::Mysql', db_version => '4.0.14',
|
||||
'mysql' => {db => 'Bugzilla::DB::Mysql', db_version => '4.1.2',
|
||||
dbd => 'DBD::mysql', dbd_version => '2.9003',
|
||||
name => 'MySQL'},
|
||||
'pg' => {db => 'Bugzilla::DB::Pg', db_version => '8.00.0000',
|
||||
|
@ -130,6 +130,10 @@ sub bz_check_requirements {
|
||||
. bz_locations()->{'localconfig'};
|
||||
}
|
||||
|
||||
die("It is not safe to run Bugzilla inside the 'mysql' database.\n"
|
||||
. "Please pick a different value for \$db_name in localconfig.")
|
||||
if $lc->{db_name} eq 'mysql';
|
||||
|
||||
# Check the existence and version of the DBD that we need.
|
||||
my $dbd = $db->{dbd};
|
||||
my $dbd_ver = $db->{dbd_version};
|
||||
@ -196,8 +200,14 @@ sub bz_create_database {
|
||||
print "Creating database $db_name...\n";
|
||||
|
||||
# Try to create the DB, and if we fail print a friendly error.
|
||||
if (!eval { $dbh->do("CREATE DATABASE $db_name") }) {
|
||||
my $error = $dbh->errstr;
|
||||
my $success = eval {
|
||||
my @sql = $dbh->_bz_schema->get_create_database_sql($db_name);
|
||||
# This ends with 1 because this particular do doesn't always
|
||||
# return something.
|
||||
$dbh->do($_) foreach @sql; 1;
|
||||
};
|
||||
if (!$success) {
|
||||
my $error = $dbh->errstr || $@;
|
||||
chomp($error);
|
||||
print STDERR "The '$db_name' database could not be created.",
|
||||
" The error returned was:\n\n $error\n\n",
|
||||
@ -221,7 +231,7 @@ sub _get_no_db_connection {
|
||||
if (!$conn_success) {
|
||||
my $sql_server = DB_MODULE->{lc($lc->{db_driver})}->{name};
|
||||
# Can't use $dbh->errstr because $dbh is undef.
|
||||
my $error = $DBI::errstr;
|
||||
my $error = $DBI::errstr || $@;
|
||||
chomp($error);
|
||||
print STDERR "There was an error connecting to $sql_server:\n\n",
|
||||
" $error\n\n", _bz_connect_error_reasons();
|
||||
|
@ -59,6 +59,10 @@ sub new {
|
||||
|
||||
my $self = $class->db_new($dsn, $user, $pass);
|
||||
|
||||
# This makes sure that if the tables are encoded as UTF-8, we
|
||||
# return their data correctly.
|
||||
$self->do("SET NAMES utf8") if Bugzilla->params->{'utf8'};
|
||||
|
||||
# all class local variables stored in DBI derived class needs to have
|
||||
# a prefix 'private_'. See DBI documentation.
|
||||
$self->{private_bz_tables_locked} = "";
|
||||
@ -545,6 +549,88 @@ sub bz_setup_database {
|
||||
MAX_ROWS=100000");
|
||||
}
|
||||
|
||||
# Convert the database to UTF-8 if the utf8 parameter is on.
|
||||
if (Bugzilla->params->{'utf8'} && !$self->bz_db_is_utf8) {
|
||||
print <<EOT;
|
||||
|
||||
WARNING: We are about to convert your table storage format to UTF8. This
|
||||
allows Bugzilla to correctly store and sort international characters.
|
||||
However, if you have any non-UTF-8 data in your database,
|
||||
it ***WILL BE DELETED*** by this process. So, before
|
||||
you continue with checksetup.pl, if you have any non-UTF-8
|
||||
data (or even if you're not sure) you should press Ctrl-C now
|
||||
to interrupt checksetup.pl, and run contrib/recode.pl to make all
|
||||
the data in your database into UTF-8. You should also back up your
|
||||
database before continuing.
|
||||
|
||||
If you ever used a version of Bugzilla before 2.22, we STRONGLY
|
||||
recommend that you stop checksetup.pl NOW and run contrib/recode.pl.
|
||||
|
||||
Continuing in 60 seconds...
|
||||
EOT
|
||||
sleep 60;
|
||||
|
||||
print "Converting table storage format to UTF-8. This may take a",
|
||||
" while.\n";
|
||||
foreach my $table ($self->bz_table_list_real) {
|
||||
my $info_sth = $self->prepare("SHOW FULL COLUMNS FROM $table");
|
||||
$info_sth->execute();
|
||||
while (my $column = $info_sth->fetchrow_hashref) {
|
||||
# If this particular column isn't stored in utf-8
|
||||
if ($column->{Collation} ne 'NULL'
|
||||
&& $column->{Collation} !~ /utf8/)
|
||||
{
|
||||
my $name = $column->{Field};
|
||||
|
||||
# The code below doesn't work on a field with a FULLTEXT
|
||||
# index. So we drop it. The upgrade code will re-create
|
||||
# it later.
|
||||
if ($table eq 'longdescs' && $name eq 'thetext') {
|
||||
$self->bz_drop_index('longdescs',
|
||||
'longdescs_thetext_idx');
|
||||
}
|
||||
if ($table eq 'bugs' && $name eq 'short_desc') {
|
||||
$self->bz_drop_index('bugs', 'bugs_short_desc_idx');
|
||||
}
|
||||
|
||||
print "Converting $table.$name to be stored as UTF-8...\n";
|
||||
my $col_info =
|
||||
$self->bz_column_info_real($table, $name);
|
||||
|
||||
# CHANGE COLUMN doesn't take PRIMARY KEY
|
||||
delete $col_info->{PRIMARYKEY};
|
||||
|
||||
my $sql_def = $self->_bz_schema->get_type_ddl($col_info);
|
||||
# We don't want MySQL to actually try to *convert*
|
||||
# from our current charset to UTF-8, we just want to
|
||||
# transfer the bytes directly. This is how we do that.
|
||||
|
||||
# The CHARACTER SET part of the definition has to come
|
||||
# right after the type, which will always come first.
|
||||
my ($binary, $utf8) = ($sql_def, $sql_def);
|
||||
my $type = $self->_bz_schema->convert_type($col_info->{TYPE});
|
||||
$binary =~ s/(\Q$type\E)/$1 CHARACTER SET binary/;
|
||||
$utf8 =~ s/(\Q$type\E)/$1 CHARACTER SET utf8/;
|
||||
$self->do("ALTER TABLE $table CHANGE COLUMN $name $name
|
||||
$binary");
|
||||
$self->do("ALTER TABLE $table CHANGE COLUMN $name $name
|
||||
$utf8");
|
||||
}
|
||||
$self->do("ALTER TABLE $table DEFAULT CHARACTER SET utf8");
|
||||
}
|
||||
} # foreach my $table (@tables)
|
||||
|
||||
my $db_name = Bugzilla->localconfig->{db_name};
|
||||
$self->do("ALTER DATABASE $db_name CHARACTER SET utf8");
|
||||
}
|
||||
}
|
||||
|
||||
sub bz_db_is_utf8 {
|
||||
my $self = shift;
|
||||
my $db_collation = $self->selectrow_arrayref(
|
||||
"SHOW VARIABLES LIKE 'character_set_database'");
|
||||
# First column holds the variable name, second column holds the value.
|
||||
return $db_collation->[1] =~ /utf8/ ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1302,7 +1302,7 @@ sub get_type_ddl {
|
||||
}
|
||||
|
||||
my $fkref = $self->{enable_references} ? $finfo->{REFERENCES} : undef;
|
||||
my $type_ddl = $self->{db_specific}{$type} || $type;
|
||||
my $type_ddl = $self->convert_type($type);
|
||||
# DEFAULT attribute must appear before any column constraints
|
||||
# (e.g., NOT NULL), for Oracle
|
||||
$type_ddl .= " DEFAULT $default" if (defined($default));
|
||||
@ -1313,7 +1313,19 @@ sub get_type_ddl {
|
||||
return($type_ddl);
|
||||
|
||||
} #eosub--get_type_ddl
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
sub convert_type {
|
||||
|
||||
=item C<convert_type>
|
||||
|
||||
Converts a TYPE from the L</ABSTRACT_SCHEMA> format into the real SQL type.
|
||||
|
||||
=cut
|
||||
|
||||
my ($self, $type) = @_;
|
||||
return $self->{db_specific}->{$type} || $type;
|
||||
}
|
||||
|
||||
sub get_column {
|
||||
=item C<get_column($table, $column)>
|
||||
|
||||
@ -1383,7 +1395,12 @@ sub get_table_columns {
|
||||
return @columns;
|
||||
|
||||
} #eosub--get_table_columns
|
||||
#--------------------------------------------------------------------------
|
||||
|
||||
sub get_create_database_sql {
|
||||
my ($self, $name) = @_;
|
||||
return ("CREATE DATABASE $name");
|
||||
}
|
||||
|
||||
sub get_table_ddl {
|
||||
|
||||
=item C<get_table_ddl>
|
||||
|
@ -129,7 +129,9 @@ sub _get_create_table_ddl {
|
||||
|
||||
my($self, $table) = @_;
|
||||
|
||||
return($self->SUPER::_get_create_table_ddl($table) . ' TYPE = MYISAM');
|
||||
my $charset = Bugzilla->dbh->bz_db_is_utf8 ? "CHARACTER SET utf8" : '';
|
||||
return($self->SUPER::_get_create_table_ddl($table)
|
||||
. " ENGINE = MYISAM $charset");
|
||||
|
||||
} #eosub--_get_create_table_ddl
|
||||
#------------------------------------------------------------------------------
|
||||
@ -150,6 +152,16 @@ sub _get_create_index_ddl {
|
||||
} #eosub--_get_create_index_ddl
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
sub get_create_database_sql {
|
||||
my ($self, $name) = @_;
|
||||
# We only create as utf8 if we have no params (meaning we're doing
|
||||
# a new installation) or if the utf8 param is on.
|
||||
my $create_utf8 = Bugzilla->params->{'utf8'}
|
||||
|| !defined Bugzilla->params->{'utf8'};
|
||||
my $charset = $create_utf8 ? "CHARACTER SET utf8" : '';
|
||||
return ("CREATE DATABASE $name $charset");
|
||||
}
|
||||
|
||||
# MySQL has a simpler ALTER TABLE syntax than ANSI.
|
||||
sub get_alter_column_ddl {
|
||||
my ($self, $table, $column, $new_def, $set_nulls_to) = @_;
|
||||
|
@ -64,9 +64,13 @@
|
||||
|
||||
utf8 => "Use UTF-8 (Unicode) encoding for all text in ${terms.Bugzilla}. New " _
|
||||
"installations should set this to true to avoid character encoding " _
|
||||
"problems. Existing databases should set this to true only after " _
|
||||
"the data has been converted from existing legacy character " _
|
||||
"encodings to UTF-8.",
|
||||
"problems. <strong>Existing databases should set this to true " _
|
||||
" only after the data has been converted from existing legacy " _
|
||||
" character encodings to UTF-8, using the " _
|
||||
" <kbd>contrib/recode.pl</kbd> script</strong>. <br><br> Note " _
|
||||
" that if you turn this parameter from "off" to " _
|
||||
" "on", you must re-run checksetup.pl immediately " _
|
||||
" afterward.",
|
||||
|
||||
shutdownhtml => "If this field is non-empty, then $terms.Bugzilla will be completely " _
|
||||
"disabled and this text will be displayed instead of all the " _
|
||||
|
@ -216,6 +216,9 @@
|
||||
[% IF param_changed.size > 0 %]
|
||||
[% FOREACH param = param_changed %]
|
||||
Changed <em>[% param FILTER html %]</em><br>
|
||||
[% IF param == 'utf8' && Param('utf8') %]
|
||||
<strong>You must now re-run checksetup.pl.</strong><br>
|
||||
[% END %]
|
||||
[% END %]
|
||||
[% ELSE %]
|
||||
No changes made.
|
||||
|
Loading…
Reference in New Issue
Block a user