Speedups for nsMorkReader and history import (bug 327330). r=brettw

This commit is contained in:
bryner%brianryner.com 2006-03-01 02:51:43 +00:00
parent b6f7e49306
commit 6e1d034a18
8 changed files with 515 additions and 238 deletions

View File

@ -41,6 +41,7 @@
#include "mozStorageHelper.h"
#include "prprf.h"
#include "nsNetUtil.h"
#include "nsTArray.h"
NS_IMPL_ISUPPORTS1(nsMorkHistoryImporter, nsIMorkHistoryImporter)
@ -61,45 +62,31 @@ static const char * const gColumnNames[] = {
struct TableReadClosure
{
TableReadClosure(nsMorkReader *aReader, nsINavHistoryService *aHistory)
: reader(aReader), history(aHistory), swapBytes(PR_FALSE)
TableReadClosure(nsMorkReader *aReader, nsNavHistory *aHistory)
: reader(aReader), history(aHistory), swapBytes(PR_FALSE),
byteOrderColumn(-1)
{
voidString.SetIsVoid(PR_TRUE);
NS_CONST_CAST(nsString*, &voidString)->SetIsVoid(PR_TRUE);
for (PRUint32 i = 0; i < kColumnCount; ++i) {
columnIndexes[i] = -1;
}
}
// Backpointers to the reader and history we're operating on
nsMorkReader *reader;
nsINavHistoryService *history;
const nsMorkReader *reader;
nsNavHistory *history;
// A voided string to use for the user title
nsString voidString;
const nsString voidString;
// Whether we need to swap bytes (file format is other-endian)
PRBool swapBytes;
// Column ids of the columns that we care about
nsCString columnIDs[kColumnCount];
nsCString byteOrderColumn;
// Indexes of the columns that we care about
PRInt32 columnIndexes[kColumnCount];
PRInt32 byteOrderColumn;
};
// Enumerator callback to build up the column list.
//
// For each (column id, column name) pair handed to it, this records the
// column id in the TableReadClosure passed through aData:
//  - if the name matches an entry of gColumnNames, the id is stored in the
//    corresponding slot of columnIDs;
//  - otherwise, if the name is "ByteOrder", the id is stored in
//    byteOrderColumn.
// Always returns PL_DHASH_NEXT, so enumeration is never cut short.
/* static */ PLDHashOperator PR_CALLBACK
nsMorkHistoryImporter::EnumerateColumnsCB(const nsACString &aColumnID,
nsCString aName, void *aData)
{
// aData is the opaque user pointer; it is treated as a TableReadClosure.
TableReadClosure *data = NS_STATIC_CAST(TableReadClosure*, aData);
for (PRUint32 i = 0; i < kColumnCount; ++i) {
if (aName.Equals(gColumnNames[i])) {
data->columnIDs[i].Assign(aColumnID);
// A name that matched a known column is not also checked against
// "ByteOrder" below.
return PL_DHASH_NEXT;
}
}
if (aName.EqualsLiteral("ByteOrder")) {
data->byteOrderColumn.Assign(aColumnID);
}
return PL_DHASH_NEXT;
}
// Reverses the high and low bytes in a PRUnichar buffer.
// This is used if the file format has a different endianness from the
// current architecture.
@ -114,18 +101,20 @@ SwapBytes(PRUnichar *buffer)
// Enumerator callback to add a table row to the NavHistoryService
/* static */ PLDHashOperator PR_CALLBACK
nsMorkHistoryImporter::AddToHistoryCB(const nsACString &aRowID,
const nsMorkReader::StringMap *aMap,
nsMorkHistoryImporter::AddToHistoryCB(const nsCSubstring &aRowID,
const nsTArray<nsCString> *aValues,
void *aData)
{
TableReadClosure *data = NS_STATIC_CAST(TableReadClosure*, aData);
nsMorkReader *reader = data->reader;
const nsMorkReader *reader = data->reader;
nsCString values[kColumnCount];
nsCString *columnIDs = data->columnIDs;
const PRInt32 *columnIndexes = data->columnIndexes;
for (PRInt32 i = 0; i < kColumnCount; ++i) {
aMap->Get(columnIDs[i], &values[i]);
reader->NormalizeValue(values[i]);
if (columnIndexes[i] != -1) {
values[i] = (*aValues)[columnIndexes[i]];
reader->NormalizeValue(values[i]);
}
}
// title is really a UTF-16 string at this point
@ -168,29 +157,15 @@ nsMorkHistoryImporter::AddToHistoryCB(const nsACString &aRowID,
if (uri) {
PRBool isTyped = values[kTypedColumn].EqualsLiteral("1");
nsINavHistoryService *history = data->history;
PRInt32 transition = isTyped ? nsINavHistoryService::TRANSITION_TYPED
: nsINavHistoryService::TRANSITION_LINK;
nsNavHistory *history = data->history;
if (date != -1 && count != 0) {
// We have a last visit date, so we'll be adding a visit on that date.
// Since that will increment the visit count by 1, we need to initially
// add the entry with count - 1 visits.
--count;
}
history->SetPageDetails(uri, nsDependentString(title, titleLength),
data->voidString, count,
values[kHiddenColumn].EqualsLiteral("1"), isTyped);
if (date != -1) {
PRInt32 transition = isTyped ? nsINavHistoryService::TRANSITION_TYPED
: nsINavHistoryService::TRANSITION_LINK;
// Referrer is not handled at present -- doing this requires adding
// visits in such an order that we have a visit id for the referring
// page already.
PRInt64 visitID;
history->AddVisit(uri, date, 0, transition, PR_FALSE, 0, &visitID);
}
history->AddPageWithVisit(uri,
nsDependentString(title, titleLength),
data->voidString,
values[kHiddenColumn].EqualsLiteral("1"),
isTyped, count, transition, date);
}
return PL_DHASH_NEXT;
}
@ -221,24 +196,40 @@ nsMorkHistoryImporter::ImportHistory(nsIFile *aFile,
NS_ENSURE_SUCCESS(rv, rv);
// Gather up the column ids so we don't need to find them on each row
TableReadClosure data(&reader, aHistory);
reader.EnumerateColumns(EnumerateColumnsCB, &data);
nsNavHistory *history = NS_STATIC_CAST(nsNavHistory*, aHistory);
TableReadClosure data(&reader, history);
const nsTArray<nsMorkReader::MorkColumn> &columns = reader.GetColumns();
for (PRUint32 i = 0; i < columns.Length(); ++i) {
const nsCSubstring &name = columns[i].name;
for (PRUint32 j = 0; j < kColumnCount; ++j) {
if (name.Equals(gColumnNames[j])) {
data.columnIndexes[j] = i;
break;
}
}
if (name.EqualsLiteral("ByteOrder")) {
data.byteOrderColumn = i;
}
}
// Determine the byte order from the table's meta-row.
nsCString byteOrder;
if (reader.GetMetaRow().Get(data.byteOrderColumn, &byteOrder)) {
// Note whether the file uses a non-native byte ordering.
// If it does, we'll have to swap bytes for PRUnichar values.
reader.NormalizeValue(byteOrder);
const nsTArray<nsCString> *metaRow = reader.GetMetaRow();
if (metaRow) {
const nsCString &byteOrder = (*metaRow)[data.byteOrderColumn];
if (!byteOrder.IsVoid()) {
// Note whether the file uses a non-native byte ordering.
// If it does, we'll have to swap bytes for PRUnichar values.
nsCAutoString byteOrderValue(byteOrder);
reader.NormalizeValue(byteOrderValue);
#ifdef IS_LITTLE_ENDIAN
data.swapBytes = !byteOrder.EqualsLiteral("LE");
data.swapBytes = !byteOrderValue.EqualsLiteral("LE");
#else
data.swapBytes = !byteOrder.EqualsLiteral("BE");
data.swapBytes = !byteOrderValue.EqualsLiteral("BE");
#endif
}
}
// Now add the results to history
nsNavHistory *history = NS_STATIC_CAST(nsNavHistory*, aHistory);
mozIStorageConnection *conn = history->GetStorageConnection();
NS_ENSURE_TRUE(conn, NS_ERROR_NOT_INITIALIZED);
mozStorageTransaction transaction(conn, PR_FALSE);
@ -248,6 +239,11 @@ nsMorkHistoryImporter::ImportHistory(nsIFile *aFile,
#endif
reader.EnumerateRows(AddToHistoryCB, &data);
// Make sure we don't have any duplicate items in the database.
rv = history->RemoveDuplicateURIs();
NS_ENSURE_SUCCESS(rv, rv);
#ifdef IN_MEMORY_LINKS
memTransaction.Commit();
#endif

View File

@ -42,6 +42,8 @@
#include "nsINavHistoryService.h"
#include "nsMorkReader.h"
template<class E> class nsTArray;
// The nsMorkHistoryImporter object parses a Mork-format history file and
// adds the history items to the NavHistoryService. It is invoked the first
// time the history service is created for a given profile, if a Mork history
@ -54,16 +56,10 @@ public:
NS_DECL_NSIMORKHISTORYIMPORTER
private:
// Enumerator callback to build up a list of columns
static PLDHashOperator PR_CALLBACK
EnumerateColumnsCB(const nsACString &aColumnID,
nsCString aName,
void *aData);
// Enumerator callback to add a single row to the NavHistory.
static PLDHashOperator PR_CALLBACK
AddToHistoryCB(const nsACString &aRowID,
const nsMorkReader::StringMap *aMap,
AddToHistoryCB(const nsCSubstring &aRowID,
const nsTArray<nsCString> *aValues,
void *aData);
};

View File

@ -556,8 +556,8 @@ nsNavHistory::InitDB(PRBool *aDoImport)
// mDBAddNewPage (see InternalAddNewPage)
rv = mDBConn->CreateStatement(NS_LITERAL_CSTRING(
"INSERT INTO moz_history "
"(url, rev_host, hidden, typed, visit_count) "
"VALUES (?1, ?2, ?3, ?4, ?5)"),
"(url, title, rev_host, hidden, typed, visit_count) "
"VALUES (?1, ?2, ?3, ?4, ?5, ?6)"),
getter_AddRefs(mDBAddNewPage));
NS_ENSURE_SUCCESS(rv, rv);
@ -696,7 +696,9 @@ nsNavHistory::GetUrlIdFor(nsIURI* aURI, PRInt64* aEntryID,
// create a new hidden, untyped, unvisited entry
mDBGetURLPageInfo->Reset();
statementResetter.Abandon();
rv = InternalAddNewPage(aURI, PR_TRUE, PR_FALSE, 0, aEntryID);
nsString voidString;
voidString.SetIsVoid(PR_TRUE);
rv = InternalAddNewPage(aURI, voidString, PR_TRUE, PR_FALSE, 0, aEntryID);
if (NS_SUCCEEDED(rv))
transaction.Commit();
return rv;
@ -729,34 +731,43 @@ nsNavHistory::SaveCollapseItem(const nsAString& aTitle)
// If non-null, the new page ID will be placed into aPageID.
nsresult
nsNavHistory::InternalAddNewPage(nsIURI* aURI, PRBool aHidden, PRBool aTyped,
nsNavHistory::InternalAddNewPage(nsIURI* aURI, const nsAString& aTitle,
PRBool aHidden, PRBool aTyped,
PRInt32 aVisitCount, PRInt64* aPageID)
{
mozStorageStatementScoper scoper(mDBAddNewPage);
nsresult rv = BindStatementURI(mDBAddNewPage, 0, aURI);
NS_ENSURE_SUCCESS(rv, rv);
// title
if (aTitle.IsVoid()) {
rv = mDBAddNewPage->BindNullParameter(1);
} else {
rv = mDBAddNewPage->BindStringParameter(1, aTitle);
}
NS_ENSURE_SUCCESS(rv, rv);
// host (reversed with trailing period)
nsAutoString revHost;
rv = GetReversedHostname(aURI, revHost);
// Not all URI types have hostnames, so this is optional.
if (NS_SUCCEEDED(rv)) {
rv = mDBAddNewPage->BindStringParameter(1, revHost);
rv = mDBAddNewPage->BindStringParameter(2, revHost);
} else {
rv = mDBAddNewPage->BindNullParameter(1);
rv = mDBAddNewPage->BindNullParameter(2);
}
NS_ENSURE_SUCCESS(rv, rv);
// hidden
rv = mDBAddNewPage->BindInt32Parameter(2, aHidden);
rv = mDBAddNewPage->BindInt32Parameter(3, aHidden);
NS_ENSURE_SUCCESS(rv, rv);
// typed
rv = mDBAddNewPage->BindInt32Parameter(3, aTyped);
rv = mDBAddNewPage->BindInt32Parameter(4, aTyped);
NS_ENSURE_SUCCESS(rv, rv);
// visit count
rv = mDBAddNewPage->BindInt32Parameter(4, aVisitCount);
rv = mDBAddNewPage->BindInt32Parameter(5, aVisitCount);
NS_ENSURE_SUCCESS(rv, rv);
rv = mDBAddNewPage->Execute();
@ -1551,8 +1562,10 @@ nsNavHistory::AddVisit(nsIURI* aURI, PRTime aTime, PRInt64 aReferringVisit,
// See the hidden computation code above for a little more explanation.
hidden = (aTransitionType == TRANSITION_EMBED || aIsRedirect);
// set as not typed, visited once
rv = InternalAddNewPage(aURI, hidden, PR_FALSE, 1, &pageID);
// set as not typed, visited once, no title
nsString voidString;
voidString.SetIsVoid(PR_TRUE);
rv = InternalAddNewPage(aURI, voidString, hidden, PR_FALSE, 1, &pageID);
NS_ENSURE_SUCCESS(rv, rv);
}
@ -3563,6 +3576,90 @@ nsNavHistory::CreateLookupIndexes()
return NS_OK;
}
// Adds a new page row for aURI and, when a last-visit date is supplied,
// a single visit row for it.
//
// Returns NS_OK without touching the database when CanAddURI() rejects the
// URI.  aLastVisitDate == -1 means "no visit to record".  Note that
// aUserTitle is accepted but is not consulted by this implementation.
nsresult
nsNavHistory::AddPageWithVisit(nsIURI *aURI,
                               const nsString &aTitle,
                               const nsString &aUserTitle,
                               PRBool aHidden, PRBool aTyped,
                               PRInt32 aVisitCount,
                               PRInt32 aLastVisitTransition,
                               PRTime aLastVisitDate)
{
  PRBool allowed = PR_FALSE;
  nsresult rv = CanAddURI(aURI, &allowed);
  NS_ENSURE_SUCCESS(rv, rv);

  if (allowed) {
    // Insert the page itself, remembering its row id for the visit below.
    PRInt64 newPageID;
    rv = InternalAddNewPage(aURI, aTitle, aHidden, aTyped, aVisitCount,
                            &newPageID);
    NS_ENSURE_SUCCESS(rv, rv);

    const PRBool haveVisitDate = (aLastVisitDate != -1);
    if (haveVisitDate) {
      // The visit is recorded with referring-visit and session arguments of 0.
      PRInt64 unusedVisitID;
      rv = InternalAddVisit(newPageID, 0, 0,
                            aLastVisitDate, aLastVisitTransition,
                            &unusedVisitID);
      NS_ENSURE_SUCCESS(rv, rv);
    }
  }

  return NS_OK;
}
// Removes duplicate URL rows from moz_history, together with the
// moz_historyvisit rows that refer to them.
//
// The history table is scanned in URL order; for every run of rows sharing
// the same URL, the first row returned is kept and the ids of the remaining
// rows are collected.  Each collected id is then deleted from moz_history
// and its visits are deleted from moz_historyvisit.
//
// Returns NS_OK on success.  Any storage error (including one raised while
// scanning) is propagated to the caller, and NS_ERROR_OUT_OF_MEMORY is
// returned if the list of duplicate ids cannot grow.
nsresult
nsNavHistory::RemoveDuplicateURIs()
{
  nsCOMPtr<mozIStorageStatement> statement;
  nsresult rv = mDBConn->CreateStatement(
      NS_LITERAL_CSTRING("SELECT id, url FROM moz_history ORDER BY url"),
      getter_AddRefs(statement));
  NS_ENSURE_SUCCESS(rv, rv);

  nsTArray<PRInt64> duplicates;
  nsCAutoString lastURI;
  PRBool hasMore = PR_FALSE;
  // Keep the step result in rv so that a failure part-way through the scan
  // is reported instead of being mistaken for the end of the result set.
  while (NS_SUCCEEDED(rv = statement->ExecuteStep(&hasMore)) && hasMore) {
    nsCAutoString uri;
    rv = statement->GetUTF8String(1, uri);
    NS_ENSURE_SUCCESS(rv, rv);

    if (uri.Equals(lastURI)) {
      // Same URL as the previous row: this row is a duplicate.
      NS_ENSURE_TRUE(duplicates.AppendElement(statement->AsInt64(0)),
                     NS_ERROR_OUT_OF_MEMORY);
    } else {
      lastURI = uri;
    }
  }
  NS_ENSURE_SUCCESS(rv, rv);

  // Now remove all of the duplicates from the history and visit tables.
  rv = mDBConn->CreateStatement(
      NS_LITERAL_CSTRING("DELETE FROM moz_history WHERE id = ?1"),
      getter_AddRefs(statement));
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<mozIStorageStatement> visitDelete;
  rv = mDBConn->CreateStatement(
      NS_LITERAL_CSTRING("DELETE FROM moz_historyvisit WHERE page_id = ?1"),
      getter_AddRefs(visitDelete));
  NS_ENSURE_SUCCESS(rv, rv);

  for (PRUint32 i = 0; i < duplicates.Length(); ++i) {
    PRInt64 id = duplicates[i];

    {
      // The scoper resets the statement when it goes out of scope, so the
      // statement can be re-bound on the next iteration.
      mozStorageStatementScoper scope(statement);
      rv = statement->BindInt64Parameter(0, id);
      NS_ENSURE_SUCCESS(rv, rv);
      rv = statement->Execute();
      NS_ENSURE_SUCCESS(rv, rv);
    }

    {
      mozStorageStatementScoper scope(visitDelete);
      rv = visitDelete->BindInt64Parameter(0, id);
      NS_ENSURE_SUCCESS(rv, rv);
      rv = visitDelete->Execute();
      NS_ENSURE_SUCCESS(rv, rv);
    }
  }

  return NS_OK;
}
// Local function **************************************************************

View File

@ -273,7 +273,25 @@ public:
nsCOMArray<nsNavHistoryQuery>* aQueries,
nsNavHistoryQueryOptions** aOptions);
private:
// Import-friendly version of SetPageDetails + AddVisit.
// This method adds a page to history along with a single last visit.
// It is an error to call this method if aURI might already be in history.
// The given aVisitCount should include the given last-visit date.
// aLastVisitDate can be -1 if there is no last visit date to record.
nsresult AddPageWithVisit(nsIURI *aURI,
const nsString &aTitle,
const nsString &aUserTitle,
PRBool aHidden, PRBool aTyped,
PRInt32 aVisitCount,
PRInt32 aLastVisitTransition,
PRTime aLastVisitDate);
// Checks the database for any duplicate URLs. If any are found,
// all but the first are removed. This must be called after using
// AddPageWithVisit, to ensure that the database is in a consistent state.
nsresult RemoveDuplicateURIs();
private:
~nsNavHistory();
// used by GetHistoryService
@ -334,7 +352,8 @@ protected:
nsresult AddVisitChain(nsIURI* aURI, PRBool aToplevel, PRBool aRedirect,
nsIURI* aReferrer, PRInt64* aVisitID,
PRInt64* aSessionID, PRInt64* aRedirectBookmark);
nsresult InternalAddNewPage(nsIURI* aURI, PRBool aHidden, PRBool aTyped,
nsresult InternalAddNewPage(nsIURI* aURI, const nsAString& aTitle,
PRBool aHidden, PRBool aTyped,
PRInt32 aVisitCount, PRInt64* aPageID);
nsresult InternalAddVisit(PRInt64 aPageID, PRInt64 aReferringVisit,
PRInt64 aSessionID, PRTime aTime,

View File

@ -39,8 +39,9 @@
#include "nsMorkReader.h"
#include "prio.h"
#include "nsNetUtil.h"
#include "nsVoidArray.h"
// A FixedString implementation that can hold an 80-character line
// A FixedString implementation that can hold 2 80-character lines
class nsCLineString : public nsFixedCString
{
public:
@ -52,7 +53,7 @@ public:
}
private:
char mStorage[80];
char_type mStorage[160];
};
// Convert a hex character (0-9, A-F) to its corresponding byte value.
@ -80,54 +81,69 @@ static void
MorkUnescape(const nsCSubstring &aString, nsCString &aResult)
{
PRUint32 len = aString.Length();
PRInt32 startIndex = -1;
for (PRUint32 i = 0; i < len; ++i) {
char c = aString[i];
// We optimize for speed over space here -- size the result buffer to
// the size of the source, which is an upper bound on the size of the
// unescaped string.
aResult.SetLength(len);
char *result = aResult.BeginWriting();
const char *source = aString.BeginReading();
const char *sourceEnd = source + len;
const char *startPos = nsnull;
PRUint32 bytes;
for (; source < sourceEnd; ++source) {
char c = *source;
if (c == '\\') {
if (startIndex != -1) {
aResult.Append(Substring(aString, startIndex, i - startIndex));
startIndex = -1;
if (startPos) {
bytes = source - startPos;
memcpy(result, startPos, bytes);
result += bytes;
startPos = nsnull;
}
if (i < len - 1) {
aResult.Append(aString[++i]);
if (source < sourceEnd - 1) {
*(result++) = *(++source);
}
} else if (c == '$') {
if (startIndex != -1) {
aResult.Append(Substring(aString, startIndex, i - startIndex));
startIndex = -1;
if (startPos) {
bytes = source - startPos;
memcpy(result, startPos, bytes);
result += bytes;
startPos = nsnull;
}
if (i < len - 2) {
if (source < sourceEnd - 2) {
// Would be nice to use ToInteger() here, but it currently
// requires a null-terminated string.
char c2 = aString[++i];
char c3 = aString[++i];
char c2 = *(++source);
char c3 = *(++source);
if (ConvertChar(&c2) && ConvertChar(&c3)) {
aResult.Append((c2 << 4 ) | c3);
*(result++) = ((c2 << 4) | c3);
}
}
} else if (startIndex == -1) {
startIndex = PRInt32(i);
} else if (!startPos) {
startPos = source;
}
}
if (startIndex != -1) {
aResult.Append(Substring(aString, startIndex, len - startIndex));
if (startPos) {
bytes = source - startPos;
memcpy(result, startPos, bytes);
result += bytes;
}
aResult.SetLength(result - aResult.BeginReading());
}
nsresult
nsMorkReader::Init()
{
NS_ENSURE_TRUE(mColumnMap.Init(), NS_ERROR_OUT_OF_MEMORY);
NS_ENSURE_TRUE(mValueMap.Init(), NS_ERROR_OUT_OF_MEMORY);
NS_ENSURE_TRUE(mMetaRow.Init(), NS_ERROR_OUT_OF_MEMORY);
NS_ENSURE_TRUE(mTable.Init(), NS_ERROR_OUT_OF_MEMORY);
return NS_OK;
}
PR_STATIC_CALLBACK(PLDHashOperator)
DeleteStringMap(const nsACString& aKey,
nsMorkReader::StringMap *aData,
void *aUserArg)
DeleteStringArray(const nsCSubstring& aKey,
nsTArray<nsCString> *aData,
void *aUserArg)
{
delete aData;
return PL_DHASH_NEXT;
@ -135,7 +151,33 @@ DeleteStringMap(const nsACString& aKey,
nsMorkReader::~nsMorkReader()
{
mTable.EnumerateRead(DeleteStringMap, nsnull);
mTable.EnumerateRead(DeleteStringArray, nsnull);
}
// State shared with the AddColumn() enumerator callback: the column array
// being filled, the id-to-index map being filled alongside it, and the
// status of the enumeration (NS_OK until the callback records a failure).
struct AddColumnClosure
{
  AddColumnClosure(nsTArray<nsMorkReader::MorkColumn> *aColumns,
                   nsMorkReader::IndexMap *aIndexMap)
    : array(aColumns),
      columnMap(aIndexMap),
      result(NS_OK)
  {
  }

  // Destination array of columns
  nsTArray<nsMorkReader::MorkColumn> *array;
  // Maps a column id to its index in |array|
  nsMorkReader::IndexMap *columnMap;
  // Set to an error code by the callback if an insertion fails
  nsresult result;
};
// Hashtable enumerator: appends one (id, name) column to the closure's
// array and records the new element's index in the closure's column map.
// Stops the enumeration with NS_ERROR_OUT_OF_MEMORY in the closure if
// either insertion fails.
PR_STATIC_CALLBACK(PLDHashOperator)
AddColumn(const nsCSubstring &id, nsCString name, void *userData)
{
  AddColumnClosure *state = NS_STATIC_CAST(AddColumnClosure*, userData);
  nsTArray<nsMorkReader::MorkColumn> *columns = state->array;

  PRBool stored = PR_FALSE;
  if (columns->AppendElement(nsMorkReader::MorkColumn(id, name))) {
    // The column just appended lives at the last index of the array.
    stored = state->columnMap->Put(id, columns->Length() - 1);
  }

  if (stored) {
    return PL_DHASH_NEXT;
  }

  state->result = NS_ERROR_OUT_OF_MEMORY;
  return PL_DHASH_STOP;
}
nsresult
@ -157,6 +199,9 @@ nsMorkReader::Read(nsIFile *aFile)
return NS_ERROR_FAILURE; // unexpected file format
}
IndexMap columnMap;
NS_ENSURE_TRUE(columnMap.Init(), NS_ERROR_OUT_OF_MEMORY);
while (NS_SUCCEEDED(ReadLine(line))) {
// Trim off leading spaces
PRUint32 idx = 0, len = line.Length();
@ -171,16 +216,32 @@ nsMorkReader::Read(nsIFile *aFile)
// Look at the line to figure out what section type this is
if (StringBeginsWith(l, NS_LITERAL_CSTRING("< <(a=c)>"))) {
// Column map
rv = ParseMap(l, &mColumnMap);
// Column map. We begin by creating a hash of column id to column name.
StringMap columnNameMap;
NS_ENSURE_TRUE(columnNameMap.Init(), NS_ERROR_OUT_OF_MEMORY);
rv = ParseMap(l, &columnNameMap);
NS_ENSURE_SUCCESS(rv, rv);
// Now that we have the list of columns, we put them into a flat array.
// Rows will have value arrays of the same size, with indexes that
// correspond to the columns array. As we insert each column into the
// array, we also make an entry in columnMap so that we can look up the
// index given the column id.
mColumns.SetCapacity(columnNameMap.Count());
AddColumnClosure closure(&mColumns, &columnMap);
columnNameMap.EnumerateRead(AddColumn, &closure);
if (NS_FAILED(closure.result)) {
return closure.result;
}
} else if (StringBeginsWith(l, NS_LITERAL_CSTRING("<("))) {
// Value map
rv = ParseMap(l, &mValueMap);
NS_ENSURE_SUCCESS(rv, rv);
} else if (l[0] == '{' || l[0] == '[') {
// Table / table row
rv = ParseTable(l);
rv = ParseTable(l, columnMap);
NS_ENSURE_SUCCESS(rv, rv);
} else {
// Don't know, hopefully don't care
@ -190,18 +251,11 @@ nsMorkReader::Read(nsIFile *aFile)
return NS_OK;
}
// Invokes aCallback for each entry of the column map, passing aUserData
// through unchanged.  This is a thin wrapper around
// mColumnMap.EnumerateRead().
void
nsMorkReader::EnumerateColumns(ColumnEnumerator aCallback,
void *aUserData) const
{
mColumnMap.EnumerateRead(aCallback, aUserData);
}
void
nsMorkReader::EnumerateRows(RowEnumerator aCallback, void *aUserData) const
{
// Constify the table values
typedef const nsDataHashtable<nsCStringHashKey, const StringMap*> ConstTable;
typedef const nsDataHashtable<IDKey, const nsTArray<nsCString>* > ConstTable;
NS_REINTERPRET_CAST(ConstTable*, &mTable)->EnumerateRead(aCallback,
aUserData);
}
@ -259,7 +313,7 @@ nsMorkReader::ParseMap(const nsCSubstring &aLine, StringMap *aMap)
PRUint32 tokenEnd = PR_MIN(idx, len);
++idx;
nsCAutoString value;
nsCString value;
MorkUnescape(Substring(line, tokenStart, tokenEnd - tokenStart),
value);
aMap->Put(key, value);
@ -287,11 +341,14 @@ nsMorkReader::ParseMap(const nsCSubstring &aLine, StringMap *aMap)
// value map. '=' is used as the separator when the value is a literal.
nsresult
nsMorkReader::ParseTable(const nsCSubstring &aLine)
nsMorkReader::ParseTable(const nsCSubstring &aLine, const IndexMap &aColumnMap)
{
nsCLineString line(aLine);
nsCAutoString column;
StringMap *currentRow = nsnull;
const PRUint32 columnCount = mColumns.Length(); // total number of columns
PRInt32 columnIndex = -1; // column index of the cell we're parsing
// value array for the row we're parsing
nsTArray<nsCString> *currentRow = nsnull;
PRBool inMetaRow = PR_FALSE;
do {
@ -317,6 +374,9 @@ nsMorkReader::ParseTable(const nsCSubstring &aLine)
case '[':
{
// Start of a new row. Consume the row id, up to the first '('.
// Row edits also have a table namespace, separated from the row id
// by a colon. We don't make use of the namespace, but we need to
// make sure not to consider it part of the row id.
if (currentRow) {
NS_WARNING("unterminated row?");
currentRow = nsnull;
@ -334,24 +394,38 @@ nsMorkReader::ParseTable(const nsCSubstring &aLine)
}
tokenStart = idx;
while (idx < len &&
line[idx] != '(' &&
line[idx] != ']' &&
line[idx] != ':') {
++idx;
}
tokenEnd = idx;
while (idx < len && line[idx] != '(' && line[idx] != ']') {
++idx;
}
if (inMetaRow) {
currentRow = &mMetaRow;
mMetaRow = NewVoidStringArray(columnCount);
NS_ENSURE_TRUE(mMetaRow, NS_ERROR_OUT_OF_MEMORY);
currentRow = mMetaRow;
} else {
const nsCSubstring& row = Substring(line,
tokenStart, idx - tokenStart);
const nsCSubstring& row = Substring(line, tokenStart,
tokenEnd - tokenStart);
if (!mTable.Get(row, &currentRow)) {
currentRow = new StringMap();
NS_ENSURE_TRUE(currentRow && currentRow->Init(),
currentRow = NewVoidStringArray(columnCount);
NS_ENSURE_TRUE(currentRow, NS_ERROR_OUT_OF_MEMORY);
NS_ENSURE_TRUE(mTable.Put(row, currentRow),
NS_ERROR_OUT_OF_MEMORY);
mTable.Put(row, currentRow);
}
}
if (cutColumns) {
currentRow->Clear();
// Set all of the columns to void
// (this differentiates them from columns which are empty strings).
for (PRUint32 i = 0; i < columnCount; ++i) {
currentRow->ElementAt(i).SetIsVoid(PR_TRUE);
}
}
break;
}
@ -361,53 +435,76 @@ nsMorkReader::ParseTable(const nsCSubstring &aLine)
inMetaRow = PR_FALSE;
break;
case '(':
if (!currentRow) {
NS_WARNING("cell value outside of row");
break;
}
if (!column.IsEmpty()) {
NS_WARNING("unterminated cell?");
column.Truncate(0);
}
if (line[idx] == '^') {
++idx; // this is not part of the column id, advance past it
}
tokenStart = idx;
while (idx < len && line[idx] != '^' && line[idx] != '=') {
if (line[idx] == '\\') {
++idx; // skip escaped characters
{
if (!currentRow) {
NS_WARNING("cell value outside of row");
break;
}
++idx;
}
tokenEnd = PR_MIN(idx, len);
MorkUnescape(Substring(line, tokenStart, tokenEnd - tokenStart),
column);
NS_WARN_IF_FALSE(columnIndex == -1, "unterminated cell?");
PRBool columnIsAtom;
if (line[idx] == '^') {
columnIsAtom = PR_TRUE;
++idx; // this is not part of the column id, advance past it
} else {
columnIsAtom = PR_FALSE;
}
tokenStart = idx;
while (idx < len && line[idx] != '^' && line[idx] != '=') {
if (line[idx] == '\\') {
++idx; // skip escaped characters
}
++idx;
}
tokenEnd = PR_MIN(idx, len);
nsCAutoString column;
const nsCSubstring &colValue =
Substring(line, tokenStart, tokenEnd - tokenStart);
if (columnIsAtom) {
column.Assign(colValue);
} else {
MorkUnescape(colValue, column);
}
if (!aColumnMap.Get(colValue, &columnIndex)) {
NS_WARNING("Column not in column map, discarding it");
columnIndex = -1;
}
}
break;
case '=':
case '^':
if (column.IsEmpty()) {
NS_WARNING("stray ^ or = marker");
break;
}
tokenStart = idx - 1; // include the '=' or '^' marker in the value
while (idx < len && line[idx] != ')') {
if (line[idx] == '\\') {
++idx; // skip escaped characters
{
if (columnIndex == -1) {
NS_WARNING("stray ^ or = marker");
break;
}
++idx;
}
tokenEnd = PR_MIN(idx, len);
++idx;
nsCAutoString value;
MorkUnescape(Substring(line, tokenStart, tokenEnd - tokenStart),
value);
currentRow->Put(column, value);
column.Truncate(0);
PRBool valueIsAtom = (line[idx - 1] == '^');
tokenStart = idx - 1; // include the '=' or '^' marker in the value
while (idx < len && line[idx] != ')') {
if (line[idx] == '\\') {
++idx; // skip escaped characters
}
++idx;
}
tokenEnd = PR_MIN(idx, len);
++idx;
const nsCSubstring &value =
Substring(line, tokenStart, tokenEnd - tokenStart);
if (valueIsAtom) {
(*currentRow)[columnIndex] = value;
} else {
nsCAutoString value2;
MorkUnescape(value, value2);
(*currentRow)[columnIndex] = value2;
}
columnIndex = -1;
}
break;
}
}
@ -460,3 +557,18 @@ nsMorkReader::NormalizeValue(nsCString &aValue) const
aValue.Truncate(0);
}
}
// Allocates a string array holding aCount elements, each marked void.
// Ownership of the array passes to the caller.  Returns null if the array
// or any of its elements cannot be allocated; a partially built array is
// released by the nsAutoPtr in that case.
/* static */ nsTArray<nsCString>*
nsMorkReader::NewVoidStringArray(PRInt32 aCount)
{
  nsAutoPtr< nsTArray<nsCString> > strings(new nsTArray<nsCString>(aCount));
  NS_ENSURE_TRUE(strings, nsnull);

  PRInt32 remaining = aCount;
  while (remaining-- > 0) {
    nsCString *slot = strings->AppendElement();
    NS_ENSURE_TRUE(slot, nsnull);
    slot->SetIsVoid(PR_TRUE);
  }

  return strings.forget();
}

View File

@ -41,6 +41,8 @@
#include "nsDataHashtable.h"
#include "nsILineInputStream.h"
#include "nsTArray.h"
#include "nsAutoPtr.h"
// The nsMorkReader object allows a consumer to read in a mork-format
// file and enumerate the rows that it contains. It does not provide
@ -54,33 +56,82 @@
class nsMorkReader
{
public:
typedef nsDataHashtable<nsCStringHashKey,nsCString> StringMap;
// This string type has built-in storage for the hex string representation
// of a 32-bit row id or atom map key, plus the terminating null.
class IDString : public nsFixedCString
{
public:
IDString() : fixed_string_type(mStorage, sizeof(mStorage), 0) {}
IDString(const substring_type &str) :
fixed_string_type(mStorage, sizeof(mStorage), 0)
{
Assign(str);
}
// Enumerator callback type for processing column ids.
// A column id is a short way to reference a particular column in the table.
// These column ids can be used to look up cell values when enumerating rows.
// columnID is the table-unique column id
// name is the name of the column
// userData is the opaque pointer passed to EnumerateColumns()
// The callback can return PL_DHASH_NEXT to continue enumerating,
// or PL_DHASH_STOP to stop.
typedef PLDHashOperator
(*PR_CALLBACK ColumnEnumerator)(const nsACString &columnID,
nsCString name,
void *userData);
private:
static const int kStorageSize = 9;
char_type mStorage[kStorageSize];
};
// Hashtable key type that contains an IDString.
// The key is looked up by nsCSubstring reference, but each entry stores its
// own copy of the key text in an IDString member.
class IDKey : public PLDHashEntryHdr
{
public:
typedef const nsCSubstring& KeyType;
typedef const nsCSubstring* KeyTypePointer;
// Copies the key text into this entry's own IDString.
IDKey(KeyTypePointer aStr) : mStr(*aStr) { }
IDKey(const IDKey& toCopy) : mStr(toCopy.mStr) { }
~IDKey() { }
// Accessors for the stored key.
KeyType GetKey() const { return mStr; }
KeyTypePointer GetKeyPointer() const { return &mStr; }
// Returns whether the stored key has the same contents as *aKey.
PRBool KeyEquals(const KeyTypePointer aKey) const
{
return mStr.Equals(*aKey);
}
static KeyTypePointer KeyToPointer(KeyType aKey) { return &aKey; }
// Hashes the key's string contents.
static PLDHashNumber HashKey(const KeyTypePointer aKey)
{
return HashString(*aKey);
}
// NOTE(review): presumably PR_FALSE because IDString is constructed over
// its own inline mStorage buffer, so relocating an entry with memmove
// would leave the string referring to the old location -- confirm.
enum { ALLOW_MEMMOVE = PR_FALSE };
private:
const IDString mStr;
};
// A convenience typedef for an IDKey-to-string mapping.
typedef nsDataHashtable<IDKey,nsCString> StringMap;
// A convenience typedef for an IDKey-to-index mapping, used for the
// column index hashtable.
typedef nsDataHashtable<IDKey,PRInt32> IndexMap;
// A MorkColumn holds the two pieces of data kept for a single table
// column: its id and its name.
struct MorkColumn
{
  MorkColumn(const nsCSubstring &aID, const nsCSubstring &aName)
    : id(aID),
      name(aName)
  {
  }

  IDString id;     // the column's id
  nsCString name;  // the column's name
};
// Enumerator callback type for processing table rows.
// A row contains cells. Each cell specifies a column id, and the value
// for the column for that row.
// rowID is the table-unique row id
// values contains the cell values, keyed by column id.
// values contains the cell values, in an order which corresponds to
// the columns returned by GetColumns().
// You should call NormalizeValue() on any cell value that you plan to use.
// userData is the opaque pointer passed to EnumerateRows()
// The callback can return PL_DHASH_NEXT to continue enumerating,
// or PL_DHASH_STOP to stop.
typedef PLDHashOperator
(*PR_CALLBACK RowEnumerator)(const nsACString &rowID,
const StringMap *values,
(*PR_CALLBACK RowEnumerator)(const nsCSubstring &rowID,
const nsTArray<nsCString> *values,
void *userData);
// Initialize the importer object's data structures
@ -90,16 +141,17 @@ class nsMorkReader
// Note: currently, only single-table mork files are supported
nsresult Read(nsIFile *aFile);
// Enumerate the columns in the current table.
void EnumerateColumns(ColumnEnumerator aCallback, void *aUserData) const;
// Returns the list of columns in the current table.
const nsTArray<MorkColumn>& GetColumns() const { return mColumns; }
// Enumerate the rows in the current table.
void EnumerateRows(RowEnumerator aCallback, void *aUserData) const;
// Get the "meta row" for the table. Each table has at most one meta row,
// which records information about the table. Like normal rows, the
// meta row is a collection of column id / value pairs.
const StringMap& GetMetaRow() const { return mMetaRow; }
// meta row contains columns in the same order as returned by GetColumns().
// Returns null if there is no meta row for this table.
const nsTArray<nsCString>* GetMetaRow() const { return mMetaRow; }
// Normalizes the cell value (resolves references to the value map).
// aValue is modified in-place.
@ -117,18 +169,24 @@ private:
// Parses a line of the file which contains a table or row definition.
// Additional lines are read from mStream if the line ends mid-row.
// An entry is added to mTable using the row ID as the key, which contains
// a column id -> value map for the row.
nsresult ParseTable(const nsCSubstring &aLine);
// a column array for the row. The supplied column hash table maps from
// column id to an index in mColumns.
nsresult ParseTable(const nsCSubstring &aLine, const IndexMap &aColumnMap);
// Reads a single logical line from mStream into aLine.
// Any continuation lines are consumed and appended to the line.
nsresult ReadLine(nsCString &aLine);
StringMap mColumnMap;
// Create a new nsCString array and fill it with the supplied number
// of void strings. Returns null on out-of-memory.
static nsTArray<nsCString>* NewVoidStringArray(PRInt32 aSize);
nsTArray<MorkColumn> mColumns;
StringMap mValueMap;
StringMap mMetaRow;
nsDataHashtable<nsCStringHashKey,StringMap*> mTable;
nsAutoPtr< nsTArray<nsCString> > mMetaRow;
nsDataHashtable< IDKey,nsTArray<nsCString>* > mTable;
nsCOMPtr<nsILineInputStream> mStream;
nsCString mEmptyString; // note: not EmptyCString() since that's not sharable
};
#endif // nsMorkReader_h_

View File

@ -60,6 +60,7 @@
#include "mozStorageHelper.h"
#include "mozStorageCID.h"
#include "nsIAutoCompleteSimpleResult.h"
#include "nsTArray.h"
// nsFormHistoryResult is a specialized autocomplete result class that knows
// how to remove entries from the form history table.
@ -512,51 +513,45 @@ static const char * const gColumnNames[] = {
struct FormHistoryImportClosure
{
FormHistoryImportClosure(nsMorkReader *aReader, nsIFormHistory *aFormHistory)
: reader(aReader), formHistory(aFormHistory) { }
// Back pointers to the reader and history we're operating on
nsMorkReader *reader;
nsIFormHistory *formHistory;
// Column ids of the columns that we care about
nsCString columnIDs[kColumnCount];
};
// Enumerator callback to build up the column list
/* static */ PLDHashOperator PR_CALLBACK
nsFormHistoryImporter::EnumerateColumnsCB(const nsACString &aColumnID,
nsCString aName, void *aData)
{
FormHistoryImportClosure *data = NS_STATIC_CAST(FormHistoryImportClosure*,
aData);
for (PRUint32 i = 0; i < kColumnCount; ++i) {
if (aName.Equals(gColumnNames[i])) {
data->columnIDs[i].Assign(aColumnID);
return PL_DHASH_NEXT;
: reader(aReader), formHistory(aFormHistory)
{
for (PRUint32 i = 0; i < kColumnCount; ++i) {
columnIndexes[i] = -1;
}
}
return PL_DHASH_NEXT;
}
// Back pointers to the reader and history we're operating on
const nsMorkReader *reader;
nsIFormHistory *formHistory;
// Indexes of the columns that we care about
PRInt32 columnIndexes[kColumnCount];
};
// Enumerator callback to add an entry to the FormHistory
/* static */ PLDHashOperator PR_CALLBACK
nsFormHistoryImporter::AddToFormHistoryCB(const nsACString &aRowID,
const nsMorkReader::StringMap *aMap,
nsFormHistoryImporter::AddToFormHistoryCB(const nsCSubstring &aRowID,
const nsTArray<nsCString> *aValues,
void *aData)
{
FormHistoryImportClosure *data = NS_STATIC_CAST(FormHistoryImportClosure*,
aData);
nsMorkReader *reader = data->reader;
const nsMorkReader *reader = data->reader;
nsCString values[kColumnCount];
const PRUnichar* valueStrings[kColumnCount];
PRUint32 valueLengths[kColumnCount];
nsCString *columnIDs = data->columnIDs;
const PRInt32 *columnIndexes = data->columnIndexes;
PRInt32 i;
// Values are in UTF16.
for (i = 0; i < kColumnCount; ++i) {
aMap->Get(columnIDs[i], &values[i]);
if (columnIndexes[i] == -1) {
// We didn't find this column in the map
continue;
}
values[i] = (*aValues)[columnIndexes[i]];
reader->NormalizeValue(values[i]);
PRUint32 length;
@ -607,7 +602,16 @@ nsFormHistoryImporter::ImportFormHistory(nsIFile *aFile,
// Gather up the column ids so we don't need to find them on each row
FormHistoryImportClosure data(&reader, aFormHistory);
reader.EnumerateColumns(EnumerateColumnsCB, &data);
const nsTArray<nsMorkReader::MorkColumn> columns = reader.GetColumns();
for (PRUint32 i = 0; i < columns.Length(); ++i) {
const nsCSubstring &name = columns[i].name;
for (PRUint32 j = 0; j < kColumnCount; ++j) {
if (name.Equals(gColumnNames[j])) {
data.columnIndexes[j] = i;
break;
}
}
}
// Add the rows to form history
nsCOMPtr<nsIFormHistoryPrivate> fhPrivate = do_QueryInterface(aFormHistory);

View File

@ -60,6 +60,7 @@
class nsIAutoCompleteSimpleResult;
class nsIAutoCompleteResult;
class nsFormHistory;
template <class E> class nsTArray;
#define NS_IFORMHISTORYPRIVATE_IID \
{0xc4a47315, 0xaeb5, 0x4039, {0x9f, 0x34, 0x45, 0x11, 0xb3, 0xa7, 0x58, 0xdd}}
@ -139,16 +140,10 @@ public:
NS_DECL_NSIFORMHISTORYIMPORTER
private:
// Enumerator callback to build up a list of columns
static PLDHashOperator PR_CALLBACK
EnumerateColumnsCB(const nsACString &aColumnID,
nsCString aName,
void *aData);
// Enumerator callback to add a single row to the FormHistory.
static PLDHashOperator PR_CALLBACK
AddToFormHistoryCB(const nsACString &aRowID,
const nsMorkReader::StringMap *aMap,
AddToFormHistoryCB(const nsCSubstring &aRowID,
const nsTArray<nsCString> *aValues,
void *aData);
};
#endif