From 3c396257de9ee3277d1ffffc13cd3bc44bbd8ec1 Mon Sep 17 00:00:00 2001 From: Christophe Dumez Date: Wed, 3 Mar 2010 21:11:40 +0000 Subject: [PATCH] Use QXmlStreamReader instead of QDomDocument to save memory when parsing the RSS documents --- Changelog | 1 + src/rss.cpp | 125 +++++++++++++++++++++++++++++----------------------- src/rss.h | 61 ++++++++++++++----------- 3 files changed, 106 insertions(+), 81 deletions(-) diff --git a/Changelog b/Changelog index 4a5430bca..2a0a81627 100644 --- a/Changelog +++ b/Changelog @@ -14,6 +14,7 @@ - FEATURE: Allow to change the priority of several files at once - FEATURE: Support for multiple scan folders (Patch by Christian Kandeler) - BUGFIX: Only one log window can be opened at a time + - BUGFIX: Optimized RSS module memory usage - COSMETIC: Improved style management * Mon Jan 18 2010 - Christophe Dumez - v2.1.0 diff --git a/src/rss.cpp b/src/rss.cpp index 2a50098d5..28eb1bd42 100644 --- a/src/rss.cpp +++ b/src/rss.cpp @@ -560,56 +560,80 @@ QString RssStream::getIconUrl() { } // read and create items from a rss document -short RssStream::readDoc(const QDomDocument& doc) { +short RssStream::readDoc(QIODevice* device) { + qDebug("Parsing RSS file..."); + QXmlStreamReader xml(device); // is it a rss file ? - QDomElement root = doc.documentElement(); - if(root.tagName() == QString::fromUtf8("html")){ - qDebug("the file is empty, maybe the url is invalid or the server is too busy"); + if (xml.atEnd()) { + qDebug("ERROR: Could not parse RSS file"); return -1; } - else if(root.tagName() != QString::fromUtf8("rss")){ - qDebug("the file is not a rss stream, omitted: %s", root.tagName().toLocal8Bit().data()); - return -1; - } - QDomNode rss = root.firstChild(); - QDomElement channel = root.firstChild().toElement(); - - while(!channel.isNull()) { - // we are reading the rss'main info - if (channel.tagName() == "channel") { - QDomElement property = channel.firstChild().toElement(); - while(!property.isNull()) { - if (property.tagName() == "title") { - title = property.text(); - if(alias==getUrl()) - rename(title); - } - else if (property.tagName() == "link") - link = property.text(); - else if (property.tagName() == "description") - description = property.text(); - else if (property.tagName() == "image") - image = property.text(); - else if(property.tagName() == "item") { - RssItem * item = new RssItem(this, property); - if(item->isValid()) { - QString title = item->getTitle(); - bool already_exists = itemAlreadyExists(title); - if(!already_exists) { - (*this)[title] = item; - } else { - delete item; - } - } else { - delete item; - } - } - property = property.nextSibling().toElement(); + while (!xml.atEnd()) { + xml.readNext(); + if(xml.isStartElement()) { + if(xml.name() != "rss") { + qDebug("ERROR: this is not a rss file, root tag is <%s>", qPrintable(xml.name().toString())); + return -1; + } else { + break; } } - channel = channel.nextSibling().toElement(); } + // Read channels + while(!xml.atEnd()) { + xml.readNext(); + + if(xml.isEndElement()) + break; + + if(xml.isStartElement()) { + //qDebug("xml.name() == %s", qPrintable(xml.name().toString())); + if(xml.name() == "channel") { + qDebug("in channel"); + + // Parse channel content + while(!xml.atEnd()) { + xml.readNext(); + + if(xml.isEndElement() && xml.name() == "channel") { + break; + } + + if(xml.isStartElement()) { + //qDebug("xml.name() == %s", qPrintable(xml.name().toString())); + if(xml.name() == "title") { + title = xml.readElementText(); + if(alias == getUrl()) + rename(title); + } + else if(xml.name() == "link") { + link = xml.readElementText(); + } + else if(xml.name() == "description") { + description = xml.readElementText(); + } + else if(xml.name() == "image") { + image = xml.attributes().value("url").toString(); + } + else if(xml.name() == "item") { + RssItem * item = new RssItem(this, xml); + if(item->isValid() && !itemAlreadyExists(item->getTitle())) { + this->insert(item->getTitle(), item); + } else { + delete item; + } + } + } + } + return 0; + } + } + } + qDebug("XML Error: This is not a valid RSS document"); + return -1; + resizeList(); + // RSS Feed Downloader foreach(RssItem* item, values()) { if(item->isRead()) continue; @@ -658,7 +682,6 @@ void RssStream::resizeList() { // existing and opening test after download short RssStream::openRss(){ qDebug("openRss() called"); - QDomDocument doc("Rss Seed"); QFile fileRss(filePath); if(!fileRss.open(QIODevice::ReadOnly | QIODevice::Text)) { qDebug("openRss error: open failed, no file or locked, %s", (const char*)filePath.toLocal8Bit()); @@ -667,19 +690,9 @@ short RssStream::openRss(){ } return -1; } - QString error_msg = ""; - int line=-1; - int col = -1; - if(!doc.setContent(&fileRss, &error_msg, &line, &col)) { - qDebug("Error when parsing XML at line %d (col: %d): %s", line, col, error_msg.toLocal8Bit().data()); - fileRss.close(); - if(QFile::exists(filePath)) { - fileRss.remove(); - } - return -1; - } + // start reading the xml - short return_lecture = readDoc(doc); + short return_lecture = readDoc(&fileRss); fileRss.close(); if(QFile::exists(filePath)) { fileRss.remove(); diff --git a/src/rss.h b/src/rss.h index 494c525d1..ea709873e 100644 --- a/src/rss.h +++ b/src/rss.h @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -267,36 +267,47 @@ protected: public: // public constructor - RssItem(RssStream* parent, const QDomElement& properties): parent(parent), read(false) { + RssItem(RssStream* parent, QXmlStreamReader& xml): parent(parent), read(false) { is_valid = false; torrent_url = QString::null; news_link = QString::null; title = QString::null; - QDomElement property = properties.firstChild().toElement(); - while(!property.isNull()) { - if (property.tagName() == "title") { - title = property.text(); - if(title.isEmpty()) { - is_valid = false; - return; + while(!xml.atEnd()) { + xml.readNext(); + + if(xml.isEndElement() && xml.name() == "item") + break; + qDebug("in item: <%s>", qPrintable(xml.name().toString())); + + if(xml.isStartElement()) { + if(xml.name() == "title") { + title = xml.readElementText(); + if(title.isEmpty()) { + is_valid = false; + return; + } + } + else if(xml.name() == "enclosure") { + if(xml.attributes().value("type") == "application/x-bittorrent") { + torrent_url = xml.attributes().value("url").toString(); + } + } + else if(xml.name() == "link") { + news_link = xml.readElementText(); + } + else if(xml.name() == "description") { + description = xml.readElementText(); + } + else if(xml.name() == "pubDate") { + date = parseDate(xml.readElementText()); + } + else if(xml.name() == "author") { + author = xml.readElementText(); } } - else if (property.tagName() == "enclosure") { - if(property.attribute("type", "") == "application/x-bittorrent") { - torrent_url = property.attribute("url", QString::null); - } - } - else if (property.tagName() == "link") - news_link = property.text(); - else if (property.tagName() == "description") - description = property.text(); - else if (property.tagName() == "pubDate") - date = parseDate(property.text()); - else if (property.tagName() == "author") - author = property.text(); - property = property.nextSibling().toElement(); } - is_valid = true; + if(!title.isEmpty()) + is_valid = true; } RssItem(RssStream* parent, QString _title, QString _torrent_url, QString _news_link, QString _description, QDateTime _date, QString _author, bool _read): @@ -435,7 +446,7 @@ public: QString getIconUrl(); private: - short readDoc(const QDomDocument& doc); + short readDoc(QIODevice* device); void resizeList(); short openRss(); };