#!/usr/bonsaitools/bin/perl -w
# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is NewsBot
#
# The Initial Developer of the Original Code is Netscape Communications
# Corporation. Portions created by Netscape are
# Copyright (C) 1998 Netscape Communications Corporation. All
# Rights Reserved.
#
# Contributor(s): Dawn Endico <endico@mozilla.org>

# Harvest pointers to news articles and their summaries from mailbox files
# and write HTML and RDF output from them.
#
# usage: newsbot [mailfile1, mailfile2...] [rdffile]
# Each mail file is in standard mbox format, such as a sendmail spool file.
# rdffile is where to put the generated RDF.
# NOTE: the last command-line argument is always taken as rdffile.
# The HTML page is written to standard output.

require 5.00397;
use strict;
use Mail::Folder::Mbox;
use Mail::Address;

# Main script.
#
# Reads every mail folder named on the command line, keeps the messages
# that look like a forwarded news article with a summary, prints one HTML
# page for all of them to the currently selected output handle and, when
# an rdf file name was given, writes the RDF channel file as well.

# The last command-line argument is always taken as the rdf file name.
my $rdffile = pop(@ARGV);    # name of file to write rdf data

unless (@ARGV) {
    # command line argument should be list of mail files to
    # process. If none given, use this file.
    my $mailfile = "/var/mail/newsbot";
    die("No mail\n") if (!-f $mailfile);
    push(@ARGV, $mailfile);
}

my %articlehash;    # Message-ID => article; used to drop duplicate submissions
my @articles;       # articles in submission order; slot 0 is left unused
my $index  = 0;     # index of the newest entry in @articles
my $opened = 0;     # number of folders that could be opened

for my $file (@ARGV) {
    my $folder = Mail::Folder->new('AUTODETECT', $file);
    unless ($folder) {
        warn("can't open $file: $!");
        next;
    }
    $opened++;

    for my $msg (sort { $a <=> $b } $folder->message_list) {
        my $entity = $folder->get_mime_message($msg);
        next unless $entity;

        my $article = _harvest_article($entity);

        # The first submission of an article wins; later ones are dropped.
        if ($article && !$articlehash{ $article->{'Message-ID'} }) {
            $articlehash{ $article->{'Message-ID'} } = $article;
            $index += 1;
            $articles[$index] = $article;
        }

        # Purge every message, including the rejected ones.
        $entity->purge;
    }
    $folder->close;
}

# One page and one rdf file for all folders together. Nothing is written
# when no folder could be opened at all.
if ($opened) {
    printheader();

    # newest submission first
    for (my $i = $index; $i > 0; $i--) {
        printarticle($articles[$i]);
    }
    printfooter();

    if ($rdffile) {
        printrdf(\@articles, $rdffile);
    }
}

# Return $text with the HTML special characters & < > " replaced by
# entities. An undefined value yields the empty string.
sub _escape_html {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Return the whole body of a MIME part as one string, or "" when the
# part cannot be opened.
sub _slurp_part {
    my ($part) = @_;
    my $text = "";
    if (my $io = $part->open("r")) {
        while (defined(my $line = $io->getline)) {
            $text .= $line;
        }
        $io->close;
    }
    return $text;
}

# Return the value of the first "$name: value" line found in $text, or
# undef when there is none. The capture is taken in list context so a
# failed match can never pick up a stale $1 from an earlier match.
sub _news_header {
    my ($text, $name) = @_;
    my ($value) = $text =~ /^\Q$name\E: ([^\n]+)/mi;
    return $value;
}

# Turn a plain-text summary into HTML: drop the signature, escape the
# markup characters and make URLs and <user@host> addresses clickable.
sub _plain_summary_to_html {
    my ($summary) = @_;

    # line beginning with -- is a signature separator. Delete the sig
    $summary =~ s/^--.*//ms;

    $summary = _escape_html($summary);

    # One pass for all schemes, so a later pass can never match inside a
    # link that was just generated. The lookahead stops the URL before an
    # escaped < > or ", so <http://host/> does not swallow the bracket.
    $summary =~ s{((?:https?|ftp)://(?:(?!&(?:lt|gt|quot);)\S)+)}{<A HREF="$1">$1</A>}g;
    $summary =~ s{&lt;(\S+\@\S+)&gt;}{&lt;<A HREF="mailto:$1">$1</A>&gt;}g;
    return $summary;
}

# Examine one submitted message and return a reference to an article hash
# (keys: Message-ID, Subject, Date, From, Newsgroups, Summary, submitter,
# submitdate), or undef when the message is not a usable submission.
# Every field except an HTML summary is HTML-escaped here.
sub _harvest_article {
    my ($entity) = @_;

    # scalar(): take the first header line, as a scalar assignment would.
    my $submitter  = scalar($entity->get('From'));
    my $submitdate = scalar($entity->get('Date'));
    for my $value ($submitter, $submitdate) {
        $value = '' unless defined $value;
        chomp($value);
        $value = _escape_html($value);
    }

    #
    # This is important for weeding out junk. Submissions must be multipart
    # mime messages as created by Messenger when forwarding a news article.
    # the first part is text/html or text/plain. The second part should be
    # type message/rfc822. This format preserves the header information of
    # the original news article (especially the Message-ID). It also makes
    # it more difficult for random junk mailed to newsbot to litter the
    # web page. We don't want someone to post an article, cc newsbot and
    # have a big thread begin where the rest of the messages continue
    # being cc'd to newsbot.
    #
    # 99-05-27: allow message/news in addition to message/rfc822 for the
    # second part as this is what communicator 3.x uses. -endico
    my @parts = $entity->parts;
    return undef if (@parts < 2);

    my $summary_type = $parts[0]->head->mime_type;
    my $is_plain     = ($summary_type =~ m{text/plain});
    return undef unless ($is_plain || $summary_type =~ m{text/html});
    return undef unless ($parts[1]->head->mime_type =~ m{message/(?:rfc822|news)});

    # NOTE(review): an HTML summary is passed through unescaped, so the
    # submitter's markup ends up verbatim on the page.
    my $summary = _slurp_part($parts[0]);
    $summary = _plain_summary_to_html($summary) if ($is_plain);

    my $news = _slurp_part($parts[1]);

    # check to make sure this is a news article. If not, skip it.
    my $newsgroups = _news_header($news, 'Newsgroups');
    return undef if (!$newsgroups);
    $newsgroups = _escape_html($newsgroups);
    $newsgroups =~ s{(netscape\.public\.mozilla\.[\w.-]+)}{\n<A HREF="news://news.mozilla.org/$1">\n $1</A>}g;

    # Without a Message-ID no link to the article can be built.
    my ($MID) = $news =~ /^Message-ID: <([^>]+)/mi;
    return undef unless (defined $MID && length $MID);

    my $subject = _news_header($news, 'Subject');
    $subject = '' unless defined $subject;
    $subject =~ s/^Re://ig;
    $subject =~ s/^Fwd://ig;

    my %article;
    $article{'Message-ID'} = _escape_html($MID);
    $article{'Subject'}    = _escape_html($subject);
    $article{'Date'}       = _escape_html(_news_header($news, 'Date'));
    $article{'From'}       = _escape_html(_news_header($news, 'From'));
    $article{'Newsgroups'} = $newsgroups;
    $article{'Summary'}    = $summary;
    $article{'submitter'}  = $submitter;
    $article{'submitdate'} = $submitdate;
    return \%article;
}

# Write the RDF channel file that lists the newest articles.
#
# Arguments:
#   $ref     - reference to the article list. Entries are hash references
#              with 'Subject' and 'Message-ID' keys. Index 0 is never
#              printed, because the loop stops before reaching it.
#   $rdffile - path of the file to create or overwrite.
#
# Both values are written as given, so the caller must supply text that
# is already safe to embed in XML.
#
# Dies when the file cannot be opened or completely written. The RDF is
# printed to its own handle, so the caller's selected output handle is
# left untouched.
sub printrdf {
    my ($ref, $rdffile) = @_;
    my @articles = @{$ref};

    # IO::File with an fopen-style mode never interprets mode characters
    # that might be embedded in the file name.
    require IO::File;
    my $rdf = IO::File->new($rdffile, 'w');
    unless ($rdf) {
        die "Couldn\'t open rdf file:\"$rdffile\"\n";
    }

    my $header =<<'RDFHEAD';
<?xml version="1.0"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://my.netscape.com/rdf/simple/0.9/">

<channel>
<title>Mozilla NewsBot</title>
<link>http://www.mozilla.org/newsbot/</link>
<description>Pointers to the hottest mozilla newsgroup threads.</description>
</channel>

<image>
<title>Mozilla</title>
<url>http://www.mozilla.org/images/hack.gif</url>
<link>http://www.mozilla.org/newsbot/</link>
</image>

RDFHEAD
    print $rdf $header;

    # only print newest 15 articles
    my $newest_wanted = 15;
    my $index = @articles - 1;
    my $min = 0;
    if ($index > $newest_wanted) {
        $min = $index - $newest_wanted;
    }
    for (my $i = $index; $i > $min; $i--) {
        my $article = $articles[$i];
        next unless $article;

        my $subject = $article->{'Subject'};
        my $mid     = $article->{'Message-ID'};
        $subject = '' unless defined $subject;
        $mid     = '' unless defined $mid;

        print $rdf " <item>\n";
        print $rdf " <title>" . $subject . "</title>\n";
        # "&" must be written as "&amp;" to keep the file well-formed XML.
        print $rdf " <link>http://groups.google.com/groups?q=msgid:" . $mid . "&amp;ic=1</link>\n";
        print $rdf " </item>\n\n";
    }

    print $rdf "</rdf:RDF>\n";

    # Buffered write errors only show up when the handle is closed.
    $rdf->close
        or die "Couldn\'t write rdf file:\"$rdffile\"\n";
} #end printrdf()

# Print the HTML for one article to the currently selected output handle.
#
# Argument:
#   $artref - reference to an article hash with the keys Message-ID,
#             Subject, Summary, Date, Newsgroups, submitter and submitdate.
#
# The values are printed as they are, without any escaping. Missing
# values are printed as empty strings.
sub printarticle {
    my ($artref) = @_;

    # Work on a copy with every expected field defined, so printing never
    # hits an undefined value.
    my %article;
    for my $field ('Message-ID', 'Subject', 'Summary', 'Date',
                   'Newsgroups', 'submitter', 'submitdate') {
        my $value = $artref ? $artref->{$field} : undef;
        $article{$field} = defined $value ? $value : '';
    }

    print "\n<P>\n";
    print "<A NAME=\"" . $article{'Message-ID'} . "\"></A>\n";
    print "<TABLE border=0 width=100%><TR><TD><B><FONT SIZE=+1>\n";
    print $article{'Subject'} . "\n";
    # close the tags in the reverse order they were opened
    print "</FONT></B>\n";
    print "</TD></TR><TR><TD>\n";
    print $article{'Summary'};
    print "</TD></TR><TR><TD ALIGN=\"right\">\n";
    print "<FONT SIZE=-1>\nPosted: " . $article{'Date'} . "\n</FONT>";
    print "<BR>";
    print $article{'Newsgroups'} . "\n";
    print "<BR>\n";
    # "&" inside an attribute value is written as "&amp;"
    print "<A HREF=\"http://groups.google.com/groups?q=msgid:" . $article{'Message-ID'} . "&amp;ic=1\">\n";
    print "View Article</A>\n";
    print "<!--Submitted to NewsBot by: " . $article{'submitter'} . "-->\n";
    print "<!--" . $article{'submitdate'} . "-->\n";
    print "</TD></TR>\n";
    print "</TABLE>\n";
}

# Print the fixed top of the HTML page (document head, title and the
# instructions for submitting an article) to the currently selected
# output handle. Takes no arguments.
sub printheader {
    my $header =<<'ENDHEAD';
<HTML>
<HEAD>
<TITLE>newsbot</TITLE>
</HEAD>

<BODY>
<H1>newsbot</H1>
Since not everyone has a chance to keep up with all the mozilla
<A HREF="http://www.mozilla.org/community.html">news groups</A>,
newsbot is here to collect pointers to some of the more important
announcements, discussions, and goings-<WBR>on.
<P>
When you see an article of interest to the general mozilla community
forward it to <A HREF="mailto:newsbot@mozilla.org">newsbot@mozilla.org</A>
and write a summary of the article. Newsbot will add your summary to this
page and make pointers back to the original article and its thread in Google Groups.
For My Netscape users we also have a
<A HREF="http://www.mozilla.org/my-mozilla.html">channel</A>
for newsbot.



<BLOCKQUOTE><FONT SIZE=-1>
For Netscape Communicator users, this means pressing the <I>Forward</I>
button and writing a summary in the message window. (Forwarding as "quoted"
or "inline" confuses newsbot. Be sure to forward as attachment.) For users
of other clients, the forwarded message should be a multipart MIME message
where the first part is text/plain or text/html and contains your summary,
and the second part is type message/rfc822 and contains the news article.
</FONT></BLOCKQUOTE>

<P>
ENDHEAD
    print $header;
}

# Print the fixed bottom of the HTML page (feedback address and the
# closing BODY and HTML tags) to the currently selected output handle.
# Takes no arguments.
sub printfooter {
    my $footer =<<'ENDFOOT';
<P>
<FONT SIZE=-1>
Send newsbot feedback
to <A HREF="mailto:endico@mozilla.org">Dawn Endico</a>.
</FONT>
</BODY>
</HTML>
ENDFOOT
    print $footer;
}