Add twitter service with parsing

This commit is contained in:
hiro 2019-10-10 16:18:07 +02:00
parent cdfb42de25
commit 2082fa2b00
9 changed files with 413 additions and 18 deletions

View File

@ -3,6 +3,7 @@
"dbname": "/srv/gettor.torproject.org/home/gettor/gettor.db",
"email_parser_logfile": "/srv/gettor.torproject.org/home/gettor/log/email_parser.log",
"email_requests_limit": 30,
"twitter_requests_limit": 1,
"sendmail_interval": 10,
"sendmail_addr": "gettor@torproject.org",
"sendmail_host": "localhost",
@ -11,5 +12,7 @@
"consumer_secret": "",
"access_key": "",
"access_secret": "",
"twitter_handle": "get_tor"
"twitter_handle": "get_tor",
"twitter_messages_endpoint": "https://api.twitter.com/1.1/direct_messages/events/list.json",
"twitter_new_message_endpoint": "https://api.twitter.com/1.1/direct_messages/events/new.json"
}

View File

@ -19,6 +19,7 @@ from .utils import options
from .services import BaseService
from .services.email.sendmail import Sendmail
from .services.twitter.twitterdm import Twitterdm
def run(gettor, app):
"""
@ -36,3 +37,12 @@ def run(gettor, app):
gettor.addService(sendmail_service)
gettor.setServiceParent(app)
twitter_service = BaseService(
"twitterdm", twitterdm.get_interval(), twitterdm
)
gettor.addService(twitter_service)
gettor.setServiceParent(app)

148
gettor/parse/twitter.py Normal file
View File

@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
#
# This file is part of GetTor, a Tor Browser distribution system.
#
# :authors: isra <hiro@torproject.org>
# see also AUTHORS file
#
# :copyright: (c) 2008-2014, The Tor Project, Inc.
# (c) 2019, Hiro
#
# :license: This is Free Software. See LICENSE for license information.
from __future__ import absolute_import
import re
import dkim
import hashlib
from datetime import datetime
import configparser
from twisted.python import log
from twisted.internet import defer
from twisted.enterprise import adbapi
from ..utils.db import SQLite3
from ..utils import strings
class TwitterParser(object):
    """Class for parsing twitter message requests."""

    def __init__(self, settings, twitter_id=None):
        """
        Constructor.

        :param settings: settings object; values are read with ``.get()``.
        :param twitter_id: identifier of the message being parsed (optional).
        """
        self.settings = settings
        self.twitter_id = twitter_id

    def build_request(self, msg_text, twitter_id, languages, platforms):
        """
        Build a request dict from the text of a direct message.

        Every whitespace-separated word is compared, case-insensitively,
        against the known languages and platforms. A platform word selects
        the `links` command; the word `help` selects the `help` command and
        stops the scan.

        :param msg_text (str): text of the message (may be empty or None).
        :param twitter_id (str): identifier stored as the request `id`.
        :param languages: collection of supported language codes.
        :param platforms: collection of supported platform names.

        :return: dict with the keys `id`, `command`, `platform`, `language`
        and `service`. `command` stays None when nothing was recognised.
        """
        request = {
            "id": twitter_id,
            "command": None,
            "platform": None,
            "language": "en",
            "service": "twitter"
        }

        if not msg_text:
            return request

        # A missing setting yields None; treat it as "nothing supported"
        # instead of failing on the membership test.
        languages = languages or ()
        platforms = platforms or ()

        for word in re.split(r"\s+", msg_text.strip()):
            word = word.lower()
            if word in languages:
                request["language"] = word
            if word in platforms:
                request["command"] = "links"
                request["platform"] = word
            if word == "help":
                request["command"] = "help"
                break

        return request

    def parse(self, msg, twitter_id):
        """
        Parse message content and look for commands to process the request.
        Current commands are:

            - links: request links for download.
            - help: help request.

        :param msg (str): text of the incoming message.
        :param twitter_id (str): identifier of the message; it is hashed
        before being logged.

        :return: dict as built by `build_request`.
        """
        log.msg("Building twitter message from string.", system="twitter parser")

        platforms = self.settings.get("platforms")
        languages = [*strings.get_locales().keys()]

        # Only the hash of the identifier is written to the log.
        hid = hashlib.sha256(twitter_id.encode('utf-8'))
        log.msg(
            "Request from {}".format(hid.hexdigest()), system="twitter parser"
        )

        return self.build_request(msg, twitter_id, languages, platforms)

    @defer.inlineCallbacks
    def parse_callback(self, request):
        """
        Callback invoked when the message has been parsed. It stores the
        request in the database with status `ONHOLD` unless the number of
        requests already recorded for the hashed id exceeds
        `twitter_requests_limit`.

        :param (dict) request: the built request based on message's content.
        It contains the `id`, `command`, `platform`, `language` and
        `service` fields.

        :return: deferred that fires once the database queries are done.
        """
        twitter_requests_limit = self.settings.get("twitter_requests_limit")
        log.msg(
            "Found request for {}.".format(request['command']),
            system="twitter parser"
        )

        if not request["command"]:
            # Nothing recognised in the message: nothing to store.
            return

        now_str = datetime.now().strftime("%Y%m%d%H%M%S")
        conn = SQLite3(self.settings.get("dbname"))

        hid_hex = hashlib.sha256(request['id'].encode('utf-8')).hexdigest()

        # check limits first
        num_requests = yield conn.get_num_requests(
            id=hid_hex, service=request['service']
        )

        if num_requests[0][0] > twitter_requests_limit:
            log.msg(
                "Discarded. Too many requests from {}.".format(hid_hex),
                system="twitter parser"
            )
            return

        # Wait for the insert so that a failure reaches the errback chain
        # instead of being dropped.
        yield conn.new_request(
            id=request['id'],
            command=request['command'],
            platform=request['platform'],
            language=request['language'],
            service=request['service'],
            date=now_str,
            status="ONHOLD",
        )

    def parse_errback(self, error):
        """
        Errback if we don't/can't parse the message's content.

        :param error: failure passed in by the deferred chain; it is logged.
        """
        log.msg(
            "Error while parsing twitter message content: {}.".format(error),
            system="twitter parser"
        )

View File

@ -15,10 +15,13 @@ from __future__ import absolute_import
import gettext
import hashlib
import configparser
from twisted.internet import defer
from ...parse.twitter import TwitterParser
from ...utils.twitter import Twitter
from ...utils.db import SQLite3 as DB
from ...utils.commons import log
from ...utils import strings
@ -31,17 +34,14 @@ class Twitterdm(object):
"""
Constructor. It opens and stores a connection to the database.
:dbname: reads from configs
"""
self.settings = settings
dbname = self.settings.get("dbname")
consumer_key = self.settings.get("consumer_key")
consumer_secret = self.settings.get("consumer_secret")
access_key = self.settings.get("access_key")
access_secret = self.settings.get("access_secret")
twitter_handle = self.settings.get("twitter_handle")
self.twitter = Twitter(settings)
self.conn = DB(dbname)
def get_interval(self):
"""
Get time interval for service periodicity.
@ -59,6 +59,7 @@ class Twitterdm(object):
"""
log.info("Message sent successfully.")
def twitter_errback(self, error):
"""
Errback if we don't/can't send the message.
@ -67,25 +68,158 @@ class Twitterdm(object):
raise Error("{}".format(error))
def twitter_msg_list(self):
def twitterdm(self):
def twitterdm(self, twitter_id, message):
    """
    Send a twitter message. It creates a plain text message, and sends it
    via twitter APIs.

    :param twitter_id (str): twitter_id of the recipient.
    :param message (str): text of the message.

    :return: deferred whose callback/errback will handle the API execution
    details.
    """
    # Twitter.post_message is an ordinary call, not a Deferred-returning
    # one, so wrap it: maybeDeferred turns its result (or any exception it
    # raises) into a Deferred the callbacks can be chained on.
    sent = defer.maybeDeferred(self.twitter.post_message, twitter_id, message)

    # The errback defined on this class is `twitter_errback`.
    # NOTE(review): the success callback is assumed to be named
    # `twitter_callback` by symmetry -- confirm against the full file.
    sent.addCallback(self.twitter_callback)
    sent.addErrback(self.twitter_errback)
    return sent
@defer.inlineCallbacks
def get_new(self):
    """
    Get new requests to process. This will define the `main loop` of
    the Twitter service.

    First every direct-message event returned by the API is parsed and,
    when it contains a command, stored by the parser. Then the requests
    with status `ONHOLD` are answered: help requests if there are any,
    otherwise links requests. Each answered request is marked `SENT`.

    :return: deferred that fires when the pending requests were handled.
    """
    # Local import: json is not among the module imports visible here.
    import json

    log.debug("Retrieve list of messages")
    data = self.twitter.twitter_data()

    # An error payload has no `events` key; treat it as "no messages".
    for event in data.get('events', []):
        sender_id = event['message_create']['sender_id']
        # The parser hashes the id with `.encode()`, so it must be a
        # string. JSON is used so it can be decoded again below.
        message_id = json.dumps(
            {'id': event['id'], 'twitter_handle': sender_id}
        )

        log.debug("Parsing message")
        tp = TwitterParser(self.settings, message_id)
        yield defer.maybeDeferred(
            tp.parse, event['message_create']['message_data']['text'],
            message_id
        ).addCallback(tp.parse_callback).addErrback(tp.parse_errback)

    # Manage help and links messages separately
    help_requests = yield self.conn.get_requests(
        status="ONHOLD", command="help", service="twitter"
    )

    link_requests = yield self.conn.get_requests(
        status="ONHOLD", command="links", service="twitter"
    )

    if help_requests:
        strings.load_strings("en")
        try:
            log.info("Got new help request.")

            for request in help_requests:
                ids = json.loads(request[0])
                twitter_id = ids['twitter_handle']
                date = request[5]

                hid = hashlib.sha256(twitter_id.encode('utf-8'))
                log.info(
                    "Sending help message to {}.".format(
                        hid.hexdigest()
                    )
                )

                # Positional arguments: twitterdm takes
                # (twitter_id, message).
                yield self.twitterdm(twitter_id, strings._("help_body"))

                yield self.conn.update_stats(
                    command="help", platform='', language='en',
                    service="twitter"
                )

                yield self.conn.update_request(
                    id=request[0], hid=hid.hexdigest(), status="SENT",
                    service="twitter", date=date
                )

        except Error as e:
            log.info("Error sending twitter message: {}.".format(e))

    elif link_requests:
        try:
            log.info("Got new links request.")

            for request in link_requests:
                ids = json.loads(request[0])
                twitter_id = ids['twitter_handle']
                date = request[5]
                platform = request[2]
                language = request[3] or 'en'

                locales = strings.get_locales()
                strings.load_strings(language)
                locale = locales[language]['locale']

                log.info("Getting links for {}.".format(platform))
                links = yield self.conn.get_links(
                    platform=platform, language=locale, status="ACTIVE"
                )

                if not links:
                    # Without links there is no file name and no message
                    # body to build; skip this request so the remaining
                    # ones are still served.
                    log.info("No links found for {}.".format(platform))
                    continue

                # build message
                link_parts = []
                filename = None
                for link in links:
                    provider = link[5]
                    version = link[4]
                    arch = link[3]
                    url = link[0]
                    filename = link[7]
                    sig_url = url + ".asc"

                    link_str = "Tor Browser {} for {}-{}-{} ({}): {}\n".format(
                        version, platform, locale, arch, provider, url
                    )
                    link_str += "Signature file: {}\n".format(sig_url)
                    link_parts.append(link_str)

                link_msg = "\n".join(link_parts)

                body_msg = strings._("links_body").format(
                    platform, link_msg, filename
                )

                hid = hashlib.sha256(twitter_id.encode('utf-8'))
                log.info(
                    "Sending links to {}.".format(
                        hid.hexdigest()
                    )
                )

                yield self.twitterdm(twitter_id, body_msg)

                yield self.conn.update_stats(
                    command="links", platform=platform, language=locale,
                    service="twitter"
                )

                yield self.conn.update_request(
                    id=request[0], hid=hid.hexdigest(), status="SENT",
                    service="twitter", date=date
                )

        except Error as e:
            log.info("Error sending message: {}.".format(e))

    else:
        log.debug("No pending twitter requests. Keep waiting.")

61
gettor/utils/twitter.py Normal file
View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
#
# This file is part of GetTor, a Tor Browser distribution system.
#
# :authors: isra <hiro@torproject.org>
# see also AUTHORS file
#
# :copyright: (c) 2008-2014, The Tor Project, Inc.
# (c) 2019, Hiro
#
# :license: This is Free Software. See LICENSE for license information.
from requests_oauthlib import OAuth1Session
import json
class Twitter(object):
    """
    Class for sending twitter commands via the API.
    """

    def __init__(self, settings):
        """
        Constructor. Reads the OAuth credentials and the API endpoints from
        the settings and creates the OAuth1 client.

        :param settings: settings object; values are read with ``.get()``.
        """
        self.settings = settings

        consumer_key = self.settings.get("consumer_key")
        consumer_secret = self.settings.get("consumer_secret")
        access_key = self.settings.get("access_key")
        access_secret = self.settings.get("access_secret")

        # Kept on the instance instead of being read and thrown away.
        self.twitter_handle = self.settings.get("twitter_handle")
        self.twitter_messages_endpoint = self.settings.get(
            "twitter_messages_endpoint"
        )
        self.twitter_new_message_endpoint = self.settings.get(
            "twitter_new_message_endpoint"
        )

        self.twitter_client = self.twitter_oauth(
            consumer_key, consumer_secret, access_key, access_secret
        )

    def twitter_oauth(self, consumer_key, consumer_secret, access_key, access_secret):
        """
        Create the OAuth1 session used for every API call.

        :param consumer_key (str): application consumer key.
        :param consumer_secret (str): application consumer secret.
        :param access_key (str): resource owner key.
        :param access_secret (str): resource owner secret.

        :return: the `OAuth1Session` built from the four credentials.
        """
        return OAuth1Session(
            client_key=consumer_key,
            client_secret=consumer_secret,
            resource_owner_key=access_key,
            resource_owner_secret=access_secret,
        )

    def twitter_data(self):
        """
        Fetch the list of direct message events.

        :return: the decoded JSON body of the messages endpoint response.
        """
        response = self.twitter_client.get(self.twitter_messages_endpoint)
        return response.json()

    def post_message(self, twitter_id, text):
        """
        Send a direct message.

        :param twitter_id (str): twitter id of the recipient.
        :param text (str): text of the message.

        :return: the response object returned by the client's `post`.
        """
        message = {
            "event": {
                "type": "message_create",
                "message_create": {
                    "target": {"recipient_id": twitter_id},
                    "message_data": {"text": text}
                }
            }
        }

        # Send the payload as a JSON body. Passed positionally it becomes
        # `data=`, which form-encodes the dict and loses the nesting.
        return self.twitter_client.post(
            self.twitter_new_message_endpoint, json=message
        )

View File

@ -2,5 +2,5 @@ service_identity==18.1.0
pydkim==0.3
pyopenssl==19.0.0
dnspython==1.16.0
validate_email==1.3
twisted==19.2.1
requests_oauthlib==1.0.0

View File

@ -7,7 +7,7 @@
"help_config": "Custom config file location (optional)",
"smtp_links_subject": "[GetTor] Links for your request",
"smtp_mirrors_subject": "[GetTor] Mirrors",
"smtp_help_subject": "[GetTor] Help",
"smtp_help_subject": "[GetTor] Ayuda",
"smtp_unsupported_locale_subject": "[GetTor] Unsupported locale",
"smtp_unsupported_locale_msg": "The locale you requested '{}' is not supported.",
"smtp_vlinks_msg": "You requested Tor Browser for {}.\n\nYou will need only one of the links below to download the bundle. If a link does not work for you, try the next one.\n\n{}\n\nShould you have issues with any of the links above you can access the following Google Drive folder: https://drive.google.com/open?id=13CADQTsCwrGsIID09YQbNz2DfRMUoxUU\n\n Download the file: {}\n\n \n--\nGetTor",

View File

@ -4,8 +4,11 @@ from __future__ import unicode_literals
from gettor.utils import options
from gettor.utils import strings
from gettor.utils import twitter
from gettor.services.email import sendmail
from gettor.services.twitter import twitterdm
from gettor.parse.email import EmailParser, AddressError, DKIMError
from gettor.parse.twitter import TwitterParser
from email import message_from_string
from email.utils import parseaddr

36
tests/test_twitter.py Normal file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env python3
import pytest
from twisted.trial import unittest
from twisted.internet import defer, reactor
from twisted.internet import task
from . import conftests
class TwitterTests(unittest.TestCase):
    """Tests for the Twitter API client and the twitter message parser."""

    # Fail any tests which take longer than 15 seconds.
    timeout = 15

    def setUp(self):
        """Load the settings and build the Twitter client under test."""
        settings = conftests.options.parse_settings()
        self.settings = settings
        self.tw_client = conftests.twitter.Twitter(settings)

    def tearDown(self):
        """Report that the test finished."""
        print("tearDown()")

    def test_load_messages(self):
        """The messages endpoint returns a non-empty `events` entry."""
        response = self.tw_client.twitter_data()
        assert response['events']

    def test_parse_tweet(self):
        """A message naming a platform is parsed into a `links` request."""
        e = {
            'type': 'message_create',
            'id': '1178649287208689669',
            'created_timestamp': '1569846862972',
            'message_create': {
                'target': {'recipient_id': '2514714800'},
                'sender_id': '1467062174',
                'message_data': {
                    'text': 'windows 10',
                    'entities': {
                        'hashtags': [],
                        'symbols': [],
                        'user_mentions': [],
                        'urls': [],
                    },
                },
            },
        }
        created = e['message_create']
        message_id = {'id': e['id'], 'twitter_handle': created['sender_id']}
        text = created['message_data']['text']

        parser = conftests.TwitterParser(self.settings, message_id)
        parsed = parser.parse(text, str(message_id))

        expected = {
            'command': 'links',
            'id': "{'id': '1178649287208689669', 'twitter_handle': '1467062174'}",
            'language': 'en',
            'platform': 'windows',
            'service': 'twitter',
        }
        self.assertEqual(parsed, expected)


if __name__ == "__main__":
    unittest.main()