From 2082fa2b00c3611649f160e27754064f0459f025 Mon Sep 17 00:00:00 2001
From: hiro
Date: Thu, 10 Oct 2019 16:18:07 +0200
Subject: [PATCH] Add twitter service with parsing

---
 gettor.conf.json.example             |   5 +-
 gettor/main.py                       |  10 ++
 gettor/parse/twitter.py              | 148 ++++++++++++++++++++++++
 gettor/services/twitter/twitterdm.py | 164 ++++++++++++++++++++++++---
 gettor/utils/twitter.py              |  61 ++++++++++
 requirements.txt                     |   2 +-
 share/locale/es.json                 |   2 +-
 tests/conftests.py                   |   3 +
 tests/test_twitter.py                |  36 ++++++
 9 files changed, 413 insertions(+), 18 deletions(-)
 create mode 100644 gettor/parse/twitter.py
 create mode 100644 gettor/utils/twitter.py
 create mode 100644 tests/test_twitter.py

diff --git a/gettor.conf.json.example b/gettor.conf.json.example
index 5c06e37..76450ce 100644
--- a/gettor.conf.json.example
+++ b/gettor.conf.json.example
@@ -3,6 +3,7 @@
   "dbname": "/srv/gettor.torproject.org/home/gettor/gettor.db",
   "email_parser_logfile": "/srv/gettor.torproject.org/home/gettor/log/email_parser.log",
   "email_requests_limit": 30,
+  "twitter_requests_limit": 1,
   "sendmail_interval": 10,
   "sendmail_addr": "gettor@torproject.org",
   "sendmail_host": "localhost",
@@ -11,5 +12,7 @@
   "consumer_secret": "",
   "access_key": "",
   "access_secret": "",
-  "twitter_handle": "get_tor"
+  "twitter_handle": "get_tor",
+  "twitter_messages_endpoint": "https://api.twitter.com/1.1/direct_messages/events/list.json",
+  "twitter_new_message_endpoint": "https://api.twitter.com/1.1/direct_messages/events/new.json"
 }
diff --git a/gettor/main.py b/gettor/main.py
index 98a2de3..f914deb 100644
--- a/gettor/main.py
+++ b/gettor/main.py
@@ -19,6 +19,7 @@ from .utils import options
 
 from .services import BaseService
 from .services.email.sendmail import Sendmail
+from .services.twitter.twitterdm import Twitterdm
 
 def run(gettor, app):
     """
@@ -36,3 +37,12 @@ def run(gettor, app):
     gettor.addService(sendmail_service)
 
     gettor.setServiceParent(app)
+
+    twitterdm = Twitterdm(options.parse_settings())
+    twitter_service = BaseService(
+        "twitterdm", twitterdm.get_interval(), twitterdm
+    )
+
+    gettor.addService(twitter_service)
+
+    gettor.setServiceParent(app)
diff --git a/gettor/parse/twitter.py b/gettor/parse/twitter.py
new file mode 100644
index 0000000..ea7ad81
--- /dev/null
+++ b/gettor/parse/twitter.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of GetTor, a Tor Browser distribution system.
+#
+# :authors: isra
+#           see also AUTHORS file
+#
+# :copyright:   (c) 2008-2014, The Tor Project, Inc.
+#               (c) 2019, Hiro
+#
+# :license: This is Free Software. See LICENSE for license information.
+
+from __future__ import absolute_import
+
+import re
+import dkim
+import hashlib
+
+from datetime import datetime
+import configparser
+
+from twisted.python import log
+from twisted.internet import defer
+from twisted.enterprise import adbapi
+
+from ..utils.db import SQLite3
+from ..utils import strings
+
+
+class TwitterParser(object):
+    """Class for parsing twitter message requests."""
+
+    def __init__(self, settings, twitter_id=None):
+        """
+        Constructor.
+        """
+        self.settings = settings
+        self.twitter_id = twitter_id
+
+
+    def build_request(self, msg_text, twitter_id, languages, platforms):
+        """Build the request dict from the words of a direct message."""
+        request = {
+            "id": twitter_id,
+            "command": None,
+            "platform": None,
+            "language": "en",
+            "service": "twitter"
+        }
+
+        if msg_text:
+            for word in re.split(r"\s+", msg_text.strip()):
+                if word.lower() in languages:
+                    request["language"] = word.lower()
+                if word.lower() in platforms:
+                    request["command"] = "links"
+                    request["platform"] = word.lower()
+                if word.lower() == "help":
+                    request["command"] = "help"
+                    break
+
+        return request
+
+
+    def parse(self, msg, twitter_id):
+        """
+        Parse message content. Prevent service flooding. Finally, look for
+        commands to process the request. Current commands are:
+
+            - links: request links for download.
+            - help: help request.
+
+        :param msg (str): incoming message text.
+
+        :return: dict with twitter id and command (`links` or `help`).
+        """
+
+        log.msg("Building twitter message from string.", system="twitter parser")
+
+        platforms = self.settings.get("platforms")
+        languages = [*strings.get_locales().keys()]
+
+        hid = hashlib.sha256(twitter_id.encode('utf-8'))
+        log.msg(
+            "Request from {}".format(hid.hexdigest()), system="twitter parser"
+        )
+
+        request = self.build_request(msg, twitter_id, languages, platforms)
+
+        return request
+
+
+    @defer.inlineCallbacks
+    def parse_callback(self, request):
+        """
+        Callback invoked when the message has been parsed. It stores the
+        obtained information in the database for further processing by the
+        Twitterdm service.
+
+        :param (dict) request: the built request based on message's content.
+        It contains the `id` and `command` fields.
+
+        :return: deferred whose callback/errback will log database query
+        execution details.
+        """
+        twitter_requests_limit = self.settings.get("twitter_requests_limit")
+        log.msg(
+            "Found request for {}.".format(request['command']),
+            system="twitter parser"
+        )
+
+        if request["command"]:
+            now_str = datetime.now().strftime("%Y%m%d%H%M%S")
+            dbname = self.settings.get("dbname")
+            conn = SQLite3(dbname)
+
+            hid = hashlib.sha256(request['id'].encode('utf-8'))
+            # check limits first
+            num_requests = yield conn.get_num_requests(
+                id=hid.hexdigest(), service=request['service']
+            )
+
+            if num_requests[0][0] > twitter_requests_limit:
+                log.msg(
+                    "Discarded. Too many requests from {}.".format(
+                        hid.hexdigest()
+                    ), system="twitter parser"
+                )
+
+            else:
+                conn.new_request(
+                    id=request['id'],
+                    command=request['command'],
+                    platform=request['platform'],
+                    language=request['language'],
+                    service=request['service'],
+                    date=now_str,
+                    status="ONHOLD",
+                )
+
+    def parse_errback(self, error):
+        """
+        Errback if we don't/can't parse the message's content.
+        """
+        log.msg(
+            "Error while parsing twitter message content: {}.".format(error),
+            system="twitter parser"
+        )
diff --git a/gettor/services/twitter/twitterdm.py b/gettor/services/twitter/twitterdm.py
index 7f8543e..b346946 100644
--- a/gettor/services/twitter/twitterdm.py
+++ b/gettor/services/twitter/twitterdm.py
@@ -15,10 +15,13 @@ from __future__ import absolute_import
 
 import gettext
 import hashlib
+import json
 import configparser
 
 from twisted.internet import defer
 
+from ...parse.twitter import TwitterParser
+from ...utils.twitter import Twitter
 from ...utils.db import SQLite3 as DB
 from ...utils.commons import log
 from ...utils import strings
@@ -31,17 +34,14 @@ class Twitterdm(object):
         """
         Constructor. It opens and stores a connection to the database.
         :dbname: reads from configs
+
         """
         self.settings = settings
         dbname = self.settings.get("dbname")
-        consumer_key = self.settings.get("consumer_key")
-        consumer_secret = self.settings.get("consumer_secret")
-        access_key = self.settings.get("access_key")
-        access_secret = self.settings.get("access_secret")
-        twitter_handle = self.settings.get("twitter_handle")
-
+        self.twitter = Twitter(settings)
         self.conn = DB(dbname)
 
+
     def get_interval(self):
         """
         Get time interval for service periodicity.
@@ -59,6 +59,7 @@ class Twitterdm(object):
         """
         log.info("Message sent successfully.")
 
+
     def twitter_errback(self, error):
         """
         Errback if we don't/can't send the message.
@@ -67,25 +68,158 @@ class Twitterdm(object):
         raise Error("{}".format(error))
 
 
-    def twitter_msg_list(self):
-
-
-
-    def twitterdm(self):
+    def twitterdm(self, twitter_id, message):
         """
         Send a twitter message for each message received. It creates a plain
         text message, and sends it via twitter APIs
 
-        :param twitter_handle (str): email address of the recipient.
-        :param text (str): subject of the message.
+        :param twitter_id (str): twitter_id of the recipient.
+        :param message (str): text of the message.
 
         :return: deferred whose callback/errback will handle the API execution
         details.
         """
+        return defer.maybeDeferred(
+            self.twitter.post_message, twitter_id, message
+        ).addCallback(self.twitter_callback).addErrback(self.twitter_errback)
 
+
+    @defer.inlineCallbacks
+    def get_new(self):
+        """
+        Get new requests to process. This will define the `main loop` of
+        the Twitter service.
+        """
 
+        log.debug("Retrieve list of messages")
+        data = self.twitter.twitter_data()
 
-        log.debug("Creating message")
+        for e in data['events']:
+            message_id = { 'id': e['id'], 'twitter_handle': e['message_create']['sender_id'] }
 
-        log.debug("Calling twitter APIs.")
+            log.debug("Parsing message")
+            tp = TwitterParser(self.settings, message_id)
+            yield defer.maybeDeferred(
+                tp.parse, e['message_create']['message_data']['text'], json.dumps(message_id)
+            ).addCallback(tp.parse_callback).addErrback(tp.parse_errback)
+
+        # Manage help and links messages separately
+        help_requests = yield self.conn.get_requests(
+            status="ONHOLD", command="help", service="twitter"
+        )
+
+        link_requests = yield self.conn.get_requests(
+            status="ONHOLD", command="links", service="twitter"
+        )
+
+        if help_requests:
+            strings.load_strings("en")
+            try:
+                log.info("Got new help request.")
+
+                for request in help_requests:
+                    ids = json.loads(request[0])
+                    message_id = ids['id']
+                    twitter_id = ids['twitter_handle']
+                    date = request[5]
+
+                    hid = hashlib.sha256(twitter_id.encode('utf-8'))
+                    log.info(
+                        "Sending help message to {}.".format(
+                            hid.hexdigest()
+                        )
+                    )
+
+                    yield self.twitterdm(
+                        twitter_id=twitter_id,
+                        message=strings._("help_body")
+                    )
+
+                    yield self.conn.update_stats(
+                        command="help", platform='', language='en',
+                        service="twitter"
+                    )
+
+                    yield self.conn.update_request(
+                        id=request[0], hid=hid.hexdigest(), status="SENT",
+                        service="twitter", date=date
+                    )
+
+            except Error as e:
+                log.info("Error sending twitter message: {}.".format(e))
+
+        elif link_requests:
+            try:
+                log.info("Got new links request.")
+
+                for request in link_requests:
+                    ids = json.loads(request[0])
+                    message_id = ids['id']
+                    twitter_id = ids['twitter_handle']
+                    date = request[5]
+                    platform = request[2]
+                    language = request[3]
+
+                    if not language:
+                        language = 'en'
+
+                    locales = strings.get_locales()
+
+                    strings.load_strings(language)
+                    locale = locales[language]['locale']
+
+                    log.info("Getting links for {}.".format(platform))
+                    links = yield self.conn.get_links(
+                        platform=platform, language=locale, status="ACTIVE"
+                    )
+
+                    # build message
+                    link_msg = None
+                    for link in links:
+                        provider = link[5]
+                        version = link[4]
+                        arch = link[3]
+                        url = link[0]
+                        file = link[7]
+                        sig_url = url + ".asc"
+
+                        link_str = "Tor Browser {} for {}-{}-{} ({}): {}\n".format(
+                            version, platform, locale, arch, provider, url
+                        )
+
+                        link_str += "Signature file: {}\n".format(sig_url)
+
+                        if link_msg:
+                            link_msg = "{}\n{}".format(link_msg, link_str)
+                        else:
+                            link_msg = link_str
+
+                    body_msg = strings._("links_body").format(platform, link_msg, file)
+
+                    hid = hashlib.sha256(twitter_id.encode('utf-8'))
+                    log.info(
+                        "Sending links to {}.".format(
+                            hid.hexdigest()
+                        )
+                    )
+
+                    yield self.twitterdm(
+                        twitter_id=twitter_id,
+                        message=body_msg
+                    )
+
+                    yield self.conn.update_stats(
+                        command="links", platform=platform, language=locale,
+                        service="twitter"
+                    )
+
+                    yield self.conn.update_request(
+                        id=request[0], hid=hid.hexdigest(), status="SENT",
+                        service="twitter", date=date
+                    )
+
+            except Error as e:
+                log.info("Error sending message: {}.".format(e))
+        else:
+            log.debug("No pending twitter requests. Keep waiting.")
 
diff --git a/gettor/utils/twitter.py b/gettor/utils/twitter.py
new file mode 100644
index 0000000..2e3968b
--- /dev/null
+++ b/gettor/utils/twitter.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of GetTor, a Tor Browser distribution system.
+#
+# :authors: isra
+#           see also AUTHORS file
+#
+# :copyright:   (c) 2008-2014, The Tor Project, Inc.
+#               (c) 2019, Hiro
+#
+# :license: This is Free Software. See LICENSE for license information.
+
+from requests_oauthlib import OAuth1Session
+import json
+
+class Twitter(object):
+    """
+    Class for sending twitter commands via the API.
+    """
+    def __init__(self, settings):
+        """
+        Constructor.
+
+        """
+        self.settings = settings
+
+        consumer_key = self.settings.get("consumer_key")
+        consumer_secret = self.settings.get("consumer_secret")
+        access_key = self.settings.get("access_key")
+        access_secret = self.settings.get("access_secret")
+        twitter_handle = self.settings.get("twitter_handle")
+
+        self.twitter_messages_endpoint = self.settings.get("twitter_messages_endpoint")
+        self.twitter_new_message_endpoint = self.settings.get("twitter_new_message_endpoint")
+        self.twitter_client = self.twitter_oauth(consumer_key, consumer_secret, access_key, access_secret)
+
+    def twitter_oauth(self, consumer_key, consumer_secret, access_key, access_secret):
+        tw_client = OAuth1Session(client_key=consumer_key,
+                                  client_secret=consumer_secret,
+                                  resource_owner_key=access_key,
+                                  resource_owner_secret=access_secret)
+        return tw_client
+
+
+    def twitter_data(self):
+        data = self.twitter_client.get(self.twitter_messages_endpoint)
+        return data.json()
+
+
+    def post_message(self, twitter_id, text):
+        message = {
+            "event": {
+                "type": "message_create",
+                "message_create": {
+                    "target": {"recipient_id": twitter_id },
+                    "message_data": {"text": text }
+                }
+            }
+        }
+
+        return self.twitter_client.post(self.twitter_new_message_endpoint, json=message)
diff --git a/requirements.txt b/requirements.txt
index a6150bc..fc786a5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,5 +2,5 @@ service_identity==18.1.0
 pydkim==0.3
 pyopenssl==19.0.0
 dnspython==1.16.0
-validate_email==1.3
 twisted==19.2.1
+requests_oauthlib==1.0.0
diff --git a/share/locale/es.json b/share/locale/es.json
index 8ed6722..4d18f61 100644
--- a/share/locale/es.json
+++ b/share/locale/es.json
@@ -7,7 +7,7 @@
   "help_config": "Custom config file location (optional)",
   "smtp_links_subject": "[GetTor] Links for your request",
   "smtp_mirrors_subject": "[GetTor] Mirrors",
-  "smtp_help_subject": "[GetTor] Help",
+  "smtp_help_subject": "[GetTor] Ayuda",
   "smtp_unsupported_locale_subject": "[GetTor] Unsupported locale",
   "smtp_unsupported_locale_msg": "The locale you requested '{}' is not supported.",
   "smtp_vlinks_msg": "You requested Tor Browser for {}.\n\nYou will need only one of the links below to download the bundle. If a link does not work for you, try the next one.\n\n{}\n\nShould you have issues with any of the links above you can access the following Google Drive folder: https://drive.google.com/open?id=13CADQTsCwrGsIID09YQbNz2DfRMUoxUU\n\n Download the file: {}\n\n \n--\nGetTor",
diff --git a/tests/conftests.py b/tests/conftests.py
index 1f73f21..f5194a5 100644
--- a/tests/conftests.py
+++ b/tests/conftests.py
@@ -4,8 +4,11 @@ from __future__ import unicode_literals
 
 from gettor.utils import options
 from gettor.utils import strings
+from gettor.utils import twitter
 from gettor.services.email import sendmail
+from gettor.services.twitter import twitterdm
 from gettor.parse.email import EmailParser, AddressError, DKIMError
+from gettor.parse.twitter import TwitterParser
 
 from email import message_from_string
 from email.utils import parseaddr
diff --git a/tests/test_twitter.py b/tests/test_twitter.py
new file mode 100644
index 0000000..fe155cc
--- /dev/null
+++ b/tests/test_twitter.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+import pytest
+from twisted.trial import unittest
+from twisted.internet import defer, reactor
+from twisted.internet import task
+
+from . import conftests
+
+class TwitterTests(unittest.TestCase):
+    # Fail any tests which take longer than 15 seconds.
+    timeout = 15
+    def setUp(self):
+        self.settings = conftests.options.parse_settings()
+        self.tw_client = conftests.twitter.Twitter(self.settings)
+
+
+    def tearDown(self):
+        print("tearDown()")
+
+
+    def test_load_messages(self):
+        data = self.tw_client.twitter_data()
+        assert data['events']
+
+
+    def test_parse_tweet(self):
+        e = {'type': 'message_create', 'id': '1178649287208689669', 'created_timestamp': '1569846862972', 'message_create': {'target': {'recipient_id': '2514714800'}, 'sender_id': '1467062174', 'message_data': {'text': 'windows 10', 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [], 'urls': []}}}}
+        message_id = { 'id': e['id'], 'twitter_handle': e['message_create']['sender_id'] }
+        message = e['message_create']['message_data']['text']
+        tp = conftests.TwitterParser(self.settings, message_id)
+        r = tp.parse(message, str(message_id))
+        self.assertEqual(r, {'command': 'links', 'id': "{'id': '1178649287208689669', 'twitter_handle': '1467062174'}", 'language': 'en', 'platform': 'windows','service': 'twitter'})
+
+
+if __name__ == "__main__":
+    unittest.main()