Major refactor

This commit is contained in:
Sijmen Schoon 2016-09-26 03:52:59 +02:00
parent 9052cac2e8
commit e883055e61

View File

@ -1,53 +1,77 @@
import json
import requests
import threading
import aiotg
"""
Telematrix
App service for Matrix to bridge a room with a Telegram group.
"""
import asyncio
import html
import json
import logging
import mimetypes
import time
import html
import datetime
from datetime import datetime
from time import time
from urllib.parse import unquote, quote, urlparse, parse_qs
from aiohttp import web, ClientSession, MultipartWriter
from pprint import pprint
from aiohttp import web, ClientSession
from aiotg import Bot
from bs4 import BeautifulSoup
# Read the configuration file
try:
with open('config.json', 'r') as f:
config = json.load(f)
with open('config.json', 'r') as config_file:
CONFIG = json.load(config_file)
HS_TOKEN = config['tokens']['hs']
AS_TOKEN = config['tokens']['as']
TG_TOKEN = config['tokens']['telegram']
GOOGLE_TOKEN = config['tokens']['google']
HS_TOKEN = CONFIG['tokens']['hs']
AS_TOKEN = CONFIG['tokens']['as']
TG_TOKEN = CONFIG['tokens']['telegram']
GOOGLE_TOKEN = CONFIG['tokens']['google']
MATRIX_HOST = config['hosts']['internal']
MATRIX_HOST_EXT = config['hosts']['external']
MATRIX_HOST = CONFIG['hosts']['internal']
MATRIX_HOST_EXT = CONFIG['hosts']['external']
MATRIX_PREFIX = MATRIX_HOST + '_matrix/client/r0/'
MATRIX_MEDIA_PREFIX = MATRIX_HOST + '_matrix/media/r0/'
USER_ID_FORMAT = config['user_id_format']
USER_ID_FORMAT = CONFIG['user_id_format']
telegram_chats = config['chats']
matrix_rooms = {v: k for k, v in telegram_chats.items()}
except (OSError, IOError) as e:
TELEGRAM_CHATS = CONFIG['chats']
MATRIX_ROOMS = {v: k for k, v in TELEGRAM_CHATS.items()}
except (OSError, IOError) as exception:
print('Error opening config file:')
print(e)
print(exception)
exit(1)
bot = aiotg.Bot(api_token=TG_TOKEN)
client_session = ClientSession()
GOO_GL_URL = 'https://www.googleapis.com/urlshortener/v1/url'
TG_BOT = Bot(api_token=TG_TOKEN)
MATRIX_SESS = ClientSession()
SHORTEN_SESS = ClientSession()
def create_response(code, obj):
return web.Response(text=json.dumps(obj), status=code, content_type='application/json', charset='utf-8')
"""
Create an HTTP response with a JSON body.
:param code: The status code of the response.
:param obj: The object to serialize and include in the response.
:return: A web.Response.
"""
return web.Response(text=json.dumps(obj), status=code,
content_type='application/json', charset='utf-8')
VALID_TAGS = ['b', 'strong', 'i', 'em', 'a', 'pre']
def sanitize_html(h):
h = h.replace('\n', '')
h = h.replace('<br>', '\n').replace('<br/>', '\n').replace('<br />', '\n')
soup = BeautifulSoup(h, 'html.parser')
def sanitize_html(string):
"""
Sanitize an HTML string for the Telegram bot API.
:param string: The HTML string to sanitized.
:return: The sanitized HTML string.
"""
string = string.replace('\n', '')
string = string.replace('<br>', '\n').replace('<br/>', '\n') \
.replace('<br />', '\n')
soup = BeautifulSoup(string, 'html.parser')
for tag in soup.find_all(True):
if tag.name == 'blockquote':
tag.string = ('\n' + tag.text).replace('\n', '\n> ').rstrip('\n>')
@ -56,133 +80,188 @@ def sanitize_html(h):
return soup.renderContents().decode('utf-8')
def format_matrix_msg(form, username, content):
"""
Formats a matrix message for sending to Telegram
:param form: The format string of the message, where the first parameter
is the username and the second one the message.
:param username: The username of the user.
:param content: The content to be sent.
:return: The formatted string.
"""
if 'format' in content and content['format'] == 'org.matrix.custom.html':
sanitized = sanitize_html(content['formatted_body'])
return html.escape(form).format(username, sanitized), 'HTML'
else:
return form.format(username, content['body']), None
async def download_matrix_file(url, filename):
"""
Download a file from an MXC URL to /tmp/{filename}
:param url: The MXC URL to download from.
:param filename: The filename in /tmp/ to download into.
"""
m_url = MATRIX_MEDIA_PREFIX + 'download/{}{}'.format(url.netloc, url.path)
async with MATRIX_SESS.get(m_url) as response:
data = await response.read()
with open('/tmp/{}'.format(filename), 'wb') as file:
file.write(data)
async def shorten_url(url):
"""
Shorten an URL using goo.gl. Returns the original URL if it fails.
:param url: The URL to shorten.
:return: The shortened URL.
"""
headers = {'Content-Type': 'application/json'}
async with SHORTEN_SESS.post(GOO_GL_URL, params={'key': GOOGLE_TOKEN},
data={'longUrl': url}, headers=headers) \
as response:
obj = response.json()
return obj['id'] if 'id' in obj else url
async def matrix_transaction(request):
transaction = request.match_info['transaction']
"""
Handle a transaction sent by the homeserver.
:param request: The request containing the transaction.
:return: The response to send.
"""
body = await request.json()
events = body['events']
for event in events:
room_id = event['room_id']
if room_id not in matrix_rooms:
print('{} not in matrix_rooms!'.format(room_id))
if event['room_id'] not in MATRIX_ROOMS:
print('{} not in matrix_rooms!'.format(event['room_id']))
elif event['type'] == 'm.room.message':
group = bot.group(matrix_rooms[room_id])
group = TG_BOT.group(MATRIX_ROOMS[event['room_id']])
username = event['user_id'].split(':')[0][1:]
if username.startswith('telegram_'):
return create_response(200, {})
content = event['content']
if content['msgtype'] == 'm.text':
if 'formatted_body' in content:
await group.send_text('&lt;{}&gt; {}'.format(username, sanitize_html(content['formatted_body'])), parse_mode='HTML')
else:
await group.send_text('<{}> {}'.format(username, content['body']))
msg, mode = format_matrix_msg('<{}> {}', username, content)
await group.send_text(msg, parse_mode=mode)
elif content['msgtype'] == 'm.notice':
if 'formatted_body' in content:
await group.send_text('[{}] {}'.format(username, sanitize_html(content['formatted_body'])), parse_mode='HTML')
else:
await group.send_text('[{}] {}'.format(username, content['body']))
msg, mode = format_matrix_msg('[{}] {}', username, content)
await group.send_text(msg, parse_mode=mode)
elif content['msgtype'] == 'm.emote':
if 'formatted_body' in content:
await group.send_text('*** {} {}'.format(username, sanitize_html(content['formatted_body'])), parse_mode='HTML')
else:
await group.send_text('*** {} {}'.format(username, content['body']))
msg, mode = format_matrix_msg('* {} {}', username, content)
await group.send_text(msg, parse_mode=mode)
elif content['msgtype'] == 'm.image':
url = urlparse(content['url'])
async with client_session.get(MATRIX_MEDIA_PREFIX + 'download/{}{}'.format(url.netloc, url.path)) as response:
b = await response.read()
download_matrix_file(url, content['body'])
with open('/tmp/{}'.format(content['body']), 'rb') as img_file:
url_str = MATRIX_HOST_EXT + \
'_matrix/media/r0/download/{}{}' \
.format(url.netloc, quote(url.path))
url_str = shorten_url(url_str)
with open('/tmp/{}'.format(content['body']), 'wb') as f:
f.write(b)
with open('/tmp/{}'.format(content['body']), 'rb') as f:
url_str = MATRIX_HOST_EXT + '_matrix/media/r0/download/{}{}'.format(url.netloc, quote(url.path))
async with ClientSession() as shorten_session:
async with shorten_session.post('https://www.googleapis.com/urlshortener/v1/url',
params={'key': GOOGLE_TOKEN},
data=json.dumps({'longUrl': url_str}),
headers={'Content-Type': 'application/json'}) as response:
j = await response.json()
if 'id' in j:
url_str = j['id']
else:
print('Something went wrong while shortening:')
pprint(j)
caption = '<{}> {} ({})'.format(username, content['body'], url_str)
await group.send_photo(f, caption=caption)
caption = '<{}> {} ({})'.format(username, content['body'],
url_str)
await group.send_photo(img_file, caption=caption)
else:
print(json.dumps(content, indent=4))
return create_response(200, {})
async def _matrix_request(method_fun, category, path, user_id, data=None, content_type=None):
async def _matrix_request(method_fun, category, path, user_id, data=None,
content_type=None):
# pylint: disable=too-many-arguments
# Due to this being a helper function, the argument count acceptable
buffer = None
if data is not None:
if isinstance(data, dict):
data = json.dumps(data)
buffer = json.dumps(data)
content_type = 'application/json; charset=utf-8'
elif content_type is None:
buffer = data
content_type = 'application/octet-stream'
params = {'access_token': AS_TOKEN}
if user_id is not None:
params['user_id'] = user_id
async with method_fun('{}_matrix/{}/r0/{}'.format(MATRIX_HOST, quote(category), quote(path)),
params=params, data=data, headers={'Content-Type': content_type}) as response:
if response.headers['Content-Type'].split(';')[0] == 'application/json':
async with method_fun('{}_matrix/{}/r0/{}'
.format(MATRIX_HOST, quote(category), quote(path)),
params=params, data=buffer,
headers={'Content-Type': content_type}) as response:
if response.headers['Content-Type'].split(';')[0] \
== 'application/json':
return await response.json()
else:
return await response.read()
def matrix_post(category, path, user_id, data, content_type=None):
return _matrix_request(client_session.post, category, path, user_id, data, content_type)
return _matrix_request(MATRIX_SESS.post, category, path, user_id, data,
content_type)
def matrix_put(category, path, user_id, data, content_type=None):
return _matrix_request(client_session.put, category, path, user_id, data, content_type)
return _matrix_request(MATRIX_SESS.put, category, path, user_id, data,
content_type)
def matrix_get(category, path, user_id):
return _matrix_request(client_session.get, category, path, user_id)
return _matrix_request(MATRIX_SESS.get, category, path, user_id)
def matrix_delete(category, path, user_id):
return _matrix_request(client_session.delete, category, path, user_id)
return _matrix_request(MATRIX_SESS.delete, category, path, user_id)
async def matrix_room(request):
room_alias = request.match_info['room_alias']
args = parse_qs(urlparse(request.path_qs).query)
print('Checking for {} | {}'.format(unquote(room_alias), args['access_token'][0]))
print('Checking for {} | {}'.format(unquote(room_alias),
args['access_token'][0]))
try:
if args['access_token'][0] != HS_TOKEN:
return create_response(403, {'errcode': 'M_FORBIDDEN'})
except KeyError:
return create_response(401, {'errcode': 'NL.SIJMENSCHOON.TELEMATRIX_UNAUTHORIZED'})
return create_response(401,
{'errcode':
'NL.SIJMENSCHOON.TELEMATRIX_UNAUTHORIZED'})
localpart, host = room_alias.split(':')
localpart = room_alias.split(':')[0]
chat = '_'.join(localpart.split('_')[1:])
if chat in telegram_chats:
await matrix_post('client', 'createRoom', None, {'room_alias_name': localpart[1:]})
if chat in TELEGRAM_CHATS:
await matrix_post('client', 'createRoom', None,
{'room_alias_name': localpart[1:]})
return create_response(200, {})
else:
return create_response(404, {'errcode': 'NL.SIJMENSCHOON.TELEMATRIX_NOT_FOUND'})
return create_response(404, {'errcode':
'NL.SIJMENSCHOON.TELEMATRIX_NOT_FOUND'})
def send_matrix_message(room_id, user_id, txn_id, **kwargs):
return matrix_put('client', 'rooms/{}/send/m.room.message/{}'.format(room_id, txn_id), user_id, kwargs)
return matrix_put('client', 'rooms/{}/send/m.room.message/{}'
.format(room_id, txn_id), user_id, kwargs)
async def upload_tgfile_to_matrix(file_id, user_id):
file_path = (await bot.get_file(file_id))['file_path']
request = await bot.download_file(file_path)
file_path = (await TG_BOT.get_file(file_id))['file_path']
request = await TG_BOT.download_file(file_path)
mimetype = request.headers['Content-Type']
data = await request.read()
j = await matrix_post('media', 'upload', user_id, data, mimetype)
if 'content_uri' in j:
return (j['content_uri'], mimetype, len(data))
return j['content_uri'], mimetype, len(data)
else:
return None, None, 0
async def register_join_matrix(chat, room_id, user_id):
name = chat.sender['first_name']
if 'last_name' in chat.sender:
@ -190,23 +269,27 @@ async def register_join_matrix(chat, room_id, user_id):
name += ' (Telegram)'
user = user_id.split(':')[0][1:]
await matrix_post('client', 'register', None, {'type': 'm.login.application_service', 'user': user})
profile_photos = await bot.get_user_profile_photos(chat.sender['id'])
await matrix_post('client', 'register', None,
{'type': 'm.login.application_service', 'user': user})
profile_photos = await TG_BOT.get_user_profile_photos(chat.sender['id'])
try:
pp_file_id = profile_photos['result']['photos'][0][-1]['file_id']
pp_uri, _, _ = await upload_tgfile_to_matrix(pp_file_id, user_id)
if pp_uri:
await matrix_put('client', 'profile/{}/avatar_url'.format(user_id), user_id, {'avatar_url': pp_uri})
await matrix_put('client', 'profile/{}/avatar_url'.format(user_id),
user_id, {'avatar_url': pp_uri})
except IndexError:
pass
await matrix_put('client', 'profile/{}/displayname'.format(user_id), user_id, {'displayname': name})
await matrix_put('client', 'profile/{}/displayname'.format(user_id),
user_id, {'displayname': name})
await matrix_post('client', 'join/{}'.format(room_id), user_id, {})
@bot.handle('photo')
@TG_BOT.handle('photo')
async def aiotg_photo(chat, photo):
try:
room_id = telegram_chats[str(chat.id)]
room_id = TELEGRAM_CHATS[str(chat.id)]
except KeyError:
print('Unknown telegram chat {}'.format(chat))
return
@ -216,74 +299,112 @@ async def aiotg_photo(chat, photo):
file_id = photo[-1]['file_id']
uri, mime, length = await upload_tgfile_to_matrix(file_id, user_id)
info = {'mimetype': mime, 'size': length, 'h': photo[-1]['height'], 'w': photo[-1]['width']}
body = 'Image_{}{}'.format(int(time.time() * 1000), mimetypes.guess_extension(mime))
info = {'mimetype': mime, 'size': length, 'h': photo[-1]['height'],
'w': photo[-1]['width']}
body = 'Image_{}{}'.format(int(time() * 1000),
mimetypes.guess_extension(mime))
if uri:
j = await send_matrix_message(room_id, user_id, txn_id, body=body, url=uri, info=info, msgtype='m.image')
j = await send_matrix_message(room_id, user_id, txn_id, body=body,
url=uri, info=info, msgtype='m.image')
if 'errcode' in j and j['errcode'] == 'M_FORBIDDEN':
await register_join_matrix(chat, room_id, user_id)
await send_matrix_message(room_id, user_id, txn_id, body=body, url=uri, info=info, msgtype='m.image')
await send_matrix_message(room_id, user_id, txn_id, body=body,
url=uri, info=info, msgtype='m.image')
@bot.command(r'(?s)(.*)')
@TG_BOT.command(r'(?s)(.*)')
async def aiotg_message(chat, match):
try:
room_id = telegram_chats[str(chat.id)]
room_id = TELEGRAM_CHATS[str(chat.id)]
except KeyError:
print('Unknown telegram chat {}'.format(chat))
return
user_id = USER_ID_FORMAT.format(chat.sender['id'])
txn_id = quote('{}:{}'.format(chat.message['message_id'], chat.id))
message = match.group(0)
if 'forward_from' in chat.message:
fw_from = chat.message['forward_from']
if 'last_name' in fw_from:
msg_from = '{} {} (Telegram)'.format(fw_from['first_name'], fw_from['last_name'])
msg_from = '{} {} (Telegram)'.format(fw_from['first_name'],
fw_from['last_name'])
else:
msg_from = '{} (Telegram)'.format(fw_from['first_name'])
date = datetime.datetime.fromtimestamp(fw_from['date'])
date = datetime.fromtimestamp(fw_from['date'])
quoted_msg = '\n'.join(['>{}'.format(x) for x in message.split('\n')])
quoted_msg = 'Forwarded from {}, who {} said:\n{}'.format(msg_from, date, quoted_msg)
quoted_msg = 'Forwarded from {}, who {} said:\n{}' \
.format(msg_from, date, quoted_msg)
quoted_html = '<blockquote>{}</blockquote>' \
.format(html.escape(message).replace('\n', '<br />'))
quoted_html = '<i>Forwarded from {}, who {} said:</i>\n{}' \
.format(html.escape(msg_from), html.escape(str(date)),
quoted_html)
j = await send_matrix_message(room_id, user_id, txn_id,
body=quoted_msg,
formatted_body=quoted_html,
format='org.matrix.custom.html',
msgtype='m.text')
quoted_html = '<blockquote>{}</blockquote>'.format(html.escape(message).replace('\n', '<br />'))
quoted_html = '<i>Forwarded from {}, who {} said:</i>\n{}'.format(html.escape(msg_from), html.escape(str(date)), quoted_html)
j = await send_matrix_message(room_id, user_id, txn_id, body=quoted_msg, formatted_body=quoted_html, format='org.matrix.custom.html', msgtype='m.text')
elif 'reply_to_message' in chat.message:
re_msg = chat.message['reply_to_message']
if 'last_name' in re_msg['from']:
msg_from = '{} {} (Telegram)'.format(re_msg['from']['first_name'], re_msg['from']['last_name'])
msg_from = '{} {} (Telegram)'.format(re_msg['from']['first_name'],
re_msg['from']['last_name'])
else:
msg_from = '{} (Telegram)'.format(re_msg['from']['first_name'])
date = datetime.datetime.fromtimestamp(re_msg['date']).strftime('on %Y-%m-%d at %H:%M:%S')
date = datetime.fromtimestamp(re_msg['date']) \
.strftime('on %Y-%m-%d at %H:%M:%S')
quoted_msg = '\n'.join(['>{}'.format(x) for x in re_msg['text'].split('\n')])
quoted_msg = 'Reply to {}, who {} said:\n{}\n\n{}'.format(msg_from, date, quoted_msg, message)
quoted_msg = '\n'.join(['>{}'.format(x)
for x in re_msg['text'].split('\n')])
quoted_msg = 'Reply to {}, who {} said:\n{}\n\n{}' \
.format(msg_from, date, quoted_msg, message)
quoted_html = '<blockquote>{}</blockquote>'.format(html.escape(re_msg['text']).replace('\n', '<br />'))
quoted_html = '<i>Reply to {}, who {} said:</i><br />{}<p>{}</p>'.format(html.escape(msg_from), html.escape(str(date)), quoted_html, html.escape(message).replace('\n', '<br />'))
quoted_html = '<blockquote>{}</blockquote>' \
.format(html.escape(re_msg['text'])
.replace('\n', '<br />'))
quoted_html = '<i>Reply to {}, who {} said:</i><br />{}<p>{}</p>' \
.format(html.escape(msg_from), html.escape(str(date)),
quoted_html,
html.escape(message).replace('\n', '<br />'))
j = await send_matrix_message(room_id, user_id, txn_id, body=quoted_msg, formatted_body=quoted_html, format='org.matrix.custom.html', msgtype='m.text')
j = await send_matrix_message(room_id, user_id, txn_id,
body=quoted_msg,
formatted_body=quoted_html,
format='org.matrix.custom.html',
msgtype='m.text')
else:
j = await send_matrix_message(room_id, user_id, txn_id, body=message, msgtype='m.text')
j = await send_matrix_message(room_id, user_id, txn_id, body=message,
msgtype='m.text')
if 'errcode' in j and j['errcode'] == 'M_FORBIDDEN':
await asyncio.sleep(1)
await register_join_matrix(chat, room_id, user_id)
await asyncio.sleep(1)
await send_matrix_message(room_id, user_id, txn_id, body=message, msgtype='m.text')
await send_matrix_message(room_id, user_id, txn_id, body=message,
msgtype='m.text')
if __name__ == "__main__":
def main():
"""
Main function to get the entire ball rolling.
"""
logging.basicConfig(level=logging.WARNING)
loop = asyncio.get_event_loop()
asyncio.ensure_future(bot.loop())
asyncio.ensure_future(TG_BOT.loop())
app = web.Application(loop=loop)
app.router.add_route('GET', '/rooms/{room_alias}', matrix_room)
app.router.add_route('PUT', '/transactions/{transaction}', matrix_transaction)
app.router.add_route('PUT', '/transactions/{transaction}',
matrix_transaction)
web.run_app(app, port=5000)
if __name__ == "__main__":
main()