commit d3c41043e99597dfadb5ae6c8f58458f4e26f61f
Author: Lorenzo Zolfanelli
Date:   Sun May 2 23:57:58 2021 +0200

    Initial commit

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..15c7f4e
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# Moodle Emails
diff --git a/moodle_emails/__init__.py b/moodle_emails/__init__.py
new file mode 100644
index 0000000..d81436d
--- /dev/null
+++ b/moodle_emails/__init__.py
@@ -0,0 +1,3 @@
+from .utils import *
+
+
diff --git a/moodle_emails/__pycache__/utils.cpython-37.pyc b/moodle_emails/__pycache__/utils.cpython-37.pyc
new file mode 100644
index 0000000..a019309
Binary files /dev/null and b/moodle_emails/__pycache__/utils.cpython-37.pyc differ
diff --git a/moodle_emails/cli.py b/moodle_emails/cli.py
new file mode 100644
index 0000000..43afa27
--- /dev/null
+++ b/moodle_emails/cli.py
@@ -0,0 +1,14 @@
+import argparse
+
+from .utils import MailListDownloader
+
+
+def main():
+    """Parse the command line and download e-mails from the new courses."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('url_token', help='Url obtained following instructions.')
+    parser.add_argument('--output', help='Output path', default='mails')
+    args = parser.parse_args()
+
+    md = MailListDownloader(args.url_token)
+    md.download_from_new_courses(args.output)
diff --git a/moodle_emails/utils.py b/moodle_emails/utils.py
new file mode 100644
index 0000000..3f1cb1b
--- /dev/null
+++ b/moodle_emails/utils.py
@@ -0,0 +1,156 @@
+"""Helpers to collect student e-mail addresses from Moodle courses."""
+import re
+import pathlib
+from warnings import warn
+
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
+from moodle_dl.moodle_connector.request_helper import RequestHelper
+from moodle_dl.moodle_connector.first_contact_handler import FirstContactHandler
+from moodle_dl.moodle_connector.sso_token_receiver import extract_token
+
+
+# Course full names of interest look like "B<digits> (B<digits>) - <title>";
+# the code inside the parentheses is the captured group.
+_CDS_REGEX = re.compile(r'^B\d+ \((B\d+)\) -.*')
+
+# Raw on-disk dtype of the ids of the already downloaded courses.
+_DOWNLOADED_DTYPE = np.int64
+
+
+def get_cds(c):
+    """Return the code in parentheses from ``c.fullname``, or None.
+
+    Only full names shaped like ``B<digits> (B<digits>) - ...`` match.
+    """
+    m = _CDS_REGEX.match(c.fullname)
+    if m:
+        return m.group(1)
+    return None
+
+
+def normalize_name(email, namestring):
+    """Split a display name into ``(firstname, lastname)``.
+
+    For ``<first>.<last>@stud.unifi.it`` addresses each word of *namestring*
+    goes to the first or last name depending on which half of the mailbox
+    name contains it.  Otherwise a two-word *namestring* is read as
+    "lastname firstname"; anything else is returned whole as the last name
+    with an empty first name.
+    """
+    mailuser, _, domain = email.lower().rpartition('@')
+    allnames = namestring.lower().split()
+
+    if domain == 'stud.unifi.it':
+        parts = mailuser.split('.')
+        # A mailbox that is not exactly "<first>.<last>" cannot drive the
+        # split: fall through to the generic rules instead of raising.
+        if len(parts) == 2:
+            mail_first, mail_last = parts
+            firstname = " ".join(n.capitalize() for n in allnames if n in mail_first)
+            lastname = " ".join(n.capitalize() for n in allnames if n in mail_last)
+            return firstname, lastname
+
+    if len(allnames) == 2:
+        # Use the split words; indexing *namestring* would pick characters.
+        lastname, firstname = (n.capitalize() for n in allnames)
+        return firstname, lastname
+
+    return "", " ".join(a.capitalize() for a in allnames)
+
+
+def is_student(u):
+    """Return True if record *u* has role id 5 and no ``@unifi.it`` e-mail.
+
+    *u* is a dict with ``email`` and, optionally, ``roles`` (a list of dicts
+    carrying ``roleid``).  Role id 5 is presumably the student role; verify
+    against the Moodle instance.
+    """
+    roleids = {r['roleid'] for r in u.get('roles', ())}
+    return 5 in roleids and not u['email'].endswith('@unifi.it')
+
+
+class MailListDownloader():
+    """Collect e-mail addresses from the courses of a Moodle account."""
+
+    def __init__(self, url):
+        """Set up the request helpers from the token embedded in *url*."""
+        self.token, self.secret_token = extract_token(url)
+        self.rh = RequestHelper('e-l.unifi.it', token=self.token)
+        self.fch = FirstContactHandler(self.rh)
+
+    def course_emails(self, courseid):
+        """Return the students enrolled in *courseid*.
+
+        Each entry is a dict with ``firstname``, ``lastname`` and ``email``.
+        Records without an e-mail or rejected by ``is_student`` are counted
+        as skipped; records without ``fullname`` are only reported.
+        """
+        users_raw = self.rh.post_REST('core_enrol_get_enrolled_users', dict(courseid=courseid))
+        users = []
+        skipped = 0
+
+        for u in users_raw:
+            if 'fullname' not in u:
+                print(f'Malformed record u={u}')
+                continue
+            if 'email' not in u or not is_student(u):
+                skipped += 1
+                continue
+
+            firstname, lastname = normalize_name(u['email'], u['fullname'])
+            users.append(dict(firstname=firstname, lastname=lastname, email=u['email']))
+        print(f"\t{len(users)} found, {skipped} skipped.")
+        return users
+
+    def course_list(self):
+        """Return ``id``, ``cds`` and ``fullname`` of every course of the user."""
+        userid, _ = self.fch.fetch_userid_and_version()
+        courses = self.fch.fetch_courses(userid)
+        return [dict(id=c.id, cds=get_cds(c), fullname=c.fullname) for c in courses]
+
+    def download_from_new_courses(self, path):
+        """Download e-mails from the courses not processed yet and save them.
+
+        Two files derived from *path* keep the state between runs:
+        ``.downloaded.npy`` stores the ids of the processed courses as raw
+        64-bit integers (not the NPY format, despite the extension) and
+        ``.db.hdf`` stores the collected table under the key ``emails``.
+        Courses for which ``cds`` is empty are ignored.  Returns the table.
+        """
+        path = pathlib.Path(path)
+        downloaded_file = path.with_suffix('.downloaded.npy')
+        database_file = path.with_suffix('.db.hdf')
+
+        if downloaded_file.is_file():
+            # Read with the dtype used for writing: the float64 default of
+            # np.fromfile would reinterpret the stored ids as garbage.
+            downloaded = np.fromfile(downloaded_file, dtype=_DOWNLOADED_DTYPE).tolist()
+        else:
+            downloaded = []
+
+        if database_file.is_file():
+            db = pd.read_hdf(database_file, 'emails')
+        else:
+            db = pd.DataFrame(columns=['cds', 'firstname', 'lastname', 'email'])
+
+        for c in tqdm(self.course_list()):
+            cid = c['id']
+            if not c['cds'] or cid in downloaded:
+                continue
+
+            print(f"Downloading {c['fullname']}")
+            old_len = len(db)
+            emails = pd.DataFrame(self.course_emails(cid))
+            emails['cds'] = c['cds']
+            db = pd.concat([db, emails]).drop_duplicates().reset_index(drop=True)
+            downloaded.append(cid)
+            new = len(db) - old_len
+            duplicates = len(emails) - new
+            print(f"\t{new} new, {duplicates} duplicates.")
+
+        np.array(downloaded, dtype=_DOWNLOADED_DTYPE).tofile(downloaded_file)
+        db.to_hdf(database_file, key='emails')
+        return db
diff --git a/prova.db.hdf b/prova.db.hdf
new file mode 100644
index 0000000..1698fe1
Binary files /dev/null and b/prova.db.hdf differ
diff --git a/prova.downloaded.npy b/prova.downloaded.npy
new file mode 100644
index 0000000..a508917
Binary files /dev/null and b/prova.downloaded.npy differ
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ecba641
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,10 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel",
+    "setuptools_scm[toml]>3.4",
+]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools_scm]
+write_to = "moodle_emails/version.py"
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..e1e0e3c
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,31 @@
+[metadata]
+name = moodle_emails
+version = attr:moodle_emails.version
+description = Download all mails from moodle.
+long_description = file:README.md
+long_description_content_type = text/markdown
+url = https://projects.lilik.it/zolfa/moodle_emails
+author = Zolfa
+author_email = zolfa@lilik.it
+classifiers =
+    Development Status :: 1 - Planning
+    License :: OSI Approved :: GNU General Public License v3 (GPLv3)
+    Programming Language :: Python
+    Programming Language :: Python :: 3.9
+platforms = any
+
+[options]
+packages =
+    moodle_emails
+install_requires =
+    tqdm
+    numpy
+    pandas
+    tables
+    moodle-dl
+setup_requires =
+    setuptools_scm
+
+[options.entry_points]
+console_scripts =
+    moodledlall = moodle_emails.cli:main
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..8263c13
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+import pkg_resources
+import setuptools
+
+
+pkg_resources.require('setuptools>=42')
+setuptools.setup(
+    use_scm_version=True,
+)