Initial commit

3 years ago · e243ce6aae
--- a/README.md
+++ b/README.md
@ -0,0 +1 @@
 # Moodle Emails
--- a/moodle_emails/__init__.py
+++ b/moodle_emails/__init__.py
@ -0,0 +1,3 @@
 from .utils import *


--- a/moodle_emails/cli.py
+++ b/moodle_emails/cli.py
@ -0,0 +1,13 @@
 import argparse

 from .utils import MailListDownloader


 def main():
    """Command-line entry point.

    Parses a positional ``url_token`` and an optional ``--output`` path
    (default ``'mails'``), builds a ``MailListDownloader`` from the token URL
    and runs ``download_from_new_courses`` on the output path.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'url_token',
        help='Url obtained following instructions.',
    )
    arg_parser.add_argument(
        '--output',
        default='mails',
        help='Output path',
    )
    cli_args = arg_parser.parse_args()

    downloader = MailListDownloader(cli_args.url_token)
    downloader.download_from_new_courses(cli_args.output)
--- a/moodle_emails/utils.py
+++ b/moodle_emails/utils.py
@ -0,0 +1,117 @@
 import re
 import pathlib
 from warnings import warn

 import numpy as np
 import pandas as pd
 from tqdm import tqdm

 from moodle_dl.moodle_connector.request_helper import RequestHelper
 from moodle_dl.moodle_connector.first_contact_handler import FirstContactHandler
 from moodle_dl.moodle_connector.sso_token_receiver import extract_token



 def get_cds(c):
    """Return the parenthesised ``B<digits>`` code found in a course full name.

    The name must look like ``"B<digits> (B<digits>) - <anything>"``; the code
    inside the parentheses is returned (presumably the degree-programme
    identifier, hence "cds" -- confirm with the course naming scheme).

    Args:
        c: Any object exposing a ``fullname`` string attribute.

    Returns:
        The captured code as a string, or ``None`` when the name does not
        follow the expected layout.
    """
    # Raw string: ``\d``, ``\(`` and ``\)`` are regex escapes, not string
    # escapes; a non-raw literal triggers an invalid-escape warning.
    match = re.match(r'^B\d+ \((B\d+)\) -.*', c.fullname)
    if match is None:
        return None
    return match.group(1)

 def normalize_name(email, namestring):
    """Split a display name into ``(firstname, lastname)``.

    Strategy, in order:

    1. For ``@stud.unifi.it`` addresses of the form ``first.last``, each word
       of ``namestring`` is assigned to the first or last name depending on
       which half of the mailbox name contains it.
    2. If the display name has exactly two words, it is read as
       ``"<lastname> <firstname>"``.
    3. Otherwise the first name is left empty and every word goes into the
       last name.

    Args:
        email: E-mail address of the user.
        namestring: Full display name of the user.

    Returns:
        A ``(firstname, lastname)`` tuple of capitalised strings.
    """
    # rpartition never raises, unlike unpacking ``email.split('@')`` when the
    # address has zero or several '@'.
    mailuser, _, domain = email.rpartition('@')
    # split() without argument drops empty tokens caused by repeated spaces;
    # an empty token would be a substring of everything below.
    allnames = namestring.lower().split()

    mail_parts = mailuser.split('.')
    # Only trust the mailbox layout when it is exactly "first.last"; other
    # shapes fall through to the generic handling instead of raising.
    if domain == 'stud.unifi.it' and len(mail_parts) == 2:
        mail_first, mail_last = mail_parts
        firstname = " ".join(n.capitalize() for n in allnames if n in mail_first)
        lastname = " ".join(n.capitalize() for n in allnames if n in mail_last)
        return firstname, lastname

    if len(allnames) == 2:
        # Index the word list, not the raw string (which would yield single
        # characters).
        lastname, firstname = (n.capitalize() for n in allnames)
        return firstname, lastname

    return "", " ".join(n.capitalize() for n in allnames)


 def is_student(u):
    """Tell whether an enrolled-user record should be treated as a student.

    A record qualifies when one of its ``roles`` entries has ``roleid`` 5 and
    its ``email`` does not end with ``'@unifi.it'``.
    """
    assigned_roles = tuple(role['roleid'] for role in u['roles'])
    if 5 not in assigned_roles:
        return False
    # Role 5 alone is not enough: '@unifi.it' mailboxes are excluded.
    return not u['email'].endswith('@unifi.it')

 class MailListDownloader():
    """Collect student names and e-mails from the Moodle courses of an account.

    The instance talks to the host ``e-l.unifi.it`` through moodle-dl's
    ``RequestHelper`` / ``FirstContactHandler`` and keeps two files next to
    the requested output path so that later runs only fetch new courses.
    """

    def __init__(self, url):
        """Set up the Moodle connection from a token URL.

        Args:
            url: String handed to moodle-dl's ``extract_token`` (presumably
                the SSO redirect URL carrying the token -- confirm against
                the moodle-dl documentation).
        """
        self.token, self.secret_token = extract_token(url)
        self.rh = RequestHelper('e-l.unifi.it', token=self.token)
        self.fch = FirstContactHandler(self.rh)

    def course_emails(self, courseid):
        """Fetch the students enrolled in one course.

        Calls the ``core_enrol_get_enrolled_users`` web service and keeps the
        records accepted by ``is_student``. Records without ``fullname`` are
        reported and ignored; records without ``email`` or that are not
        students are counted as skipped.

        Args:
            courseid: Moodle id of the course.

        Returns:
            A list of dicts with keys ``firstname``, ``lastname``, ``email``.
        """
        users_raw = self.rh.post_REST('core_enrol_get_enrolled_users', dict(courseid=courseid))
        users = []
        skipped = 0

        for u in users_raw:
            if 'fullname' not in u:
                print(f'Malformed record u={u}')
                continue
            # The e-mail check must come first: is_student reads u['email'].
            if 'email' not in u or not is_student(u):
                skipped += 1
                continue

            firstname, lastname = normalize_name(u['email'], u['fullname'])
            users.append(dict(firstname=firstname, lastname=lastname, email=u['email']))

        print(f"\t{len(users)} found, {skipped} skipped.")
        return users

    def course_list(self):
        """List the courses of the authenticated user.

        Returns:
            A list of dicts with keys ``id``, ``cds`` (result of ``get_cds``,
            possibly ``None``) and ``fullname``.
        """
        userid, _version = self.fch.fetch_userid_and_version()
        courses = self.fch.fetch_courses(userid)
        return [dict(id=c.id, cds=get_cds(c), fullname=c.fullname) for c in courses]

    def download_from_new_courses(self, path):
        """Download e-mails from every course not processed in a previous run.

        Two files derived from ``path`` hold the state between runs:
        ``<path>.downloaded.npy`` (raw int64 ids of processed courses) and
        ``<path>.db.hdf`` (the accumulated table, key ``'emails'``). Courses
        whose ``cds`` is empty or whose id is already recorded are skipped.

        Args:
            path: Base output path (string or path-like).

        Returns:
            The accumulated ``pandas.DataFrame`` with columns ``cds``,
            ``firstname``, ``lastname`` and ``email``.
        """
        path = pathlib.Path(path)
        downloaded_file = path.with_suffix('.downloaded.npy')
        database_file = path.with_suffix('.db.hdf')

        if downloaded_file.is_file():
            # tofile() stores raw bytes without dtype information and
            # fromfile() defaults to float, so the dtype must be spelled out
            # on both sides or the ids come back as garbage.
            # NOTE: id files written by earlier versions may be misread once;
            # the affected courses are simply fetched again and the repeated
            # rows are removed by drop_duplicates() below.
            downloaded = np.fromfile(downloaded_file, dtype=np.int64).tolist()
        else:
            downloaded = []

        if database_file.is_file():
            db = pd.read_hdf(database_file, 'emails')
        else:
            db = pd.DataFrame(columns=['cds', 'firstname', 'lastname', 'email'])

        for c in tqdm(self.course_list()):
            cid = c['id']
            cds = c['cds']
            if not cds or cid in downloaded:
                continue

            print(f"Downloading {c['fullname']}")
            old_len = len(db)
            emails = pd.DataFrame(self.course_emails(cid))
            emails['cds'] = cds
            db = pd.concat([db, emails]).drop_duplicates().reset_index(drop=True)
            downloaded.append(cid)

            new = len(db) - old_len
            duplicates = len(emails) - new
            print(f"\t{new} new, {duplicates} duplicates.")

            # Checkpoint after every fetched course so an interrupted run
            # keeps its progress; nothing is rewritten for skipped courses.
            np.array(downloaded, dtype=np.int64).tofile(downloaded_file)
            db.to_hdf(database_file, key='emails')
        return db

--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,10 @@
 [build-system]
 requires = [
    "setuptools>=42",
    "wheel",
    "setuptools_scm[toml]>3.4",
 ]
 build-backend = "setuptools.build_meta"

 [tool.setuptools_scm]
 write_to = "moodle_emails/version.py"
--- a/setup.cfg
+++ b/setup.cfg
@ -0,0 +1,30 @@
 [metadata]
 name = moodle_emails
 version = attr:moodle_emails.version
 description = Download all mails from moodle.
 long_description = file:README.md
 long_description_content_type = text/markdown
 url = https://projects.lilik.it/zolfa/moodle_emails
 author = Zolfa
 author_email = zolfa@lilik.it
 classifiers =
    Development Status :: 1 - Planning
    License :: OSI Approved :: GNU General Public License v3 (GPLv3)
    Programming Language :: Python
    Programming Language :: Python :: 3.9
 platforms = any

 [options]
 packages =
    moodle_emails
 install_requires =
    tqdm
    pandas
    tables
    moodle-dl
 setup_requires =
    setuptools_scm

 [options.entry_points]
 console_scripts =
    moodledlall = moodle_emails.cli:main
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,10 @@
 #!/usr/bin/env python3

 import pkg_resources
 import setuptools


 # Check up front that the installed setuptools satisfies the minimum version
 # also declared in pyproject.toml.
 pkg_resources.require('setuptools>=42')
 # Static metadata lives in setup.cfg; only the version is handled here, via
 # setuptools_scm (use_scm_version=True).
 setuptools.setup(
    use_scm_version=True,
 )