Browse Source

Initial commit

alpha0
Lorenzo Zolfanelli 4 years ago
commit
d3c41043e9
10 changed files with 184 additions and 0 deletions
  1. +1
    -0
      README.md
  2. +3
    -0
      moodle_emails/__init__.py
  3. BIN
      moodle_emails/__pycache__/utils.cpython-37.pyc
  4. +13
    -0
      moodle_emails/cli.py
  5. +117
    -0
      moodle_emails/utils.py
  6. BIN
      prova.db.hdf
  7. BIN
      prova.downloaded.npy
  8. +10
    -0
      pyproject.toml
  9. +30
    -0
      setup.cfg
  10. +10
    -0
      setup.py

+ 1
- 0
README.md View File

@ -0,0 +1 @@
# Moodle Emails

+ 3
- 0
moodle_emails/__init__.py View File

@ -0,0 +1,3 @@
from .utils import *

BIN
moodle_emails/__pycache__/utils.cpython-37.pyc View File


+ 13
- 0
moodle_emails/cli.py View File

@ -0,0 +1,13 @@
import argparse
from .utils import MailListDownloader
def main():
    """Command-line entry point.

    Reads the token URL (positional) and the optional ``--output`` path
    from the command line, then downloads the e-mail addresses of every
    course that has not been processed yet.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('url_token', help='Url obtained following instructions.')
    cli.add_argument('--output', help='Output path', default='mails')
    options = cli.parse_args()

    downloader = MailListDownloader(options.url_token)
    downloader.download_from_new_courses(options.output)

+ 117
- 0
moodle_emails/utils.py View File

@ -0,0 +1,117 @@
import re
import pathlib
from warnings import warn
import numpy as np
import pandas as pd
from tqdm import tqdm
from moodle_dl.moodle_connector.request_helper import RequestHelper
from moodle_dl.moodle_connector.first_contact_handler import FirstContactHandler
from moodle_dl.moodle_connector.sso_token_receiver import extract_token
def get_cds(c):
    """Extract the parenthesised ``B<digits>`` code from a course name.

    The full name is expected to look like
    ``B<digits> (B<digits>) - <anything>``; the value returned is the
    token found between the parentheses.

    Args:
        c: Object exposing a ``fullname`` string attribute.

    Returns:
        The captured code (e.g. ``"B456"``), or ``None`` when the name
        does not follow the expected pattern.
    """
    # Raw string: in a plain literal ``\d`` and ``\(`` are invalid escape
    # sequences, which newer Python versions warn about.
    match = re.match(r'^B\d+ \((B\d+)\) -.*', c.fullname)
    if match is None:
        return None
    return match.group(1)
def normalize_name(email, namestring):
    """Split a display name into capitalised first and last names.

    Strategies, tried in order:

    1. For ``stud.unifi.it`` addresses whose local part has the form
       ``first.last``, every word of *namestring* is assigned to the first
       or last name depending on which half of the local part contains it.
    2. A name made of exactly two words is read as ``<last> <first>``.
    3. Otherwise the whole name is returned as the last name.

    Args:
        email: E-mail address of the user.
        namestring: Full display name, words separated by whitespace.

    Returns:
        A ``(firstname, lastname)`` tuple of strings; ``firstname`` is
        ``""`` when the name could not be split.
    """
    # rpartition never raises, unlike unpacking ``split('@')`` on an address
    # that does not contain exactly one '@'.
    mailuser, _, domain = email.rpartition('@')
    # split() without an argument drops the empty tokens produced by repeated
    # spaces; an empty token would match every substring test below.
    allnames = namestring.lower().split()

    if domain == 'stud.unifi.it':
        halves = mailuser.split('.')
        # Only a plain ``first.last`` local part can be mapped reliably;
        # anything else falls through to the generic strategies.
        if len(halves) == 2:
            first_half, last_half = halves
            firstname = " ".join(n.capitalize() for n in allnames if n in first_half)
            lastname = " ".join(n.capitalize() for n in allnames if n in last_half)
            return firstname, lastname

    if len(allnames) == 2:
        # Use the split words; indexing ``namestring`` itself would only
        # yield single characters.
        lastname, firstname = (n.capitalize() for n in allnames)
        return firstname, lastname

    return "", " ".join(n.capitalize() for n in allnames)
def is_student(u):
    """Tell whether an enrolled-user record belongs to a student.

    A record qualifies when one of its roles has ``roleid`` 5 and its
    e-mail address does not end with ``@unifi.it``.

    Args:
        u: Mapping with a ``roles`` list (each item having a ``roleid``)
            and an ``email`` string.

    Returns:
        ``True`` for a student record, ``False`` otherwise.
    """
    role_ids = tuple(role['roleid'] for role in u['roles'])
    if 5 not in role_ids:
        return False
    return not u['email'].endswith('@unifi.it')
class MailListDownloader():
    """Collect student e-mail addresses from a user's Moodle courses.

    Progress is persisted in two files derived from the output path with
    ``Path.with_suffix``: ``.downloaded.npy`` (ids of the courses already
    processed) and ``.db.hdf`` (the collected addresses, key ``emails``).
    """

    def __init__(self, url):
        """Set up the Moodle connection from a token URL.

        Args:
            url: URL from which ``extract_token`` obtains the token and
                the secret token.
        """
        self.token, self.secret_token = extract_token(url)
        self.rh = RequestHelper('e-l.unifi.it', token=self.token)
        self.fch = FirstContactHandler(self.rh)

    @staticmethod
    def _load_downloaded_ids(downloaded_file):
        """Return the course ids stored in *downloaded_file* ([] if absent)."""
        if not downloaded_file.is_file():
            return []
        # tofile() stores raw bytes without dtype information, so the dtype
        # must match the one used when writing; the fromfile() default is
        # float64, which would turn the stored integers into garbage.
        return np.fromfile(downloaded_file, dtype=np.int64).tolist()

    @staticmethod
    def _save_downloaded_ids(downloaded_file, ids):
        """Write the course ids to *downloaded_file* as raw int64 values."""
        np.array(ids, dtype=np.int64).tofile(downloaded_file)

    def course_emails(self, courseid):
        """Fetch the students enrolled in a course.

        Records without ``fullname`` are reported and ignored; records
        without ``email`` or not recognised by ``is_student`` are counted
        as skipped.

        Args:
            courseid: Id of the course to query.

        Returns:
            A list of dicts with ``firstname``, ``lastname`` and ``email``.
        """
        users_raw = self.rh.post_REST('core_enrol_get_enrolled_users', dict(courseid=courseid))
        users = []
        skipped = 0
        for u in users_raw:
            if 'fullname' not in u:
                print(f'Malformed record u={u}')
                continue
            if 'email' not in u or not is_student(u):
                skipped += 1
                continue
            firstname, lastname = normalize_name(u['email'], u['fullname'])
            users.append(dict(firstname=firstname, lastname=lastname, email=u['email']))
        print(f"\t{len(users)} found, {skipped} skipped.")
        return users

    def course_list(self):
        """List the courses of the authenticated user.

        Returns:
            A list of dicts with ``id``, ``cds`` (result of ``get_cds``,
            possibly ``None``) and ``fullname``.
        """
        userid, _ = self.fch.fetch_userid_and_version()
        courses = self.fch.fetch_courses(userid)
        return [dict(id=c.id, cds=get_cds(c), fullname=c.fullname) for c in courses]

    def download_from_new_courses(self, path):
        """Download the addresses of every course not processed before.

        Courses whose ``cds`` is empty, or whose id is already recorded in
        the ``.downloaded.npy`` file, are ignored. New rows are merged into
        the existing table, dropping exact duplicates, and both state
        files are rewritten at the end.

        Args:
            path: Base output path; the state files are obtained from it
                with ``Path.with_suffix``.

        Returns:
            The updated ``pandas.DataFrame`` with columns ``cds``,
            ``firstname``, ``lastname`` and ``email``.
        """
        path = pathlib.Path(path)
        downloaded_file = path.with_suffix('.downloaded.npy')
        database_file = path.with_suffix('.db.hdf')

        downloaded = self._load_downloaded_ids(downloaded_file)

        if database_file.is_file():
            db = pd.read_hdf(database_file, 'emails')
        else:
            db = pd.DataFrame()
            db['cds'] = []
            db['firstname'] = []
            db['lastname'] = []
            db['email'] = []

        for c in tqdm(self.course_list()):
            if not c['cds'] or c['id'] in downloaded:
                continue
            print(f"Downloading {c['fullname']}")
            cid = c['id']
            old_len = len(db)
            emails = pd.DataFrame(self.course_emails(cid))
            emails['cds'] = c['cds']
            db = pd.concat([db, emails]).drop_duplicates().reset_index(drop=True)
            downloaded.append(cid)
            new = len(db) - old_len
            duplicates = len(emails) - new
            print(f"\t{new} new, {duplicates} duplicates.")

        self._save_downloaded_ids(downloaded_file, downloaded)
        db.to_hdf(database_file, key='emails')
        return db

BIN
prova.db.hdf View File


BIN
prova.downloaded.npy View File


+ 10
- 0
pyproject.toml View File

@ -0,0 +1,10 @@
[build-system]
requires = [
"setuptools>=42",
"wheel",
"setuptools_scm[toml]>3.4",
]
build-backend = "setuptools.build_meta"
[tool.setuptools_scm]
write_to = "moodle_emails/version.py"

+ 30
- 0
setup.cfg View File

@ -0,0 +1,30 @@
[metadata]
name = moodle_emails
version = attr:moodle_emails.version
description = Download all mails from moodle.
long_description = file:README.md
long_description_content_type = text/markdown
url = https://projects.lilik.it/zolfa/moodle_emails
author = Zolfa
author_email = zolfa@lilik.it
classifiers =
Development Status :: 1 - Planning
License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Programming Language :: Python
Programming Language :: Python :: 3.9
platforms = any
[options]
packages =
moodle_emails
install_requires =
tqdm
pandas
tables
moodle-dl
setup_requires =
setuptools_scm
[options.entry_points]
console_scripts =
moodledlall = moodle_emails.cli:main

+ 10
- 0
setup.py View File

@ -0,0 +1,10 @@
#!/usr/bin/env python3
import pkg_resources
import setuptools
# Stop early unless setuptools>=42 is available: pkg_resources.require()
# raises when the requirement is not satisfied.
pkg_resources.require('setuptools>=42')
# Only the version source is configured here: it is taken from SCM metadata
# (setuptools_scm). No other arguments are passed to setup().
setuptools.setup(
use_scm_version=True,
)

Loading…
Cancel
Save