diff --git a/moodle_emails/utils.py b/moodle_emails/utils.py index 3f1cb1b..1d9f26b 100644 --- a/moodle_emails/utils.py +++ b/moodle_emails/utils.py @@ -1,3 +1,4 @@ +import json import re import pathlib from warnings import warn @@ -74,15 +75,16 @@ class MailListDownloader(): def course_list(self): userid, version = self.fch.fetch_userid_and_version() courses = self.fch.fetch_courses(userid) - return [ dict(id=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ] + return [ dict(cid=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ] def download_from_new_courses(self, path): path = pathlib.Path(path) - downloaded_file = path.with_suffix('.downloaded.npy') + downloaded_file = path.with_suffix('.downloaded.json') database_file = path.with_suffix('.db.hdf') if downloaded_file.is_file(): - downloaded = list(np.fromfile(downloaded_file)) + with downloaded_file.open() as f: + downloaded = json.load(f) else: downloaded = [] @@ -97,9 +99,9 @@ class MailListDownloader(): for c in tqdm(self.course_list()): old_len = len(db) - if c['cds'] and not c['id'] in downloaded: + if c['cds'] and not c['cid'] in downloaded: print(f"Downloading {c['fullname']}") - cid = c['id'] + cid = c['cid'] cds = c['cds'] emails = self.course_emails(cid) emails = pd.DataFrame(emails) @@ -111,7 +113,8 @@ class MailListDownloader(): duplicates = len(emails) - new print(f"\t{new_len-old_len} new, {duplicates} duplicates.") - np.array(downloaded).tofile(downloaded_file) + with downloaded_file.open('w') as f: + json.dump(downloaded, f) db.to_hdf(database_file, key='emails') return db