|
|
@@ -1,3 +1,4 @@ |
|
|
|
import json |
|
|
|
import re |
|
|
|
import pathlib |
|
|
|
from warnings import warn |
|
|
@@ -74,15 +75,16 @@ class MailListDownloader(): |
|
|
|
def course_list(self): |
|
|
|
userid, version = self.fch.fetch_userid_and_version() |
|
|
|
courses = self.fch.fetch_courses(userid) |
|
|
|
return [ dict(id=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ] |
|
|
|
return [ dict(cid=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ] |
|
|
|
|
|
|
|
def download_from_new_courses(self, path): |
|
|
|
path = pathlib.Path(path) |
|
|
|
downloaded_file = path.with_suffix('.downloaded.npy') |
|
|
|
downloaded_file = path.with_suffix('.downloaded.json') |
|
|
|
database_file = path.with_suffix('.db.hdf') |
|
|
|
|
|
|
|
if downloaded_file.is_file(): |
|
|
|
downloaded = list(np.fromfile(downloaded_file)) |
|
|
|
with downloaded_file.open() as f: |
|
|
|
downloaded = json.load(f) |
|
|
|
else: |
|
|
|
downloaded = [] |
|
|
|
|
|
|
@@ -97,9 +99,9 @@ class MailListDownloader(): |
|
|
|
|
|
|
|
for c in tqdm(self.course_list()): |
|
|
|
old_len = len(db) |
|
|
|
if c['cds'] and not c['id'] in downloaded: |
|
|
|
if c['cds'] and not c['cid'] in downloaded: |
|
|
|
print(f"Downloading {c['fullname']}") |
|
|
|
cid = c['id'] |
|
|
|
cid = c['cid'] |
|
|
|
cds = c['cds'] |
|
|
|
emails = self.course_emails(cid) |
|
|
|
emails = pd.DataFrame(emails) |
|
|
@@ -111,7 +113,8 @@ class MailListDownloader(): |
|
|
|
duplicates = len(emails) - new |
|
|
|
print(f"\t{new_len-old_len} new, {duplicates} duplicates.") |
|
|
|
|
|
|
|
np.array(downloaded).tofile(downloaded_file) |
|
|
|
with downloaded_file.open('w') as f: |
|
|
|
json.dump(downloaded, f) |
|
|
|
db.to_hdf(database_file, key='emails') |
|
|
|
return db |
|
|
|
|