Fixed skipping of already-downloaded courses

4 years ago · ead47436cb
--- a/moodle_emails/utils.py
+++ b/moodle_emails/utils.py
@ -1,3 +1,4 @@
 import json
 import re
 import pathlib
 from warnings import warn
@ -74,15 +75,16 @@ class MailListDownloader():
    def course_list(self):
        userid, version = self.fch.fetch_userid_and_version()
        courses = self.fch.fetch_courses(userid)
        return [ dict(id=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ]
        return [ dict(cid=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ]

    def download_from_new_courses(self, path):
        path = pathlib.Path(path)
        downloaded_file = path.with_suffix('.downloaded.npy')
        downloaded_file = path.with_suffix('.downloaded.json')
        database_file = path.with_suffix('.db.hdf')

        if downloaded_file.is_file():
            downloaded = list(np.fromfile(downloaded_file))
            with downloaded_file.open() as f:
                downloaded = json.load(f)
        else:
            downloaded = []
        
@ -97,9 +99,9 @@ class MailListDownloader():
        
        for c in tqdm(self.course_list()):
            old_len = len(db)
            if c['cds'] and not c['id'] in downloaded:
            if c['cds'] and not c['cid'] in downloaded:
                print(f"Downloading {c['fullname']}")
                cid = c['id']
                cid = c['cid']
                cds = c['cds']
                emails = self.course_emails(cid)
                emails = pd.DataFrame(emails)
@ -111,7 +113,8 @@ class MailListDownloader():
                duplicates = len(emails) - new
                print(f"\t{new_len-old_len} new, {duplicates} duplicates.")
        
            np.array(downloaded).tofile(downloaded_file)
            with downloaded_file.open('w') as f:
                json.dump(downloaded, f)
            db.to_hdf(database_file, key='emails')
        return db