Browse Source

Fixed skipping of already-downloaded courses

main
Lorenzo Zolfanelli 4 years ago
parent
commit
ead47436cb
1 changed file with 9 additions and 6 deletions
  1. +9
    -6
      moodle_emails/utils.py

+ 9
- 6
moodle_emails/utils.py View File

@@ -1,3 +1,4 @@
import json
import re
import pathlib
from warnings import warn
@@ -74,15 +75,16 @@ class MailListDownloader():
def course_list(self):
userid, version = self.fch.fetch_userid_and_version()
courses = self.fch.fetch_courses(userid)
return [ dict(id=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ]
return [ dict(cid=c.id,cds=get_cds(c),fullname=c.fullname) for c in courses ]
def download_from_new_courses(self, path):
path = pathlib.Path(path)
downloaded_file = path.with_suffix('.downloaded.npy')
downloaded_file = path.with_suffix('.downloaded.json')
database_file = path.with_suffix('.db.hdf')
if downloaded_file.is_file():
downloaded = list(np.fromfile(downloaded_file))
with downloaded_file.open() as f:
downloaded = json.load(f)
else:
downloaded = []
@@ -97,9 +99,9 @@ class MailListDownloader():
for c in tqdm(self.course_list()):
old_len = len(db)
if c['cds'] and not c['id'] in downloaded:
if c['cds'] and not c['cid'] in downloaded:
print(f"Downloading {c['fullname']}")
cid = c['id']
cid = c['cid']
cds = c['cds']
emails = self.course_emails(cid)
emails = pd.DataFrame(emails)
@@ -111,7 +113,8 @@ class MailListDownloader():
duplicates = len(emails) - new
print(f"\t{new_len-old_len} new, {duplicates} duplicates.")
np.array(downloaded).tofile(downloaded_file)
with downloaded_file.open('w') as f:
json.dump(downloaded, f)
db.to_hdf(database_file, key='emails')
return db

Loading…
Cancel
Save