diff --git a/python/podcast/feedutils.py b/python/podcast/feedutils.py index 89624420e8c508cf2b3927474c4df6c9d7e2ee82..36de21b27e881ca383b06edfaa46554469af6972 100644 --- a/python/podcast/feedutils.py +++ b/python/podcast/feedutils.py @@ -43,7 +43,7 @@ def fetch_feed(published, url) -> FeedParserDict: if "itunes_new-feed-url" in feed.feed and feed.feed["itunes_new-feed-url"] != url: raise NewFeedUrlError(feed.feed["itunes_new-feed-url"]) logger.info( - "podcast init size of entries: %d", len(feed.entries) + "podcast feed number of entries: %d", len(feed.entries) ) return feed @@ -62,6 +62,23 @@ def iterate_feed_entries(feed, should_fetch_next_page: Callable[[], bool] = lamb else: break +def get_post_audio_data_from_enclousures(entry): + if len(entry["enclosures"]) == 0: + logger.warning("post %s has no enclosures", entry.title) + for e in entry["enclosures"]: + if "href" in e: + if e.type[:5] == "audio": + length = 0 + if "length" in e.keys(): + length = e.length + return e.href, e.type, length + elif e.href[-3:] == "mp3": + length = 0 + if "length" in e.keys(): + length = e.length + return e.href, "audio/mp3", length + return None, None, 0 + class FeedFetchingError(BaseException): def __init__(self, msg): diff --git a/python/podcast/persistent_log.py b/python/podcast/persistent_log.py index ace1590179f6d879e63db9e2a634bb9a2b9aa274..b9eb2c73de648a763bca733038ba0ce5a998343f 100644 --- a/python/podcast/persistent_log.py +++ b/python/podcast/persistent_log.py @@ -19,6 +19,7 @@ class LogType(Enum): SuccessfulRefresh = "SuccessfulRefresh" Refresh304 = "Refresh304" FeedRedirect = "FeedRedirect" + EpisodeHrefChanged = "EpisodeHrefChanged" class LogMessage(BaseModel): diff --git a/python/podcast/podcast.py b/python/podcast/podcast.py index 645d28308a78aae58550148b8ec9708d0749b00d..69fb79c44d66568d3377d7c613b0895d7ee2fd39 100644 --- a/python/podcast/podcast.py +++ b/python/podcast/podcast.py @@ -162,6 +162,21 @@ class Podcast(BaseModel): new_posts = [] for entry in feedutils.iterate_feed_entries(feed): if entry_already_known(entry): + if "guid" in entry and entry["guid"] in known_guids: + (new_href, new_type, new_length) = feedutils.get_post_audio_data_from_enclousures(entry) + if new_href and new_type: + try: + post = Podpost.get_or_none(Podpost.guid == entry["guid"] and Podpost.href != new_href) + if post: + logger.info("Updating href of post with guid '%s'", entry["guid"]) + (post.href, post.type, post.length) = (new_href, new_type, new_length) + post.save() + persist_log(LogType.EpisodeHrefChanged, title=self.title, entry=entry, old_href=post.href) + else: + logger.warning("tried updating an existing episode but did not find it in db (guid=%s,new_href=%s)",entry["guid"],new_href) + except: + logger.exception("tried to update existing episode but failed (guid=%s)",entry["guid"]) + pyotherside.send("refreshPost", None) if break_on_first_existing_episode: break @@ -352,6 +367,7 @@ class Podcast(BaseModel): newurl=e.url) yield from self.refresh(moveto, limit, full_refresh) except Exception as e: + logger.exception("unexpected exception during refresh",e) persist_log(LogType.Exception, msg="during refresh", podcasttitle=self.title, exception=e) pyotherside.send("refreshPost", None) @@ -363,7 +379,7 @@ class Podcast(BaseModel): """ new_posts = self.__process_episodes(feed, 0 if full_refresh else limit, break_on_first_existing_episode=not full_refresh) - logger.info("Fount %d new entries.", len(new_posts)) + logger.info("Found %d new entries.", len(new_posts)) return new_posts def set_params(self, params): diff --git a/python/podcast/podpost.py b/python/podcast/podpost.py index 03e9d0768213949ca2a461ebd825cf725e9f21f7..848811bc6207bb41874ea75a9e4c7e6265de80ab 100644 --- a/python/podcast/podpost.py +++ b/python/podcast/podpost.py @@ -11,7 +11,7 @@ from podcast.persistent_log import persist_log, LogType sys.path.append("../") from podcast.podcast import Podcast -from podcast import util +from podcast import util, feedutils from podcast.constants import Constants, BaseModel from peewee import AutoField, TextField, IntegerField, DateTimeField, BooleanField, CharField, \ FloatField, DoesNotExist, ForeignKeyField, ModelSelect @@ -45,7 +45,7 @@ class Podpost(BaseModel): guid: str = CharField(index=True) id: AutoField = AutoField(primary_key=True) # POST_ID_TYPE author: str = CharField(default="") - duration: int = IntegerField(null=True,help_text="in ms") + duration: int = IntegerField(null=True, help_text="in ms") favorite: bool = BooleanField(default=False) file_path: str = TextField(null=True) # podcast file url @@ -64,7 +64,7 @@ class Podpost(BaseModel): # download percentage percentage: float = FloatField(default=0) plainpart: TextField = TextField(default="") - position: int = IntegerField(default=0,help_text="in ms") + position: int = IntegerField(default=0, help_text="in ms") podcast = ForeignKeyField(Podcast, null=True, backref='episodes', lazy_load=True, on_delete='CASCADE') # when the post was published according to feed published = DateTimeField() @@ -146,26 +146,7 @@ class Podpost(BaseModel): post.guid = entry["id"] else: post.guid = hashlib.sha256(entry["summary"].encode()).hexdigest() - if len(entry["enclosures"]) == 0: - logger.warning("post %s has no enclosures", post.title) - for e in entry["enclosures"]: - if "href" in e: - if e.type[:5] == "audio": - if "length" in e.keys(): - post.length = e.length - else: - post.length = 0 - post.type = e.type - post.href = e.href - if "guid" in e.keys(): - post.guid = e.guid - elif e.href[-3:] == "mp3": - if "length" in e.keys(): - post.length = e.length - else: - post.length = 0 - post.type = "audio/mp3" - post.href = e.href + (post.href, post.type, post.length) = feedutils.get_post_audio_data_from_enclousures(entry) if "itunes_duration" in entry: post.duration = util.tx_to_s(entry["itunes_duration"]) else: @@ -252,7 +233,7 @@ class Podpost(BaseModel): "loaded": loaded, "haschapters": haschapters, "listened": self.listened or self.position > 0 and ( - self.duration- self.position < Constants().markListenedBeforeEndThreshold * 1000) + self.duration - self.position < Constants().markListenedBeforeEndThreshold * 1000) } def get_image_descriptor(self): @@ -299,17 +280,15 @@ class Podpost(BaseModel): self.percentage = 100 os.rename(file_path_part, file_path) - except URLError: - logger.exception("Download failed") - self.delete_file() - file_path = None - self.percentage = 0 - persist_log(LogType.NetworkError, what="episode download", title=self.title, url=self.href) except BaseException as e: - logger.exception("renaming the downloaded file failed") + logger.exception("Generic exception during download of '%s'(%s) from podcast %s", self.href, self.title, + self.podcast) + if isinstance(e, URLError): + logger.info("Deleting downloaded file") + self.delete_file() file_path = None self.percentage = 0 - persist_log(LogType.Exception, what="episode download", title=self.title, exception=e) + persist_log(LogType.Exception, what="episode download", title=self.title, url=self.href, exception=e) self.file_path = file_path PodpostFactory().persist(self) @@ -387,7 +366,6 @@ class Podpost(BaseModel): self.listened = True PodpostFactory().persist(self) - @property def get_position(self): """ diff --git a/python/podcast/util.py b/python/podcast/util.py index 3258e27a15c23a41b52b2bd35206803d18eeaefd..6f2252faf356aeb6b0739727944e36068e8cc487 100644 --- a/python/podcast/util.py +++ b/python/podcast/util.py @@ -1,6 +1,7 @@ """ some utilities """ +import http.client import logging import os import urllib.request @@ -146,10 +147,9 @@ def dl_from_url_progress(url, path): req = urllib.request.Request(url, data=None, headers={"User-Agent": agent}) try: h = urllib.request.urlopen(req) - except urllib.error.HTTPError as e: - if hasattr(e, "reason"): - pyotherside.send("apperror", "Error opening URL: " + e.reason) - return + except (http.client.InvalidURL, urllib.error.HTTPError) as e: + logger.info("Could not download '%s' to '%s'", url, path, e) + raise e length = int(h.getheader("content-length")) diff --git a/test/test_podcast.py b/test/test_podcast.py index 3eda9cb654bbd517a6fce549e9958545bedd985f..cb840fd43461d549f3f933cfa6861822fb476924 100644 --- a/test/test_podcast.py +++ b/test/test_podcast.py @@ -7,7 +7,6 @@ from typing import Tuple, List import httpretty import pytest from httpretty import HTTPretty -from more_itertools import first from peewee import DoesNotExist from podcast import POST_ID_TYPE @@ -222,8 +221,9 @@ def refreshable_podcast_fixture(request) -> Tuple[Podcast, List[Podpost]]: def request_callback(request, uri, response_headers): nonlocal invoked invoked += 1 - logger.info("Returning normal response file") - testdata = read_testdata(filename + ".xml") if invoked == 1 else read_testdata(filename + "2.xml") + return_filename = filename + ".xml" if invoked == 1 else filename + "2.xml" + logger.info("returning data from file '%s' (invoked = %s)", return_filename, invoked) + testdata = read_testdata(return_filename) return [200, response_headers, testdata] feed_url = 'http://fakefeed.com/feed' @@ -283,6 +283,19 @@ def test_pagination(): list(podcast.refresh(0, 0, True)) assert 2 == podcast.count_episodes() +# parametrized tests seem to interfer... order matters therefore (dont put them together) +@pytest.mark.parametrize("refreshable_podcast_fixture", ["testdata/episode_href_updated"], indirect=True) +def test_episode_href_changed(refreshable_podcast_fixture): + p: Podcast + p, episodes = refreshable_podcast_fixture + plist = PodcastList() + assert ilen(p.get_entries()) == 1 + assert len(episodes) == 1 + guid = episodes[0].guid + assert episodes[0].href == 'http://fakefeed.com/epsidoe/1/audio.mp3' + list(plist.refresh(0)) + assert list(Podpost.select().where(Podpost.guid == guid))[0].href == 'http://fakefeed.com/episode/1/audio.mp3' + @httpretty.activate def test_new_feed_url(): @@ -298,6 +311,7 @@ def test_new_feed_url(): assert list(get_log_messages())[0].messagetype == LogType.FeedRedirect.name +# parametrized tests seem to interfer... order matters therefore (dont put them together) @pytest.mark.parametrize("refreshable_podcast_fixture", ["testdata/feed_entries_not_ordered"], indirect=True) def test_podcastlist_refresh(refreshable_podcast_fixture): p, episodes = refreshable_podcast_fixture @@ -306,3 +320,4 @@ def test_podcastlist_refresh(refreshable_podcast_fixture): assert plist.get_podcast_count() == 1 list(plist.refresh(0)) assert ilen(p.get_entries()) == 2 + diff --git a/test/testdata/episode_href_updated.xml b/test/testdata/episode_href_updated.xml new file mode 100644 index 0000000000000000000000000000000000000000..f47cfce261d2105e3a2f0dd9824992630158f05a --- /dev/null +++ b/test/testdata/episode_href_updated.xml @@ -0,0 +1,70 @@ + + + + fakefeed + http://fakefeed.com + Tue, 02 Mar 2021 24:20:08 -0000 + Tue, 02 Mar 2021 23:20:11 -0000 + 60 + en-us + No Copyright © 2021 Podqast. No rights reserved. + no, none + + + + Richard + Test + thigg + http://blogs.law.harvard.edu/tech/rss + + https://example.com/image.png + imagetitle + imagelink.com + 1337 + 1337 + + + thigg + episodic + + + + + true + + nomail@example.com + bunny nunny + + subtitle + summary + + No Copyright © 2021 podqast. No Rights reserved. + + Arts + + the_guid_for_href_changed + Hello, I am a fake episode for testing!!öäü + Tue, 03 Mar 2021 23:20:08 -0000 + https://fakefeed.com/episode/1 + + + + I am a fake itunes subtitle + true + full + 1 + 13:37 + + + thigg + + + + + + + + + \ No newline at end of file diff --git a/test/testdata/episode_href_updated2.xml b/test/testdata/episode_href_updated2.xml new file mode 100644 index 0000000000000000000000000000000000000000..e45eed922c38586609c6f18c952e01f5cb3ea1c0 --- /dev/null +++ b/test/testdata/episode_href_updated2.xml @@ -0,0 +1,70 @@ + + + + fakefeed + http://fakefeed.com + Tue, 02 Mar 2021 24:20:08 -0000 + Tue, 02 Mar 2021 23:20:11 -0000 + 60 + en-us + No Copyright © 2021 Podqast. No rights reserved. + no, none + + + + Richard + Test + thigg + http://blogs.law.harvard.edu/tech/rss + + https://example.com/image.png + imagetitle + imagelink.com + 1337 + 1337 + + + thigg + episodic + + + + + true + + nomail@example.com + bunny nunny + + subtitle + summary + + No Copyright © 2021 podqast. No Rights reserved. + + Arts + + the_guid_for_href_changed + Hello, I am a fake episode for testing!!öäü + Tue, 03 Mar 2021 23:20:08 -0000 + https://fakefeed.com/episode/1 + + + + I am a fake itunes subtitle + true + full + 1 + 13:37 + + + thigg + + + + + + + + + \ No newline at end of file diff --git a/test/testdata/fakefeed2.xml b/test/testdata/fakefeed2.xml index f48390222e09e23495ce4db9067c3c71d30bc116..ebcd6b3ddc5751ce4f00e2743aac53d7572e26ca 100644 --- a/test/testdata/fakefeed2.xml +++ b/test/testdata/fakefeed2.xml @@ -77,7 +77,7 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergr Arts - prx_96_1337d3a0-e636-445f-b379-f1337deadbeef + prx_96_1337d3a0-e636-445f-b379-f1337deadbeef2 Hello, I am a new fake episode for testing!!öäü Tue, 10 Mar 2021 23:20:08 -0000 https://fakefeed.com/episode/2