diff --git a/python/podcast/feedutils.py b/python/podcast/feedutils.py
index 89624420e8c508cf2b3927474c4df6c9d7e2ee82..36de21b27e881ca383b06edfaa46554469af6972 100644
--- a/python/podcast/feedutils.py
+++ b/python/podcast/feedutils.py
@@ -43,7 +43,7 @@ def fetch_feed(published, url) -> FeedParserDict:
if "itunes_new-feed-url" in feed.feed and feed.feed["itunes_new-feed-url"] != url:
raise NewFeedUrlError(feed.feed["itunes_new-feed-url"])
logger.info(
- "podcast init size of entries: %d", len(feed.entries)
+ "podcast feed number of entries: %d", len(feed.entries)
)
return feed
@@ -62,6 +62,23 @@ def iterate_feed_entries(feed, should_fetch_next_page: Callable[[], bool] = lamb
else:
break
+def get_post_audio_data_from_enclousures(entry):
+    """Return (href, mime type, length) of the first audio enclosure, else (None, None, 0)."""
+    enclosures = entry.get("enclosures", [])
+    if not enclosures:
+        logger.warning("post %s has no enclosures", entry.get("title"))
+    for e in enclosures:
+        if "href" not in e:
+            continue
+        # the type attribute is optional in feeds: default it instead of raising
+        mime = e.get("type") or ""
+        length = e.get("length", 0)
+        if mime.startswith("audio"):
+            return e["href"], mime, length
+        if e["href"].endswith("mp3"):  # untyped enclosure: trust the file extension
+            return e["href"], "audio/mp3", length
+    return None, None, 0
+
class FeedFetchingError(BaseException):
def __init__(self, msg):
diff --git a/python/podcast/persistent_log.py b/python/podcast/persistent_log.py
index ace1590179f6d879e63db9e2a634bb9a2b9aa274..b9eb2c73de648a763bca733038ba0ce5a998343f 100644
--- a/python/podcast/persistent_log.py
+++ b/python/podcast/persistent_log.py
@@ -19,6 +19,7 @@ class LogType(Enum):
SuccessfulRefresh = "SuccessfulRefresh"
Refresh304 = "Refresh304"
FeedRedirect = "FeedRedirect"
+ EpisodeHrefChanged = "EpisodeHrefChanged"
class LogMessage(BaseModel):
diff --git a/python/podcast/podcast.py b/python/podcast/podcast.py
index 645d28308a78aae58550148b8ec9708d0749b00d..69fb79c44d66568d3377d7c613b0895d7ee2fd39 100644
--- a/python/podcast/podcast.py
+++ b/python/podcast/podcast.py
@@ -162,6 +162,21 @@ class Podcast(BaseModel):
new_posts = []
for entry in feedutils.iterate_feed_entries(feed):
if entry_already_known(entry):
+if "guid" in entry and entry["guid"] in known_guids:  # known episode: its audio url may have moved
+    (new_href, new_type, new_length) = feedutils.get_post_audio_data_from_enclousures(entry)
+    if new_href and new_type:
+        try:
+            post = Podpost.get_or_none(Podpost.guid == entry["guid"])  # filter on guid only; href is compared below
+            if post is None:
+                logger.warning("tried updating an existing episode but did not find it in db (guid=%s,new_href=%s)",entry["guid"],new_href)
+            elif post.href != new_href:
+                logger.info("Updating href of post with guid '%s'", entry["guid"])
+                old_href, post.href, post.type, post.length = post.href, new_href, new_type, new_length
+                post.save()
+                persist_log(LogType.EpisodeHrefChanged, title=self.title, entry=entry, old_href=old_href)
+        except Exception:  # one broken episode must not abort the refresh, but do not swallow interrupts
+            logger.exception("tried to update existing episode but failed (guid=%s)",entry["guid"])
+
pyotherside.send("refreshPost", None)
if break_on_first_existing_episode:
break
@@ -352,6 +367,7 @@ class Podcast(BaseModel):
newurl=e.url)
yield from self.refresh(moveto, limit, full_refresh)
except Exception as e:
+logger.exception("unexpected exception during refresh")  # traceback is attached automatically; no extra arg
persist_log(LogType.Exception, msg="during refresh", podcasttitle=self.title, exception=e)
pyotherside.send("refreshPost", None)
@@ -363,7 +379,7 @@ class Podcast(BaseModel):
"""
new_posts = self.__process_episodes(feed, 0 if full_refresh else limit,
break_on_first_existing_episode=not full_refresh)
- logger.info("Fount %d new entries.", len(new_posts))
+ logger.info("Found %d new entries.", len(new_posts))
return new_posts
def set_params(self, params):
diff --git a/python/podcast/podpost.py b/python/podcast/podpost.py
index 03e9d0768213949ca2a461ebd825cf725e9f21f7..848811bc6207bb41874ea75a9e4c7e6265de80ab 100644
--- a/python/podcast/podpost.py
+++ b/python/podcast/podpost.py
@@ -11,7 +11,7 @@ from podcast.persistent_log import persist_log, LogType
sys.path.append("../")
from podcast.podcast import Podcast
-from podcast import util
+from podcast import util, feedutils
from podcast.constants import Constants, BaseModel
from peewee import AutoField, TextField, IntegerField, DateTimeField, BooleanField, CharField, \
FloatField, DoesNotExist, ForeignKeyField, ModelSelect
@@ -45,7 +45,7 @@ class Podpost(BaseModel):
guid: str = CharField(index=True)
id: AutoField = AutoField(primary_key=True) # POST_ID_TYPE
author: str = CharField(default="")
- duration: int = IntegerField(null=True,help_text="in ms")
+ duration: int = IntegerField(null=True, help_text="in ms")
favorite: bool = BooleanField(default=False)
file_path: str = TextField(null=True)
# podcast file url
@@ -64,7 +64,7 @@ class Podpost(BaseModel):
# download percentage
percentage: float = FloatField(default=0)
plainpart: TextField = TextField(default="")
- position: int = IntegerField(default=0,help_text="in ms")
+ position: int = IntegerField(default=0, help_text="in ms")
podcast = ForeignKeyField(Podcast, null=True, backref='episodes', lazy_load=True, on_delete='CASCADE')
# when the post was published according to feed
published = DateTimeField()
@@ -146,26 +146,7 @@ class Podpost(BaseModel):
post.guid = entry["id"]
else:
post.guid = hashlib.sha256(entry["summary"].encode()).hexdigest()
- if len(entry["enclosures"]) == 0:
- logger.warning("post %s has no enclosures", post.title)
- for e in entry["enclosures"]:
- if "href" in e:
- if e.type[:5] == "audio":
- if "length" in e.keys():
- post.length = e.length
- else:
- post.length = 0
- post.type = e.type
- post.href = e.href
- if "guid" in e.keys():
- post.guid = e.guid
- elif e.href[-3:] == "mp3":
- if "length" in e.keys():
- post.length = e.length
- else:
- post.length = 0
- post.type = "audio/mp3"
- post.href = e.href
+ (post.href, post.type, post.length) = feedutils.get_post_audio_data_from_enclousures(entry)
if "itunes_duration" in entry:
post.duration = util.tx_to_s(entry["itunes_duration"])
else:
@@ -252,7 +233,7 @@ class Podpost(BaseModel):
"loaded": loaded,
"haschapters": haschapters,
"listened": self.listened or self.position > 0 and (
- self.duration- self.position < Constants().markListenedBeforeEndThreshold * 1000)
+ self.duration - self.position < Constants().markListenedBeforeEndThreshold * 1000)
}
def get_image_descriptor(self):
@@ -299,17 +280,15 @@ class Podpost(BaseModel):
self.percentage = 100
os.rename(file_path_part, file_path)
- except URLError:
- logger.exception("Download failed")
- self.delete_file()
- file_path = None
- self.percentage = 0
- persist_log(LogType.NetworkError, what="episode download", title=self.title, url=self.href)
except BaseException as e:
- logger.exception("renaming the downloaded file failed")
+ logger.exception("Generic exception during download of '%s'(%s) from podcast %s", self.href, self.title,
+ self.podcast)
+ if isinstance(e, URLError):
+ logger.info("Deleting downloaded file")
+ self.delete_file()
file_path = None
self.percentage = 0
- persist_log(LogType.Exception, what="episode download", title=self.title, exception=e)
+ persist_log(LogType.Exception, what="episode download", title=self.title, url=self.href, exception=e)
self.file_path = file_path
PodpostFactory().persist(self)
@@ -387,7 +366,6 @@ class Podpost(BaseModel):
self.listened = True
PodpostFactory().persist(self)
-
@property
def get_position(self):
"""
diff --git a/python/podcast/util.py b/python/podcast/util.py
index 3258e27a15c23a41b52b2bd35206803d18eeaefd..6f2252faf356aeb6b0739727944e36068e8cc487 100644
--- a/python/podcast/util.py
+++ b/python/podcast/util.py
@@ -1,6 +1,7 @@
"""
some utilities
"""
+import http.client
import logging
import os
import urllib.request
@@ -146,10 +147,9 @@ def dl_from_url_progress(url, path):
req = urllib.request.Request(url, data=None, headers={"User-Agent": agent})
try:
h = urllib.request.urlopen(req)
- except urllib.error.HTTPError as e:
- if hasattr(e, "reason"):
- pyotherside.send("apperror", "Error opening URL: " + e.reason)
- return
+except (http.client.InvalidURL, urllib.error.HTTPError) as e:
+    logger.info("Could not download '%s' to '%s': %s", url, path, e)  # one placeholder per argument
+    raise  # re-raise unchanged so the caller sees the original traceback
length = int(h.getheader("content-length"))
diff --git a/test/test_podcast.py b/test/test_podcast.py
index 3eda9cb654bbd517a6fce549e9958545bedd985f..cb840fd43461d549f3f933cfa6861822fb476924 100644
--- a/test/test_podcast.py
+++ b/test/test_podcast.py
@@ -7,7 +7,6 @@ from typing import Tuple, List
import httpretty
import pytest
from httpretty import HTTPretty
-from more_itertools import first
from peewee import DoesNotExist
from podcast import POST_ID_TYPE
@@ -222,8 +221,9 @@ def refreshable_podcast_fixture(request) -> Tuple[Podcast, List[Podpost]]:
def request_callback(request, uri, response_headers):
nonlocal invoked
invoked += 1
- logger.info("Returning normal response file")
- testdata = read_testdata(filename + ".xml") if invoked == 1 else read_testdata(filename + "2.xml")
+ return_filename = filename + ".xml" if invoked == 1 else filename + "2.xml"
+ logger.info("returning data from file '%s' (invoked = %s)", return_filename, invoked)
+ testdata = read_testdata(return_filename)
return [200, response_headers, testdata]
feed_url = 'http://fakefeed.com/feed'
@@ -283,6 +283,19 @@ def test_pagination():
list(podcast.refresh(0, 0, True))
assert 2 == podcast.count_episodes()
+# parametrized tests seem to interfere with each other... order matters, therefore don't put them together
+@pytest.mark.parametrize("refreshable_podcast_fixture", ["testdata/episode_href_updated"], indirect=True)
+def test_episode_href_changed(refreshable_podcast_fixture):
+    """Refreshing must replace the stored href of an already known episode with the one from the feed."""
+    podcast, known_episodes = refreshable_podcast_fixture
+    podcast_list = PodcastList()
+    assert ilen(podcast.get_entries()) == 1
+    assert len(known_episodes) == 1
+    guid, stale_href = known_episodes[0].guid, known_episodes[0].href
+    assert stale_href == 'http://fakefeed.com/epsidoe/1/audio.mp3'
+    list(podcast_list.refresh(0))
+    assert list(Podpost.select().where(Podpost.guid == guid))[0].href == 'http://fakefeed.com/episode/1/audio.mp3'
+
@httpretty.activate
def test_new_feed_url():
@@ -298,6 +311,7 @@ def test_new_feed_url():
assert list(get_log_messages())[0].messagetype == LogType.FeedRedirect.name
+# parametrized tests seem to interfere with each other... order matters, therefore don't put them together
@pytest.mark.parametrize("refreshable_podcast_fixture", ["testdata/feed_entries_not_ordered"], indirect=True)
def test_podcastlist_refresh(refreshable_podcast_fixture):
p, episodes = refreshable_podcast_fixture
@@ -306,3 +320,4 @@ def test_podcastlist_refresh(refreshable_podcast_fixture):
assert plist.get_podcast_count() == 1
list(plist.refresh(0))
assert ilen(p.get_entries()) == 2
+
diff --git a/test/testdata/episode_href_updated.xml b/test/testdata/episode_href_updated.xml
new file mode 100644
index 0000000000000000000000000000000000000000..f47cfce261d2105e3a2f0dd9824992630158f05a
--- /dev/null
+++ b/test/testdata/episode_href_updated.xml
@@ -0,0 +1,70 @@
+
+
+
+ fakefeed
+ http://fakefeed.com
+ Tue, 02 Mar 2021 24:20:08 -0000
+ Tue, 02 Mar 2021 23:20:11 -0000
+ 60
+ en-us
+ No Copyright © 2021 Podqast. No rights reserved.
+ no, none
+
+
+
+ Richard
+ Test
+ thigg
+ http://blogs.law.harvard.edu/tech/rss
+
+ https://example.com/image.png
+ imagetitle
+ imagelink.com
+ 1337
+ 1337
+
+
+ thigg
+ episodic
+
+
+
+
+ true
+
+ nomail@example.com
+ bunny nunny
+
+ subtitle
+ summary
+
+ No Copyright © 2021 podqast. No Rights reserved.
+
+ Arts
+ -
+ the_guid_for_href_changed
+ Hello, I am a fake episode for testing!!öäü
+ Tue, 03 Mar 2021 23:20:08 -0000
+ https://fakefeed.com/episode/1
+
+
+
+ I am a fake itunes subtitle
+ true
+ full
+ 1
+ 13:37
+
+
+ thigg
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test/testdata/episode_href_updated2.xml b/test/testdata/episode_href_updated2.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e45eed922c38586609c6f18c952e01f5cb3ea1c0
--- /dev/null
+++ b/test/testdata/episode_href_updated2.xml
@@ -0,0 +1,70 @@
+
+
+
+ fakefeed
+ http://fakefeed.com
+ Tue, 02 Mar 2021 24:20:08 -0000
+ Tue, 02 Mar 2021 23:20:11 -0000
+ 60
+ en-us
+ No Copyright © 2021 Podqast. No rights reserved.
+ no, none
+
+
+
+ Richard
+ Test
+ thigg
+ http://blogs.law.harvard.edu/tech/rss
+
+ https://example.com/image.png
+ imagetitle
+ imagelink.com
+ 1337
+ 1337
+
+
+ thigg
+ episodic
+
+
+
+
+ true
+
+ nomail@example.com
+ bunny nunny
+
+ subtitle
+ summary
+
+ No Copyright © 2021 podqast. No Rights reserved.
+
+ Arts
+ -
+ the_guid_for_href_changed
+ Hello, I am a fake episode for testing!!öäü
+ Tue, 03 Mar 2021 23:20:08 -0000
+ https://fakefeed.com/episode/1
+
+
+
+ I am a fake itunes subtitle
+ true
+ full
+ 1
+ 13:37
+
+
+ thigg
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test/testdata/fakefeed2.xml b/test/testdata/fakefeed2.xml
index f48390222e09e23495ce4db9067c3c71d30bc116..ebcd6b3ddc5751ce4f00e2743aac53d7572e26ca 100644
--- a/test/testdata/fakefeed2.xml
+++ b/test/testdata/fakefeed2.xml
@@ -77,7 +77,7 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergr
Arts
-
- prx_96_1337d3a0-e636-445f-b379-f1337deadbeef
+ prx_96_1337d3a0-e636-445f-b379-f1337deadbeef2
Hello, I am a new fake episode for testing!!öäü
Tue, 10 Mar 2021 23:20:08 -0000
https://fakefeed.com/episode/2