@@ -141,45 +141,40 @@ def get_podcast_values(feed_url, user_id, username: Optional[str] = None, passwo
141141
142142 # Use requests to fetch the feed content
143143 try :
144+ # Simpler headers that worked in the original version
144145 headers = {
145- 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' ,
146- 'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' ,
146+ 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3' ,
147147 'Accept-Language' : 'en-US,en;q=0.9' ,
148- 'Accept-Encoding' : 'gzip, deflate, br' ,
149- 'Connection' : 'keep-alive' ,
150- 'Upgrade-Insecure-Requests' : '1' ,
151- 'Sec-Fetch-Dest' : 'document' ,
152- 'Sec-Fetch-Mode' : 'navigate' ,
153- 'Sec-Fetch-Site' : 'none' ,
154- 'Sec-Fetch-User' : '?1'
155148 }
156149 print (f"Fetching URL: { feed_url } " )
157- print ( f"Headers: { headers } " )
150+
158151 if username and password :
159152 print (f"Using auth for user: { username } " )
160153 response = requests .get (feed_url , headers = headers , auth = HTTPBasicAuth (username , password ))
161154 else :
162155 response = requests .get (feed_url , headers = headers )
163156
164- response .raise_for_status () # Raise an exception for HTTP errors
157+ response .raise_for_status ()
158+ # Use binary content which worked in the original version
165159 feed_content = response .content
160+
166161 except requests .RequestException as e :
167162 try :
168- # Only try to access response if it exists
169163 if 'response' in locals ():
170164 print (f"Response headers: { response .headers } " )
171- print (f"Response content: { response .content } " )
165+ print (f"Response content: { response .content [: 500 ] } " )
172166 except :
173167 pass
174168 raise ValueError (f"Error fetching the feed: { str (e )} " )
175169
176170 # Parse the feed
177171 d = feedparser .parse (feed_content )
172+ print (f"Feed parsed - title: { d .feed .get ('title' , 'Unknown' )} " )
178173
179- # Initialize podcast_values as a dictionary
174+ # Initialize podcast_values as in the original version that worked
180175 podcast_values = {
181176 'pod_title' : d .feed .title if hasattr (d .feed , 'title' ) else None ,
182- 'pod_artwork' : d . feed . image . href if hasattr ( d . feed , 'image' ) and hasattr ( d . feed . image , 'href' ) else None ,
177+ 'pod_artwork' : None , # We'll set this with multiple checks below
183178 'pod_author' : d .feed .author if hasattr (d .feed , 'author' ) else None ,
184179 'categories' : [],
185180 'pod_description' : d .feed .description if hasattr (d .feed , 'description' ) else None ,
@@ -190,39 +185,76 @@ def get_podcast_values(feed_url, user_id, username: Optional[str] = None, passwo
190185 'user_id' : user_id
191186 }
192187
188+ # Enhanced image URL extraction combining both approaches
189+ if hasattr (d .feed , 'image' ):
190+ if hasattr (d .feed .image , 'href' ):
191+ podcast_values ['pod_artwork' ] = d .feed .image .href
192+ elif hasattr (d .feed .image , 'url' ): # Added for news feed format
193+ podcast_values ['pod_artwork' ] = d .feed .image .url
194+ elif isinstance (d .feed .image , dict ):
195+ if 'href' in d .feed .image :
196+ podcast_values ['pod_artwork' ] = d .feed .image ['href' ]
197+ elif 'url' in d .feed .image :
198+ podcast_values ['pod_artwork' ] = d .feed .image ['url' ]
199+
200+ # iTunes image fallback
193201 if not podcast_values ['pod_artwork' ] and hasattr (d .feed , 'itunes_image' ):
194- podcast_values ['pod_artwork' ] = d .feed .itunes_image ['href' ]
202+ if hasattr (d .feed .itunes_image , 'href' ):
203+ podcast_values ['pod_artwork' ] = d .feed .itunes_image .href
204+ elif isinstance (d .feed .itunes_image , dict ) and 'href' in d .feed .itunes_image :
205+ podcast_values ['pod_artwork' ] = d .feed .itunes_image ['href' ]
195206
207+ # Author fallback
196208 if not podcast_values ['pod_author' ] and hasattr (d .feed , 'itunes_author' ):
197209 podcast_values ['pod_author' ] = d .feed .itunes_author
198210
199- # Extracting categories, primarily from iTunes
200- if hasattr (d .feed , 'itunes_category' ):
201- for cat in d .feed .itunes_category :
202- podcast_values ['categories' ].append (cat ['text' ])
203- if 'itunes_category' in cat :
204- for subcat in cat ['itunes_category' ]:
205- podcast_values ['categories' ].append (subcat ['text' ])
211+ # Description fallbacks
212+ if not podcast_values ['pod_description' ]:
213+ if hasattr (d .feed , 'subtitle' ):
214+ podcast_values ['pod_description' ] = d .feed .subtitle
215+ elif hasattr (d .feed , 'itunes_summary' ):
216+ podcast_values ['pod_description' ] = d .feed .itunes_summary
217+
218+ # Category extraction with robust error handling
219+ try :
220+ if hasattr (d .feed , 'itunes_category' ):
221+ if isinstance (d .feed .itunes_category , list ):
222+ for cat in d .feed .itunes_category :
223+ if isinstance (cat , dict ) and 'text' in cat :
224+ podcast_values ['categories' ].append (cat ['text' ])
225+ elif hasattr (cat , 'text' ):
226+ podcast_values ['categories' ].append (cat .text )
227+ elif isinstance (d .feed .itunes_category , dict ) and 'text' in d .feed .itunes_category :
228+ podcast_values ['categories' ].append (d .feed .itunes_category ['text' ])
229+ except Exception as e :
230+ print (f"Error extracting categories: { e } " )
206231
207- # Now, check if categories list is empty after attempting to populate it
232+ # Handle empty categories
208233 if not podcast_values ['categories' ]:
209- podcast_values ['categories' ] = "" # Set to empty string if no categories found
234+ podcast_values ['categories' ] = { '1' : 'Podcasts' } # Default category
210235 else :
211236 categories_dict = {str (i ): cat for i , cat in enumerate (podcast_values ['categories' ], start = 1 )}
212- podcast_values ['categories' ] = json . dumps ( categories_dict ) # Serialize populated categories dict
237+ podcast_values ['categories' ] = categories_dict
213238
214- if not podcast_values ['pod_description' ] and hasattr (d .feed , 'itunes_summary' ):
215- podcast_values ['pod_description' ] = d .feed .itunes_summary
239+ # Add explicit check with robust handling
240+ try :
241+ if hasattr (d .feed , 'itunes_explicit' ):
242+ if isinstance (d .feed .itunes_explicit , str ):
243+ podcast_values ['pod_explicit' ] = d .feed .itunes_explicit .lower () in ('yes' , 'true' , '1' )
244+ elif isinstance (d .feed .itunes_explicit , bool ):
245+ podcast_values ['pod_explicit' ] = d .feed .itunes_explicit
246+ except Exception as e :
247+ print (f"Error checking explicit flag: { e } " )
216248
217- # Check for explicit content
218- if hasattr (d .feed , 'itunes_explicit' ):
219- podcast_values ['pod_explicit' ] = d .feed .itunes_explicit == 'yes'
249+ # Print values for debugging
250+ print ("Extracted podcast values:" )
251+ for key , value in podcast_values .items ():
252+ print (f"{ key } : { value } " )
220253
221254 return podcast_values
222255
223256
224257
225-
226258def check_valid_feed (feed_url : str , username : Optional [str ] = None , password : Optional [str ] = None ):
227259 """
228260 Check if the provided URL points to a valid podcast feed.
0 commit comments