#!/usr/bin/python3
"""Fetch entries from a radiostudent.si feed and extract each entry's body text.

The module also configures an Ollama client, model name and extraction prompt.
NOTE(review): ``client``, ``model`` and ``prompt`` are not used by any code
visible in this file; presumably a later step feeds the extracted body text to
the model -- confirm before removing them.
"""

import feedparser
import yt_dlp
from ollama import Client
from bs4 import BeautifulSoup

client = Client(host='http://splet.4a.si:80')
model = "llama2:13b-chat-fp16"
prompt = "The document below is text extracted from a Slovene radio station containing a tracklist. Extract the tracklist form the text below and output a CSV table in format \"artist,track name,album,duration,label\". Example output:\n\nThe Prodigy,Firestarter,The Fat of the Land,4:42,XL\nBJÖRK,LION SONG,,6:16,"

# Feed that opreme() reads when no other URL is given.
FEED_URL = "https://radiostudent.si/taxonomy/term/589/*/feed"

# CSS class of the <div> inside an entry summary that holds the body text.
BODY_DIV_CLASS = "field-name-body"


def _normalize_body(text):
    """Drop carriage returns and collapse runs of newlines into one newline."""
    text = text.replace("\r", "")
    while "\n\n" in text:
        text = text.replace("\n\n", "\n")
    return text


def opreme(feed_url: str = FEED_URL) -> list:
    """Parse the feed and return one dict per entry.

    Args:
        feed_url: URL of the feed to parse. Defaults to ``FEED_URL``.

    Returns:
        A list of dicts, one per feed entry, with the keys ``"id"`` (int),
        ``"title"``, ``"link"``, ``"published"`` (the entry's
        ``published_parsed`` value), ``"authors"`` (list of author names) and
        ``"body"`` (the whitespace-normalised text of the body div).

    Raises:
        Exception: if an entry's summary has no div with the
            ``field-name-body`` class.
        ValueError: if the first space-separated token of an entry id is not
            an integer.
    """
    r = []
    for entry in feedparser.parse(feed_url).entries:
        oprema = {
            # The id is taken from the first space-separated token of entry.id.
            "id": int(entry.id.split(" ")[0]),
            "title": entry.title,
            "link": entry.link,
            "published": entry.published_parsed,
            # Entries without any author yield an empty list instead of raising.
            "authors": [author.name for author in entry.get("authors", [])],
        }

        summary = BeautifulSoup(entry.summary, features="html.parser")
        # First <div> whose class list contains BODY_DIV_CLASS, or None.
        body = summary.find("div", class_=BODY_DIV_CLASS)
        if body is None:
            raise Exception("body is None in " + entry.link)

        # Keep the cleaned-up text on the result; previously it was computed
        # and then discarded.
        oprema["body"] = _normalize_body(body.text)
        r.append(oprema)
    return r


if __name__ == "__main__":
    opreme()