#!/usr/bin/python3
"""Collect "glasbena oprema" entries from the radiostudent.si feed.

Recovered from git patch 1d0e0e54f663920a0992f1af5bd7d0b37eee537a
(Anton Luka Šijanec, 2024-06-04), which creates
skripti/rš_glasbena_oprema.py.

``client``, ``model`` and ``prompt`` configure an Ollama request for
extracting a tracklist from an entry's text; nothing in this file sends
that request yet.
"""
import re

import feedparser
import yt_dlp
from ollama import Client
from bs4 import BeautifulSoup

client = Client(host='http://splet.4a.si:80')
model = "llama2:13b-chat-fp16"
prompt = (
    "The document below is text extracted from a Slovene radio station "
    "containing a tracklist. Extract the tracklist form the text below "
    "and output a CSV table in format "
    "\"artist,track name,album,duration,label\". Example output:\n\n"
    "The Prodigy,Firestarter,The Fat of the Land,4:42,XL\n"
    "BJÖRK,LION SONG,,6:16,"
)

# Feed read by opreme() when no other URL is given.
FEED_URL = "https://radiostudent.si/taxonomy/term/589/*/feed"

# Two or more consecutive newlines, i.e. one or more blank lines.
_NEWLINE_RUN = re.compile(r"\n{2,}")


def _clean_body(text):
    """Return *text* without carriage returns and without blank lines."""
    return _NEWLINE_RUN.sub("\n", text.replace("\r", ""))


def opreme(feed_url=FEED_URL):
    """Parse the feed and return one dict per entry.

    Args:
        feed_url: Feed location handed to ``feedparser.parse``. Defaults
            to ``FEED_URL``.

    Returns:
        A list of dicts with the keys ``id`` (int, the first
        space-separated token of the entry id), ``title``, ``link``,
        ``published`` (the entry's ``published_parsed`` value),
        ``authors`` (list of author names) and ``body`` (text of the
        entry's ``field-name-body`` div with carriage returns removed
        and blank lines collapsed).

    Raises:
        Exception: If an entry's summary has no ``div`` carrying the
            ``field-name-body`` class.
    """
    r = []
    for entry in feedparser.parse(feed_url).entries:
        oprema = {
            "id": int(entry.id.split(" ")[0]),
            "title": entry.title,
            "link": entry.link,
            "published": entry.published_parsed,
            "authors": [author.name for author in entry.authors],
        }
        summary = BeautifulSoup(entry.summary, features="html.parser")
        # First div whose class list contains "field-name-body".
        body = summary.find("div", class_="field-name-body")
        if body is None:
            raise Exception("body is None in " + entry.link)
        # Keep the cleaned text with the entry instead of discarding it.
        oprema["body"] = _clean_body(body.text)
        r.append(oprema)
    return r


if __name__ == "__main__":
    opreme()