diff options
author | Anton Luka Šijanec <anton@sijanec.eu> | 2024-01-11 01:18:11 +0100 |
---|---|---|
committer | Anton Luka Šijanec <anton@sijanec.eu> | 2024-01-11 01:18:11 +0100 |
commit | ee73fc066ac927d94084d77f416834b6f909abf7 (patch) | |
tree | 262817d6172665f9f95a049ee23e65526f4db4d7 /prog/sear.php/index.php | |
parent | namesti.sh (diff) | |
download | r-ee73fc066ac927d94084d77f416834b6f909abf7.tar r-ee73fc066ac927d94084d77f416834b6f909abf7.tar.gz r-ee73fc066ac927d94084d77f416834b6f909abf7.tar.bz2 r-ee73fc066ac927d94084d77f416834b6f909abf7.tar.lz r-ee73fc066ac927d94084d77f416834b6f909abf7.tar.xz r-ee73fc066ac927d94084d77f416834b6f909abf7.tar.zst r-ee73fc066ac927d94084d77f416834b6f909abf7.zip |
Diffstat (limited to 'prog/sear.php/index.php')
-rw-r--r-- | prog/sear.php/index.php | 307 |
1 files changed, 307 insertions, 0 deletions
<?php
/**
 * sear.php: a small search front end.
 *
 * The script requests Google's WAP search page, extracts the result links
 * from the returned HTML and renders them as a plain HTML page, or as JSON
 * when the `json` request parameter is set.
 *
 * Request parameters read below:
 *   q    - search query
 *   i    - image search
 *   e    - ask the back end not to auto-correct the query (adds nfpr=1)
 *   json - emit the raw result array as JSON instead of HTML
 *   l    - maximum number of results to render
 *   h    - rewrite https: links to http: in the rendered page
 *   f    - redirect straight to the first result
 */

/**
 * Converts a scalar to a string; everything else (arrays, null, objects)
 * becomes an empty string so that decoded JSON cannot trigger conversion errors.
 */
function scalar_string ($value): string {
	return is_scalar($value) ? (string) $value : "";
}

/**
 * Escapes a value for HTML text and for quoted attribute values.
 *
 * The flags are spelled out because the templates below use single-quoted
 * attributes and the default flags only escape single quotes since PHP 8.1.
 */
function html_escape ($value): string {
	return htmlspecialchars(scalar_string($value), ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8");
}

/**
 * Returns the part of $s that precedes the first "&", or all of $s when
 * there is no "&".
 */
function cut_at_ampersand (string $s): string {
	$amp = strpos($s, "&");
	return $amp === false ? $s : substr($s, 0, $amp);
}

/**
 * Replaces the first "https:" in $url with "http:".
 */
function downgrade_scheme (string $url): string {
	return preg_replace("/https:/", "http:", $url, 1) ?? $url;
}

/**
 * Finds the CSS class name whose rule body is $definition.
 *
 * Searches $haystack case-insensitively for $definition, walks back to the
 * nearest preceding "." and returns the run of alphanumeric characters that
 * follows that dot.
 *
 * @param string $haystack   document text containing the style sheet
 * @param string $definition rule body to look for, e.g. "{color:#1967d2}"
 * @return string|false class name (possibly empty), or false when the
 *                      definition or a preceding dot cannot be found
 */
function find_class ($haystack, $definition) {
	$pos = stripos($haystack, $definition);
	if ($pos === false)
		return false;
	// Bounded search: a missing dot must not walk past the start of the string.
	$dot = strrpos(substr($haystack, 0, $pos + 1), ".");
	if ($dot === false)
		return false;
	$start = $dot + 1;
	$end = $start;
	$length = strlen($haystack);
	while ($end < $length && ctype_alnum($haystack[$end]))
		$end++;
	return substr($haystack, $start, $end - $start);
}

/**
 * Unwraps the redirect wrappers found in result links.
 *
 * Handles links of the form "/url?q=<target>&..." and links containing
 * "googleweblight.com/fp?u=<target>&...". The target is URL-decoded.
 * Any other link is returned unchanged.
 *
 * @param string $h link as found in the document
 * @return string unwrapped link
 */
function fix_url ($h) {
	$redirect = "/url?q=";
	if (str_starts_with($h, $redirect))
		$h = urldecode(cut_at_ampersand(substr($h, strlen($redirect))));
	$weblight = "googleweblight.com/fp?u=";
	$at = strpos($h, $weblight);
	if ($at !== false)
		$h = urldecode(cut_at_ampersand(substr($h, $at + strlen($weblight))));
	return $h;
}

/**
 * Concatenates the values of the direct text-node children of $node,
 * ignoring text nested inside child elements.
 */
function only_text_content ($node) {
	$r = "";
	foreach ($node->childNodes as $n)
		if ($n->nodeType == XML_TEXT_NODE)
			$r .= $n->nodeValue;
	return $r;
}

/**
 * Extracts image results: every <div> whose class equals $imageclass and
 * whose first link carries imgurl= and imgrefurl= parameters.
 *
 * @return array list of ["imgsrc" => string, "url" => string]
 */
function parse_image_results (DOMDocument $x, string $imageclass): array {
	$results = [];
	foreach ($x->getElementsByTagName("div") as $div) {
		if ($div->getAttribute("class") != $imageclass)
			continue;
		$anchor = $div->getElementsByTagName("a")->item(0);
		if ($anchor === null)
			continue;
		$hreflink = $anchor->getAttribute("href");
		if (!$hreflink)
			continue;
		if (!preg_match(",.*s?imgurl=([^&]*)&imgrefurl=([^&]*).*,", $hreflink, $matches))
			continue;
		[, $imgurl, $imgrefurl] = $matches;
		if (!$imgurl && !$imgrefurl)
			continue;
		$results[] = [
			"imgsrc" => urldecode(fix_url($imgurl)),
			"url" => urldecode(fix_url($imgrefurl)),
		];
	}
	return $results;
}

/**
 * Extracts web results: every <a> whose class attribute contains $titleclass.
 *
 * The title is the text of the first <span> inside the link, the breadcrumbs
 * come from a <span class=$descclass> inside the link, and the description
 * from a <span class=$descclass> inside the first <table> under the link's
 * grandparent node.
 *
 * @return array list of ["url", "title", "breadcrumbs", "description"]
 */
function parse_web_results (DOMDocument $x, string $titleclass, string $descclass): array {
	$results = [];
	foreach ($x->getElementsByTagName("a") as $a) {
		$class = $a->getAttribute("class");
		if (!$class || !str_contains($class, $titleclass))
			continue;
		$hreflink = fix_url($a->getAttribute("href"));
		if (!$hreflink)
			continue;
		// Reset per result so that values never leak from the previous one.
		$firstspan = null;
		$breadcrumbs = "";
		$description = "";
		foreach ($a->getElementsByTagName("span") as $span) {
			if ($firstspan === null)
				$firstspan = $span;
			if ($span->getAttribute("class") == $descclass)
				$breadcrumbs = $span->nodeValue;
		}
		$container = $a->parentNode?->parentNode;
		$table = null;
		if ($container instanceof DOMElement || $container instanceof DOMDocument)
			$table = $container->getElementsByTagName("table")->item(0);
		if ($table !== null)
			foreach ($table->getElementsByTagName("span") as $span)
				if ($span->getAttribute("class") == $descclass)
					$description = $span->nodeValue;
		$results[] = [
			"url" => $hreflink,
			"title" => $firstspan === null ? "" : $firstspan->nodeValue,
			"breadcrumbs" => $breadcrumbs,
			"description" => $description,
		];
	}
	return $results;
}

/**
 * Runs one search against the back end and parses the answer.
 *
 * @param string       $s          search query
 * @param bool         $image      request image results (tbm=isch)
 * @param bool         $noredirect add nfpr=1 to the request
 * @param string|false $bindstring local "address:port" to bind the socket to, or false
 * @return array on failure ["status" => false, "code" => "failed"|"captcha"|"noimgclass"|"noclass"]
 *               (plus "url" for "captcha"); on success
 *               ["status" => true, "query" => string, "suggestion" => string|false, "results" => array]
 */
function query_google ($s, $image = false, $noredirect = false, $bindstring = false) {
	$us = urlencode($s);
	$extra = "";
	if ($image)
		$extra .= "&tbm=isch";
	if ($noredirect)
		$extra .= "&nfpr=1";
	$url = "http://wap.google.com/search?q=$us&num=100&ie=UTF-8$extra";
	$sockarr = [];
	if ($bindstring)
		$sockarr["bindto"] = $bindstring;
	$context = stream_context_create([
		"socket" => $sockarr,
		"http" => [
			// Error pages are still read so that the CAPTCHA page can be recognised.
			"ignore_errors" => true,
			"header" => "User-Agent: Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)\r\n",
		],
	]);
	$txtdoc = file_get_contents($url, false, $context);
	if (!$txtdoc)
		return ["status" => false, "code" => "failed"];
	if (str_contains($txtdoc, "In the meantime, solving the above CAPTCHA will let you continue"))
		return ["status" => false, "code" => "captcha", "url" => $url];
	// Class names are not fixed in the page, so they are looked up through
	// the style rule that defines them.
	$resultsforclass = find_class($txtdoc, "{color:#1967d2}");
	$imageclass = false;
	$titleclass = false;
	$descclass = false;
	if ($image) {
		$imageclass = find_class($txtdoc, "{font-family:Roboto,Helvetica,Arial,sans-serif}");
		if (!$imageclass)
			return ["status" => false, "code" => "noimgclass"];
	} else {
		$titleclass = find_class($txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
		$descclass = find_class($txtdoc, "{word-break:break-word}");
		if (!$titleclass || !$descclass)
			return ["status" => false, "code" => "noclass"];
	}
	$x = new DOMDocument();
	// Collect parser diagnostics internally instead of emitting PHP warnings,
	// which would otherwise be printed ahead of the response headers.
	$previous = libxml_use_internal_errors(true);
	$x->loadHTML($txtdoc);
	libxml_clear_errors();
	libxml_use_internal_errors($previous);
	$results = $image
		? parse_image_results($x, $imageclass)
		: parse_web_results($x, $titleclass, $descclass);
	// Links carrying the "results for" class and a spell=1 parameter are
	// collected as suggested queries; nfpr=1 links only count after one of
	// those. NOTE(review): presumably the "did you mean" / "showing results
	// for" links of the result page; confirm against a live response.
	$suggested = [];
	if ($resultsforclass)
		foreach ($x->getElementsByTagName("a") as $a) {
			if (!str_contains($a->getAttribute("class"), $resultsforclass))
				continue;
			$href = $a->getAttribute("href");
			if (str_contains($href, "&spell=1&") || (str_contains($href, "&nfpr=1&") && count($suggested) > 0))
				$suggested[] = $a->nodeValue;
		}
	// NOTE: there is also a "Skupaj z rezultati za" ("Including results
	// for") response variant that is not handled here.
	if (count($suggested) > 1 && !$noredirect)
		$s = $suggested[0];
	$suggestion = false;
	if (count($suggested) == 1 || ($noredirect && count($suggested) > 0))
		$suggestion = $suggested[0];
	return ["status" => true, "query" => $s, "suggestion" => $suggestion, "results" => $results];
}

/**
 * Renders a complete HTML page.
 *
 * @param string $title          page title (escaped here)
 * @param string $queryinfo      HTML shown in the heading below the form (inserted as is)
 * @param string $body           HTML of the page body (inserted as is)
 * @param string $query          value of the search box (escaped here)
 * @param string $additionalform extra HTML placed inside the form (inserted as is)
 * @param bool   $imgfirst       put the image-search button first
 * @return string the page
 */
function template ($title, $queryinfo, $body, $query = "", $additionalform = "", $imgfirst = false) {
	$query = html_escape($query);
	$title = html_escape($title);
	$buttons = "
		<button type=submit value=🔍 ><span>=></span></button>
		<button accesskey=f type=submit name=f value=Ʊ ><span>1.</span></button>
		<button accesskey=i type=submit name=i value=🖼><span>[^]</span></button>
	";
	if ($imgfirst) {
		$buttons = "
			<button type=submit name=i value=🖼><span>[^]</span></button>
			<button accesskey=i type=submit value=🔍 ><span>=></span></button>
			<button accesskey=f type=submit name=f value=Ʊ ><span>1.</span></button>
		";
	}
	return "
		<!DOCTYPE html>
		<html lang=sl>
		<head>
		<meta charset=UTF-8>
		<title>$title :: sear.c</title>
		<meta name=viewport content=width='device-width, initial-scale=1'>
		<link rel=stylesheet href=/css.css>
		<link rel=icon type=image/x-icon href=favicon.ico>
		<link title=sear.c rel=search type=application/opensearchdescription+xml href=/osdd.xml>
		</head>
		<body>
		<form class=container action=.>
		<input accesskey=s type=text name=q value='$query' placeholder='sear.php ...' size=50 />
		$buttons
		$additionalform
		</form>
		<h3>
		$queryinfo
		</h3>
		$body
		<hr>
		<h4 align=center>
		<a href=//ni.šijanec.eu/projects/r/tree/prog/sear.php>sear.php</a>
		<a href=javascript:window.external.AddSearchProvider(window.location.origin+'/osdd.xml') id=r hidden=hidden >registriraj v brskalnik</a>
		</h4>
		<script>
		if (typeof window.external.AddSearchProvider === 'function')
		document.getElementById('r').hidden = false;
		</script>
		</body>
	";
}

/**
 * Renders the result list as HTML.
 *
 * @param array     $results   results as returned by query_google()
 * @param bool      $plaintext rewrite the first "https:" of every link to "http:"
 * @param int|false $limit     maximum number of results to render, false for all
 * @return string HTML fragment
 */
function results_html ($results, $plaintext, $limit) {
	$r = "";
	$i = 0;
	foreach ($results as $result) {
		if ($limit && $i == $limit)
			return $r;
		$url = scalar_string($result["url"] ?? "");
		if ($plaintext)
			$url = downgrade_scheme($url);
		$safeurl = html_escape($url);
		if (isset($result["imgsrc"])) {
			$imgsrc = scalar_string($result["imgsrc"]);
			if ($plaintext)
				$imgsrc = downgrade_scheme($imgsrc);
			$safeimgsrc = html_escape($imgsrc);
			$r .= "
				<a id=result$i class=result href='$safeurl'>
				<img src='$safeimgsrc' />
				</a>
			";
		} else {
			$safetitle = trim(html_escape($result["title"] ?? ""));
			$safebreadcrumb = html_escape($result["breadcrumbs"] ?? "");
			$safedesc = html_escape($result["description"] ?? "");
			$r .= "
				<div id=result$i class=result>
				<h4>
				<a href='$safeurl' accesskey=$i>$safetitle</a>
				<span class=breadcrumb>
				$safebreadcrumb
				</span>
				</h4>
				<p>
				$safedesc
				</p>
				</div>
			";
		}
		$i++;
	}
	return $r;
}

/**
 * Turns a successful search response into the final output and terminates
 * the script: a "no results" page, a redirect to the first result, or the
 * result page.
 *
 * @param string    $query     query as typed by the user
 * @param array     $response  successful response in the query_google() format
 * @param int|false $limit     maximum number of results to render
 * @param bool      $plaintext see results_html()
 * @param bool      $horseshoe redirect to the first result instead of rendering
 * @param string    $add_url   "&name=value" pairs appended to generated links
 * @param string    $add_form  hidden form fields that carry the same options
 * @param bool      $image     whether this was an image search
 */
function handle_response ($query, $response, $limit, $plaintext, $horseshoe, $add_url, $add_form, $image) {
	$results = $response["results"] ?? [];
	if (!is_array($results))
		$results = [];
	if (count($results) == 0)
		die(template($query, "ni rezultatov", "vaše iskanje ni obrodilo sadov.", $query, "", $image));
	if ($horseshoe) {
		$target = scalar_string($results[0]["url"] ?? "");
		if ($target !== "") {
			header("Location: " . $target);
			// Nothing useful can follow a redirect; stop instead of rendering the page.
			die();
		}
	}
	$effective = $response["query"] ?? $query;
	$queryinfo = "";
	if ($effective != $query) {
		$safequeryurl = urlencode($query);
		$safequeryhtml = html_escape($query);
		$queryinfo .= "preusmeril sem vas iz <a href='?e=e$add_url&q=$safequeryurl'>$safequeryhtml</a> | ";
	}
	if (!empty($response["suggestion"])) {
		$safequeryurl = urlencode(scalar_string($response["suggestion"]));
		$safequeryhtml = html_escape($response["suggestion"]);
		$queryinfo .= "predlagam iskanje <a href='?e=e$add_url&q=$safequeryurl'>$safequeryhtml</a> | ";
	}
	$queryinfo .= count($results) . " zadetkov";
	$resultshtml = results_html($results, $plaintext, $limit);
	die(template($effective, $queryinfo, $resultshtml, $effective, $add_form, $image));
}

// ---- request handling ----

$add_form = "";
$add_url = "";
$q = null;
// Only plain strings are accepted; "q[]=..." would otherwise reach urlencode() as an array.
if (isset($_REQUEST["q"]) && is_string($_REQUEST["q"]))
	$q = $_REQUEST["q"];
$image = false;
if (!empty($_REQUEST["i"]))
	$image = true;
$exact = false;
if (!empty($_REQUEST["e"])) {
	$add_url .= "&e=e";
	$exact = true;
	$add_form .= "<input type=hidden name=e value=e />";
}
if (!empty($_REQUEST["json"])) {
	header("Content-Type: application/json");
	die(json_encode(query_google($q ?? "", $image, $exact)));
}
$limit = false;
// The limit is echoed into links and form fields, so only a positive integer is accepted.
if (!empty($_REQUEST["l"]) && is_scalar($_REQUEST["l"]) && (int) $_REQUEST["l"] > 0) {
	$limit = (int) $_REQUEST["l"];
	$add_url .= "&l=$limit";
	$add_form .= "<input type=hidden name=l value=$limit />";
}
$plaintext = false;
if (!empty($_REQUEST["h"])) {
	$plaintext = true;
	$add_url .= "&h=h";
	$add_form .= "<input type=hidden name=h value=h />";
}
$horseshoe = false;
if (!empty($_REQUEST["f"])) {
	$horseshoe = true;
	$add_url .= "&f=f";
	$add_form .= "<input type=hidden name=f value=f />";
}
if ($q === null || $q === "") {
	die(template("", "iskalnik sear.php", "dobrodošli v iskalniku sear.php, naslednjiku programa sear.c. za iskanje po spletu nekaj vnesite v iskalno vrstico zgoraj in pritisnite na gumb."));
}

// Every local address is a candidate source address for the outgoing request;
// the next one is tried when the back end answers with a CAPTCHA or the
// request fails.
$bindstrings = [];
foreach (net_get_interfaces() ?: [] as $interface)
	foreach ($interface["unicast"] ?? [] as $unicast) {
		if (!isset($unicast["address"]))
			continue;
		$address = $unicast["address"];
		if ($address == "::1") // requests bound to this address time out
			continue;
		$bindstring = "$address:0";
		if (str_contains($address, ":"))
			$bindstring = "[$address]:0";
		$bindstrings[] = $bindstring;
	}
// The BINDSTRINGS environment variable (comma separated) overrides the detected list.
if (getenv("BINDSTRINGS"))
	$bindstrings = explode(",", getenv("BINDSTRINGS"));
// Without any usable address, still make one request and let the system pick the source.
if (count($bindstrings) == 0)
	$bindstrings = [false];
$response = ["status" => false, "code" => "failed"];
foreach ($bindstrings as $bindstring) {
	$response = query_google($q, $image, $exact, $bindstring);
	if ($response["status"] == true)
		break;
	if ($response["code"] == "captcha" || $response["code"] == "failed")
		continue;
	break;
}
if ($response["status"] != true) {
	if ($response["code"] == "captcha") {
		// Parameters for the fallback request are kept apart from $add_url,
		// which still has to carry the l/h/f options into the rendered links.
		// NOTE(review): neither the image flag nor a json parameter is
		// forwarded; confirm what the fallback endpoint expects.
		$fallback_args = $exact ? "&e=e" : "";
		foreach (["http://searc.oliwerix.com/sear.php?ref=b"] as $fallback) {
			$raw = file_get_contents($fallback . $fallback_args . "&q=" . urlencode($q));
			if ($raw === false)
				continue;
			$json = json_decode($raw, true);
			if (!is_array($json) || !isset($json["status"]))
				continue;
			if ($json["status"] == false) {
				if (($json["code"] ?? null) == "captcha")
					continue;
				// The code comes from a remote server and is therefore escaped.
				die(template("napaka", "napaka pri posredovanju zahteve", "ker je poizvedba vrnila captcho, sem jo posredoval na drug strežnik, ta pa je vrnil napako " . html_escape($json["code"] ?? "") . ".", $q));
			}
			handle_response($q, $json, $limit, $plaintext, $horseshoe, $add_url, $add_form, $image);
		}
		die(template("napaka", "iskalno zaledje omejuje iskanja", "iskalno zaledje se je odzvalo s CAPTCHO, češ, da ta instanca prehitro pošilja zahteve. obenem se je enako zgodilo tudi vsem rezervnim strežnikom. iščete lahko ročno preko spletne strani zaledne storitve: <a href='" . html_escape($response["url"] ?? "") . "'>kliknite sem</a>.", $q));
	}
	die(template("napaka", "napaka pri poizvedbi", "poizvedba na iskalno zaledje je vrnila napako " . html_escape($response["code"] ?? "") . ".", $q));
}
handle_response($q, $response, $limit, $plaintext, $horseshoe, $add_url, $add_form, $image);