LibreY

- privacy-respecting meta search engine
git clone git://git.acid.vegas/LibreY.git
Log | Files | Refs | Archive | README | LICENSE

commit dd8df71abdb5aa2276199abeec7ea068a59755ff
parent 68b5de6bd61fe0994a9da04315ce6c6aa895814a
Author: Ahwx <ahwx@ahwx.org>
Date: Thu, 31 Aug 2023 14:07:17 +0200

feat: cache results to combat spammers (merge pull request #34 from davidovski/results_caching) Results caching

Diffstat:
Mconfig.php.example | 3+++
Mengines/ahmia/hidden_service.php | 3+--
Mengines/bittorrent/1337x.php | 4+---
Mengines/bittorrent/merge.php | 2+-
Mengines/bittorrent/nyaa.php | 3+--
Mengines/bittorrent/rutor.php | 3+--
Mengines/bittorrent/thepiratebay.php | 3+--
Mengines/bittorrent/torrentgalaxy.php | 3+--
Mengines/bittorrent/yts.php | 3+--
Mengines/invidious/video.php | 3+--
Mengines/librex/fallback.php | 17++++++++++++-----
Mengines/qwant/image.php | 4++--
Mengines/special/currency.php | 4+---
Mengines/special/definition.php | 4+---
Mengines/special/ip.php | 2+-
Mengines/special/tor.php | 4+---
Mengines/special/user_agent.php | 2+-
Mengines/special/weather.php | 3+--
Mengines/special/wikipedia.php | 4+---
Mengines/text/duckduckgo.php | 10+++++++---
Mengines/text/google.php | 10++++++++--
Mengines/text/text.php | 39++++++++++++++++++++++++++-------------
Mmisc/cooldowns.php | 23+++++++++++++++++++++++
Mmisc/search_engine.php | 46+++++++++++++++++++++++++++++++++++++---------

24 files changed, 134 insertions(+), 68 deletions(-)

diff --git a/config.php.example b/config.php.example
@@ -23,6 +23,9 @@
         // how long in minutes to put google/other instances on cooldown if they aren't responding
         "request_cooldown" => 25,
 
+        // how long, in minutes, to keep results in the cache
+        "cache_time" => 20,
+
         /*
             Preset privacy friendly frontends for users, these can be overwritten by users in the settings
             e.g.: Preset the invidious instance URL: "instance_url" => "https://yewtu.be",
diff --git a/engines/ahmia/hidden_service.php b/engines/ahmia/hidden_service.php
@@ -6,8 +6,7 @@
             return "https://ahmia.fi/search/?q=" . urlencode($this->query);
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
+        public function parse_results($response) {
             $results = array();
             $xpath = get_xpath($response);
 
diff --git a/engines/bittorrent/1337x.php b/engines/bittorrent/1337x.php
@@ -5,9 +5,7 @@
             return "https://1337x.to/search/$query/1/";
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
-
+        public function parse_results($response) {
             $xpath = get_xpath($response);
             $results = array();
 
diff --git a/engines/bittorrent/merge.php b/engines/bittorrent/merge.php
@@ -21,7 +21,7 @@
             );
         }
 
-        public function get_results() {
+        public function parse_results($response) {
             $results = array();
             foreach ($this->requests as $request) {
                 if ($request->successful())
diff --git a/engines/bittorrent/nyaa.php b/engines/bittorrent/nyaa.php
@@ -6,8 +6,7 @@
             return "https://$this->SOURCE/?q=" . urlencode($this->query);
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
+        public function parse_results($response) {
             $xpath = get_xpath($response);
             $results = array();
 
diff --git a/engines/bittorrent/rutor.php b/engines/bittorrent/rutor.php
@@ -4,8 +4,7 @@
             return "http://rutor.info/search/" . urlencode($this->query);
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
+        public function parse_results($response) {
             $xpath = get_xpath($response);
             $results = array();
 
diff --git a/engines/bittorrent/thepiratebay.php b/engines/bittorrent/thepiratebay.php
@@ -4,8 +4,7 @@
             return "https://apibay.org/q.php?q=" . urlencode($this->query);
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
+        public function parse_results($response) {
             $results = array();
             $json_response = json_decode($response, true);
 
diff --git a/engines/bittorrent/torrentgalaxy.php b/engines/bittorrent/torrentgalaxy.php
@@ -5,8 +5,7 @@
             return "https://torrentgalaxy.to/torrents.php?search=$query#results";
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
+        public function parse_results($response) {
             $xpath = get_xpath($response);
             $results = array();
 
diff --git a/engines/bittorrent/yts.php b/engines/bittorrent/yts.php
@@ -4,9 +4,8 @@
             return "https://yts.mx/api/v2/list_movies.json?query_term=" . urlencode($this->query);
         }
 
-        public function get_results() {
+        public function parse_results($response) {
             $response = curl_multi_getcontent($this->ch);
-            global $config;
             $results = array();
             $json_response = json_decode($response, true);
 
diff --git a/engines/invidious/video.php b/engines/invidious/video.php
@@ -6,9 +6,8 @@
             return "$this->instance_url/api/v1/search?q=$query";
         }
 
-        public function get_results() {
+        public function parse_results($response) {
             $results = array();
-            $response = curl_multi_getcontent($this->ch);
             $json_response = json_decode($response, true);
 
             foreach ($json_response as $response) {
diff --git a/engines/librex/fallback.php b/engines/librex/fallback.php
@@ -7,11 +7,10 @@
         }
 
         public function get_request_url() {
-           return $this->instance . "api.php?" . opts_to_params($this->opts);
+           return $this->instance . "api.php?" . opts_to_params($this->opts) . "&nfb=1";
         }
 
-        public function get_results() {
-            $response = curl_exec($this->ch);
+        public function parse_results($response) {
             $response = json_decode($response, true);
             if (!$response)
                 return array();
@@ -47,13 +46,17 @@
 
             $instance = array_pop($instances);
 
+            if (!$instance)
+                break;
+
             if (parse_url($instance)["host"] == parse_url($_SERVER['HTTP_HOST'])["host"])
                 continue;
 
             $librex_request = new LibreXFallback($instance, $opts, null);
+
             $results = $librex_request->get_results();
 
-            if (count($results) > 1)
+            if (!empty($results))
                 return $results;
 
             // on fail then do this
@@ -62,7 +65,11 @@
 
         } while (!empty($instances));
 
-        return array();
+        return array(
+            "error" => array(
+                "message" => "No results found. Unable to fallback to other instances."
+            )
+        );
     }
 
 ?>
diff --git a/engines/qwant/image.php b/engines/qwant/image.php
@@ -7,9 +7,9 @@
             return "https://lite.qwant.com/?q=$query&t=images&p=$page";
         }
 
-        public function get_results() {
+        public function parse_results($response) {
             $results = array();
-            $xpath = get_xpath(curl_multi_getcontent($this->ch));
+            $xpath = get_xpath($response);
 
             if (!$xpath)
                 return $results;
diff --git a/engines/special/currency.php b/engines/special/currency.php
@@ -4,9 +4,7 @@
             return "https://cdn.moneyconvert.net/api/latest.json";
         }
         
-        public function get_results() { 
-            $response = curl_multi_getcontent($this->ch);
-
+        public function parse_results($response) {
             $split_query = explode(" ", $this->query);
 
             $base_currency = strtoupper($split_query[1]);
diff --git a/engines/special/definition.php b/engines/special/definition.php
@@ -8,9 +8,7 @@
             return "https://api.dictionaryapi.dev/api/v2/entries/en/$word_to_define";
         }
         
-        public function get_results() {        
-            $response = curl_multi_getcontent($this->ch);
-
+        public function parse_results($response) {
             $json_response = json_decode($response, true);
 
             if (!array_key_exists("title", $json_response))
diff --git a/engines/special/ip.php b/engines/special/ip.php
@@ -1,6 +1,6 @@
 <?php
     class IPRequest extends EngineRequest {
-        function get_results() {
+        public function parse_results($response) {
             return array(
                 "special_response" => array(
                     "response" => $_SERVER["REMOTE_ADDR"],
diff --git a/engines/special/tor.php b/engines/special/tor.php
@@ -5,9 +5,7 @@
             return "https://check.torproject.org/torbulkexitlist";
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($ch);
-
+        public function parse_results($response) {
             $formatted_response = strpos($response, $_SERVER["REMOTE_ADDR"]) ? "It seems like you are using Tor" : "It seems like you are not using Tor";
             $source = "https://check.torproject.org";
             
diff --git a/engines/special/user_agent.php b/engines/special/user_agent.php
@@ -1,6 +1,6 @@
 <?php
     class UserAgentRequest extends EngineRequest {
-        function get_results() {
+        public function parse_results($response) {
             return array(
                 "special_response" => array(
                     "response" => $_SERVER["HTTP_USER_AGENT"], 
diff --git a/engines/special/weather.php b/engines/special/weather.php
@@ -4,8 +4,7 @@
             return "https://wttr.in/@" . $_SERVER["REMOTE_ADDR"] . "?format=j1";
         }
         
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
+        public function parse_results($response) {
             $json_response = json_decode($response, true);
 
             if (!$json_response)
diff --git a/engines/special/wikipedia.php b/engines/special/wikipedia.php
@@ -10,9 +10,7 @@
                 return "https://$this->wikipedia_language.wikipedia.org/w/api.php?format=json&action=query&prop=extracts%7Cpageimages&exintro&explaintext&redirects=1&pithumbsize=500&titles=$query_encoded";
         }
 
-        public function get_results() {
-            $response = curl_multi_getcontent($this->ch);
-
+        public function parse_results($response) {
             $json_response = json_decode($response, true);
 
             $first_page = array_values($json_response["query"]["pages"])[0];
diff --git a/engines/text/duckduckgo.php b/engines/text/duckduckgo.php
@@ -19,12 +19,16 @@
 
             if (isset($_COOKIE["safe_search"]))
                 $url .= "&safe=medium";
+
             return $url;
         }
 
-        public function get_results() {
+        public function parse_results($response) {
             $results = array();
-            $xpath = get_xpath(curl_multi_getcontent($this->ch));
+            $xpath = get_xpath($response);
+
+            if (!$xpath)
+                return $results;
             
             foreach($xpath->query("/html/body/div[1]/div[". count($xpath->query('/html/body/div[1]/div')) ."]/div/div/div[contains(@class, 'web-result')]/div") as $result)
             {
@@ -57,7 +61,7 @@
                     )
                 );
            }
-            return $results;
+           return $results;
         }
 
     }
diff --git a/engines/text/google.php b/engines/text/google.php
@@ -26,9 +26,9 @@
         }
 
 
-        public function get_results() {
+        public function parse_results($response) {
             $results = array();
-            $xpath = get_xpath(curl_multi_getcontent($this->ch));
+            $xpath = get_xpath($response);
 
             if (!$xpath)
                 return $results;
@@ -70,6 +70,12 @@
                 );
             }
 
+            if (empty($results) && !str_contains($response, "Our systems have detected unusual traffic from your computer network.")) {
+                $results["error"] = array(
+                    "message" => "There are no results. Please try different keywords!"
+                );
+            }
+
             return $results;
         }
     }
diff --git a/engines/text/text.php b/engines/text/text.php
@@ -12,9 +12,6 @@
             if (substr($this->query, 0, 1) == "!" || substr($last_word_query, 0, 1) == "!")
                 check_ddg_bang($this->query, $opts);
 
-            if (has_cooldown($this->engine, $this->opts->cooldowns))
-                return;
-
             if ($this->engine == "google") {
                 
                 require "engines/text/google.php";
@@ -26,33 +23,49 @@
                 $this->engine_request = new DuckDuckGoRequest($opts, $mh);
             }
 
+            if (has_cooldown($this->engine, $this->opts->cooldowns) && !has_cached_results($this->engine_request->url)) {
+                // TODO: don't add the request's curl handle to the multi handle in the first place
+                curl_multi_remove_handle($mh, $this->engine_request->ch);
+                $this->engine_request = null;
+                return;
+            }
+
+
             require "engines/special/special.php";
             $this->special_request = get_special_search_request($opts, $mh);
         }
 
-        public function get_results() {
-            if (!$this->engine_request)
+        public function parse_results($response) {
+            if (!isset($this->engine_request))
                 return array();
 
             $results = $this->engine_request->get_results();
 
-            if ($this->special_request) {
-                $special_result = $this->special_request->get_results();
+            if (empty($results)) {
+                set_cooldown($this->engine, ($opts->request_cooldown ?? "1") * 60, $this->opts->cooldowns);
+            } else {
+                if ($this->special_request) {
+                    $special_result = $this->special_request->get_results();
 
-                if ($special_result)
-                    $results = array_merge(array($special_result), $results);
+                    if ($special_result)
+                        $results = array_merge(array($special_result), $results);
+                }
             }
 
-            if (count($results) <= 1)
-                set_cooldown($this->engine, ($opts->request_cooldown ?? "1") * 60, $this->opts->cooldowns);
-
             return $results;
         }
 
         public static function print_results($results) {
 
-            if (empty($results))
+            if (empty($results)) {
+                echo "<div class=\"text-result-container\"><p>An error occured fetching results</p></div>";
                 return;
+            }
+
+            if (array_key_exists("error", $results)) {
+                echo "<div class=\"text-result-container\"><p>" . $results["error"]["message"] . "</p></div>";
+                return;
+            }
 
             $special = $results[0];
 
diff --git a/misc/cooldowns.php b/misc/cooldowns.php
@@ -1,4 +1,8 @@
 <?php
+    if (!function_exists("apcu_fetch"))
+        error_log("apcu is not installed! Please consider installing php-pecl-apcu for significant performance improvements");
+
+
     function load_cooldowns() {
         if (function_exists("apcu_fetch"))
             return apcu_exists("cooldowns") ? apcu_fetch("cooldowns") : array();
@@ -19,4 +23,23 @@
     function has_cooldown($instance, $cooldowns) {
         return ($cooldowns[$instance] ?? 0) > time();
     }
+    
+    function has_cached_results($url) {
+        if (function_exists("apcu_exists"))
+            return apcu_exists("cached:$url");
+
+        return false;
+    }
+
+    function store_cached_results($url, $results, $ttl = 0) {
+        if (function_exists("apcu_store") && !empty($results))
+            return apcu_store("cached:$url", $results, $ttl);
+    }
+
+    function fetch_cached_results($url) {
+        if (function_exists("apcu_fetch"))
+            return apcu_fetch("cached:$url");
+
+        return array();
+    }
 ?>
diff --git a/misc/search_engine.php b/misc/search_engine.php
@@ -1,15 +1,21 @@
 <?php
+    require "misc/cooldowns.php";
     abstract class EngineRequest {
+        protected $DO_CACHING = true;
         function __construct($opts, $mh) {
             $this->query = $opts->query;
             $this->page = $opts->page;
+            $this->mh = $mh;
             $this->opts = $opts;
 
-            $url = $this->get_request_url();
-            if (!$url)
+            $this->url = $this->get_request_url();
+            if (!$this->url)
                 return;
 
-            $this->ch = curl_init($url);
+            if (has_cached_results($this->url))
+                return;
+
+            $this->ch = curl_init($this->url);
 
             if ($opts->curl_settings)
                 curl_setopt_array($this->ch, $opts->curl_settings);
@@ -23,10 +29,31 @@
         }
 
         public function successful() {
-            return curl_getinfo($this->ch)['http_code'] == '200';
+            return (isset($this->ch) && curl_getinfo($this->ch)['http_code'] == '200') 
+                || has_cached_results($this->url);
+        }
+
+        abstract function parse_results($response);
+
+        public function get_results() {
+            if (!isset($this->url))
+                return $this->parse_results(null);
+
+            if ($this->DO_CACHING && has_cached_results($this->url))
+                return fetch_cached_results($this->url);
+
+            if (!isset($this->ch))
+                return $this->parse_results(null);
+
+            $response = $this->mh ? curl_multi_getcontent($this->ch) : curl_exec($this->ch);
+            $results = $this->parse_results($response) ?? array();
+
+            if ($this->DO_CACHING && !empty($results))
+                store_cached_results($this->url, $results, $this->opts->cache_time * 60);
+
+            return $results;
         }
 
-        abstract function get_results();
         static public function print_results($results){}
     }
 
@@ -34,6 +61,7 @@
         $opts = require "config.php";
 
         $opts->request_cooldown ??= 25;
+        $opts->cache_time ??= 25;
 
         $opts->query = trim($_REQUEST["q"] ?? "");
         $opts->type = (int) ($_REQUEST["t"] ?? 0);
@@ -47,7 +75,7 @@
 
         $opts->disable_frontends = (int) ($_REQUEST["nf"] ?? 0) == 1 || isset($_COOKIE["disable_frontends"]);
 
-        $opts->language = $_REQUEST["lang"] ?? trim(htmlspecialchars($_COOKIE["language"] ?? $opts->language));
+        $opts->language = $_REQUEST["lang"] ?? trim(htmlspecialchars($_COOKIE["language"] ?? $opts->language ?? "en"));
 
         $opts->do_fallback = (int) ($_REQUEST["nfb"] ?? 0) == 0;
         if (!$opts->instance_fallback) {
@@ -60,6 +88,8 @@
             $opts->frontends[$frontend]["instance_url"] = $_COOKIE[$frontend] ?? $opts->frontends[$frontend]["instance_url"];
         }
 
+        $opts->curl_settings[CURLOPT_FOLLOWLOCATION] ??= true;
+
         return $opts;
     }
 
@@ -70,7 +100,6 @@
         $params .= "p=$opts->page";
         $params .= "&q=$query";
         $params .= "&t=$opts->type";
-        $params .= "&nfb=" . ($opts->do_fallback ? 0 : 1);
         $params .= "&safe=" . ($opts->safe_search ? 1 : 0);
         $params .= "&nf=" . ($opts->disable_frontends ? 1 : 0);
         $params .= "&ns=" . ($opts->disable_special ? 1 : 0);
@@ -113,7 +142,6 @@
     }
 
     function fetch_search_results($opts, $do_print) {
-        require "misc/cooldowns.php";
         $opts->cooldowns = load_cooldowns();
 
         $start_time = microtime(true);
@@ -128,7 +156,7 @@
 
         $results = $search_category->get_results();
 
-        if (count($results) <= 1) {
+        if (empty($results)) {
             require "engines/librex/fallback.php";
             $results = get_librex_results($opts);
         }