From e4faa82362348912516e17b5bbae3d9fe2c15413 Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Fri, 9 Aug 2024 20:53:53 +0800 Subject: [PATCH 01/12] Fix #307, update outdated searxng/settings.yml --- searxng/settings.yml | 2345 +----------------------------------------- 1 file changed, 3 insertions(+), 2342 deletions(-) diff --git a/searxng/settings.yml b/searxng/settings.yml index da973c1..54d27c4 100644 --- a/searxng/settings.yml +++ b/searxng/settings.yml @@ -1,2356 +1,17 @@ -general: - # Debug mode, only for development. Is overwritten by ${SEARXNG_DEBUG} - debug: false - # displayed name - instance_name: 'searxng' - # For example: https://example.com/privacy - privacypolicy_url: false - # use true to use your own donation page written in searx/info/en/donate.md - # use false to disable the donation link - donation_url: false - # mailto:contact@example.com - contact_url: false - # record stats - enable_metrics: true +use_default_settings: true -brand: - new_issue_url: https://github.com/searxng/searxng/issues/new - docs_url: https://docs.searxng.org/ - public_instances: https://searx.space - wiki_url: https://github.com/searxng/searxng/wiki - issue_url: https://github.com/searxng/searxng/issues - # custom: - # maintainer: "Jon Doe" - # # Custom entries in the footer: [title]: [link] - # links: - # Uptime: https://uptime.searxng.org/history/darmarit-org - # About: "https://searxng.org" +general: + instance_name: 'searxng' search: - # Filter results. 0: None, 1: Moderate, 2: Strict - safe_search: 0 - # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl", - # "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off - # by default. autocomplete: 'google' - # minimun characters to type before autocompleter starts - autocomplete_min: 4 - # Default search language - leave blank to detect from browser information or - # use codes from 'languages.py' - default_lang: 'auto' - # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages - # Available languages - # languages: - # - all - # - en - # - en-US - # - de - # - it-IT - # - fr - # - fr-BE - # ban time in seconds after engine errors - ban_time_on_fail: 5 - # max ban time in seconds after engine errors - max_ban_time_on_fail: 120 - suspended_times: - # Engine suspension time after error (in seconds; set to 0 to disable) - # For error "Access denied" and "HTTP error [402, 403]" - SearxEngineAccessDenied: 86400 - # For error "CAPTCHA" - SearxEngineCaptcha: 86400 - # For error "Too many request" and "HTTP error 429" - SearxEngineTooManyRequests: 3600 - # Cloudflare CAPTCHA - cf_SearxEngineCaptcha: 1296000 - cf_SearxEngineAccessDenied: 86400 - # ReCAPTCHA - recaptcha_SearxEngineCaptcha: 604800 - - # remove format to deny access, use lower case. - # formats: [html, csv, json, rss] formats: - html - json server: - # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} - port: 8888 - bind_address: '127.0.0.1' - # public URL of the instance, to ensure correct inbound links. Is overwritten - # by ${SEARXNG_URL}. - base_url: / # "http://example.com/location" - limiter: false # rate limit the number of request on the instance, block some bots - public_instance: false # enable features designed only for public instances - - # If your instance owns a /etc/searxng/settings.yml file, then set the following - # values there. - secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET} - # Proxying image results through searx - image_proxy: false - # 1.0 and 1.1 are supported - http_protocol_version: '1.0' - # POST queries are more secure as they don't show up in history but may cause - # problems when using Firefox containers - method: 'POST' - default_http_headers: - X-Content-Type-Options: nosniff - X-Download-Options: noopen - X-Robots-Tag: noindex, nofollow - Referrer-Policy: no-referrer - -redis: - # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. - # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis - url: false - -ui: - # Custom static path - leave it blank if you didn't change - static_path: '' - static_use_hash: false - # Custom templates path - leave it blank if you didn't change - templates_path: '' - # query_in_title: When true, the result page's titles contains the query - # it decreases the privacy, since the browser can records the page titles. - query_in_title: false - # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page. - infinite_scroll: false - # ui theme - default_theme: simple - # center the results ? - center_alignment: false - # URL prefix of the internet archive, don't forget trailing slash (if needed). - # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" - # Default interface locale - leave blank to detect from browser information or - # use codes from the 'locales' config section - default_locale: '' - # Open result links in a new tab by default - # results_on_new_tab: false - theme_args: - # style of simple theme: auto, light, dark - simple_style: auto - # Perform search immediately if a category selected. - # Disable to select multiple categories at once and start the search manually. - search_on_category_select: true - # Hotkeys: default or vim - hotkeys: default - -# Lock arbitrary settings on the preferences page. To find the ID of the user -# setting you want to lock, check the ID of the form on the page "preferences". -# -# preferences: -# lock: -# - language -# - autocomplete -# - method -# - query_in_title - -# searx supports result proxification using an external service: -# https://github.com/asciimoo/morty uncomment below section if you have running -# morty proxy the key is base64 encoded (keep the !!binary notation) -# Note: since commit af77ec3, morty accepts a base64 encoded key. -# -# result_proxy: -# url: http://127.0.0.1:3000/ -# # the key is a base64 encoded string, the YAML !!binary prefix is optional -# key: !!binary "your_morty_proxy_key" -# # [true|false] enable the "proxy" button next to each result -# proxify_results: true - -# communication with search engines -# -outgoing: - # default timeout in seconds, can be override by engine - request_timeout: 3.0 - # the maximum timeout in seconds - # max_request_timeout: 10.0 - # suffix of searx_useragent, could contain information like an email address - # to the administrator - useragent_suffix: '' - # The maximum number of concurrent connections that may be established. - pool_connections: 100 - # Allow the connection pool to maintain keep-alive connections below this - # point. - pool_maxsize: 20 - # See https://www.python-httpx.org/http2/ - enable_http2: true - # uncomment below section if you want to use a custom server certificate - # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults - # and https://www.python-httpx.org/compatibility/#ssl-configuration - # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer - # - # uncomment below section if you want to use a proxyq see: SOCKS proxies - # https://2.python-requests.org/en/latest/user/advanced/#proxies - # are also supported: see - # https://2.python-requests.org/en/latest/user/advanced/#socks - # - # proxies: - # all://: - # - http://proxy1:8080 - # - http://proxy2:8080 - # - # using_tor_proxy: true - # - # Extra seconds to add in order to account for the time taken by the proxy - # - # extra_proxy_timeout: 10.0 - # - # uncomment below section only if you have more than one network interface - # which can be the source of outgoing search requests - # - # source_ips: - # - 1.1.1.1 - # - 1.1.1.2 - # - fe80::/126 - -# External plugin configuration, for more details see -# https://docs.searxng.org/dev/plugins.html -# -# plugins: -# - plugin1 -# - plugin2 -# - ... - -# Comment or un-comment plugin to activate / deactivate by default. -# -# enabled_plugins: -# # these plugins are enabled if nothing is configured .. -# - 'Hash plugin' -# - 'Self Information' -# - 'Tracker URL remover' -# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy -# # these plugins are disabled if nothing is configured .. -# - 'Hostname replace' # see hostname_replace configuration below -# - 'Open Access DOI rewrite' -# - 'Tor check plugin' -# # Read the docs before activate: auto-detection of the language could be -# # detrimental to users expectations / users can activate the plugin in the -# # preferences if they want. -# - 'Autodetect search language' - -# Configuration of the "Hostname replace" plugin: -# -# hostname_replace: -# '(.*\.)?youtube\.com$': 'invidious.example.com' -# '(.*\.)?youtu\.be$': 'invidious.example.com' -# '(.*\.)?youtube-noocookie\.com$': 'yotter.example.com' -# '(.*\.)?reddit\.com$': 'teddit.example.com' -# '(.*\.)?redd\.it$': 'teddit.example.com' -# '(www\.)?twitter\.com$': 'nitter.example.com' -# # to remove matching host names from result list, set value to false -# 'spam\.example\.com': false - -checker: - # disable checker when in debug mode - off_when_debug: true - - # use "scheduling: false" to disable scheduling - # scheduling: interval or int - - # to activate the scheduler: - # * uncomment "scheduling" section - # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" - # to your uwsgi.ini - - # scheduling: - # start_after: [300, 1800] # delay to start the first run of the checker - # every: [86400, 90000] # how often the checker runs - - # additional tests: only for the YAML anchors (see the engines section) - # - additional_tests: - rosebud: &test_rosebud - matrix: - query: rosebud - lang: en - result_container: - - not_empty - - ['one_title_contains', 'citizen kane'] - test: - - unique_results - - android: &test_android - matrix: - query: ['android'] - lang: ['en', 'de', 'fr', 'zh-CN'] - result_container: - - not_empty - - ['one_title_contains', 'google'] - test: - - unique_results - - # tests: only for the YAML anchors (see the engines section) - tests: - infobox: &tests_infobox - infobox: - matrix: - query: ['linux', 'new york', 'bbc'] - result_container: - - has_infobox - -categories_as_tabs: - general: - images: - videos: - news: - map: - music: - it: - science: - files: - social media: engines: - - name: 9gag - engine: 9gag - shortcut: 9g - disabled: true - - - name: annas archive - engine: annas_archive - disabled: true - shortcut: aa - - # - name: annas articles - # engine: annas_archive - # shortcut: aaa - # # https://docs.searxng.org/dev/engines/online/annas_archive.html - # aa_content: 'journal_article' # book_any .. magazine, standards_document - # aa_ext: 'pdf' # pdf, epub, .. - # aa_sort: 'newest' # newest, oldest, largest, smallest - - - name: apk mirror - engine: apkmirror - timeout: 4.0 - shortcut: apkm - disabled: true - - - name: apple app store - engine: apple_app_store - shortcut: aps - disabled: true - - # Requires Tor - - name: ahmia - engine: ahmia - categories: onions - enable_http: true - shortcut: ah - - - name: anaconda - engine: xpath - paging: true - first_page_num: 0 - search_url: https://anaconda.org/search?q={query}&page={pageno} - results_xpath: //tbody/tr - url_xpath: ./td/h5/a[last()]/@href - title_xpath: ./td/h5 - content_xpath: ./td[h5]/text() - categories: it - timeout: 6.0 - shortcut: conda - disabled: true - - - name: arch linux wiki - engine: archlinux - shortcut: al - - - name: artic - engine: artic - shortcut: arc - timeout: 4.0 - - - name: arxiv - engine: arxiv - shortcut: arx - timeout: 4.0 - - - name: ask - engine: ask - shortcut: ask - disabled: true - - # tmp suspended: dh key too small - # - name: base - # engine: base - # shortcut: bs - - - name: bandcamp - engine: bandcamp - shortcut: bc - categories: music - - - name: wikipedia - engine: wikipedia - shortcut: wp - # add "list" to the array to get results in the results list - display_type: ['infobox'] - base_url: 'https://{language}.wikipedia.org/' - categories: [general] - - - name: bilibili - engine: bilibili - shortcut: bil - disabled: true - - - name: bing - engine: bing - shortcut: bi - disabled: true - - - name: bing images - engine: bing_images - shortcut: bii - - - name: bing news - engine: bing_news - shortcut: bin - - - name: bing videos - engine: bing_videos - shortcut: biv - - - name: bitbucket - engine: xpath - paging: true - search_url: https://bitbucket.org/repo/all/{pageno}?name={query} - url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href - title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] - content_xpath: //article[@class="repo-summary"]/p - categories: [it, repos] - timeout: 4.0 - disabled: true - shortcut: bb - about: - website: https://bitbucket.org/ - wikidata_id: Q2493781 - official_api_documentation: https://developer.atlassian.com/bitbucket - use_official_api: false - require_api_key: false - results: HTML - - - name: bpb - engine: bpb - shortcut: bpb - disabled: true - - - name: btdigg - engine: btdigg - shortcut: bt - disabled: true - - - name: ccc-tv - engine: xpath - paging: false - search_url: https://media.ccc.de/search/?q={query} - url_xpath: //div[@class="caption"]/h3/a/@href - title_xpath: //div[@class="caption"]/h3/a/text() - content_xpath: //div[@class="caption"]/h4/@title - categories: videos - disabled: true - shortcut: c3tv - about: - website: https://media.ccc.de/ - wikidata_id: Q80729951 - official_api_documentation: https://github.com/voc/voctoweb - use_official_api: false - require_api_key: false - results: HTML - # We don't set language: de here because media.ccc.de is not just - # for a German audience. It contains many English videos and many - # German videos have English subtitles. - - - name: openverse - engine: openverse - categories: images - shortcut: opv - - - name: chefkoch - engine: chefkoch - shortcut: chef - # to show premium or plus results too: - # skip_premium: false - - # - name: core.ac.uk - # engine: core - # categories: science - # shortcut: cor - # # get your API key from: https://core.ac.uk/api-keys/register/ - # api_key: 'unset' - - - name: crossref - engine: crossref - shortcut: cr - timeout: 30 - disabled: true - - - name: crowdview - engine: json_engine - shortcut: cv - categories: general - paging: false - search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} - results_query: results - url_query: link - title_query: title - content_query: snippet - disabled: true - about: - website: https://crowdview.ai/ - - - name: yep - engine: yep - shortcut: yep - categories: general - search_type: web - disabled: true - - - name: yep images - engine: yep - shortcut: yepi - categories: images - search_type: images - disabled: true - - - name: yep news - engine: yep - shortcut: yepn - categories: news - search_type: news - disabled: true - - - name: curlie - engine: xpath - shortcut: cl - categories: general - disabled: true - paging: true - lang_all: '' - search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189 - page_size: 20 - results_xpath: //div[@id="site-list-content"]/div[@class="site-item"] - url_xpath: ./div[@class="title-and-desc"]/a/@href - title_xpath: ./div[@class="title-and-desc"]/a/div - content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"] - about: - website: https://curlie.org/ - wikidata_id: Q60715723 - use_official_api: false - require_api_key: false - results: HTML - - - name: currency - engine: currency_convert - categories: general - shortcut: cc - - - name: bahnhof - engine: json_engine - search_url: https://www.bahnhof.de/api/stations/search/{query} - url_prefix: https://www.bahnhof.de/ - url_query: slug - title_query: name - content_query: state - shortcut: bf - disabled: true - about: - website: https://www.bahn.de - wikidata_id: Q22811603 - use_official_api: false - require_api_key: false - results: JSON - language: de - - - name: deezer - engine: deezer - shortcut: dz - disabled: true - - - name: destatis - engine: destatis - shortcut: destat - disabled: true - - - name: deviantart - engine: deviantart - shortcut: da - timeout: 3.0 - - - name: ddg definitions - engine: duckduckgo_definitions - shortcut: ddd - weight: 2 - disabled: true - tests: *tests_infobox - - # cloudflare protected - # - name: digbt - # engine: digbt - # shortcut: dbt - # timeout: 6.0 - # disabled: true - - - name: docker hub - engine: docker_hub - shortcut: dh - categories: [it, packages] - - - name: erowid - engine: xpath - paging: true - first_page_num: 0 - page_size: 30 - search_url: https://www.erowid.org/search.php?q={query}&s={pageno} - url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href - title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() - content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] - categories: [] - shortcut: ew - disabled: true - about: - website: https://www.erowid.org/ - wikidata_id: Q1430691 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - # - name: elasticsearch - # shortcut: es - # engine: elasticsearch - # base_url: http://localhost:9200 - # username: elastic - # password: changeme - # index: my-index - # # available options: match, simple_query_string, term, terms, custom - # query_type: match - # # if query_type is set to custom, provide your query here - # #custom_query_json: {"query":{"match_all": {}}} - # #show_metadata: false - # disabled: true - - - name: wikidata - engine: wikidata - shortcut: wd - timeout: 3.0 - weight: 2 - # add "list" to the array to get results in the results list - display_type: ['infobox'] - tests: *tests_infobox - categories: [general] - - - name: duckduckgo - engine: duckduckgo - shortcut: ddg - - - name: duckduckgo images - engine: duckduckgo_extra - categories: [images, web] - ddg_category: images - shortcut: ddi - disabled: true - - - name: duckduckgo videos - engine: duckduckgo_extra - categories: [videos, web] - ddg_category: videos - shortcut: ddv - disabled: true - - - name: duckduckgo news - engine: duckduckgo_extra - categories: [news, web] - ddg_category: news - shortcut: ddn - disabled: true - - - name: duckduckgo weather - engine: duckduckgo_weather - shortcut: ddw - disabled: true - - - name: apple maps - engine: apple_maps - shortcut: apm - disabled: true - timeout: 5.0 - - - name: emojipedia - engine: emojipedia - timeout: 4.0 - shortcut: em - disabled: true - - - name: tineye - engine: tineye - shortcut: tin - timeout: 9.0 - disabled: true - - - name: etymonline - engine: xpath - paging: true - search_url: https://etymonline.com/search?page={pageno}&q={query} - url_xpath: //a[contains(@class, "word__name--")]/@href - title_xpath: //a[contains(@class, "word__name--")] - content_xpath: //section[contains(@class, "word__defination")] - first_page_num: 1 - shortcut: et - categories: [dictionaries] - about: - website: https://www.etymonline.com/ - wikidata_id: Q1188617 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - # - name: ebay - # engine: ebay - # shortcut: eb - # base_url: 'https://www.ebay.com' - # disabled: true - # timeout: 5 - - - name: 1x - engine: www1x - shortcut: 1x - timeout: 3.0 - disabled: true - - - name: fdroid - engine: fdroid - shortcut: fd - disabled: true - - - name: flickr - categories: images - shortcut: fl - # You can use the engine using the official stable API, but you need an API - # key, see: https://www.flickr.com/services/apps/create/ - # engine: flickr - # api_key: 'apikey' # required! - # Or you can use the html non-stable engine, activated by default - engine: flickr_noapi - - - name: free software directory - engine: mediawiki - shortcut: fsd - categories: [it, software wikis] - base_url: https://directory.fsf.org/ - search_type: title - timeout: 5.0 - disabled: true - about: - website: https://directory.fsf.org/ - wikidata_id: Q2470288 - - # - name: freesound - # engine: freesound - # shortcut: fnd - # disabled: true - # timeout: 15.0 - # API key required, see: https://freesound.org/docs/api/overview.html - # api_key: MyAPIkey - - - name: frinkiac - engine: frinkiac - shortcut: frk - disabled: true - - - name: fyyd - engine: fyyd - shortcut: fy - timeout: 8.0 - disabled: true - - - name: genius - engine: genius - shortcut: gen - - - name: gentoo - engine: gentoo - shortcut: ge - timeout: 10.0 - - - name: gitlab - engine: json_engine - paging: true - search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno} - url_query: web_url - title_query: name_with_namespace - content_query: description - page_size: 20 - categories: [it, repos] - shortcut: gl - timeout: 10.0 - disabled: true - about: - website: https://about.gitlab.com/ - wikidata_id: Q16639197 - official_api_documentation: https://docs.gitlab.com/ee/api/ - use_official_api: false - require_api_key: false - results: JSON - - - name: github - engine: github - shortcut: gh - - # This a Gitea service. If you would like to use a different instance, - # change codeberg.org to URL of the desired Gitea host. Or you can create a - # new engine by copying this and changing the name, shortcut and search_url. - - - name: codeberg - engine: json_engine - search_url: https://codeberg.org/api/v1/repos/search?q={query}&limit=10 - url_query: html_url - title_query: name - content_query: description - categories: [it, repos] - shortcut: cb - disabled: true - about: - website: https://codeberg.org/ - wikidata_id: - official_api_documentation: https://try.gitea.io/api/swagger - use_official_api: false - require_api_key: false - results: JSON - - - name: goodreads - engine: goodreads - shortcut: good - timeout: 4.0 - disabled: true - - - name: google - engine: google - shortcut: go - # additional_tests: - # android: *test_android - - - name: google images - engine: google_images - shortcut: goi - # additional_tests: - # android: *test_android - # dali: - # matrix: - # query: ['Dali Christ'] - # lang: ['en', 'de', 'fr', 'zh-CN'] - # result_container: - # - ['one_title_contains', 'Salvador'] - - - name: google news - engine: google_news - shortcut: gon - # additional_tests: - # android: *test_android - - - name: google videos - engine: google_videos - shortcut: gov - # additional_tests: - # android: *test_android - - - name: google scholar - engine: google_scholar - shortcut: gos - - - name: google play apps - engine: google_play - categories: [files, apps] - shortcut: gpa - play_categ: apps - disabled: true - - - name: google play movies - engine: google_play - categories: videos - shortcut: gpm - play_categ: movies - disabled: true - - - name: material icons - engine: material_icons - categories: images - shortcut: mi - disabled: true - - - name: gpodder - engine: json_engine - shortcut: gpod - timeout: 4.0 - paging: false - search_url: https://gpodder.net/search.json?q={query} - url_query: url - title_query: title - content_query: description - page_size: 19 - categories: music - disabled: true - about: - website: https://gpodder.net - wikidata_id: Q3093354 - official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/ - use_official_api: false - requires_api_key: false - results: JSON - - - name: habrahabr - engine: xpath - paging: true - search_url: https://habr.com/en/search/page{pageno}/?q={query} - results_xpath: //article[contains(@class, "tm-articles-list__item")] - url_xpath: .//a[@class="tm-title__link"]/@href - title_xpath: .//a[@class="tm-title__link"] - content_xpath: .//div[contains(@class, "article-formatted-body")] - categories: it - timeout: 4.0 - disabled: true - shortcut: habr - about: - website: https://habr.com/ - wikidata_id: Q4494434 - official_api_documentation: https://habr.com/en/docs/help/api/ - use_official_api: false - require_api_key: false - results: HTML - - - name: hackernews - engine: hackernews - shortcut: hn - disabled: true - - - name: hoogle - engine: xpath - paging: true - search_url: https://hoogle.haskell.org/?hoogle={query}&start={pageno} - results_xpath: '//div[@class="result"]' - title_xpath: './/div[@class="ans"]//a' - url_xpath: './/div[@class="ans"]//a/@href' - content_xpath: './/div[@class="from"]' - page_size: 20 - categories: [it, packages] - shortcut: ho - about: - website: https://hoogle.haskell.org/ - wikidata_id: Q34010 - official_api_documentation: https://hackage.haskell.org/api - use_official_api: false - require_api_key: false - results: JSON - - - name: imdb - engine: imdb - shortcut: imdb - timeout: 6.0 - disabled: true - - - name: imgur - engine: imgur - shortcut: img - disabled: true - - - name: ina - engine: ina - shortcut: in - timeout: 6.0 - disabled: true - - - name: invidious - engine: invidious - # Instanes will be selected randomly, see https://api.invidious.io/ for - # instances that are stable (good uptime) and close to you. - base_url: - - https://invidious.io.lol - - https://invidious.fdn.fr - - https://yt.artemislena.eu - - https://invidious.tiekoetter.com - - https://invidious.flokinet.to - - https://vid.puffyan.us - - https://invidious.privacydev.net - - https://inv.tux.pizza - shortcut: iv - timeout: 3.0 - disabled: true - - - name: jisho - engine: jisho - shortcut: js - timeout: 3.0 - disabled: true - - - name: kickass - engine: kickass - base_url: - - https://kickasstorrents.to - - https://kickasstorrents.cr - - https://kickasstorrent.cr - - https://kickass.sx - - https://kat.am - shortcut: kc - timeout: 4.0 - - - name: lemmy communities - engine: lemmy - lemmy_type: Communities - shortcut: leco - - - name: lemmy users - engine: lemmy - network: lemmy communities - lemmy_type: Users - shortcut: leus - - - name: lemmy posts - engine: lemmy - network: lemmy communities - lemmy_type: Posts - shortcut: lepo - - - name: lemmy comments - engine: lemmy - network: lemmy communities - lemmy_type: Comments - shortcut: lecom - - - name: library genesis - engine: xpath - # search_url: https://libgen.is/search.php?req={query} - search_url: https://libgen.rs/search.php?req={query} - url_xpath: //a[contains(@href,"book/index.php?md5")]/@href - title_xpath: //a[contains(@href,"book/")]/text()[1] - content_xpath: //td/a[1][contains(@href,"=author")]/text() - categories: files - timeout: 7.0 - disabled: true - shortcut: lg - about: - website: https://libgen.fun/ - wikidata_id: Q22017206 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: z-library - engine: zlibrary - shortcut: zlib - categories: files - timeout: 7.0 - - - name: library of congress - engine: loc - shortcut: loc - categories: images - - - name: lingva - engine: lingva - shortcut: lv - # set lingva instance in url, by default it will use the official instance - # url: https://lingva.thedaviddelta.com - - - name: lobste.rs - engine: xpath - search_url: https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance - results_xpath: //li[contains(@class, "story")] - url_xpath: .//a[@class="u-url"]/@href - title_xpath: .//a[@class="u-url"] - content_xpath: .//a[@class="domain"] - categories: it - shortcut: lo - timeout: 5.0 - disabled: true - about: - website: https://lobste.rs/ - wikidata_id: Q60762874 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: mastodon users - engine: mastodon - mastodon_type: accounts - base_url: https://mastodon.social - shortcut: mau - - - name: mastodon hashtags - engine: mastodon - mastodon_type: hashtags - base_url: https://mastodon.social - shortcut: mah - - # - name: matrixrooms - # engine: mrs - # # https://docs.searxng.org/dev/engines/online/mrs.html - # # base_url: https://mrs-api-host - # shortcut: mtrx - # disabled: true - - - name: mdn - shortcut: mdn - engine: json_engine - categories: [it] - paging: true - search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} - results_query: documents - url_query: mdn_url - url_prefix: https://developer.mozilla.org - title_query: title - content_query: summary - about: - website: https://developer.mozilla.org - wikidata_id: Q3273508 - official_api_documentation: null - use_official_api: false - require_api_key: false - results: JSON - - - name: metacpan - engine: metacpan - shortcut: cpan - disabled: true - number_of_results: 20 - - # - name: meilisearch - # engine: meilisearch - # shortcut: mes - # enable_http: true - # base_url: http://localhost:7700 - # index: my-index - - - name: mixcloud - engine: mixcloud - shortcut: mc - - # MongoDB engine - # Required dependency: pymongo - # - name: mymongo - # engine: mongodb - # shortcut: md - # exact_match_only: false - # host: '127.0.0.1' - # port: 27017 - # enable_http: true - # results_per_page: 20 - # database: 'business' - # collection: 'reviews' # name of the db collection - # key: 'name' # key in the collection to search for - - - name: mozhi - engine: mozhi - base_url: - - https://mozhi.aryak.me - - https://translate.bus-hit.me - - https://nyc1.mz.ggtyler.dev - # mozhi_engine: google - see https://mozhi.aryak.me for supported engines - timeout: 4.0 - shortcut: mz - disabled: true - - - name: mwmbl - engine: mwmbl - # api_url: https://api.mwmbl.org - shortcut: mwm - disabled: true - - - name: npm - engine: json_engine - paging: true - first_page_num: 0 - search_url: https://api.npms.io/v2/search?q={query}&size=25&from={pageno} - results_query: results - url_query: package/links/npm - title_query: package/name - content_query: package/description - page_size: 25 - categories: [it, packages] - disabled: true - timeout: 5.0 - shortcut: npm - about: - website: https://npms.io/ - wikidata_id: Q7067518 - official_api_documentation: https://api-docs.npms.io/ - use_official_api: false - require_api_key: false - results: JSON - - - name: nyaa - engine: nyaa - shortcut: nt - disabled: true - - - name: mankier - engine: json_engine - search_url: https://www.mankier.com/api/v2/mans/?q={query} - results_query: results - url_query: url - title_query: name - content_query: description - categories: it - shortcut: man - about: - website: https://www.mankier.com/ - official_api_documentation: https://www.mankier.com/api - use_official_api: true - require_api_key: false - results: JSON - - - name: odysee - engine: odysee - shortcut: od - disabled: true - - - name: openairedatasets - engine: json_engine - paging: true - search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} - results_query: response/results/result - url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ - title_query: metadata/oaf:entity/oaf:result/title/$ - content_query: metadata/oaf:entity/oaf:result/description/$ - content_html_to_text: true - categories: 'science' - shortcut: oad - timeout: 5.0 - about: - website: https://www.openaire.eu/ - wikidata_id: Q25106053 - official_api_documentation: https://api.openaire.eu/ - use_official_api: false - require_api_key: false - results: JSON - - - name: openairepublications - engine: json_engine - paging: true - search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} - results_query: response/results/result - url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ - title_query: metadata/oaf:entity/oaf:result/title/$ - content_query: metadata/oaf:entity/oaf:result/description/$ - content_html_to_text: true - categories: science - shortcut: oap - timeout: 5.0 - about: - website: https://www.openaire.eu/ - wikidata_id: Q25106053 - official_api_documentation: https://api.openaire.eu/ - use_official_api: false - require_api_key: false - results: JSON - - # - name: opensemanticsearch - # engine: opensemantic - # shortcut: oss - # base_url: 'http://localhost:8983/solr/opensemanticsearch/' - - - name: openstreetmap - engine: openstreetmap - shortcut: osm - - - name: openrepos - engine: xpath - paging: true - search_url: https://openrepos.net/search/node/{query}?page={pageno} - url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href - title_xpath: //li[@class="search-result"]//h3[@class="title"]/a - content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] - categories: files - timeout: 4.0 - disabled: true - shortcut: or - about: - website: https://openrepos.net/ - wikidata_id: - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: packagist - engine: json_engine - paging: true - search_url: https://packagist.org/search.json?q={query}&page={pageno} - results_query: results - url_query: url - title_query: name - content_query: description - categories: [it, packages] - disabled: true - timeout: 5.0 - shortcut: pack - about: - website: https://packagist.org - wikidata_id: Q108311377 - official_api_documentation: https://packagist.org/apidoc - use_official_api: true - require_api_key: false - results: JSON - - - name: pdbe - engine: pdbe - shortcut: pdb - # Hide obsolete PDB entries. Default is not to hide obsolete structures - # hide_obsolete: false - - - name: photon - engine: photon - shortcut: ph - - - name: pinterest - engine: pinterest - shortcut: pin - - - name: piped - engine: piped - shortcut: ppd - categories: videos - piped_filter: videos - timeout: 3.0 - - # URL to use as link and for embeds - frontend_url: https://srv.piped.video - # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ - backend_url: - - https://pipedapi.kavin.rocks - - https://pipedapi-libre.kavin.rocks - - https://pipedapi.adminforge.de - - - name: piped.music - engine: piped - network: piped - shortcut: ppdm - categories: music - piped_filter: music_songs - timeout: 3.0 - - - name: piratebay - engine: piratebay - shortcut: tpb - # You may need to change this URL to a proxy if piratebay is blocked in your - # country - url: https://thepiratebay.org/ - timeout: 3.0 - - - name: podcastindex - engine: podcastindex - shortcut: podcast - - # Required dependency: psychopg2 - # - name: postgresql - # engine: postgresql - # database: postgres - # username: postgres - # password: postgres - # limit: 10 - # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' - # shortcut : psql - - - name: presearch - engine: presearch - search_type: search - categories: [general, web] - shortcut: ps - timeout: 4.0 - disabled: true - - - name: presearch images - engine: presearch - network: presearch - search_type: images - categories: [images, web] - timeout: 4.0 - shortcut: psimg - disabled: true - - - name: presearch videos - engine: presearch - network: presearch - search_type: videos - categories: [general, web] - timeout: 4.0 - shortcut: psvid - disabled: true - - - name: presearch news - engine: presearch - network: presearch - search_type: news - categories: [news, web] - timeout: 4.0 - shortcut: psnews - disabled: true - - - name: pub.dev - engine: xpath - shortcut: pd - search_url: https://pub.dev/packages?q={query}&page={pageno} - paging: true - results_xpath: //div[contains(@class,"packages-item")] - url_xpath: ./div/h3/a/@href - title_xpath: ./div/h3/a - content_xpath: ./div/div/div[contains(@class,"packages-description")]/span - categories: [packages, it] - timeout: 3.0 - disabled: true - first_page_num: 1 - about: - website: https://pub.dev/ - official_api_documentation: https://pub.dev/help/api - use_official_api: false - require_api_key: false - results: HTML - - - name: pubmed - engine: pubmed - shortcut: pub - timeout: 3.0 - - - name: pypi - shortcut: pypi - engine: xpath - paging: true - search_url: https://pypi.org/search/?q={query}&page={pageno} - results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"] - url_xpath: ./@href - title_xpath: ./h3/span[@class="package-snippet__name"] - content_xpath: ./p - suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"] - first_page_num: 1 - categories: [it, packages] - about: - website: https://pypi.org - wikidata_id: Q2984686 - official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html - use_official_api: false - require_api_key: false - results: HTML - - - name: qwant - qwant_categ: web - engine: qwant - shortcut: qw - categories: [general, web] - additional_tests: - rosebud: *test_rosebud - - - name: qwant news - qwant_categ: news - engine: qwant - shortcut: qwn - categories: news - network: qwant - - - name: qwant images - qwant_categ: images - engine: qwant - shortcut: qwi - categories: [images, web] - network: qwant - - - name: qwant videos - qwant_categ: videos - engine: qwant - shortcut: qwv - categories: [videos, web] - network: qwant - - # - name: library - # engine: recoll - # shortcut: lib - # base_url: 'https://recoll.example.org/' - # search_dir: '' - # mount_prefix: /export - # dl_prefix: 'https://download.example.org' - # timeout: 30.0 - # categories: files - # disabled: true - - # - name: recoll library reference - # engine: recoll - # base_url: 'https://recoll.example.org/' - # search_dir: reference - # mount_prefix: /export - # dl_prefix: 'https://download.example.org' - # shortcut: libr - # timeout: 30.0 - # categories: files - # disabled: true - - - name: radio browser - engine: radio_browser - shortcut: rb - - - name: reddit - engine: reddit - shortcut: re - page_size: 25 - - - name: rottentomatoes - engine: rottentomatoes - shortcut: rt - disabled: true - - # Required dependency: redis - # - name: myredis - # shortcut : rds - # engine: redis_server - # exact_match_only: false - # host: '127.0.0.1' - # port: 6379 - # enable_http: true - # password: '' - # db: 0 - - # tmp suspended: bad certificate - # - name: scanr structures - # shortcut: scs - # engine: scanr_structures - # disabled: true - - - name: sepiasearch - engine: sepiasearch - shortcut: sep - - - name: soundcloud - engine: soundcloud - shortcut: sc - - - name: stackoverflow - engine: stackexchange - shortcut: st - api_site: 'stackoverflow' - categories: [it, q&a] - - - name: askubuntu - engine: stackexchange - shortcut: ubuntu - api_site: 'askubuntu' - categories: [it, q&a] - - - name: internetarchivescholar - engine: internet_archive_scholar - shortcut: ias - timeout: 5.0 - - - name: superuser - engine: stackexchange - shortcut: su - api_site: 'superuser' - categories: [it, q&a] - - - name: searchcode code - engine: searchcode_code - shortcut: scc - disabled: true - - # - name: searx - # engine: searx_engine - # shortcut: se - # instance_urls : - # - http://127.0.0.1:8888/ - # - ... - # disabled: true - - - name: semantic scholar - engine: semantic_scholar - disabled: true - shortcut: se - - # Spotify needs API credentials - # - name: spotify - # engine: spotify - # shortcut: stf - # api_client_id: ******* - # api_client_secret: ******* - - # - name: solr - # engine: solr - # shortcut: slr - # base_url: http://localhost:8983 - # collection: collection_name - # sort: '' # sorting: asc or desc - # field_list: '' # comma separated list of field names to display on the UI - # default_fields: '' # default field to query - # query_fields: '' # query fields - # enable_http: true - - # - name: springer nature - # engine: springer - # # get your API key from: https://dev.springernature.com/signup - # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" - # api_key: 'unset' - # shortcut: springer - # timeout: 15.0 - - - name: startpage - engine: startpage - shortcut: sp - timeout: 6.0 - disabled: true - additional_tests: - rosebud: *test_rosebud - - - name: tokyotoshokan - engine: tokyotoshokan - shortcut: tt - timeout: 6.0 - disabled: true - - - name: solidtorrents - engine: solidtorrents - shortcut: solid - timeout: 4.0 - base_url: - - https://solidtorrents.to - - https://bitsearch.to - - # For this demo of the sqlite engine download: - # https://liste.mediathekview.de/filmliste-v2.db.bz2 - # and unpack into searx/data/filmliste-v2.db - # Query to test: "!demo concert" - # - # - name: demo - # engine: sqlite - # shortcut: demo - # categories: general - # result_template: default.html - # database: searx/data/filmliste-v2.db - # query_str: >- - # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, - # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, - # description AS content - # FROM film - # WHERE title LIKE :wildcard OR description LIKE :wildcard - # ORDER BY duration DESC - - - name: tagesschau - engine: tagesschau - # when set to false, display URLs from Tagesschau, and not the actual source - # (e.g. NDR, WDR, SWR, HR, ...) - use_source_url: true - shortcut: ts - disabled: true - - - name: tmdb - engine: xpath - paging: true - categories: movies - search_url: https://www.themoviedb.org/search?page={pageno}&query={query} - results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] - url_xpath: .//div[contains(@class,"poster")]/a/@href - thumbnail_xpath: .//img/@src - title_xpath: .//div[contains(@class,"title")]//h2 - content_xpath: .//div[contains(@class,"overview")] - shortcut: tm - disabled: true - - # Requires Tor - - name: torch - engine: xpath - paging: true - search_url: http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and - results_xpath: //table//tr - url_xpath: ./td[2]/a - title_xpath: ./td[2]/b - content_xpath: ./td[2]/small - categories: onions - enable_http: true - shortcut: tch - - # torznab engine lets you query any torznab compatible indexer. Using this - # engine in combination with Jackett opens the possibility to query a lot of - # public and private indexers directly from SearXNG. More details at: - # https://docs.searxng.org/dev/engines/online/torznab.html - # - # - name: Torznab EZTV - # engine: torznab - # shortcut: eztv - # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab - # enable_http: true # if using localhost - # api_key: xxxxxxxxxxxxxxx - # show_magnet_links: true - # show_torrent_files: false - # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories - # torznab_categories: # optional - # - 2000 - # - 5000 - - # tmp suspended - too slow, too many errors - # - name: urbandictionary - # engine : xpath - # search_url : https://www.urbandictionary.com/define.php?term={query} - # url_xpath : //*[@class="word"]/@href - # title_xpath : //*[@class="def-header"] - # content_xpath: //*[@class="meaning"] - # shortcut: ud - - - name: unsplash - engine: unsplash - shortcut: us - - - name: yandex music - engine: yandex_music - shortcut: ydm - disabled: true - # https://yandex.com/support/music/access.html - inactive: true - - - name: yahoo - engine: yahoo - shortcut: yh - disabled: true - - - name: yahoo news - engine: yahoo_news - shortcut: yhn - - - name: youtube - shortcut: yt - # You can use the engine using the official stable API, but you need an API - # key See: https://console.developers.google.com/project - # - # engine: youtube_api - # api_key: 'apikey' # required! - # - # Or you can use the html non-stable engine, activated by default - engine: youtube_noapi - - - name: dailymotion - engine: dailymotion - shortcut: dm - - - name: vimeo - engine: vimeo - shortcut: vm - - - name: wiby - engine: json_engine - paging: true - search_url: https://wiby.me/json/?q={query}&p={pageno} - url_query: URL - title_query: Title - content_query: Snippet - categories: [general, web] - shortcut: wib - disabled: true - about: - website: https://wiby.me/ - - - name: alexandria - engine: json_engine - shortcut: alx - categories: general - paging: true - search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} - results_query: results - title_query: title - url_query: url - content_query: snippet - timeout: 1.5 - disabled: true - about: - website: https://alexandria.org/ - official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md - use_official_api: true - require_api_key: false - results: JSON - - - name: wikibooks - engine: mediawiki - weight: 0.5 - shortcut: wb - categories: [general, wikimedia] - base_url: 'https://{language}.wikibooks.org/' - search_type: text - disabled: true - about: - website: https://www.wikibooks.org/ - wikidata_id: Q367 - - - name: wikinews - engine: mediawiki - shortcut: wn - categories: [news, wikimedia] - base_url: 'https://{language}.wikinews.org/' - search_type: text - srsort: create_timestamp_desc - about: - website: https://www.wikinews.org/ - wikidata_id: Q964 - - - name: wikiquote - engine: mediawiki - weight: 0.5 - shortcut: wq - categories: [general, wikimedia] - base_url: 'https://{language}.wikiquote.org/' - search_type: text - disabled: true - additional_tests: - rosebud: *test_rosebud - about: - website: https://www.wikiquote.org/ - wikidata_id: Q369 - - - name: wikisource - engine: mediawiki - weight: 0.5 - shortcut: ws - categories: [general, wikimedia] - base_url: 'https://{language}.wikisource.org/' - search_type: text - disabled: true - about: - website: https://www.wikisource.org/ - wikidata_id: Q263 - - - name: wikispecies - engine: mediawiki - shortcut: wsp - categories: [general, science, wikimedia] - base_url: 'https://species.wikimedia.org/' - search_type: text - disabled: true - about: - website: https://species.wikimedia.org/ - wikidata_id: Q13679 - - - name: wiktionary - engine: mediawiki - shortcut: wt - categories: [dictionaries, wikimedia] - base_url: 'https://{language}.wiktionary.org/' - search_type: text - about: - website: https://www.wiktionary.org/ - wikidata_id: Q151 - - - name: wikiversity - engine: mediawiki - weight: 0.5 - shortcut: wv - categories: [general, wikimedia] - base_url: 'https://{language}.wikiversity.org/' - search_type: text - disabled: true - about: - website: https://www.wikiversity.org/ - wikidata_id: Q370 - - - name: wikivoyage - engine: mediawiki - weight: 0.5 - shortcut: wy - categories: [general, wikimedia] - base_url: 'https://{language}.wikivoyage.org/' - search_type: text - disabled: true - about: - website: https://www.wikivoyage.org/ - wikidata_id: Q373 - - - name: wikicommons.images - engine: wikicommons - shortcut: wc - categories: images - number_of_results: 10 - - name: wolframalpha - shortcut: wa - # You can use the engine using the official stable API, but you need an API - # key. See: https://products.wolframalpha.com/api/ - # - # engine: wolframalpha_api - # api_key: '' - # - # Or you can use the html non-stable engine, activated by default - engine: wolframalpha_noapi - timeout: 6.0 - categories: general disabled: false - - - name: dictzone - engine: dictzone - shortcut: dc - - - name: mymemory translated - engine: translated - shortcut: tl - timeout: 5.0 - # You can use without an API key, but you are limited to 1000 words/day - # See: https://mymemory.translated.net/doc/usagelimits.php - # api_key: '' - - # Required dependency: mysql-connector-python - # - name: mysql - # engine: mysql_server - # database: mydatabase - # username: user - # password: pass - # limit: 10 - # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' - # shortcut: mysql - - - name: 1337x - engine: 1337x - shortcut: 1337x - disabled: true - - - name: duden - engine: duden - shortcut: du - disabled: true - - - name: seznam - shortcut: szn - engine: seznam - disabled: true - - # - name: deepl - # engine: deepl - # shortcut: dpl - # # You can use the engine using the official stable API, but you need an API key - # # See: https://www.deepl.com/pro-api?cta=header-pro-api - # api_key: '' # required! - # timeout: 5.0 - # disabled: true - - - name: mojeek - shortcut: mjk - engine: xpath - paging: true - categories: [general, web] - search_url: https://www.mojeek.com/search?q={query}&s={pageno}&lang={lang}&lb={lang} - results_xpath: //ul[@class="results-standard"]/li/a[@class="ob"] - url_xpath: ./@href - title_xpath: ../h2/a - content_xpath: ..//p[@class="s"] - suggestion_xpath: //div[@class="top-info"]/p[@class="top-info spell"]/em/a - first_page_num: 0 - page_size: 10 - max_page: 100 - disabled: true - about: - website: https://www.mojeek.com/ - wikidata_id: Q60747299 - official_api_documentation: https://www.mojeek.com/services/api.html/ - use_official_api: false - require_api_key: false - results: HTML - - - name: moviepilot - engine: moviepilot - shortcut: mp - disabled: true - - - name: naver - shortcut: nvr - categories: [general, web] - engine: xpath - paging: true - search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno} - url_xpath: //a[@class="link_tit"]/@href - title_xpath: //a[@class="link_tit"] - content_xpath: //a[@class="total_dsc"]/div - first_page_num: 1 - page_size: 10 - disabled: true - about: - website: https://www.naver.com/ - wikidata_id: Q485639 - official_api_documentation: https://developers.naver.com/docs/nmt/examples/ - use_official_api: false - require_api_key: false - results: HTML - language: ko - - - name: rubygems - shortcut: rbg - engine: xpath - paging: true - search_url: https://rubygems.org/search?page={pageno}&query={query} - results_xpath: /html/body/main/div/a[@class="gems__gem"] - url_xpath: ./@href - title_xpath: ./span/h2 - content_xpath: ./span/p - suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a - first_page_num: 1 - categories: [it, packages] - disabled: true - about: - website: https://rubygems.org/ - wikidata_id: Q1853420 - official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ - use_official_api: false - require_api_key: false - results: HTML - - - name: peertube - engine: peertube - shortcut: ptb - paging: true - # alternatives see: https://instances.joinpeertube.org/instances - # base_url: https://tube.4aem.com - categories: videos - disabled: true - timeout: 6.0 - - - name: mediathekviewweb - engine: mediathekviewweb - shortcut: mvw - disabled: true - - - name: yacy - engine: yacy - categories: general - search_type: text - base_url: https://yacy.searchlab.eu - shortcut: ya - disabled: true - # required if you aren't using HTTPS for your local yacy instance - # https://docs.searxng.org/dev/engines/online/yacy.html - # enable_http: true - # timeout: 3.0 - # search_mode: 'global' - - - name: yacy images - engine: yacy - categories: images - search_type: image - base_url: https://yacy.searchlab.eu - shortcut: yai - disabled: true - - - name: rumble - engine: rumble - shortcut: ru - base_url: https://rumble.com/ - paging: true - categories: videos - disabled: true - - - name: livespace - engine: livespace - shortcut: ls - categories: videos - disabled: true - timeout: 5.0 - - - name: wordnik - engine: wordnik - shortcut: def - base_url: https://www.wordnik.com/ - categories: [dictionaries] - timeout: 5.0 - - - name: woxikon.de synonyme - engine: xpath - shortcut: woxi - categories: [dictionaries] - timeout: 5.0 - disabled: true - search_url: https://synonyme.woxikon.de/synonyme/{query}.php - url_xpath: //div[@class="upper-synonyms"]/a/@href - content_xpath: //div[@class="synonyms-list-group"] - title_xpath: //div[@class="upper-synonyms"]/a - no_result_for_http_status: [404] - about: - website: https://www.woxikon.de/ - wikidata_id: # No Wikidata ID - use_official_api: false - require_api_key: false - results: HTML - language: de - - - name: seekr news - engine: seekr - shortcut: senews - categories: news - seekr_category: news - disabled: true - - - name: seekr images - engine: seekr - network: seekr news - shortcut: seimg - categories: images - seekr_category: images - disabled: true - - - name: seekr videos - engine: seekr - network: seekr news - shortcut: sevid - categories: videos - seekr_category: videos - disabled: true - - - name: sjp.pwn - engine: sjp - shortcut: sjp - base_url: https://sjp.pwn.pl/ - timeout: 5.0 - disabled: true - - - name: stract - engine: stract - shortcut: str - disabled: true - - - name: svgrepo - engine: svgrepo - shortcut: svg - timeout: 10.0 - disabled: true - - - name: tootfinder - engine: tootfinder - shortcut: toot - - - name: wallhaven - engine: wallhaven - # api_key: abcdefghijklmnopqrstuvwxyz - shortcut: wh - - # wikimini: online encyclopedia for children - # The fulltext and title parameter is necessary for Wikimini because - # sometimes it will not show the results and redirect instead - - name: wikimini - engine: xpath - shortcut: wkmn - search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search - url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href - title_xpath: //li//div[@class="mw-search-result-heading"]/a - content_xpath: //li/div[@class="searchresult"] - categories: general - disabled: true - about: - website: https://wikimini.org/ - wikidata_id: Q3568032 - use_official_api: false - require_api_key: false - results: HTML - language: fr - - - name: wttr.in - engine: wttr - shortcut: wttr - timeout: 9.0 - - - name: yummly - engine: yummly - shortcut: yum - disabled: true - - - name: brave - engine: brave - shortcut: br - time_range_support: true - paging: true - categories: [general, web] - brave_category: search - # brave_spellcheck: true - - - name: brave.images - engine: brave - network: brave - shortcut: brimg - categories: [images, web] - brave_category: images - - - name: brave.videos - engine: brave - network: brave - shortcut: brvid - categories: [videos, web] - brave_category: videos - - - name: brave.news - engine: brave - network: brave - shortcut: brnews - categories: news - brave_category: news - - # - name: brave.goggles - # engine: brave - # network: brave - # shortcut: brgog - # time_range_support: true - # paging: true - # categories: [general, web] - # brave_category: goggles - # Goggles: # required! This should be a URL ending in .goggle - - - name: lib.rs - shortcut: lrs - engine: xpath - search_url: https://lib.rs/search?q={query} - results_xpath: /html/body/main/div/ol/li/a - url_xpath: ./@href - title_xpath: ./div[@class="h"]/h4 - content_xpath: ./div[@class="h"]/p - categories: [it, packages] - disabled: true - about: - website: https://lib.rs - wikidata_id: Q113486010 - use_official_api: false - require_api_key: false - results: HTML - - - name: sourcehut - shortcut: srht - engine: xpath - paging: true - search_url: https://sr.ht/projects?page={pageno}&search={query} - results_xpath: (//div[@class="event-list"])[1]/div[@class="event"] - url_xpath: ./h4/a[2]/@href - title_xpath: ./h4/a[2] - content_xpath: ./p - first_page_num: 1 - categories: [it, repos] - disabled: true - about: - website: https://sr.ht - wikidata_id: Q78514485 - official_api_documentation: https://man.sr.ht/ - use_official_api: false - require_api_key: false - results: HTML - - - name: goo - shortcut: goo - engine: xpath - paging: true - search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0 - url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href - title_xpath: //div[@class="result"]/p[@class='title fsL1']/a - content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p - first_page_num: 0 - categories: [general, web] - disabled: true - timeout: 4.0 - about: - website: https://search.goo.ne.jp - wikidata_id: Q249044 - use_official_api: false - require_api_key: false - results: HTML - language: ja - - - name: bt4g - engine: bt4g - shortcut: bt4g - - - name: pkg.go.dev - engine: xpath - shortcut: pgo - search_url: https://pkg.go.dev/search?limit=100&m=package&q={query} - results_xpath: /html/body/main/div[contains(@class,"SearchResults")]/div[not(@class)]/div[@class="SearchSnippet"] - url_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a/@href - title_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a - content_xpath: ./p[@class="SearchSnippet-synopsis"] - categories: [packages, it] - timeout: 3.0 - disabled: true - about: - website: https://pkg.go.dev/ - use_official_api: false - require_api_key: false - results: HTML - -# Doku engine lets you access to any Doku wiki instance: -# A public one or a privete/corporate one. -# - name: ubuntuwiki -# engine: doku -# shortcut: uw -# base_url: 'https://doc.ubuntu-fr.org' - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: git grep -# engine: command -# command: ['git', 'grep', '{{QUERY}}'] -# shortcut: gg -# tokens: [] -# disabled: true -# delimiter: -# chars: ':' -# keys: ['filepath', 'code'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: locate -# engine: command -# command: ['locate', '{{QUERY}}'] -# shortcut: loc -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: find -# engine: command -# command: ['find', '.', '-name', '{{QUERY}}'] -# query_type: path -# shortcut: fnd -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: pattern search in files -# engine: command -# command: ['fgrep', '{{QUERY}}'] -# shortcut: fgr -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: regex search in files -# engine: command -# command: ['grep', '{{QUERY}}'] -# shortcut: gr -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -doi_resolvers: - oadoi.org: 'https://oadoi.org/' - doi.org: 'https://doi.org/' - doai.io: 'https://dissem.in/' - sci-hub.se: 'https://sci-hub.se/' - sci-hub.st: 'https://sci-hub.st/' - sci-hub.ru: 'https://sci-hub.ru/' - -default_doi_resolver: 'oadoi.org' From 51939ff842928bfe6793a4087bfeeb8b78f719f9 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Sat, 24 Aug 2024 21:48:27 +0530 Subject: [PATCH 02/12] feat(webSearchAgent): fix typo, closes #313 --- src/agents/webSearchAgent.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts index c93dabb..803ae23 100644 --- a/src/agents/webSearchAgent.ts +++ b/src/agents/webSearchAgent.ts @@ -27,7 +27,7 @@ const basicSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. If the question contains some links and asks to answer from those links or even if they don't you need to return the links inside 'links' XML block and the question inside 'question' XML block. If there are no links then you need to return the question without any XML block. -If the user asks to summarrize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block. +If the user asks to summarize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block. Example: 1. Follow up question: What is the capital of France? From 8bb3e4f016cfb98698efdad9d5242e03d8a41c16 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Sun, 25 Aug 2024 12:03:32 +0530 Subject: [PATCH 03/12] feat(agents): update types --- src/agents/academicSearchAgent.ts | 2 +- src/agents/redditSearchAgent.ts | 2 +- src/agents/webSearchAgent.ts | 3 ++- src/agents/wolframAlphaSearchAgent.ts | 2 +- src/agents/writingAssistant.ts | 2 +- src/agents/youtubeSearchAgent.ts | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts index d797119..a64f9b9 100644 --- a/src/agents/academicSearchAgent.ts +++ b/src/agents/academicSearchAgent.ts @@ -66,7 +66,7 @@ const basicAcademicSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts index 578e2bb..bbb4e92 100644 --- a/src/agents/redditSearchAgent.ts +++ b/src/agents/redditSearchAgent.ts @@ -66,7 +66,7 @@ const basicRedditSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts index c93dabb..1a356c8 100644 --- a/src/agents/webSearchAgent.ts +++ b/src/agents/webSearchAgent.ts @@ -22,6 +22,7 @@ import logger from '../utils/logger'; import LineListOutputParser from '../lib/outputParsers/listLineOutputParser'; import { getDocumentsFromLinks } from '../lib/linkDocument'; import LineOutputParser from '../lib/outputParsers/lineOutputParser'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. @@ -95,7 +96,7 @@ const basicWebSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts index b80fcf3..07d8857 100644 --- a/src/agents/wolframAlphaSearchAgent.ts +++ b/src/agents/wolframAlphaSearchAgent.ts @@ -65,7 +65,7 @@ const basicWolframAlphaSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/writingAssistant.ts b/src/agents/writingAssistant.ts index 7c2cb49..5fd1f36 100644 --- a/src/agents/writingAssistant.ts +++ b/src/agents/writingAssistant.ts @@ -19,7 +19,7 @@ Since you are a writing assistant, you would not perform web searches. If you th const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts index 4133157..b0da25b 100644 --- a/src/agents/youtubeSearchAgent.ts +++ b/src/agents/youtubeSearchAgent.ts @@ -66,7 +66,7 @@ const basicYoutubeSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { From c521b032a7dbbd841e86db1d6ffd6cd33e87a15d Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Sun, 25 Aug 2024 15:08:30 +0530 Subject: [PATCH 04/12] feat(agents): fix unresloved types --- src/agents/academicSearchAgent.ts | 1 + src/agents/redditSearchAgent.ts | 1 + src/agents/webSearchAgent.ts | 2 +- src/agents/wolframAlphaSearchAgent.ts | 1 + src/agents/writingAssistant.ts | 1 + src/agents/youtubeSearchAgent.ts | 1 + 6 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts index a64f9b9..a72e3a2 100644 --- a/src/agents/academicSearchAgent.ts +++ b/src/agents/academicSearchAgent.ts @@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import computeSimilarity from '../utils/computeSimilarity'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicAcademicSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts index bbb4e92..9c2c443 100644 --- a/src/agents/redditSearchAgent.ts +++ b/src/agents/redditSearchAgent.ts @@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import computeSimilarity from '../utils/computeSimilarity'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicRedditSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts index 9c0d97f..f7a6d7e 100644 --- a/src/agents/webSearchAgent.ts +++ b/src/agents/webSearchAgent.ts @@ -96,7 +96,7 @@ const basicWebSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: IterableReadableStream, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts index 07d8857..1de8d0f 100644 --- a/src/agents/wolframAlphaSearchAgent.ts +++ b/src/agents/wolframAlphaSearchAgent.ts @@ -18,6 +18,7 @@ import type { Embeddings } from '@langchain/core/embeddings'; import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicWolframAlphaSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. diff --git a/src/agents/writingAssistant.ts b/src/agents/writingAssistant.ts index 5fd1f36..7e275df 100644 --- a/src/agents/writingAssistant.ts +++ b/src/agents/writingAssistant.ts @@ -10,6 +10,7 @@ import eventEmitter from 'events'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const writingAssistantPrompt = ` You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query. diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts index b0da25b..e9b6553 100644 --- a/src/agents/youtubeSearchAgent.ts +++ b/src/agents/youtubeSearchAgent.ts @@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import computeSimilarity from '../utils/computeSimilarity'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicYoutubeSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. From ff34d1043fd3501f15e1e1ab2425b022c917911e Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Sun, 25 Aug 2024 15:08:47 +0530 Subject: [PATCH 05/12] feat(app): lint & format --- ui/components/ChatWindow.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ui/components/ChatWindow.tsx b/ui/components/ChatWindow.tsx index 5e6d382..0ace2dd 100644 --- a/ui/components/ChatWindow.tsx +++ b/ui/components/ChatWindow.tsx @@ -59,7 +59,9 @@ const useSocket = ( chatModelProvider = Object.keys(chatModelProviders)[0]; if (chatModelProvider === 'custom_openai') { - toast.error('Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL'); + toast.error( + 'Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL', + ); setError(true); return; } else { @@ -192,7 +194,7 @@ const useSocket = ( if (data.type === 'error') { toast.error(data.data); } - }) + }); setWs(ws); }; From 806c47e70592356193bacc487f6fe0b148e84337 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Wed, 28 Aug 2024 09:53:06 +0530 Subject: [PATCH 06/12] feat(chatwindow): fix infinite loading --- ui/components/ChatWindow.tsx | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ui/components/ChatWindow.tsx b/ui/components/ChatWindow.tsx index 0ace2dd..cc93da8 100644 --- a/ui/components/ChatWindow.tsx +++ b/ui/components/ChatWindow.tsx @@ -201,13 +201,6 @@ const useSocket = ( connectWs(); } - - return () => { - if (ws?.readyState === 1) { - ws?.close(); - console.log('[DEBUG] closed'); - } - }; }, [ws, url, setIsWSReady, setError]); return ws; From 2873093fee1fbb7b473b4ab2c1f3e1d7116d95ee Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Wed, 28 Aug 2024 10:00:05 +0530 Subject: [PATCH 07/12] feat(package): bump version --- package.json | 2 +- ui/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index db3d773..c09454c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "perplexica-backend", - "version": "1.9.0-rc1", + "version": "1.9.0-rc2", "license": "MIT", "author": "ItzCrazyKns", "scripts": { diff --git a/ui/package.json b/ui/package.json index 1d892de..23afda2 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "perplexica-frontend", - "version": "1.9.0-rc1", + "version": "1.9.0-rc2", "license": "MIT", "author": "ItzCrazyKns", "scripts": { From e8ed4df31aaac4001192f64a66701c48bc6e9378 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Wed, 28 Aug 2024 14:27:22 +0530 Subject: [PATCH 08/12] feat(chat-window): close socket on unmount --- ui/components/ChatWindow.tsx | 410 ++++++++++++++++++----------------- 1 file changed, 208 insertions(+), 202 deletions(-) diff --git a/ui/components/ChatWindow.tsx b/ui/components/ChatWindow.tsx index cc93da8..9a1fe3c 100644 --- a/ui/components/ChatWindow.tsx +++ b/ui/components/ChatWindow.tsx @@ -1,42 +1,42 @@ -'use client'; +'use client' -import { useEffect, useRef, useState } from 'react'; -import { Document } from '@langchain/core/documents'; -import Navbar from './Navbar'; -import Chat from './Chat'; -import EmptyChat from './EmptyChat'; -import crypto from 'crypto'; -import { toast } from 'sonner'; -import { useSearchParams } from 'next/navigation'; -import { getSuggestions } from '@/lib/actions'; -import Error from 'next/error'; +import { useEffect, useRef, useState } from 'react' +import { Document } from '@langchain/core/documents' +import Navbar from './Navbar' +import Chat from './Chat' +import EmptyChat from './EmptyChat' +import crypto from 'crypto' +import { toast } from 'sonner' +import { useSearchParams } from 'next/navigation' +import { getSuggestions } from '@/lib/actions' +import Error from 'next/error' export type Message = { - messageId: string; - chatId: string; - createdAt: Date; - content: string; - role: 'user' | 'assistant'; - suggestions?: string[]; - sources?: Document[]; -}; + messageId: string + chatId: string + createdAt: Date + content: string + role: 'user' | 'assistant' + suggestions?: string[] + sources?: Document[] +} const useSocket = ( url: string, setIsWSReady: (ready: boolean) => void, setError: (error: boolean) => void, ) => { - const [ws, setWs] = useState(null); + const [ws, setWs] = useState(null) useEffect(() => { if (!ws) { const connectWs = async () => { - let chatModel = localStorage.getItem('chatModel'); - let chatModelProvider = localStorage.getItem('chatModelProvider'); - let embeddingModel = localStorage.getItem('embeddingModel'); + let chatModel = localStorage.getItem('chatModel') + let chatModelProvider = localStorage.getItem('chatModelProvider') + let embeddingModel = localStorage.getItem('embeddingModel') let embeddingModelProvider = localStorage.getItem( 'embeddingModelProvider', - ); + ) const providers = await fetch( `${process.env.NEXT_PUBLIC_API_URL}/models`, @@ -45,7 +45,7 @@ const useSocket = ( 'Content-Type': 'application/json', }, }, - ).then(async (res) => await res.json()); + ).then(async res => await res.json()) if ( !chatModel || @@ -54,58 +54,55 @@ const useSocket = ( !embeddingModelProvider ) { if (!chatModel || !chatModelProvider) { - const chatModelProviders = providers.chatModelProviders; + const chatModelProviders = providers.chatModelProviders - chatModelProvider = Object.keys(chatModelProviders)[0]; + chatModelProvider = Object.keys(chatModelProviders)[0] if (chatModelProvider === 'custom_openai') { toast.error( 'Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL', - ); - setError(true); - return; + ) + setError(true) + return } else { - chatModel = Object.keys(chatModelProviders[chatModelProvider])[0]; + chatModel = Object.keys(chatModelProviders[chatModelProvider])[0] if ( !chatModelProviders || Object.keys(chatModelProviders).length === 0 ) - return toast.error('No chat models available'); + return toast.error('No chat models available') } } if (!embeddingModel || !embeddingModelProvider) { - const embeddingModelProviders = providers.embeddingModelProviders; + const embeddingModelProviders = providers.embeddingModelProviders if ( !embeddingModelProviders || Object.keys(embeddingModelProviders).length === 0 ) - return toast.error('No embedding models available'); + return toast.error('No embedding models available') - embeddingModelProvider = Object.keys(embeddingModelProviders)[0]; + embeddingModelProvider = Object.keys(embeddingModelProviders)[0] embeddingModel = Object.keys( embeddingModelProviders[embeddingModelProvider], - )[0]; + )[0] } - localStorage.setItem('chatModel', chatModel!); - localStorage.setItem('chatModelProvider', chatModelProvider); - localStorage.setItem('embeddingModel', embeddingModel!); - localStorage.setItem( - 'embeddingModelProvider', - embeddingModelProvider, - ); + localStorage.setItem('chatModel', chatModel!) + localStorage.setItem('chatModelProvider', chatModelProvider) + localStorage.setItem('embeddingModel', embeddingModel!) + localStorage.setItem('embeddingModelProvider', embeddingModelProvider) } else { - const chatModelProviders = providers.chatModelProviders; - const embeddingModelProviders = providers.embeddingModelProviders; + const chatModelProviders = providers.chatModelProviders + const embeddingModelProviders = providers.embeddingModelProviders if ( Object.keys(chatModelProviders).length > 0 && !chatModelProviders[chatModelProvider] ) { - chatModelProvider = Object.keys(chatModelProviders)[0]; - localStorage.setItem('chatModelProvider', chatModelProvider); + chatModelProvider = Object.keys(chatModelProviders)[0] + localStorage.setItem('chatModelProvider', chatModelProvider) } if ( @@ -113,19 +110,19 @@ const useSocket = ( chatModelProvider != 'custom_openai' && !chatModelProviders[chatModelProvider][chatModel] ) { - chatModel = Object.keys(chatModelProviders[chatModelProvider])[0]; - localStorage.setItem('chatModel', chatModel); + chatModel = Object.keys(chatModelProviders[chatModelProvider])[0] + localStorage.setItem('chatModel', chatModel) } if ( Object.keys(embeddingModelProviders).length > 0 && !embeddingModelProviders[embeddingModelProvider] ) { - embeddingModelProvider = Object.keys(embeddingModelProviders)[0]; + embeddingModelProvider = Object.keys(embeddingModelProviders)[0] localStorage.setItem( 'embeddingModelProvider', embeddingModelProvider, - ); + ) } if ( @@ -134,77 +131,77 @@ const useSocket = ( ) { embeddingModel = Object.keys( embeddingModelProviders[embeddingModelProvider], - )[0]; - localStorage.setItem('embeddingModel', embeddingModel); + )[0] + localStorage.setItem('embeddingModel', embeddingModel) } } - const wsURL = new URL(url); - const searchParams = new URLSearchParams({}); + const wsURL = new URL(url) + const searchParams = new URLSearchParams({}) - searchParams.append('chatModel', chatModel!); - searchParams.append('chatModelProvider', chatModelProvider); + searchParams.append('chatModel', chatModel!) + searchParams.append('chatModelProvider', chatModelProvider) if (chatModelProvider === 'custom_openai') { searchParams.append( 'openAIApiKey', localStorage.getItem('openAIApiKey')!, - ); + ) searchParams.append( 'openAIBaseURL', localStorage.getItem('openAIBaseURL')!, - ); + ) } - searchParams.append('embeddingModel', embeddingModel!); - searchParams.append('embeddingModelProvider', embeddingModelProvider); + searchParams.append('embeddingModel', embeddingModel!) + searchParams.append('embeddingModelProvider', embeddingModelProvider) - wsURL.search = searchParams.toString(); + wsURL.search = searchParams.toString() - const ws = new WebSocket(wsURL.toString()); + const ws = new WebSocket(wsURL.toString()) const timeoutId = setTimeout(() => { if (ws.readyState !== 1) { toast.error( 'Failed to connect to the server. Please try again later.', - ); + ) } - }, 10000); + }, 10000) ws.onopen = () => { - console.log('[DEBUG] open'); - clearTimeout(timeoutId); - setIsWSReady(true); - }; + console.log('[DEBUG] open') + clearTimeout(timeoutId) + setIsWSReady(true) + } ws.onerror = () => { - clearTimeout(timeoutId); - setError(true); - toast.error('WebSocket connection error.'); - }; + clearTimeout(timeoutId) + setError(true) + toast.error('WebSocket connection error.') + } ws.onclose = () => { - clearTimeout(timeoutId); - setError(true); - console.log('[DEBUG] closed'); - }; + clearTimeout(timeoutId) + setError(true) + console.log('[DEBUG] closed') + } - ws.addEventListener('message', (e) => { - const data = JSON.parse(e.data); + ws.addEventListener('message', e => { + const data = JSON.parse(e.data) if (data.type === 'error') { - toast.error(data.data); + toast.error(data.data) } - }); + }) - setWs(ws); - }; + setWs(ws) + } - connectWs(); + connectWs() } - }, [ws, url, setIsWSReady, setError]); + }, [ws, url, setIsWSReady, setError]) - return ws; -}; + return ws +} const loadMessages = async ( chatId: string, @@ -222,66 +219,66 @@ const loadMessages = async ( 'Content-Type': 'application/json', }, }, - ); + ) if (res.status === 404) { - setNotFound(true); - setIsMessagesLoaded(true); - return; + setNotFound(true) + setIsMessagesLoaded(true) + return } - const data = await res.json(); + const data = await res.json() const messages = data.messages.map((msg: any) => { return { ...msg, ...JSON.parse(msg.metadata), - }; - }) as Message[]; + } + }) as Message[] - setMessages(messages); + setMessages(messages) - const history = messages.map((msg) => { - return [msg.role, msg.content]; - }) as [string, string][]; + const history = messages.map(msg => { + return [msg.role, msg.content] + }) as [string, string][] - console.log('[DEBUG] messages loaded'); + console.log('[DEBUG] messages loaded') - document.title = messages[0].content; + document.title = messages[0].content - setChatHistory(history); - setFocusMode(data.chat.focusMode); - setIsMessagesLoaded(true); -}; + setChatHistory(history) + setFocusMode(data.chat.focusMode) + setIsMessagesLoaded(true) +} const ChatWindow = ({ id }: { id?: string }) => { - const searchParams = useSearchParams(); - const initialMessage = searchParams.get('q'); + const searchParams = useSearchParams() + const initialMessage = searchParams.get('q') - const [chatId, setChatId] = useState(id); - const [newChatCreated, setNewChatCreated] = useState(false); + const [chatId, setChatId] = useState(id) + const [newChatCreated, setNewChatCreated] = useState(false) - const [hasError, setHasError] = useState(false); - const [isReady, setIsReady] = useState(false); + const [hasError, setHasError] = useState(false) + const [isReady, setIsReady] = useState(false) - const [isWSReady, setIsWSReady] = useState(false); + const [isWSReady, setIsWSReady] = useState(false) const ws = useSocket( process.env.NEXT_PUBLIC_WS_URL!, setIsWSReady, setHasError, - ); + ) - const [loading, setLoading] = useState(false); - const [messageAppeared, setMessageAppeared] = useState(false); + const [loading, setLoading] = useState(false) + const [messageAppeared, setMessageAppeared] = useState(false) - const [chatHistory, setChatHistory] = useState<[string, string][]>([]); - const [messages, setMessages] = useState([]); + const [chatHistory, setChatHistory] = useState<[string, string][]>([]) + const [messages, setMessages] = useState([]) - const [focusMode, setFocusMode] = useState('webSearch'); + const [focusMode, setFocusMode] = useState('webSearch') - const [isMessagesLoaded, setIsMessagesLoaded] = useState(false); + const [isMessagesLoaded, setIsMessagesLoaded] = useState(false) - const [notFound, setNotFound] = useState(false); + const [notFound, setNotFound] = useState(false) useEffect(() => { if ( @@ -297,37 +294,46 @@ const ChatWindow = ({ id }: { id?: string }) => { setChatHistory, setFocusMode, setNotFound, - ); + ) } else if (!chatId) { - setNewChatCreated(true); - setIsMessagesLoaded(true); - setChatId(crypto.randomBytes(20).toString('hex')); + setNewChatCreated(true) + setIsMessagesLoaded(true) + setChatId(crypto.randomBytes(20).toString('hex')) } // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - const messagesRef = useRef([]); + }, []) useEffect(() => { - messagesRef.current = messages; - }, [messages]); + return () => { + if (ws?.readyState === 1) { + ws.close() + console.log('[DEBUG] closed') + } + } + }, []) + + const messagesRef = useRef([]) + + useEffect(() => { + messagesRef.current = messages + }, [messages]) useEffect(() => { if (isMessagesLoaded && isWSReady) { - setIsReady(true); + setIsReady(true) } - }, [isMessagesLoaded, isWSReady]); + }, [isMessagesLoaded, isWSReady]) const sendMessage = async (message: string) => { - if (loading) return; - setLoading(true); - setMessageAppeared(false); + if (loading) return + setLoading(true) + setMessageAppeared(false) - let sources: Document[] | undefined = undefined; - let recievedMessage = ''; - let added = false; + let sources: Document[] | undefined = undefined + let recievedMessage = '' + let added = false - const messageId = crypto.randomBytes(7).toString('hex'); + const messageId = crypto.randomBytes(7).toString('hex') ws?.send( JSON.stringify({ @@ -339,9 +345,9 @@ const ChatWindow = ({ id }: { id?: string }) => { focusMode: focusMode, history: [...chatHistory, ['human', message]], }), - ); + ) - setMessages((prevMessages) => [ + setMessages(prevMessages => [ ...prevMessages, { content: message, @@ -350,21 +356,21 @@ const ChatWindow = ({ id }: { id?: string }) => { role: 'user', createdAt: new Date(), }, - ]); + ]) const messageHandler = async (e: MessageEvent) => { - const data = JSON.parse(e.data); + const data = JSON.parse(e.data) if (data.type === 'error') { - toast.error(data.data); - setLoading(false); - return; + toast.error(data.data) + setLoading(false) + return } if (data.type === 'sources') { - sources = data.data; + sources = data.data if (!added) { - setMessages((prevMessages) => [ + setMessages(prevMessages => [ ...prevMessages, { content: '', @@ -374,15 +380,15 @@ const ChatWindow = ({ id }: { id?: string }) => { sources: sources, createdAt: new Date(), }, - ]); - added = true; + ]) + added = true } - setMessageAppeared(true); + setMessageAppeared(true) } if (data.type === 'message') { if (!added) { - setMessages((prevMessages) => [ + setMessages(prevMessages => [ ...prevMessages, { content: data.data, @@ -392,35 +398,35 @@ const ChatWindow = ({ id }: { id?: string }) => { sources: sources, createdAt: new Date(), }, - ]); - added = true; + ]) + added = true } - setMessages((prev) => - prev.map((message) => { + setMessages(prev => + prev.map(message => { if (message.messageId === data.messageId) { - return { ...message, content: message.content + data.data }; + return { ...message, content: message.content + data.data } } - return message; + return message }), - ); + ) - recievedMessage += data.data; - setMessageAppeared(true); + recievedMessage += data.data + setMessageAppeared(true) } if (data.type === 'messageEnd') { - setChatHistory((prevHistory) => [ + setChatHistory(prevHistory => [ ...prevHistory, ['human', message], ['assistant', recievedMessage], - ]); + ]) - ws?.removeEventListener('message', messageHandler); - setLoading(false); + ws?.removeEventListener('message', messageHandler) + setLoading(false) - const lastMsg = messagesRef.current[messagesRef.current.length - 1]; + const lastMsg = messagesRef.current[messagesRef.current.length - 1] if ( lastMsg.role === 'assistant' && @@ -428,54 +434,54 @@ const ChatWindow = ({ id }: { id?: string }) => { lastMsg.sources.length > 0 && !lastMsg.suggestions ) { - const suggestions = await getSuggestions(messagesRef.current); - setMessages((prev) => - prev.map((msg) => { + const suggestions = await getSuggestions(messagesRef.current) + setMessages(prev => + prev.map(msg => { if (msg.messageId === lastMsg.messageId) { - return { ...msg, suggestions: suggestions }; + return { ...msg, suggestions: suggestions } } - return msg; + return msg }), - ); + ) } } - }; + } - ws?.addEventListener('message', messageHandler); - }; + ws?.addEventListener('message', messageHandler) + } const rewrite = (messageId: string) => { - const index = messages.findIndex((msg) => msg.messageId === messageId); + const index = messages.findIndex(msg => msg.messageId === messageId) - if (index === -1) return; + if (index === -1) return - const message = messages[index - 1]; + const message = messages[index - 1] - setMessages((prev) => { - return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)]; - }); - setChatHistory((prev) => { - return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)]; - }); + setMessages(prev => { + return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)] + }) + setChatHistory(prev => { + return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)] + }) - sendMessage(message.content); - }; + sendMessage(message.content) + } useEffect(() => { if (isReady && initialMessage) { - sendMessage(initialMessage); + sendMessage(initialMessage) } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [isReady, initialMessage]); + }, [isReady, initialMessage]) if (hasError) { return ( -
-

+

+

Failed to connect to the server. Please try again later.

- ); + ) } return isReady ? ( @@ -504,25 +510,25 @@ const ChatWindow = ({ id }: { id?: string }) => {
) ) : ( -
+
- ); -}; + ) +} -export default ChatWindow; +export default ChatWindow From f620252406f828c67555d9393fe3c6a49440e42f Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:51:12 +0530 Subject: [PATCH 09/12] feat(linkDocument): add error handling --- src/lib/linkDocument.ts | 98 ++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/src/lib/linkDocument.ts b/src/lib/linkDocument.ts index 9607220..5e90571 100644 --- a/src/lib/linkDocument.ts +++ b/src/lib/linkDocument.ts @@ -3,6 +3,7 @@ import { htmlToText } from 'html-to-text'; import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; import { Document } from '@langchain/core/documents'; import pdfParse from 'pdf-parse'; +import logger from '../utils/logger'; export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => { const splitter = new RecursiveCharacterTextSplitter(); @@ -16,66 +17,81 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => { ? link : `https://${link}`; - const res = await axios.get(link, { - responseType: 'arraybuffer', - }); + try { + const res = await axios.get(link, { + responseType: 'arraybuffer', + }); - const isPdf = res.headers['content-type'] === 'application/pdf'; + const isPdf = res.headers['content-type'] === 'application/pdf'; - if (isPdf) { - const pdfText = await pdfParse(res.data); - const parsedText = pdfText.text + if (isPdf) { + const pdfText = await pdfParse(res.data); + const parsedText = pdfText.text + .replace(/(\r\n|\n|\r)/gm, ' ') + .replace(/\s+/g, ' ') + .trim(); + + const splittedText = await splitter.splitText(parsedText); + const title = 'PDF Document'; + + const linkDocs = splittedText.map((text) => { + return new Document({ + pageContent: text, + metadata: { + title: title, + url: link, + }, + }); + }); + + docs.push(...linkDocs); + return; + } + + const parsedText = htmlToText(res.data.toString('utf8'), { + selectors: [ + { + selector: 'a', + options: { + ignoreHref: true, + }, + }, + ], + }) .replace(/(\r\n|\n|\r)/gm, ' ') .replace(/\s+/g, ' ') .trim(); const splittedText = await splitter.splitText(parsedText); - const title = 'PDF Document'; + const title = res.data + .toString('utf8') + .match(/(.*?)<\/title>/)?.[1]; const linkDocs = splittedText.map((text) => { return new Document({ pageContent: text, metadata: { - title: title, + title: title || link, url: link, }, }); }); docs.push(...linkDocs); - return; - } - - const parsedText = htmlToText(res.data.toString('utf8'), { - selectors: [ - { - selector: 'a', - options: { - ignoreHref: true, + } catch (err) { + logger.error( + `Error at generating documents from links: ${err.message}`, + ); + docs.push( + new Document({ + pageContent: `Failed to retrieve content from the link: ${err.message}`, + metadata: { + title: 'Failed to retrieve content', + url: link, }, - }, - ], - }) - .replace(/(\r\n|\n|\r)/gm, ' ') - .replace(/\s+/g, ' ') - .trim(); - - const splittedText = await splitter.splitText(parsedText); - const title = res.data - .toString('utf8') - .match(/<title>(.*?)<\/title>/)?.[1]; - - const linkDocs = splittedText.map((text) => { - return new Document({ - pageContent: text, - metadata: { - title: title || link, - url: link, - }, - }); - }); - - docs.push(...linkDocs); + }), + ); + } }), ); From 449684c4192105ef39dcfe74dc0bc053780c4e15 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:51:42 +0530 Subject: [PATCH 10/12] feat(webSearchAgent): update retriever prompt & change temp --- src/agents/webSearchAgent.ts | 59 ++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts index f7a6d7e..159023e 100644 --- a/src/agents/webSearchAgent.ts +++ b/src/agents/webSearchAgent.ts @@ -23,22 +23,37 @@ import LineListOutputParser from '../lib/outputParsers/listLineOutputParser'; import { getDocumentsFromLinks } from '../lib/linkDocument'; import LineOutputParser from '../lib/outputParsers/lineOutputParser'; import { IterableReadableStream } from '@langchain/core/utils/stream'; +import { ChatOpenAI } from '@langchain/openai'; const basicSearchRetrieverPrompt = ` -You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. -If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. -If the question contains some links and asks to answer from those links or even if they don't you need to return the links inside 'links' XML block and the question inside 'question' XML block. If there are no links then you need to return the question without any XML block. -If the user asks to summarize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block. +You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it. +If it is a smple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic). +If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block. +You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response. -Example: -1. Follow up question: What is the capital of France? -Rephrased question: \`Capital of france\` +There are several examples attached for your reference inside the below \`examples\` XML block -2. Follow up question: What is the population of New York City? -Rephrased question: \`Population of New York City\` +<examples> +1. Follow up question: What is the capital of France +Rephrased question:\` +<question> +Capital of france +</question> +\` + +2. Hi, how are you? +Rephrased question\` +<question> +not_needed +</question> +\` 3. Follow up question: What is Docker? -Rephrased question: \`What is Docker\` +Rephrased question: \` +<question> +What is Docker +</question> +\` 4. Follow up question: Can you tell me what is X from https://example.com Rephrased question: \` @@ -54,16 +69,20 @@ https://example.com 5. Follow up question: Summarize the content from https://example.com Rephrased question: \` <question> -Summarize +summarize </question> <links> https://example.com </links> \` +</examples> -Conversation: +Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above. + +<conversation> {chat_history} +</conversation> Follow up question: {query} Rephrased question: @@ -133,15 +152,13 @@ type BasicChainInput = { }; const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => { + (llm as unknown as ChatOpenAI).temperature = 0; + return RunnableSequence.from([ PromptTemplate.fromTemplate(basicSearchRetrieverPrompt), llm, strParser, RunnableLambda.from(async (input: string) => { - if (input === 'not_needed') { - return { query: '', docs: [] }; - } - const linksOutputParser = new LineListOutputParser({ key: 'links', }); @@ -153,9 +170,13 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => { const links = await linksOutputParser.parse(input); let question = await questionOutputParser.parse(input); + if (question === 'not_needed') { + return { query: '', docs: [] }; + } + if (links.length > 0) { if (question.length === 0) { - question = 'Summarize'; + question = 'summarize'; } let docs = []; @@ -272,7 +293,7 @@ const createBasicWebSearchAnsweringChain = ( return docs; } - if (query === 'Summarize') { + if (query.toLocaleLowerCase() === 'summarize') { return docs; } @@ -295,7 +316,7 @@ const createBasicWebSearchAnsweringChain = ( }); const sortedDocs = similarity - .filter((sim) => sim.similarity > 0.5) + .filter((sim) => sim.similarity > 0.3) .sort((a, b) => b.similarity - a.similarity) .slice(0, 15) .map((sim) => docsWithContent[sim.index]); From c952469f087c9d0577d9421308eaaf2d1d9217a7 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:51:59 +0530 Subject: [PATCH 11/12] feat(chaWindow): lint & beautify --- ui/components/ChatWindow.tsx | 407 ++++++++++++++++++----------------- 1 file changed, 205 insertions(+), 202 deletions(-) diff --git a/ui/components/ChatWindow.tsx b/ui/components/ChatWindow.tsx index 9a1fe3c..b3d0089 100644 --- a/ui/components/ChatWindow.tsx +++ b/ui/components/ChatWindow.tsx @@ -1,42 +1,42 @@ -'use client' +'use client'; -import { useEffect, useRef, useState } from 'react' -import { Document } from '@langchain/core/documents' -import Navbar from './Navbar' -import Chat from './Chat' -import EmptyChat from './EmptyChat' -import crypto from 'crypto' -import { toast } from 'sonner' -import { useSearchParams } from 'next/navigation' -import { getSuggestions } from '@/lib/actions' -import Error from 'next/error' +import { useEffect, useRef, useState } from 'react'; +import { Document } from '@langchain/core/documents'; +import Navbar from './Navbar'; +import Chat from './Chat'; +import EmptyChat from './EmptyChat'; +import crypto from 'crypto'; +import { toast } from 'sonner'; +import { useSearchParams } from 'next/navigation'; +import { getSuggestions } from '@/lib/actions'; +import Error from 'next/error'; export type Message = { - messageId: string - chatId: string - createdAt: Date - content: string - role: 'user' | 'assistant' - suggestions?: string[] - sources?: Document[] -} + messageId: string; + chatId: string; + createdAt: Date; + content: string; + role: 'user' | 'assistant'; + suggestions?: string[]; + sources?: Document[]; +}; const useSocket = ( url: string, setIsWSReady: (ready: boolean) => void, setError: (error: boolean) => void, ) => { - const [ws, setWs] = useState<WebSocket | null>(null) + const [ws, setWs] = useState<WebSocket | null>(null); useEffect(() => { if (!ws) { const connectWs = async () => { - let chatModel = localStorage.getItem('chatModel') - let chatModelProvider = localStorage.getItem('chatModelProvider') - let embeddingModel = localStorage.getItem('embeddingModel') + let chatModel = localStorage.getItem('chatModel'); + let chatModelProvider = localStorage.getItem('chatModelProvider'); + let embeddingModel = localStorage.getItem('embeddingModel'); let embeddingModelProvider = localStorage.getItem( 'embeddingModelProvider', - ) + ); const providers = await fetch( `${process.env.NEXT_PUBLIC_API_URL}/models`, @@ -45,7 +45,7 @@ const useSocket = ( 'Content-Type': 'application/json', }, }, - ).then(async res => await res.json()) + ).then(async (res) => await res.json()); if ( !chatModel || @@ -54,55 +54,58 @@ const useSocket = ( !embeddingModelProvider ) { if (!chatModel || !chatModelProvider) { - const chatModelProviders = providers.chatModelProviders + const chatModelProviders = providers.chatModelProviders; - chatModelProvider = Object.keys(chatModelProviders)[0] + chatModelProvider = Object.keys(chatModelProviders)[0]; if (chatModelProvider === 'custom_openai') { toast.error( 'Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL', - ) - setError(true) - return + ); + setError(true); + return; } else { - chatModel = Object.keys(chatModelProviders[chatModelProvider])[0] + chatModel = Object.keys(chatModelProviders[chatModelProvider])[0]; if ( !chatModelProviders || Object.keys(chatModelProviders).length === 0 ) - return toast.error('No chat models available') + return toast.error('No chat models available'); } } if (!embeddingModel || !embeddingModelProvider) { - const embeddingModelProviders = providers.embeddingModelProviders + const embeddingModelProviders = providers.embeddingModelProviders; if ( !embeddingModelProviders || Object.keys(embeddingModelProviders).length === 0 ) - return toast.error('No embedding models available') + return toast.error('No embedding models available'); - embeddingModelProvider = Object.keys(embeddingModelProviders)[0] + embeddingModelProvider = Object.keys(embeddingModelProviders)[0]; embeddingModel = Object.keys( embeddingModelProviders[embeddingModelProvider], - )[0] + )[0]; } - localStorage.setItem('chatModel', chatModel!) - localStorage.setItem('chatModelProvider', chatModelProvider) - localStorage.setItem('embeddingModel', embeddingModel!) - localStorage.setItem('embeddingModelProvider', embeddingModelProvider) + localStorage.setItem('chatModel', chatModel!); + localStorage.setItem('chatModelProvider', chatModelProvider); + localStorage.setItem('embeddingModel', embeddingModel!); + localStorage.setItem( + 'embeddingModelProvider', + embeddingModelProvider, + ); } else { - const chatModelProviders = providers.chatModelProviders - const embeddingModelProviders = providers.embeddingModelProviders + const chatModelProviders = providers.chatModelProviders; + const embeddingModelProviders = providers.embeddingModelProviders; if ( Object.keys(chatModelProviders).length > 0 && !chatModelProviders[chatModelProvider] ) { - chatModelProvider = Object.keys(chatModelProviders)[0] - localStorage.setItem('chatModelProvider', chatModelProvider) + chatModelProvider = Object.keys(chatModelProviders)[0]; + localStorage.setItem('chatModelProvider', chatModelProvider); } if ( @@ -110,19 +113,19 @@ const useSocket = ( chatModelProvider != 'custom_openai' && !chatModelProviders[chatModelProvider][chatModel] ) { - chatModel = Object.keys(chatModelProviders[chatModelProvider])[0] - localStorage.setItem('chatModel', chatModel) + chatModel = Object.keys(chatModelProviders[chatModelProvider])[0]; + localStorage.setItem('chatModel', chatModel); } if ( Object.keys(embeddingModelProviders).length > 0 && !embeddingModelProviders[embeddingModelProvider] ) { - embeddingModelProvider = Object.keys(embeddingModelProviders)[0] + embeddingModelProvider = Object.keys(embeddingModelProviders)[0]; localStorage.setItem( 'embeddingModelProvider', embeddingModelProvider, - ) + ); } if ( @@ -131,77 +134,77 @@ const useSocket = ( ) { embeddingModel = Object.keys( embeddingModelProviders[embeddingModelProvider], - )[0] - localStorage.setItem('embeddingModel', embeddingModel) + )[0]; + localStorage.setItem('embeddingModel', embeddingModel); } } - const wsURL = new URL(url) - const searchParams = new URLSearchParams({}) + const wsURL = new URL(url); + const searchParams = new URLSearchParams({}); - searchParams.append('chatModel', chatModel!) - searchParams.append('chatModelProvider', chatModelProvider) + searchParams.append('chatModel', chatModel!); + searchParams.append('chatModelProvider', chatModelProvider); if (chatModelProvider === 'custom_openai') { searchParams.append( 'openAIApiKey', localStorage.getItem('openAIApiKey')!, - ) + ); searchParams.append( 'openAIBaseURL', localStorage.getItem('openAIBaseURL')!, - ) + ); } - searchParams.append('embeddingModel', embeddingModel!) - searchParams.append('embeddingModelProvider', embeddingModelProvider) + searchParams.append('embeddingModel', embeddingModel!); + searchParams.append('embeddingModelProvider', embeddingModelProvider); - wsURL.search = searchParams.toString() + wsURL.search = searchParams.toString(); - const ws = new WebSocket(wsURL.toString()) + const ws = new WebSocket(wsURL.toString()); const timeoutId = setTimeout(() => { if (ws.readyState !== 1) { toast.error( 'Failed to connect to the server. Please try again later.', - ) + ); } - }, 10000) + }, 10000); ws.onopen = () => { - console.log('[DEBUG] open') - clearTimeout(timeoutId) - setIsWSReady(true) - } + console.log('[DEBUG] open'); + clearTimeout(timeoutId); + setIsWSReady(true); + }; ws.onerror = () => { - clearTimeout(timeoutId) - setError(true) - toast.error('WebSocket connection error.') - } + clearTimeout(timeoutId); + setError(true); + toast.error('WebSocket connection error.'); + }; ws.onclose = () => { - clearTimeout(timeoutId) - setError(true) - console.log('[DEBUG] closed') - } + clearTimeout(timeoutId); + setError(true); + console.log('[DEBUG] closed'); + }; - ws.addEventListener('message', e => { - const data = JSON.parse(e.data) + ws.addEventListener('message', (e) => { + const data = JSON.parse(e.data); if (data.type === 'error') { - toast.error(data.data) + toast.error(data.data); } - }) + }); - setWs(ws) - } + setWs(ws); + }; - connectWs() + connectWs(); } - }, [ws, url, setIsWSReady, setError]) + }, [ws, url, setIsWSReady, setError]); - return ws -} + return ws; +}; const loadMessages = async ( chatId: string, @@ -219,66 +222,66 @@ const loadMessages = async ( 'Content-Type': 'application/json', }, }, - ) + ); if (res.status === 404) { - setNotFound(true) - setIsMessagesLoaded(true) - return + setNotFound(true); + setIsMessagesLoaded(true); + return; } - const data = await res.json() + const data = await res.json(); const messages = data.messages.map((msg: any) => { return { ...msg, ...JSON.parse(msg.metadata), - } - }) as Message[] + }; + }) as Message[]; - setMessages(messages) + setMessages(messages); - const history = messages.map(msg => { - return [msg.role, msg.content] - }) as [string, string][] + const history = messages.map((msg) => { + return [msg.role, msg.content]; + }) as [string, string][]; - console.log('[DEBUG] messages loaded') + console.log('[DEBUG] messages loaded'); - document.title = messages[0].content + document.title = messages[0].content; - setChatHistory(history) - setFocusMode(data.chat.focusMode) - setIsMessagesLoaded(true) -} + setChatHistory(history); + setFocusMode(data.chat.focusMode); + setIsMessagesLoaded(true); +}; const ChatWindow = ({ id }: { id?: string }) => { - const searchParams = useSearchParams() - const initialMessage = searchParams.get('q') + const searchParams = useSearchParams(); + const initialMessage = searchParams.get('q'); - const [chatId, setChatId] = useState<string | undefined>(id) - const [newChatCreated, setNewChatCreated] = useState(false) + const [chatId, setChatId] = useState<string | undefined>(id); + const [newChatCreated, setNewChatCreated] = useState(false); - const [hasError, setHasError] = useState(false) - const [isReady, setIsReady] = useState(false) + const [hasError, setHasError] = useState(false); + const [isReady, setIsReady] = useState(false); - const [isWSReady, setIsWSReady] = useState(false) + const [isWSReady, setIsWSReady] = useState(false); const ws = useSocket( process.env.NEXT_PUBLIC_WS_URL!, setIsWSReady, setHasError, - ) + ); - const [loading, setLoading] = useState(false) - const [messageAppeared, setMessageAppeared] = useState(false) + const [loading, setLoading] = useState(false); + const [messageAppeared, setMessageAppeared] = useState(false); - const [chatHistory, setChatHistory] = useState<[string, string][]>([]) - const [messages, setMessages] = useState<Message[]>([]) + const [chatHistory, setChatHistory] = useState<[string, string][]>([]); + const [messages, setMessages] = useState<Message[]>([]); - const [focusMode, setFocusMode] = useState('webSearch') + const [focusMode, setFocusMode] = useState('webSearch'); - const [isMessagesLoaded, setIsMessagesLoaded] = useState(false) + const [isMessagesLoaded, setIsMessagesLoaded] = useState(false); - const [notFound, setNotFound] = useState(false) + const [notFound, setNotFound] = useState(false); useEffect(() => { if ( @@ -294,46 +297,46 @@ const ChatWindow = ({ id }: { id?: string }) => { setChatHistory, setFocusMode, setNotFound, - ) + ); } else if (!chatId) { - setNewChatCreated(true) - setIsMessagesLoaded(true) - setChatId(crypto.randomBytes(20).toString('hex')) + setNewChatCreated(true); + setIsMessagesLoaded(true); + setChatId(crypto.randomBytes(20).toString('hex')); } // eslint-disable-next-line react-hooks/exhaustive-deps - }, []) + }, []); useEffect(() => { return () => { if (ws?.readyState === 1) { - ws.close() - console.log('[DEBUG] closed') + ws.close(); + console.log('[DEBUG] closed'); } - } - }, []) + }; + }, []); - const messagesRef = useRef<Message[]>([]) + const messagesRef = useRef<Message[]>([]); useEffect(() => { - messagesRef.current = messages - }, [messages]) + messagesRef.current = messages; + }, [messages]); useEffect(() => { if (isMessagesLoaded && isWSReady) { - setIsReady(true) + setIsReady(true); } - }, [isMessagesLoaded, isWSReady]) + }, [isMessagesLoaded, isWSReady]); const sendMessage = async (message: string) => { - if (loading) return - setLoading(true) - setMessageAppeared(false) + if (loading) return; + setLoading(true); + setMessageAppeared(false); - let sources: Document[] | undefined = undefined - let recievedMessage = '' - let added = false + let sources: Document[] | undefined = undefined; + let recievedMessage = ''; + let added = false; - const messageId = crypto.randomBytes(7).toString('hex') + const messageId = crypto.randomBytes(7).toString('hex'); ws?.send( JSON.stringify({ @@ -345,9 +348,9 @@ const ChatWindow = ({ id }: { id?: string }) => { focusMode: focusMode, history: [...chatHistory, ['human', message]], }), - ) + ); - setMessages(prevMessages => [ + setMessages((prevMessages) => [ ...prevMessages, { content: message, @@ -356,21 +359,21 @@ const ChatWindow = ({ id }: { id?: string }) => { role: 'user', createdAt: new Date(), }, - ]) + ]); const messageHandler = async (e: MessageEvent) => { - const data = JSON.parse(e.data) + const data = JSON.parse(e.data); if (data.type === 'error') { - toast.error(data.data) - setLoading(false) - return + toast.error(data.data); + setLoading(false); + return; } if (data.type === 'sources') { - sources = data.data + sources = data.data; if (!added) { - setMessages(prevMessages => [ + setMessages((prevMessages) => [ ...prevMessages, { content: '', @@ -380,15 +383,15 @@ const ChatWindow = ({ id }: { id?: string }) => { sources: sources, createdAt: new Date(), }, - ]) - added = true + ]); + added = true; } - setMessageAppeared(true) + setMessageAppeared(true); } if (data.type === 'message') { if (!added) { - setMessages(prevMessages => [ + setMessages((prevMessages) => [ ...prevMessages, { content: data.data, @@ -398,35 +401,35 @@ const ChatWindow = ({ id }: { id?: string }) => { sources: sources, createdAt: new Date(), }, - ]) - added = true + ]); + added = true; } - setMessages(prev => - prev.map(message => { + setMessages((prev) => + prev.map((message) => { if (message.messageId === data.messageId) { - return { ...message, content: message.content + data.data } + return { ...message, content: message.content + data.data }; } - return message + return message; }), - ) + ); - recievedMessage += data.data - setMessageAppeared(true) + recievedMessage += data.data; + setMessageAppeared(true); } if (data.type === 'messageEnd') { - setChatHistory(prevHistory => [ + setChatHistory((prevHistory) => [ ...prevHistory, ['human', message], ['assistant', recievedMessage], - ]) + ]); - ws?.removeEventListener('message', messageHandler) - setLoading(false) + ws?.removeEventListener('message', messageHandler); + setLoading(false); - const lastMsg = messagesRef.current[messagesRef.current.length - 1] + const lastMsg = messagesRef.current[messagesRef.current.length - 1]; if ( lastMsg.role === 'assistant' && @@ -434,54 +437,54 @@ const ChatWindow = ({ id }: { id?: string }) => { lastMsg.sources.length > 0 && !lastMsg.suggestions ) { - const suggestions = await getSuggestions(messagesRef.current) - setMessages(prev => - prev.map(msg => { + const suggestions = await getSuggestions(messagesRef.current); + setMessages((prev) => + prev.map((msg) => { if (msg.messageId === lastMsg.messageId) { - return { ...msg, suggestions: suggestions } + return { ...msg, suggestions: suggestions }; } - return msg + return msg; }), - ) + ); } } - } + }; - ws?.addEventListener('message', messageHandler) - } + ws?.addEventListener('message', messageHandler); + }; const rewrite = (messageId: string) => { - const index = messages.findIndex(msg => msg.messageId === messageId) + const index = messages.findIndex((msg) => msg.messageId === messageId); - if (index === -1) return + if (index === -1) return; - const message = messages[index - 1] + const message = messages[index - 1]; - setMessages(prev => { - return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)] - }) - setChatHistory(prev => { - return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)] - }) + setMessages((prev) => { + return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)]; + }); + setChatHistory((prev) => { + return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)]; + }); - sendMessage(message.content) - } + sendMessage(message.content); + }; useEffect(() => { if (isReady && initialMessage) { - sendMessage(initialMessage) + sendMessage(initialMessage); } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [isReady, initialMessage]) + }, [isReady, initialMessage]); if (hasError) { return ( - <div className='flex flex-col items-center justify-center min-h-screen'> - <p className='dark:text-white/70 text-black/70 text-sm'> + <div className="flex flex-col items-center justify-center min-h-screen"> + <p className="dark:text-white/70 text-black/70 text-sm"> Failed to connect to the server. Please try again later. </p> </div> - ) + ); } return isReady ? ( @@ -510,25 +513,25 @@ const ChatWindow = ({ id }: { id?: string }) => { </div> ) ) : ( - <div className='flex flex-row items-center justify-center min-h-screen'> + <div className="flex flex-row items-center justify-center min-h-screen"> <svg - aria-hidden='true' - className='w-8 h-8 text-light-200 fill-light-secondary dark:text-[#202020] animate-spin dark:fill-[#ffffff3b]' - viewBox='0 0 100 101' - fill='none' - xmlns='http://www.w3.org/2000/svg' + aria-hidden="true" + className="w-8 h-8 text-light-200 fill-light-secondary dark:text-[#202020] animate-spin dark:fill-[#ffffff3b]" + viewBox="0 0 100 101" + fill="none" + xmlns="http://www.w3.org/2000/svg" > <path - d='M100 50.5908C100.003 78.2051 78.1951 100.003 50.5908 100C22.9765 99.9972 0.997224 78.018 1 50.4037C1.00281 22.7993 22.8108 0.997224 50.4251 1C78.0395 1.00281 100.018 22.8108 100 50.4251ZM9.08164 50.594C9.06312 73.3997 27.7909 92.1272 50.5966 92.1457C73.4023 92.1642 92.1298 73.4365 92.1483 50.6308C92.1669 27.8251 73.4392 9.0973 50.6335 9.07878C27.8278 9.06026 9.10003 27.787 9.08164 50.594Z' - fill='currentColor' + d="M100 50.5908C100.003 78.2051 78.1951 100.003 50.5908 100C22.9765 99.9972 0.997224 78.018 1 50.4037C1.00281 22.7993 22.8108 0.997224 50.4251 1C78.0395 1.00281 100.018 22.8108 100 50.4251ZM9.08164 50.594C9.06312 73.3997 27.7909 92.1272 50.5966 92.1457C73.4023 92.1642 92.1298 73.4365 92.1483 50.6308C92.1669 27.8251 73.4392 9.0973 50.6335 9.07878C27.8278 9.06026 9.10003 27.787 9.08164 50.594Z" + fill="currentColor" /> <path - d='M93.9676 39.0409C96.393 38.4037 97.8624 35.9116 96.9801 33.5533C95.1945 28.8227 92.871 24.3692 90.0681 20.348C85.6237 14.1775 79.4473 9.36872 72.0454 6.45794C64.6435 3.54717 56.3134 2.65431 48.3133 3.89319C45.869 4.27179 44.3768 6.77534 45.014 9.20079C45.6512 11.6262 48.1343 13.0956 50.5786 12.717C56.5073 11.8281 62.5542 12.5399 68.0406 14.7911C73.527 17.0422 78.2187 20.7487 81.5841 25.4923C83.7976 28.5886 85.4467 32.059 86.4416 35.7474C87.1273 38.1189 89.5423 39.6781 91.9676 39.0409Z' - fill='currentFill' + d="M93.9676 39.0409C96.393 38.4037 97.8624 35.9116 96.9801 33.5533C95.1945 28.8227 92.871 24.3692 90.0681 20.348C85.6237 14.1775 79.4473 9.36872 72.0454 6.45794C64.6435 3.54717 56.3134 2.65431 48.3133 3.89319C45.869 4.27179 44.3768 6.77534 45.014 9.20079C45.6512 11.6262 48.1343 13.0956 50.5786 12.717C56.5073 11.8281 62.5542 12.5399 68.0406 14.7911C73.527 17.0422 78.2187 20.7487 81.5841 25.4923C83.7976 28.5886 85.4467 32.059 86.4416 35.7474C87.1273 38.1189 89.5423 39.6781 91.9676 39.0409Z" + fill="currentFill" /> </svg> </div> - ) -} + ); +}; -export default ChatWindow +export default ChatWindow; From 92abbc5b981b9809466c00363c7f90ecbcd1d857 Mon Sep 17 00:00:00 2001 From: ItzCrazyKns <95534749+ItzCrazyKns@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:54:37 +0530 Subject: [PATCH 12/12] feat(webSearchRetriever): use `question` instead of `input` --- src/agents/webSearchAgent.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts index 159023e..77ec181 100644 --- a/src/agents/webSearchAgent.ts +++ b/src/agents/webSearchAgent.ts @@ -248,7 +248,7 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => { return { query: question, docs: docs }; } else { - const res = await searchSearxng(input, { + const res = await searchSearxng(question, { language: 'en', }); @@ -264,7 +264,7 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => { }), ); - return { query: input, docs: documents }; + return { query: question, docs: documents }; } }), ]);