commit d1c74c861e669325d2b5dbe07c0745bd235655b9 Author: ItzCrazyKns Date: Tue Apr 9 16:21:05 2024 +0530 Initial commit diff --git a/.assets/perplexica-preview.gif b/.assets/perplexica-preview.gif new file mode 100644 index 0000000..5dae084 Binary files /dev/null and b/.assets/perplexica-preview.gif differ diff --git a/.assets/perplexica-screenshot.png b/.assets/perplexica-screenshot.png new file mode 100644 index 0000000..c47a544 Binary files /dev/null and b/.assets/perplexica-screenshot.png differ diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1df483a --- /dev/null +++ b/.env.example @@ -0,0 +1,4 @@ +PORT=3001 +OPENAI_API_KEY= +SIMILARITY_MEASURE=cosine # cosine or dot +SEARXNG_API_URL= # no need to fill this if using docker \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c34d7f5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Node.js +node_modules/ +npm-debug.log +yarn-error.log + +# Build output +/.next/ +/out/ + +# IDE/Editor specific +.vscode/ +.idea/ +*.iml + +# Dependency lock files +package-lock.json +yarn.lock + +# Environment variables +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +# Log files +logs/ +*.log + +# Testing +/coverage/ + +# Miscellaneous +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/.prettierrc.js b/.prettierrc.js new file mode 100644 index 0000000..1937ff1 --- /dev/null +++ b/.prettierrc.js @@ -0,0 +1,12 @@ +/** @type {import("prettier").Config} */ + +const config = { + printWidth: 80, + trailingComma: 'all', + endOfLine: 'auto', + singleQuote: true, + tabWidth: 2, + semi: true, +}; + +module.exports = config; diff --git a/README.md b/README.md new file mode 100644 index 0000000..cdcac00 --- /dev/null +++ b/README.md @@ -0,0 +1,74 @@ +# 🚀 Perplexica - An AI-powered search engine 🔎 + +![preview](.assets/perplexica-screenshot.png) + +## Overview + +Perplexica is an open-source AI-powered searching tool or 
an AI-powered search engine that goes deep into the internet to find answers. Inspired by Perplexity AI, it's an open-source option that not just searches the web but understands your questions. It uses advanced machine learning algorithms like similarity searching and embeddings to refine results and provides clear answers with sources cited. + +## Preview +![video-preview](.assets/perplexica-preview.gif) + +## Features + +- **Two Main Modes:** + - **Copilot Mode:** (In development) Boosts search by generating different queries to find more relevant internet sources. Like normal search instead of just using the context by SearxNG, it visits the top matches and tries to find relevant sources to the user's query directly from the page. + - **Normal Mode:** Processes your query and performs a web search. +- **Focus Modes:** (In development) special modes to better answer specific types of questions. +- **Current Information:** Some search tools might give you outdated info because they use data from crawling bots and convert them into embeddings and store them in an index (it's like converting the web into embeddings, which is quite expensive). Unlike them, Perplexica uses SearxNG, a metasearch engine, to get the results, rerank them, and pick the most relevant source out of them, ensuring you always get the latest information without the overhead of daily data updates. + +It has many more features like image and video search. Some of the planned features are mentioned in [upcoming features](#upcoming-features). + +## Installation + +There are mainly 2 ways of installing Perplexica - With Docker, Without Docker. Using Docker is highly recommended. + +### Getting Started with Docker (Recommended) + +1. Make sure Docker is installed and running on your system. +2. Clone the Perplexica repository: + +```bash +git clone https://github.com/ItzCrazyKns/Perplexica.git +``` + +3. After cloning, rename the `.env.example` file to `.env` in the root directory. 
For Docker setups, you only need to fill these fields: + +- `OPENAI_API_KEY` +- `SIMILARITY_MEASURE` (It's filled by default; you can leave it as is if you are unsure about it.) + +4. Navigate to the directory containing `docker-compose.yaml` and execute: + +```bash +docker compose up +``` + +5. Wait a few minutes for the setup to complete. Access Perplexica at `http://localhost:3000` in your web browser. + +### Non-Docker Installation + +For setups without Docker: + +1. Follow the initial steps to clone the repository and rename the `.env.example` file to `.env` in the root directory. You will need to fill in all the fields in this file. +2. Additionally, rename the `.env.example` file to `.env` in the `ui` folder and complete all fields. +3. The non-Docker setup requires manual configuration of both the backend and frontend. + +**Note**: Using Docker is recommended as it simplifies the setup process, especially for managing environment variables and dependencies. + +## Upcoming Features + +- Finalizing Copilot Mode +- Adding support for multiple local LLMs and LLM providers such as Anthropic, Google, etc. +- Adding Discover and History Saving features +- Introducing various Focus Modes +- Continuous bug fixing + +## Contribution + +Perplexica is built on the idea that AI and large language models should be easy for everyone to use. If you find bugs or have ideas, please share them via GitHub Issues. Details on how to contribute will be shared soon. + +## Acknowledgements + +Inspired by Perplexity AI, Perplexica aims to provide a similar service but always up-to-date and fully open source, thanks to SearxNG. + +If you have any queries you can reach me via my Discord - `itzcrazykns`. Thanks for checking out Perplexica. 
diff --git a/app.dockerfile b/app.dockerfile new file mode 100644 index 0000000..105cf86 --- /dev/null +++ b/app.dockerfile @@ -0,0 +1,15 @@ +FROM node:alpine + +ARG NEXT_PUBLIC_WS_URL +ARG NEXT_PUBLIC_API_URL +ENV NEXT_PUBLIC_WS_URL=${NEXT_PUBLIC_WS_URL} +ENV NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} + +WORKDIR /home/perplexica + +COPY ui /home/perplexica/ + +RUN yarn install +RUN yarn build + +CMD ["yarn", "start"] \ No newline at end of file diff --git a/backend.dockerfile b/backend.dockerfile new file mode 100644 index 0000000..6cbd192 --- /dev/null +++ b/backend.dockerfile @@ -0,0 +1,17 @@ +FROM node:alpine + +ARG SEARXNG_API_URL +ENV SEARXNG_API_URL=${SEARXNG_API_URL} + +WORKDIR /home/perplexica + +COPY src /home/perplexica/src +COPY tsconfig.json /home/perplexica/ +COPY .env /home/perplexica/ +COPY package.json /home/perplexica/ +COPY yarn.lock /home/perplexica/ + +RUN yarn install +RUN yarn build + +CMD ["yarn", "start"] \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..3a73660 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,44 @@ +services: + searxng: + build: + context: . + dockerfile: searxng.dockerfile + expose: + - 4000 + ports: + - 4000:8080 + networks: + - perplexica-network + perplexica-backend: + build: + context: . + dockerfile: backend.dockerfile + args: + - SEARXNG_API_URL=http://searxng:8080 + depends_on: + - searxng + expose: + - 3001 + ports: + - 3001:3001 + networks: + - perplexica-network + + perplexica-frontend: + build: + context: . 
+ dockerfile: app.dockerfile + args: + - NEXT_PUBLIC_API_URL=http://127.0.0.1:3001/api + - NEXT_PUBLIC_WS_URL=ws://127.0.0.1:3001 + depends_on: + - perplexica-backend + expose: + - 3000 + ports: + - 3000:3000 + networks: + - perplexica-network + +networks: + perplexica-network: diff --git a/package.json b/package.json new file mode 100644 index 0000000..6f13e40 --- /dev/null +++ b/package.json @@ -0,0 +1,32 @@ +{ + "name": "perplexica-backend", + "version": "1.0.0", + "license": "MIT", + "author": "ItzCrazyKns", + "scripts": { + "start": "node --env-file=.env dist/app.js", + "build": "tsc", + "dev": "nodemon --env-file=.env src/app.ts", + "format": "prettier . --check", + "format:write": "prettier . --write" + }, + "devDependencies": { + "@types/cors": "^2.8.17", + "@types/express": "^4.17.21", + "@types/readable-stream": "^4.0.11", + "prettier": "^3.2.5", + "ts-node": "^10.9.2", + "typescript": "^5.4.3" + }, + "dependencies": { + "@langchain/openai": "^0.0.25", + "axios": "^1.6.8", + "compute-cosine-similarity": "^1.1.0", + "compute-dot": "^1.1.0", + "cors": "^2.8.5", + "express": "^4.19.2", + "langchain": "^0.1.30", + "ws": "^8.16.0", + "zod": "^3.22.4" + } +} diff --git a/searxng-settings.yml b/searxng-settings.yml new file mode 100644 index 0000000..aa37ddd --- /dev/null +++ b/searxng-settings.yml @@ -0,0 +1,2380 @@ +general: + # Debug mode, only for development. 
Is overwritten by ${SEARXNG_DEBUG} + debug: false + # displayed name + instance_name: 'searxng' + # For example: https://example.com/privacy + privacypolicy_url: false + # use true to use your own donation page written in searx/info/en/donate.md + # use false to disable the donation link + donation_url: false + # mailto:contact@example.com + contact_url: false + # record stats + enable_metrics: true + +brand: + new_issue_url: https://github.com/searxng/searxng/issues/new + docs_url: https://docs.searxng.org/ + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki + issue_url: https://github.com/searxng/searxng/issues + # custom: + # maintainer: "Jon Doe" + # # Custom entries in the footer: [title]: [link] + # links: + # Uptime: https://uptime.searxng.org/history/darmarit-org + # About: "https://searxng.org" + +search: + # Filter results. 0: None, 1: Moderate, 2: Strict + safe_search: 0 + # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl", + # "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off + # by default. 
+ autocomplete: 'google' + # minimum characters to type before autocompleter starts + autocomplete_min: 4 + # Default search language - leave blank to detect from browser information or + # use codes from 'languages.py' + default_lang: 'auto' + # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages + # Available languages + # languages: + # - all + # - en + # - en-US + # - de + # - it-IT + # - fr + # - fr-BE + # ban time in seconds after engine errors + ban_time_on_fail: 5 + # max ban time in seconds after engine errors + max_ban_time_on_fail: 120 + suspended_times: + # Engine suspension time after error (in seconds; set to 0 to disable) + # For error "Access denied" and "HTTP error [402, 403]" + SearxEngineAccessDenied: 86400 + # For error "CAPTCHA" + SearxEngineCaptcha: 86400 + # For error "Too many request" and "HTTP error 429" + SearxEngineTooManyRequests: 3600 + # Cloudflare CAPTCHA + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + # ReCAPTCHA + recaptcha_SearxEngineCaptcha: 604800 + + # remove format to deny access, use lower case. + # formats: [html, csv, json, rss] + formats: + - html + - json + +server: + # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} + port: 8888 + bind_address: '127.0.0.1' + # public URL of the instance, to ensure correct inbound links. Is overwritten + # by ${SEARXNG_URL}. + base_url: / # "http://example.com/location" + limiter: false # rate limit the number of requests on the instance, block some bots + public_instance: false # enable features designed only for public instances + + # If your instance owns a /etc/searxng/settings.yml file, then set the following + # values there. 
+ + secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET} + # Proxying image results through searx + image_proxy: false + # 1.0 and 1.1 are supported + http_protocol_version: '1.0' + # POST queries are more secure as they don't show up in history but may cause + # problems when using Firefox containers + method: 'POST' + default_http_headers: + X-Content-Type-Options: nosniff + X-Download-Options: noopen + X-Robots-Tag: noindex, nofollow + Referrer-Policy: no-referrer + +redis: + # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. + # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis + url: false + +ui: + # Custom static path - leave it blank if you didn't change + static_path: '' + static_use_hash: false + # Custom templates path - leave it blank if you didn't change + templates_path: '' + # query_in_title: When true, the result page's titles contains the query + # it decreases the privacy, since the browser can records the page titles. + query_in_title: false + # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page. + infinite_scroll: false + # ui theme + default_theme: simple + # center the results ? + center_alignment: false + # URL prefix of the internet archive, don't forget trailing slash (if needed). + # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" + # Default interface locale - leave blank to detect from browser information or + # use codes from the 'locales' config section + default_locale: '' + # Open result links in a new tab by default + # results_on_new_tab: false + theme_args: + # style of simple theme: auto, light, dark + simple_style: auto + # Perform search immediately if a category selected. + # Disable to select multiple categories at once and start the search manually. 
+ search_on_category_select: true + # Hotkeys: default or vim + hotkeys: default + +# Lock arbitrary settings on the preferences page. To find the ID of the user +# setting you want to lock, check the ID of the form on the page "preferences". +# +# preferences: +# lock: +# - language +# - autocomplete +# - method +# - query_in_title + +# searx supports result proxification using an external service: +# https://github.com/asciimoo/morty uncomment below section if you have running +# morty proxy the key is base64 encoded (keep the !!binary notation) +# Note: since commit af77ec3, morty accepts a base64 encoded key. +# +# result_proxy: +# url: http://127.0.0.1:3000/ +# # the key is a base64 encoded string, the YAML !!binary prefix is optional +# key: !!binary "your_morty_proxy_key" +# # [true|false] enable the "proxy" button next to each result +# proxify_results: true + +# communication with search engines +# +outgoing: + # default timeout in seconds, can be override by engine + request_timeout: 3.0 + # the maximum timeout in seconds + # max_request_timeout: 10.0 + # suffix of searx_useragent, could contain information like an email address + # to the administrator + useragent_suffix: '' + # The maximum number of concurrent connections that may be established. + pool_connections: 100 + # Allow the connection pool to maintain keep-alive connections below this + # point. 
+ pool_maxsize: 20 + # See https://www.python-httpx.org/http2/ + enable_http2: true + # uncomment below section if you want to use a custom server certificate + # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults + # and https://www.python-httpx.org/compatibility/#ssl-configuration + # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer + # + # uncomment below section if you want to use a proxyq see: SOCKS proxies + # https://2.python-requests.org/en/latest/user/advanced/#proxies + # are also supported: see + # https://2.python-requests.org/en/latest/user/advanced/#socks + # + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # + # using_tor_proxy: true + # + # Extra seconds to add in order to account for the time taken by the proxy + # + # extra_proxy_timeout: 10.0 + # + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 + +# External plugin configuration, for more details see +# https://docs.searxng.org/dev/plugins.html +# +# plugins: +# - plugin1 +# - plugin2 +# - ... + +# Comment or un-comment plugin to activate / deactivate by default. +# +# enabled_plugins: +# # these plugins are enabled if nothing is configured .. +# - 'Hash plugin' +# - 'Self Information' +# - 'Tracker URL remover' +# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy +# # these plugins are disabled if nothing is configured .. +# - 'Hostname replace' # see hostname_replace configuration below +# - 'Open Access DOI rewrite' +# - 'Tor check plugin' +# # Read the docs before activate: auto-detection of the language could be +# # detrimental to users expectations / users can activate the plugin in the +# # preferences if they want. 
+# - 'Autodetect search language' + +# Configuration of the "Hostname replace" plugin: +# +# hostname_replace: +# '(.*\.)?youtube\.com$': 'invidious.example.com' +# '(.*\.)?youtu\.be$': 'invidious.example.com' +# '(.*\.)?youtube-noocookie\.com$': 'yotter.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# # to remove matching host names from result list, set value to false +# 'spam\.example\.com': false + +checker: + # disable checker when in debug mode + off_when_debug: true + + # use "scheduling: false" to disable scheduling + # scheduling: interval or int + + # to activate the scheduler: + # * uncomment "scheduling" section + # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" + # to your uwsgi.ini + + # scheduling: + # start_after: [300, 1800] # delay to start the first run of the checker + # every: [86400, 90000] # how often the checker runs + + # additional tests: only for the YAML anchors (see the engines section) + # + additional_tests: + rosebud: &test_rosebud + matrix: + query: rosebud + lang: en + result_container: + - not_empty + - ['one_title_contains', 'citizen kane'] + test: + - unique_results + + android: &test_android + matrix: + query: ['android'] + lang: ['en', 'de', 'fr', 'zh-CN'] + result_container: + - not_empty + - ['one_title_contains', 'google'] + test: + - unique_results + + # tests: only for the YAML anchors (see the engines section) + tests: + infobox: &tests_infobox + infobox: + matrix: + query: ['linux', 'new york', 'bbc'] + result_container: + - has_infobox + +categories_as_tabs: + general: + images: + videos: + news: + map: + music: + it: + science: + files: + social media: + +engines: + - name: 9gag + engine: 9gag + shortcut: 9g + disabled: true + + - name: annas archive + engine: annas_archive + disabled: true + shortcut: aa + + # - name: annas articles + # engine: annas_archive + # shortcut: aaa + 
# # https://docs.searxng.org/dev/engines/online/annas_archive.html + # aa_content: 'journal_article' # book_any .. magazine, standards_document + # aa_ext: 'pdf' # pdf, epub, .. + # aa_sort: 'newest' # newest, oldest, largest, smallest + + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: true + + - name: apple app store + engine: apple_app_store + shortcut: aps + disabled: true + + # Requires Tor + - name: ahmia + engine: ahmia + categories: onions + enable_http: true + shortcut: ah + + - name: anaconda + engine: xpath + paging: true + first_page_num: 0 + search_url: https://anaconda.org/search?q={query}&page={pageno} + results_xpath: //tbody/tr + url_xpath: ./td/h5/a[last()]/@href + title_xpath: ./td/h5 + content_xpath: ./td[h5]/text() + categories: it + timeout: 6.0 + shortcut: conda + disabled: true + + - name: arch linux wiki + engine: archlinux + shortcut: al + + - name: artic + engine: artic + shortcut: arc + timeout: 4.0 + + - name: arxiv + engine: arxiv + shortcut: arx + timeout: 4.0 + + - name: ask + engine: ask + shortcut: ask + disabled: true + + # tmp suspended: dh key too small + # - name: base + # engine: base + # shortcut: bs + + - name: bandcamp + engine: bandcamp + shortcut: bc + categories: music + + - name: wikipedia + engine: wikipedia + shortcut: wp + # add "list" to the array to get results in the results list + display_type: ['infobox'] + base_url: 'https://{language}.wikipedia.org/' + categories: [general] + + - name: bilibili + engine: bilibili + shortcut: bil + disabled: true + + - name: bing + engine: bing + shortcut: bi + disabled: true + + - name: bing images + engine: bing_images + shortcut: bii + + - name: bing news + engine: bing_news + shortcut: bin + + - name: bing videos + engine: bing_videos + shortcut: biv + + - name: bitbucket + engine: xpath + paging: true + search_url: https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath: 
//article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath: //article[@class="repo-summary"]/p + categories: [it, repos] + timeout: 4.0 + disabled: true + shortcut: bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML + + - name: bpb + engine: bpb + shortcut: bpb + disabled: true + + - name: btdigg + engine: btdigg + shortcut: bt + disabled: true + + - name: ccc-tv + engine: xpath + paging: false + search_url: https://media.ccc.de/search/?q={query} + url_xpath: //div[@class="caption"]/h3/a/@href + title_xpath: //div[@class="caption"]/h3/a/text() + content_xpath: //div[@class="caption"]/h4/@title + categories: videos + disabled: true + shortcut: c3tv + about: + website: https://media.ccc.de/ + wikidata_id: Q80729951 + official_api_documentation: https://github.com/voc/voctoweb + use_official_api: false + require_api_key: false + results: HTML + # We don't set language: de here because media.ccc.de is not just + # for a German audience. It contains many English videos and many + # German videos have English subtitles. 
+ + - name: openverse + engine: openverse + categories: images + shortcut: opv + + - name: chefkoch + engine: chefkoch + shortcut: chef + # to show premium or plus results too: + # skip_premium: false + + # - name: core.ac.uk + # engine: core + # categories: science + # shortcut: cor + # # get your API key from: https://core.ac.uk/api-keys/register/ + # api_key: 'unset' + + - name: crossref + engine: crossref + shortcut: cr + timeout: 30 + disabled: true + + - name: crowdview + engine: json_engine + shortcut: cv + categories: general + paging: false + search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} + results_query: results + url_query: link + title_query: title + content_query: snippet + disabled: true + about: + website: https://crowdview.ai/ + + - name: yep + engine: yep + shortcut: yep + categories: general + search_type: web + disabled: true + + - name: yep images + engine: yep + shortcut: yepi + categories: images + search_type: images + disabled: true + + - name: yep news + engine: yep + shortcut: yepn + categories: news + search_type: news + disabled: true + + - name: curlie + engine: xpath + shortcut: cl + categories: general + disabled: true + paging: true + lang_all: '' + search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189 + page_size: 20 + results_xpath: //div[@id="site-list-content"]/div[@class="site-item"] + url_xpath: ./div[@class="title-and-desc"]/a/@href + title_xpath: ./div[@class="title-and-desc"]/a/div + content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"] + about: + website: https://curlie.org/ + wikidata_id: Q60715723 + use_official_api: false + require_api_key: false + results: HTML + + - name: currency + engine: currency_convert + categories: general + shortcut: cc + + - name: bahnhof + engine: json_engine + search_url: https://www.bahnhof.de/api/stations/search/{query} + url_prefix: https://www.bahnhof.de/ + url_query: slug + title_query: name + 
content_query: state + shortcut: bf + disabled: true + about: + website: https://www.bahn.de + wikidata_id: Q22811603 + use_official_api: false + require_api_key: false + results: JSON + language: de + + - name: deezer + engine: deezer + shortcut: dz + disabled: true + + - name: destatis + engine: destatis + shortcut: destat + disabled: true + + - name: deviantart + engine: deviantart + shortcut: da + timeout: 3.0 + + - name: ddg definitions + engine: duckduckgo_definitions + shortcut: ddd + weight: 2 + disabled: true + tests: *tests_infobox + + # cloudflare protected + # - name: digbt + # engine: digbt + # shortcut: dbt + # timeout: 6.0 + # disabled: true + + - name: docker hub + engine: docker_hub + shortcut: dh + categories: [it, packages] + + - name: erowid + engine: xpath + paging: true + first_page_num: 0 + page_size: 30 + search_url: https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] + categories: [] + shortcut: ew + disabled: true + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + # - name: elasticsearch + # shortcut: es + # engine: elasticsearch + # base_url: http://localhost:9200 + # username: elastic + # password: changeme + # index: my-index + # # available options: match, simple_query_string, term, terms, custom + # query_type: match + # # if query_type is set to custom, provide your query here + # #custom_query_json: {"query":{"match_all": {}}} + # #show_metadata: false + # disabled: true + + - name: wikidata + engine: wikidata + shortcut: wd + timeout: 3.0 + weight: 2 + # add "list" to the array to get results in the results list + display_type: ['infobox'] + tests: *tests_infobox + categories: [general] + + 
- name: duckduckgo + engine: duckduckgo + shortcut: ddg + + - name: duckduckgo images + engine: duckduckgo_extra + categories: [images, web] + ddg_category: images + shortcut: ddi + disabled: true + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos, web] + ddg_category: videos + shortcut: ddv + disabled: true + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news, web] + ddg_category: news + shortcut: ddn + disabled: true + + - name: duckduckgo weather + engine: duckduckgo_weather + shortcut: ddw + disabled: true + + - name: apple maps + engine: apple_maps + shortcut: apm + disabled: true + timeout: 5.0 + + - name: emojipedia + engine: emojipedia + timeout: 4.0 + shortcut: em + disabled: true + + - name: tineye + engine: tineye + shortcut: tin + timeout: 9.0 + disabled: true + + - name: etymonline + engine: xpath + paging: true + search_url: https://etymonline.com/search?page={pageno}&q={query} + url_xpath: //a[contains(@class, "word__name--")]/@href + title_xpath: //a[contains(@class, "word__name--")] + content_xpath: //section[contains(@class, "word__defination")] + first_page_num: 1 + shortcut: et + categories: [dictionaries] + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + # - name: ebay + # engine: ebay + # shortcut: eb + # base_url: 'https://www.ebay.com' + # disabled: true + # timeout: 5 + + - name: 1x + engine: www1x + shortcut: 1x + timeout: 3.0 + disabled: true + + - name: fdroid + engine: fdroid + shortcut: fd + disabled: true + + - name: flickr + categories: images + shortcut: fl + # You can use the engine using the official stable API, but you need an API + # key, see: https://www.flickr.com/services/apps/create/ + # engine: flickr + # api_key: 'apikey' # required! 
+ # Or you can use the html non-stable engine, activated by default + engine: flickr_noapi + + - name: free software directory + engine: mediawiki + shortcut: fsd + categories: [it, software wikis] + base_url: https://directory.fsf.org/ + search_type: title + timeout: 5.0 + disabled: true + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 + + # - name: freesound + # engine: freesound + # shortcut: fnd + # disabled: true + # timeout: 15.0 + # API key required, see: https://freesound.org/docs/api/overview.html + # api_key: MyAPIkey + + - name: frinkiac + engine: frinkiac + shortcut: frk + disabled: true + + - name: fyyd + engine: fyyd + shortcut: fy + timeout: 8.0 + disabled: true + + - name: genius + engine: genius + shortcut: gen + + - name: gentoo + engine: gentoo + shortcut: ge + timeout: 10.0 + + - name: gitlab + engine: json_engine + paging: true + search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno} + url_query: web_url + title_query: name_with_namespace + content_query: description + page_size: 20 + categories: [it, repos] + shortcut: gl + timeout: 10.0 + disabled: true + about: + website: https://about.gitlab.com/ + wikidata_id: Q16639197 + official_api_documentation: https://docs.gitlab.com/ee/api/ + use_official_api: false + require_api_key: false + results: JSON + + - name: github + engine: github + shortcut: gh + + # This is a Gitea service. If you would like to use a different instance, + # change codeberg.org to the URL of the desired Gitea host. Or you can create a + # new engine by copying this and changing the name, shortcut and search_url. 
+ + - name: codeberg + engine: json_engine + search_url: https://codeberg.org/api/v1/repos/search?q={query}&limit=10 + url_query: html_url + title_query: name + content_query: description + categories: [it, repos] + shortcut: cb + disabled: true + about: + website: https://codeberg.org/ + wikidata_id: + official_api_documentation: https://try.gitea.io/api/swagger + use_official_api: false + require_api_key: false + results: JSON + + - name: goodreads + engine: goodreads + shortcut: good + timeout: 4.0 + disabled: true + + - name: google + engine: google + shortcut: go + # additional_tests: + # android: *test_android + + - name: google images + engine: google_images + shortcut: goi + # additional_tests: + # android: *test_android + # dali: + # matrix: + # query: ['Dali Christ'] + # lang: ['en', 'de', 'fr', 'zh-CN'] + # result_container: + # - ['one_title_contains', 'Salvador'] + + - name: google news + engine: google_news + shortcut: gon + # additional_tests: + # android: *test_android + + - name: google videos + engine: google_videos + shortcut: gov + # additional_tests: + # android: *test_android + + - name: google scholar + engine: google_scholar + shortcut: gos + + - name: google play apps + engine: google_play + categories: [files, apps] + shortcut: gpa + play_categ: apps + disabled: true + + - name: google play movies + engine: google_play + categories: videos + shortcut: gpm + play_categ: movies + disabled: true + + - name: material icons + engine: material_icons + categories: images + shortcut: mi + disabled: true + + - name: gpodder + engine: json_engine + shortcut: gpod + timeout: 4.0 + paging: false + search_url: https://gpodder.net/search.json?q={query} + url_query: url + title_query: title + content_query: description + page_size: 19 + categories: music + disabled: true + about: + website: https://gpodder.net + wikidata_id: Q3093354 + official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/ + use_official_api: false + 
requires_api_key: false + results: JSON + + - name: habrahabr + engine: xpath + paging: true + search_url: https://habr.com/en/search/page{pageno}/?q={query} + results_xpath: //article[contains(@class, "tm-articles-list__item")] + url_xpath: .//a[@class="tm-title__link"]/@href + title_xpath: .//a[@class="tm-title__link"] + content_xpath: .//div[contains(@class, "article-formatted-body")] + categories: it + timeout: 4.0 + disabled: true + shortcut: habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: hackernews + engine: hackernews + shortcut: hn + disabled: true + + - name: hoogle + engine: xpath + paging: true + search_url: https://hoogle.haskell.org/?hoogle={query}&start={pageno} + results_xpath: '//div[@class="result"]' + title_xpath: './/div[@class="ans"]//a' + url_xpath: './/div[@class="ans"]//a/@href' + content_xpath: './/div[@class="from"]' + page_size: 20 + categories: [it, packages] + shortcut: ho + about: + website: https://hoogle.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON + + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + + - name: imgur + engine: imgur + shortcut: img + disabled: true + + - name: ina + engine: ina + shortcut: in + timeout: 6.0 + disabled: true + + - name: invidious + engine: invidious + # Instances will be selected randomly, see https://api.invidious.io/ for + # instances that are stable (good uptime) and close to you. 
+ base_url: + - https://invidious.io.lol + - https://invidious.fdn.fr + - https://yt.artemislena.eu + - https://invidious.tiekoetter.com + - https://invidious.flokinet.to + - https://vid.puffyan.us + - https://invidious.privacydev.net + - https://inv.tux.pizza + shortcut: iv + timeout: 3.0 + disabled: true + + - name: jisho + engine: jisho + shortcut: js + timeout: 3.0 + disabled: true + + - name: kickass + engine: kickass + base_url: + - https://kickasstorrents.to + - https://kickasstorrents.cr + - https://kickasstorrent.cr + - https://kickass.sx + - https://kat.am + shortcut: kc + timeout: 4.0 + + - name: lemmy communities + engine: lemmy + lemmy_type: Communities + shortcut: leco + + - name: lemmy users + engine: lemmy + network: lemmy communities + lemmy_type: Users + shortcut: leus + + - name: lemmy posts + engine: lemmy + network: lemmy communities + lemmy_type: Posts + shortcut: lepo + + - name: lemmy comments + engine: lemmy + network: lemmy communities + lemmy_type: Comments + shortcut: lecom + + - name: library genesis + engine: xpath + # search_url: https://libgen.is/search.php?req={query} + search_url: https://libgen.rs/search.php?req={query} + url_xpath: //a[contains(@href,"book/index.php?md5")]/@href + title_xpath: //a[contains(@href,"book/")]/text()[1] + content_xpath: //td/a[1][contains(@href,"=author")]/text() + categories: files + timeout: 7.0 + disabled: true + shortcut: lg + about: + website: https://libgen.fun/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: z-library + engine: zlibrary + shortcut: zlib + categories: files + timeout: 7.0 + + - name: library of congress + engine: loc + shortcut: loc + categories: images + + - name: lingva + engine: lingva + shortcut: lv + # set lingva instance in url, by default it will use the official instance + # url: https://lingva.thedaviddelta.com + + - name: lobste.rs + engine: xpath + search_url: 
https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance + results_xpath: //li[contains(@class, "story")] + url_xpath: .//a[@class="u-url"]/@href + title_xpath: .//a[@class="u-url"] + content_xpath: .//a[@class="domain"] + categories: it + shortcut: lo + timeout: 5.0 + disabled: true + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: azlyrics + shortcut: lyrics + engine: xpath + timeout: 4.0 + disabled: true + categories: [music, lyrics] + paging: true + search_url: https://search.azlyrics.com/search.php?q={query}&w=lyrics&p={pageno} + url_xpath: //td[@class="text-left visitedlyr"]/a/@href + title_xpath: //span/b/text() + content_xpath: //td[@class="text-left visitedlyr"]/a/small + about: + website: https://azlyrics.com + wikidata_id: Q66372542 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: mastodon users + engine: mastodon + mastodon_type: accounts + base_url: https://mastodon.social + shortcut: mau + + - name: mastodon hashtags + engine: mastodon + mastodon_type: hashtags + base_url: https://mastodon.social + shortcut: mah + + # - name: matrixrooms + # engine: mrs + # # https://docs.searxng.org/dev/engines/online/mrs.html + # # base_url: https://mrs-api-host + # shortcut: mtrx + # disabled: true + + - name: mdn + shortcut: mdn + engine: json_engine + categories: [it] + paging: true + search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} + results_query: documents + url_query: mdn_url + url_prefix: https://developer.mozilla.org + title_query: title + content_query: summary + about: + website: https://developer.mozilla.org + wikidata_id: Q3273508 + official_api_documentation: null + use_official_api: false + require_api_key: false + results: JSON + + - name: metacpan + engine: metacpan + shortcut: cpan + disabled: true + 
number_of_results: 20 + + # - name: meilisearch + # engine: meilisearch + # shortcut: mes + # enable_http: true + # base_url: http://localhost:7700 + # index: my-index + + - name: mixcloud + engine: mixcloud + shortcut: mc + + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # enable_http: true + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + + - name: mozhi + engine: mozhi + base_url: + - https://mozhi.aryak.me + - https://translate.bus-hit.me + - https://nyc1.mz.ggtyler.dev + # mozhi_engine: google - see https://mozhi.aryak.me for supported engines + timeout: 4.0 + shortcut: mz + disabled: true + + - name: mwmbl + engine: mwmbl + # api_url: https://api.mwmbl.org + shortcut: mwm + disabled: true + + - name: npm + engine: json_engine + paging: true + first_page_num: 0 + search_url: https://api.npms.io/v2/search?q={query}&size=25&from={pageno} + results_query: results + url_query: package/links/npm + title_query: package/name + content_query: package/description + page_size: 25 + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: npm + about: + website: https://npms.io/ + wikidata_id: Q7067518 + official_api_documentation: https://api-docs.npms.io/ + use_official_api: false + require_api_key: false + results: JSON + + - name: nyaa + engine: nyaa + shortcut: nt + disabled: true + + - name: mankier + engine: json_engine + search_url: https://www.mankier.com/api/v2/mans/?q={query} + results_query: results + url_query: url + title_query: name + content_query: description + categories: it + shortcut: man + about: + website: https://www.mankier.com/ + official_api_documentation: https://www.mankier.com/api + use_official_api: true + require_api_key: false + results: JSON + + - name: odysee + engine: odysee + shortcut: 
od + disabled: true + + - name: openairedatasets + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: 'science' + shortcut: oad + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openairepublications + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: science + shortcut: oap + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + # - name: opensemanticsearch + # engine: opensemantic + # shortcut: oss + # base_url: 'http://localhost:8983/solr/opensemanticsearch/' + + - name: openstreetmap + engine: openstreetmap + shortcut: osm + + - name: openrepos + engine: xpath + paging: true + search_url: https://openrepos.net/search/node/{query}?page={pageno} + url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath: //li[@class="search-result"]//h3[@class="title"]/a + content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories: files + timeout: 4.0 + 
disabled: true + shortcut: or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: packagist + engine: json_engine + paging: true + search_url: https://packagist.org/search.json?q={query}&page={pageno} + results_query: results + url_query: url + title_query: name + content_query: description + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: pack + about: + website: https://packagist.org + wikidata_id: Q108311377 + official_api_documentation: https://packagist.org/apidoc + use_official_api: true + require_api_key: false + results: JSON + + - name: pdbe + engine: pdbe + shortcut: pdb + # Hide obsolete PDB entries. Default is not to hide obsolete structures + # hide_obsolete: false + + - name: photon + engine: photon + shortcut: ph + + - name: pinterest + engine: pinterest + shortcut: pin + + - name: piped + engine: piped + shortcut: ppd + categories: videos + piped_filter: videos + timeout: 3.0 + + # URL to use as link and for embeds + frontend_url: https://srv.piped.video + # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ + backend_url: + - https://pipedapi.kavin.rocks + - https://pipedapi-libre.kavin.rocks + - https://pipedapi.adminforge.de + + - name: piped.music + engine: piped + network: piped + shortcut: ppdm + categories: music + piped_filter: music_songs + timeout: 3.0 + + - name: piratebay + engine: piratebay + shortcut: tpb + # You may need to change this URL to a proxy if piratebay is blocked in your + # country + url: https://thepiratebay.org/ + timeout: 3.0 + + - name: podcastindex + engine: podcastindex + shortcut: podcast + + # Required dependency: psycopg2 + # - name: postgresql + # engine: postgresql + # database: postgres + # username: postgres + # password: postgres + # limit: 10 + # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + # shortcut : psql + + - 
name: presearch + engine: presearch + search_type: search + categories: [general, web] + shortcut: ps + timeout: 4.0 + disabled: true + + - name: presearch images + engine: presearch + network: presearch + search_type: images + categories: [images, web] + timeout: 4.0 + shortcut: psimg + disabled: true + + - name: presearch videos + engine: presearch + network: presearch + search_type: videos + categories: [general, web] + timeout: 4.0 + shortcut: psvid + disabled: true + + - name: presearch news + engine: presearch + network: presearch + search_type: news + categories: [news, web] + timeout: 4.0 + shortcut: psnews + disabled: true + + - name: pub.dev + engine: xpath + shortcut: pd + search_url: https://pub.dev/packages?q={query}&page={pageno} + paging: true + results_xpath: //div[contains(@class,"packages-item")] + url_xpath: ./div/h3/a/@href + title_xpath: ./div/h3/a + content_xpath: ./div/div/div[contains(@class,"packages-description")]/span + categories: [packages, it] + timeout: 3.0 + disabled: true + first_page_num: 1 + about: + website: https://pub.dev/ + official_api_documentation: https://pub.dev/help/api + use_official_api: false + require_api_key: false + results: HTML + + - name: pubmed + engine: pubmed + shortcut: pub + timeout: 3.0 + + - name: pypi + shortcut: pypi + engine: xpath + paging: true + search_url: https://pypi.org/search/?q={query}&page={pageno} + results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"] + url_xpath: ./@href + title_xpath: ./h3/span[@class="package-snippet__name"] + content_xpath: ./p + suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"] + first_page_num: 1 + categories: [it, packages] + about: + website: https://pypi.org + wikidata_id: Q2984686 + official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html + use_official_api: false + require_api_key: false + results: HTML + + - name: qwant + qwant_categ: web + 
engine: qwant + shortcut: qw + categories: [general, web] + additional_tests: + rosebud: *test_rosebud + + - name: qwant news + qwant_categ: news + engine: qwant + shortcut: qwn + categories: news + network: qwant + + - name: qwant images + qwant_categ: images + engine: qwant + shortcut: qwi + categories: [images, web] + network: qwant + + - name: qwant videos + qwant_categ: videos + engine: qwant + shortcut: qwv + categories: [videos, web] + network: qwant + + # - name: library + # engine: recoll + # shortcut: lib + # base_url: 'https://recoll.example.org/' + # search_dir: '' + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # timeout: 30.0 + # categories: files + # disabled: true + + # - name: recoll library reference + # engine: recoll + # base_url: 'https://recoll.example.org/' + # search_dir: reference + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # shortcut: libr + # timeout: 30.0 + # categories: files + # disabled: true + + - name: radio browser + engine: radio_browser + shortcut: rb + + - name: reddit + engine: reddit + shortcut: re + page_size: 25 + + - name: rottentomatoes + engine: rottentomatoes + shortcut: rt + disabled: true + + # Required dependency: redis + # - name: myredis + # shortcut : rds + # engine: redis_server + # exact_match_only: false + # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 + + # tmp suspended: bad certificate + # - name: scanr structures + # shortcut: scs + # engine: scanr_structures + # disabled: true + + - name: sepiasearch + engine: sepiasearch + shortcut: sep + + - name: soundcloud + engine: soundcloud + shortcut: sc + + - name: stackoverflow + engine: stackexchange + shortcut: st + api_site: 'stackoverflow' + categories: [it, q&a] + + - name: askubuntu + engine: stackexchange + shortcut: ubuntu + api_site: 'askubuntu' + categories: [it, q&a] + + - name: internetarchivescholar + engine: internet_archive_scholar + shortcut: ias + timeout: 
5.0 + + - name: superuser + engine: stackexchange + shortcut: su + api_site: 'superuser' + categories: [it, q&a] + + - name: searchcode code + engine: searchcode_code + shortcut: scc + disabled: true + + - name: framalibre + engine: framalibre + shortcut: frl + disabled: true + + # - name: searx + # engine: searx_engine + # shortcut: se + # instance_urls : + # - http://127.0.0.1:8888/ + # - ... + # disabled: true + + - name: semantic scholar + engine: semantic_scholar + disabled: true + shortcut: se + + # Spotify needs API credentials + # - name: spotify + # engine: spotify + # shortcut: stf + # api_client_id: ******* + # api_client_secret: ******* + + # - name: solr + # engine: solr + # shortcut: slr + # base_url: http://localhost:8983 + # collection: collection_name + # sort: '' # sorting: asc or desc + # field_list: '' # comma separated list of field names to display on the UI + # default_fields: '' # default field to query + # query_fields: '' # query fields + # enable_http: true + + # - name: springer nature + # engine: springer + # # get your API key from: https://dev.springernature.com/signup + # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" + # api_key: 'unset' + # shortcut: springer + # timeout: 15.0 + + - name: startpage + engine: startpage + shortcut: sp + timeout: 6.0 + disabled: true + additional_tests: + rosebud: *test_rosebud + + - name: tokyotoshokan + engine: tokyotoshokan + shortcut: tt + timeout: 6.0 + disabled: true + + - name: solidtorrents + engine: solidtorrents + shortcut: solid + timeout: 4.0 + base_url: + - https://solidtorrents.to + - https://bitsearch.to + + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!demo concert" + # + # - name: demo + # engine: sqlite + # shortcut: demo + # categories: general + # result_template: default.html + # database: searx/data/filmliste-v2.db + # query_str: >- 
+ # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + + - name: tagesschau + engine: tagesschau + # when set to false, display URLs from Tagesschau, and not the actual source + # (e.g. NDR, WDR, SWR, HR, ...) + use_source_url: true + shortcut: ts + disabled: true + + - name: tmdb + engine: xpath + paging: true + categories: movies + search_url: https://www.themoviedb.org/search?page={pageno}&query={query} + results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] + url_xpath: .//div[contains(@class,"poster")]/a/@href + thumbnail_xpath: .//img/@src + title_xpath: .//div[contains(@class,"title")]//h2 + content_xpath: .//div[contains(@class,"overview")] + shortcut: tm + disabled: true + + # Requires Tor + - name: torch + engine: xpath + paging: true + search_url: http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and + results_xpath: //table//tr + url_xpath: ./td[2]/a + title_xpath: ./td[2]/b + content_xpath: ./td[2]/small + categories: onions + enable_http: true + shortcut: tch + + # torznab engine lets you query any torznab compatible indexer. Using this + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. 
More details at: + # https://docs.searxng.org/dev/engines/online/torznab.html + # + # - name: Torznab EZTV + # engine: torznab + # shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab + # enable_http: true # if using localhost + # api_key: xxxxxxxxxxxxxxx + # show_magnet_links: true + # show_torrent_files: false + # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories + # torznab_categories: # optional + # - 2000 + # - 5000 + + # tmp suspended - too slow, too many errors + # - name: urbandictionary + # engine : xpath + # search_url : https://www.urbandictionary.com/define.php?term={query} + # url_xpath : //*[@class="word"]/@href + # title_xpath : //*[@class="def-header"] + # content_xpath: //*[@class="meaning"] + # shortcut: ud + + - name: unsplash + engine: unsplash + shortcut: us + + - name: yandex music + engine: yandex_music + shortcut: ydm + disabled: true + # https://yandex.com/support/music/access.html + inactive: true + + - name: yahoo + engine: yahoo + shortcut: yh + disabled: true + + - name: yahoo news + engine: yahoo_news + shortcut: yhn + + - name: youtube + shortcut: yt + # You can use the engine using the official stable API, but you need an API + # key See: https://console.developers.google.com/project + # + # engine: youtube_api + # api_key: 'apikey' # required! 
+ # + # Or you can use the html non-stable engine, activated by default + engine: youtube_noapi + + - name: dailymotion + engine: dailymotion + shortcut: dm + + - name: vimeo + engine: vimeo + shortcut: vm + + - name: wiby + engine: json_engine + paging: true + search_url: https://wiby.me/json/?q={query}&p={pageno} + url_query: URL + title_query: Title + content_query: Snippet + categories: [general, web] + shortcut: wib + disabled: true + about: + website: https://wiby.me/ + + - name: alexandria + engine: json_engine + shortcut: alx + categories: general + paging: true + search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} + results_query: results + title_query: title + url_query: url + content_query: snippet + timeout: 1.5 + disabled: true + about: + website: https://alexandria.org/ + official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md + use_official_api: true + require_api_key: false + results: JSON + + - name: wikibooks + engine: mediawiki + weight: 0.5 + shortcut: wb + categories: [general, wikimedia] + base_url: 'https://{language}.wikibooks.org/' + search_type: text + disabled: true + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 + + - name: wikinews + engine: mediawiki + shortcut: wn + categories: [news, wikimedia] + base_url: 'https://{language}.wikinews.org/' + search_type: text + srsort: create_timestamp_desc + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 + + - name: wikiquote + engine: mediawiki + weight: 0.5 + shortcut: wq + categories: [general, wikimedia] + base_url: 'https://{language}.wikiquote.org/' + search_type: text + disabled: true + additional_tests: + rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 + + - name: wikisource + engine: mediawiki + weight: 0.5 + shortcut: ws + categories: [general, wikimedia] + base_url: 'https://{language}.wikisource.org/' + search_type: text + disabled: true + about: + 
website: https://www.wikisource.org/ + wikidata_id: Q263 + + - name: wikispecies + engine: mediawiki + shortcut: wsp + categories: [general, science, wikimedia] + base_url: 'https://species.wikimedia.org/' + search_type: text + disabled: true + about: + website: https://species.wikimedia.org/ + wikidata_id: Q13679 + + - name: wiktionary + engine: mediawiki + shortcut: wt + categories: [dictionaries, wikimedia] + base_url: 'https://{language}.wiktionary.org/' + search_type: text + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 + + - name: wikiversity + engine: mediawiki + weight: 0.5 + shortcut: wv + categories: [general, wikimedia] + base_url: 'https://{language}.wikiversity.org/' + search_type: text + disabled: true + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 + + - name: wikivoyage + engine: mediawiki + weight: 0.5 + shortcut: wy + categories: [general, wikimedia] + base_url: 'https://{language}.wikivoyage.org/' + search_type: text + disabled: true + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 + + - name: wikicommons.images + engine: wikicommons + shortcut: wc + categories: images + number_of_results: 10 + + - name: wolframalpha + shortcut: wa + # You can use the engine using the official stable API, but you need an API + # key. 
See: https://products.wolframalpha.com/api/ + # + # engine: wolframalpha_api + # api_key: '' + # + # Or you can use the html non-stable engine, activated by default + engine: wolframalpha_noapi + timeout: 6.0 + categories: general + disabled: true + + - name: dictzone + engine: dictzone + shortcut: dc + + - name: mymemory translated + engine: translated + shortcut: tl + timeout: 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See: https://mymemory.translated.net/doc/usagelimits.php + # api_key: '' + + # Required dependency: mysql-connector-python + # - name: mysql + # engine: mysql_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mysql + + - name: 1337x + engine: 1337x + shortcut: 1337x + disabled: true + + - name: duden + engine: duden + shortcut: du + disabled: true + + - name: seznam + shortcut: szn + engine: seznam + disabled: true + + # - name: deepl + # engine: deepl + # shortcut: dpl + # # You can use the engine using the official stable API, but you need an API key + # # See: https://www.deepl.com/pro-api?cta=header-pro-api + # api_key: '' # required! 
+ # timeout: 5.0 + # disabled: true + + - name: mojeek + shortcut: mjk + engine: xpath + paging: true + categories: [general, web] + search_url: https://www.mojeek.com/search?q={query}&s={pageno}&lang={lang}&lb={lang} + results_xpath: //ul[@class="results-standard"]/li/a[@class="ob"] + url_xpath: ./@href + title_xpath: ../h2/a + content_xpath: ..//p[@class="s"] + suggestion_xpath: //div[@class="top-info"]/p[@class="top-info spell"]/em/a + first_page_num: 0 + page_size: 10 + max_page: 100 + disabled: true + about: + website: https://www.mojeek.com/ + wikidata_id: Q60747299 + official_api_documentation: https://www.mojeek.com/services/api.html/ + use_official_api: false + require_api_key: false + results: HTML + + - name: moviepilot + engine: moviepilot + shortcut: mp + disabled: true + + - name: naver + shortcut: nvr + categories: [general, web] + engine: xpath + paging: true + search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno} + url_xpath: //a[@class="link_tit"]/@href + title_xpath: //a[@class="link_tit"] + content_xpath: //a[@class="total_dsc"]/div + first_page_num: 1 + page_size: 10 + disabled: true + about: + website: https://www.naver.com/ + wikidata_id: Q485639 + official_api_documentation: https://developers.naver.com/docs/nmt/examples/ + use_official_api: false + require_api_key: false + results: HTML + language: ko + + - name: rubygems + shortcut: rbg + engine: xpath + paging: true + search_url: https://rubygems.org/search?page={pageno}&query={query} + results_xpath: /html/body/main/div/a[@class="gems__gem"] + url_xpath: ./@href + title_xpath: ./span/h2 + content_xpath: ./span/p + suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a + first_page_num: 1 + categories: [it, packages] + disabled: true + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + 
require_api_key: false + results: HTML + + - name: peertube + engine: peertube + shortcut: ptb + paging: true + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com + categories: videos + disabled: true + timeout: 6.0 + + - name: mediathekviewweb + engine: mediathekviewweb + shortcut: mvw + disabled: true + + - name: yacy + engine: yacy + categories: general + search_type: text + base_url: https://yacy.searchlab.eu + shortcut: ya + disabled: true + # required if you aren't using HTTPS for your local yacy instance + # https://docs.searxng.org/dev/engines/online/yacy.html + # enable_http: true + # timeout: 3.0 + # search_mode: 'global' + + - name: yacy images + engine: yacy + categories: images + search_type: image + base_url: https://yacy.searchlab.eu + shortcut: yai + disabled: true + + - name: rumble + engine: rumble + shortcut: ru + base_url: https://rumble.com/ + paging: true + categories: videos + disabled: true + + - name: livespace + engine: livespace + shortcut: ls + categories: videos + disabled: true + timeout: 5.0 + + - name: wordnik + engine: wordnik + shortcut: def + base_url: https://www.wordnik.com/ + categories: [dictionaries] + timeout: 5.0 + + - name: woxikon.de synonyme + engine: xpath + shortcut: woxi + categories: [dictionaries] + timeout: 5.0 + disabled: true + search_url: https://synonyme.woxikon.de/synonyme/{query}.php + url_xpath: //div[@class="upper-synonyms"]/a/@href + content_xpath: //div[@class="synonyms-list-group"] + title_xpath: //div[@class="upper-synonyms"]/a + no_result_for_http_status: [404] + about: + website: https://www.woxikon.de/ + wikidata_id: # No Wikidata ID + use_official_api: false + require_api_key: false + results: HTML + language: de + + - name: seekr news + engine: seekr + shortcut: senews + categories: news + seekr_category: news + disabled: true + + - name: seekr images + engine: seekr + network: seekr news + shortcut: seimg + categories: images + seekr_category: 
images + disabled: true + + - name: seekr videos + engine: seekr + network: seekr news + shortcut: sevid + categories: videos + seekr_category: videos + disabled: true + + - name: sjp.pwn + engine: sjp + shortcut: sjp + base_url: https://sjp.pwn.pl/ + timeout: 5.0 + disabled: true + + - name: stract + engine: stract + shortcut: str + disabled: true + + - name: svgrepo + engine: svgrepo + shortcut: svg + timeout: 10.0 + disabled: true + + - name: tootfinder + engine: tootfinder + shortcut: toot + + - name: wallhaven + engine: wallhaven + # api_key: abcdefghijklmnopqrstuvwxyz + shortcut: wh + + # wikimini: online encyclopedia for children + # The fulltext and title parameter is necessary for Wikimini because + # sometimes it will not show the results and redirect instead + - name: wikimini + engine: xpath + shortcut: wkmn + search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search + url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href + title_xpath: //li//div[@class="mw-search-result-heading"]/a + content_xpath: //li/div[@class="searchresult"] + categories: general + disabled: true + about: + website: https://wikimini.org/ + wikidata_id: Q3568032 + use_official_api: false + require_api_key: false + results: HTML + language: fr + + - name: wttr.in + engine: wttr + shortcut: wttr + timeout: 9.0 + + - name: yummly + engine: yummly + shortcut: yum + disabled: true + + - name: brave + engine: brave + shortcut: br + time_range_support: true + paging: true + categories: [general, web] + brave_category: search + # brave_spellcheck: true + + - name: brave.images + engine: brave + network: brave + shortcut: brimg + categories: [images, web] + brave_category: images + + - name: brave.videos + engine: brave + network: brave + shortcut: brvid + categories: [videos, web] + brave_category: videos + + - name: brave.news + engine: brave + network: brave + shortcut: brnews + categories: news + brave_category: news + + # - 
name: brave.goggles + # engine: brave + # network: brave + # shortcut: brgog + # time_range_support: true + # paging: true + # categories: [general, web] + # brave_category: goggles + # Goggles: # required! This should be a URL ending in .goggle + + - name: lib.rs + shortcut: lrs + engine: xpath + search_url: https://lib.rs/search?q={query} + results_xpath: /html/body/main/div/ol/li/a + url_xpath: ./@href + title_xpath: ./div[@class="h"]/h4 + content_xpath: ./div[@class="h"]/p + categories: [it, packages] + disabled: true + about: + website: https://lib.rs + wikidata_id: Q113486010 + use_official_api: false + require_api_key: false + results: HTML + + - name: sourcehut + shortcut: srht + engine: xpath + paging: true + search_url: https://sr.ht/projects?page={pageno}&search={query} + results_xpath: (//div[@class="event-list"])[1]/div[@class="event"] + url_xpath: ./h4/a[2]/@href + title_xpath: ./h4/a[2] + content_xpath: ./p + first_page_num: 1 + categories: [it, repos] + disabled: true + about: + website: https://sr.ht + wikidata_id: Q78514485 + official_api_documentation: https://man.sr.ht/ + use_official_api: false + require_api_key: false + results: HTML + + - name: goo + shortcut: goo + engine: xpath + paging: true + search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0 + url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href + title_xpath: //div[@class="result"]/p[@class='title fsL1']/a + content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p + first_page_num: 0 + categories: [general, web] + disabled: true + timeout: 4.0 + about: + website: https://search.goo.ne.jp + wikidata_id: Q249044 + use_official_api: false + require_api_key: false + results: HTML + language: ja + + - name: bt4g + engine: bt4g + shortcut: bt4g + + - name: pkg.go.dev + engine: xpath + shortcut: pgo + search_url: https://pkg.go.dev/search?limit=100&m=package&q={query} + results_xpath: 
/html/body/main/div[contains(@class,"SearchResults")]/div[not(@class)]/div[@class="SearchSnippet"] + url_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a/@href + title_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a + content_xpath: ./p[@class="SearchSnippet-synopsis"] + categories: [packages, it] + timeout: 3.0 + disabled: true + about: + website: https://pkg.go.dev/ + use_official_api: false + require_api_key: false + results: HTML + +# Doku engine lets you access any Doku wiki instance: +# A public one or a private/corporate one. +# - name: ubuntuwiki +# engine: doku +# shortcut: uw +# base_url: 'https://doc.ubuntu-fr.org' + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: git grep +# engine: command +# command: ['git', 'grep', '{{QUERY}}'] +# shortcut: gg +# tokens: [] +# disabled: true +# delimiter: +# chars: ':' +# keys: ['filepath', 'code'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: locate +# engine: command +# command: ['locate', '{{QUERY}}'] +# shortcut: loc +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: find +# engine: command +# command: ['find', '.', '-name', '{{QUERY}}'] +# query_type: path +# shortcut: fnd +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. 
You can restrict access by configuring a list +# of access tokens under tokens. +# - name: pattern search in files +# engine: command +# command: ['fgrep', '{{QUERY}}'] +# shortcut: fgr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: regex search in files +# engine: command +# command: ['grep', '{{QUERY}}'] +# shortcut: gr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +doi_resolvers: + oadoi.org: 'https://oadoi.org/' + doi.org: 'https://doi.org/' + doai.io: 'https://dissem.in/' + sci-hub.se: 'https://sci-hub.se/' + sci-hub.st: 'https://sci-hub.st/' + sci-hub.ru: 'https://sci-hub.ru/' + +default_doi_resolver: 'oadoi.org' diff --git a/searxng.dockerfile b/searxng.dockerfile new file mode 100644 index 0000000..8bcd2b2 --- /dev/null +++ b/searxng.dockerfile @@ -0,0 +1,3 @@ +FROM searxng/searxng + +COPY searxng-settings.yml /etc/searxng/settings.yml \ No newline at end of file diff --git a/src/agents/imageSearchAgent.ts b/src/agents/imageSearchAgent.ts new file mode 100644 index 0000000..37ac186 --- /dev/null +++ b/src/agents/imageSearchAgent.ts @@ -0,0 +1,80 @@ +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { PromptTemplate } from '@langchain/core/prompts'; +import { OpenAI } from '@langchain/openai'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import { BaseMessage } from '@langchain/core/messages'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { searchSearxng } from '../core/searxng'; + +const llm = new OpenAI({ + temperature: 0, + modelName: 'gpt-3.5-turbo', +}); + +const imageSearchChainPrompt = ` +You will be given a conversation below and a follow up question. 
You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images. +You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation. + +Example: +1. Follow up question: What is a cat? +Rephrased: A cat + +2. Follow up question: What is a car? How does it works? +Rephrased: Car working + +3. Follow up question: How does an AC work? +Rephrased: AC working + +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +type ImageSearchChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const strParser = new StringOutputParser(); + +const imageSearchChain = RunnableSequence.from([ + RunnableMap.from({ + chat_history: (input: ImageSearchChainInput) => { + return formatChatHistoryAsString(input.chat_history); + }, + query: (input: ImageSearchChainInput) => { + return input.query; + }, + }), + PromptTemplate.fromTemplate(imageSearchChainPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + const res = await searchSearxng(input, { + categories: ['images'], + engines: ['bing_images', 'google_images'], + }); + + const images = []; + + res.results.forEach((result) => { + if (result.img_src && result.url && result.title) { + images.push({ + img_src: result.img_src, + url: result.url, + title: result.title, + }); + } + }); + + return images.slice(0, 10); + }), +]); + +export default imageSearchChain; diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts new file mode 100644 index 0000000..3241021 --- /dev/null +++ b/src/agents/webSearchAgent.ts @@ -0,0 +1,250 @@ +import { BaseMessage } from '@langchain/core/messages'; +import { + PromptTemplate, + ChatPromptTemplate, + MessagesPlaceholder, +} from '@langchain/core/prompts'; +import { + RunnableSequence, + RunnableMap, + RunnableLambda, +} from '@langchain/core/runnables'; +import { ChatOpenAI, OpenAI, OpenAIEmbeddings } from 
'@langchain/openai'; +import { StringOutputParser } from '@langchain/core/output_parsers'; +import { Document } from '@langchain/core/documents'; +import { searchSearxng } from '../core/searxng'; +import type { StreamEvent } from '@langchain/core/tracers/log_stream'; +import formatChatHistoryAsString from '../utils/formatHistory'; +import eventEmitter from 'events'; +import computeSimilarity from '../utils/computeSimilarity'; + +const chatLLM = new ChatOpenAI({ + modelName: 'gpt-3.5-turbo', + temperature: 0.7, +}); + +const llm = new OpenAI({ + temperature: 0, + modelName: 'gpt-3.5-turbo', +}); + +const embeddings = new OpenAIEmbeddings({ + modelName: 'text-embedding-3-large', +}); + +const basicSearchRetrieverPrompt = ` +You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. +If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response. + +Example: +1. Follow up question: What is the capital of France? +Rephrased: Capital of france + +2. Follow up question: What is the population of New York City? +Rephrased: Population of New York City + +3. Follow up question: What is Docker? +Rephrased: What is Docker + +Conversation: +{chat_history} + +Follow up question: {query} +Rephrased question: +`; + +const basicWebSearchResponsePrompt = ` + You are Perplexica, an AI model who is expert at searching the web and answering user's queries. + + Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containg a brief description of the content of that page). + You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text. 
+ You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them. + Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative. + You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from. + Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2]. + However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times. The number refers to the number of the search result (passed in the context) used to generate that part of the answer. + + Aything inside the following \`context\` HTML block provided below is for your knowledge returned by the search engine and is not shared by the user. You have to answer question on the basis of it and cite the relevant information from it but you do not have to + talk about the context in your response. + + + {context} + + + If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. + Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user. 
Today's date is ${new Date().toISOString()} +`; + +const strParser = new StringOutputParser(); + +const handleStream = async ( + stream: AsyncGenerator, + emitter: eventEmitter, +) => { + for await (const event of stream) { + if ( + event.event === 'on_chain_end' && + event.name === 'FinalSourceRetriever' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'sources', data: event.data.output }), + ); + } + if ( + event.event === 'on_chain_stream' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit( + 'data', + JSON.stringify({ type: 'response', data: event.data.chunk }), + ); + } + if ( + event.event === 'on_chain_end' && + event.name === 'FinalResponseGenerator' + ) { + emitter.emit('end'); + } + } +}; + +const processDocs = async (docs: Document[]) => { + return docs + .map((_, index) => `${index + 1}. ${docs[index].pageContent}`) + .join('\n'); +}; + +const rerankDocs = async ({ + query, + docs, +}: { + query: string; + docs: Document[]; +}) => { + if (docs.length === 0) { + return docs; + } + + const docsWithContent = docs.filter( + (doc) => doc.pageContent && doc.pageContent.length > 0, + ); + + const docEmbeddings = await embeddings.embedDocuments( + docsWithContent.map((doc) => doc.pageContent), + ); + + const queryEmbedding = await embeddings.embedQuery(query); + + const similarity = docEmbeddings.map((docEmbedding, i) => { + const sim = computeSimilarity(queryEmbedding, docEmbedding); + + return { + index: i, + similarity: sim, + }; + }); + + const sortedDocs = similarity + .sort((a, b) => b.similarity - a.similarity) + .filter((sim) => sim.similarity > 0.5) + .slice(0, 15) + .map((sim) => docsWithContent[sim.index]); + + return sortedDocs; +}; + +type BasicChainInput = { + chat_history: BaseMessage[]; + query: string; +}; + +const basicWebSearchRetrieverChain = RunnableSequence.from([ + PromptTemplate.fromTemplate(basicSearchRetrieverPrompt), + llm, + strParser, + RunnableLambda.from(async (input: string) => { + if (input === 
'not_needed') { + return { query: '', docs: [] }; + } + + const res = await searchSearxng(input, { + language: 'en', + }); + + const documents = res.results.map( + (result) => + new Document({ + pageContent: result.content, + metadata: { + title: result.title, + url: result.url, + ...(result.img_src && { img_src: result.img_src }), + }, + }), + ); + + return { query: input, docs: documents }; + }), +]); + +const basicWebSearchAnsweringChain = RunnableSequence.from([ + RunnableMap.from({ + query: (input: BasicChainInput) => input.query, + chat_history: (input: BasicChainInput) => input.chat_history, + context: RunnableSequence.from([ + (input) => ({ + query: input.query, + chat_history: formatChatHistoryAsString(input.chat_history), + }), + basicWebSearchRetrieverChain + .pipe(rerankDocs) + .withConfig({ + runName: 'FinalSourceRetriever', + }) + .pipe(processDocs), + ]), + }), + ChatPromptTemplate.fromMessages([ + ['system', basicWebSearchResponsePrompt], + new MessagesPlaceholder('chat_history'), + ['user', '{query}'], + ]), + chatLLM, + strParser, +]).withConfig({ + runName: 'FinalResponseGenerator', +}); + +const basicWebSearch = (query: string, history: BaseMessage[]) => { + const emitter = new eventEmitter(); + + try { + const stream = basicWebSearchAnsweringChain.streamEvents( + { + chat_history: history, + query: query, + }, + { + version: 'v1', + }, + ); + + handleStream(stream, emitter); + } catch (err) { + emitter.emit( + 'error', + JSON.stringify({ data: 'An error has occurred please try again later' }), + ); + console.error(err); + } + + return emitter; +}; + +const handleWebSearch = (message: string, history: BaseMessage[]) => { + const emitter = basicWebSearch(message, history); + return emitter; +}; + +export default handleWebSearch; diff --git a/src/app.ts b/src/app.ts new file mode 100644 index 0000000..993cb23 --- /dev/null +++ b/src/app.ts @@ -0,0 +1,26 @@ +import { startWebSocketServer } from './websocket'; +import express from 'express'; +import 
cors from 'cors'; +import http from 'http'; +import routes from './routes'; + +const app = express(); +const server = http.createServer(app); + +const corsOptions = { + origin: '*', +}; + +app.use(cors(corsOptions)); +app.use(express.json()); + +app.use('/api', routes); +app.get('/api', (_, res) => { + res.status(200).json({ status: 'ok' }); +}); + +server.listen(process.env.PORT!, () => { + console.log(`API server started on port ${process.env.PORT}`); +}); + +startWebSocketServer(server); diff --git a/src/core/agentPicker.ts b/src/core/agentPicker.ts new file mode 100644 index 0000000..ff118da --- /dev/null +++ b/src/core/agentPicker.ts @@ -0,0 +1,69 @@ +import { z } from 'zod'; +import { OpenAI } from '@langchain/openai'; +import { RunnableSequence } from '@langchain/core/runnables'; +import { StructuredOutputParser } from 'langchain/output_parsers'; +import { PromptTemplate } from '@langchain/core/prompts'; + +const availableAgents = [ + { + name: 'webSearch', + description: + 'It is expert is searching the web for information and answer user queries', + }, + /* { + name: 'academicSearch', + description: + 'It is expert is searching the academic databases for information and answer user queries. It is particularly good at finding research papers and articles on topics like science, engineering, and technology. 
Use this instead of wolframAlphaSearch if the user query is not mathematical or scientific in nature', + }, + { + name: 'youtubeSearch', + description: + 'This model is expert at finding videos on youtube based on user queries', + }, + { + name: 'wolframAlphaSearch', + description: + 'This model is expert at finding answers to mathematical and scientific questions based on user queries.', + }, + { + name: 'redditSearch', + description: + 'This model is expert at finding posts and discussions on reddit based on user queries', + }, + { + name: 'writingAssistant', + description: + 'If there is no need for searching, this model is expert at generating text based on user queries', + }, */ +]; + +const parser = StructuredOutputParser.fromZodSchema( + z.object({ + agent: z.string().describe('The name of the selected agent'), + }), +); + +const prompt = ` + You are an AI model who is expert at finding suitable agents for user queries. The available agents are: + ${availableAgents.map((agent) => `- ${agent.name}: ${agent.description}`).join('\n')} + + Your task is to find the most suitable agent for the following query: {query} + + {format_instructions} +`; + +const chain = RunnableSequence.from([ + PromptTemplate.fromTemplate(prompt), + new OpenAI({ temperature: 0 }), + parser, +]); + +const pickSuitableAgent = async (query: string) => { + const res = await chain.invoke({ + query, + format_instructions: parser.getFormatInstructions(), + }); + return res.agent; +}; + +export default pickSuitableAgent; diff --git a/src/core/searxng.ts b/src/core/searxng.ts new file mode 100644 index 0000000..3bb4a53 --- /dev/null +++ b/src/core/searxng.ts @@ -0,0 +1,42 @@ +import axios from 'axios'; + +interface SearxngSearchOptions { + categories?: string[]; + engines?: string[]; + language?: string; + pageno?: number; +} + +interface SearxngSearchResult { + title: string; + url: string; + img_src?: string; + thumbnail_src?: string; + content?: string; + author?: string; +} + +export const 
searchSearxng = async ( + query: string, + opts?: SearxngSearchOptions, +) => { + const url = new URL(`${process.env.SEARXNG_API_URL}/search?format=json`); + url.searchParams.append('q', query); + + if (opts) { + Object.keys(opts).forEach((key) => { + if (Array.isArray(opts[key])) { + url.searchParams.append(key, opts[key].join(',')); + return; + } + url.searchParams.append(key, opts[key]); + }); + } + + const res = await axios.get(url.toString()); + + const results: SearxngSearchResult[] = res.data.results; + const suggestions: string[] = res.data.suggestions; + + return { results, suggestions }; +}; diff --git a/src/routes/images.ts b/src/routes/images.ts new file mode 100644 index 0000000..5a33ac6 --- /dev/null +++ b/src/routes/images.ts @@ -0,0 +1,22 @@ +import express from 'express'; +import imageSearchChain from '../agents/imageSearchAgent'; + +const router = express.Router(); + +router.post('/', async (req, res) => { + try { + const { query, chat_history } = req.body; + + const images = await imageSearchChain.invoke({ + query, + chat_history, + }); + + res.status(200).json({ images }); + } catch (err) { + res.status(500).json({ message: 'An error has occurred.' 
}); + console.log(err.message); + } +}); + +export default router; diff --git a/src/routes/index.ts b/src/routes/index.ts new file mode 100644 index 0000000..f2800cf --- /dev/null +++ b/src/routes/index.ts @@ -0,0 +1,8 @@ +import express from 'express'; +import imagesRouter from './images'; + +const router = express.Router(); + +router.use('/images', imagesRouter); + +export default router; diff --git a/src/utils/computeSimilarity.ts b/src/utils/computeSimilarity.ts new file mode 100644 index 0000000..1b07cc7 --- /dev/null +++ b/src/utils/computeSimilarity.ts @@ -0,0 +1,14 @@ +import dot from 'compute-dot'; +import cosineSimilarity from 'compute-cosine-similarity'; + +const computeSimilarity = (x: number[], y: number[]): number => { + if (process.env.SIMILARITY_MEASURE === 'cosine') { + return cosineSimilarity(x, y); + } else if (process.env.SIMILARITY_MEASURE === 'dot') { + return dot(x, y); + } + + throw new Error('Invalid similarity measure'); +}; + +export default computeSimilarity; diff --git a/src/utils/formatHistory.ts b/src/utils/formatHistory.ts new file mode 100644 index 0000000..6d0d309 --- /dev/null +++ b/src/utils/formatHistory.ts @@ -0,0 +1,9 @@ +import { BaseMessage } from '@langchain/core/messages'; + +const formatChatHistoryAsString = (history: BaseMessage[]) => { + return history + .map((message) => `${message._getType()}: ${message.content}`) + .join('\n'); +}; + +export default formatChatHistoryAsString; diff --git a/src/websocket/connectionManager.ts b/src/websocket/connectionManager.ts new file mode 100644 index 0000000..a5746e4 --- /dev/null +++ b/src/websocket/connectionManager.ts @@ -0,0 +1,11 @@ +import { WebSocket } from 'ws'; +import { handleMessage } from './messageHandler'; + +export const handleConnection = (ws: WebSocket) => { + ws.on( + 'message', + async (message) => await handleMessage(message.toString(), ws), + ); + + ws.on('close', () => console.log('Connection closed')); +}; diff --git a/src/websocket/index.ts 
b/src/websocket/index.ts new file mode 100644 index 0000000..1b9ae77 --- /dev/null +++ b/src/websocket/index.ts @@ -0,0 +1,8 @@ +import { initServer } from './websocketServer'; +import http from 'http'; + +export const startWebSocketServer = ( + server: http.Server, +) => { + initServer(server); +}; diff --git a/src/websocket/messageHandler.ts b/src/websocket/messageHandler.ts new file mode 100644 index 0000000..b7d2fd4 --- /dev/null +++ b/src/websocket/messageHandler.ts @@ -0,0 +1,81 @@ +import { WebSocket } from 'ws'; +import pickSuitableAgent from '../core/agentPicker'; +import handleWebSearch from '../agents/webSearchAgent'; +import { BaseMessage, AIMessage, HumanMessage } from '@langchain/core/messages'; + +type Message = { + type: string; + content: string; + copilot: boolean; + focus: string; + history: Array<[string, string]>; +}; + +export const handleMessage = async (message: string, ws: WebSocket) => { + try { + const parsedMessage = JSON.parse(message) as Message; + const id = Math.random().toString(36).substring(7); + + if (!parsedMessage.content) + return ws.send( + JSON.stringify({ type: 'error', data: 'Invalid message format' }), + ); + + const history: BaseMessage[] = parsedMessage.history.map((msg) => { + if (msg[0] === 'human') { + return new HumanMessage({ + content: msg[1], + }); + } else { + return new AIMessage({ + content: msg[1], + }); + } + }); + + if (parsedMessage.type === 'message') { + /* if (!parsedMessage.focus) { + const agent = await pickSuitableAgent(parsedMessage.content); + parsedMessage.focus = agent; + } */ + + parsedMessage.focus = 'webSearch'; + + switch (parsedMessage.focus) { + case 'webSearch': { + const emitter = handleWebSearch(parsedMessage.content, history); + emitter.on('data', (data) => { + const parsedData = JSON.parse(data); + if (parsedData.type === 'response') { + ws.send( + JSON.stringify({ + type: 'message', + data: parsedData.data, + messageId: id, + }), + ); + } else if (parsedData.type === 'sources') { + 
ws.send( + JSON.stringify({ + type: 'sources', + data: parsedData.data, + messageId: id, + }), + ); + } + }); + emitter.on('end', () => { + ws.send(JSON.stringify({ type: 'messageEnd', messageId: id })); + }); + emitter.on('error', (data) => { + const parsedData = JSON.parse(data); + ws.send(JSON.stringify({ type: 'error', data: parsedData.data })); + }); + } + } + } + } catch (error) { + console.error('Failed to handle message', error); + ws.send(JSON.stringify({ type: 'error', data: 'Invalid message format' })); + } +}; diff --git a/src/websocket/websocketServer.ts b/src/websocket/websocketServer.ts new file mode 100644 index 0000000..8aca021 --- /dev/null +++ b/src/websocket/websocketServer.ts @@ -0,0 +1,15 @@ +import { WebSocketServer } from 'ws'; +import { handleConnection } from './connectionManager'; +import http from 'http'; + +export const initServer = ( + server: http.Server, +) => { + const wss = new WebSocketServer({ server }); + + wss.on('connection', (ws) => { + handleConnection(ws); + }); + + console.log(`WebSocket server started on port ${process.env.PORT}`); +}; diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..5bdba67 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "lib": ["ESNext"], + "module": "commonjs", + "target": "ESNext", + "outDir": "dist", + "sourceMap": false, + "esModuleInterop": true, + "experimentalDecorators": true, + "emitDecoratorMetadata": true, + "allowSyntheticDefaultImports": true, + "skipLibCheck": true, + "skipDefaultLibCheck": true + }, + "include": ["src"], + "exclude": ["node_modules", "**/*.spec.ts"] +} diff --git a/ui/.env.example b/ui/.env.example new file mode 100644 index 0000000..57a3ed9 --- /dev/null +++ b/ui/.env.example @@ -0,0 +1,2 @@ +NEXT_PUBLIC_WS_URL=ws://localhost:3001 +NEXT_PUBLIC_API_URL=http://localhost:3001/api \ No newline at end of file diff --git a/ui/.eslintrc.json b/ui/.eslintrc.json new file mode 100644 index 0000000..bffb357 --- /dev/null 
+++ b/ui/.eslintrc.json @@ -0,0 +1,3 @@ +{ + "extends": "next/core-web-vitals" +} diff --git a/ui/.gitignore b/ui/.gitignore new file mode 100644 index 0000000..5913b49 --- /dev/null +++ b/ui/.gitignore @@ -0,0 +1,34 @@ +# dependencies +/node_modules +/.pnp +.pnp.js +.yarn/install-state.gz + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# local env files +.env*.local + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/ui/.prettierrc.js b/ui/.prettierrc.js new file mode 100644 index 0000000..8ca480f --- /dev/null +++ b/ui/.prettierrc.js @@ -0,0 +1,11 @@ +/** @type {import("prettier").Config} */ + +const config = { + printWidth: 80, + trailingComma: 'all', + endOfLine: 'auto', + singleQuote: true, + tabWidth: 2, +}; + +module.exports = config; diff --git a/ui/app/favicon.ico b/ui/app/favicon.ico new file mode 100644 index 0000000..718d6fe Binary files /dev/null and b/ui/app/favicon.ico differ diff --git a/ui/app/globals.css b/ui/app/globals.css new file mode 100644 index 0000000..f75daca --- /dev/null +++ b/ui/app/globals.css @@ -0,0 +1,13 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +@layer base { + .overflow-hidden-scrollable { + -ms-overflow-style: none; + } + + .overflow-hidden-scrollable::-webkit-scrollbar { + display: none; + } +} diff --git a/ui/app/layout.tsx b/ui/app/layout.tsx new file mode 100644 index 0000000..cb670f5 --- /dev/null +++ b/ui/app/layout.tsx @@ -0,0 +1,32 @@ +import type { Metadata } from 'next'; +import { Montserrat } from 'next/font/google'; +import './globals.css'; +import { cn } from '@/lib/utils'; +import Sidebar from '@/components/Sidebar'; + +const montserrat = Montserrat({ + weight: ['300', '400', '500', '700'], + subsets: ['latin'], + display: 'swap', + fallback: ['Arial', 'sans-serif'], +}); + +export const metadata: Metadata = { + title: 'Perplexica - Chat with 
the internet', + description: + 'Perplexica is an AI powered chatbot that is connected to the internet.', +}; + +export default function RootLayout({ + children, +}: Readonly<{ + children: React.ReactNode; +}>) { + return ( + + + {children} + + + ); +} diff --git a/ui/app/page.tsx b/ui/app/page.tsx new file mode 100644 index 0000000..982763a --- /dev/null +++ b/ui/app/page.tsx @@ -0,0 +1,17 @@ +import ChatWindow from '@/components/ChatWindow'; +import { Metadata } from 'next'; + +export const metadata: Metadata = { + title: 'Chat - Perplexica', + description: 'Chat with the internet, chat with Perplexica.', +}; + +const Home = () => { + return ( +
+ +
+ ); +}; + +export default Home; diff --git a/ui/components/Chat.tsx b/ui/components/Chat.tsx new file mode 100644 index 0000000..61ab3ab --- /dev/null +++ b/ui/components/Chat.tsx @@ -0,0 +1,87 @@ +'use client'; + +import { useEffect, useRef, useState } from 'react'; +import MessageInput from './MessageInput'; +import { Message } from './ChatWindow'; +import MessageBox from './MessageBox'; +import MessageBoxLoading from './MessageBoxLoading'; + +const Chat = ({ + loading, + messages, + sendMessage, + messageAppeared, + rewrite, +}: { + messages: Message[]; + sendMessage: (message: string) => void; + loading: boolean; + messageAppeared: boolean; + rewrite: (messageId: string) => void; +}) => { + const [dividerWidth, setDividerWidth] = useState(0); + const dividerRef = useRef(null); + const messageEnd = useRef(null); + + useEffect(() => { + const updateDividerWidth = () => { + if (dividerRef.current) { + setDividerWidth(dividerRef.current.scrollWidth); + } + }; + + updateDividerWidth(); + + window.addEventListener('resize', updateDividerWidth); + + return () => { + window.removeEventListener('resize', updateDividerWidth); + }; + }); + + useEffect(() => { + messageEnd.current?.scrollIntoView({ behavior: 'smooth' }); + + if (messages.length === 1) { + document.title = `${messages[0].content.substring(0, 30)} - Perplexica`; + } + }, [messages]); + + return ( +
+ {messages.map((msg, i) => { + const isLast = i === messages.length - 1; + + return ( + <> + + {!isLast && msg.role === 'assistant' && ( +
+ )} + + ); + })} + {loading && !messageAppeared && } +
+ {dividerWidth > 0 && ( +
+ +
+ )} +
+ ); +}; + +export default Chat; diff --git a/ui/components/ChatWindow.tsx b/ui/components/ChatWindow.tsx new file mode 100644 index 0000000..46e8568 --- /dev/null +++ b/ui/components/ChatWindow.tsx @@ -0,0 +1,170 @@ +'use client'; + +import { useEffect, useState } from 'react'; +import { Document } from '@langchain/core/documents'; +import Navbar from './Navbar'; +import Chat from './Chat'; +import EmptyChat from './EmptyChat'; + +export type Message = { + id: string; + createdAt?: Date; + content: string; + role: 'user' | 'assistant'; + sources?: Document[]; +}; + +const useSocket = (url: string) => { + const [ws, setWs] = useState(null); + + useEffect(() => { + if (!ws) { + const ws = new WebSocket(url); + ws.onopen = () => { + console.log('[DEBUG] open'); + setWs(ws); + }; + } + + return () => { + ws?.close(); + console.log('[DEBUG] closed'); + }; + }, [ws, url]); + + return ws; +}; + +const ChatWindow = () => { + const ws = useSocket(process.env.NEXT_PUBLIC_WS_URL!); + const [chatHistory, setChatHistory] = useState<[string, string][]>([]); + const [messages, setMessages] = useState([]); + const [loading, setLoading] = useState(false); + const [messageAppeared, setMessageAppeared] = useState(false); + + const sendMessage = async (message: string) => { + if (loading) return; + setLoading(true); + setMessageAppeared(false); + + let sources: Document[] | undefined = undefined; + let recievedMessage = ''; + let added = false; + + ws?.send( + JSON.stringify({ + type: 'message', + content: message, + history: [...chatHistory, ['human', message]], + }), + ); + + setMessages((prevMessages) => [ + ...prevMessages, + { + content: message, + id: Math.random().toString(36).substring(7), + role: 'user', + }, + ]); + + const messageHandler = (e: MessageEvent) => { + const data = JSON.parse(e.data); + + if (data.type === 'sources') { + sources = data.data; + if (!added) { + setMessages((prevMessages) => [ + ...prevMessages, + { + content: '', + id: data.messageId, + role: 
'assistant', + sources: sources, + }, + ]); + added = true; + } + setMessageAppeared(true); + } + + if (data.type === 'message') { + if (!added) { + setMessages((prevMessages) => [ + ...prevMessages, + { + content: data.data, + id: data.messageId, + role: 'assistant', + sources: sources, + }, + ]); + added = true; + } + + setMessages((prev) => + prev.map((message) => { + if (message.id === data.messageId) { + return { ...message, content: message.content + data.data }; + } + + return message; + }), + ); + + recievedMessage += data.data; + setMessageAppeared(true); + } + + if (data.type === 'messageEnd') { + setChatHistory((prevHistory) => [ + ...prevHistory, + ['human', message], + ['assistant', recievedMessage], + ]); + ws?.removeEventListener('message', messageHandler); + setLoading(false); + } + }; + + ws?.addEventListener('message', messageHandler); + }; + + const rewrite = (messageId: string) => { + const index = messages.findIndex((msg) => msg.id === messageId); + + if (index === -1) return; + + const message = messages[index - 1]; + + setMessages((prev) => { + return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)]; + }); + setChatHistory((prev) => { + return [...prev.slice(0, messages.length > 2 ? index - 1 : 0)]; + }); + + sendMessage(message.content); + }; + + return ( +
+ {messages.length > 0 ? ( + <> + + + + ) : ( + + )} +
+ ); +}; + +export default ChatWindow; diff --git a/ui/components/EmptyChat.tsx b/ui/components/EmptyChat.tsx new file mode 100644 index 0000000..89cc5ce --- /dev/null +++ b/ui/components/EmptyChat.tsx @@ -0,0 +1,18 @@ +import EmptyChatMessageInput from './EmptyChatMessageInput'; + +const EmptyChat = ({ + sendMessage, +}: { + sendMessage: (message: string) => void; +}) => { + return ( +
+

+ Research begins here. +

+ +
+ ); +}; + +export default EmptyChat; diff --git a/ui/components/EmptyChatMessageInput.tsx b/ui/components/EmptyChatMessageInput.tsx new file mode 100644 index 0000000..581f044 --- /dev/null +++ b/ui/components/EmptyChatMessageInput.tsx @@ -0,0 +1,61 @@ +import { ArrowRight } from 'lucide-react'; +import { useState } from 'react'; +import TextareaAutosize from 'react-textarea-autosize'; +import { Attach, CopilotToggle, Focus } from './MessageInputActions'; + +const EmptyChatMessageInput = ({ + sendMessage, +}: { + sendMessage: (message: string) => void; +}) => { + const [copilotEnabled, setCopilotEnabled] = useState(false); + const [message, setMessage] = useState(''); + + return ( +
{ + e.preventDefault(); + sendMessage(message); + setMessage(''); + }} + onKeyDown={(e) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + sendMessage(message); + setMessage(''); + } + }} + className="w-full" + > +
+ setMessage(e.target.value)} + minRows={2} + className="bg-transparent placeholder:text-white/50 text-sm text-white resize-none focus:outline-none w-full max-h-24 lg:max-h-36 xl:max-h-48" + placeholder="Ask anything..." + /> +
+
+ + +
+
+ + +
+
+
+
+ ); +}; + +export default EmptyChatMessageInput; diff --git a/ui/components/Layout.tsx b/ui/components/Layout.tsx new file mode 100644 index 0000000..e517e00 --- /dev/null +++ b/ui/components/Layout.tsx @@ -0,0 +1,9 @@ +const Layout = ({ children }: { children: React.ReactNode }) => { + return ( +
+
{children}
+
+ ); +}; + +export default Layout; diff --git a/ui/components/MessageActions/Copy.tsx b/ui/components/MessageActions/Copy.tsx new file mode 100644 index 0000000..b19d8d4 --- /dev/null +++ b/ui/components/MessageActions/Copy.tsx @@ -0,0 +1,29 @@ +import { Check, ClipboardList } from 'lucide-react'; +import { Message } from '../ChatWindow'; +import { useState } from 'react'; + +const Copy = ({ + message, + initialMessage, +}: { + message: Message; + initialMessage: string; +}) => { + const [copied, setCopied] = useState(false); + + return ( + + ); +}; + +export default Copy; diff --git a/ui/components/MessageActions/Rewrite.tsx b/ui/components/MessageActions/Rewrite.tsx new file mode 100644 index 0000000..3ae282d --- /dev/null +++ b/ui/components/MessageActions/Rewrite.tsx @@ -0,0 +1,20 @@ +import { ArrowLeftRight } from 'lucide-react'; + +const Rewrite = ({ + rewrite, + messageId, +}: { + rewrite: (messageId: string) => void; + messageId: string; +}) => { + return ( + + ); +}; + +export default Rewrite; diff --git a/ui/components/MessageBox.tsx b/ui/components/MessageBox.tsx new file mode 100644 index 0000000..9018166 --- /dev/null +++ b/ui/components/MessageBox.tsx @@ -0,0 +1,134 @@ +/* eslint-disable @next/next/no-img-element */ +import React, { MutableRefObject, useEffect, useState } from 'react'; +import { Message } from './ChatWindow'; +import { cn } from '@/lib/utils'; +import { + BookCopy, + Disc3, + FilePen, + PlusIcon, + Share, + ThumbsDown, + VideoIcon, +} from 'lucide-react'; +import Markdown from 'markdown-to-jsx'; +import Copy from './MessageActions/Copy'; +import Rewrite from './MessageActions/Rewrite'; +import MessageSources from './MessageSources'; +import SearchImages from './SearchImages'; + +const MessageBox = ({ + message, + messageIndex, + history, + loading, + dividerRef, + isLast, + rewrite, +}: { + message: Message; + messageIndex: number; + history: Message[]; + loading: boolean; + dividerRef?: MutableRefObject; + isLast: boolean; + rewrite: 
(messageId: string) => void; +}) => { + const [parsedMessage, setParsedMessage] = useState(message.content); + + useEffect(() => { + if ( + message.role === 'assistant' && + message?.sources && + message.sources.length > 0 + ) { + const regex = /\[(\d+)\]/g; + + return setParsedMessage( + message.content.replace( + regex, + (_, number) => + `${number}`, + ), + ); + } + setParsedMessage(message.content); + }, [message.content, message.sources, message.role]); + + return ( +
+ {message.role === 'user' && ( +
+

+ {message.content} +

+
+ )} + + {message.role === 'assistant' && ( +
+
+ {message.sources && message.sources.length > 0 && ( +
+
+ +

Sources

+
+ +
+ )} +
+
+ +

Answer

+
+ + {parsedMessage} + + {!loading && ( +
+
+ + +
+
+ + + +
+
+ )} +
+
+
+ +
+
+ +

Search videos

+
+ +
+
+
+ )} +
+ ); +}; + +export default MessageBox; diff --git a/ui/components/MessageBoxLoading.tsx b/ui/components/MessageBoxLoading.tsx new file mode 100644 index 0000000..e070a27 --- /dev/null +++ b/ui/components/MessageBoxLoading.tsx @@ -0,0 +1,11 @@ +const MessageBoxLoading = () => { + return ( +
+
+
+
+
+ ); +}; + +export default MessageBoxLoading; diff --git a/ui/components/MessageInput.tsx b/ui/components/MessageInput.tsx new file mode 100644 index 0000000..051afbc --- /dev/null +++ b/ui/components/MessageInput.tsx @@ -0,0 +1,89 @@ +import { cn } from '@/lib/utils'; +import { ArrowUp } from 'lucide-react'; +import { useEffect, useState } from 'react'; +import TextareaAutosize from 'react-textarea-autosize'; +import { Attach, CopilotToggle } from './MessageInputActions'; + +const MessageInput = ({ + sendMessage, +}: { + sendMessage: (message: string) => void; +}) => { + const [copilotEnabled, setCopilotEnabled] = useState(false); + const [message, setMessage] = useState(''); + const [textareaRows, setTextareaRows] = useState(1); + const [mode, setMode] = useState<'multi' | 'single'>('single'); + + useEffect(() => { + if (textareaRows >= 2 && message && mode === 'single') { + setMode('multi'); + } else if (!message && mode === 'multi') { + setMode('single'); + } + }, [textareaRows, mode, message]); + + return ( +
{ + e.preventDefault(); + sendMessage(message); + setMessage(''); + }} + onKeyDown={(e) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + sendMessage(message); + setMessage(''); + } + }} + className={cn( + 'bg-[#111111] p-4 flex items-center overflow-hidden border border-[#1C1C1C]', + mode === 'multi' ? 'flex-col rounded-lg' : 'flex-row rounded-full', + )} + > + {mode === 'single' && } + setMessage(e.target.value)} + onHeightChange={(height, props) => { + setTextareaRows(Math.ceil(height / props.rowHeight)); + }} + className="transition bg-transparent placeholder:text-white/50 placeholder:text-sm text-sm text-white resize-none focus:outline-none w-full px-2 max-h-24 lg:max-h-36 xl:max-h-48 flex-grow flex-shrink" + placeholder="Ask a follow-up" + /> + {mode === 'single' && ( +
+ + +
+ )} + {mode === 'multi' && ( +
+ +
+ + +
+
+ )} + + ); +}; + +export default MessageInput; diff --git a/ui/components/MessageInputActions.tsx b/ui/components/MessageInputActions.tsx new file mode 100644 index 0000000..118f630 --- /dev/null +++ b/ui/components/MessageInputActions.tsx @@ -0,0 +1,58 @@ +import { CopyPlus, ScanEye } from 'lucide-react'; +import { cn } from '@/lib/utils'; +import { Switch } from '@headlessui/react'; + +export const Attach = () => { + return ( + + ); +}; + +export const Focus = () => { + return ( + + ); +}; + +export const CopilotToggle = ({ + copilotEnabled, + setCopilotEnabled, +}: { + copilotEnabled: boolean; + setCopilotEnabled: (enabled: boolean) => void; +}) => { + return ( +
+ + Copilot + + +

setCopilotEnabled(!copilotEnabled)} + className={cn( + 'text-xs font-medium transition-colors duration-150 ease-in-out', + copilotEnabled + ? 'text-[#24A0ED]' + : 'text-white/50 group-hover:text-white', + )} + > + Copilot +

+
+ ); +}; diff --git a/ui/components/MessageSources.tsx b/ui/components/MessageSources.tsx new file mode 100644 index 0000000..8c78f2f --- /dev/null +++ b/ui/components/MessageSources.tsx @@ -0,0 +1,136 @@ +/* eslint-disable @next/next/no-img-element */ +import { cn } from '@/lib/utils'; +import { Dialog, Transition } from '@headlessui/react'; +import { Document } from '@langchain/core/documents'; +import Link from 'next/link'; +import { Fragment, useState } from 'react'; + +const MessageSources = ({ sources }: { sources: Document[] }) => { + const [isDialogOpen, setIsDialogOpen] = useState(false); + function closeModal() { + setIsDialogOpen(false); + document.body.classList.remove('overflow-hidden-scrollable'); + } + + function openModal() { + setIsDialogOpen(true); + document.body.classList.add('overflow-hidden-scrollable'); + } + + return ( +
+ {sources.slice(0, 3).map((source, i) => ( + +

+ {source.metadata.title} +

+
+
+ favicon +

+ {source.metadata.url.replace(/.+\/\/|www.|\..+/g, '')} +

+
+
+
+ {i + 1} +
+
+
+ ))} + {sources.length > 3 && ( + + )} + + + + +
+ ); +}; + +export default MessageSources; diff --git a/ui/components/Navbar.tsx b/ui/components/Navbar.tsx new file mode 100644 index 0000000..a5444c4 --- /dev/null +++ b/ui/components/Navbar.tsx @@ -0,0 +1,29 @@ +import { Clock, Edit, Share, Trash } from 'lucide-react'; + +const Navbar = () => { + return ( +
+ +
+ +

15 minutes ago

+
+

Blog on AI

+
+ + +
+
+ ); +}; + +export default Navbar; diff --git a/ui/components/SearchImages.tsx b/ui/components/SearchImages.tsx new file mode 100644 index 0000000..022c90d --- /dev/null +++ b/ui/components/SearchImages.tsx @@ -0,0 +1,135 @@ +/* eslint-disable @next/next/no-img-element */ +import { ImagesIcon, PlusIcon } from 'lucide-react'; +import { useState } from 'react'; +import Lightbox from 'yet-another-react-lightbox'; +import 'yet-another-react-lightbox/styles.css'; + +type Image = { + url: string; + img_src: string; + title: string; +}; + +const SearchImages = ({ query }: { query: string }) => { + const [images, setImages] = useState(null); + const [loading, setLoading] = useState(false); + const [open, setOpen] = useState(false); + const [slides, setSlides] = useState([]); + + return ( + <> + {!loading && images === null && ( + + )} + {loading && ( +
+ {[...Array(4)].map((_, i) => ( +
+ ))} +
+ )} + {images !== null && images.length > 0 && ( + <> +
+ {images.length > 4 + ? images.slice(0, 3).map((image, i) => ( + { + setOpen(true); + setSlides([ + slides[i], + ...slides.slice(0, i), + ...slides.slice(i + 1), + ]); + }} + key={i} + src={image.img_src} + alt={image.title} + className="h-full w-full aspect-video object-cover rounded-lg transition duration-200 active:scale-95 cursor-pointer" + /> + )) + : images.map((image, i) => ( + { + setOpen(true); + setSlides([ + slides[i], + ...slides.slice(0, i), + ...slides.slice(i + 1), + ]); + }} + key={i} + src={image.img_src} + alt={image.title} + className="h-full w-full aspect-video object-cover rounded-lg transition duration-200 active:scale-95 cursor-pointer" + /> + ))} + {images.length > 4 && ( + + )} +
+ setOpen(false)} slides={slides} /> + + )} + + ); +}; + +export default SearchImages; diff --git a/ui/components/Sidebar.tsx b/ui/components/Sidebar.tsx new file mode 100644 index 0000000..e562160 --- /dev/null +++ b/ui/components/Sidebar.tsx @@ -0,0 +1,96 @@ +'use client'; + +import { cn } from '@/lib/utils'; +import { BookOpenText, Home, Search, SquarePen } from 'lucide-react'; +import { SiGithub } from '@icons-pack/react-simple-icons'; +import Link from 'next/link'; +import { useSelectedLayoutSegments } from 'next/navigation'; +import React from 'react'; +import Layout from './Layout'; + +const Sidebar = ({ children }: { children: React.ReactNode }) => { + const segments = useSelectedLayoutSegments(); + + const navLinks = [ + { + icon: Home, + href: '/', + active: segments.length === 0, + label: 'Home', + }, + { + icon: Search, + href: '/discover', + active: segments.includes('discover'), + label: 'Discover', + }, + { + icon: BookOpenText, + href: '/library', + active: segments.includes('library'), + label: 'Library', + }, + ]; + + return ( +
+
+
+ + + +
+ {navLinks.map((link, i) => ( + + + {link.active && ( +
+ )} + + ))} +
+ + + +
+
+ +
+ {navLinks.map((link, i) => ( + + {link.active && ( +
+ )} + +

{link.label}

+ + ))} +
+ + {children} +
+ ); +}; + +export default Sidebar; diff --git a/ui/lib/utils.ts b/ui/lib/utils.ts new file mode 100644 index 0000000..4c61678 --- /dev/null +++ b/ui/lib/utils.ts @@ -0,0 +1,4 @@ +import clsx, { ClassValue } from 'clsx'; +import { twMerge } from 'tailwind-merge'; + +export const cn = (...classes: ClassValue[]) => twMerge(clsx(...classes)); diff --git a/ui/next.config.mjs b/ui/next.config.mjs new file mode 100644 index 0000000..c3f2e1a --- /dev/null +++ b/ui/next.config.mjs @@ -0,0 +1,12 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = { + images: { + remotePatterns: [ + { + hostname: 's2.googleusercontent.com', + }, + ], + }, +}; + +export default nextConfig; diff --git a/ui/package.json b/ui/package.json new file mode 100644 index 0000000..bfd8e0c --- /dev/null +++ b/ui/package.json @@ -0,0 +1,42 @@ +{ + "name": "perplexica-frontend", + "version": "1.0.0", + "license": "MIT", + "author": "ItzCrazyKns", + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start", + "lint": "next lint", + "format:write": "prettier . 
--write" + }, + "dependencies": { + "@headlessui/react": "^1.7.18", + "@icons-pack/react-simple-icons": "^9.4.0", + "@langchain/openai": "^0.0.25", + "@tailwindcss/typography": "^0.5.12", + "clsx": "^2.1.0", + "langchain": "^0.1.30", + "lucide-react": "^0.363.0", + "markdown-to-jsx": "^7.4.5", + "next": "14.1.4", + "react": "^18", + "react-dom": "^18", + "react-textarea-autosize": "^8.5.3", + "tailwind-merge": "^2.2.2", + "yet-another-react-lightbox": "^3.17.2", + "zod": "^3.22.4" + }, + "devDependencies": { + "@types/node": "^20", + "@types/react": "^18", + "@types/react-dom": "^18", + "autoprefixer": "^10.0.1", + "eslint": "^8", + "eslint-config-next": "14.1.4", + "postcss": "^8", + "prettier": "^3.2.5", + "tailwindcss": "^3.3.0", + "typescript": "^5" + } +} diff --git a/ui/postcss.config.js b/ui/postcss.config.js new file mode 100644 index 0000000..12a703d --- /dev/null +++ b/ui/postcss.config.js @@ -0,0 +1,6 @@ +module.exports = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +}; diff --git a/ui/public/next.svg b/ui/public/next.svg new file mode 100644 index 0000000..5174b28 --- /dev/null +++ b/ui/public/next.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/ui/public/vercel.svg b/ui/public/vercel.svg new file mode 100644 index 0000000..d2f8422 --- /dev/null +++ b/ui/public/vercel.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/ui/tailwind.config.ts b/ui/tailwind.config.ts new file mode 100644 index 0000000..05f107d --- /dev/null +++ b/ui/tailwind.config.ts @@ -0,0 +1,14 @@ +import type { Config } from 'tailwindcss'; + +const config: Config = { + content: [ + './pages/**/*.{js,ts,jsx,tsx,mdx}', + './components/**/*.{js,ts,jsx,tsx,mdx}', + './app/**/*.{js,ts,jsx,tsx,mdx}', + ], + theme: { + extend: {}, + }, + plugins: [require('@tailwindcss/typography')], +}; +export default config; diff --git a/ui/tsconfig.json b/ui/tsconfig.json new file mode 100644 index 0000000..e7ff90f --- /dev/null +++ b/ui/tsconfig.json @@ -0,0 +1,26 @@ 
+{ + "compilerOptions": { + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "preserve", + "incremental": true, + "plugins": [ + { + "name": "next" + } + ], + "paths": { + "@/*": ["./*"] + } + }, + "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], + "exclude": ["node_modules"] +}