Revert "Merge remote-tracking branch 'origin/master' into ollama-auth"

This reverts commit 4447cf7b5f, reversing
changes made to fb6ec2fc8a.
This commit is contained in:
projectmoon 2024-09-14 13:24:02 +02:00
parent 4447cf7b5f
commit 4876a36369
11 changed files with 2420 additions and 128 deletions

View File

@ -1,6 +1,6 @@
{ {
"name": "perplexica-backend", "name": "perplexica-backend",
"version": "1.9.0-rc2", "version": "1.9.0-rc1",
"license": "MIT", "license": "MIT",
"author": "ItzCrazyKns", "author": "ItzCrazyKns",
"scripts": { "scripts": {

File diff suppressed because it is too large Load Diff

View File

@ -19,7 +19,6 @@ import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events'; import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity'; import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger'; import logger from '../utils/logger';
import { IterableReadableStream } from '@langchain/core/utils/stream';
const basicAcademicSearchRetrieverPrompt = ` const basicAcademicSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -67,7 +66,7 @@ const basicAcademicSearchResponsePrompt = `
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
const handleStream = async ( const handleStream = async (
stream: IterableReadableStream<StreamEvent>, stream: AsyncGenerator<StreamEvent, any, unknown>,
emitter: eventEmitter, emitter: eventEmitter,
) => { ) => {
for await (const event of stream) { for await (const event of stream) {

View File

@ -19,7 +19,6 @@ import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events'; import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity'; import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger'; import logger from '../utils/logger';
import { IterableReadableStream } from '@langchain/core/utils/stream';
const basicRedditSearchRetrieverPrompt = ` const basicRedditSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -67,7 +66,7 @@ const basicRedditSearchResponsePrompt = `
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
const handleStream = async ( const handleStream = async (
stream: IterableReadableStream<StreamEvent>, stream: AsyncGenerator<StreamEvent, any, unknown>,
emitter: eventEmitter, emitter: eventEmitter,
) => { ) => {
for await (const event of stream) { for await (const event of stream) {

View File

@ -22,38 +22,22 @@ import logger from '../utils/logger';
import LineListOutputParser from '../lib/outputParsers/listLineOutputParser'; import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
import { getDocumentsFromLinks } from '../lib/linkDocument'; import { getDocumentsFromLinks } from '../lib/linkDocument';
import LineOutputParser from '../lib/outputParsers/lineOutputParser'; import LineOutputParser from '../lib/outputParsers/lineOutputParser';
import { IterableReadableStream } from '@langchain/core/utils/stream';
import { ChatOpenAI } from '@langchain/openai';
const basicSearchRetrieverPrompt = ` const basicSearchRetrieverPrompt = `
You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it. You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
If it is a smple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic). If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block. If the question contains some links and asks to answer from those links or even if they don't you need to return the links inside 'links' XML block and the question inside 'question' XML block. If there are no links then you need to return the question without any XML block.
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response. If the user asks to summarrize the content from some links you need to return \`Summarize\` as the question inside the 'question' XML block and the links inside the 'links' XML block.
There are several examples attached for your reference inside the below \`examples\` XML block Example:
1. Follow up question: What is the capital of France?
Rephrased question: \`Capital of france\`
<examples> 2. Follow up question: What is the population of New York City?
1. Follow up question: What is the capital of France Rephrased question: \`Population of New York City\`
Rephrased question:\`
<question>
Capital of france
</question>
\`
2. Hi, how are you?
Rephrased question\`
<question>
not_needed
</question>
\`
3. Follow up question: What is Docker? 3. Follow up question: What is Docker?
Rephrased question: \` Rephrased question: \`What is Docker\`
<question>
What is Docker
</question>
\`
4. Follow up question: Can you tell me what is X from https://example.com 4. Follow up question: Can you tell me what is X from https://example.com
Rephrased question: \` Rephrased question: \`
@ -69,20 +53,16 @@ https://example.com
5. Follow up question: Summarize the content from https://example.com 5. Follow up question: Summarize the content from https://example.com
Rephrased question: \` Rephrased question: \`
<question> <question>
summarize Summarize
</question> </question>
<links> <links>
https://example.com https://example.com
</links> </links>
\` \`
</examples>
Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above. Conversation:
<conversation>
{chat_history} {chat_history}
</conversation>
Follow up question: {query} Follow up question: {query}
Rephrased question: Rephrased question:
@ -115,7 +95,7 @@ const basicWebSearchResponsePrompt = `
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
const handleStream = async ( const handleStream = async (
stream: IterableReadableStream<StreamEvent>, stream: AsyncGenerator<StreamEvent, any, unknown>,
emitter: eventEmitter, emitter: eventEmitter,
) => { ) => {
for await (const event of stream) { for await (const event of stream) {
@ -152,13 +132,15 @@ type BasicChainInput = {
}; };
const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => { const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
(llm as unknown as ChatOpenAI).temperature = 0;
return RunnableSequence.from([ return RunnableSequence.from([
PromptTemplate.fromTemplate(basicSearchRetrieverPrompt), PromptTemplate.fromTemplate(basicSearchRetrieverPrompt),
llm, llm,
strParser, strParser,
RunnableLambda.from(async (input: string) => { RunnableLambda.from(async (input: string) => {
if (input === 'not_needed') {
return { query: '', docs: [] };
}
const linksOutputParser = new LineListOutputParser({ const linksOutputParser = new LineListOutputParser({
key: 'links', key: 'links',
}); });
@ -170,13 +152,9 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
const links = await linksOutputParser.parse(input); const links = await linksOutputParser.parse(input);
let question = await questionOutputParser.parse(input); let question = await questionOutputParser.parse(input);
if (question === 'not_needed') {
return { query: '', docs: [] };
}
if (links.length > 0) { if (links.length > 0) {
if (question.length === 0) { if (question.length === 0) {
question = 'summarize'; question = 'Summarize';
} }
let docs = []; let docs = [];
@ -248,7 +226,7 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
return { query: question, docs: docs }; return { query: question, docs: docs };
} else { } else {
const res = await searchSearxng(question, { const res = await searchSearxng(input, {
language: 'en', language: 'en',
}); });
@ -264,7 +242,7 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
}), }),
); );
return { query: question, docs: documents }; return { query: input, docs: documents };
} }
}), }),
]); ]);
@ -293,7 +271,7 @@ const createBasicWebSearchAnsweringChain = (
return docs; return docs;
} }
if (query.toLocaleLowerCase() === 'summarize') { if (query === 'Summarize') {
return docs; return docs;
} }
@ -316,7 +294,7 @@ const createBasicWebSearchAnsweringChain = (
}); });
const sortedDocs = similarity const sortedDocs = similarity
.filter((sim) => sim.similarity > 0.3) .filter((sim) => sim.similarity > 0.5)
.sort((a, b) => b.similarity - a.similarity) .sort((a, b) => b.similarity - a.similarity)
.slice(0, 15) .slice(0, 15)
.map((sim) => docsWithContent[sim.index]); .map((sim) => docsWithContent[sim.index]);

View File

@ -18,7 +18,6 @@ import type { Embeddings } from '@langchain/core/embeddings';
import formatChatHistoryAsString from '../utils/formatHistory'; import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events'; import eventEmitter from 'events';
import logger from '../utils/logger'; import logger from '../utils/logger';
import { IterableReadableStream } from '@langchain/core/utils/stream';
const basicWolframAlphaSearchRetrieverPrompt = ` const basicWolframAlphaSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -66,7 +65,7 @@ const basicWolframAlphaSearchResponsePrompt = `
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
const handleStream = async ( const handleStream = async (
stream: IterableReadableStream<StreamEvent>, stream: AsyncGenerator<StreamEvent, any, unknown>,
emitter: eventEmitter, emitter: eventEmitter,
) => { ) => {
for await (const event of stream) { for await (const event of stream) {

View File

@ -10,7 +10,6 @@ import eventEmitter from 'events';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings'; import type { Embeddings } from '@langchain/core/embeddings';
import logger from '../utils/logger'; import logger from '../utils/logger';
import { IterableReadableStream } from '@langchain/core/utils/stream';
const writingAssistantPrompt = ` const writingAssistantPrompt = `
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query. You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
@ -20,7 +19,7 @@ Since you are a writing assistant, you would not perform web searches. If you th
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
const handleStream = async ( const handleStream = async (
stream: IterableReadableStream<StreamEvent>, stream: AsyncGenerator<StreamEvent, any, unknown>,
emitter: eventEmitter, emitter: eventEmitter,
) => { ) => {
for await (const event of stream) { for await (const event of stream) {

View File

@ -19,7 +19,6 @@ import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events'; import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity'; import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger'; import logger from '../utils/logger';
import { IterableReadableStream } from '@langchain/core/utils/stream';
const basicYoutubeSearchRetrieverPrompt = ` const basicYoutubeSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -67,7 +66,7 @@ const basicYoutubeSearchResponsePrompt = `
const strParser = new StringOutputParser(); const strParser = new StringOutputParser();
const handleStream = async ( const handleStream = async (
stream: IterableReadableStream<StreamEvent>, stream: AsyncGenerator<StreamEvent, any, unknown>,
emitter: eventEmitter, emitter: eventEmitter,
) => { ) => {
for await (const event of stream) { for await (const event of stream) {

View File

@ -3,7 +3,6 @@ import { htmlToText } from 'html-to-text';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { Document } from '@langchain/core/documents'; import { Document } from '@langchain/core/documents';
import pdfParse from 'pdf-parse'; import pdfParse from 'pdf-parse';
import logger from '../utils/logger';
export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => { export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
const splitter = new RecursiveCharacterTextSplitter(); const splitter = new RecursiveCharacterTextSplitter();
@ -17,81 +16,66 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
? link ? link
: `https://${link}`; : `https://${link}`;
try { const res = await axios.get(link, {
const res = await axios.get(link, { responseType: 'arraybuffer',
responseType: 'arraybuffer', });
});
const isPdf = res.headers['content-type'] === 'application/pdf'; const isPdf = res.headers['content-type'] === 'application/pdf';
if (isPdf) { if (isPdf) {
const pdfText = await pdfParse(res.data); const pdfText = await pdfParse(res.data);
const parsedText = pdfText.text const parsedText = pdfText.text
.replace(/(\r\n|\n|\r)/gm, ' ')
.replace(/\s+/g, ' ')
.trim();
const splittedText = await splitter.splitText(parsedText);
const title = 'PDF Document';
const linkDocs = splittedText.map((text) => {
return new Document({
pageContent: text,
metadata: {
title: title,
url: link,
},
});
});
docs.push(...linkDocs);
return;
}
const parsedText = htmlToText(res.data.toString('utf8'), {
selectors: [
{
selector: 'a',
options: {
ignoreHref: true,
},
},
],
})
.replace(/(\r\n|\n|\r)/gm, ' ') .replace(/(\r\n|\n|\r)/gm, ' ')
.replace(/\s+/g, ' ') .replace(/\s+/g, ' ')
.trim(); .trim();
const splittedText = await splitter.splitText(parsedText); const splittedText = await splitter.splitText(parsedText);
const title = res.data const title = 'PDF Document';
.toString('utf8')
.match(/<title>(.*?)<\/title>/)?.[1];
const linkDocs = splittedText.map((text) => { const linkDocs = splittedText.map((text) => {
return new Document({ return new Document({
pageContent: text, pageContent: text,
metadata: { metadata: {
title: title || link, title: title,
url: link, url: link,
}, },
}); });
}); });
docs.push(...linkDocs); docs.push(...linkDocs);
} catch (err) { return;
logger.error(
`Error at generating documents from links: ${err.message}`,
);
docs.push(
new Document({
pageContent: `Failed to retrieve content from the link: ${err.message}`,
metadata: {
title: 'Failed to retrieve content',
url: link,
},
}),
);
} }
const parsedText = htmlToText(res.data.toString('utf8'), {
selectors: [
{
selector: 'a',
options: {
ignoreHref: true,
},
},
],
})
.replace(/(\r\n|\n|\r)/gm, ' ')
.replace(/\s+/g, ' ')
.trim();
const splittedText = await splitter.splitText(parsedText);
const title = res.data
.toString('utf8')
.match(/<title>(.*?)<\/title>/)?.[1];
const linkDocs = splittedText.map((text) => {
return new Document({
pageContent: text,
metadata: {
title: title || link,
url: link,
},
});
});
docs.push(...linkDocs);
}), }),
); );

View File

@ -83,9 +83,7 @@ const useSocket = (
chatModelProvider = Object.keys(chatModelProviders)[0]; chatModelProvider = Object.keys(chatModelProviders)[0];
if (chatModelProvider === 'custom_openai') { if (chatModelProvider === 'custom_openai') {
toast.error( toast.error('Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL');
'Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL',
);
setError(true); setError(true);
return; return;
} else { } else {
@ -222,7 +220,7 @@ const useSocket = (
if (data.type === 'error') { if (data.type === 'error') {
toast.error(data.data); toast.error(data.data);
} }
}); })
setWs(ws); setWs(ws);
}; };
@ -237,6 +235,13 @@ const useSocket = (
setError(true); setError(true);
} }
} }
return () => {
if (ws?.readyState === 1) {
ws?.close();
console.log('[DEBUG] closed');
}
};
}, [ws, url, setIsWSReady, setError]); }, [ws, url, setIsWSReady, setError]);
return ws; return ws;
@ -343,15 +348,6 @@ const ChatWindow = ({ id }: { id?: string }) => {
// eslint-disable-next-line react-hooks/exhaustive-deps // eslint-disable-next-line react-hooks/exhaustive-deps
}, []); }, []);
useEffect(() => {
return () => {
if (ws?.readyState === 1) {
ws.close();
console.log('[DEBUG] closed');
}
};
}, []);
const messagesRef = useRef<Message[]>([]); const messagesRef = useRef<Message[]>([]);
useEffect(() => { useEffect(() => {

View File

@ -1,6 +1,6 @@
{ {
"name": "perplexica-frontend", "name": "perplexica-frontend",
"version": "1.9.0-rc2", "version": "1.9.0-rc1",
"license": "MIT", "license": "MIT",
"author": "ItzCrazyKns", "author": "ItzCrazyKns",
"scripts": { "scripts": {