Compare commits

..

2 Commits

Author SHA1 Message Date
projectmoon c3dac38b6a Merge remote-tracking branch 'origin/master' into ollama-auth 2024-08-02 10:17:08 +02:00
ItzCrazyKns c4932c659a feat(app): lint 2024-07-31 20:17:57 +05:30
3 changed files with 41 additions and 30 deletions

View File

@ -157,34 +157,42 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
question = 'Summarize'; question = 'Summarize';
} }
let docs = [] let docs = [];
const linkDocs = await getDocumentsFromLinks({ links }); const linkDocs = await getDocumentsFromLinks({ links });
const docGroups: Document[] = []; const docGroups: Document[] = [];
linkDocs.map((doc) => { linkDocs.map((doc) => {
const URLDocExists = docGroups.find((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10); const URLDocExists = docGroups.find(
(d) =>
d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
);
if (!URLDocExists) { if (!URLDocExists) {
docGroups.push({ docGroups.push({
...doc, ...doc,
metadata: { metadata: {
...doc.metadata, ...doc.metadata,
totalDocs: 1 totalDocs: 1,
} },
}); });
} }
const docIndex = docGroups.findIndex((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10); const docIndex = docGroups.findIndex(
(d) =>
d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
);
if (docIndex !== -1) { if (docIndex !== -1) {
docGroups[docIndex].pageContent = docGroups[docIndex].pageContent + `\n\n` + doc.pageContent; docGroups[docIndex].pageContent =
docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
docGroups[docIndex].metadata.totalDocs += 1; docGroups[docIndex].metadata.totalDocs += 1;
} }
}) });
await Promise.all(docGroups.map(async (doc) => { await Promise.all(
docGroups.map(async (doc) => {
const res = await llm.invoke(` const res = await llm.invoke(`
You are a text summarizer. You need to summarize the text provided inside the \`text\` XML block. You are a text summarizer. You need to summarize the text provided inside the \`text\` XML block.
You need to summarize the text into 1 or 2 sentences capturing the main idea of the text. You need to summarize the text into 1 or 2 sentences capturing the main idea of the text.
@ -210,10 +218,11 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
title: doc.metadata.title, title: doc.metadata.title,
url: doc.metadata.url, url: doc.metadata.url,
}, },
}) });
docs.push(document) docs.push(document);
})) }),
);
return { query: question, docs: docs }; return { query: question, docs: docs };
} else { } else {

View File

@ -30,9 +30,9 @@ server.listen(port, () => {
startWebSocketServer(server); startWebSocketServer(server);
process.on('uncaughtException', (err, origin) => { process.on('uncaughtException', (err, origin) => {
logger.error(`Uncaught Exception at ${origin}: ${err}`) logger.error(`Uncaught Exception at ${origin}: ${err}`);
}) });
process.on('unhandledRejection', (reason, promise) => { process.on('unhandledRejection', (reason, promise) => {
logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`) logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`);
}) });

View File

@ -1,8 +1,8 @@
import axios from 'axios'; import axios from 'axios';
import { htmlToText } from 'html-to-text' import { htmlToText } from 'html-to-text';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { Document } from '@langchain/core/documents'; import { Document } from '@langchain/core/documents';
import pdfParse from 'pdf-parse' import pdfParse from 'pdf-parse';
export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => { export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
const splitter = new RecursiveCharacterTextSplitter(); const splitter = new RecursiveCharacterTextSplitter();
@ -23,14 +23,14 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
const isPdf = res.headers['content-type'] === 'application/pdf'; const isPdf = res.headers['content-type'] === 'application/pdf';
if (isPdf) { if (isPdf) {
const pdfText = await pdfParse(res.data) const pdfText = await pdfParse(res.data);
const parsedText = pdfText.text const parsedText = pdfText.text
.replace(/(\r\n|\n|\r)/gm, ' ') .replace(/(\r\n|\n|\r)/gm, ' ')
.replace(/\s+/g, ' ') .replace(/\s+/g, ' ')
.trim(); .trim();
const splittedText = await splitter.splitText(parsedText); const splittedText = await splitter.splitText(parsedText);
const title = 'PDF Document' const title = 'PDF Document';
const linkDocs = splittedText.map((text) => { const linkDocs = splittedText.map((text) => {
return new Document({ return new Document({
@ -52,16 +52,18 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
selector: 'a', selector: 'a',
options: { options: {
ignoreHref: true, ignoreHref: true,
}
}, },
] },
],
}) })
.replace(/(\r\n|\n|\r)/gm, ' ') .replace(/(\r\n|\n|\r)/gm, ' ')
.replace(/\s+/g, ' ') .replace(/\s+/g, ' ')
.trim(); .trim();
const splittedText = await splitter.splitText(parsedText); const splittedText = await splitter.splitText(parsedText);
const title = res.data.toString('utf8').match(/<title>(.*?)<\/title>/)?.[1]; const title = res.data
.toString('utf8')
.match(/<title>(.*?)<\/title>/)?.[1];
const linkDocs = splittedText.map((text) => { const linkDocs = splittedText.map((text) => {
return new Document({ return new Document({