Merge remote-tracking branch 'origin/master' into ollama-auth

2024-08-02 10:17:08 +02:00 · 2024-08-02 10:17:08 +02:00 · c3dac38b6a
parent df8d924a89 c4932c659a
commit c3dac38b6a
3 changed files with 41 additions and 30 deletions
--- a/src/agents/webSearchAgent.ts
+++ b/src/agents/webSearchAgent.ts
@@ -157,35 +157,43 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
          question = 'Summarize';
        }
-        let docs = []
+        let docs = [];
        const linkDocs = await getDocumentsFromLinks({ links });
        const docGroups: Document[] = [];
        linkDocs.map((doc) => {
-          const URLDocExists = docGroups.find((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10);
+          const URLDocExists = docGroups.find(
            (d) =>
              d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
          );
          if (!URLDocExists) {
            docGroups.push({
              ...doc,
              metadata: {
                ...doc.metadata,
-                totalDocs: 1
+                totalDocs: 1,
-              }
+              },
            });
          }
-          const docIndex = docGroups.findIndex((d) => d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10);
+          const docIndex = docGroups.findIndex(
            (d) =>
              d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10,
          );
          if (docIndex !== -1) {
-            docGroups[docIndex].pageContent = docGroups[docIndex].pageContent + `\n\n` +  doc.pageContent;
+            docGroups[docIndex].pageContent =
              docGroups[docIndex].pageContent + `\n\n` + doc.pageContent;
            docGroups[docIndex].metadata.totalDocs += 1;
          }
-        })
+        });
-        await Promise.all(docGroups.map(async (doc) => {
+        await Promise.all(
-          const res = await llm.invoke(`
+          docGroups.map(async (doc) => {
            const res = await llm.invoke(`
            You are a text summarizer. You need to summarize the text provided inside the \`text\` XML block. 
            You need to summarize the text into 1 or 2 sentences capturing the main idea of the text.
            You need to make sure that you don't miss any point while summarizing the text.
@@ -204,16 +212,17 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
            Make sure to answer the query in the summary.
          `);
-          const document = new Document({
+            const document = new Document({
-            pageContent: res.content as string,
+              pageContent: res.content as string,
-            metadata: {
+              metadata: {
-              title: doc.metadata.title,
+                title: doc.metadata.title,
-              url: doc.metadata.url,
+                url: doc.metadata.url,
-            },
+              },
-          })
+            });
-          docs.push(document)
+            docs.push(document);
-        }))
+          }),
        );
        return { query: question, docs: docs };
      } else {
--- a/src/app.ts
+++ b/src/app.ts
@@ -30,9 +30,9 @@ server.listen(port, () => {
 startWebSocketServer(server);
 process.on('uncaughtException', (err, origin) => {
-  logger.error(`Uncaught Exception at ${origin}: ${err}`)
+  logger.error(`Uncaught Exception at ${origin}: ${err}`);
-})
+});
 process.on('unhandledRejection', (reason, promise) => {
-  logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`)
+  logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`);
-})
+});
--- a/src/lib/linkDocument.ts
+++ b/src/lib/linkDocument.ts
@@ -1,8 +1,8 @@
 import axios from 'axios';
-import { htmlToText } from 'html-to-text'
+import { htmlToText } from 'html-to-text';
 import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import { Document } from '@langchain/core/documents';
-import pdfParse from 'pdf-parse'
+import pdfParse from 'pdf-parse';
 export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
  const splitter = new RecursiveCharacterTextSplitter();
@@ -23,14 +23,14 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
      const isPdf = res.headers['content-type'] === 'application/pdf';
      if (isPdf) {
-        const pdfText = await pdfParse(res.data)
+        const pdfText = await pdfParse(res.data);
        const parsedText = pdfText.text
          .replace(/(\r\n|\n|\r)/gm, ' ')
          .replace(/\s+/g, ' ')
          .trim();
        const splittedText = await splitter.splitText(parsedText);
-        const title = 'PDF Document'
+        const title = 'PDF Document';
        const linkDocs = splittedText.map((text) => {
          return new Document({
@@ -52,16 +52,18 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
            selector: 'a',
            options: {
              ignoreHref: true,
-            }
+            },
          },
-        ]
+        ],
      })
        .replace(/(\r\n|\n|\r)/gm, ' ')
        .replace(/\s+/g, ' ')
        .trim();
      const splittedText = await splitter.splitText(parsedText);
-      const title = res.data.toString('utf8').match(/<title>(.*?)<\/title>/)?.[1];
+      const title = res.data
        .toString('utf8')
        .match(/<title>(.*?)<\/title>/)?.[1];
      const linkDocs = splittedText.map((text) => {
        return new Document({