diff --git a/pages/api/sources.ts b/pages/api/sources.ts
index 7b4c14e..dca01bd 100644
--- a/pages/api/sources.ts
+++ b/pages/api/sources.ts
@@ -19,7 +19,8 @@ const searchHandler = async (req: NextApiRequest, res: NextApiResponse) =>
     const sourceCount = 4;
 
     // GET LINKS
-    const response = await fetch(`https://www.google.com/search?q=${query}`);
+    const encodedQuery = encodeURIComponent(query);
+    const response = await fetch(`https://www.google.com/search?q=${encodedQuery}`);
     const html = await response.text();
     const $ = cheerio.load(html);
     const linkTags = $("a");
@@ -38,13 +39,26 @@ const searchHandler = async (req: NextApiRequest, res: NextApiResponse) =>
       }
     });
 
-    const filteredLinks = links.filter((link, idx) => {
-      const domain = new URL(link).hostname;
-
-      const excludeList = ["google", "facebook", "twitter", "instagram", "youtube", "tiktok"];
-      if (excludeList.some((site) => domain.includes(site))) return false;
-
-      return links.findIndex((link) => new URL(link).hostname === domain) === idx;
+    // Keep only the first link seen per hostname, skipping excluded sites
+    const filteredLinks = links.filter((link, idx) => {
+      try {
+        const url = new URL(link);
+        const domain = url.hostname;
+
+        const excludeList = ["google", "facebook", "twitter", "instagram", "youtube", "tiktok"];
+        if (excludeList.some((site) => domain.includes(site))) return false;
+
+        return links.findIndex((l) => {
+          try {
+            return new URL(l).hostname === domain;
+          } catch {
+            return false;
+          }
+        }) === idx;
+      } catch (error) {
+        console.error("Invalid URL:", link);
+        return false;
+      }
     });
 
     const finalLinks = filteredLinks.slice(0, sourceCount);
@@ -52,29 +66,32 @@ const searchHandler = async (req: NextApiRequest, res: NextApiResponse) =>
     // SCRAPE TEXT FROM LINKS
     const sources = (await Promise.all(
       finalLinks.map(async (link) => {
-        const response = await fetch(link);
-        const html = await response.text();
-        const dom = new JSDOM(html);
-        const doc = dom.window.document;
-        const parsed = new Readability(doc).parse();
-
-        if (parsed) {
-          let sourceText = cleanSourceText(parsed.textContent);
-
-          return { url: link, text: sourceText };
+        try {
+          const response = await fetch(link);
+          const html = await response.text();
+          const dom = new JSDOM(html);
+          const doc = dom.window.document;
+          const parsed = new Readability(doc).parse();
+
+          if (parsed) {
+            let sourceText = cleanSourceText(parsed.textContent);
+
+            return { url: link, text: sourceText };
+          }
+        } catch (error) {
+          console.error("Error scraping:", link, error);
         }
       })
-    )) as Source[];
-
-    const filteredSources = sources.filter((source) => source !== undefined);
+    )).filter((source): source is Source => source !== undefined);
 
-    for (const source of filteredSources) {
-      source.text = source.text.slice(0, 1500);
-    }
+    const filteredSources = sources.map((source) => ({
+      ...source,
+      text: source.text.slice(0, 1500)
+    }));
 
     res.status(200).json({ sources: filteredSources });
   } catch (err) {
-    console.log(err);
+    console.error(err);
     res.status(500).json({ sources: [] });
   }
 };
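A note on the dedup filter in the second hunk: running `findIndex` inside `filter` rescans the whole array for every link, which is quadratic. That is harmless for one page of search results, but a `Set` of seen hostnames does the same job in a single pass. A minimal standalone sketch, with made-up sample URLs:

```ts
// Keep-first-link-per-hostname, using a Set of seen hostnames instead of
// the findIndex scan in the diff above. Sample URLs are illustrative only.
const links = [
  "https://example.com/a",
  "https://example.com/b", // same hostname as the first entry, dropped
  "https://en.wikipedia.org/wiki/Example",
  "not-a-url", // malformed, dropped by the catch
];

const excludeList = ["google", "facebook", "twitter", "instagram", "youtube", "tiktok"];
const seen = new Set<string>();

const filteredLinks = links.filter((link) => {
  try {
    const { hostname } = new URL(link); // throws on malformed input
    if (excludeList.some((site) => hostname.includes(site))) return false;
    if (seen.has(hostname)) return false;
    seen.add(hostname);
    return true;
  } catch {
    return false;
  }
});

console.log(filteredLinks);
// ["https://example.com/a", "https://en.wikipedia.org/wiki/Example"]
```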
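The scraping hunk calls `cleanSourceText`, which is imported from the project's utils and not part of this diff. A hypothetical minimal stand-in, assuming it only trims and collapses the whitespace that `Readability` tends to leave in `textContent` (the real helper may do more):

```ts
// Hypothetical stand-in for the cleanSourceText helper used in sources.ts;
// the actual implementation is not shown in this diff and may differ.
const cleanSourceText = (text: string): string =>
  text
    .trim()
    .replace(/(\n){4,}/g, "\n\n\n") // cap long runs of blank lines
    .replace(/ {3,}/g, "  ") // collapse long runs of spaces
    .replace(/\t/g, ""); // drop stray tabs
```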
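Finally, the `(source): source is Source => source !== undefined` predicate in the third hunk is a user-defined type guard: it lets TypeScript narrow `(Source | undefined)[]` to `Source[]` at the `filter` call, replacing the unsound `as Source[]` cast that previously hid the `undefined` entries from the compiler. This assumes `Source` matches the objects built in the `map` callback, roughly:

```ts
// Assumed shape of Source, inferred from the objects returned in the map
// above; the real type is defined elsewhere in the repo and may differ.
interface Source {
  url: string;
  text: string;
}
```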