feat: (re)add favicon scraper

This commit is contained in:
Sonny
2024-05-10 02:06:51 +02:00
committed by Sonny
parent 73f8c0c513
commit 817b9baafc
6 changed files with 332 additions and 5 deletions

View File

@@ -0,0 +1,203 @@
import type { HttpContext } from '@adonisjs/core/http';
import logger from '@adonisjs/core/services/logger';
import { parse } from 'node-html-parser';
import { createReadStream } from 'node:fs';
import { resolve } from 'node:path';
/**
 * An image resolved by the favicon controller, ready to be sent to the client.
 */
interface Favicon {
/** Raw image bytes sent as the response body. */
buffer: Buffer;
/** Where the image came from: the final response URL, or the inline data URI. */
url: string;
/** MIME type of the image; sent as the `Content-Type` response header. */
type: string;
/** Size of the image in bytes; sent as the `Content-Length` response header. */
size: number;
}
// TODO: refactor this controller (adapted from the previous version of MyLinks)
/**
 * Resolves and serves the favicon of an arbitrary website.
 *
 * Resolution strategy (see {@link FaviconsController.index}):
 *  1. try `<origin>/favicon.ico`;
 *  2. otherwise download the page and look for a `<link rel="icon" ...>` tag
 *     (either an inline data URI or a URL resolved against the page URL);
 *  3. otherwise stream the bundled placeholder image.
 *
 * NOTE(security): this endpoint fetches user-supplied URLs from the server.
 * Requests are restricted to http/https, but nothing here prevents targeting
 * private/internal network addresses (SSRF). Add an allow/deny list at the
 * network level if this endpoint is publicly exposed.
 */
export default class FaviconsController {
  /** Browser-like user agent; sent with every outgoing request. */
  private readonly userAgent =
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0';

  /** `rel` values (lowercase) that identify a `<link>` tag as an icon. */
  private readonly relList = [
    'icon',
    'shortcut icon',
    'apple-touch-icon',
    'apple-touch-icon-precomposed',
    'apple-touch-startup-image',
    'mask-icon',
    'fluid-icon',
  ];

  /**
   * Route handler: sends the favicon of the site given in the `url` query
   * string parameter, or the placeholder image when none can be retrieved.
   *
   * @param ctx - Current HTTP context.
   * @throws Error when the `url` query parameter is missing or not a string.
   * @throws TypeError when `url` cannot be parsed as an absolute URL.
   */
  async index(ctx: HttpContext) {
    const url: unknown = ctx.request.qs()?.url;
    if (typeof url !== 'string' || url.length === 0) {
      throw new Error('Missing URL');
    }

    // First attempt: the conventional /favicon.ico at the site's origin.
    const faviconRequestUrl = this.buildFaviconUrl(url, '/favicon.ico');
    try {
      const favicon = await this.getFavicon(faviconRequestUrl);
      return this.sendImage(ctx, favicon);
    } catch {
      logger.info(
        `[Favicon] [first: ${faviconRequestUrl}] Unable to retrieve favicon from favicon.ico url`
      );
    }

    // Second attempt: read the page and follow its <link rel="icon"> tag.
    // Everything (including the page download itself) is inside the try so
    // that any failure degrades to the placeholder image instead of a 500.
    let finalUrl = url;
    try {
      const requestDocument = await this.makeRequestWithUserAgent(url);
      const faviconPath = this.findFaviconPath(await requestDocument.text());
      if (!faviconPath) {
        logger.warn(
          `[Favicon] [first: ${faviconRequestUrl}] No link/href attribute found`
        );
        return this.sendDefaultImage(ctx);
      }

      if (this.isImageDataUri(faviconPath)) {
        logger.info(
          `[Favicon] [second: ${faviconRequestUrl}] info: data URI, convert it to buffer`
        );
        return this.sendImage(ctx, this.decodeDataUri(faviconPath));
      }

      // Resolve against the final (post-redirect) page URL, like a browser would.
      finalUrl = this.buildFaviconUrl(requestDocument.url || url, faviconPath);
      const favicon = await this.getFavicon(finalUrl);
      logger.info(`[Favicon] [second: ${finalUrl}] success: image found`);
      return this.sendImage(ctx, favicon);
    } catch (error) {
      const errorMessage =
        error instanceof Error && error.message
          ? error.message
          : 'Unable to retrieve favicon';
      logger.info(`[Favicon] [second: ${finalUrl}], error: ${errorMessage}`);
      return this.sendDefaultImage(ctx);
    }
  }

  /**
   * Resolves `faviconPath` against the page `url` using standard URL
   * resolution, so root-relative (`/x.ico`), document-relative (`x.ico`,
   * `../x.ico`), protocol-relative (`//cdn/x.ico`) and absolute hrefs are all
   * handled; the query string and hash of `url` are not carried over.
   *
   * @param url - Absolute URL of the page the path was found on.
   * @param faviconPath - Path or URL of the favicon.
   * @returns The absolute favicon URL.
   * @throws TypeError when `url` is not a valid absolute URL.
   */
  private buildFaviconUrl(url: string, faviconPath: string): string {
    return new URL(faviconPath, url).href;
  }

  /**
   * Finds the href of the first `<link>` tag whose `rel` marks it as an icon.
   *
   * @param text - HTML document source.
   * @returns The trimmed href, or `undefined` when no icon link has one.
   */
  private findFaviconPath(text: string): string | undefined {
    const document = parse(text);
    for (const element of document.getElementsByTagName('link')) {
      const rel = element.getAttribute('rel');
      const href = element.getAttribute('href')?.trim();
      if (rel && href && this.isFaviconRel(rel)) {
        return href;
      }
    }
    return undefined;
  }

  /**
   * Tells whether a `rel` attribute value designates an icon. Matching is
   * case-insensitive and also accepts any single whitespace-separated token
   * (e.g. `"alternate icon"` or `"icon shortcut"`).
   */
  private isFaviconRel(rel: string): boolean {
    const normalizedRel = rel.trim().toLowerCase();
    if (this.relList.includes(normalizedRel)) {
      return true;
    }
    return normalizedRel
      .split(/\s+/)
      .some((token) => this.relList.includes(token));
  }

  /**
   * Downloads `url` and ensures the response is a non-empty image.
   *
   * @throws Error when the URL is missing, the request fails, or the response
   *   is empty or not an image.
   */
  private async getFavicon(url: string): Promise<Favicon> {
    if (!url) throw new Error('Missing URL');
    const favicon = await this.downloadImageFromUrl(url);
    if (!this.isImage(favicon.type) || favicon.size === 0) {
      throw new Error('Favicon path does not return an image');
    }
    return favicon;
  }

  /**
   * Performs a GET request with the browser-like user agent.
   *
   * @throws TypeError when `url` is not a valid absolute URL.
   * @throws Error when the URL scheme is not http or https.
   */
  private async makeRequestWithUserAgent(url: string): Promise<Response> {
    // Only web URLs are legitimate here; reject anything else up front.
    const { protocol } = new URL(url);
    if (protocol !== 'http:' && protocol !== 'https:') {
      throw new Error('Unsupported URL protocol');
    }
    const headers = new Headers();
    headers.set('User-Agent', this.userAgent);
    return fetch(url, { headers });
  }

  /**
   * Downloads the resource at `url` into memory.
   *
   * @throws Error when the response status is not in the 2xx range.
   */
  private async downloadImageFromUrl(url: string): Promise<Favicon> {
    const request = await this.makeRequestWithUserAgent(url);
    if (!request.ok) {
      throw new Error('Request failed');
    }
    const blob = await request.blob();
    return {
      buffer: Buffer.from(await blob.arrayBuffer()),
      url: request.url,
      type: blob.type,
      size: blob.size,
    };
  }

  /** True when the MIME type mentions `image`. */
  private isImage = (type: string) => type.includes('image');

  /** True when the href is an inline `data:image/...` URI. */
  private isImageDataUri = (data: string) => data.startsWith('data:image/');

  /**
   * Decodes an inline `data:` URI into a {@link Favicon}.
   *
   * The metadata before the first comma provides the MIME type and tells
   * whether the payload is base64 or percent-encoded; only the payload after
   * the comma is decoded.
   *
   * @throws Error when the URI has no `,` separator.
   * @throws URIError when a percent-encoded payload is malformed.
   */
  private decodeDataUri(dataUri: string): Favicon {
    const separatorIndex = dataUri.indexOf(',');
    if (separatorIndex === -1) {
      throw new Error('Malformed data URI');
    }
    // "data:image/png;base64" -> mime "image/png", parameters ["base64"]
    const [rawMimeType = '', ...parameters] = dataUri
      .slice('data:'.length, separatorIndex)
      .split(';');
    const payload = dataUri.slice(separatorIndex + 1);
    const isBase64 = parameters.some(
      (parameter) => parameter.trim().toLowerCase() === 'base64'
    );
    const buffer = isBase64
      ? Buffer.from(payload, 'base64')
      : Buffer.from(decodeURIComponent(payload), 'utf8');
    return {
      buffer,
      url: dataUri,
      type: rawMimeType.trim().toLowerCase() || 'image/x-icon',
      size: buffer.length,
    };
  }

  /** Sends the image bytes with matching Content-Type and Content-Length. */
  private sendImage(ctx: HttpContext, { buffer, type, size }: Favicon) {
    ctx.response.header('Content-Type', type);
    ctx.response.header('Content-Length', size);
    ctx.response.send(buffer);
  }

  /** Streams the bundled placeholder image (`public/empty-image.png`). */
  private sendDefaultImage(ctx: HttpContext) {
    const readStream = createReadStream(
      resolve(process.cwd(), './public/empty-image.png')
    );
    // NOTE(review): 206 (Partial Content) is kept from the previous
    // implementation although the whole file is sent; confirm whether 200
    // is intended before changing it, as clients may rely on it.
    ctx.response.writeHead(206);
    ctx.response.stream(readStream);
  }
}

View File

@@ -1,5 +1,5 @@
import styled from '@emotion/styled';
import { useEffect, useState } from 'react';
import { useEffect, useRef, useState } from 'react';
import { TbLoader3 } from 'react-icons/tb';
import { TfiWorld } from 'react-icons/tfi';
import { rotate } from '~/styles/keyframes';
@@ -36,6 +36,8 @@ export default function LinkFavicon({
size = 32,
noMargin = false,
}: LinkFaviconProps) {
const imgRef = useRef<HTMLImageElement>(null);
const [isFailed, setFailed] = useState<boolean>(false);
const [isLoading, setLoading] = useState<boolean>(true);
@@ -48,7 +50,11 @@ export default function LinkFavicon({
};
useEffect(() => {
if (!isLoading) return;
// Ugly hack, onLoad cb not triggered on first load when SSR
if (imgRef.current?.complete) {
handleStopLoading();
return;
}
const id = setTimeout(() => handleErrorLoading(), IMG_LOAD_TIMEOUT);
return () => clearTimeout(id);
}, [isLoading]);
@@ -57,12 +63,14 @@ export default function LinkFavicon({
<Favicon style={{ marginRight: !noMargin ? '1em' : '0' }}>
{!isFailed ? (
<img
src={`/favicon?urlParam=${url}`}
src={`/favicon?url=${url}`}
onError={handleErrorLoading}
onLoad={handleStopLoading}
height={size}
width={size}
alt="icon"
ref={imgRef}
decoding="async"
/>
) : (
<TfiWorld size={size} />

View File

@@ -36,7 +36,6 @@ export default function HomePage(props: Readonly<HomePageProps>) {
trackMouse: true,
onSwipedRight: open,
});
console.log(props.collections);
useEffect(() => {
if (!isMobile && isShowing) {

114
package-lock.json generated
View File

@@ -26,6 +26,7 @@
"edge.js": "^6.0.2",
"i18next": "^23.11.3",
"luxon": "^3.4.4",
"node-html-parser": "^6.1.13",
"pg": "^8.11.5",
"react": "^18.3.1",
"react-dnd": "^16.0.1",
@@ -3726,6 +3727,11 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="
},
"node_modules/brace-expansion": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
@@ -4474,6 +4480,32 @@
"node": ">= 0.8"
}
},
"node_modules/css-select": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.1.0",
"domhandler": "^5.0.2",
"domutils": "^3.0.1",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/css-what": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/csstype": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
@@ -4788,6 +4820,57 @@
"node": ">=6.0.0"
}
},
"node_modules/dom-serializer": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
"dependencies": {
"domelementtype": "^2.3.0",
"domhandler": "^5.0.2",
"entities": "^4.2.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/domelementtype": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
]
},
"node_modules/domhandler": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
"dependencies": {
"domelementtype": "^2.3.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/domutils": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
"dependencies": {
"dom-serializer": "^2.0.0",
"domelementtype": "^2.3.0",
"domhandler": "^5.0.3"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/dotenv": {
"version": "16.4.5",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
@@ -4911,6 +4994,17 @@
"node": ">=8.6"
}
},
"node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/error-ex": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz",
@@ -7848,6 +7942,15 @@
"node": ">= 0.6"
}
},
"node_modules/node-html-parser": {
"version": "6.1.13",
"resolved": "https://registry.npmjs.org/node-html-parser/-/node-html-parser-6.1.13.tgz",
"integrity": "sha512-qIsTMOY4C/dAa5Q5vsobRpOOvPfC4pB61UVW2uSwZNUp0QU/jCekTal1vMmbO0DgdHeLUJpv/ARmDqErVxA3Sg==",
"dependencies": {
"css-select": "^5.1.0",
"he": "1.2.0"
}
},
"node_modules/node-releases": {
"version": "2.0.14",
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz",
@@ -7921,6 +8024,17 @@
"resolved": "https://registry.npmjs.org/nprogress/-/nprogress-0.2.0.tgz",
"integrity": "sha512-I19aIingLgR1fmhftnbWWO3dXc0hSxqHQHQb3H8m+K3TnEn/iSeTZZOyvKXWqQESMwuUVnatlCnZdLBZZt2VSA=="
},
"node_modules/nth-check": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
"dependencies": {
"boolbase": "^1.0.0"
},
"funding": {
"url": "https://github.com/fb55/nth-check?sponsor=1"
}
},
"node_modules/object-inspect": {
"version": "1.13.1",
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",

View File

@@ -77,6 +77,7 @@
"edge.js": "^6.0.2",
"i18next": "^23.11.3",
"luxon": "^3.4.4",
"node-html-parser": "^6.1.13",
"pg": "^8.11.5",
"react": "^18.3.1",
"react-dnd": "^16.0.1",
@@ -111,4 +112,4 @@
"lint-staged": {
"*.js,*.ts,*.jsx,*.tsx": "eslint --cache --fix"
}
}
}

View File

@@ -7,11 +7,13 @@ const CollectionsController = () =>
import('#controllers/collections_controller');
const UsersController = () => import('#controllers/users_controller');
const AppsController = () => import('#controllers/apps_controller');
const FaviconsController = () => import('#controllers/favicons_controller');
router.get(PATHS.HOME, [AppsController, 'index']);
router.get(PATHS.AUTH.LOGIN, [UsersController, 'login']);
router.get(PATHS.AUTH.GOOGLE, [UsersController, 'google']);
router.get('/auth/callback', [UsersController, 'callbackAuth']);
router.get('/favicon', [FaviconsController, 'index']);
router
.group(() => {