refactor: optimize favicon fetcher with cache

This commit is contained in:
Sonny
2024-05-26 00:19:47 +02:00
committed by Sonny
parent 9481b0ad7d
commit 8437f6b96d
6 changed files with 288 additions and 222 deletions

View File

@@ -1,10 +1,8 @@
import FaviconNotFoundException from '#exceptions/favicon_not_found_exception';
import type { HttpContext } from '@adonisjs/core/http';
import logger from '@adonisjs/core/services/logger';
import { parse } from 'node-html-parser';
import { createReadStream } from 'node:fs';
import { resolve } from 'node:path';
const LOG_PREFIX = '[Favicon]';
import { cache } from '../lib/cache.js';
interface Favicon {
buffer: Buffer;
@@ -33,165 +31,123 @@ export default class FaviconsController {
throw new Error('Missing URL');
}
const faviconRequestUrl = this.buildFaviconUrl(url, '/favicon.ico');
const cacheNs = cache.namespace('favicon');
const favicon = await cacheNs.getOrSet({
key: url,
ttl: '1h',
factory: () => this.tryGetFavicon(url),
});
return this.sendImage(ctx, favicon);
}
private async tryGetFavicon(url: string): Promise<Favicon> {
const faviconUrl = this.buildFaviconUrl(url, '/favicon.ico');
try {
const favicon = await this.getFavicon(faviconRequestUrl);
return this.sendImage(ctx, favicon);
} catch (error) {
logger.debug(
`${LOG_PREFIX} [first: ${faviconRequestUrl}] unable to retrieve favicon from favicon.ico url`
);
return await this.fetchFavicon(faviconUrl);
} catch {
logger.debug(`Unable to retrieve favicon from ${faviconUrl}`);
}
const requestDocument = await this.makeRequestWithUserAgent(url);
const documentAsText = await requestDocument.text();
const documentText = await this.fetchDocumentText(url);
const faviconPath = this.extractFaviconPath(documentText);
const faviconPath = this.findFaviconPath(documentAsText);
if (!faviconPath) {
logger.debug(
`${LOG_PREFIX} [first: ${faviconRequestUrl}] no link/href attribute found`
);
return this.sendDefaultImage(ctx);
throw new FaviconNotFoundException(`No favicon path found in ${url}`);
}
const finalUrl = this.buildFaviconUrl(requestDocument.url, faviconPath);
try {
if (!faviconPath) {
throw new Error('Unable to find favicon path');
}
if (this.isBase64Image(faviconPath)) {
logger.debug(
`${LOG_PREFIX} [second: ${faviconRequestUrl}] info: base64, convert it to buffer`
);
const buffer = this.convertBase64ToBuffer(faviconPath);
return this.sendImage(ctx, {
buffer,
type: 'image/x-icon',
size: buffer.length,
url: faviconPath,
});
}
// eslint-disable-next-line @typescript-eslint/no-shadow
const finalUrl = faviconPath.startsWith('http')
? faviconPath
: this.buildFaviconUrl(requestDocument.url, faviconPath);
const favicon = await this.downloadImageFromUrl(finalUrl);
if (!this.isImage(favicon.type)) {
throw new Error('Favicon path does not return an image');
}
logger.debug(`${LOG_PREFIX} [second: ${finalUrl}] success: image found`);
return this.sendImage(ctx, favicon);
} catch (error) {
const errorMessage = error?.message || 'Unable to retrieve favicon';
logger.debug(`${LOG_PREFIX} [second: ${finalUrl}] error`, {
errorMessage,
});
return this.sendDefaultImage(ctx);
}
return this.fetchFaviconFromPath(url, faviconPath);
}
private buildFaviconUrl(url: string, faviconPath: string) {
const { origin } = new URL(url);
if (faviconPath.startsWith('/')) {
// https://example.com + /favicon.ico
return origin + faviconPath;
}
// https://example.com/a/b?c=d -> https://example.com/a/b
const slimUrl = this.urlWithoutSearchParams(url);
// https://example.com/a/b/ -> https://example.com/a/b
const newUrl = slimUrl.endsWith('/') ? slimUrl.slice(0, -1) : slimUrl;
if (newUrl === origin) {
return `${newUrl}/${faviconPath}`;
private async fetchFavicon(url: string): Promise<Favicon> {
const response = await this.fetchWithUserAgent(url);
if (!response.ok) {
throw new FaviconNotFoundException(`Request to ${url} failed`);
}
// https://example.com/a/b or https://example.com/a/b/cdef -> https://example.com/a/
const relativeUrl = this.removeLastSectionUrl(newUrl) + '/';
if (relativeUrl.endsWith('/')) {
return relativeUrl + faviconPath;
const blob = await response.blob();
if (!this.isImage(blob.type) || blob.size === 0) {
throw new FaviconNotFoundException(`Invalid image at ${url}`);
}
// https://example.com/a -> https://example.com/a/favicon.ico
return `${relativeUrl}/${faviconPath}`;
}
/**
 * Rebuilds `url` from its protocol, host and pathname only, which leaves out
 * the query string and the fragment.
 *
 * @param url - Absolute URL; `new URL` throws when it cannot be parsed.
 * @returns The URL reduced to `protocol//host/pathname`.
 */
private urlWithoutSearchParams(url: string) {
  const { protocol, host, pathname } = new URL(url);
  return `${protocol}//${host}${pathname}`;
}
/**
 * Drops everything from the last `/` (inclusive) to the end of `url`.
 *
 * @param url - URL or path string.
 * @returns The part before the last `/`, or an empty string when `url`
 * contains no `/` at all.
 */
private removeLastSectionUrl(url: string) {
  const lastSlash = url.lastIndexOf('/');
  // No separator: the only section is removed, leaving nothing
  return lastSlash === -1 ? '' : url.slice(0, lastSlash);
}
/**
 * Looks for a favicon `<link>` in an HTML document.
 *
 * @param text - Raw HTML to parse.
 * @returns The `href` of the first `<link>` whose `rel` is listed in
 * `this.relList` and whose `href` is non-empty, otherwise `undefined`.
 */
private findFaviconPath(text: string) {
  const root = parse(text);
  for (const link of Array.from(root.getElementsByTagName('link'))) {
    if (!link) {
      continue;
    }
    const href = link.getAttribute('href');
    if (this.relList.includes(link.getAttribute('rel')!) && href) {
      return href;
    }
  }
  return undefined;
}
/**
 * Downloads `url` and makes sure the response is a non-empty image.
 *
 * @param url - Address of the favicon to download.
 * @returns The downloaded favicon.
 * @throws Error when `url` is empty, or when the download is not an image or
 * has a size of zero.
 */
private async getFavicon(url: string): Promise<Favicon> {
  if (!url) {
    throw new Error('Missing URL');
  }

  const downloaded = await this.downloadImageFromUrl(url);
  const isUsableImage = this.isImage(downloaded.type) && downloaded.size !== 0;
  if (!isUsableImage) {
    throw new Error('Favicon path does not return an image');
  }

  return downloaded;
}
/**
 * Fetches `url` with the `User-Agent` header set to `this.userAgent`.
 *
 * @param url - Address to request.
 * @returns The response returned by `fetch`.
 */
private async makeRequestWithUserAgent(url: string) {
  const requestHeaders = new Headers({ 'User-Agent': this.userAgent });
  return await fetch(url, { headers: requestHeaders });
}
private async downloadImageFromUrl(url: string): Promise<Favicon> {
const request = await this.makeRequestWithUserAgent(url);
if (!request.ok) {
throw new Error('Request failed');
}
const blob = await request.blob();
return {
buffer: Buffer.from(await blob.arrayBuffer()),
url: request.url,
url: response.url,
type: blob.type,
size: blob.size,
};
}
private isImage = (type: string) => type.includes('image');
private async fetchDocumentText(url: string): Promise<string> {
const response = await this.fetchWithUserAgent(url);
if (!response.ok) {
throw new FaviconNotFoundException(`Request to ${url} failed`);
}
private isBase64Image = (data: string) => data.startsWith('data:image/');
return await response.text();
}
private convertBase64ToBuffer = (base64: string) =>
Buffer.from(base64, 'base64');
/**
 * Finds the favicon reference declared in an HTML document.
 *
 * Walks the `<link>` elements in the order `getElementsByTagName` returns
 * them and picks the first one whose `rel` attribute is listed in
 * `this.relList` and whose `href` is non-empty.
 *
 * @param html - Raw HTML of the page.
 * @returns The trimmed `href` value, or `undefined` when no usable link
 * exists.
 */
private extractFaviconPath(html: string): string | undefined {
  const document = parse(html);

  for (const element of document.getElementsByTagName('link')) {
    const rel = element.getAttribute('rel');
    const href = element.getAttribute('href')?.trim();

    // A matching <link> without a usable href must not hide a later valid one
    if (rel && href && this.relList.includes(rel)) {
      return href;
    }
  }

  return undefined;
}
/**
 * Turns a favicon reference found in a page into a favicon.
 *
 * A `data:image/...` reference is decoded locally; anything else is resolved
 * against `baseUrl` and downloaded.
 *
 * @param baseUrl - URL of the page the reference was found in.
 * @param path - The reference itself (data URI, path or URL).
 * @returns The decoded or downloaded favicon.
 */
private async fetchFaviconFromPath(
  baseUrl: string,
  path: string
): Promise<Favicon> {
  if (!this.isBase64Image(path)) {
    const faviconUrl = this.buildFaviconUrl(baseUrl, path);
    return this.fetchFavicon(faviconUrl);
  }

  const buffer = this.convertBase64ToBuffer(path);
  // "data:image/png;base64,..." -> "image/png", so the response is labelled
  // with the media type the data URI declares rather than a fixed one
  const declaredType = /^data:([^;,]+)/i.exec(path)?.[1]?.toLowerCase();

  return {
    buffer,
    type: declaredType ?? 'image/x-icon',
    size: buffer.length,
    url: path,
  };
}
/**
 * Resolves a favicon reference against the page it was found on, following
 * standard URL resolution.
 *
 * - `/favicon.ico` resolves against the origin of `base`.
 * - `icon.png`, `./icon.png` and `../icon.png` resolve relative to the path
 *   of `base`.
 * - `https://cdn.example/icon.png` and `//cdn.example/icon.png` are kept as
 *   absolute references instead of being appended to `base`.
 *
 * @param base - Absolute URL of the page.
 * @param path - Favicon reference (path, relative URL or absolute URL).
 * @returns The absolute favicon URL.
 * @throws TypeError when `base` or the resolved reference is not a valid URL.
 */
private buildFaviconUrl(base: string, path: string): string {
  // Drop the page's query string and fragment before resolving so they can
  // never end up in the favicon URL
  const pageUrl = this.urlWithoutSearchParams(base);
  return new URL(path, pageUrl).href;
}
/**
 * Returns `url` reduced to its protocol, host and pathname, leaving out the
 * query string and the fragment.
 *
 * @param url - Absolute URL; `new URL` throws when it cannot be parsed.
 */
private urlWithoutSearchParams(url: string): string {
  const parsed = new URL(url);
  return [parsed.protocol, '//', parsed.host, parsed.pathname].join('');
}
/**
 * Tells whether a media type string begins with `image/`.
 *
 * @param type - Media type to check, e.g. the `type` of a downloaded blob.
 */
private isImage(type: string): boolean {
  return /^image\//.test(type);
}
/**
 * Tells whether `data` begins with the `data:image/` prefix.
 *
 * Only the prefix is inspected; the encoding of the payload is not checked.
 *
 * @param data - Favicon reference to check.
 */
private isBase64Image(data: string): boolean {
  const prefix = 'data:image/';
  return data.slice(0, prefix.length) === prefix;
}
/**
 * Decodes the payload of a data URI into a buffer.
 *
 * - `data:<type>;base64,<payload>`: the payload is base64-decoded.
 * - `data:<type>,<payload>`: the payload is percent-decoded and stored as
 *   UTF-8 text (the usual form for inline SVG).
 * - No comma at all: the whole string is treated as a bare base64 payload
 *   instead of making `Buffer.from` throw on `undefined`.
 *
 * @param base64 - Data URI, or a bare base64 string.
 * @returns The decoded bytes.
 */
private convertBase64ToBuffer(base64: string): Buffer {
  const commaIndex = base64.indexOf(',');
  if (commaIndex === -1) {
    return Buffer.from(base64, 'base64');
  }

  const header = base64.slice(0, commaIndex);
  const payload = base64.slice(commaIndex + 1);

  if (/;base64$/i.test(header.trim())) {
    return Buffer.from(payload, 'base64');
  }

  try {
    return Buffer.from(decodeURIComponent(payload), 'utf8');
  } catch {
    // Malformed percent-escapes: keep the payload as written
    return Buffer.from(payload, 'utf8');
  }
}
/**
 * Requests `url` with the `User-Agent` header set to `this.userAgent`.
 *
 * @param url - Address to request.
 * @returns The response returned by `fetch`; the HTTP status is not checked
 * here.
 */
private async fetchWithUserAgent(url: string): Promise<Response> {
  const requestHeaders = new Headers();
  requestHeaders.set('User-Agent', this.userAgent);
  return fetch(url, { headers: requestHeaders });
}
/**
 * Writes a favicon to the HTTP response.
 *
 * Sets `Content-Type` and `Content-Length` from the favicon, then sends the
 * buffer as the body. `Content-Length` is set once, as a string.
 *
 * @param ctx - Context of the current request.
 * @param favicon - Favicon to send; only `buffer`, `type` and `size` are read.
 */
private sendImage(ctx: HttpContext, { buffer, type, size }: Favicon) {
  ctx.response.header('Content-Type', type);
  ctx.response.header('Content-Length', size.toString());
  // NOTE(review): the second argument presumably enables ETag generation;
  // confirm against the framework's response API
  ctx.response.send(buffer, true);
}
/**
 * Streams the placeholder image `./public/empty-image.png`, resolved from the
 * current working directory, as the response body.
 *
 * @param ctx - Context of the current request.
 */
private sendDefaultImage(ctx: HttpContext) {
  const placeholderPath = resolve(process.cwd(), './public/empty-image.png');
  const placeholder = createReadStream(placeholderPath);

  // NOTE(review): 206 Partial Content is sent without a Content-Range header;
  // confirm this status is intended
  ctx.response.writeHead(206);
  ctx.response.stream(placeholder);
}
}

View File

@@ -0,0 +1,20 @@
import { Exception } from '@adonisjs/core/exceptions';
import { HttpContext } from '@adonisjs/core/http';
import logger from '@adonisjs/core/services/logger';
import { createReadStream } from 'node:fs';
import { resolve } from 'node:path';
/**
 * Raised when no usable favicon can be obtained for a URL.
 *
 * Instead of an error payload, `handle` answers with the placeholder image
 * `./public/empty-image.png`.
 */
export default class FaviconNotFoundException extends Exception {
  static status = 404;
  static code = 'E_FAVICON_NOT_FOUND';

  /**
   * Streams the placeholder image as `image/png` and logs the error message
   * at debug level.
   *
   * NOTE(review): presumably invoked by the framework's exception handling
   * when this exception is thrown; confirm.
   *
   * @param error - The exception being handled.
   * @param ctx - Context of the request that triggered the exception.
   */
  async handle(error: this, ctx: HttpContext) {
    const placeholderPath = resolve(process.cwd(), './public/empty-image.png');
    const placeholder = createReadStream(placeholderPath);

    const { response } = ctx;
    response.header('Content-Type', 'image/png');
    response.stream(placeholder);

    logger.debug(error.message);
  }
}

10
app/lib/cache.ts Normal file
View File

@@ -0,0 +1,10 @@
import { BentoCache, bentostore } from 'bentocache';
import { memoryDriver } from 'bentocache/drivers/memory';
/**
 * Shared BentoCache instance.
 *
 * Declares a single store named `cache`, which is also the default store,
 * backed only by an L1 layer using the memory driver.
 *
 * NOTE(review): confirm the unit of `maxSize` (bytes vs. number of entries)
 * in the bentocache memory driver documentation; 10_000 may be small if it
 * is a byte budget and the cached values hold image buffers.
 */
export const cache = new BentoCache({
default: 'cache',
stores: {
// Only an L1 layer is configured for this store
cache: bentostore().useL1Layer(memoryDriver({ maxSize: 10_000 })),
},
});