mirror of
https://github.com/BradNut/personal-website-sveltekit
synced 2025-09-08 23:20:18 +00:00
Adding retry to bandcamp scraping.
This commit is contained in:
parent
60f53e84ed
commit
8735da795a
2 changed files with 81 additions and 76 deletions
|
|
@ -1,51 +1,51 @@
|
||||||
import { json } from '@sveltejs/kit';
|
import { json } from '@sveltejs/kit';
|
||||||
|
import type { ArticlePageLoad } from '@/lib/types/article.js';
|
||||||
import { PAGE_SIZE } from '$env/static/private';
|
import { PAGE_SIZE } from '$env/static/private';
|
||||||
import { fetchArticlesApi } from '$lib/services/articlesApi';
|
import { fetchArticlesApi } from '$lib/services/articlesApi';
|
||||||
import type { ArticlePageLoad } from '@/lib/types/article.js';
|
|
||||||
|
|
||||||
export async function GET({ setHeaders, url }) {
|
export async function GET({ setHeaders, url }) {
|
||||||
const page = url?.searchParams?.get('page') || '1';
|
const page = url?.searchParams?.get('page') || '1';
|
||||||
let limit = url?.searchParams?.get('limit') ?? PAGE_SIZE;
|
let limit = url?.searchParams?.get('limit') ?? PAGE_SIZE;
|
||||||
if (Number(limit) > 30) {
|
if (Number(limit) > 30) {
|
||||||
limit = PAGE_SIZE;
|
limit = PAGE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response: ArticlePageLoad = await fetchArticlesApi('get', 'fetchArticles', {
|
const response: ArticlePageLoad = await fetchArticlesApi('get', 'fetchArticles', {
|
||||||
page,
|
page,
|
||||||
limit
|
limit,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (response?.articles) {
|
if (response?.articles) {
|
||||||
if (response?.cacheControl) {
|
if (response?.cacheControl) {
|
||||||
if (!response.cacheControl.includes('no-cache')) {
|
if (!response.cacheControl.includes('no-cache')) {
|
||||||
setHeaders({
|
setHeaders({
|
||||||
'cache-control': response?.cacheControl
|
'cache-control': response?.cacheControl,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
setHeaders({
|
setHeaders({
|
||||||
'cache-control': 'max-age=43200'
|
'cache-control': 'max-age=43200',
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return json(response);
|
return json(response);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(e);
|
console.error(e);
|
||||||
// Fall back to an empty, cacheable payload so pages can still render in E2E
|
// Fall back to an empty, cacheable payload so pages can still render in E2E
|
||||||
const fallback: ArticlePageLoad = {
|
const fallback: ArticlePageLoad = {
|
||||||
articles: [],
|
articles: [],
|
||||||
currentPage: Number(page) || 1,
|
currentPage: Number(page) || 1,
|
||||||
totalArticles: 0,
|
totalArticles: 0,
|
||||||
totalPages: 1,
|
totalPages: 1,
|
||||||
limit: Number(limit) || 10,
|
limit: Number(limit) || 10,
|
||||||
cacheControl: 'no-cache'
|
cacheControl: 'no-cache',
|
||||||
} as unknown as ArticlePageLoad;
|
} as unknown as ArticlePageLoad;
|
||||||
return json(fallback, {
|
return json(fallback, {
|
||||||
headers: {
|
headers: {
|
||||||
'cache-control': 'no-cache'
|
'cache-control': 'no-cache',
|
||||||
}
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,64 +1,69 @@
|
||||||
import { json, error } from '@sveltejs/kit';
|
import { json } from '@sveltejs/kit';
|
||||||
|
import scrapeIt, { type ScrapeResult } from 'scrape-it';
|
||||||
import { BANDCAMP_USERNAME, USE_REDIS_CACHE } from '$env/static/private';
|
import { BANDCAMP_USERNAME, USE_REDIS_CACHE } from '$env/static/private';
|
||||||
import { redis } from '$lib/server/redis';
|
import { redis } from '$lib/server/redis';
|
||||||
import type { Album, BandCampResults } from '$lib/types/album';
|
import type { Album, BandCampResults } from '$lib/types/album';
|
||||||
import scrapeIt, { type ScrapeResult } from 'scrape-it';
|
|
||||||
|
|
||||||
export async function GET({ setHeaders, url }) {
|
/**
 * Runs an async operation, retrying with exponential backoff when it rejects.
 *
 * The operation is always attempted at least once. After a failed attempt
 * (other than the last) the helper waits `baseDelay * 2 ** attempt`
 * milliseconds before trying again, i.e. 500ms, 1s, 2s with the defaults.
 *
 * @param fn - Operation to run; invoked again from scratch on every attempt.
 * @param maxRetries - Number of retries after the first attempt. Negative or
 *   NaN values are treated as 0 and fractional values are rounded down, so
 *   `fn` is never skipped entirely.
 * @param baseDelay - Delay in milliseconds before the first retry; doubled for
 *   each subsequent retry. Negative or NaN values are treated as 0.
 * @returns The value resolved by the first successful attempt.
 * @throws The rejection value of the final attempt, unchanged, once all
 *   attempts have failed.
 */
async function retryWithBackoff<T>(fn: () => Promise<T>, maxRetries = 3, baseDelay = 500): Promise<T> {
	// Normalise the inputs so the loop below always runs at least once and the
	// "is this the last attempt?" check works for non-integer values too.
	const retries = Number.isNaN(maxRetries) ? 0 : Math.max(0, Math.floor(maxRetries));
	const delayBase = Math.max(0, baseDelay) || 0;

	// Rejections are not guaranteed to be Error instances, so keep the raw value.
	let lastError: unknown;

	for (let attempt = 0; attempt <= retries; attempt++) {
		try {
			return await fn();
		} catch (err: unknown) {
			lastError = err;
			// Only back off when another attempt will actually follow.
			if (attempt < retries) {
				const delay = delayBase * 2 ** attempt;
				await new Promise<void>((resolve) => setTimeout(resolve, delay));
			}
		}
	}

	throw lastError;
}
|
||||||
|
|
||||||
|
export async function GET({ setHeaders }) {
|
||||||
try {
|
try {
|
||||||
if (USE_REDIS_CACHE === 'true') {
|
if (USE_REDIS_CACHE === 'true') {
|
||||||
const cached: string | null = await redis.get('bandcampAlbums');
|
const cached: string | null = await redis.get('bandcampAlbums');
|
||||||
|
|
||||||
if (cached) {
|
if (cached) {
|
||||||
const response: Album[] = JSON.parse(cached);
|
const response: Album[] = JSON.parse(cached);
|
||||||
const ttl = await redis.ttl("bandcampAlbums");
|
const ttl = await redis.ttl('bandcampAlbums');
|
||||||
if (ttl) {
|
if (ttl) {
|
||||||
setHeaders({
|
setHeaders({
|
||||||
"cache-control": `max-age=${ttl}`,
|
'cache-control': `max-age=${ttl}`,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
setHeaders({
|
setHeaders({
|
||||||
"cache-control": "max-age=43200",
|
'cache-control': 'max-age=43200',
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return json(response);
|
return json(response);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const { data }: ScrapeResult<BandCampResults> = await scrapeIt(`https://bandcamp.com/${BANDCAMP_USERNAME}`, {
|
// Scrape the Bandcamp collection page, retrying with exponential backoff on failure
|
||||||
collectionItems: {
|
const { data }: ScrapeResult<BandCampResults> = await retryWithBackoff(async () =>
|
||||||
listItem: '.collection-item-container',
|
await scrapeIt(`https://bandcamp.com/${BANDCAMP_USERNAME}`, {
|
||||||
data: {
|
collectionItems: {
|
||||||
url: {
|
listItem: '.collection-item-container',
|
||||||
selector: '.collection-title-details > a.item-link',
|
data: {
|
||||||
attr: 'href',
|
url: { selector: '.collection-title-details > a.item-link', attr: 'href' },
|
||||||
},
|
artwork: { selector: 'div.collection-item-art-container a img', attr: 'src' },
|
||||||
artwork: {
|
title: { selector: 'span.item-link-alt > div.collection-item-title' },
|
||||||
selector: 'div.collection-item-art-container a img',
|
artist: { selector: 'span.item-link-alt > div.collection-item-artist' },
|
||||||
attr: 'src',
|
|
||||||
},
|
|
||||||
title: {
|
|
||||||
selector: 'span.item-link-alt > div.collection-item-title',
|
|
||||||
},
|
|
||||||
artist: {
|
|
||||||
selector: 'span.item-link-alt > div.collection-item-artist',
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
})
|
||||||
});
|
);
|
||||||
|
|
||||||
const albums: Album[] = data?.collectionItems || [];
|
const albums: Album[] = data?.collectionItems || [];
|
||||||
|
if (albums && albums.length > 0) {
|
||||||
if (albums && albums?.length > 0) {
|
|
||||||
if (USE_REDIS_CACHE === 'true') {
|
if (USE_REDIS_CACHE === 'true') {
|
||||||
redis.set('bandcampAlbums', JSON.stringify(albums), 'EX', 43200);
|
redis.set('bandcampAlbums', JSON.stringify(albums), 'EX', 43200);
|
||||||
}
|
}
|
||||||
setHeaders({
|
setHeaders({ 'cache-control': 'max-age=43200' });
|
||||||
"cache-control": "max-age=43200",
|
|
||||||
});
|
|
||||||
return json(albums);
|
return json(albums);
|
||||||
}
|
}
|
||||||
return json([]);
|
return json([]);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(error);
|
console.error(error);
|
||||||
return json([]);
|
return json([]);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue