chore: initial commit - CloudSearch v0.0.2
This commit is contained in:
49
packages/backend/src/validation/bounded-pool.ts
Executable file
49
packages/backend/src/validation/bounded-pool.ts
Executable file
@@ -0,0 +1,49 @@
|
||||
/**
 * Minimal promise pool that caps how many async tasks run at the same time.
 *
 * A task submitted through `run` starts immediately while fewer than
 * `concurrency` tasks are in flight; otherwise it waits in a FIFO queue and is
 * started as soon as a running task settles.
 */
export class BoundedPool {
  /** Maximum number of tasks allowed to run simultaneously (always > 0). */
  private readonly concurrency: number;

  /** Number of tasks currently executing. */
  private running: number;

  /** Tasks waiting for a free slot, oldest first. */
  private readonly queue: Array<() => Promise<void>>;

  /**
   * @param concurrency Maximum number of simultaneously running tasks.
   *   Zero, negative and NaN values fall back to 1: with such a limit the
   *   `running < concurrency` check could never succeed, so every task would
   *   be queued forever and `run` would never settle.
   */
  constructor(concurrency: number = 10) {
    this.concurrency = concurrency > 0 ? concurrency : 1;
    this.running = 0;
    this.queue = [];
  }

  /**
   * Schedules `fn`, running it right away if a slot is free or queueing it
   * otherwise.
   *
   * @param fn Task factory; it is invoked only once a slot is available.
   * @returns A promise that settles with the outcome of `fn` (resolves with
   *   its value, rejects with whatever it throws or rejects with).
   */
  run<T>(fn: () => Promise<T>): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const task = async (): Promise<void> => {
        // Runs synchronously up to the first await, so the slot is claimed
        // before any other task gets a chance to be scheduled.
        this.running++;
        try {
          resolve(await fn());
        } catch (err) {
          reject(err);
        } finally {
          // Free the slot and hand it to the next queued task, if any.
          this.running--;
          this.processQueue();
        }
      };

      if (this.running < this.concurrency) {
        void task();
      } else {
        this.queue.push(task);
      }
    });
  }

  /** Starts queued tasks, oldest first, until the pool is full or the queue is empty. */
  private processQueue(): void {
    while (this.running < this.concurrency && this.queue.length > 0) {
      const next = this.queue.shift();
      if (next) void next();
    }
  }

  /** Number of tasks waiting in the queue (not yet started). */
  get pending(): number {
    return this.queue.length;
  }

  /** Number of tasks currently running. */
  get active(): number {
    return this.running;
  }
}
|
||||
375
packages/backend/src/validation/link-validator.service.ts
Executable file
375
packages/backend/src/validation/link-validator.service.ts
Executable file
@@ -0,0 +1,375 @@
|
||||
// Native fetch available in Node 20+
|
||||
import config from '../config';
|
||||
import { RedisClient } from '../middleware/cache';
|
||||
import { BoundedPool } from './bounded-pool';
|
||||
import { BaiduDriver } from '../cloud/drivers/baidu.driver';
|
||||
import { AliyunDriver } from '../cloud/drivers/aliyun.driver';
|
||||
import { getSystemConfig } from '../admin/system-config.service';
|
||||
|
||||
/**
 * Outcome of a share-link check. `'unknown'` is used when no definitive
 * verdict could be reached (e.g. the checking service was unreachable or a
 * validation call failed).
 */
export type LinkStatus = 'valid' | 'invalid' | 'unknown';
|
||||
|
||||
/** Result record produced for one validated share link. */
export interface ValidationResult {
|
||||
/** The share link that was checked. */
url: string;
|
||||
/** Verdict of the check. */
status: LinkStatus;
|
||||
/** Provider identifier the link belongs to (e.g. 'quark', 'baidu', 'aliyun'). */
cloudType: string;
|
||||
/** ISO-8601 timestamp (from `Date#toISOString`) of when the check started. */
checkedAt: string;
|
||||
/** Optional human-readable detail explaining the verdict. */
message?: string;
|
||||
}
|
||||
|
||||
/**
 * Loads a custom keyword list from the system configuration, where the stored
 * value holds one keyword per line.
 *
 * @param configKey Name of the system-config entry to read.
 * @returns The trimmed, non-empty lines of the entry; an empty array when the
 *   entry is missing/empty or when reading or parsing it throws.
 */
function loadCustomKeywords(configKey: string): string[] {
  const keywords: string[] = [];
  try {
    const raw = getSystemConfig(configKey);
    if (!raw) {
      return keywords;
    }
    for (const line of raw.split('\n')) {
      const keyword = line.trim();
      if (keyword.length > 0) {
        keywords.push(keyword);
      }
    }
  } catch {
    // Best-effort: any failure is treated as "no custom keywords configured".
    return [];
  }
  return keywords;
}
|
||||
|
||||
/**
 * Validates cloud-drive share links.
 *
 * The primary signal comes from PanSou's `/api/check/links` endpoint.
 * `validateWithLocalFallback` additionally runs provider-specific checks
 * (Quark token API, Baidu/Aliyun drivers, generic HTML fetch) when PanSou
 * cannot give a definitive answer. Definitive results ('valid' / 'invalid')
 * are cached through `RedisClient`; 'unknown' results are never cached so a
 * transient failure is re-checked on the next call.
 */
export class LinkValidator {
  private readonly cache: RedisClient;
  private readonly pool: BoundedPool;

  /**
   * @param concurrency Maximum number of validations run in parallel by the
   *   batch methods. Falls back to `config.validation.concurrency` when
   *   omitted (or 0, which is not a usable limit).
   */
  constructor(concurrency?: number) {
    this.cache = new RedisClient();
    this.pool = new BoundedPool(concurrency || config.validation.concurrency);
  }

  /**
   * Validate a single share link — PanSou only, no local fallback.
   *
   * @param url Share link to check.
   * @param cloudType Provider identifier, forwarded to PanSou as `disk_type`.
   * @returns The cached or freshly computed result. Status is 'unknown' when
   *   PanSou is unreachable or cannot decide.
   */
  async validate(url: string, cloudType: string): Promise<ValidationResult> {
    const cacheKey = this.cacheKeyFor(url, cloudType);
    const cached = await this.readCache(cacheKey);
    if (cached) return cached;

    const pansouResult = await this.validateViaPansou(url, cloudType);
    if (!pansouResult) {
      // PanSou unreachable → unknown (not cached).
      return { url, status: 'unknown', cloudType, checkedAt: new Date().toISOString(), message: '盘搜不可达' };
    }

    // Only definitive verdicts are cached; an uncertain verdict is returned as-is.
    await this.cacheIfDefinitive(cacheKey, pansouResult);
    return pansouResult;
  }

  /**
   * Full validation with local fallback when PanSou can't determine.
   *
   * @param url Share link to check.
   * @param cloudType Provider identifier; selects the local validation strategy.
   * @returns The cached, PanSou, or locally computed result.
   */
  async validateWithLocalFallback(url: string, cloudType: string): Promise<ValidationResult> {
    const cacheKey = this.cacheKeyFor(url, cloudType);
    const cached = await this.readCache(cacheKey);
    if (cached) return cached;

    const pansouResult = await this.validateViaPansou(url, cloudType);
    if (pansouResult && pansouResult.status !== 'unknown') {
      await this.cacheIfDefinitive(cacheKey, pansouResult);
      return pansouResult;
    }

    // PanSou unreachable or uncertain → fall back to our own validation.
    const result = await this.validateLocally(url, cloudType);
    await this.cacheIfDefinitive(cacheKey, result);
    return result;
  }

  /**
   * Builds the cache key for a link. The full base64 of the URL is used:
   * truncating it would make URLs that share a long common prefix collide and
   * serve each other's cached verdicts.
   */
  private cacheKeyFor(url: string, cloudType: string): string {
    return `link:valid:${cloudType}:${Buffer.from(url).toString('base64')}`;
  }

  /** Reads a cached result; cache or parse failures are treated as a miss. */
  private async readCache(cacheKey: string): Promise<ValidationResult | null> {
    try {
      const cached = await this.cache.get(cacheKey);
      if (cached) {
        const parsed = JSON.parse(cached) as ValidationResult | null;
        if (parsed) return parsed;
      }
    } catch {
      // Cache problems must never block validation.
    }
    return null;
  }

  /**
   * Stores a 'valid' / 'invalid' result with the matching TTL from config.
   * 'unknown' results are skipped; write failures are ignored (best-effort).
   */
  private async cacheIfDefinitive(cacheKey: string, result: ValidationResult): Promise<void> {
    if (result.status === 'unknown') return;
    const ttl = result.status === 'valid' ? config.validation.cacheTtlValid : config.validation.cacheTtlInvalid;
    try {
      await this.cache.setEx(cacheKey, ttl, JSON.stringify(result));
    } catch {
      // Best-effort cache write.
    }
  }

  /** Dispatches to the provider-specific validator, defaulting to the HTML check. */
  private validateLocally(url: string, cloudType: string): Promise<ValidationResult> {
    switch (cloudType) {
      case 'quark':
        return this.validateQuark(url);
      case 'baidu':
        return this.validateBaidu(url);
      case 'aliyun':
        return this.validateAliyun(url);
      default:
        return this.validateByHtml(url, cloudType);
    }
  }

  /**
   * Extracts a string `message` from an arbitrary thrown value.
   * Returns '' when the value carries no string message.
   */
  private static errorMessage(err: unknown): string {
    if (typeof err === 'object' && err !== null && 'message' in err) {
      const { message } = err as { message?: unknown };
      if (typeof message === 'string') return message;
    }
    return '';
  }

  /**
   * Try PanSou's /api/check/links for validation.
   * Returns null if PanSou is unreachable (network error, timeout, non-2xx
   * response, or a response without a first result entry).
   *
   * Judgment order:
   *   1. summary contains "链接有效" → valid (PanSou's own OK signal)
   *   2. summary contains a custom confirmation keyword → valid (config key link_valid_keywords)
   *   3. summary contains a custom failure keyword → invalid (config key link_invalid_keywords)
   *   4. anything else → unknown
   */
  private async validateViaPansou(url: string, cloudType: string): Promise<ValidationResult | null> {
    const checkedAt = new Date().toISOString();
    try {
      const pansouApiUrl = `${config.pansouUrl}/api/check/links`;
      const response = await fetch(pansouApiUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          items: [{ disk_type: cloudType, url }],
        }),
        signal: AbortSignal.timeout(10000),
      });

      if (!response.ok) return null;

      const data = (await response.json()) as { results?: Array<{ summary?: string }> };
      const pansouResult = data.results?.[0];
      if (!pansouResult) return null;

      const summary = pansouResult.summary || '';

      // 1. PanSou explicitly reports the link as valid.
      if (summary.includes('链接有效')) {
        return { url, status: 'valid', cloudType, checkedAt, message: summary };
      }

      // 2. User-configured confirmation keywords ("valid" signal).
      const validKeywords = loadCustomKeywords('link_valid_keywords');
      if (validKeywords.some(kw => summary.includes(kw))) {
        return { url, status: 'valid', cloudType, checkedAt, message: summary };
      }

      // 3. User-configured failure keywords ("invalid" signal).
      const invalidKeywords = loadCustomKeywords('link_invalid_keywords');
      if (invalidKeywords.some(kw => summary.includes(kw))) {
        return { url, status: 'invalid', cloudType, checkedAt, message: summary };
      }

      // 4. Everything else is inconclusive.
      return { url, status: 'unknown', cloudType, checkedAt, message: summary || '盘搜无法确认' };
    } catch {
      return null;
    }
  }

  /**
   * Validate a Quark share link using the public share token API.
   *
   * The share token is taken from the last non-empty path segment of the URL
   * (the fragment is ignored). A response carrying an `stoken` means the
   * share is valid; a non-2xx HTTP status is reported as invalid; anything
   * else (including thrown errors) is reported as unknown.
   */
  private async validateQuark(url: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();

    try {
      const cleanUrl = url.split('#')[0];
      const urlObj = new URL(cleanUrl);
      const pathParts = urlObj.pathname.split('/');
      // Tolerate a trailing slash by falling back to the previous segment.
      const shareToken = pathParts[pathParts.length - 1] || pathParts[pathParts.length - 2];

      if (!shareToken) {
        return { url, status: 'unknown', cloudType: 'quark', checkedAt, message: '无法解析分享链接 token' };
      }

      const tokenUrl = 'https://drive-pc.quark.cn/1/clouddrive/share/sharepage/token?pr=ucpro&fr=pc';
      const response = await fetch(tokenUrl, {
        method: 'POST',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
          'Content-Type': 'application/json',
          'Accept': 'application/json',
          'Origin': 'https://pan.quark.cn',
          'Referer': 'https://pan.quark.cn/',
        },
        body: JSON.stringify({ pwd_id: shareToken, passcode: '' }),
        signal: AbortSignal.timeout(15000),
      });

      if (!response.ok) {
        const msg = response.status === 403 ? '分享已过期或需要密码' : `HTTP ${response.status}`;
        return { url, status: 'invalid', cloudType: 'quark', checkedAt, message: msg };
      }

      const data = (await response.json()) as {
        status?: number;
        data?: { stoken?: string; expired_at?: number };
      };
      if (data.status === 200 && data.data?.stoken) {
        const expiredAt = data.data?.expired_at || 0;
        // NOTE(review): expired_at is passed straight to `new Date`, i.e. treated as epoch milliseconds — confirm against the API.
        const expireDate = expiredAt > 0 ? new Date(expiredAt).toISOString().slice(0, 10) : '';
        return {
          url,
          status: 'valid',
          cloudType: 'quark',
          checkedAt,
          message: expireDate ? `有效链接,过期时间: ${expireDate}` : '有效链接',
        };
      }

      // HTTP 200 but no stoken — possibly a transient anomaly, so stay conservative and report unknown.
      return { url, status: 'unknown', cloudType: 'quark', checkedAt, message: 'API 返回异常(无 stoken),不做失效判定' };
    } catch (err: unknown) {
      return {
        url,
        status: 'unknown',
        cloudType: 'quark',
        checkedAt,
        message: `校验异常: ${LinkValidator.errorMessage(err).slice(0, 50) || '未知错误'}`,
      };
    }
  }

  /**
   * Validates a Baidu share link through `BaiduDriver.validateShareLink`.
   * A thrown error is reported as 'unknown' rather than 'invalid'.
   */
  private async validateBaidu(url: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();

    try {
      const driver = new BaiduDriver();
      const result = await driver.validateShareLink(url);

      return {
        url,
        status: result.valid ? 'valid' : 'invalid',
        cloudType: 'baidu',
        checkedAt,
        message: result.message,
      };
    } catch (err: unknown) {
      return {
        url,
        status: 'unknown',
        cloudType: 'baidu',
        checkedAt,
        message: `校验失败: ${LinkValidator.errorMessage(err) || String(err)}`,
      };
    }
  }

  /**
   * Validates an Aliyun share link through `AliyunDriver.validateShareLink`.
   * A thrown error is reported as 'unknown' rather than 'invalid'.
   */
  private async validateAliyun(url: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();

    try {
      const driver = new AliyunDriver();
      const result = await driver.validateShareLink(url);

      return {
        url,
        status: result.valid ? 'valid' : 'invalid',
        cloudType: 'aliyun',
        checkedAt,
        message: result.message,
      };
    } catch (err: unknown) {
      return {
        url,
        status: 'unknown',
        cloudType: 'aliyun',
        checkedAt,
        message: `校验失败: ${LinkValidator.errorMessage(err) || String(err)}`,
      };
    }
  }

  /**
   * Fallback: validate by fetching the share page as HTML and checking for
   * custom failure keywords from the system config. Used for providers without
   * a dedicated API (115, tianyi, 123pan, etc.).
   *
   * HTTP status >= 400 → invalid; page containing a configured failure keyword
   * → invalid; otherwise valid. Network errors and timeouts are conservatively
   * reported as valid.
   */
  private async validateByHtml(url: string, cloudType: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();

    try {
      const response = await fetch(url, {
        // One deadline for the whole exchange, including the body read below;
        // unlike a manual timer there is nothing to clear on the error path.
        signal: AbortSignal.timeout(config.validation.timeout),
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        },
        redirect: 'follow',
      });

      // Always drain the body, even for error statuses.
      const text = await response.text();

      if (response.status >= 400) {
        return { url, status: 'invalid', cloudType, checkedAt, message: `HTTP ${response.status} ${response.statusText}` };
      }

      const matched = loadCustomKeywords('link_invalid_keywords').find(kw => text.includes(kw));
      if (matched) {
        return { url, status: 'invalid', cloudType, checkedAt, message: `页面包含自定义失效关键词: "${matched}"` };
      }

      return { url, status: 'valid', cloudType, checkedAt, message: 'HTML 页面可访问,未检测到失效关键词' };
    } catch {
      // On timeout or network error, conservatively mark as valid.
      return { url, status: 'valid', cloudType, checkedAt, message: `网络校验超时,保守标记为有效` };
    }
  }

  /**
   * Batch validate multiple links with bounded concurrency.
   *
   * All links are handed to the pool up front so up to `concurrency`
   * validations run in parallel.
   *
   * @param urls Links to validate, each with its provider identifier.
   * @returns One result per input, in input order. If a validation throws
   *   unexpectedly, that entry is an 'unknown' result carrying the original
   *   url and cloudType.
   */
  async validateBatch(urls: Array<{ url: string; cloudType: string }>): Promise<ValidationResult[]> {
    return Promise.all(
      urls.map(async ({ url, cloudType }): Promise<ValidationResult> => {
        try {
          return await this.pool.run(() => this.validate(url, cloudType));
        } catch {
          return {
            url,
            status: 'unknown',
            cloudType,
            checkedAt: new Date().toISOString(),
            message: '校验执行异常',
          };
        }
      }),
    );
  }

  /** Alias of `validateBatch`. */
  async validateBatchWithPool(urls: Array<{ url: string; cloudType: string }>): Promise<ValidationResult[]> {
    return this.validateBatch(urls);
  }
}
|
||||
Reference in New Issue
Block a user