chore: initial commit - CloudSearch v0.0.2

This commit is contained in:
2026-05-15 05:50:50 +08:00
commit d83225d736
102 changed files with 37926 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
/**
 * Minimal promise pool that caps how many async tasks execute at the same time.
 *
 * Tasks submitted through {@link BoundedPool.run} start immediately while a
 * slot is free; otherwise they wait in a FIFO queue and are started as soon as
 * a running task settles (successfully or not).
 */
export class BoundedPool {
  /** Upper bound on simultaneously running tasks (always >= 1 after construction). */
  private concurrency: number;
  /** Number of tasks currently executing. */
  private running: number;
  /** Tasks waiting for a free slot, oldest first. */
  private queue: Array<() => Promise<void>>;

  /**
   * @param concurrency Maximum number of tasks allowed to run at once.
   *   Values that could never admit a task (`NaN`, `0`, negatives) are clamped
   *   to 1; otherwise `running < concurrency` would never hold and every
   *   promise returned by `run()` would stay pending forever.
   */
  constructor(concurrency: number = 10) {
    this.concurrency = Number.isNaN(concurrency) || concurrency < 1 ? 1 : concurrency;
    this.running = 0;
    this.queue = [];
  }

  /**
   * Schedule `fn` and return a promise that mirrors its outcome.
   *
   * @param fn Factory for the work to run; it is invoked only once a slot is free.
   * @returns A promise that resolves with `fn`'s value or rejects with its error
   *   (including errors thrown synchronously by `fn`).
   */
  async run<T>(fn: () => Promise<T>): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const task = async (): Promise<void> => {
        // Incremented synchronously when the task is invoked, so the slot is
        // already accounted for before any other caller checks `running`.
        this.running++;
        try {
          resolve(await fn());
        } catch (err) {
          reject(err);
        } finally {
          this.running--;
          this.processQueue();
        }
      };

      if (this.running < this.concurrency) {
        // The task never rejects (errors are routed to `reject`), so it is
        // safe to start it without awaiting.
        void task();
      } else {
        this.queue.push(task);
      }
    });
  }

  /** Start queued tasks, oldest first, until the pool is full or the queue is empty. */
  private processQueue(): void {
    while (this.running < this.concurrency && this.queue.length > 0) {
      const next = this.queue.shift();
      if (next) void next();
    }
  }

  /** Number of tasks waiting for a slot. */
  get pending(): number {
    return this.queue.length;
  }

  /** Number of tasks currently running. */
  get active(): number {
    return this.running;
  }
}

View File

@@ -0,0 +1,375 @@
// Native fetch available in Node 20+
import { createHash } from 'node:crypto';
import config from '../config';
import { RedisClient } from '../middleware/cache';
import { BoundedPool } from './bounded-pool';
import { BaiduDriver } from '../cloud/drivers/baidu.driver';
import { AliyunDriver } from '../cloud/drivers/aliyun.driver';
import { getSystemConfig } from '../admin/system-config.service';
/**
 * Outcome of a link check: `valid` and `invalid` are definitive verdicts,
 * `unknown` means the check could not decide.
 */
export type LinkStatus = 'valid' | 'invalid' | 'unknown';
/** Result record produced for one checked share link. */
export interface ValidationResult {
/** The share URL that was checked. */
url: string;
/** Verdict for the link. */
status: LinkStatus;
/** Cloud-drive provider identifier the link was checked as (e.g. 'quark', 'baidu', 'aliyun'). */
cloudType: string;
/** Time of the check; the validators in this file fill it with `new Date().toISOString()`. */
checkedAt: string;
/** Optional human-readable detail about the verdict. */
message?: string;
}
/**
* 从系统配置加载自定义关键词列表(一行一条)
*/
/**
 * Load a custom keyword list from system configuration.
 *
 * The configured value is treated as one keyword per line; surrounding
 * whitespace is stripped and blank lines are dropped.
 *
 * @param configKey Name of the system-config entry to read.
 * @returns The keywords in configured order, or an empty array when the entry
 *   is missing/empty or reading it fails.
 */
function loadCustomKeywords(configKey: string): string[] {
  try {
    const raw = getSystemConfig(configKey);
    if (!raw) {
      return [];
    }
    const keywords: string[] = [];
    for (const line of raw.split('\n')) {
      const keyword = line.trim();
      if (keyword.length > 0) {
        keywords.push(keyword);
      }
    }
    return keywords;
  } catch {
    // Configuration is optional: any failure simply means "no custom keywords".
    return [];
  }
}
/**
 * Checks whether cloud-drive share links are still alive.
 *
 * The primary signal is PanSou's `/api/check/links` endpoint. Depending on the
 * entry point, links PanSou cannot decide are either reported as `unknown`
 * ({@link LinkValidator.validate}) or re-checked with provider-specific logic
 * ({@link LinkValidator.validateWithLocalFallback}). Results are cached in
 * Redis; every cache failure is ignored so caching stays best-effort.
 */
export class LinkValidator {
  private cache: RedisClient;
  private pool: BoundedPool;

  /**
   * @param concurrency Maximum number of simultaneous checks in batch mode.
   *   Falls back to `config.validation.concurrency` when omitted or 0.
   */
  constructor(concurrency?: number) {
    this.cache = new RedisClient();
    this.pool = new BoundedPool(concurrency || config.validation.concurrency);
  }

  /**
   * Validate a single share link — PanSou only, no local fallback.
   *
   * Only definitive verdicts (`valid` / `invalid`) are written to the cache.
   *
   * @param url Share link to check.
   * @param cloudType Provider identifier, forwarded to PanSou as `disk_type`.
   * @returns The cached or fresh result; `unknown` when PanSou is unreachable
   *   or cannot decide.
   */
  async validate(url: string, cloudType: string): Promise<ValidationResult> {
    const cacheKey = this.buildCacheKey(url, cloudType);
    const cached = await this.readCached(cacheKey);
    if (cached) return cached;

    const pansouResult = await this.validateViaPansou(url, cloudType);
    if (!pansouResult) {
      // PanSou unreachable → report unknown.
      return { url, status: 'unknown' as LinkStatus, cloudType, checkedAt: new Date().toISOString(), message: '盘搜不可达' };
    }
    if (LinkValidator.isDefinitive(pansouResult.status)) {
      await this.writeCached(cacheKey, pansouResult);
    }
    // Locked / unsupported / uncertain answers are returned as-is (unknown), uncached.
    return pansouResult;
  }

  /**
   * Full validation with local fallback when PanSou can't determine.
   *
   * A definitive PanSou verdict is cached and returned. Otherwise the link is
   * checked with the provider-specific validator and that result is cached
   * whatever its status (non-valid results use the "invalid" TTL).
   *
   * @param url Share link to check.
   * @param cloudType Provider identifier; selects the local validator.
   */
  async validateWithLocalFallback(url: string, cloudType: string): Promise<ValidationResult> {
    const cacheKey = this.buildCacheKey(url, cloudType);
    const cached = await this.readCached(cacheKey);
    if (cached) return cached;

    const pansouResult = await this.validateViaPansou(url, cloudType);
    if (pansouResult && LinkValidator.isDefinitive(pansouResult.status)) {
      await this.writeCached(cacheKey, pansouResult);
      return pansouResult;
    }

    // PanSou unreachable or uncertain → fall back to our own validation.
    const result = await this.validateLocally(url, cloudType);
    await this.writeCached(cacheKey, result);
    return result;
  }

  /** True for verdicts that settle the question (`valid` or `invalid`). */
  private static isDefinitive(status: LinkStatus): boolean {
    return status === 'valid' || status === 'invalid';
  }

  /** Extract a non-empty string `message` from a caught value, or '' when there is none. */
  private static errorText(err: unknown): string {
    if (typeof err === 'object' && err !== null && 'message' in err) {
      const text = (err as { message?: unknown }).message;
      if (typeof text === 'string') return text;
    }
    return '';
  }

  /**
   * Build the Redis key for a link.
   *
   * URLs whose base64 form fits in 64 characters keep the historical key
   * format. Longer URLs used to be truncated, which made different links that
   * share their first 48 bytes collide on one cache entry; those now get a
   * SHA-256 digest of the full URL appended after the same 64-character prefix.
   */
  private buildCacheKey(url: string, cloudType: string): string {
    const encoded = Buffer.from(url).toString('base64');
    if (encoded.length <= 64) {
      return `link:valid:${cloudType}:${encoded}`;
    }
    const digest = createHash('sha256').update(url).digest('hex');
    return `link:valid:${cloudType}:${encoded.slice(0, 64)}:${digest}`;
  }

  /** Read a cached result; cache and parse errors are treated as a miss. */
  private async readCached(cacheKey: string): Promise<ValidationResult | null> {
    try {
      const cached = await this.cache.get(cacheKey);
      if (cached) {
        const parsed = JSON.parse(cached) as ValidationResult | null;
        return parsed ?? null;
      }
    } catch {
      // ignore cache errors
    }
    return null;
  }

  /** Store a result with the TTL matching its status; cache errors are ignored. */
  private async writeCached(cacheKey: string, result: ValidationResult): Promise<void> {
    const ttl = result.status === 'valid' ? config.validation.cacheTtlValid : config.validation.cacheTtlInvalid;
    try {
      await this.cache.setEx(cacheKey, ttl, JSON.stringify(result));
    } catch {
      // ignore cache errors
    }
  }

  /** Dispatch to the provider-specific validator, defaulting to the HTML check. */
  private validateLocally(url: string, cloudType: string): Promise<ValidationResult> {
    switch (cloudType) {
      case 'quark':
        return this.validateQuark(url);
      case 'baidu':
        return this.validateBaidu(url);
      case 'aliyun':
        return this.validateAliyun(url);
      default:
        return this.validateByHtml(url, cloudType);
    }
  }

  /**
   * Try PanSou's /api/check/links for validation.
   * Returns null if PanSou is unreachable (network error, timeout, non-2xx
   * response, or a response without a first result).
   *
   * Judgment order:
   * 1. summary contains "链接有效" → valid (PanSou's own OK signal)
   * 2. summary contains a custom confirmation keyword → valid (config `link_valid_keywords`)
   * 3. summary contains a custom failure keyword → invalid (config `link_invalid_keywords`)
   * 4. anything else → unknown
   */
  private async validateViaPansou(url: string, cloudType: string): Promise<ValidationResult | null> {
    const checkedAt = new Date().toISOString();
    try {
      const pansouApiUrl = `${config.pansouUrl}/api/check/links`;
      const response = await fetch(pansouApiUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          items: [{ disk_type: cloudType, url }],
        }),
        signal: AbortSignal.timeout(10000),
      });
      if (!response.ok) return null;

      const data = (await response.json()) as { results?: Array<{ summary?: string }> };
      const first = data.results?.[0];
      if (!first) return null;
      const summary = first.summary || '';

      // 1. PanSou explicitly reports the link as valid.
      if (summary.includes('链接有效')) {
        return { url, status: 'valid', cloudType, checkedAt, message: summary };
      }
      // 2. User-configured "valid" signals.
      const validKeywords = loadCustomKeywords('link_valid_keywords');
      if (validKeywords.some(kw => summary.includes(kw))) {
        return { url, status: 'valid', cloudType, checkedAt, message: summary };
      }
      // 3. User-configured "invalid" signals.
      const invalidKeywords = loadCustomKeywords('link_invalid_keywords');
      if (invalidKeywords.some(kw => summary.includes(kw))) {
        return { url, status: 'invalid', cloudType, checkedAt, message: summary };
      }
      // 4. Everything else is undetermined.
      return { url, status: 'unknown', cloudType, checkedAt, message: summary || '盘搜无法确认' };
    } catch {
      return null;
    }
  }

  /**
   * Validate a Quark share link using the public share token API.
   *
   * The last non-empty path segment of the URL (fragment removed) is sent as
   * `pwd_id`. A non-2xx response is reported as `invalid`; a 2xx response with
   * `status === 200` and an `stoken` is `valid`; anything else, including
   * thrown errors, is `unknown`.
   */
  private async validateQuark(url: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();
    try {
      const cleanUrl = url.split('#')[0];
      const urlObj = new URL(cleanUrl);
      const pathParts = urlObj.pathname.split('/');
      // A trailing slash leaves an empty last segment, so fall back to the one before it.
      const shareToken = pathParts[pathParts.length - 1] || pathParts[pathParts.length - 2];
      if (!shareToken) {
        return { url, status: 'unknown', cloudType: 'quark', checkedAt, message: '无法解析分享链接 token' };
      }

      const tokenUrl = 'https://drive-pc.quark.cn/1/clouddrive/share/sharepage/token?pr=ucpro&fr=pc';
      const response = await fetch(tokenUrl, {
        method: 'POST',
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
          'Content-Type': 'application/json',
          'Accept': 'application/json',
          'Origin': 'https://pan.quark.cn',
          'Referer': 'https://pan.quark.cn/',
        },
        body: JSON.stringify({ pwd_id: shareToken, passcode: '' }),
        signal: AbortSignal.timeout(15000),
      });
      if (!response.ok) {
        const msg = response.status === 403 ? '分享已过期或需要密码' : `HTTP ${response.status}`;
        return { url, status: 'invalid', cloudType: 'quark', checkedAt, message: msg };
      }

      const data = (await response.json()) as {
        status?: number;
        data?: { stoken?: string; expired_at?: number };
      };
      if (data.status === 200 && data.data?.stoken) {
        const expiredAt = data.data?.expired_at || 0;
        const expireDate = expiredAt > 0 ? new Date(expiredAt).toISOString().slice(0, 10) : '';
        return {
          url,
          status: 'valid',
          cloudType: 'quark',
          checkedAt,
          message: expireDate ? `有效链接,过期时间: ${expireDate}` : '有效链接',
        };
      }
      // HTTP 200 but no stoken — possibly a transient anomaly, so stay conservative.
      return { url, status: 'unknown', cloudType: 'quark', checkedAt, message: 'API 返回异常(无 stoken不做失效判定' };
    } catch (err: unknown) {
      return {
        url,
        status: 'unknown',
        cloudType: 'quark',
        checkedAt,
        message: `校验异常: ${LinkValidator.errorText(err).slice(0, 50) || '未知错误'}`,
      };
    }
  }

  /** Validate a Baidu share link through `BaiduDriver`; driver errors yield `unknown`. */
  private async validateBaidu(url: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();
    try {
      const driver = new BaiduDriver();
      const result = await driver.validateShareLink(url);
      return {
        url,
        status: result.valid ? 'valid' : 'invalid',
        cloudType: 'baidu',
        checkedAt,
        message: result.message,
      };
    } catch (err: unknown) {
      return {
        url,
        status: 'unknown',
        cloudType: 'baidu',
        checkedAt,
        message: `校验失败: ${LinkValidator.errorText(err) || String(err)}`,
      };
    }
  }

  /** Validate an Aliyun share link through `AliyunDriver`; driver errors yield `unknown`. */
  private async validateAliyun(url: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();
    try {
      const driver = new AliyunDriver();
      const result = await driver.validateShareLink(url);
      return {
        url,
        status: result.valid ? 'valid' : 'invalid',
        cloudType: 'aliyun',
        checkedAt,
        message: result.message,
      };
    } catch (err: unknown) {
      return {
        url,
        status: 'unknown',
        cloudType: 'aliyun',
        checkedAt,
        message: `校验失败: ${LinkValidator.errorText(err) || String(err)}`,
      };
    }
  }

  /**
   * Fallback: validate by fetching the share page as HTML and checking for
   * custom failure keywords from config (`link_invalid_keywords`). Used for
   * providers without a dedicated validator.
   *
   * HTTP status >= 400 → invalid; a matching keyword in the body → invalid;
   * otherwise valid. Timeouts and network errors are conservatively reported
   * as valid.
   */
  private async validateByHtml(url: string, cloudType: string): Promise<ValidationResult> {
    const checkedAt = new Date().toISOString();
    try {
      const response = await fetch(url, {
        // One signal bounds both the request and the body read below, and
        // needs no manual timer cleanup on the error path.
        signal: AbortSignal.timeout(config.validation.timeout),
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
          'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        },
        redirect: 'follow',
      });
      const text = await response.text();
      const keywords = loadCustomKeywords('link_invalid_keywords');

      if (response.status >= 400) {
        return { url, status: 'invalid', cloudType, checkedAt, message: `HTTP ${response.status} ${response.statusText}` };
      }
      const matched = keywords.find(kw => text.includes(kw));
      if (matched) {
        return { url, status: 'invalid', cloudType, checkedAt, message: `页面包含自定义失效关键词: "${matched}"` };
      }
      return { url, status: 'valid', cloudType, checkedAt, message: 'HTML 页面可访问,未检测到失效关键词' };
    } catch {
      // On timeout or network error, conservatively mark as valid.
      return { url, status: 'valid', cloudType, checkedAt, message: `网络校验超时,保守标记为有效` };
    }
  }

  /**
   * Batch validate multiple links with bounded concurrency.
   *
   * All links are handed to the pool at once, so up to the pool's limit run in
   * parallel. Results are returned in input order. A link whose check throws
   * yields an `unknown` placeholder that still carries its `url` and
   * `cloudType`.
   */
  async validateBatch(urls: Array<{ url: string; cloudType: string }>): Promise<ValidationResult[]> {
    return Promise.all(
      urls.map(async ({ url, cloudType }): Promise<ValidationResult> => {
        try {
          return await this.pool.run(() => this.validate(url, cloudType));
        } catch {
          return {
            url,
            status: 'unknown',
            cloudType,
            checkedAt: new Date().toISOString(),
            message: '校验执行异常',
          };
        }
      }),
    );
  }

  /** Alias of {@link LinkValidator.validateBatch}. */
  async validateBatchWithPool(urls: Array<{ url: string; cloudType: string }>): Promise<ValidationResult[]> {
    return this.validateBatch(urls);
  }
}