Signed-off-by: Jia Chao <jiac13@chinaunicom.cn>
This commit is contained in:
Jia Chao 2024-07-17 11:17:17 +08:00
parent a238cc5fcc
commit 840855d93b
2 changed files with 15 additions and 3 deletions

4
cuweb-syncer Normal file
View File

@ -0,0 +1,4 @@
# Sync task definitions are grouped under `inner`; each sub-table is one named
# task (the sub-table name, here `csaf`, is the task name shown in the logs).
[inner]

[inner.csaf]
# Remote directory listing that the task starts downloading from.
# NOTE(review): plain http:// is used for security advisory data; consider
# https:// if the mirror supports it — confirm before changing.
from = "http://mirrors.ustc.edu.cn/openeuler/security/data/csaf/"
# Where the downloaded tree is written — presumably a path relative to the
# working directory; confirm against the Config type.
dest = "test/csaf"

View File

@ -65,9 +65,15 @@ impl Server {
// 读取配置文件并开始执行访问、下载 // 读取配置文件并开始执行访问、下载
async fn websync(&self, config: &Config) -> crate::Result<()> { async fn websync(&self, config: &Config) -> crate::Result<()> {
debug!("Websync start to process: {:#?}", config);
let websyncers = &config.inner; let websyncers = &config.inner;
if websyncers.len() == 0 {
error!("Config file is empty! Quiting...");
return Ok(())
}
for (task, conf) in websyncers { for (task, conf) in websyncers {
info!("Start to run {task} sync task..."); info!("Start to run `{task}` task...\n{:#?}", conf);
Server::download_directory( Server::download_directory(
self.task_sender.clone(), self.task_sender.clone(),
&conf.from(), &conf.from(),
@ -120,7 +126,7 @@ impl Server {
path: &str, path: &str,
) -> crate::Result<()> { ) -> crate::Result<()> {
let client = Client::new(); let client = Client::new();
let response = client.get(url).send().await?.text().await?; let response = client.get(url).header("User-Agent", "reqwest").send().await?.text().await?;
let document = Html::parse_document(&response); let document = Html::parse_document(&response);
let selector = Selector::parse("a").unwrap(); let selector = Selector::parse("a").unwrap();
@ -128,9 +134,11 @@ impl Server {
debug!("Create local directory: {path}"); debug!("Create local directory: {path}");
for element in document.select(&selector) { for element in document.select(&selector) {
if let Some(href) = element.value().attr("href") { if let Some(href) = element.value().attr("href") {
if href.starts_with("../") { // BugFix: 处理 '../' 和 '/' 的情况
if href.starts_with("../") || href.starts_with("/") {
continue; continue;
} }
info!("Found href: {href}");
// 处理目录的情况 // 处理目录的情况
if href.ends_with('/') { if href.ends_with('/') {
let new_url = format!("{}/{}", url.trim_end_matches('/'), href); let new_url = format!("{}/{}", url.trim_end_matches('/'), href);