use std::fmt::Debug;
use std::fmt::Formatter;
use std::sync::Arc;
use bytes::Buf;
use http::Response;
use http::StatusCode;
use log::debug;
use super::core::HuggingfaceCore;
use super::core::HuggingfaceStatus;
use super::error::parse_error;
use super::lister::HuggingfaceLister;
use crate::raw::*;
use crate::services::HuggingfaceConfig;
use crate::*;
impl Configurator for HuggingfaceConfig {
    type Builder = HuggingfaceBuilder;

    /// Wraps this configuration in a [`HuggingfaceBuilder`] without altering it.
    fn into_builder(self) -> Self::Builder {
        let config = self;
        HuggingfaceBuilder { config }
    }
}
// The public documentation of this builder is pulled in from the sibling `docs.md` file.
#[doc = include_str!("docs.md")]
#[derive(Default, Clone)]
pub struct HuggingfaceBuilder {
// Configuration filled in by the builder's setter methods and consumed by `build`.
config: HuggingfaceConfig,
}
impl Debug for HuggingfaceBuilder {
    /// Formats the builder as a struct named `Builder` with a single `config`
    /// field, delegating to the configuration's own `Debug` implementation.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Builder")
            .field("config", &self.config)
            .finish()
    }
}
impl HuggingfaceBuilder {
    /// Set the repo type of this backend.
    ///
    /// An empty string is ignored and leaves any previously set value untouched.
    /// The value is validated later, when the backend is built.
    pub fn repo_type(mut self, repo_type: &str) -> Self {
        if repo_type.is_empty() {
            return self;
        }
        self.config.repo_type = Some(repo_type.to_owned());
        self
    }

    /// Set the repo id of this backend.
    ///
    /// An empty string is ignored and leaves any previously set value untouched.
    pub fn repo_id(mut self, repo_id: &str) -> Self {
        if repo_id.is_empty() {
            return self;
        }
        self.config.repo_id = Some(repo_id.to_owned());
        self
    }

    /// Set the revision of this backend.
    ///
    /// An empty string is ignored and leaves any previously set value untouched.
    pub fn revision(mut self, revision: &str) -> Self {
        if revision.is_empty() {
            return self;
        }
        self.config.revision = Some(revision.to_owned());
        self
    }

    /// Set the root of this backend.
    ///
    /// Unlike the other setters, an empty string clears any previously set root.
    pub fn root(mut self, root: &str) -> Self {
        self.config.root = (!root.is_empty()).then(|| root.to_owned());
        self
    }

    /// Set the token of this backend.
    ///
    /// An empty string is ignored and leaves any previously set value untouched.
    pub fn token(mut self, token: &str) -> Self {
        if token.is_empty() {
            return self;
        }
        self.config.token = Some(token.to_owned());
        self
    }
}
impl Builder for HuggingfaceBuilder {
const SCHEME: Scheme = Scheme::Huggingface;
type Config = HuggingfaceConfig;
fn build(self) -> Result<impl Access> {
debug!("backend build started: {:?}", &self);
let repo_type = match self.config.repo_type.as_deref() {
Some("model") => Ok(RepoType::Model),
Some("dataset") => Ok(RepoType::Dataset),
Some("space") => Err(Error::new(
ErrorKind::ConfigInvalid,
"repo type \"space\" is unsupported",
)),
Some(repo_type) => Err(Error::new(
ErrorKind::ConfigInvalid,
format!("unknown repo_type: {}", repo_type).as_str(),
)
.with_operation("Builder::build")
.with_context("service", Scheme::Huggingface)),
None => Ok(RepoType::Model),
}?;
debug!("backend use repo_type: {:?}", &repo_type);
let repo_id = match &self.config.repo_id {
Some(repo_id) => Ok(repo_id.clone()),
None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty")
.with_operation("Builder::build")
.with_context("service", Scheme::Huggingface)),
}?;
debug!("backend use repo_id: {}", &repo_id);
let revision = match &self.config.revision {
Some(revision) => revision.clone(),
None => "main".to_string(),
};
debug!("backend use revision: {}", &revision);
let root = normalize_root(&self.config.root.unwrap_or_default());
debug!("backend use root: {}", &root);
let token = self.config.token.as_ref().cloned();
let client = HttpClient::new()?;
Ok(HuggingfaceBackend {
core: Arc::new(HuggingfaceCore {
repo_type,
repo_id,
revision,
root,
token,
client,
}),
})
}
}
/// Backend that serves the `Access` operations for the Huggingface service.
///
/// Instances are produced by `HuggingfaceBuilder::build`.
#[derive(Debug, Clone)]
pub struct HuggingfaceBackend {
// Shared core holding the repo settings, token and HTTP client; the `Arc` is
// cloned into each lister created by `list`.
core: Arc<HuggingfaceCore>,
}
impl Access for HuggingfaceBackend {
    type Reader = HttpBody;
    type Writer = ();
    type Lister = oio::PageLister<HuggingfaceLister>;
    type BlockingReader = ();
    type BlockingWriter = ();
    type BlockingLister = ();

    /// Reports the scheme and the natively supported capabilities:
    /// stat, read, list (including recursive listing) and shared.
    fn info(&self) -> Arc<AccessorInfo> {
        let capability = Capability {
            stat: true,
            read: true,
            list: true,
            list_with_recursive: true,
            shared: true,
            ..Default::default()
        };

        let mut info = AccessorInfo::default();
        info.set_scheme(Scheme::Huggingface)
            .set_native_capability(capability);
        Arc::new(info)
    }

    /// Fetches metadata for `path`.
    ///
    /// The root path `/` is reported as a directory without issuing a request.
    /// For any other path the path-info response is decoded as a JSON list and
    /// its first entry supplies the last-modified time (when commit info is
    /// present), the content length and the entry mode.
    ///
    /// # Errors
    ///
    /// - Non-`200 OK` responses are converted with `parse_error`.
    /// - An empty list yields `ErrorKind::NotFound`.
    /// - An entry type other than `"directory"` or `"file"` yields
    ///   `ErrorKind::Unexpected`.
    /// - Header, JSON and date parsing failures are propagated.
    async fn stat(&self, path: &str, _: OpStat) -> Result<RpStat> {
        if path == "/" {
            return Ok(RpStat::new(Metadata::new(EntryMode::DIR)));
        }

        let resp = self.core.hf_path_info(path).await?;
        if resp.status() != StatusCode::OK {
            return Err(parse_error(resp));
        }

        // Start from the response headers, then refine with the JSON payload.
        let mut meta = parse_into_metadata(path, resp.headers())?;
        let entries: Vec<HuggingfaceStatus> =
            serde_json::from_reader(resp.into_body().reader())
                .map_err(new_json_deserialize_error)?;

        let Some(entry) = entries.first() else {
            return Err(Error::new(ErrorKind::NotFound, "path not found"));
        };

        if let Some(commit) = entry.last_commit.as_ref() {
            let modified = parse_datetime_from_rfc3339(commit.date.as_str())?;
            meta.set_last_modified(modified);
        }
        meta.set_content_length(entry.size);

        let mode = match entry.type_.as_str() {
            "directory" => EntryMode::DIR,
            "file" => EntryMode::FILE,
            _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")),
        };
        meta.set_mode(mode);

        Ok(RpStat::new(meta))
    }

    /// Opens `path` for reading, honoring the range carried by `args`.
    ///
    /// On `200 OK` or `206 Partial Content` the response body is handed back
    /// as the reader. For any other status the body is first collected into a
    /// buffer so that `parse_error` can inspect it.
    async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
        let resp = self.core.hf_resolve(path, args.range(), &args).await?;

        if matches!(
            resp.status(),
            StatusCode::OK | StatusCode::PARTIAL_CONTENT
        ) {
            return Ok((RpRead::default(), resp.into_body()));
        }

        let (part, mut body) = resp.into_parts();
        let buf = body.to_buffer().await?;
        Err(parse_error(Response::from_parts(part, buf)))
    }

    /// Creates a paged lister for `path`, recursive when `args` requests it.
    ///
    /// No request is issued here; the lister receives a clone of the shared core.
    async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
        let core = self.core.clone();
        let lister = HuggingfaceLister::new(core, path.to_string(), args.recursive());
        Ok((RpList::default(), oio::PageLister::new(lister)))
    }
}
/// Repository kinds accepted by `HuggingfaceBuilder::build`.
#[derive(Debug, Clone, Copy)]
pub enum RepoType {
/// Chosen for the `"model"` repo type, and also when no repo type is configured.
Model,
/// Chosen for the `"dataset"` repo type.
Dataset,
}