opendal/services/huggingface/
backend.rs1use std::fmt::Debug;
19use std::fmt::Formatter;
20use std::sync::Arc;
21
22use bytes::Buf;
23use http::Response;
24use http::StatusCode;
25use log::debug;
26
27use super::HUGGINGFACE_SCHEME;
28use super::core::HuggingfaceCore;
29use super::core::HuggingfaceStatus;
30use super::error::parse_error;
31use super::lister::HuggingfaceLister;
32use crate::raw::*;
33use crate::services::HuggingfaceConfig;
34use crate::*;
35
36#[doc = include_str!("docs.md")]
38#[derive(Default, Clone)]
39pub struct HuggingfaceBuilder {
40 pub(super) config: HuggingfaceConfig,
41}
42
43impl Debug for HuggingfaceBuilder {
44 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
45 let mut ds = f.debug_struct("Builder");
46
47 ds.field("config", &self.config);
48 ds.finish()
49 }
50}
51
52impl HuggingfaceBuilder {
53 pub fn repo_type(mut self, repo_type: &str) -> Self {
62 if !repo_type.is_empty() {
63 self.config.repo_type = Some(repo_type.to_string());
64 }
65 self
66 }
67
68 pub fn repo_id(mut self, repo_id: &str) -> Self {
78 if !repo_id.is_empty() {
79 self.config.repo_id = Some(repo_id.to_string());
80 }
81 self
82 }
83
84 pub fn revision(mut self, revision: &str) -> Self {
92 if !revision.is_empty() {
93 self.config.revision = Some(revision.to_string());
94 }
95 self
96 }
97
98 pub fn root(mut self, root: &str) -> Self {
102 self.config.root = if root.is_empty() {
103 None
104 } else {
105 Some(root.to_string())
106 };
107
108 self
109 }
110
111 pub fn token(mut self, token: &str) -> Self {
115 if !token.is_empty() {
116 self.config.token = Some(token.to_string());
117 }
118 self
119 }
120}
121
122impl Builder for HuggingfaceBuilder {
123 type Config = HuggingfaceConfig;
124
125 fn build(self) -> Result<impl Access> {
127 debug!("backend build started: {:?}", &self);
128
129 let repo_type = match self.config.repo_type.as_deref() {
130 Some("model") => Ok(RepoType::Model),
131 Some("dataset") => Ok(RepoType::Dataset),
132 Some("space") => Err(Error::new(
133 ErrorKind::ConfigInvalid,
134 "repo type \"space\" is unsupported",
135 )),
136 Some(repo_type) => Err(Error::new(
137 ErrorKind::ConfigInvalid,
138 format!("unknown repo_type: {repo_type}").as_str(),
139 )
140 .with_operation("Builder::build")
141 .with_context("service", Scheme::Huggingface)),
142 None => Ok(RepoType::Model),
143 }?;
144 debug!("backend use repo_type: {:?}", &repo_type);
145
146 let repo_id = match &self.config.repo_id {
147 Some(repo_id) => Ok(repo_id.clone()),
148 None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty")
149 .with_operation("Builder::build")
150 .with_context("service", Scheme::Huggingface)),
151 }?;
152 debug!("backend use repo_id: {}", &repo_id);
153
154 let revision = match &self.config.revision {
155 Some(revision) => revision.clone(),
156 None => "main".to_string(),
157 };
158 debug!("backend use revision: {}", &revision);
159
160 let root = normalize_root(&self.config.root.unwrap_or_default());
161 debug!("backend use root: {}", &root);
162
163 let token = self.config.token.as_ref().cloned();
164
165 Ok(HuggingfaceBackend {
166 core: Arc::new(HuggingfaceCore {
167 info: {
168 let am = AccessorInfo::default();
169 am.set_scheme(HUGGINGFACE_SCHEME)
170 .set_native_capability(Capability {
171 stat: true,
172
173 read: true,
174
175 list: true,
176 list_with_recursive: true,
177
178 shared: true,
179
180 ..Default::default()
181 });
182 am.into()
183 },
184 repo_type,
185 repo_id,
186 revision,
187 root,
188 token,
189 }),
190 })
191 }
192}
193
194#[derive(Debug, Clone)]
196pub struct HuggingfaceBackend {
197 core: Arc<HuggingfaceCore>,
198}
199
200impl Access for HuggingfaceBackend {
201 type Reader = HttpBody;
202 type Writer = ();
203 type Lister = oio::PageLister<HuggingfaceLister>;
204 type Deleter = ();
205
206 fn info(&self) -> Arc<AccessorInfo> {
207 self.core.info.clone()
208 }
209
210 async fn stat(&self, path: &str, _: OpStat) -> Result<RpStat> {
211 if path == "/" {
213 return Ok(RpStat::new(Metadata::new(EntryMode::DIR)));
214 }
215
216 let resp = self.core.hf_path_info(path).await?;
217
218 let status = resp.status();
219
220 match status {
221 StatusCode::OK => {
222 let mut meta = parse_into_metadata(path, resp.headers())?;
223 let bs = resp.into_body();
224
225 let decoded_response: Vec<HuggingfaceStatus> =
226 serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
227
228 if let Some(status) = decoded_response.first() {
230 if let Some(commit_info) = status.last_commit.as_ref() {
231 meta.set_last_modified(commit_info.date.parse::<Timestamp>()?);
232 }
233
234 meta.set_content_length(status.size);
235
236 match status.type_.as_str() {
237 "directory" => meta.set_mode(EntryMode::DIR),
238 "file" => meta.set_mode(EntryMode::FILE),
239 _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")),
240 };
241 } else {
242 return Err(Error::new(ErrorKind::NotFound, "path not found"));
243 }
244
245 Ok(RpStat::new(meta))
246 }
247 _ => Err(parse_error(resp)),
248 }
249 }
250
251 async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
252 let resp = self.core.hf_resolve(path, args.range(), &args).await?;
253
254 let status = resp.status();
255
256 match status {
257 StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
258 Ok((RpRead::default(), resp.into_body()))
259 }
260 _ => {
261 let (part, mut body) = resp.into_parts();
262 let buf = body.to_buffer().await?;
263 Err(parse_error(Response::from_parts(part, buf)))
264 }
265 }
266 }
267
268 async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
269 let l = HuggingfaceLister::new(self.core.clone(), path.to_string(), args.recursive());
270
271 Ok((RpList::default(), oio::PageLister::new(l)))
272 }
273}
274
/// Repository kinds this backend can be built for.
#[derive(Clone, Copy, Debug)]
pub enum RepoType {
    /// A model repository; also used when no repository type is configured.
    Model,
    /// A dataset repository.
    Dataset,
}