opendal_core/services/huggingface/
backend.rs1use std::sync::Arc;
19
20use bytes::Buf;
21use http::Response;
22use http::StatusCode;
23use log::debug;
24
25use super::HUGGINGFACE_SCHEME;
26use super::config::HuggingfaceConfig;
27use super::core::HuggingfaceCore;
28use super::core::HuggingfaceStatus;
29use super::error::parse_error;
30use super::lister::HuggingfaceLister;
31use crate::raw::*;
32use crate::*;
33
34#[doc = include_str!("docs.md")]
36#[derive(Debug, Default)]
37pub struct HuggingfaceBuilder {
38 pub(super) config: HuggingfaceConfig,
39}
40
41impl HuggingfaceBuilder {
42 pub fn repo_type(mut self, repo_type: &str) -> Self {
52 if !repo_type.is_empty() {
53 self.config.repo_type = Some(repo_type.to_string());
54 }
55 self
56 }
57
58 pub fn repo_id(mut self, repo_id: &str) -> Self {
68 if !repo_id.is_empty() {
69 self.config.repo_id = Some(repo_id.to_string());
70 }
71 self
72 }
73
74 pub fn revision(mut self, revision: &str) -> Self {
82 if !revision.is_empty() {
83 self.config.revision = Some(revision.to_string());
84 }
85 self
86 }
87
88 pub fn root(mut self, root: &str) -> Self {
92 self.config.root = if root.is_empty() {
93 None
94 } else {
95 Some(root.to_string())
96 };
97
98 self
99 }
100
101 pub fn token(mut self, token: &str) -> Self {
105 if !token.is_empty() {
106 self.config.token = Some(token.to_string());
107 }
108 self
109 }
110
111 pub fn endpoint(mut self, endpoint: &str) -> Self {
116 if !endpoint.is_empty() {
117 self.config.endpoint = Some(endpoint.to_string());
118 }
119 self
120 }
121}
122
123impl Builder for HuggingfaceBuilder {
124 type Config = HuggingfaceConfig;
125
126 fn build(self) -> Result<impl Access> {
128 debug!("backend build started: {:?}", &self);
129
130 let repo_type = match self.config.repo_type.as_deref() {
131 Some("model") => Ok(RepoType::Model),
132 Some("dataset") | Some("datasets") => Ok(RepoType::Dataset),
133 Some("space") => Ok(RepoType::Space),
134 Some(repo_type) => Err(Error::new(
135 ErrorKind::ConfigInvalid,
136 format!("unknown repo_type: {repo_type}").as_str(),
137 )
138 .with_operation("Builder::build")
139 .with_context("service", HUGGINGFACE_SCHEME)),
140 None => Ok(RepoType::Model),
141 }?;
142 debug!("backend use repo_type: {:?}", &repo_type);
143
144 let repo_id = match &self.config.repo_id {
145 Some(repo_id) => Ok(repo_id.clone()),
146 None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty")
147 .with_operation("Builder::build")
148 .with_context("service", HUGGINGFACE_SCHEME)),
149 }?;
150 debug!("backend use repo_id: {}", &repo_id);
151
152 let revision = match &self.config.revision {
153 Some(revision) => revision.clone(),
154 None => "main".to_string(),
155 };
156 debug!("backend use revision: {}", &revision);
157
158 let root = normalize_root(&self.config.root.unwrap_or_default());
159 debug!("backend use root: {}", &root);
160
161 let token = self.config.token.as_ref().cloned();
162
163 let endpoint = match &self.config.endpoint {
164 Some(endpoint) => endpoint.clone(),
165 None => {
166 if let Ok(env_endpoint) = std::env::var("HF_ENDPOINT") {
169 env_endpoint
170 } else {
171 "https://huggingface.co".to_string()
172 }
173 }
174 };
175 debug!("backend use endpoint: {}", &endpoint);
176
177 Ok(HuggingfaceBackend {
178 core: Arc::new(HuggingfaceCore {
179 info: {
180 let am = AccessorInfo::default();
181 am.set_scheme(HUGGINGFACE_SCHEME)
182 .set_native_capability(Capability {
183 stat: true,
184 read: true,
185 list: true,
186 list_with_recursive: true,
187 shared: true,
188 ..Default::default()
189 });
190 am.into()
191 },
192 repo_type,
193 repo_id,
194 revision,
195 root,
196 token,
197 endpoint,
198 }),
199 })
200 }
201}
202
203#[derive(Debug, Clone)]
205pub struct HuggingfaceBackend {
206 core: Arc<HuggingfaceCore>,
207}
208
209impl Access for HuggingfaceBackend {
210 type Reader = HttpBody;
211 type Writer = ();
212 type Lister = oio::PageLister<HuggingfaceLister>;
213 type Deleter = ();
214
215 fn info(&self) -> Arc<AccessorInfo> {
216 self.core.info.clone()
217 }
218
219 async fn stat(&self, path: &str, _: OpStat) -> Result<RpStat> {
220 if path == "/" {
222 return Ok(RpStat::new(Metadata::new(EntryMode::DIR)));
223 }
224
225 let resp = self.core.hf_path_info(path).await?;
226
227 let status = resp.status();
228
229 match status {
230 StatusCode::OK => {
231 let mut meta = parse_into_metadata(path, resp.headers())?;
232 let bs = resp.into_body();
233
234 let decoded_response: Vec<HuggingfaceStatus> =
235 serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
236
237 if let Some(status) = decoded_response.first() {
239 if let Some(commit_info) = status.last_commit.as_ref() {
240 meta.set_last_modified(commit_info.date.parse::<Timestamp>()?);
241 }
242
243 meta.set_content_length(status.size);
244
245 let etag = if let Some(lfs) = &status.lfs {
247 &lfs.oid
248 } else {
249 &status.oid
250 };
251 meta.set_etag(etag);
252
253 match status.type_.as_str() {
254 "directory" => meta.set_mode(EntryMode::DIR),
255 "file" => meta.set_mode(EntryMode::FILE),
256 _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")),
257 };
258 } else {
259 return Err(Error::new(ErrorKind::NotFound, "path not found"));
260 }
261
262 Ok(RpStat::new(meta))
263 }
264 _ => Err(parse_error(resp)),
265 }
266 }
267
268 async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
269 let resp = self.core.hf_resolve(path, args.range(), &args).await?;
270
271 let status = resp.status();
272
273 match status {
274 StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
275 Ok((RpRead::default(), resp.into_body()))
276 }
277 _ => {
278 let (part, mut body) = resp.into_parts();
279 let buf = body.to_buffer().await?;
280 Err(parse_error(Response::from_parts(part, buf)))
281 }
282 }
283 }
284
285 async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
286 let l = HuggingfaceLister::new(self.core.clone(), path.to_string(), args.recursive());
287
288 Ok((RpList::default(), oio::PageLister::new(l)))
289 }
290}
291
/// Kind of Hugging Face repository a backend is bound to.
///
/// The builder maps the configured `repo_type` string onto these variants.
#[derive(Clone, Copy, Debug)]
pub enum RepoType {
    /// Selected by `model`, and used when no repo type is configured.
    Model,
    /// Selected by `dataset` or its alias `datasets`.
    Dataset,
    /// Selected by `space`.
    Space,
}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// `datasets` must be accepted by `build` as an alias of `dataset`.
    #[test]
    fn build_accepts_datasets_alias() {
        let builder = HuggingfaceBuilder::default()
            .repo_id("org/repo")
            .repo_type("datasets");

        builder
            .build()
            .expect("builder should accept datasets alias");
    }

    /// `space` must be accepted by `build` as a valid repo type.
    #[test]
    fn build_accepts_space_repo_type() {
        let builder = HuggingfaceBuilder::default()
            .repo_id("org/space")
            .repo_type("space");

        builder
            .build()
            .expect("builder should accept space repo type");
    }
}