opendal/services/huggingface/
backend.rs1use std::fmt::Debug;
19use std::fmt::Formatter;
20use std::sync::Arc;
21
22use bytes::Buf;
23use http::Response;
24use http::StatusCode;
25use log::debug;
26
27use super::core::HuggingfaceCore;
28use super::core::HuggingfaceStatus;
29use super::error::parse_error;
30use super::lister::HuggingfaceLister;
31use crate::raw::*;
32use crate::services::HuggingfaceConfig;
33use crate::*;
34
35impl Configurator for HuggingfaceConfig {
36 type Builder = HuggingfaceBuilder;
37 fn into_builder(self) -> Self::Builder {
38 HuggingfaceBuilder { config: self }
39 }
40}
41
42#[doc = include_str!("docs.md")]
44#[derive(Default, Clone)]
45pub struct HuggingfaceBuilder {
46 config: HuggingfaceConfig,
47}
48
49impl Debug for HuggingfaceBuilder {
50 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
51 let mut ds = f.debug_struct("Builder");
52
53 ds.field("config", &self.config);
54 ds.finish()
55 }
56}
57
58impl HuggingfaceBuilder {
59 pub fn repo_type(mut self, repo_type: &str) -> Self {
68 if !repo_type.is_empty() {
69 self.config.repo_type = Some(repo_type.to_string());
70 }
71 self
72 }
73
74 pub fn repo_id(mut self, repo_id: &str) -> Self {
84 if !repo_id.is_empty() {
85 self.config.repo_id = Some(repo_id.to_string());
86 }
87 self
88 }
89
90 pub fn revision(mut self, revision: &str) -> Self {
98 if !revision.is_empty() {
99 self.config.revision = Some(revision.to_string());
100 }
101 self
102 }
103
104 pub fn root(mut self, root: &str) -> Self {
108 self.config.root = if root.is_empty() {
109 None
110 } else {
111 Some(root.to_string())
112 };
113
114 self
115 }
116
117 pub fn token(mut self, token: &str) -> Self {
121 if !token.is_empty() {
122 self.config.token = Some(token.to_string());
123 }
124 self
125 }
126}
127
128impl Builder for HuggingfaceBuilder {
129 const SCHEME: Scheme = Scheme::Huggingface;
130 type Config = HuggingfaceConfig;
131
132 fn build(self) -> Result<impl Access> {
134 debug!("backend build started: {:?}", &self);
135
136 let repo_type = match self.config.repo_type.as_deref() {
137 Some("model") => Ok(RepoType::Model),
138 Some("dataset") => Ok(RepoType::Dataset),
139 Some("space") => Err(Error::new(
140 ErrorKind::ConfigInvalid,
141 "repo type \"space\" is unsupported",
142 )),
143 Some(repo_type) => Err(Error::new(
144 ErrorKind::ConfigInvalid,
145 format!("unknown repo_type: {}", repo_type).as_str(),
146 )
147 .with_operation("Builder::build")
148 .with_context("service", Scheme::Huggingface)),
149 None => Ok(RepoType::Model),
150 }?;
151 debug!("backend use repo_type: {:?}", &repo_type);
152
153 let repo_id = match &self.config.repo_id {
154 Some(repo_id) => Ok(repo_id.clone()),
155 None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty")
156 .with_operation("Builder::build")
157 .with_context("service", Scheme::Huggingface)),
158 }?;
159 debug!("backend use repo_id: {}", &repo_id);
160
161 let revision = match &self.config.revision {
162 Some(revision) => revision.clone(),
163 None => "main".to_string(),
164 };
165 debug!("backend use revision: {}", &revision);
166
167 let root = normalize_root(&self.config.root.unwrap_or_default());
168 debug!("backend use root: {}", &root);
169
170 let token = self.config.token.as_ref().cloned();
171
172 Ok(HuggingfaceBackend {
173 core: Arc::new(HuggingfaceCore {
174 info: {
175 let am = AccessorInfo::default();
176 am.set_scheme(Scheme::Huggingface)
177 .set_native_capability(Capability {
178 stat: true,
179 stat_has_content_length: true,
180 stat_has_last_modified: true,
181
182 read: true,
183
184 list: true,
185 list_with_recursive: true,
186 list_has_content_length: true,
187 list_has_last_modified: true,
188
189 shared: true,
190
191 ..Default::default()
192 });
193 am.into()
194 },
195 repo_type,
196 repo_id,
197 revision,
198 root,
199 token,
200 }),
201 })
202 }
203}
204
205#[derive(Debug, Clone)]
207pub struct HuggingfaceBackend {
208 core: Arc<HuggingfaceCore>,
209}
210
211impl Access for HuggingfaceBackend {
212 type Reader = HttpBody;
213 type Writer = ();
214 type Lister = oio::PageLister<HuggingfaceLister>;
215 type Deleter = ();
216 type BlockingReader = ();
217 type BlockingWriter = ();
218 type BlockingLister = ();
219 type BlockingDeleter = ();
220
221 fn info(&self) -> Arc<AccessorInfo> {
222 self.core.info.clone()
223 }
224
225 async fn stat(&self, path: &str, _: OpStat) -> Result<RpStat> {
226 if path == "/" {
228 return Ok(RpStat::new(Metadata::new(EntryMode::DIR)));
229 }
230
231 let resp = self.core.hf_path_info(path).await?;
232
233 let status = resp.status();
234
235 match status {
236 StatusCode::OK => {
237 let mut meta = parse_into_metadata(path, resp.headers())?;
238 let bs = resp.into_body();
239
240 let decoded_response: Vec<HuggingfaceStatus> =
241 serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
242
243 if let Some(status) = decoded_response.first() {
245 if let Some(commit_info) = status.last_commit.as_ref() {
246 meta.set_last_modified(parse_datetime_from_rfc3339(
247 commit_info.date.as_str(),
248 )?);
249 }
250
251 meta.set_content_length(status.size);
252
253 match status.type_.as_str() {
254 "directory" => meta.set_mode(EntryMode::DIR),
255 "file" => meta.set_mode(EntryMode::FILE),
256 _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")),
257 };
258 } else {
259 return Err(Error::new(ErrorKind::NotFound, "path not found"));
260 }
261
262 Ok(RpStat::new(meta))
263 }
264 _ => Err(parse_error(resp)),
265 }
266 }
267
268 async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
269 let resp = self.core.hf_resolve(path, args.range(), &args).await?;
270
271 let status = resp.status();
272
273 match status {
274 StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
275 Ok((RpRead::default(), resp.into_body()))
276 }
277 _ => {
278 let (part, mut body) = resp.into_parts();
279 let buf = body.to_buffer().await?;
280 Err(parse_error(Response::from_parts(part, buf)))
281 }
282 }
283 }
284
285 async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
286 let l = HuggingfaceLister::new(self.core.clone(), path.to_string(), args.recursive());
287
288 Ok((RpList::default(), oio::PageLister::new(l)))
289 }
290}
291
/// Repository type the backend operates on.
#[derive(Clone, Copy, Debug)]
pub enum RepoType {
    /// A model repository.
    Model,
    /// A dataset repository.
    Dataset,
}