opendal/services/huggingface/
backend.rs1use std::sync::Arc;
19
20use bytes::Buf;
21use http::Response;
22use http::StatusCode;
23use log::debug;
24
25use super::HUGGINGFACE_SCHEME;
26use super::config::HuggingfaceConfig;
27use super::core::HuggingfaceCore;
28use super::core::HuggingfaceStatus;
29use super::error::parse_error;
30use super::lister::HuggingfaceLister;
31use crate::raw::*;
32use crate::*;
33
34#[doc = include_str!("docs.md")]
36#[derive(Debug, Default)]
37pub struct HuggingfaceBuilder {
38 pub(super) config: HuggingfaceConfig,
39}
40
41impl HuggingfaceBuilder {
42 pub fn repo_type(mut self, repo_type: &str) -> Self {
51 if !repo_type.is_empty() {
52 self.config.repo_type = Some(repo_type.to_string());
53 }
54 self
55 }
56
57 pub fn repo_id(mut self, repo_id: &str) -> Self {
67 if !repo_id.is_empty() {
68 self.config.repo_id = Some(repo_id.to_string());
69 }
70 self
71 }
72
73 pub fn revision(mut self, revision: &str) -> Self {
81 if !revision.is_empty() {
82 self.config.revision = Some(revision.to_string());
83 }
84 self
85 }
86
87 pub fn root(mut self, root: &str) -> Self {
91 self.config.root = if root.is_empty() {
92 None
93 } else {
94 Some(root.to_string())
95 };
96
97 self
98 }
99
100 pub fn token(mut self, token: &str) -> Self {
104 if !token.is_empty() {
105 self.config.token = Some(token.to_string());
106 }
107 self
108 }
109
110 pub fn endpoint(mut self, endpoint: &str) -> Self {
115 if !endpoint.is_empty() {
116 self.config.endpoint = Some(endpoint.to_string());
117 }
118 self
119 }
120}
121
122impl Builder for HuggingfaceBuilder {
123 type Config = HuggingfaceConfig;
124
125 fn build(self) -> Result<impl Access> {
127 debug!("backend build started: {:?}", &self);
128
129 let repo_type = match self.config.repo_type.as_deref() {
130 Some("model") => Ok(RepoType::Model),
131 Some("dataset") => Ok(RepoType::Dataset),
132 Some("space") => Err(Error::new(
133 ErrorKind::ConfigInvalid,
134 "repo type \"space\" is unsupported",
135 )),
136 Some(repo_type) => Err(Error::new(
137 ErrorKind::ConfigInvalid,
138 format!("unknown repo_type: {repo_type}").as_str(),
139 )
140 .with_operation("Builder::build")
141 .with_context("service", HUGGINGFACE_SCHEME)),
142 None => Ok(RepoType::Model),
143 }?;
144 debug!("backend use repo_type: {:?}", &repo_type);
145
146 let repo_id = match &self.config.repo_id {
147 Some(repo_id) => Ok(repo_id.clone()),
148 None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty")
149 .with_operation("Builder::build")
150 .with_context("service", HUGGINGFACE_SCHEME)),
151 }?;
152 debug!("backend use repo_id: {}", &repo_id);
153
154 let revision = match &self.config.revision {
155 Some(revision) => revision.clone(),
156 None => "main".to_string(),
157 };
158 debug!("backend use revision: {}", &revision);
159
160 let root = normalize_root(&self.config.root.unwrap_or_default());
161 debug!("backend use root: {}", &root);
162
163 let token = self.config.token.as_ref().cloned();
164
165 let endpoint = match &self.config.endpoint {
166 Some(endpoint) => endpoint.clone(),
167 None => {
168 if let Ok(env_endpoint) = std::env::var("HF_ENDPOINT") {
171 env_endpoint
172 } else {
173 "https://huggingface.co".to_string()
174 }
175 }
176 };
177 debug!("backend use endpoint: {}", &endpoint);
178
179 Ok(HuggingfaceBackend {
180 core: Arc::new(HuggingfaceCore {
181 info: {
182 let am = AccessorInfo::default();
183 am.set_scheme(HUGGINGFACE_SCHEME)
184 .set_native_capability(Capability {
185 stat: true,
186 read: true,
187 list: true,
188 list_with_recursive: true,
189 shared: true,
190 ..Default::default()
191 });
192 am.into()
193 },
194 repo_type,
195 repo_id,
196 revision,
197 root,
198 token,
199 endpoint,
200 }),
201 })
202 }
203}
204
205#[derive(Debug, Clone)]
207pub struct HuggingfaceBackend {
208 core: Arc<HuggingfaceCore>,
209}
210
211impl Access for HuggingfaceBackend {
212 type Reader = HttpBody;
213 type Writer = ();
214 type Lister = oio::PageLister<HuggingfaceLister>;
215 type Deleter = ();
216
217 fn info(&self) -> Arc<AccessorInfo> {
218 self.core.info.clone()
219 }
220
221 async fn stat(&self, path: &str, _: OpStat) -> Result<RpStat> {
222 if path == "/" {
224 return Ok(RpStat::new(Metadata::new(EntryMode::DIR)));
225 }
226
227 let resp = self.core.hf_path_info(path).await?;
228
229 let status = resp.status();
230
231 match status {
232 StatusCode::OK => {
233 let mut meta = parse_into_metadata(path, resp.headers())?;
234 let bs = resp.into_body();
235
236 let decoded_response: Vec<HuggingfaceStatus> =
237 serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
238
239 if let Some(status) = decoded_response.first() {
241 if let Some(commit_info) = status.last_commit.as_ref() {
242 meta.set_last_modified(commit_info.date.parse::<Timestamp>()?);
243 }
244
245 meta.set_content_length(status.size);
246
247 match status.type_.as_str() {
248 "directory" => meta.set_mode(EntryMode::DIR),
249 "file" => meta.set_mode(EntryMode::FILE),
250 _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")),
251 };
252 } else {
253 return Err(Error::new(ErrorKind::NotFound, "path not found"));
254 }
255
256 Ok(RpStat::new(meta))
257 }
258 _ => Err(parse_error(resp)),
259 }
260 }
261
262 async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
263 let resp = self.core.hf_resolve(path, args.range(), &args).await?;
264
265 let status = resp.status();
266
267 match status {
268 StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
269 Ok((RpRead::default(), resp.into_body()))
270 }
271 _ => {
272 let (part, mut body) = resp.into_parts();
273 let buf = body.to_buffer().await?;
274 Err(parse_error(Response::from_parts(part, buf)))
275 }
276 }
277 }
278
279 async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
280 let l = HuggingfaceLister::new(self.core.clone(), path.to_string(), args.recursive());
281
282 Ok((RpList::default(), oio::PageLister::new(l)))
283 }
284}
285
/// Kind of Hugging Face repository a backend targets.
///
/// Only the variants listed here can be selected; the builder rejects every
/// other `repo_type` string, including `space`.
#[derive(Clone, Copy, Debug)]
pub enum RepoType {
    /// A model repository (the builder's default).
    Model,
    /// A dataset repository.
    Dataset,
}