opendal/services/huggingface/
backend.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt::Debug;
19use std::fmt::Formatter;
20use std::sync::Arc;
21
22use bytes::Buf;
23use http::Response;
24use http::StatusCode;
25use log::debug;
26
27use super::core::HuggingfaceCore;
28use super::core::HuggingfaceStatus;
29use super::error::parse_error;
30use super::lister::HuggingfaceLister;
31use crate::raw::*;
32use crate::services::HuggingfaceConfig;
33use crate::*;
34
35impl Configurator for HuggingfaceConfig {
36    type Builder = HuggingfaceBuilder;
37    fn into_builder(self) -> Self::Builder {
38        HuggingfaceBuilder { config: self }
39    }
40}
41
42/// [Huggingface](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api)'s API support.
43#[doc = include_str!("docs.md")]
44#[derive(Default, Clone)]
45pub struct HuggingfaceBuilder {
46    config: HuggingfaceConfig,
47}
48
49impl Debug for HuggingfaceBuilder {
50    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
51        let mut ds = f.debug_struct("Builder");
52
53        ds.field("config", &self.config);
54        ds.finish()
55    }
56}
57
58impl HuggingfaceBuilder {
59    /// Set repo type of this backend. Default is model.
60    ///
61    /// Available values:
62    /// - model
63    /// - dataset
64    ///
65    /// Currently, only models and datasets are supported.
66    /// [Reference](https://huggingface.co/docs/hub/repositories)
67    pub fn repo_type(mut self, repo_type: &str) -> Self {
68        if !repo_type.is_empty() {
69            self.config.repo_type = Some(repo_type.to_string());
70        }
71        self
72    }
73
74    /// Set repo id of this backend. This is required.
75    ///
76    /// Repo id consists of the account name and the repository name.
77    ///
78    /// For example, model's repo id looks like:
79    /// - meta-llama/Llama-2-7b
80    ///
81    /// Dataset's repo id looks like:
82    /// - databricks/databricks-dolly-15k
83    pub fn repo_id(mut self, repo_id: &str) -> Self {
84        if !repo_id.is_empty() {
85            self.config.repo_id = Some(repo_id.to_string());
86        }
87        self
88    }
89
90    /// Set revision of this backend. Default is main.
91    ///
92    /// Revision can be a branch name or a commit hash.
93    ///
94    /// For example, revision can be:
95    /// - main
96    /// - 1d0c4eb
97    pub fn revision(mut self, revision: &str) -> Self {
98        if !revision.is_empty() {
99            self.config.revision = Some(revision.to_string());
100        }
101        self
102    }
103
104    /// Set root of this backend.
105    ///
106    /// All operations will happen under this root.
107    pub fn root(mut self, root: &str) -> Self {
108        self.config.root = if root.is_empty() {
109            None
110        } else {
111            Some(root.to_string())
112        };
113
114        self
115    }
116
117    /// Set the token of this backend.
118    ///
119    /// This is optional.
120    pub fn token(mut self, token: &str) -> Self {
121        if !token.is_empty() {
122            self.config.token = Some(token.to_string());
123        }
124        self
125    }
126}
127
128impl Builder for HuggingfaceBuilder {
129    const SCHEME: Scheme = Scheme::Huggingface;
130    type Config = HuggingfaceConfig;
131
132    /// Build a HuggingfaceBackend.
133    fn build(self) -> Result<impl Access> {
134        debug!("backend build started: {:?}", &self);
135
136        let repo_type = match self.config.repo_type.as_deref() {
137            Some("model") => Ok(RepoType::Model),
138            Some("dataset") => Ok(RepoType::Dataset),
139            Some("space") => Err(Error::new(
140                ErrorKind::ConfigInvalid,
141                "repo type \"space\" is unsupported",
142            )),
143            Some(repo_type) => Err(Error::new(
144                ErrorKind::ConfigInvalid,
145                format!("unknown repo_type: {}", repo_type).as_str(),
146            )
147            .with_operation("Builder::build")
148            .with_context("service", Scheme::Huggingface)),
149            None => Ok(RepoType::Model),
150        }?;
151        debug!("backend use repo_type: {:?}", &repo_type);
152
153        let repo_id = match &self.config.repo_id {
154            Some(repo_id) => Ok(repo_id.clone()),
155            None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty")
156                .with_operation("Builder::build")
157                .with_context("service", Scheme::Huggingface)),
158        }?;
159        debug!("backend use repo_id: {}", &repo_id);
160
161        let revision = match &self.config.revision {
162            Some(revision) => revision.clone(),
163            None => "main".to_string(),
164        };
165        debug!("backend use revision: {}", &revision);
166
167        let root = normalize_root(&self.config.root.unwrap_or_default());
168        debug!("backend use root: {}", &root);
169
170        let token = self.config.token.as_ref().cloned();
171
172        Ok(HuggingfaceBackend {
173            core: Arc::new(HuggingfaceCore {
174                info: {
175                    let am = AccessorInfo::default();
176                    am.set_scheme(Scheme::Huggingface)
177                        .set_native_capability(Capability {
178                            stat: true,
179                            stat_has_content_length: true,
180                            stat_has_last_modified: true,
181
182                            read: true,
183
184                            list: true,
185                            list_with_recursive: true,
186                            list_has_content_length: true,
187                            list_has_last_modified: true,
188
189                            shared: true,
190
191                            ..Default::default()
192                        });
193                    am.into()
194                },
195                repo_type,
196                repo_id,
197                revision,
198                root,
199                token,
200            }),
201        })
202    }
203}
204
205/// Backend for Huggingface service
206#[derive(Debug, Clone)]
207pub struct HuggingfaceBackend {
208    core: Arc<HuggingfaceCore>,
209}
210
211impl Access for HuggingfaceBackend {
212    type Reader = HttpBody;
213    type Writer = ();
214    type Lister = oio::PageLister<HuggingfaceLister>;
215    type Deleter = ();
216    type BlockingReader = ();
217    type BlockingWriter = ();
218    type BlockingLister = ();
219    type BlockingDeleter = ();
220
221    fn info(&self) -> Arc<AccessorInfo> {
222        self.core.info.clone()
223    }
224
225    async fn stat(&self, path: &str, _: OpStat) -> Result<RpStat> {
226        // Stat root always returns a DIR.
227        if path == "/" {
228            return Ok(RpStat::new(Metadata::new(EntryMode::DIR)));
229        }
230
231        let resp = self.core.hf_path_info(path).await?;
232
233        let status = resp.status();
234
235        match status {
236            StatusCode::OK => {
237                let mut meta = parse_into_metadata(path, resp.headers())?;
238                let bs = resp.into_body();
239
240                let decoded_response: Vec<HuggingfaceStatus> =
241                    serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
242
243                // NOTE: if the file is not found, the server will return 200 with an empty array
244                if let Some(status) = decoded_response.first() {
245                    if let Some(commit_info) = status.last_commit.as_ref() {
246                        meta.set_last_modified(parse_datetime_from_rfc3339(
247                            commit_info.date.as_str(),
248                        )?);
249                    }
250
251                    meta.set_content_length(status.size);
252
253                    match status.type_.as_str() {
254                        "directory" => meta.set_mode(EntryMode::DIR),
255                        "file" => meta.set_mode(EntryMode::FILE),
256                        _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")),
257                    };
258                } else {
259                    return Err(Error::new(ErrorKind::NotFound, "path not found"));
260                }
261
262                Ok(RpStat::new(meta))
263            }
264            _ => Err(parse_error(resp)),
265        }
266    }
267
268    async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
269        let resp = self.core.hf_resolve(path, args.range(), &args).await?;
270
271        let status = resp.status();
272
273        match status {
274            StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
275                Ok((RpRead::default(), resp.into_body()))
276            }
277            _ => {
278                let (part, mut body) = resp.into_parts();
279                let buf = body.to_buffer().await?;
280                Err(parse_error(Response::from_parts(part, buf)))
281            }
282        }
283    }
284
285    async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
286        let l = HuggingfaceLister::new(self.core.clone(), path.to_string(), args.recursive());
287
288        Ok((RpList::default(), oio::PageLister::new(l)))
289    }
290}
291
/// Kind of Huggingface repository a backend targets. Only `model` and
/// `dataset` are supported at the moment.
/// [Reference](https://huggingface.co/docs/hub/repositories)
#[derive(Clone, Copy, Debug)]
pub enum RepoType {
    /// A model repository.
    Model,
    /// A dataset repository.
    Dataset,
}