opendal/services/huggingface/
backend.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::sync::Arc;
19
20use bytes::Buf;
21use http::Response;
22use http::StatusCode;
23use log::debug;
24
25use super::HUGGINGFACE_SCHEME;
26use super::config::HuggingfaceConfig;
27use super::core::HuggingfaceCore;
28use super::core::HuggingfaceStatus;
29use super::error::parse_error;
30use super::lister::HuggingfaceLister;
31use crate::raw::*;
32use crate::*;
33
34/// [Huggingface](https://huggingface.co/docs/huggingface_hub/package_reference/hf_api)'s API support.
35#[doc = include_str!("docs.md")]
36#[derive(Debug, Default)]
37pub struct HuggingfaceBuilder {
38    pub(super) config: HuggingfaceConfig,
39}
40
41impl HuggingfaceBuilder {
42    /// Set repo type of this backend. Default is model.
43    ///
44    /// Available values:
45    /// - model
46    /// - dataset
47    ///
48    /// Currently, only models and datasets are supported.
49    /// [Reference](https://huggingface.co/docs/hub/repositories)
50    pub fn repo_type(mut self, repo_type: &str) -> Self {
51        if !repo_type.is_empty() {
52            self.config.repo_type = Some(repo_type.to_string());
53        }
54        self
55    }
56
57    /// Set repo id of this backend. This is required.
58    ///
59    /// Repo id consists of the account name and the repository name.
60    ///
61    /// For example, model's repo id looks like:
62    /// - meta-llama/Llama-2-7b
63    ///
64    /// Dataset's repo id looks like:
65    /// - databricks/databricks-dolly-15k
66    pub fn repo_id(mut self, repo_id: &str) -> Self {
67        if !repo_id.is_empty() {
68            self.config.repo_id = Some(repo_id.to_string());
69        }
70        self
71    }
72
73    /// Set revision of this backend. Default is main.
74    ///
75    /// Revision can be a branch name or a commit hash.
76    ///
77    /// For example, revision can be:
78    /// - main
79    /// - 1d0c4eb
80    pub fn revision(mut self, revision: &str) -> Self {
81        if !revision.is_empty() {
82            self.config.revision = Some(revision.to_string());
83        }
84        self
85    }
86
87    /// Set root of this backend.
88    ///
89    /// All operations will happen under this root.
90    pub fn root(mut self, root: &str) -> Self {
91        self.config.root = if root.is_empty() {
92            None
93        } else {
94            Some(root.to_string())
95        };
96
97        self
98    }
99
100    /// Set the token of this backend.
101    ///
102    /// This is optional.
103    pub fn token(mut self, token: &str) -> Self {
104        if !token.is_empty() {
105            self.config.token = Some(token.to_string());
106        }
107        self
108    }
109}
110
111impl Builder for HuggingfaceBuilder {
112    type Config = HuggingfaceConfig;
113
114    /// Build a HuggingfaceBackend.
115    fn build(self) -> Result<impl Access> {
116        debug!("backend build started: {:?}", &self);
117
118        let repo_type = match self.config.repo_type.as_deref() {
119            Some("model") => Ok(RepoType::Model),
120            Some("dataset") => Ok(RepoType::Dataset),
121            Some("space") => Err(Error::new(
122                ErrorKind::ConfigInvalid,
123                "repo type \"space\" is unsupported",
124            )),
125            Some(repo_type) => Err(Error::new(
126                ErrorKind::ConfigInvalid,
127                format!("unknown repo_type: {repo_type}").as_str(),
128            )
129            .with_operation("Builder::build")
130            .with_context("service", HUGGINGFACE_SCHEME)),
131            None => Ok(RepoType::Model),
132        }?;
133        debug!("backend use repo_type: {:?}", &repo_type);
134
135        let repo_id = match &self.config.repo_id {
136            Some(repo_id) => Ok(repo_id.clone()),
137            None => Err(Error::new(ErrorKind::ConfigInvalid, "repo_id is empty")
138                .with_operation("Builder::build")
139                .with_context("service", HUGGINGFACE_SCHEME)),
140        }?;
141        debug!("backend use repo_id: {}", &repo_id);
142
143        let revision = match &self.config.revision {
144            Some(revision) => revision.clone(),
145            None => "main".to_string(),
146        };
147        debug!("backend use revision: {}", &revision);
148
149        let root = normalize_root(&self.config.root.unwrap_or_default());
150        debug!("backend use root: {}", &root);
151
152        let token = self.config.token.as_ref().cloned();
153
154        Ok(HuggingfaceBackend {
155            core: Arc::new(HuggingfaceCore {
156                info: {
157                    let am = AccessorInfo::default();
158                    am.set_scheme(HUGGINGFACE_SCHEME)
159                        .set_native_capability(Capability {
160                            stat: true,
161
162                            read: true,
163
164                            list: true,
165                            list_with_recursive: true,
166
167                            shared: true,
168
169                            ..Default::default()
170                        });
171                    am.into()
172                },
173                repo_type,
174                repo_id,
175                revision,
176                root,
177                token,
178            }),
179        })
180    }
181}
182
183/// Backend for Huggingface service
184#[derive(Debug, Clone)]
185pub struct HuggingfaceBackend {
186    core: Arc<HuggingfaceCore>,
187}
188
189impl Access for HuggingfaceBackend {
190    type Reader = HttpBody;
191    type Writer = ();
192    type Lister = oio::PageLister<HuggingfaceLister>;
193    type Deleter = ();
194
195    fn info(&self) -> Arc<AccessorInfo> {
196        self.core.info.clone()
197    }
198
199    async fn stat(&self, path: &str, _: OpStat) -> Result<RpStat> {
200        // Stat root always returns a DIR.
201        if path == "/" {
202            return Ok(RpStat::new(Metadata::new(EntryMode::DIR)));
203        }
204
205        let resp = self.core.hf_path_info(path).await?;
206
207        let status = resp.status();
208
209        match status {
210            StatusCode::OK => {
211                let mut meta = parse_into_metadata(path, resp.headers())?;
212                let bs = resp.into_body();
213
214                let decoded_response: Vec<HuggingfaceStatus> =
215                    serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
216
217                // NOTE: if the file is not found, the server will return 200 with an empty array
218                if let Some(status) = decoded_response.first() {
219                    if let Some(commit_info) = status.last_commit.as_ref() {
220                        meta.set_last_modified(commit_info.date.parse::<Timestamp>()?);
221                    }
222
223                    meta.set_content_length(status.size);
224
225                    match status.type_.as_str() {
226                        "directory" => meta.set_mode(EntryMode::DIR),
227                        "file" => meta.set_mode(EntryMode::FILE),
228                        _ => return Err(Error::new(ErrorKind::Unexpected, "unknown status type")),
229                    };
230                } else {
231                    return Err(Error::new(ErrorKind::NotFound, "path not found"));
232                }
233
234                Ok(RpStat::new(meta))
235            }
236            _ => Err(parse_error(resp)),
237        }
238    }
239
240    async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
241        let resp = self.core.hf_resolve(path, args.range(), &args).await?;
242
243        let status = resp.status();
244
245        match status {
246            StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
247                Ok((RpRead::default(), resp.into_body()))
248            }
249            _ => {
250                let (part, mut body) = resp.into_parts();
251                let buf = body.to_buffer().await?;
252                Err(parse_error(Response::from_parts(part, buf)))
253            }
254        }
255    }
256
257    async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
258        let l = HuggingfaceLister::new(self.core.clone(), path.to_string(), args.recursive());
259
260        Ok((RpList::default(), oio::PageLister::new(l)))
261    }
262}
263
/// Repository type of Huggingface. Currently, we only support `model` and `dataset`.
/// [Reference](https://huggingface.co/docs/hub/repositories)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepoType {
    /// A model repository. Selected by the `model` repo type, and used when
    /// no repo type is configured.
    Model,
    /// A dataset repository. Selected by the `dataset` repo type.
    Dataset,
}