opendal_core/services/huggingface/
config.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt::Debug;
19
20use serde::Deserialize;
21use serde::Serialize;
22
23use super::HUGGINGFACE_SCHEME;
24use super::backend::HuggingfaceBuilder;
25
26/// Configuration for Huggingface service support.
27#[derive(Default, Serialize, Deserialize, Clone, PartialEq, Eq)]
28#[serde(default)]
29#[non_exhaustive]
30pub struct HuggingfaceConfig {
31    /// Repo type of this backend. Default is model.
32    ///
33    /// Available values:
34    /// - model
35    /// - dataset
36    /// - datasets (alias for dataset)
37    pub repo_type: Option<String>,
38    /// Repo id of this backend.
39    ///
40    /// This is required.
41    pub repo_id: Option<String>,
42    /// Revision of this backend.
43    ///
44    /// Default is main.
45    pub revision: Option<String>,
46    /// Root of this backend. Can be "/path/to/dir".
47    ///
48    /// Default is "/".
49    pub root: Option<String>,
50    /// Token of this backend.
51    ///
52    /// This is optional.
53    pub token: Option<String>,
54    /// Endpoint of the Huggingface Hub.
55    ///
56    /// Default is "https://huggingface.co".
57    pub endpoint: Option<String>,
58}
59
60impl Debug for HuggingfaceConfig {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        f.debug_struct("HuggingfaceConfig")
63            .field("repo_type", &self.repo_type)
64            .field("repo_id", &self.repo_id)
65            .field("revision", &self.revision)
66            .field("root", &self.root)
67            .finish_non_exhaustive()
68    }
69}
70
71impl crate::Configurator for HuggingfaceConfig {
72    type Builder = HuggingfaceBuilder;
73
74    fn from_uri(uri: &crate::types::OperatorUri) -> crate::Result<Self> {
75        let mut map = uri.options().clone();
76        map.retain(|_, v| !v.is_empty());
77
78        if let Some(repo_type) = uri.name() {
79            if !repo_type.is_empty() {
80                map.insert("repo_type".to_string(), repo_type.to_string());
81            }
82        }
83
84        if let Some(raw_path) = uri.root() {
85            let parts: Vec<_> = raw_path.split('/').filter(|s| !s.is_empty()).collect();
86
87            if parts.len() >= 2 {
88                map.insert("repo_id".to_string(), format!("{}/{}", parts[0], parts[1]));
89
90                if parts.len() >= 3 {
91                    if map.contains_key("revision") {
92                        let root_value = parts[2..].join("/");
93                        if !root_value.is_empty() {
94                            map.insert("root".to_string(), root_value);
95                        }
96                    } else {
97                        map.insert("revision".to_string(), parts[2].to_string());
98                        if parts.len() > 3 {
99                            let root_value = parts[3..].join("/");
100                            if !root_value.is_empty() {
101                                map.insert("root".to_string(), root_value);
102                            }
103                        }
104                    }
105                }
106            } else if parts.is_empty() {
107                // no owner/repo provided, fall back to options-only
108            } else {
109                return Err(crate::Error::new(
110                    crate::ErrorKind::ConfigInvalid,
111                    "repository owner and name are required in uri path",
112                )
113                .with_context("service", HUGGINGFACE_SCHEME));
114            }
115        }
116
117        if !map.contains_key("repo_id") {
118            return Err(crate::Error::new(
119                crate::ErrorKind::ConfigInvalid,
120                "repo_id is required via uri path or option",
121            )
122            .with_context("service", HUGGINGFACE_SCHEME));
123        }
124
125        Self::from_iter(map)
126    }
127
128    fn into_builder(self) -> Self::Builder {
129        HuggingfaceBuilder { config: self }
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Configurator;
    use crate::types::OperatorUri;

    /// Builds an operator URI from `uri` plus `options` and runs it through
    /// `HuggingfaceConfig::from_uri`. Panics if the URI itself cannot be built.
    fn parse(uri: &str, options: &[(&str, &str)]) -> crate::Result<HuggingfaceConfig> {
        let options: Vec<(String, String)> = options
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect();
        let uri = OperatorUri::new(uri, options).unwrap();
        HuggingfaceConfig::from_uri(&uri)
    }

    /// The path carries repo id, revision and root; the name carries the repo type.
    #[test]
    fn from_uri_sets_repo_type_id_and_revision() {
        let config = parse("huggingface://model/opendal/sample/main/dataset", &[]).unwrap();

        assert_eq!(config.repo_type.as_deref(), Some("model"));
        assert_eq!(config.repo_id.as_deref(), Some("opendal/sample"));
        assert_eq!(config.revision.as_deref(), Some("main"));
        assert_eq!(config.root.as_deref(), Some("dataset"));
    }

    /// A `revision` option wins, so everything after `owner/repo` becomes the root.
    #[test]
    fn from_uri_uses_existing_revision_and_sets_root() {
        let config = parse(
            "huggingface://dataset/opendal/sample/data/train",
            &[("revision", "dev")],
        )
        .unwrap();

        assert_eq!(config.repo_type.as_deref(), Some("dataset"));
        assert_eq!(config.repo_id.as_deref(), Some("opendal/sample"));
        assert_eq!(config.revision.as_deref(), Some("dev"));
        assert_eq!(config.root.as_deref(), Some("data/train"));
    }

    /// Without a path, every value comes from the options; blank ones are dropped.
    #[test]
    fn from_uri_allows_options_only() {
        let config = parse(
            "huggingface",
            &[
                ("repo_type", "model"),
                ("repo_id", "opendal/sample"),
                ("revision", "main"),
                ("root", ""),
            ],
        )
        .unwrap();

        assert_eq!(config.repo_type.as_deref(), Some("model"));
        assert_eq!(config.repo_id.as_deref(), Some("opendal/sample"));
        assert_eq!(config.revision.as_deref(), Some("main"));
        assert!(config.root.is_none());
    }

    /// A path with only one segment cannot name a repository.
    #[test]
    fn from_uri_requires_owner_and_repo() {
        assert!(parse("huggingface://model/opendal", &[]).is_err());
    }
}