opendal_core/services/ipfs/
core.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt::Debug;
19use std::sync::Arc;
20
21use http::Request;
22use http::Response;
23use http::StatusCode;
24
25use super::error::parse_error;
26use crate::raw::*;
27use crate::*;
28
29pub struct IpfsCore {
30    pub info: Arc<AccessorInfo>,
31    pub endpoint: String,
32    pub root: String,
33}
34
35impl Debug for IpfsCore {
36    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37        f.debug_struct("IpfsCore")
38            .field("endpoint", &self.endpoint)
39            .field("root", &self.root)
40            .finish_non_exhaustive()
41    }
42}
43
44impl IpfsCore {
45    pub async fn ipfs_get(&self, path: &str, range: BytesRange) -> Result<Response<HttpBody>> {
46        let p = build_rooted_abs_path(&self.root, path);
47
48        let url = format!("{}{}", self.endpoint, percent_encode_path(&p));
49
50        let mut req = Request::get(&url);
51
52        if !range.is_full() {
53            req = req.header(http::header::RANGE, range.to_header());
54        }
55
56        let req = req.body(Buffer::new()).map_err(new_request_build_error)?;
57
58        self.info.http_client().fetch(req).await
59    }
60
61    pub async fn ipfs_head(&self, path: &str) -> Result<Response<Buffer>> {
62        let p = build_rooted_abs_path(&self.root, path);
63
64        let url = format!("{}{}", self.endpoint, percent_encode_path(&p));
65
66        let req = Request::head(&url);
67
68        let req = req.body(Buffer::new()).map_err(new_request_build_error)?;
69
70        self.info.http_client().send(req).await
71    }
72
73    pub async fn ipfs_list(&self, path: &str) -> Result<Response<Buffer>> {
74        let p = build_rooted_abs_path(&self.root, path);
75
76        let url = format!("{}{}", self.endpoint, percent_encode_path(&p));
77
78        let mut req = Request::get(&url);
79
80        // Use "application/vnd.ipld.raw" to disable IPLD codec deserialization
81        // OpenDAL will parse ipld data directly.
82        //
83        // ref: https://github.com/ipfs/specs/blob/main/http-gateways/PATH_GATEWAY.md
84        req = req.header(http::header::ACCEPT, "application/vnd.ipld.raw");
85
86        let req = req.body(Buffer::new()).map_err(new_request_build_error)?;
87
88        self.info.http_client().send(req).await
89    }
90
91    /// IPFS's stat behavior highly depends on its implementation.
92    ///
93    /// Based on IPFS [Path Gateway Specification](https://github.com/ipfs/specs/blob/main/http-gateways/PATH_GATEWAY.md),
94    /// response payload could be:
95    ///
96    /// > - UnixFS (implicit default)
97    /// >   - File
98    /// >     - Bytes representing file contents
99    /// >   - Directory
100    /// >     - Generated HTML with directory index
101    /// >     - When `index.html` is present, gateway can skip generating directory index and return it instead
102    /// > - Raw block (not this case)
103    /// > - CAR (not this case)
104    ///
105    /// When we HEAD a given path, we could have the following responses:
106    ///
107    /// - File
108    ///
109    /// ```http
110    /// :) curl -I https://ipfs.io/ipfs/QmPpCt1aYGb9JWJRmXRUnmJtVgeFFTJGzWFYEEX7bo9zGJ/normal_file
111    /// HTTP/1.1 200 Connection established
112    ///
113    /// HTTP/2 200
114    /// server: openresty
115    /// date: Thu, 08 Sep 2022 00:48:50 GMT
116    /// content-type: application/octet-stream
117    /// content-length: 262144
118    /// access-control-allow-methods: GET
119    /// cache-control: public, max-age=29030400, immutable
120    /// etag: "QmdP6teFTLSNVhT4W5jkhEuUBsjQ3xkp1GmRvDU6937Me1"
121    /// x-ipfs-gateway-host: ipfs-bank11-fr2
122    /// x-ipfs-path: /ipfs/QmPpCt1aYGb9JWJRmXRUnmJtVgeFFTJGzWFYEEX7bo9zGJ/normal_file
123    /// x-ipfs-roots: QmPpCt1aYGb9JWJRmXRUnmJtVgeFFTJGzWFYEEX7bo9zGJ,QmdP6teFTLSNVhT4W5jkhEuUBsjQ3xkp1GmRvDU6937Me1
124    /// x-ipfs-pop: ipfs-bank11-fr2
125    /// timing-allow-origin: *
126    /// x-ipfs-datasize: 262144
127    /// access-control-allow-origin: *
128    /// access-control-allow-methods: GET, POST, OPTIONS
129    /// access-control-allow-headers: X-Requested-With, Range, Content-Range, X-Chunked-Output, X-Stream-Output
130    /// access-control-expose-headers: Content-Range, X-Chunked-Output, X-Stream-Output
131    /// x-ipfs-lb-pop: gateway-bank1-fr2
132    /// strict-transport-security: max-age=31536000; includeSubDomains; preload
133    /// x-proxy-cache: MISS
134    /// accept-ranges: bytes
135    /// ```
136    ///
137    /// - Dir with generated index
138    ///
139    /// ```http
140    /// :( curl -I https://ipfs.io/ipfs/QmPpCt1aYGb9JWJRmXRUnmJtVgeFFTJGzWFYEEX7bo9zGJ/normal_dir
141    /// HTTP/1.1 200 Connection established
142    ///
143    /// HTTP/2 200
144    /// server: openresty
145    /// date: Wed, 07 Sep 2022 08:46:13 GMT
146    /// content-type: text/html
147    /// vary: Accept-Encoding
148    /// access-control-allow-methods: GET
149    /// etag: "DirIndex-2b567f6r5vvdg_CID-QmY44DyCDymRN1Qy7sGbupz1ysMkXTWomAQku5vBg7fRQW"
150    /// x-ipfs-gateway-host: ipfs-bank6-sg1
151    /// x-ipfs-path: /ipfs/QmPpCt1aYGb9JWJRmXRUnmJtVgeFFTJGzWFYEEX7bo9zGJ/normal_dir
152    /// x-ipfs-roots: QmPpCt1aYGb9JWJRmXRUnmJtVgeFFTJGzWFYEEX7bo9zGJ,QmY44DyCDymRN1Qy7sGbupz1ysMkXTWomAQku5vBg7fRQW
153    /// x-ipfs-pop: ipfs-bank6-sg1
154    /// timing-allow-origin: *
155    /// access-control-allow-origin: *
156    /// access-control-allow-methods: GET, POST, OPTIONS
157    /// access-control-allow-headers: X-Requested-With, Range, Content-Range, X-Chunked-Output, X-Stream-Output
158    /// access-control-expose-headers: Content-Range, X-Chunked-Output, X-Stream-Output
159    /// x-ipfs-lb-pop: gateway-bank3-sg1
160    /// strict-transport-security: max-age=31536000; includeSubDomains; preload
161    /// x-proxy-cache: MISS
162    /// ```
163    ///
164    /// - Dir with index.html
165    ///
166    /// ```http
167    /// :) curl -I http://127.0.0.1:8080/ipfs/QmVturFGV3z4WsP7cRV8Ci4avCdGWYXk2qBKvtAwFUp5Az
168    /// HTTP/1.1 302 Found
169    /// Access-Control-Allow-Headers: Content-Type
170    /// Access-Control-Allow-Headers: Range
171    /// Access-Control-Allow-Headers: User-Agent
172    /// Access-Control-Allow-Headers: X-Requested-With
173    /// Access-Control-Allow-Methods: GET
174    /// Access-Control-Allow-Origin: *
175    /// Access-Control-Expose-Headers: Content-Length
176    /// Access-Control-Expose-Headers: Content-Range
177    /// Access-Control-Expose-Headers: X-Chunked-Output
178    /// Access-Control-Expose-Headers: X-Ipfs-Path
179    /// Access-Control-Expose-Headers: X-Ipfs-Roots
180    /// Access-Control-Expose-Headers: X-Stream-Output
181    /// Content-Type: text/html; charset=utf-8
182    /// Location: /ipfs/QmVturFGV3z4WsP7cRV8Ci4avCdGWYXk2qBKvtAwFUp5Az/
183    /// X-Ipfs-Path: /ipfs/QmVturFGV3z4WsP7cRV8Ci4avCdGWYXk2qBKvtAwFUp5Az
184    /// X-Ipfs-Roots: QmVturFGV3z4WsP7cRV8Ci4avCdGWYXk2qBKvtAwFUp5Az
185    /// Date: Thu, 08 Sep 2022 00:52:29 GMT
186    /// ```
187    ///
188    /// In conclusion:
189    ///
    /// - HTTP Status Code == 301 or 302 => directory
    /// - HTTP Status Code == 200 && ETag starts with `"DirIndex` => directory
    /// - HTTP Status Code == 200 && ETag does not start with `"DirIndex` => file
    /// - HTTP Status Code == 200 && no ETag => directory
193    pub async fn ipfs_stat(&self, path: &str) -> Result<Metadata> {
194        // Stat root always returns a DIR.
195        if path == "/" {
196            return Ok(Metadata::new(EntryMode::DIR));
197        }
198
199        let resp = self.ipfs_head(path).await?;
200        let status = resp.status();
201
202        match status {
203            StatusCode::OK => {
204                let mut m = Metadata::new(EntryMode::Unknown);
205
206                if let Some(v) = parse_content_length(resp.headers())? {
207                    m.set_content_length(v);
208                }
209
210                if let Some(v) = parse_content_type(resp.headers())? {
211                    m.set_content_type(v);
212                }
213
214                if let Some(v) = parse_etag(resp.headers())? {
215                    m.set_etag(v);
216
217                    if v.starts_with("\"DirIndex") {
218                        m.set_mode(EntryMode::DIR);
219                    } else {
220                        m.set_mode(EntryMode::FILE);
221                    }
222                } else {
223                    // Some service will stream the output of DirIndex.
224                    // If we don't have an etag, it's highly to be a dir.
225                    m.set_mode(EntryMode::DIR);
226                }
227
228                if let Some(v) = parse_content_disposition(resp.headers())? {
229                    m.set_content_disposition(v);
230                }
231
232                Ok(m)
233            }
234            StatusCode::FOUND | StatusCode::MOVED_PERMANENTLY => Ok(Metadata::new(EntryMode::DIR)),
235            _ => Err(parse_error(resp)),
236        }
237    }
238}