opendal/services/webhdfs/
core.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt::Debug;
19use std::fmt::Formatter;
20use std::sync::Arc;
21
22use bytes::Buf;
23use http::header::CONTENT_LENGTH;
24use http::header::CONTENT_TYPE;
25use http::Request;
26use http::Response;
27use http::StatusCode;
28use serde::Deserialize;
29use tokio::sync::OnceCell;
30
31use super::error::parse_error;
32use crate::raw::*;
33use crate::*;
34
/// Shared state used to build and send WebHDFS REST requests.
///
/// Every request helper on this type builds its URL as
/// `{endpoint}/webhdfs/v1/{path}?op=...`, where `{path}` is the caller-supplied
/// path resolved against `root` and percent-encoded.
pub struct WebhdfsCore {
    /// Accessor metadata; also supplies the HTTP client used to send requests.
    pub info: Arc<AccessorInfo>,
    /// Root that caller-supplied paths are resolved against.
    pub root: String,
    /// Base URL that `/webhdfs/v1/...` is appended to.
    pub endpoint: String,
    /// When set, sent as the `user.name` query parameter.
    pub user_name: Option<String>,
    /// When set, appended verbatim to the query string as `&{auth}`, so it is
    /// expected to already be a complete query fragment.
    pub auth: Option<String>,
    /// Not used in this file.
    /// NOTE(review): presumably guards a one-time root check — confirm in the backend.
    pub root_checker: OnceCell<()>,

    /// Not used in this file.
    /// NOTE(review): presumably a staging directory for atomic writes — confirm in the writer.
    pub atomic_write_dir: Option<String>,
    /// Not used in this file.
    /// NOTE(review): presumably switches listing away from `LISTSTATUS_BATCH` — confirm in the lister.
    pub disable_list_batch: bool,
}
46
47impl Debug for WebhdfsCore {
48    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
49        f.debug_struct("WebhdfsCore")
50            .field("root", &self.root)
51            .field("endpoint", &self.endpoint)
52            .finish()
53    }
54}
55
56impl WebhdfsCore {
57    pub fn webhdfs_create_dir_request(&self, path: &str) -> Result<Request<Buffer>> {
58        let p = build_abs_path(&self.root, path);
59
60        let mut url = format!(
61            "{}/webhdfs/v1/{}?op=MKDIRS&overwrite=true&noredirect=true",
62            self.endpoint,
63            percent_encode_path(&p),
64        );
65        if let Some(user) = &self.user_name {
66            url += format!("&user.name={user}").as_str();
67        }
68        if let Some(auth) = &self.auth {
69            url += format!("&{auth}").as_str();
70        }
71
72        let req = Request::put(&url);
73
74        req.body(Buffer::new()).map_err(new_request_build_error)
75    }
76
77    /// create object
78    pub async fn webhdfs_create_object_request(
79        &self,
80        path: &str,
81        size: Option<u64>,
82        args: &OpWrite,
83        body: Buffer,
84    ) -> Result<Request<Buffer>> {
85        let p = build_abs_path(&self.root, path);
86
87        let mut url = format!(
88            "{}/webhdfs/v1/{}?op=CREATE&overwrite=true&noredirect=true",
89            self.endpoint,
90            percent_encode_path(&p),
91        );
92        if let Some(user) = &self.user_name {
93            url += format!("&user.name={user}").as_str();
94        }
95        if let Some(auth) = &self.auth {
96            url += format!("&{auth}").as_str();
97        }
98
99        let req = Request::put(&url);
100
101        let req = req.body(Buffer::new()).map_err(new_request_build_error)?;
102
103        let resp = self.info.http_client().send(req).await?;
104
105        let status = resp.status();
106
107        if status != StatusCode::CREATED && status != StatusCode::OK {
108            return Err(parse_error(resp));
109        }
110
111        let bs = resp.into_body();
112
113        let resp: LocationResponse =
114            serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
115
116        let mut req = Request::put(&resp.location);
117
118        if let Some(size) = size {
119            req = req.header(CONTENT_LENGTH, size);
120        };
121
122        if let Some(content_type) = args.content_type() {
123            req = req.header(CONTENT_TYPE, content_type);
124        };
125        let req = req.body(body).map_err(new_request_build_error)?;
126
127        Ok(req)
128    }
129
130    pub async fn webhdfs_init_append_request(&self, path: &str) -> Result<String> {
131        let p = build_abs_path(&self.root, path);
132        let mut url = format!(
133            "{}/webhdfs/v1/{}?op=APPEND&noredirect=true",
134            self.endpoint,
135            percent_encode_path(&p),
136        );
137        if let Some(user) = &self.user_name {
138            url += format!("&user.name={user}").as_str();
139        }
140        if let Some(auth) = &self.auth {
141            url += &format!("&{auth}");
142        }
143
144        let req = Request::post(url)
145            .body(Buffer::new())
146            .map_err(new_request_build_error)?;
147
148        let resp = self.info.http_client().send(req).await?;
149
150        let status = resp.status();
151
152        match status {
153            StatusCode::OK => {
154                let bs = resp.into_body();
155                let resp: LocationResponse =
156                    serde_json::from_reader(bs.reader()).map_err(new_json_deserialize_error)?;
157
158                Ok(resp.location)
159            }
160            _ => Err(parse_error(resp)),
161        }
162    }
163
164    pub async fn webhdfs_rename_object(&self, from: &str, to: &str) -> Result<Response<Buffer>> {
165        let from = build_abs_path(&self.root, from);
166        let to = build_rooted_abs_path(&self.root, to);
167
168        let mut url = format!(
169            "{}/webhdfs/v1/{}?op=RENAME&destination={}",
170            self.endpoint,
171            percent_encode_path(&from),
172            percent_encode_path(&to)
173        );
174        if let Some(user) = &self.user_name {
175            url += format!("&user.name={user}").as_str();
176        }
177        if let Some(auth) = &self.auth {
178            url += &format!("&{auth}");
179        }
180
181        let req = Request::put(&url)
182            .body(Buffer::new())
183            .map_err(new_request_build_error)?;
184
185        self.info.http_client().send(req).await
186    }
187
188    pub fn webhdfs_append_request(
189        &self,
190        location: &str,
191        size: u64,
192        body: Buffer,
193    ) -> Result<Request<Buffer>> {
194        let mut url = location.to_string();
195        if let Some(user) = &self.user_name {
196            url += format!("&user.name={user}").as_str();
197        }
198        if let Some(auth) = &self.auth {
199            url += &format!("&{auth}");
200        }
201
202        let mut req = Request::post(&url);
203
204        req = req.header(CONTENT_LENGTH, size.to_string());
205
206        req.body(body).map_err(new_request_build_error)
207    }
208
209    /// CONCAT will concat sources to the path
210    pub fn webhdfs_concat_request(
211        &self,
212        path: &str,
213        sources: Vec<String>,
214    ) -> Result<Request<Buffer>> {
215        let p = build_abs_path(&self.root, path);
216
217        let sources = sources
218            .iter()
219            .map(|p| build_rooted_abs_path(&self.root, p))
220            .collect::<Vec<String>>()
221            .join(",");
222
223        let mut url = format!(
224            "{}/webhdfs/v1/{}?op=CONCAT&sources={}",
225            self.endpoint,
226            percent_encode_path(&p),
227            percent_encode_path(&sources),
228        );
229        if let Some(user) = &self.user_name {
230            url += format!("&user.name={user}").as_str();
231        }
232        if let Some(auth) = &self.auth {
233            url += &format!("&{auth}");
234        }
235
236        let req = Request::post(url);
237
238        req.body(Buffer::new()).map_err(new_request_build_error)
239    }
240
241    fn webhdfs_open_request(&self, path: &str, range: &BytesRange) -> Result<Request<Buffer>> {
242        let p = build_abs_path(&self.root, path);
243        let mut url = format!(
244            "{}/webhdfs/v1/{}?op=OPEN",
245            self.endpoint,
246            percent_encode_path(&p),
247        );
248        if let Some(user) = &self.user_name {
249            url += format!("&user.name={user}").as_str();
250        }
251        if let Some(auth) = &self.auth {
252            url += &format!("&{auth}");
253        }
254
255        if !range.is_full() {
256            url += &format!("&offset={}", range.offset());
257            if let Some(size) = range.size() {
258                url += &format!("&length={size}")
259            }
260        }
261
262        let req = Request::get(&url)
263            .body(Buffer::new())
264            .map_err(new_request_build_error)?;
265
266        Ok(req)
267    }
268
269    pub async fn webhdfs_list_status_request(&self, path: &str) -> Result<Response<Buffer>> {
270        let p = build_abs_path(&self.root, path);
271        let mut url = format!(
272            "{}/webhdfs/v1/{}?op=LISTSTATUS",
273            self.endpoint,
274            percent_encode_path(&p),
275        );
276        if let Some(user) = &self.user_name {
277            url += format!("&user.name={user}").as_str();
278        }
279        if let Some(auth) = &self.auth {
280            url += format!("&{auth}").as_str();
281        }
282
283        let req = Request::get(&url)
284            .body(Buffer::new())
285            .map_err(new_request_build_error)?;
286        self.info.http_client().send(req).await
287    }
288
289    pub async fn webhdfs_list_status_batch_request(
290        &self,
291        path: &str,
292        start_after: &str,
293    ) -> Result<Response<Buffer>> {
294        let p = build_abs_path(&self.root, path);
295
296        let mut url = format!(
297            "{}/webhdfs/v1/{}?op=LISTSTATUS_BATCH",
298            self.endpoint,
299            percent_encode_path(&p),
300        );
301        if !start_after.is_empty() {
302            url += format!("&startAfter={}", start_after).as_str();
303        }
304        if let Some(user) = &self.user_name {
305            url += format!("&user.name={user}").as_str();
306        }
307        if let Some(auth) = &self.auth {
308            url += format!("&{auth}").as_str();
309        }
310
311        let req = Request::get(&url)
312            .body(Buffer::new())
313            .map_err(new_request_build_error)?;
314        self.info.http_client().send(req).await
315    }
316
317    pub async fn webhdfs_read_file(
318        &self,
319        path: &str,
320        range: BytesRange,
321    ) -> Result<Response<HttpBody>> {
322        let req = self.webhdfs_open_request(path, &range)?;
323        self.info.http_client().fetch(req).await
324    }
325
326    pub(super) async fn webhdfs_get_file_status(&self, path: &str) -> Result<Response<Buffer>> {
327        let p = build_abs_path(&self.root, path);
328        let mut url = format!(
329            "{}/webhdfs/v1/{}?op=GETFILESTATUS",
330            self.endpoint,
331            percent_encode_path(&p),
332        );
333        if let Some(user) = &self.user_name {
334            url += format!("&user.name={user}").as_str();
335        }
336        if let Some(auth) = &self.auth {
337            url += format!("&{auth}").as_str();
338        }
339
340        let req = Request::get(&url)
341            .body(Buffer::new())
342            .map_err(new_request_build_error)?;
343
344        self.info.http_client().send(req).await
345    }
346
347    pub async fn webhdfs_delete(&self, path: &str) -> Result<Response<Buffer>> {
348        let p = build_abs_path(&self.root, path);
349        let mut url = format!(
350            "{}/webhdfs/v1/{}?op=DELETE&recursive=false",
351            self.endpoint,
352            percent_encode_path(&p),
353        );
354        if let Some(user) = &self.user_name {
355            url += format!("&user.name={user}").as_str();
356        }
357        if let Some(auth) = &self.auth {
358            url += format!("&{auth}").as_str();
359        }
360
361        let req = Request::delete(&url)
362            .body(Buffer::new())
363            .map_err(new_request_build_error)?;
364
365        self.info.http_client().send(req).await
366    }
367}
368
/// JSON payload carrying the URL that the follow-up request should target.
///
/// Because of `rename_all = "PascalCase"`, the `location` field is read from
/// the `Location` key of the response body.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "PascalCase")]
pub(super) struct LocationResponse {
    /// URL taken from the `Location` key.
    pub location: String,
}
373}