opendal/services/gcs/uri.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use percent_encoding::utf8_percent_encode;
19use percent_encoding::AsciiSet;
20use percent_encoding::NON_ALPHANUMERIC;
21
22/// PATH_ENCODE_SET is the encode set for http url path.
23///
24/// This set follows [encodeURIComponent](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent) which will encode all non-ASCII characters except `A-Z a-z 0-9 - _ . ! ~ * ' ( )`
25///
26/// Following characters is allowed in GCS, check "https://cloud.google.com/storage/docs/request-endpoints#encoding" for details
27static GCS_PATH_ENCODE_SET: AsciiSet = NON_ALPHANUMERIC
28 .remove(b'-')
29 .remove(b'_')
30 .remove(b'.')
31 .remove(b'*');
32
33/// percent_encode_path will do percent encoding for http encode path.
34///
35/// Follows [encodeURIComponent](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent)
36/// which will encode all non-ASCII characters except `A-Z a-z 0-9 - _ . *`
37///
38/// GCS does not allow '/'s in paths, this should also be dealt with
39pub(super) fn percent_encode_path(path: &str) -> String {
40 utf8_percent_encode(path, &GCS_PATH_ENCODE_SET).to_string()
41}
42
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check that `percent_encode_path` escapes everything
    /// except ASCII alphanumerics and `- _ . *`.
    #[test]
    fn test_percent_encode_path() {
        // Each row: (case name, raw input, expected encoded output).
        let cases = [
            (
                "Reserved Characters",
                ";,/?:@&=+$",
                "%3B%2C%2F%3F%3A%40%26%3D%2B%24",
            ),
            ("Unescaped Characters", "-_.*", "-_.*"),
            ("Number Sign", "#", "%23"),
            (
                "Alphanumeric Characters + Space",
                "ABC abc 123",
                "ABC%20abc%20123",
            ),
            (
                "Unicode",
                "你好，世界！❤",
                "%E4%BD%A0%E5%A5%BD%EF%BC%8C%E4%B8%96%E7%95%8C%EF%BC%81%E2%9D%A4",
            ),
        ];

        for &(name, raw, expected) in cases.iter() {
            // The case name is attached so a failure identifies its row.
            assert_eq!(percent_encode_path(raw), expected, "{name}");
        }
    }
}