opendal/services/gcs/
uri.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use percent_encoding::utf8_percent_encode;
19use percent_encoding::AsciiSet;
20use percent_encoding::NON_ALPHANUMERIC;
21
22/// PATH_ENCODE_SET is the encode set for http url path.
23///
24/// This set follows [encodeURIComponent](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent) which will encode all non-ASCII characters except `A-Z a-z 0-9 - _ . ! ~ * ' ( )`
25///
26/// Following characters is allowed in GCS, check "https://cloud.google.com/storage/docs/request-endpoints#encoding" for details
27static GCS_PATH_ENCODE_SET: AsciiSet = NON_ALPHANUMERIC
28    .remove(b'-')
29    .remove(b'_')
30    .remove(b'.')
31    .remove(b'*');
32
33/// percent_encode_path will do percent encoding for http encode path.
34///
35/// Follows [encodeURIComponent](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent)
36/// which will encode all non-ASCII characters except `A-Z a-z 0-9 - _ . *`
37///
38/// GCS does not allow '/'s in paths, this should also be dealt with
39pub(super) fn percent_encode_path(path: &str) -> String {
40    utf8_percent_encode(path, &GCS_PATH_ENCODE_SET).to_string()
41}
42
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `percent_encode_path`: each entry is
    /// `(case name, input, expected encoded output)`.
    #[test]
    fn test_percent_encode_path() {
        let cases = [
            (
                "Reserved Characters",
                ";,/?:@&=+$",
                "%3B%2C%2F%3F%3A%40%26%3D%2B%24",
            ),
            ("Unescaped Characters", "-_.*", "-_.*"),
            ("Number Sign", "#", "%23"),
            (
                "Alphanumeric Characters + Space",
                "ABC abc 123",
                "ABC%20abc%20123",
            ),
            (
                "Unicode",
                // The punctuation here is FULLWIDTH COMMA (U+FF0C) and
                // FULLWIDTH EXCLAMATION MARK (U+FF01), matching `%EF%BC%8C`
                // and `%EF%BC%81` in the expected value. They are written as
                // escapes so they cannot be mistaken for, or normalized into,
                // ASCII `,` / `!` (which would encode as `%2C` / `%21`).
                "你好\u{FF0C}世界\u{FF01}❤",
                "%E4%BD%A0%E5%A5%BD%EF%BC%8C%E4%B8%96%E7%95%8C%EF%BC%81%E2%9D%A4",
            ),
        ];

        for (name, input, expected) in cases {
            let actual = percent_encode_path(input);

            // The case name is the failure message so a broken entry is easy to spot.
            assert_eq!(actual, expected, "{name}");
        }
    }
}