opendal

Apache OpenDAL™ Python binding

Installation

pip install opendal

Usage

import opendal

op = opendal.Operator("fs", root="/tmp")
op.write("test.txt", b"Hello World")
print(op.read("test.txt"))
print(op.stat("test.txt").content_length)

Or using the async API:

import asyncio

import opendal

async def main():
    op = opendal.AsyncOperator("fs", root="/tmp")
    await op.write("test.txt", b"Hello World")
    print(await op.read("test.txt"))

asyncio.run(main())
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from ._opendal import *

__doc__ = _opendal.__doc__
__all__ = _opendal.__all__
class Operator:

Operator is the entry for all public blocking APIs

Create a new blocking Operator with the given scheme and options(**kwargs).

Operator(scheme: str, **kwargs)
def layer(self, layer: opendal.layers.Layer):

Add new layers upon existing operator

def open(self, path: str, mode: str) -> File:

Open a file-like object for the given path in the given mode.

def read(self, path: str) -> memoryview:

Read the whole path into bytes.

def write( self, path: str, bs: bytes, append: Optional[bool] = None, buffer: Optional[int] = None, content_type: Optional[str] = None, content_disposition: Optional[str] = None, cache_control: Optional[str] = None):

Write bytes into given path.

def stat(self, path: str) -> Metadata:

Get current path's metadata without cache directly.

def copy(self, source: str, target: str):

Copy source to target.

def rename(self, source: str, target: str):

Rename source to target.

def remove_all(self, path: str):

Remove all files under the given path.

def create_dir(self, path: str):

Create a dir at given path.

Notes

To indicate that a path is a directory, it is compulsory to include a trailing / in the path. Failure to do so may result in NotADirectory error being returned by OpenDAL.

Behavior

  • Create on existing dir will succeed.
  • Create dir is always recursive, works like mkdir -p
def delete(self, path: str):

Delete given path.

Notes

  • Deleting a path that does not exist won't return an error.
def list(self, path: str) -> Iterable[Entry]:

List current dir path.

def scan(self, path: str) -> Iterable[Entry]:

List dir in flat way.

def capability(self) -> Capability:
def to_async_operator(self) -> AsyncOperator:
class AsyncOperator:

AsyncOperator is the entry for all public async APIs

Create a new AsyncOperator with the given scheme and options(**kwargs).

AsyncOperator(scheme: str, **kwargs)
def layer(self, layer: opendal.layers.Layer):

Add new layers upon existing operator

async def open(self, path: str, mode: str) -> AsyncFile:

Open a file-like object for the given path in the given mode.

async def read(self, path: str) -> memoryview:

Read the whole path into bytes.

async def write( self, path: str, bs: bytes, append: Optional[bool] = None, buffer: Optional[int] = None, content_type: Optional[str] = None, content_disposition: Optional[str] = None, cache_control: Optional[str] = None):

Write bytes into given path.

async def stat(self, path: str) -> Metadata:

Get current path's metadata without cache directly.

async def copy(self, source: str, target: str):

Copy source to target.

async def rename(self, source: str, target: str):

Rename source to target.

async def remove_all(self, path: str):

Remove all files under the given path.

async def create_dir(self, path: str):

Create a dir at given path.

Notes

To indicate that a path is a directory, it is compulsory to include a trailing / in the path. Failure to do so may result in NotADirectory error being returned by OpenDAL.

Behavior

  • Create on existing dir will succeed.
  • Create dir is always recursive, works like mkdir -p
async def delete(self, path: str):

Delete given path.

Notes

  • Deleting a path that does not exist won't return an error.
async def list(self, path: str) -> AsyncIterable[Entry]:

List current dir path.

async def scan(self, path: str) -> AsyncIterable[Entry]:

List dir in flat way.

async def presign_stat(self, path: str, expire_second: int) -> PresignedRequest:

Presign an operation for stat(head) which expires after expire_second seconds.

async def presign_read(self, path: str, expire_second: int) -> PresignedRequest:

Presign an operation for read which expires after expire_second seconds.

async def presign_write(self, path: str, expire_second: int) -> PresignedRequest:

Presign an operation for write which expires after expire_second seconds.

def capability(self) -> Capability:
def to_operator(self) -> Operator:
class File:

A file-like object. Can be used as a context manager.

def read(self, size: Optional[int] = None) -> memoryview:

Read and return at most size bytes, or if size is not given, until EOF.

def readinto(self, /, buffer):

Read bytes into a pre-allocated, writable buffer

def write(self, bs: bytes):

Write bytes into the file.

def seek(self, offset: int, whence: int = 0) -> int:

Change the stream position to the given byte offset. Offset is interpreted relative to the position indicated by whence. The default value for whence is SEEK_SET. Values for whence are:

  • SEEK_SET or 0 – start of the stream (the default); offset should be zero or positive
  • SEEK_CUR or 1 – current stream position; offset may be negative
  • SEEK_END or 2 – end of the stream; offset is usually negative

Return the new absolute position.

def tell(self) -> int:

Return the current stream position.

def close(self):
def flush(self, /):

Flush the underlying writer. Is a no-op if the file is opened in reading mode.

def readable(self, /):

Return True if the stream can be read from.

def writable(self, /):

Return True if the stream can be written to.

def seekable(self, /):

Return True if the stream can be repositioned.

In OpenDAL this is limited to only readable streams.

closed

Return True if the stream is closed.

class AsyncFile:

A file-like async reader. Can be used as an async context manager.

async def read(self, size: Optional[int] = None) -> memoryview:

Read and return at most size bytes, or if size is not given, until EOF.

async def write(self, bs: bytes):

Write bytes into the file.

async def seek(self, offset: int, whence: int = 0) -> int:

Change the stream position to the given byte offset. offset is interpreted relative to the position indicated by whence. The default value for whence is SEEK_SET. Values for whence are:

  • SEEK_SET or 0 – start of the stream (the default); offset should be zero or positive
  • SEEK_CUR or 1 – current stream position; offset may be negative
  • SEEK_END or 2 – end of the stream; offset is usually negative

Return the new absolute position.

async def tell(self) -> int:

Return the current stream position.

async def close(self):
def readable(self, /):

Check if the stream may be read from.

def writable(self, /):

Check if the stream may be written to.

def seekable(self, /):

Check if the stream reader may be re-located.

closed

Check if the stream is closed.

class Entry:
path: str

Path of entry. Path is relative to operator's root.

class EntryMode:
def is_file(self) -> bool:

Returns True if this is a file.

def is_dir(self) -> bool:

Returns True if this is a directory.

class Metadata:
content_type: Optional[str]

Content Type of this entry.

content_length: int

Content length of this entry.

etag: Optional[str]

ETag of this entry.

mode: EntryMode

mode represents this entry's mode.

content_disposition: Optional[str]
content_md5: Optional[str]

Content MD5 of this entry.

class PresignedRequest:
url: str

Return the URL of this request.

headers: dict[str, str]

Return the HTTP headers of this request.

method: str

Return the HTTP method of this request.

class Capability:

Capability is used to describe what operations are supported by current Operator.

write_can_multi: bool

If operator supports write being called multiple times.

delete: bool

If operator supports delete.

write_total_max_size: Optional[int]

write_total_max_size is the max size that services support in write_total.

For example, Cloudflare D1 supports 1MB as max in write_total.

presign_stat: bool

If operator supports presign stat.

stat_with_if_none_match: bool

If operator supports stat with if none match.

read: bool

If operator supports read.

read_with_override_content_disposition: bool

If operator supports read with override content disposition.

read_with_if_none_match: bool

If operator supports read with if none match.

list_with_limit: bool

If backend supports list with limit.

batch: bool

If operator supports batch.

create_dir: bool

If operator supports create dir.

read_with_if_match: bool

If operator supports read with if match.

write_with_content_disposition: bool

If operator supports write with content disposition.

stat: bool

If operator supports stat.

write: bool

If operator supports write.

write_can_empty: bool

If operator supports write with empty content.

write_with_cache_control: bool

If operator supports write with cache control.

rename: bool

If operator supports rename.

blocking: bool

If operator supports blocking.

copy: bool

If operator supports copy.

presign_write: bool

If operator supports presign write.

stat_with_if_match: bool

If operator supports stat with if match.

read_with_override_content_type: bool

If operator supports read with override content type.

write_multi_min_size: Optional[int]

write_multi_min_size is the min size that services support in write_multi.

For example, AWS S3 requires at least 5MiB in write_multi except the last one.

write_with_content_type: bool

If operator supports write with content type.

list_with_recursive: bool

If backend supports list without delimiter.

batch_delete: bool

If operator supports batch delete.

read_with_override_cache_control: bool

If operator supports read with override cache control.

write_can_append: bool

If operator supports write by append.

write_multi_align_size: Optional[int]

write_multi_align_size is the alignment size that services require in write_multi.

For example, Google GCS requires align size to 256KiB in write_multi.

list: bool

If operator supports list.

batch_max_operations: Optional[int]

The max operations that operator supports in batch.

presign_read: bool

If operator supports presign read.

list_with_start_after: bool

If backend supports list with start after.

write_multi_max_size: Optional[int]

write_multi_max_size is the max size that services support in write_multi.

For example, AWS S3 supports 5GiB as max in write_multi.

presign: bool

If operator supports presign.