Source code distributed/protocol/tests/test_collection_cuda.py

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pytest

from distributed.protocol import serialize, deserialize
from dask.dataframe.utils import assert_eq
import pandas as pd


@pytest.mark.parametrize("collection", [tuple, dict])
@pytest.mark.parametrize("y,y_serializer", [(50, "cuda"), (None, "pickle")])
def test_serialize_cupy(collection, y, y_serializer):
    cupy = pytest.importorskip("cupy")

    x = cupy.arange(100)
    if y is not None:
        y = cupy.arange(y)
    if issubclass(collection, dict):
        header, frames = serialize(
            {"x": x, "y": y}, serializers=("cuda", "dask", "pickle")
        )
    else:
        header, frames = serialize((x, y), serializers=("cuda", "dask", "pickle"))
    t = deserialize(header, frames, deserializers=("cuda", "dask", "pickle", "error"))

    assert header["is-collection"] is True
    sub_headers = header["sub-headers"]
    assert sub_headers[0]["serializer"] == "cuda"
    assert sub_headers[1]["serializer"] == y_serializer
    assert isinstance(t, collection)

    assert ((t["x"] if isinstance(t, dict) else t[0]) == x).all()
    if y is None:
        assert (t["y"] if isinstance(t, dict) else t[1]) is None
    else:
        assert ((t["y"] if isinstance(t, dict) else t[1]) == y).all()


@pytest.mark.parametrize("collection", [tuple, dict])
@pytest.mark.parametrize(
    "df2,df2_serializer",
    [(pd.DataFrame({"C": [3, 4, 5], "D": [2.5, 3.5, 4.5]}), "cuda"), (None, "pickle")],
)
def test_serialize_pandas_pandas(collection, df2, df2_serializer):
    cudf = pytest.importorskip("cudf")

    df1 = cudf.DataFrame({"A": [1, 2, None], "B": [1.0, 2.0, None]})
    if df2 is not None:
        df2 = cudf.from_pandas(df2)
    if issubclass(collection, dict):
        header, frames = serialize(
            {"df1": df1, "df2": df2}, serializers=("cuda", "dask", "pickle")
        )
    else:
        header, frames = serialize((df1, df2), serializers=("cuda", "dask", "pickle"))
    t = deserialize(header, frames, deserializers=("cuda", "dask", "pickle"))

    assert header["is-collection"] is True
    sub_headers = header["sub-headers"]
    assert sub_headers[0]["serializer"] == "cuda"
    assert sub_headers[1]["serializer"] == df2_serializer
    assert isinstance(t, collection)

    assert_eq(t["df1"] if isinstance(t, dict) else t[0], df1)
    if df2 is None:
        assert (t["df2"] if isinstance(t, dict) else t[1]) is None
    else:
        assert_eq(t["df2"] if isinstance(t, dict) else t[1], df2)