Buy Me a Coffee

*Memos:

ToDtype() can set a dtype to an Image, Video or tensor and scale its values as shown below. *It's about scale=True:
*Memos:

  • The 1st argument for initialization is dtype(Required-Type:Union[dtype, Dict[Union[Type, str], Optional[dtype]]]): *Memos:
    • It converts Image, Video or tensor.
    • A dictionary can do more specific conversions, e.g. dtype={tv_tensors.Image: torch.float32, tv_tensors.Mask: torch.int64, "others":None}.
  • The 2nd argument for initialization is scale(Optional-Default:False-Type:bool): *Memos:
    • If it's True, the values of an Image, Video or tensor are scaled to [0.0, 1.0].
    • Depending on the combination of the dtype given to ToDtype() and the dtype of the Image, Video or tensor, the conversion and scaling may not be possible.
  • The 1st argument is img(Required-Type:PIL Image or tensor/ndarray(int/float/complex/bool)): *Memos:
    • A tensor must be 0D or higher.
    • An ndarray must be 0D or higher.
    • Don't pass it as a keyword argument (img=).
  • ToDtype(dtype, scale=True) is the recommended replacement for ConvertImageDtype(dtype). *ConvertImageDtype() is deprecated.
  • Using v2 is recommended, according to "V1 or V2? Which one should I use?".
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import ToImage, ToDtype
import torch
import numpy as np

# Build the transform targeting float32. The first line leaves `scale` at its
# default and the second passes scale=False explicitly; the second assignment
# simply rebinds `td`, and the recorded repr below shows scale=False.
td = ToDtype(dtype=torch.float32)
td = ToDtype(dtype=torch.float32, scale=False)

td
# ToDtype(scale=False)

# Dataset with no transform applied; the later calls in this script treat its
# samples as (PIL image, label) pairs.
PILImage_data = OxfordIIITPet(
    root="data",
    transform=None
)

# Same dataset, but every image is passed through ToImage() on access.
Image_data = OxfordIIITPet(
    root="data",
    transform=ToImage()
)

# Pixel values of the first sample's image before any dtype conversion.
# NOTE(review): the listing below matches the `list(...getdata())` form used
# later in this script; a bare getdata() call may print differently - confirm.
PILImage_data[0][0].getdata()
# [(37, 20, 12),
#  (35, 18, 10),
#  (36, 19, 11),
#  (36, 19, 11),
#  (37, 18, 11),
#  ...]

# The ToImage() version of the same sample: an Image holding uint8 values.
Image_data[0][0]
# Image([[[37, 35, 36, ..., 247, 249, 249],
#         [35, 35, 37, ..., 246, 248, 249],
#         ...,
#         [28, 28, 27, ...,  59, 65, 76]],
#        [[20, 18, 19, ..., 248, 248, 248],
#         [18, 18, 20, ..., 247, 247, 248],
#         ...,
#         [27, 27, 27, ...,  94, 106, 117]],
#        [[12, 10, 11, ..., 253, 253, 253],
#         [10, 10, 12, ..., 251, 252, 253],
#         ...,
#         [35, 35, 35,  ..., 214, 232, 223]]], dtype=torch.uint8,)

# Re-create the transform with scaling enabled, targeting float32.
td = ToDtype(dtype=torch.float32, scale=True)

td(PILImage_data) # The dataset object comes back as-is; its samples are still PIL Images.
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data

# A (PIL image, label) sample: the result is still a 2-tuple with label 0.
td(PILImage_data[0])
# (<PIL.Image.Image ...>, 0)   <- image repr elided; the source listing showed "(, 0)"

# The pixel values equal the untransformed ones, i.e. the PIL image was not
# converted or scaled.
list(td(PILImage_data[0][0]).getdata())
# [(37, 20, 12),
#  (35, 18, 10),
#  (36, 19, 11),
#  (36, 19, 11),
#  (37, 18, 11),
#  ...]

# An (Image, label) sample: the uint8 Image becomes floating point values in
# [0.0, 1.0] (e.g. 37 -> 0.1451, consistent with dividing by 255) while the
# label 0 is left alone.
td(Image_data[0])
# (Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#          [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#          ...,
#          [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]],
#         [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725],
#          [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725],
#          ...,
#          [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]],
#         [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922],
#          [0.0392, 0.0392, 0.0471,  ..., 0.9843, 0.9882, 0.9922],
#          ...,
#          [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],), 0)

# The bare Image (no label) gives the same scaled values.
td(Image_data[0][0])
# Image([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#         [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#         ...,
#         [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]],
#        [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725],
#         [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725],
#         ...,
#         [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]],
#        [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922],
#         [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922],
#         ...,
#         [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]],)

# Plain torch tensors through `td`, which at this point in the script is
# ToDtype(dtype=torch.float32, scale=True).
# Convention used throughout: when several calls are stacked above a single
# output comment, that output is the recorded result for each of them.

# int64 input: the recorded values are tiny (3 -> 3.2526e-19), consistent with
# dividing by the int64 maximum. The accompanying label 0 is unchanged.
td((torch.tensor(3), 0)) # int64
td((torch.tensor(3, dtype=torch.int64), 0))
# (tensor(3.2526e-19), 0)

td(torch.tensor(3))
# tensor(3.2526e-19)

# The same behaviour is recorded for 1D through 5D int64 tensors, with or
# without a label alongside.
td((torch.tensor([0, 1, 2, 3]), 0))
# (tensor([0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]), 0)

td(torch.tensor([0, 1, 2, 3]))
# tensor([0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19])

td((torch.tensor([[0, 1, 2, 3]]), 0))
# (tensor([[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]), 0)

td(torch.tensor([[0, 1, 2, 3]]))
# tensor([[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]])

td((torch.tensor([[[0, 1, 2, 3]]]), 0))
# (tensor([[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]), 0)

td(torch.tensor([[[0, 1, 2, 3]]]))
# tensor([[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]])

td((torch.tensor([[[[0, 1, 2, 3]]]]), 0))
# (tensor([[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]]), 0)

td(torch.tensor([[[[0, 1, 2, 3]]]]))
# tensor([[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]])

td((torch.tensor([[[[[0, 1, 2, 3]]]]]), 0))
# (tensor([[[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]]]), 0)

td(torch.tensor([[[[[0, 1, 2, 3]]]]]))
# tensor([[[[[0.0000e+00, 1.0842e-19, 2.1684e-19, 3.2526e-19]]]]])

# int32 input: a larger result (1 -> 4.6566e-10), consistent with dividing by
# the int32 maximum instead.
td((torch.tensor([[0, 1, 2, 3]], dtype=torch.int32), 0))
# (tensor([[0.0000e+00, 4.6566e-10, 9.3132e-10, 1.3970e-09]]), 0)

# Floating point and complex inputs: the recorded output keeps the original
# magnitudes (0..3), i.e. no rescaling into [0.0, 1.0] is visible here.
td((torch.tensor([[0., 1., 2., 3.]]), 0)) # float32
td((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float32), 0))
td((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float64), 0))
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex64
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]],
    dtype=torch.complex64), 0))
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]],
    dtype=torch.complex32), 0))
# (tensor([[0., 1., 2., 3.]]), 0)

# bool input: True/False are recorded as 1./0.
td((torch.tensor([[True, False, True, False]]), 0)) # bool
td((torch.tensor([[True, False, True, False]], dtype=torch.bool), 0))
# (tensor([[1., 0., 1., 0.]]), 0)

# NumPy arrays through `td`, which at this point in the script is
# ToDtype(dtype=torch.float32, scale=True).
# In every recorded output below the array comes back with its original values
# and dtype, i.e. ndarrays are neither converted nor scaled.
# The trailing "# int32" / "# float64" / ... remarks name the array's default
# dtype as recorded by the author (NOTE(review): the default integer dtype is
# platform-dependent - confirm).
td((np.array(3), 0)) # int32
td((np.array(3, dtype=np.int32), 0))
# (array(3), 0)

td(np.array(3))
# array(3)

# 1D through 5D integer arrays, with and without a label alongside.
td((np.array([0, 1, 2, 3]), 0))
# (array([0, 1, 2, 3]), 0)

td(np.array([0, 1, 2, 3]))
# array([0, 1, 2, 3])

td((np.array([[0, 1, 2, 3]]), 0))
# (array([[0, 1, 2, 3]]), 0)

td(np.array([[0, 1, 2, 3]]))
# array([[0, 1, 2, 3]])

td((np.array([[[0, 1, 2, 3]]]), 0))
# (array([[[0, 1, 2, 3]]]), 0)

td(np.array([[[0, 1, 2, 3]]]))
# array([[[0, 1, 2, 3]]])

td((np.array([[[[0, 1, 2, 3]]]]), 0))
# (array([[[[0, 1, 2, 3]]]]), 0)

td(np.array([[[[0, 1, 2, 3]]]]))
# array([[[[0, 1, 2, 3]]]])

td((np.array([[[[[0, 1, 2, 3]]]]]), 0))
# (array([[[[[0, 1, 2, 3]]]]]), 0)

td(np.array([[[[[0, 1, 2, 3]]]]]))
# array([[[[[0, 1, 2, 3]]]]])

# Explicit non-default integer dtype: still returned unchanged.
td((np.array([[0, 1, 2, 3]], dtype=np.int64), 0))
# (array([[0, 1, 2, 3]], dtype=int64), 0)

# Floating point arrays.
td((np.array([[0., 1., 2., 3.]]), 0)) # float64
td((np.array([[0., 1., 2., 3.]], dtype=np.float64), 0))
# (array([[0., 1., 2., 3.]]), 0)

td((np.array([[0., 1., 2., 3.]], dtype=np.float32), 0))
# (array([[0., 1., 2., 3.]], dtype=float32), 0)

# Complex arrays.
td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex128
td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex128), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)

td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex64), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=complex64), 0)

# Boolean arrays.
td((np.array([[True, False, True, False]]), 0)) # bool
td((np.array([[True, False, True, False]], dtype=bool), 0))
# (array([[True, False, True, False]]), 0)

# Switch the target dtype to complex64, still with scaling enabled.
# Convention used throughout: when several calls are stacked above a single
# output comment, that output is the recorded result for each of them.
td = ToDtype(dtype=torch.complex64, scale=True)

td(PILImage_data) # The dataset object comes back as-is; its samples are still PIL Images.
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data

# A (PIL image, label) sample: the result is still a 2-tuple with label 0.
td(PILImage_data[0])
# (<PIL.Image.Image ...>, 0)   <- image repr elided; the source listing showed "(, 0)"

# Pixel values of the PIL image are unchanged by the transform.
list(td(PILImage_data[0][0]).getdata())
# [(37, 20, 12),
#  (35, 18, 10),
#  (36, 19, 11),
#  (36, 19, 11),
#  (37, 18, 11),
#  ...]

# uint8 Images and integer tensors (int64 / int32, any rank, with or without a
# label): every call in this group is recorded as raising an error.
td(Image_data[0])
td(Image_data[0][0])
td((torch.tensor(3), 0)) # int64
td((torch.tensor(3, dtype=torch.int64), 0))
td(torch.tensor(3))
td((torch.tensor([0, 1, 2, 3]), 0))
td(torch.tensor([0, 1, 2, 3]))
td((torch.tensor([[0, 1, 2, 3]]), 0))
td(torch.tensor([[0, 1, 2, 3]]))
td((torch.tensor([[[0, 1, 2, 3]]]), 0))
td(torch.tensor([[[0, 1, 2, 3]]]))
td((torch.tensor([[[[0, 1, 2, 3]]]]), 0))
td(torch.tensor([[[[0, 1, 2, 3]]]]))
td((torch.tensor([[[[[0, 1, 2, 3]]]]]), 0))
td(torch.tensor([[[[[0, 1, 2, 3]]]]]))
td((torch.tensor([[0, 1, 2, 3]], dtype=torch.int32), 0))
# Error

# Floating point tensors are converted to complex values.
# NOTE(review): the recorded result is roughly 2x the input (1. -> 1.9990)
# rather than a plain cast - confirm against the torchvision version in use.
td((torch.tensor([[0., 1., 2., 3.]]), 0)) # float32
td((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float32), 0))
td((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float64), 0))
# (tensor([[0.0000+0.j, 1.9990+0.j, 3.9980+0.j, 5.9970+0.j]]), 0)

# complex64 input already matches the target dtype; values are unchanged.
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex64
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]],
    dtype=torch.complex64), 0))
# (tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)

# complex32 and bool tensors: recorded as raising an error.
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]],
    dtype=torch.complex32), 0))
td((torch.tensor([[True, False, True, False]]), 0)) # bool
td((torch.tensor([[True, False, True, False]], dtype=torch.bool), 0))
# Error

# NumPy arrays through `td`, which at this point in the script is
# ToDtype(dtype=torch.complex64, scale=True).
# As the recorded outputs show, every array comes back with its original values
# and dtype; none of these calls is recorded as an error.
td((np.array(3), 0)) # int32
td((np.array(3, dtype=np.int32), 0))
# (array(3), 0)

td(np.array(3))
# array(3)

# 1D through 3D integer arrays, with and without a label alongside.
td((np.array([0, 1, 2, 3]), 0))
# (array([0, 1, 2, 3]), 0)

td(np.array([0, 1, 2, 3]))
# array([0, 1, 2, 3])

td((np.array([[0, 1, 2, 3]]), 0))
# (array([[0, 1, 2, 3]]), 0)

td(np.array([[0, 1, 2, 3]]))
# array([[0, 1, 2, 3]])

td((np.array([[[0, 1, 2, 3]]]), 0))
# (array([[[0, 1, 2, 3]]]), 0)

td(np.array([[[0, 1, 2, 3]]]))
# array([[[0, 1, 2, 3]]])

# Explicit int64 dtype.
td((np.array([[0, 1, 2, 3]], dtype=np.int64), 0))
# (array([[0, 1, 2, 3]], dtype=int64), 0)

# Floating point arrays.
td((np.array([[0., 1., 2., 3.]]), 0)) # float64
td((np.array([[0., 1., 2., 3.]], dtype=np.float64), 0))
# (array([[0., 1., 2., 3.]]), 0)

td((np.array([[0., 1., 2., 3.]], dtype=np.float32), 0))
# (array([[0., 1., 2., 3.]], dtype=float32), 0)

# Complex arrays.
td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex128
td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex128), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)

td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex64), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=complex64), 0)

# Boolean arrays.
td((np.array([[True, False, True, False]]), 0)) # bool
td((np.array([[True, False, True, False]], dtype=bool), 0))
# (array([[True, False, True, False]]), 0)

# Switch the target dtype to bool, still with scaling enabled.
# Convention used throughout: when several calls are stacked above a single
# output comment, that output is the recorded result for each of them.
td = ToDtype(dtype=torch.bool, scale=True)

td(PILImage_data) # The dataset object comes back as-is; its samples are still PIL Images.
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data

# A (PIL image, label) sample: the result is still a 2-tuple with label 0.
td(PILImage_data[0])
# (<PIL.Image.Image ...>, 0)   <- image repr elided; the source listing showed "(, 0)"

# Pixel values of the PIL image are unchanged by the transform.
list(td(PILImage_data[0][0]).getdata())
# [(37, 20, 12),
#  (35, 18, 10),
#  (36, 19, 11),
#  (36, 19, 11),
#  (37, 18, 11),
#  ...]

# uint8 Images and integer tensors (int64 / int32, any rank, with or without a
# label): every call in this group is recorded as raising an error.
td(Image_data[0])
td(Image_data[0][0])
td((torch.tensor(3), 0)) # int64
td((torch.tensor(3, dtype=torch.int64), 0))
td(torch.tensor(3))
td((torch.tensor([0, 1, 2, 3]), 0))
td(torch.tensor([0, 1, 2, 3]))
td((torch.tensor([[0, 1, 2, 3]]), 0))
td(torch.tensor([[0, 1, 2, 3]]))
td((torch.tensor([[[0, 1, 2, 3]]]), 0))
td(torch.tensor([[[0, 1, 2, 3]]]))
td((torch.tensor([[[[0, 1, 2, 3]]]]), 0))
td(torch.tensor([[[[0, 1, 2, 3]]]]))
td((torch.tensor([[[[[0, 1, 2, 3]]]]]), 0))
td(torch.tensor([[[[[0, 1, 2, 3]]]]]))
td((torch.tensor([[0, 1, 2, 3]], dtype=torch.int32), 0))
# Error

# Floating point tensors: zero is recorded as False, non-zero values as True.
td((torch.tensor([[0., 1., 2., 3.]]), 0)) # float32
td((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float32), 0))
td((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float64), 0))
# (tensor([[False, True, True, True]]), 0)

# Complex tensors (complex64 and complex32): recorded as raising an error.
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex64
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]],
    dtype=torch.complex64), 0))
td((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], 
    dtype=torch.complex32), 0))
# Error

# bool input already matches the target dtype; values are unchanged.
td((torch.tensor([[True, False, True, False]]), 0)) # bool
td((torch.tensor([[True, False, True, False]], dtype=torch.bool), 0))
# (tensor([[True, False, True, False]]), 0)

# NumPy arrays through `td`, which at this point in the script is
# ToDtype(dtype=torch.bool, scale=True).
# As the recorded outputs show, every array comes back with its original values
# and dtype; none of these calls is recorded as an error.
td((np.array(3), 0)) # int32
td((np.array(3, dtype=np.int32), 0))
# (array(3), 0)

td(np.array(3))
# array(3)

# 1D through 3D integer arrays, with and without a label alongside.
td((np.array([0, 1, 2, 3]), 0))
# (array([0, 1, 2, 3]), 0)

td(np.array([0, 1, 2, 3]))
# array([0, 1, 2, 3])

td((np.array([[0, 1, 2, 3]]), 0))
# (array([[0, 1, 2, 3]]), 0)

td(np.array([[0, 1, 2, 3]]))
# array([[0, 1, 2, 3]])

td((np.array([[[0, 1, 2, 3]]]), 0))
# (array([[[0, 1, 2, 3]]]), 0)

td(np.array([[[0, 1, 2, 3]]]))
# array([[[0, 1, 2, 3]]])

# Explicit int64 dtype.
td((np.array([[0, 1, 2, 3]], dtype=np.int64), 0))
# (array([[0, 1, 2, 3]], dtype=int64), 0)

# Floating point arrays.
td((np.array([[0., 1., 2., 3.]]), 0)) # float64
td((np.array([[0., 1., 2., 3.]], dtype=np.float64), 0))
# (array([[0., 1., 2., 3.]]), 0)

td((np.array([[0., 1., 2., 3.]], dtype=np.float32), 0))
# (array([[0., 1., 2., 3.]], dtype=float32), 0)

# Complex arrays.
td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex128
td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex128), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)

td((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex64), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=complex64), 0)

# Boolean arrays.
td((np.array([[True, False, True, False]]), 0)) # bool
td((np.array([[True, False, True, False]], dtype=bool), 0))
# (array([[True, False, True, False]]), 0)