PILToTensor() can convert a PIL (Pillow library) Image ([H, W, C]) or an Image ([..., C, H, W]) to a tensor ([C, H, W]) and doesn't scale its values to [0.0, 1.0], as shown below:
*Memos:

  • The 1st argument is img(Required-Type:PIL Image, Image or tensor/ndarray(int/float/complex/bool)): *Memos:
    • A tensor can be 0D or more D.
    • A ndarray can be 0D or more D.
    • Don't use img=.
  • Using v2 is recommended according to V1 or V2? Which one should I use?.
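
For reference, here is a minimal self-contained sketch (the tiny 2x2 image is made up for illustration, and ToDtype(scale=True) assumes a recent torchvision v2 release) showing that PILToTensor() keeps the raw uint8 values, while a scaling transform rescales them to [0.0, 1.0]:

from PIL import Image as PILImage
import numpy as np
import torch
from torchvision.transforms.v2 import PILToTensor, ToImage, ToDtype

# A tiny 2x2 RGB PIL image with values in 0-255.
pil_img = PILImage.fromarray(
    np.array([[[0, 128, 255], [10, 20, 30]],
              [[40, 50, 60], [70, 80, 90]]], dtype=np.uint8)
)

t = PILToTensor()(pil_img)
print(t.dtype, t.shape)
# torch.uint8 torch.Size([3, 2, 2]) <- values stay in 0-255

scaled = ToDtype(torch.float32, scale=True)(ToImage()(pil_img))
print(scaled.dtype, float(scaled.max()))
# torch.float32 1.0 <- rescaled to [0.0, 1.0]
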
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import ToImage, PILToTensor
import torch
import numpy as np

PILToTensor()
# PILToTensor()

PILImage_data = OxfordIIITPet(
    root="data",
    transform=None
)

Image_data = OxfordIIITPet(
    root="data",
    transform=ToImage()
)

Tensor_data = OxfordIIITPet(
    root="data",
    transform=PILToTensor()
)

Tensor_data
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data
#     StandardTransform
# Transform: PILToTensor()

Tensor_data[0]
# (tensor([[[37, 35, 36, ..., 247, 249, 249],
#           [35, 35, 37, ..., 246, 248, 249],
#           ...,
#           [28, 28, 27, ..., 59, 65, 76]],
#          [[20, 18, 19, ..., 248, 248, 248],
#           [18, 18, 20, ..., 247, 247, 248],
#           ...,
#           [27, 27, 27, ..., 94, 106, 117]],
#          [[12, 10, 11, ..., 253, 253, 253],
#           [10, 10, 12, ..., 251, 252, 253],
#           ...,
#           [35, 35, 35, ..., 214, 232, 223]]], dtype=torch.uint8), 0)

Tensor_data[0][0].size()
# torch.Size([3, 500, 394])

Tensor_data[0][0]
# tensor([[[37, 35, 36, ..., 247, 249, 249],
#          [35, 35, 37, ..., 246, 248, 249],
#          ...,
#          [28, 28, 27, ..., 59, 65, 76]],
#         [[20, 18, 19, ..., 248, 248, 248],
#          [18, 18, 20, ..., 247, 247, 248],
#          ...,
#          [27, 27, 27, ..., 94, 106, 117]],
#         [[12, 10, 11, ..., 253, 253, 253],
#          [10, 10, 12, ..., 251, 252, 253],
#          ...,
#          [35, 35, 35, ..., 214, 232, 223]]], dtype=torch.uint8)

Tensor_data[0][1]
# 0

import matplotlib.pyplot as plt

plt.imshow(X=Tensor_data[0][0])
# TypeError: Invalid shape (3, 500, 394) for image data
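
imshow() expects channel-last (H, W, C) image data, so one way to display the converted tensor is to move the channel dimension last first, e.g.:

plt.imshow(X=Tensor_data[0][0].permute(1, 2, 0)) # [C, H, W] -> [H, W, C]
plt.show()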

ptt = PILToTensor()

ptt(PILImage_data) # It's still a Dataset, not a PIL Image, so it's returned unchanged.
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data

ptt(PILImage_data[0])
# (tensor([[[37, 35, 36, ..., 247, 249, 249],
#           [35, 35, 37, ..., 246, 248, 249],
#           ...,
#           [28, 28, 27, ..., 59, 65, 76]],
#          [[20, 18, 19, ..., 248, 248, 248],
#           [18, 18, 20, ..., 247, 247, 248],
#           ...,
#           [27, 27, 27, ..., 94, 106, 117]],
#          [[12, 10, 11, ..., 253, 253, 253],
#           [10, 10, 12, ..., 251, 252, 253],
#           ...,
#           [35, 35, 35, ..., 214, 232, 223]]], dtype=torch.uint8), 0)

ptt(PILImage_data[0][0])
# tensor([[[37, 35, 36, ..., 247, 249, 249],
#          [35, 35, 37, ..., 246, 248, 249],
#          ...,
#          [28, 28, 27, ..., 59, 65, 76]],
#         [[20, 18, 19, ..., 248, 248, 248],
#          [18, 18, 20, ..., 247, 247, 248],
#          ...,
#          [27, 27, 27, ..., 94, 106, 117]],
#         [[12, 10, 11, ..., 253, 253, 253],
#          [10, 10, 12, ..., 251, 252, 253],
#          ...,
#          [35, 35, 35, ..., 214, 232, 223]]], dtype=torch.uint8)

ptt(Image_data[0][0])
# Image([[[37, 35, 36, ..., 247, 249, 249],
#         [35, 35, 37, ..., 246, 248, 249],
#         ...,
#         [28, 28, 27, ...,  59,  65,  76]],
#        [[20, 18, 19, ..., 248, 248, 248],
#         [18, 18, 20, ..., 247, 247, 248],
#         ...,
#         [27, 27, 27, ...,  94, 106, 117]],
#        [[12, 10, 11, ..., 253, 253, 253],
#         [10, 10, 12, ..., 251, 252, 253],
#         ...,
#         [35, 35, 35, ..., 214, 232, 223]]], dtype=torch.uint8,)

plt.imshow(X=ptt(PILImage_data[0][0]))
plt.imshow(X=ptt(Image_data[0][0]))
# TypeError: Invalid shape (3, 500, 394) for image data

ptt((torch.tensor(3), 0)) # int64
ptt((torch.tensor(3, dtype=torch.int64), 0))
# (tensor(3), 0)

ptt(torch.tensor(3))
# tensor(3)

ptt((torch.tensor([0, 1, 2, 3]), 0))
# (tensor([0, 1, 2, 3]), 0)

ptt(torch.tensor([0, 1, 2, 3]))
# tensor([0, 1, 2, 3])

ptt((torch.tensor([[0, 1, 2, 3]]), 0))
# (tensor([[0, 1, 2, 3]]), 0)

ptt(torch.tensor([[0, 1, 2, 3]]))
# tensor([[0, 1, 2, 3]])

ptt((torch.tensor([[[0, 1, 2, 3]]]), 0))
# (tensor([[[0, 1, 2, 3]]]), 0)

ptt(torch.tensor([[[0, 1, 2, 3]]]))
# tensor([[[0, 1, 2, 3]]])

ptt((torch.tensor([[[[0, 1, 2, 3]]]]), 0))
# (tensor([[[[0, 1, 2, 3]]]]), 0)

ptt(torch.tensor([[[[0, 1, 2, 3]]]]))
# tensor([[[[0, 1, 2, 3]]]])

ptt((torch.tensor([[[[[0, 1, 2, 3]]]]]), 0))
# (tensor([[[[[0, 1, 2, 3]]]]]), 0)

ptt(torch.tensor([[[[[0, 1, 2, 3]]]]]))
# tensor([[[[[0, 1, 2, 3]]]]])

ptt((torch.tensor([[0, 1, 2, 3]], dtype=torch.int32), 0))
# (tensor([[0, 1, 2, 3]], dtype=torch.int32), 0)

ptt((torch.tensor([[0, 1, 2, 3]], dtype=torch.uint8), 0))
# (tensor([[0, 1, 2, 3]], dtype=torch.uint8), 0)

ptt((torch.tensor([[0., 1., 2., 3.]]), 0)) # float32
ptt((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float32), 0))
# (tensor([[0., 1., 2., 3.]]), 0)

ptt((torch.tensor([[0., 1., 2., 3.]], dtype=torch.float64), 0))
# (tensor([[0., 1., 2., 3.]], dtype=torch.float64), 0)

ptt((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex64
ptt((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]],
     dtype=torch.complex64), 0))
# (tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)

ptt((torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]],
     dtype=torch.complex32), 0))
# (tensor([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=torch.complex32), 0)

ptt((torch.tensor([[True, False, True, False]]), 0)) # bool
ptt((torch.tensor([[True, False, True, False]], dtype=torch.bool), 0))
# (tensor([[True, False, True, False]]), 0)

ptt((np.array(3), 0)) # int32
ptt((np.array(3, dtype=np.int32), 0))
# (array(3), 0)

ptt(np.array(3))
# array(3)

ptt((np.array([0, 1, 2, 3], dtype=np.int32), 0))
# (array([0, 1, 2, 3]), 0)

ptt(np.array([0, 1, 2, 3], dtype=np.int32))
# array([0, 1, 2, 3])

ptt((np.array([[0, 1, 2, 3]], dtype=np.int32), 0))
# (array([[0, 1, 2, 3]]), 0)

ptt(np.array([[0, 1, 2, 3]], dtype=np.int32))
# array([[0, 1, 2, 3]])

ptt((np.array([[[0, 1, 2, 3]]]), 0))
# (array([[[0, 1, 2, 3]]]), 0)

ptt(np.array([[[0, 1, 2, 3]]]))
# array([[[0, 1, 2, 3]]])

ptt((np.array([[[[0, 1, 2, 3]]]]), 0))
# (array([[[[0, 1, 2, 3]]]]), 0)

ptt(np.array([[[[0, 1, 2, 3]]]]))
# array([[[[0, 1, 2, 3]]]])

ptt((np.array([[[[[0, 1, 2, 3]]]]]), 0))
# (array([[[[[0, 1, 2, 3]]]]]), 0)

ptt(np.array([[[[[0, 1, 2, 3]]]]]))
# array([[[[[0, 1, 2, 3]]]]])

ptt((np.array([[0, 1, 2, 3]], dtype=np.int64), 0))
# (array([[0, 1, 2, 3]], dtype=int64), 0)

ptt((np.array([[0, 1, 2, 3]], dtype=np.uint8), 0))
# (array([[0, 1, 2, 3]], dtype=uint8), 0)

ptt((np.array([[0, 1, 2, 3]], dtype=np.uint16), 0))
# (array([[0, 1, 2, 3]], dtype=uint16), 0)

ptt((np.array([[0., 1., 2., 3.]]), 0)) # float64
ptt((np.array([[0., 1., 2., 3.]], dtype=np.float64), 0))
# (array([[0., 1., 2., 3.]]), 0)

ptt((np.array([[0., 1., 2., 3.]], dtype=np.float32), 0))
# (array([[0., 1., 2., 3.]], dtype=float32), 0)

ptt((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)) # complex128
ptt((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex128), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]]), 0)

ptt((np.array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=np.complex64), 0))
# (array([[0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j]], dtype=complex64), 0)

ptt((np.array([[True, False, True, False]]), 0)) # bool
ptt((np.array([[True, False, True, False]], dtype=bool), 0))
# (array([[True, False, True, False]]), 0)
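
As the examples above suggest, inputs that aren't PIL Images (plain tensors and ndarrays of any dimensionality and dtype) appear to be passed through unchanged; a quick check of that assumption:

x = torch.tensor([[0, 1, 2, 3]], dtype=torch.uint8)
print(torch.equal(ptt(x), x))
# True

a = np.array([[0, 1, 2, 3]], dtype=np.uint8)
print(np.array_equal(ptt(a), a))
# True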