This page demonstrates how to train various SOTA self-supervised computer vision models in just a few lines!
from fastai.vision.all import *
First, you need to create your DataLoaders. Keep them as simple as possible — just reading and resizing, without any data augmentations. Data augmentations are passed in separately, as we will see in the next sections.
def get_dls(size, bs, workers=None):
    """Build labeled ImageWang DataLoaders at resolution `size`.

    Downloads the 160px subset when `size` <= 160, otherwise the full
    dataset. Item transforms are intentionally minimal (read, tensorize,
    near-full-frame crop) — SSL augmentations are supplied separately.

    Args:
        size: target image side length for RandomResizedCrop.
        bs: batch size.
        workers: passed through as `num_workers` (None lets fastai decide).

    Returns:
        A fastai DataLoaders with a 90/10 random train/valid split.
    """
    url = URLs.IMAGEWANG_160 if size <= 160 else URLs.IMAGEWANG
    files = get_image_files(untar_data(url))
    item_tfms = [
        [PILImage.create, ToTensor, RandomResizedCrop(size, min_scale=0.9)],
        [parent_label, Categorize()],
    ]
    dsets = Datasets(files, tfms=item_tfms,
                     splits=RandomSplitter(valid_pct=0.1)(files))
    return dsets.dataloaders(bs=bs, num_workers=workers,
                             after_batch=[IntToFloatTensor], device='cpu')
# bs: batch size; resize: image size used for the DataLoaders;
# size: crop size used later by the augmentation pipelines (not passed here).
bs,resize,size = 64,160,128
dls = get_dls(resize, bs)
If you are training self-supervised models, it is most likely that your data is unlabeled. In that case you can create your DataLoaders with a dummy target function, as follows:
def get_dls(size, bs, workers=None):
    """Build unlabeled ImageWang DataLoaders at resolution `size`.

    Identical to the labeled variant except every item gets a constant
    dummy target of 0, since self-supervised training needs no labels.
    Item transforms stay minimal; SSL augmentations are added separately.

    Args:
        size: target image side length for RandomResizedCrop.
        bs: batch size.
        workers: passed through as `num_workers` (None lets fastai decide).

    Returns:
        A fastai DataLoaders with a 90/10 random train/valid split.
    """
    url = URLs.IMAGEWANG_160 if size <= 160 else URLs.IMAGEWANG

    def dummy_label(o): return 0  # constant target: data is unlabeled

    files = get_image_files(untar_data(url))
    item_tfms = [
        [PILImage.create, ToTensor, RandomResizedCrop(size, min_scale=0.9)],
        [dummy_label],
    ]
    dsets = Datasets(files, tfms=item_tfms,
                     splits=RandomSplitter(valid_pct=0.1)(files))
    return dsets.dataloaders(bs=bs, num_workers=workers,
                             after_batch=[IntToFloatTensor], device='cpu')
# bs: batch size; resize: image size used for the DataLoaders;
# size: crop size used later by the augmentation pipelines (not passed here).
bs,resize,size = 64,160,128
dls = get_dls(resize, bs)
from self_supervised.layers import *
from self_supervised.vision.simclr import *
# SimCLR: build an xresnet34 encoder from scratch and attach a projection head.
encoder = create_encoder("xresnet34", n_in=3, pretrained=False)
model = create_simclr_model(encoder, hidden_size=2048, projection_size=128)
# Augmentations live in the pipelines, not the DataLoaders; cuda=False since
# the dls above were created with device='cpu'.
aug_pipelines = get_simclr_aug_pipelines(size=size, rotate=True, jitter=True, bw=True, blur=True, blur_s=(4,16), blur_p=0.25, cuda=False)
# loss_func=noop — the SimCLR callback presumably supplies the actual loss
# (confirm in the library docs); ShortEpochCallback keeps any fit() call tiny.
learn = Learner(dls, model,loss_func=noop,cbs=[SimCLR(aug_pipelines, temp=0.07, print_augs=True),ShortEpochCallback(0.001)])
# Drive one batch through the 'before_batch' event so the callback has
# augmented views available to display below.
b = dls.one_batch()
learn._split(b)
learn('before_batch')
Always check your data augmentations to see whether they make sense before moving forward with training.
# Visual sanity check: display 10 augmented examples from the SimCLR callback.
learn.sim_clr.show(n=10);
from self_supervised.vision.moco import *
# MoCo: same encoder recipe, with a MoCo head and a queue of size K=128.
encoder = create_encoder("xresnet34", n_in=3, pretrained=False)
model = create_moco_model(encoder, hidden_size=1024, projection_size=128)
# cuda=False since the dls above were created with device='cpu'.
aug_pipelines = get_moco_aug_pipelines(size=size, rotate=True, jitter=True, bw=True, blur=False, jitter_p=0.5, cuda=False)
# loss_func=noop — the MOCO callback presumably supplies the actual loss
# (confirm in the library docs); ShortEpochCallback keeps any fit() call tiny.
learn = Learner(dls, model,loss_func=noop,cbs=[MOCO(aug_pipelines=aug_pipelines, K=128, print_augs=True),ShortEpochCallback(0.001)])
# Run one batch forward manually so the callback can render augmented views.
b = dls.one_batch()
learn._split(b)
learn.pred = learn.model(*learn.xb)
axes = learn.moco.show(n=10)
from self_supervised.vision.byol import *
# BYOL: encoder plus BYOL projection/prediction heads.
encoder = create_encoder("xresnet34", n_in=3, pretrained=False)
model = create_byol_model(encoder, hidden_size=4096, projection_size=256)
# cuda=False since the dls above were created with device='cpu'.
aug_pipelines = get_byol_aug_pipelines(size=size, rotate=True, jitter=True, bw=True, blur=False, jitter_p=0.5, cuda=False)
# loss_func=noop — the BYOL callback presumably supplies the actual loss
# (confirm in the library docs); ShortEpochCallback keeps any fit() call tiny.
learn = Learner(dls, model,loss_func=noop,cbs=[BYOL(aug_pipelines=aug_pipelines,print_augs=True),ShortEpochCallback(0.001)])
# Fire 'before_fit' first (unlike the other sections) — presumably BYOL
# needs its fit-time setup before batches can be processed; then drive one
# batch through 'before_batch' and display the augmented views.
b = dls.one_batch()
learn._split(b)
learn('before_fit')
learn('before_batch')
learn.byol.show(n=10);
from self_supervised.vision.swav import *
# SwAV: encoder plus SwAV projection head and prototypes.
encoder = create_encoder("xresnet34", n_in=3, pretrained=False)
model = create_swav_model(encoder, hidden_size=2048, projection_size=128)
# Multi-crop augmentation: 2 large crops (128px, scale 0.25-1.0) and
# 6 small crops (96px, scale 0.05-0.3). cuda=False to match the CPU dls.
aug_pipelines = get_swav_aug_pipelines(num_crops=[2,6],
crop_sizes=[128,96],
min_scales=[0.25,0.05],
max_scales=[1.0,0.3],
rotate=True, jitter=True, bw=True, blur=False, jitter_p=0.5, cuda=False)
# crop_assgn_ids=[0,1]: the two large crops are used for cluster assignment.
# K=bs*2**6 sets the feature-queue size; the queue starts at 50% of training.
# loss_func=noop — the SWAV callback presumably supplies the actual loss
# (confirm in the library docs); ShortEpochCallback keeps any fit() call tiny.
learn = Learner(dls, model, loss_func=noop, cbs=[SWAV(aug_pipelines=aug_pipelines, crop_assgn_ids=[0,1], K=bs*2**6, queue_start_pct=0.5), ShortEpochCallback(0.001)])
# Drive one batch through 'before_batch', then display 5 multi-crop samples.
b = dls.one_batch()
learn._split(b)
learn('before_batch')
for i in range(5): learn.swav.show_one();