This module combines CLIP with MoCo to increase the number of negative samples seen by the contrastive loss. This is useful when you don't have the compute to support large batch sizes, such as GPUs with large memory, or multi-GPU machines that could leverage a distributed InfoNCE loss implementation.
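The core idea is the MoCo trick: keep momentum-updated copies of the encoders and a queue of past key embeddings, so the InfoNCE loss sees K negatives regardless of the batch size. A minimal sketch of such a queue-based InfoNCE (an illustration of the idea, not this library's exact implementation):

```python
import torch
import torch.nn.functional as F

def moco_infonce(query, key, queue, temperature=0.07):
    """InfoNCE where negatives come from a memory queue instead of the batch.

    query: (B, D) embeddings from the online encoder
    key:   (B, D) embeddings of the matching pairs from the momentum encoder
    queue: (K, D) embeddings of past keys, used as extra negatives
    """
    query, key, queue = (F.normalize(t, dim=-1) for t in (query, key, queue))
    l_pos = (query * key).sum(dim=-1, keepdim=True)     # (B, 1) positive logits
    l_neg = query @ queue.t()                           # (B, K) negative logits
    logits = torch.cat([l_pos, l_neg], dim=1) / temperature
    labels = torch.zeros(len(query), dtype=torch.long)  # the positive is always at index 0
    return F.cross_entropy(logits, labels)

# toy usage: 8 pairs, 128-dim embeddings, a queue of 4096 negatives
loss = moco_infonce(torch.randn(8, 128), torch.randn(8, 128), torch.randn(4096, 128))
```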


class ClipTokenizer[source]
ClipTokenizer(context_length=77) ::DisplayedTransform
Tokenizer from https://github.com/openai/CLIP/blob/main/clip/simple_tokenizer.py
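A quick look at the tokenizer's attributes; the end-to-end example at the bottom of this page feeds context_length and vocab_size straight into vitb32_config:

```python
# ClipTokenizer exposes the context length and vocabulary size the text encoder expects
clip_tokenizer = ClipTokenizer(context_length=77)
print(clip_tokenizer.context_length, clip_tokenizer.vocab_size)
```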
vitb32_config[source]
vitb32_config(input_res,context_length,vocab_size)
ViT-B/32 configuration, uses 32x32 patches
vitl14_config[source]
vitl14_config(input_res,context_length,vocab_size)
ViT-L/14 configuration, uses 14x14 patches
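Both helpers return a keyword-argument dict that can be expanded directly into the CLIPMOCO constructor, mirroring the full example below (77 is CLIP's standard context length and 49,408 its BPE vocabulary size):

```python
# build a ViT-B/32 CLIP-MoCo model from the config helper
cfg = vitb32_config(input_res=224, context_length=77, vocab_size=49408)
model = CLIPMOCO(**cfg, K=4096, m=0.999)
```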
class Bottleneck[source]
Bottleneck(inplanes,planes,stride=1) ::Module
A modified ResNet bottleneck block used by ModifiedResNet below: its convolutions run with stride 1, and any downsampling is handled by an average pool (the anti-aliased striding described under ModifiedResNet).
class AttentionPool2d[source]
AttentionPool2d(spacial_dim:int,embed_dim:int,num_heads:int,output_dim:int=None) ::Module
The attention pooling head used as ModifiedResNet's final pooling layer: the spatial feature map is flattened, a learned positional embedding is added, and multi-head QKV attention produces a single pooled embedding of size output_dim.
class ModifiedResNet[source]
ModifiedResNet(layers,output_dim,heads,input_resolution=224,width=64) ::Module
A ResNet class that is similar to torchvision's but contains the following changes:
- There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool.
- Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 (sketched below)
- The final pooling layer is a QKV attention instead of an average pool
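The second bullet is the blur-pool style trick: rather than letting a strided convolution alias, an average pool does the downsampling and the convolution runs with stride 1. Roughly (a sketch, not the library's exact code):

```python
import torch.nn as nn

# anti-aliased downsampling: average-pool first, then a stride-1 convolution
def downsample(in_ch: int, out_ch: int, stride: int = 2) -> nn.Sequential:
    return nn.Sequential(
        nn.AvgPool2d(stride),                                # handles the spatial reduction
        nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),  # conv keeps stride 1
    )
```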
class LayerNorm[source]
LayerNorm(normalized_shape:Union[int, List[int], Size], eps:float=1e-05, elementwise_affine:bool=True, device=None, dtype=None) ::LayerNorm
Subclass torch's LayerNorm to handle fp16.
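The usual fp16-safety pattern here (also used in the upstream CLIP code) is to run the normalization in float32 and cast the result back to the input's dtype; a minimal sketch:

```python
import torch
import torch.nn as nn

class LayerNormFP32(nn.LayerNorm):
    "Illustrative fp16-safe LayerNorm: normalize in float32, return in the input's dtype."
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        orig_dtype = x.dtype
        out = super().forward(x.type(torch.float32))
        return out.type(orig_dtype)
```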
class QuickGELU[source]
QuickGELU() ::Module
The fast sigmoid-based approximation of GELU used by CLIP's transformer MLPs (see the snippet below).
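For reference, QuickGELU in the CLIP codebase is the sigmoid approximation of GELU:

```python
import torch

def quick_gelu(x: torch.Tensor) -> torch.Tensor:
    # x * sigmoid(1.702 * x), a cheap approximation of GELU
    return x * torch.sigmoid(1.702 * x)
```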
class ResidualAttentionBlock[source]
ResidualAttentionBlock(d_model:int,n_head:int,attn_mask:Tensor=None) ::Module
A pre-LayerNorm Transformer block: multi-head self-attention followed by a 4x-wide QuickGELU MLP, each wrapped in a residual connection and preceded by its own LayerNorm (sketched below).
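This layout is also what the per-block pattern in the learner.summary() output further down reflects (two LayerNorms, an attention, and a Linear-QuickGELU-Linear MLP). A self-contained sketch, not the module's exact code:

```python
import torch
import torch.nn as nn

class PreLNBlock(nn.Module):
    "Illustrative pre-LayerNorm transformer block: x + attn(ln(x)), then x + mlp(ln(x))."
    def __init__(self, d_model: int, n_head: int):
        super().__init__()
        self.ln_1 = nn.LayerNorm(d_model)
        self.attn = nn.MultiheadAttention(d_model, n_head)
        self.ln_2 = nn.LayerNorm(d_model)
        self.mlp = nn.Sequential(
            nn.Linear(d_model, 4 * d_model),  # e.g. 768 -> 3072 for ViT-B/32
            nn.GELU(),                        # QuickGELU in the actual module
            nn.Linear(4 * d_model, d_model),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y = self.ln_1(x)
        x = x + self.attn(y, y, y, need_weights=False)[0]
        return x + self.mlp(self.ln_2(x))
```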
class Transformer[source]
Transformer(width:int,layers:int,heads:int,attn_mask:Tensor=None,checkpoint=False,checkpoint_nchunks=2) ::Module
A stack of `layers` ResidualAttentionBlocks of width `width`. When checkpoint=True, the forward pass uses gradient checkpointing (controlled by checkpoint_nchunks) to trade extra compute for lower activation memory (see the sketch below).
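A sketch of that mechanism, assuming checkpoint_nchunks sets how many segments the block stack is split into (an assumption based on the argument names, not the library's exact code):

```python
import torch.nn as nn
from torch.utils.checkpoint import checkpoint_sequential

def run_blocks(blocks: nn.Sequential, x, use_checkpoint: bool = False, nchunks: int = 2):
    "Run a stack of blocks, optionally recomputing activations in `nchunks` segments on backward."
    if use_checkpoint:
        return checkpoint_sequential(blocks, nchunks, x)
    return blocks(x)
```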
class VisualTransformer[source]
VisualTransformer(input_resolution:int,patch_size:int,width:int,layers:int,heads:int,output_dim:int, **kwargs) ::Module
CLIP's Vision Transformer image encoder: the image is split into patches by a strided convolution, a class embedding and positional embeddings are added, the token sequence is run through the Transformer above, and the class token is projected to output_dim.
class CLIPMOCO[source]
CLIPMOCO(embed_dim:int, image_resolution:int, vision_layers:Union[Tuple[int, int, int, int], int], vision_width:int, vision_patch_size:int, context_length:int, vocab_size:int, transformer_width:int, transformer_heads:int, transformer_layers:int, K=4096, m=0.999, **kwargs) ::Module
The CLIP model (a vision encoder and a text Transformer projected into a shared embedding space) combined with MoCo: momentum copies of the encoders produce key embeddings, and a queue of K past keys supplies negatives beyond the current batch; m is the momentum coefficient of the key encoders' EMA update.
| | Type | Default | Details |
|---|---|---|---|
| embed_dim | int | | |
| image_resolution | int | | vision |
| vision_layers | Union[Tuple[int, int, int, int], int] | | |
| vision_width | int | | |
| vision_patch_size | int | | |
| context_length | int | | text |
| vocab_size | int | | |
| transformer_width | int | | |
| transformer_heads | int | | |
| transformer_layers | int | | |
| K | int | 4096 | |
| m | float | 0.999 | |
| kwargs | | | |
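K and m are the MoCo-specific knobs: K is the size of the negative queue and m the momentum of the EMA update that keeps the frozen key encoders trailing the trainable ones (which is why the second half of the learner.summary() output below shows a non-trainable copy of every layer). A sketch of the momentum update, assuming the standard MoCo rule:

```python
import torch

@torch.no_grad()
def momentum_update(query_encoder, key_encoder, m: float = 0.999):
    "EMA update: key_params <- m * key_params + (1 - m) * query_params."
    for p_q, p_k in zip(query_encoder.parameters(), key_encoder.parameters()):
        p_k.data.mul_(m).add_(p_q.data, alpha=1.0 - m)
```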
Retrieval@K is a useful proxy metric for tracking training performance and convergence.
class RetrievalAtK[source]
RetrievalAtK(k=20, **kwargs) ::AccumMetric
Stores predictions and targets on CPU in accumulate to perform final calculations with func.
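For paired image-text embeddings, retrieval@k asks how often the matching caption for an image appears among its top-k most similar texts. A rough illustration of the computation (not necessarily the metric's exact implementation):

```python
import torch
import torch.nn.functional as F

def retrieval_at_k(image_emb: torch.Tensor, text_emb: torch.Tensor, k: int = 20) -> torch.Tensor:
    "Fraction of images whose matching caption ranks in the top-k by cosine similarity."
    image_emb, text_emb = F.normalize(image_emb, dim=-1), F.normalize(text_emb, dim=-1)
    sims = image_emb @ text_emb.t()                      # (N, N) image-to-text similarities
    ranks = sims.argsort(dim=-1, descending=True)        # per-image ranking of all captions
    target = torch.arange(len(image_emb)).unsqueeze(1)   # caption i is the match for image i
    return (ranks[:, :k] == target).any(dim=-1).float().mean()
```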
class CLIPMOCOTrainer[source]
CLIPMOCOTrainer(after_create=None,before_fit=None,before_epoch=None,before_train=None,before_batch=None,after_pred=None,after_loss=None,before_backward=None,before_step=None,after_cancel_step=None,after_step=None,after_cancel_batch=None,after_batch=None,after_cancel_train=None,after_train=None,before_validate=None,after_cancel_validate=None,after_validate=None,after_cancel_epoch=None,after_epoch=None,after_cancel_fit=None,after_fit=None) ::Callback
MoCo loss for CLIP. Can be used with or without DistributedDataParallel.
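The smoke test below builds a tiny image-caption dataset from MNIST_TINY (digit labels mapped to their English names), tokenizes the captions with ClipTokenizer, and wires a ViT-B/32 CLIPMOCO, the CLIPMOCOTrainer callback, and several RetrievalAtK metrics into a fastai Learner.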
```python
from fastai.vision.all import *
from self_supervised.multimodal.clip_moco import *  # module path assumed from this page's location

num2txt = {'3': 'three', '7': 'seven'}
def num_to_txt(o): return num2txt[o]
def dummy_targ(o): return 0  # loss func is not called without it

path = untar_data(URLs.MNIST_TINY)
items = get_image_files(path)
clip_tokenizer = ClipTokenizer()
tds = Datasets(items, [PILImage.create, [parent_label, num_to_txt], dummy_targ],
               n_inp=2, splits=GrandparentSplitter()(items))
dls = tds.dataloaders(bs=2, after_item=[Resize(224), clip_tokenizer, ToTensor()],
                      after_batch=[IntToFloatTensor()], device='cpu')

vitb32_config_dict = vitb32_config(224, clip_tokenizer.context_length, clip_tokenizer.vocab_size)
clip_model = CLIPMOCO(K=4096, m=0.999, **vitb32_config_dict, checkpoint=False, checkpoint_nchunks=0)

learner = Learner(dls, clip_model, loss_func=noop,
                  cbs=[CLIPMOCOTrainer(), ShortEpochCallback(0.001)],
                  metrics=[RetrievalAtK(k=5),
                           RetrievalAtK(k=20),
                           RetrievalAtK(k="mean"),
                           RetrievalAtK(k="median")])
learner.summary()
```
CLIPMOCO (Input shape: 2 x torch.Size([2, 77]))
============================================================================
Layer (type) Output Shape Param # Trainable
============================================================================
2 x 768 x 7 x 7
Conv2d 2359296 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 True
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 True
LayerNorm 1536 True
LayerNorm 1536 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 True
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 77 x 512
Embedding 25296896 True
LayerNorm 1024 True
____________________________________________________________________________
2 x 768 x 7 x 7
Conv2d 2359296 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
____________________________________________________________________________
2 x 1 x 3072
Linear 2362368 False
QuickGELU
____________________________________________________________________________
2 x 1 x 768
Linear 2360064 False
LayerNorm 1536 False
LayerNorm 1536 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
LayerNorm 1024 False
____________________________________________________________________________
2 x 1 x 2048
Linear 1050624 False
QuickGELU
____________________________________________________________________________
2 x 1 x 512
Linear 1049088 False
LayerNorm 1024 False
____________________________________________________________________________
Total params: 193,876,992
Total trainable params: 109,587,456
Total non-trainable params: 84,289,536
Optimizer used: <function Adam at 0x7fbd8d0189e0>
Loss function: <bound method CLIPMOCOTrainer.lf of CLIPMOCOTrainer>
Callbacks:
- TrainEvalCallback
- ShortEpochCallback
- CLIPMOCOTrainer
- Recorder
- ProgressCallback