#pip install pytorch_pretrained_vit
import torch
from torch import nn
import numpy as np
from PIL import Image
import pandas as pd
from torchvision import transforms
from pytorch_pretrained_vit import ViT
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader, TensorDataset
# Load the training and test tables (the columns used below include
# 'path', 'target', 'Age' and 'Gender').
train_df = pd.read_csv('../DATA/D1-Tuberculosis_530|132|_Training_genderAge.csv')
test_df = pd.read_csv('../DATA/D1-Tuberculosis_530|132|_Test_genderAge.csv')

# Report the table sizes: training first, then test.
for frame in (train_df, test_df):
    print(frame.shape)

# The image paths stored in the CSVs are relative to the parent directory,
# so prepend the prefix to every entry of both tables.
prefix = '../'
for frame in (test_df, train_df):
    frame['path'] = prefix + frame['path'].astype(str)
def get_results(model, loader_train, loader_test, device):
    """Run both loaders through ``model`` and collect hooked layer outputs.

    A forward hook is attached to every *direct* child of ``model`` that is
    an ``nn.Linear``. Each forward pass appends the output of every hooked
    layer (converted to a NumPy array) to the feature list. The training
    loader is consumed first and the test loader second, so both returned
    lists contain the training batches followed by the test batches.

    The model is switched to evaluation mode and gradients are disabled
    while the batches are processed, so stochastic layers such as dropout
    do not perturb the extracted features. The model's previous
    training/eval state is restored and the hooks are removed before
    returning.

    Args:
        model: ``nn.Module`` to run; expected to already live on ``device``.
        loader_train: iterable yielding ``(images, labels)`` tensor batches.
        loader_test: iterable yielding ``(images, labels)`` tensor batches.
        device: device the image batches are moved to before the forward.

    Returns:
        Tuple ``(features_out_hook, labels_list)``. ``features_out_hook`` is
        a list with one NumPy array per hooked layer per batch;
        ``labels_list`` is a list with one NumPy label array per batch.
    """
    features_out_hook = []
    labels_list = []

    def hook(module, fea_in, fea_out):
        # Store a CPU copy so the captured features outlive the forward pass.
        features_out_hook.append(fea_out.cpu().detach().numpy())
        return None

    handles = []
    for layer in model.children():
        if isinstance(layer, nn.Linear):
            # NOTE(review): the message mentions LayerNorm although the check
            # above matches nn.Linear; kept verbatim, confirm the intent.
            print('LayerNorm exists')
            handles.append(layer.register_forward_hook(hook=hook))

    was_training = model.training
    model.eval()  # disable dropout etc. so the features are deterministic
    try:
        with torch.no_grad():  # inference only: no autograd graph needed
            for loader in (loader_train, loader_test):
                for images, labels in loader:
                    # Labels are only recorded, so they never need to be
                    # moved to the compute device.
                    labels_list.append(labels.cpu().detach().numpy())
                    model(images.to(device))
    finally:
        # Leave the model as we found it: original mode, no stray hooks
        # appending to the result list on later forward passes.
        model.train(was_training)
        for handle in handles:
            handle.remove()

    return features_out_hook, labels_list
class MyDataset(Dataset):
    """Image dataset backed by a DataFrame with a ``path`` column.

    Each item is the RGB image loaded from the row's ``path`` together with
    the row's integer ``target``. When the frame has no ``target`` column
    (unlabelled data) the placeholder label ``-1`` is returned instead, so
    the item shape stays ``(image, target)``.

    Args:
        df: DataFrame with a ``path`` column and, optionally, a ``target``
            column.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing frame."""
        return len(self.df)

    def _get_target(self, idx):
        """Return the label of row ``idx`` as a plain ``int`` (-1 if absent)."""
        if "target" in self.df.columns.values:
            # Built-in int(): the ``np.int`` alias was removed in NumPy 1.24.
            return int(self.df.target.values[idx])
        return -1

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the row at position ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = self.df.path.values[idx]
        # Context manager guarantees the underlying file handle is released;
        # convert() returns an independent in-memory copy.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        target = self._get_target(idx)

        if self.transform is not None:
            image = self.transform(image)
        return image, target
def train_net(train_df, test_df, size=512):
    """Extract features for both tables with a pretrained ViT.

    Despite its name this function performs no training: it loads the
    pretrained ``B_16_imagenet1k`` ViT, wraps both tables in ``MyDataset``
    loaders (batch size 2, no shuffling) and passes them to ``get_results``.

    Args:
        train_df: DataFrame describing the training images.
        test_df: DataFrame describing the test images.
        size: unused; kept so existing callers keep working.

    Returns:
        The ``(features_out_hook, labels_list)`` tuple produced by
        ``get_results``, training batches first and test batches second.
    """
    batch_size = 2

    # Both splits use the same deterministic preprocessing, so one pipeline
    # is shared. The mean/std values are the commonly used ImageNet channel
    # statistics; 384x384 is presumably the input size this checkpoint
    # expects -- TODO confirm against the pytorch_pretrained_vit docs.
    preprocess = transforms.Compose([
        transforms.Resize((384, 384)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    dataset_train = MyDataset(train_df, transform=preprocess)
    dataset_test = MyDataset(test_df, transform=preprocess)

    # shuffle=False keeps the extracted features aligned with the row order
    # of the data frames.
    train_dataloader = DataLoader(dataset=dataset_train, batch_size=batch_size,
                                  shuffle=False, num_workers=0)
    test_dataloader = DataLoader(dataset=dataset_test, batch_size=batch_size,
                                 shuffle=False, num_workers=0)

    model = ViT('B_16_imagenet1k', pretrained=True)

    # Prefer the second GPU as before, but fall back to the first one on
    # single-GPU machines instead of failing with an invalid device ordinal.
    if torch.cuda.is_available():
        device_name = 'cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0'
    else:
        device_name = 'cpu'
    device = torch.device(device_name)
    model = model.to(device)

    return get_results(model, train_dataloader, test_dataloader, device)
def train_model(train_df, test_df):
    """Announce the run and delegate to ``train_net``.

    Args:
        train_df: DataFrame describing the training images.
        test_df: DataFrame describing the test images.

    Returns:
        The ``(features_out_hook, labels_list)`` pair returned by
        ``train_net``.
    """
    print('Training...')
    hooked_features, batch_labels = train_net(train_df, test_df)
    return hooked_features, batch_labels
features_out_hook, labels_list = train_model(train_df, test_df)

# The loaders are not shuffled and the training loader is consumed first, so
# after concatenating every batch the first len(train_df) rows belong to the
# training set and the remainder to the test set. Splitting by row count
# (rather than by a hard-coded number of batches) stays correct when the
# dataset size or the batch size changes.
n_train = len(train_df)
n_test = len(test_df)

all_features = np.concatenate(features_out_hook, axis=0)
all_labels = np.concatenate(labels_list, axis=0)
if len(all_features) != n_train + n_test or len(all_labels) != n_train + n_test:
    raise ValueError(
        'Expected %d feature/label rows (train + test) but got %d features '
        'and %d labels' % (n_train + n_test, len(all_features), len(all_labels))
    )

# Flatten any trailing feature dimensions so every sample is a single row
# (a no-op when the hooked layer already yields 2-D output).
all_features = all_features.reshape(len(all_features), -1)

train_data, test_data = all_features[:n_train], all_features[n_train:]
train_labels, test_labels = all_labels[:n_train], all_labels[n_train:]

# Standardise using statistics fitted on the training split only.
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

# Append age (divided by 100) and gender as two extra feature columns.
Train_age = train_df['Age']
Train_gender = train_df['Gender']
train_data = np.column_stack([
    train_data,
    Train_age.to_numpy().reshape(-1, 1) / 100,
    Train_gender.to_numpy().reshape(-1, 1),
])

test_age = test_df['Age']
test_gender = test_df['Gender']
test_data = np.column_stack([
    test_data,
    test_age.to_numpy().reshape(-1, 1) / 100,
    test_gender.to_numpy().reshape(-1, 1),
])

print(train_data.shape)
print(test_data.shape)
print(train_labels.shape)
print(test_labels.shape)

# NOTE(review): the inputs are read from '../DATA/' but the outputs go to
# 'DATA/' relative to the working directory -- confirm this is intended.
np.save("DATA/R5_Traindata_530|132|_Pretrained_genderAge.npy", train_data)
np.save("DATA/R5_Test_530|132|_Pretrained_genderAge.npy", test_data)
np.save("DATA/R5_Train_label_530|132|_Pretrained_genderAge.npy", train_labels)
np.save("DATA/R5_Test_label_530|132|_Pretrained_genderAge.npy", test_labels)