pytorch runs much slower than tensorflow even for a shallow CNN












0














The input data is EEG data that has been converted into PSD (power spectral density) features. Its shape is [103600, 59, 51], where 103600 is the number of samples, i.e. the total number of samples processed in one epoch. All of the data is loaded into memory.



The model I used is very simple; however, training one epoch took 50s with pytorch 0.4 on a GTX 1080, whereas inference on the validation set took only 0.3s.



I also tested this model using tensorflow 1.2.0 the time was 8s. I have no idea if I got it wrong.



So I made some test. This img shows the time for runing.
enter image description here



where
time_to_tensor is the time for inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)



time_to_cuda for inputs, targets = inputs.to(self.device), targets.to(self.device)



time_model for out = self.model(inputs)



time_criterion for loss = self.criterion(out, targets)



time_backward for loss.backward(), and time_step for self.optimizer.step()



It seems the operation for tensor.to(self.device) (or tensor.cuda()) wasted much time. So I tried to move the data to GPU at once, using



self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)


and commented



inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
inputs, targets = inputs.to(self.device), targets.to(self.device)


However, I got the running times shown in the following image. enter image description here. The time required for an epoch was still about 50s, but the time attributed to each operation changed. I am confused.



Could someone help me to find the problem? Thanks.



Code was here.



It's the model I used



import torch
import torch.nn as nn
import torch.nn.functional as F

class TxtCNN4(nn.Module):
    """Shallow CNN over a (time_steps x psd_lenght) input.

    Three convolutions are stacked along the time axis. The output of every
    convolution is max-pooled over its whole remaining time extent, the three
    pooled maps are concatenated along the channel axis and passed through
    two linear layers that produce ``n_classes`` scores.
    """

    def __init__(self, n_classes=6, time_steps=59, psd_lenght=51, filter_numbers=128):
        super().__init__()
        self.n_classes = n_classes
        self.time_steps = time_steps
        self.psd_lenght = psd_lenght
        self.filters = filter_numbers

        narrow = self.filters // 2
        wide = self.filters

        # Stage 1 spans the full PSD axis, so the width collapses to 1 and the
        # time axis shrinks by 2 (kernel height 3, no padding).
        self.conv1 = nn.Conv2d(1, narrow, kernel_size=(3, psd_lenght),
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(narrow, momentum=0.05)
        self.pool1 = nn.MaxPool2d(kernel_size=(self.time_steps - 2, 1))

        # Stages 2 and 3 only convolve along time; each removes 2 more steps.
        self.conv2 = nn.Conv2d(narrow, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool2 = nn.MaxPool2d(kernel_size=(self.time_steps - 4, 1))

        self.conv3 = nn.Conv2d(wide, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool3 = nn.MaxPool2d(kernel_size=(self.time_steps - 6, 1))

        # Concatenated pooled channels: narrow + wide + wide.
        self.fc = nn.Linear(in_features=2 * wide + narrow, out_features=128)
        self.cls = nn.Linear(in_features=128, out_features=self.n_classes)
        self.criterion = nn.CrossEntropyLoss(weight=None)

    def forward(self, inputs):
        """Return class scores of shape ``(N, n_classes)`` for ``inputs``.

        ``inputs`` is viewed as ``(N, 1, time_steps, psd_lenght)``, i.e. a
        single-channel image per sample.
        """
        x = inputs.view(-1, 1, self.time_steps, self.psd_lenght)

        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        pooled = []
        for conv, bn, pool in stages:
            x = F.relu(bn(conv(x)), inplace=True)
            pooled.append(pool(x))

        n_features = 2 * self.filters + self.filters // 2
        feats = torch.cat(pooled, dim=1).view(-1, n_features)
        feats = F.dropout(feats, p=0.5, training=self.training)
        feats = F.relu(self.fc(feats), inplace=True)
        return self.cls(feats)


This is the main Python file for training the model



# ---- data location ----
file_path = '/mnt/disk2/wy/SLEEP_EDF/'

# ---- optimisation ----
batch_size = 32 * 4 * 2  # 256 samples per mini-batch
learning_rate = 1e-3
max_epochs = 100
early_stop_epoch = 10
dropout_rate = 0.5

# ---- model ----
model_type = 'TxtCNN4'
filter_numbers = 128
nb_classes = 5

# ---- signal description ----
fs = 100  # sampling frequency
n_Channels = 2
n_Samples = 30 * 100  # presumably 30 s at fs = 100 -- TODO confirm

# ---- checkpoint restore ----
load_flag = False
model_path = ''

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` mini-batches.

    Args:
        inputs: Array-like with a ``shape`` attribute, indexed along axis 0.
        targets: Array-like with the same length as ``inputs``.
        batchsize: Maximum number of samples per batch; the final batch is
            shorter when the length is not a multiple of ``batchsize``.
        shuffle: When true, samples are visited in a fresh random order
            drawn with ``np.random.shuffle``; otherwise in storage order.

    Yields:
        Tuples ``(inputs[excerpt], targets[excerpt])``.
    """
    input_len = inputs.shape[0]
    assert input_len == len(targets)

    indices = None
    if shuffle:
        indices = np.arange(input_len)
        np.random.shuffle(indices)

    for start_idx in range(0, input_len, batchsize):
        stop_idx = start_idx + batchsize
        if shuffle:
            excerpt = indices[start_idx:stop_idx]
        else:
            excerpt = slice(start_idx, stop_idx)
        # The batch was previously computed but never handed to the caller,
        # so iterating over the function's result failed.
        yield inputs[excerpt], targets[excerpt]

class Trainer():
    """Train and validate one model on a single cross-validation fold.

    The constructor splits the data with ``reformatInput``, standardises it
    with the training-set mean/std, builds the model selected by
    ``model_type`` and opens one ``SummaryWriter`` each for the train, val
    and test logs.
    """

    def __init__(self, X_inputs, labels, fold, subj_id, log_path, model_type=model_type, args=None):
        """Prepare data, model, loss, optimizer and log writers.

        Args:
            X_inputs: Full input set, forwarded to ``reformatInput``.
            labels: Labels matching ``X_inputs``.
            fold: Split description forwarded to ``reformatInput``.
            subj_id: Identifier used in the log directory name.
            log_path: Prefix of the log directory; it is concatenated with
                ``'_' + model_type``, so it must be a string.
            model_type: ``'TxtCNN4'`` selects ``TxtCNN4``; any other value
                selects ``DeepSleepNet``.
            args: Unused.
        """
        self.subj_id = subj_id
        self.model_type = model_type
        self.writer_root_path = os.path.abspath(
            os.path.join(
                os.path.curdir,
                log_path+'_'+model_type,
                model_type + '_' + str(subj_id)))
        self.train_writer = SummaryWriter(os.path.join(self.writer_root_path, 'train'))
        self.val_writer = SummaryWriter(os.path.join(self.writer_root_path, 'val'))
        self.test_writer = SummaryWriter(os.path.join(self.writer_root_path, 'test'))

        self.curr_epcoh = 0
        self.curr_iter = 0

        self.train_end_flag = False

        (self.X_train, self.y_train), (self.X_val, self.y_val), (self.X_test, self.y_test) = reformatInput(X_inputs, labels, fold)
        print('Test set label and BiLi:\t', np.unique(self.y_test, return_counts=True))
        # Standardise every split with statistics of the training split only.
        X_mean = self.X_train.mean()
        X_std = self.X_train.std()
        self.X_train = (self.X_train - X_mean)/X_std
        self.X_val = (self.X_val - X_mean)/X_std
        self.X_test = (self.X_test - X_mean)/X_std

        if model_type == 'TxtCNN4':
            model = TxtCNN4()
        else:
            model = DeepSleepNet()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.criterion = nn.CrossEntropyLoss(weight=None)

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def _now(self):
        """Return ``time.time()`` after pending CUDA work has finished.

        CUDA kernels are launched asynchronously. Without a synchronisation
        the host clock only measures launch overhead, and the real GPU time
        is charged to whichever later statement happens to block.
        """
        if self.device.type == 'cuda':
            torch.cuda.synchronize()
        return time.time()

    def train(self, epoch):
        """Run one training epoch and print loss, accuracy and stage timings.

        Accuracy and loss are sampled (and written to the train log) on every
        20th iteration only.

        Args:
            epoch: Index of the epoch being run; stored in ``curr_epcoh``.
        """
        train_loss = AvgMeter()
        train_acc = AvgMeter()
        start_time = time.time()

        timer_names = ('time_to_tensor', 'time_to_cuda', 'time_model',
                       'time_criterion', 'time_backward', 'time_step',
                       'time_writer')
        timers = {name: 0 for name in timer_names}

        self.curr_epcoh = epoch
        self.model.train()
        for (inputs, targets) in iterate_minibatches(self.X_train, self.y_train, batch_size, shuffle=False):
            self.curr_iter += 1

            tick = self._now()
            inputs = torch.tensor(inputs, dtype=torch.float)
            targets = torch.tensor(targets, dtype=torch.int64)
            timers['time_to_tensor'] += self._now() - tick

            tick = self._now()
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            timers['time_to_cuda'] += self._now() - tick

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            tick = self._now()
            out = self.model(inputs)
            timers['time_model'] += self._now() - tick

            tick = self._now()
            loss = self.criterion(out, targets)
            timers['time_criterion'] += self._now() - tick

            tick = self._now()
            loss.backward()
            timers['time_backward'] += self._now() - tick

            tick = self._now()
            self.optimizer.step()
            timers['time_step'] += self._now() - tick

            if self.curr_iter % 20 == 0:
                tick = self._now()
                pred = out.max(dim=1)[1].cpu().numpy()
                gt = targets.cpu().numpy()
                acc = np.mean(pred == gt)
                _loss = loss.detach().cpu().numpy()
                train_acc.update(acc)
                train_loss.update(_loss)

                # The learning rate lives in the optimizer; read it from
                # there instead of relying on an undefined local name.
                curr_lr = self.optimizer.param_groups[0]['lr']
                self.train_writer.add_scalar('learning_rate', curr_lr, self.curr_iter)
                self.train_writer.add_scalar('acc', acc, self.curr_iter)
                self.train_writer.add_scalar('loss', _loss, self.curr_iter)
                timers['time_writer'] += self._now() - tick

        fmt_str = "Train\tEpoch [{:d}/{:d}] train_Loss: {:.4f}\ttrain_Acc: {:.2f}\tTime per Epoch: {:.4f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   train_loss.avg,
                                   train_acc.avg*100,
                                   time.time()-start_time)
        print(print_str)
        for name in timer_names:
            print(name + ': ', timers[name])
        print(len(self.y_train))
        train_acc.reset()
        train_loss.reset()

    def validation(self):
        """Evaluate on the validation split; print and log the metrics.

        Computes mean loss, accuracy, Cohen's kappa and macro-averaged recall
        (logged as ``bca``) without tracking gradients.
        """
        self.model.eval()
        loss_gather = AvgMeter()
        pred_gather = PreGather()
        tran_time = 0
        with torch.no_grad():
            # val set
            for (inputs, targets) in iterate_minibatches(self.X_val, self.y_val, batch_size, shuffle=False):
                tick = self._now()
                inputs = torch.tensor(inputs, dtype=torch.float)
                targets = torch.tensor(targets, dtype=torch.int64)
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                tran_time += self._now() - tick

                out = self.model(inputs)
                _loss = self.criterion(out, targets).detach().cpu().numpy()
                pred = out.max(dim=1)[1].cpu().numpy()
                loss_gather.update(_loss)
                pred_gather.update(pred)

        val_loss = loss_gather.avg
        pred = pred_gather.pred
        val_acc = np.mean(pred == self.y_val)
        val_kappa = cohen_kappa_score(self.y_val, pred)
        val_BCA = recall_score(self.y_val, pred, average='macro')
        loss_gather.reset()
        pred_gather.reset()

        # Then we print the results for this epoch:
        fmt_str = "VAL \tEpoch [{:d}/{:d}] val_Loss: {:.4f}\tval_Acc: {:.2f}\tval_kappa: {:.2f}\tval_BCA: {:.2f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   val_loss,
                                   val_acc*100,
                                   val_kappa*100,
                                   val_BCA*100)
        print(print_str)
        print(tran_time)
        print(len(self.y_val))
        self.val_writer.add_scalar('acc', val_acc, self.curr_iter)
        self.val_writer.add_scalar('loss', val_loss, self.curr_iter)
        self.val_writer.add_scalar('kappa', val_kappa, self.curr_iter)
        self.val_writer.add_scalar('bca', val_BCA, self.curr_iter)


def train_all_subject(num_epochs=max_epochs, log_path=None):
    """Train one model per subject (leave-subject-out cross validation).

    Args:
        num_epochs: Upper bound on the epochs run for each subject.
        log_path: Log directory prefix forwarded to ``Trainer``, which
            concatenates it with a string, so callers should pass a string.
    """
    # Leave-Subject-Out cross validation
    subj_nums, fold_pairs, EEGs, labels = load_data(file_path, subj_nums=38, channel=1)
    for subj_id in range(subj_nums):
        print('The subj_id', subj_id, '\t\t Training the ' + model_type + ' Model...')
        trainer = Trainer(EEGs, labels, fold_pairs[subj_id], subj_id, log_path)
        # Honour the caller's epoch budget; the argument used to be ignored
        # in favour of the module-level max_epochs.
        for epoch in range(trainer.curr_epcoh, num_epochs):
            if trainer.train_end_flag is not False:
                break
            trainer.train(epoch)
            trainer.validation()
            print('-'*50)









share|improve this question
























  • It would be easier to respond on this if you would use generated training data for this test, so that someone else would be able to reproduce the results in order to track down potential inefficiencies. Without being able to test it is difficult to say. That said, I wonder why time for backward and step are really low in the first picture, but extremely high in the second picture. That is a bit weird. You may also try torch.tensor(..., device=device) as this is generally (sometimes much) faster than using .to(device). No detour over CPU memory.
    – blue-phoenox
    Nov 12 at 12:17












  • Thanks for your reply. It might be hard to generate suitable data, and more code is needed to add. Actually, I also tried torch.tensor(..., device=self.device). There is no essential difference in the training time required.
    – user9724030
    Nov 13 at 2:22
















0














The input data is EEG data that has been converted into PSD (power spectral density) features. Its shape is [103600, 59, 51], where 103600 is the number of samples, i.e. the total number of samples processed in one epoch. All of the data is loaded into memory.



The model I used is very simple; however, training one epoch took 50s with pytorch 0.4 on a GTX 1080, whereas inference on the validation set took only 0.3s.



I also tested this model using tensorflow 1.2.0 the time was 8s. I have no idea if I got it wrong.



So I made some test. This img shows the time for runing.
enter image description here



where
time_to_tensor is the time for inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)



time_to_cuda for inputs, targets = inputs.to(self.device), targets.to(self.device)



time_model for out = self.model(inputs)



time_criterion for loss = self.criterion(out, targets)



time_backward for loss.backward(), and time_step for self.optimizer.step()



It seems the operation for tensor.to(self.device) (or tensor.cuda()) wasted much time. So I tried to move the data to GPU at once, using



self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)


and commented



inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
inputs, targets = inputs.to(self.device), targets.to(self.device)


However, I got the running times shown in the following image. enter image description here. The time required for an epoch was still about 50s, but the time attributed to each operation changed. I am confused.



Could someone help me to find the problem? Thanks.



Code was here.



It's the model I used



import torch
import torch.nn as nn
import torch.nn.functional as F

class TxtCNN4(nn.Module):
    """Shallow CNN over a (time_steps x psd_lenght) input.

    Three convolutions are stacked along the time axis. The output of every
    convolution is max-pooled over its whole remaining time extent, the three
    pooled maps are concatenated along the channel axis and passed through
    two linear layers that produce ``n_classes`` scores.
    """

    def __init__(self, n_classes=6, time_steps=59, psd_lenght=51, filter_numbers=128):
        super().__init__()
        self.n_classes = n_classes
        self.time_steps = time_steps
        self.psd_lenght = psd_lenght
        self.filters = filter_numbers

        narrow = self.filters // 2
        wide = self.filters

        # Stage 1 spans the full PSD axis, so the width collapses to 1 and the
        # time axis shrinks by 2 (kernel height 3, no padding).
        self.conv1 = nn.Conv2d(1, narrow, kernel_size=(3, psd_lenght),
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(narrow, momentum=0.05)
        self.pool1 = nn.MaxPool2d(kernel_size=(self.time_steps - 2, 1))

        # Stages 2 and 3 only convolve along time; each removes 2 more steps.
        self.conv2 = nn.Conv2d(narrow, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool2 = nn.MaxPool2d(kernel_size=(self.time_steps - 4, 1))

        self.conv3 = nn.Conv2d(wide, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool3 = nn.MaxPool2d(kernel_size=(self.time_steps - 6, 1))

        # Concatenated pooled channels: narrow + wide + wide.
        self.fc = nn.Linear(in_features=2 * wide + narrow, out_features=128)
        self.cls = nn.Linear(in_features=128, out_features=self.n_classes)
        self.criterion = nn.CrossEntropyLoss(weight=None)

    def forward(self, inputs):
        """Return class scores of shape ``(N, n_classes)`` for ``inputs``.

        ``inputs`` is viewed as ``(N, 1, time_steps, psd_lenght)``, i.e. a
        single-channel image per sample.
        """
        x = inputs.view(-1, 1, self.time_steps, self.psd_lenght)

        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        pooled = []
        for conv, bn, pool in stages:
            x = F.relu(bn(conv(x)), inplace=True)
            pooled.append(pool(x))

        n_features = 2 * self.filters + self.filters // 2
        feats = torch.cat(pooled, dim=1).view(-1, n_features)
        feats = F.dropout(feats, p=0.5, training=self.training)
        feats = F.relu(self.fc(feats), inplace=True)
        return self.cls(feats)


This is the main Python file for training the model



# ---- data location ----
file_path = '/mnt/disk2/wy/SLEEP_EDF/'

# ---- optimisation ----
batch_size = 32 * 4 * 2  # 256 samples per mini-batch
learning_rate = 1e-3
max_epochs = 100
early_stop_epoch = 10
dropout_rate = 0.5

# ---- model ----
model_type = 'TxtCNN4'
filter_numbers = 128
nb_classes = 5

# ---- signal description ----
fs = 100  # sampling frequency
n_Channels = 2
n_Samples = 30 * 100  # presumably 30 s at fs = 100 -- TODO confirm

# ---- checkpoint restore ----
load_flag = False
model_path = ''

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` mini-batches.

    Args:
        inputs: Array-like with a ``shape`` attribute, indexed along axis 0.
        targets: Array-like with the same length as ``inputs``.
        batchsize: Maximum number of samples per batch; the final batch is
            shorter when the length is not a multiple of ``batchsize``.
        shuffle: When true, samples are visited in a fresh random order
            drawn with ``np.random.shuffle``; otherwise in storage order.

    Yields:
        Tuples ``(inputs[excerpt], targets[excerpt])``.
    """
    input_len = inputs.shape[0]
    assert input_len == len(targets)

    indices = None
    if shuffle:
        indices = np.arange(input_len)
        np.random.shuffle(indices)

    for start_idx in range(0, input_len, batchsize):
        stop_idx = start_idx + batchsize
        if shuffle:
            excerpt = indices[start_idx:stop_idx]
        else:
            excerpt = slice(start_idx, stop_idx)
        # The batch was previously computed but never handed to the caller,
        # so iterating over the function's result failed.
        yield inputs[excerpt], targets[excerpt]

class Trainer():
    """Train and validate one model on a single cross-validation fold.

    The constructor splits the data with ``reformatInput``, standardises it
    with the training-set mean/std, builds the model selected by
    ``model_type`` and opens one ``SummaryWriter`` each for the train, val
    and test logs.
    """

    def __init__(self, X_inputs, labels, fold, subj_id, log_path, model_type=model_type, args=None):
        """Prepare data, model, loss, optimizer and log writers.

        Args:
            X_inputs: Full input set, forwarded to ``reformatInput``.
            labels: Labels matching ``X_inputs``.
            fold: Split description forwarded to ``reformatInput``.
            subj_id: Identifier used in the log directory name.
            log_path: Prefix of the log directory; it is concatenated with
                ``'_' + model_type``, so it must be a string.
            model_type: ``'TxtCNN4'`` selects ``TxtCNN4``; any other value
                selects ``DeepSleepNet``.
            args: Unused.
        """
        self.subj_id = subj_id
        self.model_type = model_type
        self.writer_root_path = os.path.abspath(
            os.path.join(
                os.path.curdir,
                log_path+'_'+model_type,
                model_type + '_' + str(subj_id)))
        self.train_writer = SummaryWriter(os.path.join(self.writer_root_path, 'train'))
        self.val_writer = SummaryWriter(os.path.join(self.writer_root_path, 'val'))
        self.test_writer = SummaryWriter(os.path.join(self.writer_root_path, 'test'))

        self.curr_epcoh = 0
        self.curr_iter = 0

        self.train_end_flag = False

        (self.X_train, self.y_train), (self.X_val, self.y_val), (self.X_test, self.y_test) = reformatInput(X_inputs, labels, fold)
        print('Test set label and BiLi:\t', np.unique(self.y_test, return_counts=True))
        # Standardise every split with statistics of the training split only.
        X_mean = self.X_train.mean()
        X_std = self.X_train.std()
        self.X_train = (self.X_train - X_mean)/X_std
        self.X_val = (self.X_val - X_mean)/X_std
        self.X_test = (self.X_test - X_mean)/X_std

        if model_type == 'TxtCNN4':
            model = TxtCNN4()
        else:
            model = DeepSleepNet()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.criterion = nn.CrossEntropyLoss(weight=None)

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def _now(self):
        """Return ``time.time()`` after pending CUDA work has finished.

        CUDA kernels are launched asynchronously. Without a synchronisation
        the host clock only measures launch overhead, and the real GPU time
        is charged to whichever later statement happens to block.
        """
        if self.device.type == 'cuda':
            torch.cuda.synchronize()
        return time.time()

    def train(self, epoch):
        """Run one training epoch and print loss, accuracy and stage timings.

        Accuracy and loss are sampled (and written to the train log) on every
        20th iteration only.

        Args:
            epoch: Index of the epoch being run; stored in ``curr_epcoh``.
        """
        train_loss = AvgMeter()
        train_acc = AvgMeter()
        start_time = time.time()

        timer_names = ('time_to_tensor', 'time_to_cuda', 'time_model',
                       'time_criterion', 'time_backward', 'time_step',
                       'time_writer')
        timers = {name: 0 for name in timer_names}

        self.curr_epcoh = epoch
        self.model.train()
        for (inputs, targets) in iterate_minibatches(self.X_train, self.y_train, batch_size, shuffle=False):
            self.curr_iter += 1

            tick = self._now()
            inputs = torch.tensor(inputs, dtype=torch.float)
            targets = torch.tensor(targets, dtype=torch.int64)
            timers['time_to_tensor'] += self._now() - tick

            tick = self._now()
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            timers['time_to_cuda'] += self._now() - tick

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            tick = self._now()
            out = self.model(inputs)
            timers['time_model'] += self._now() - tick

            tick = self._now()
            loss = self.criterion(out, targets)
            timers['time_criterion'] += self._now() - tick

            tick = self._now()
            loss.backward()
            timers['time_backward'] += self._now() - tick

            tick = self._now()
            self.optimizer.step()
            timers['time_step'] += self._now() - tick

            if self.curr_iter % 20 == 0:
                tick = self._now()
                pred = out.max(dim=1)[1].cpu().numpy()
                gt = targets.cpu().numpy()
                acc = np.mean(pred == gt)
                _loss = loss.detach().cpu().numpy()
                train_acc.update(acc)
                train_loss.update(_loss)

                # The learning rate lives in the optimizer; read it from
                # there instead of relying on an undefined local name.
                curr_lr = self.optimizer.param_groups[0]['lr']
                self.train_writer.add_scalar('learning_rate', curr_lr, self.curr_iter)
                self.train_writer.add_scalar('acc', acc, self.curr_iter)
                self.train_writer.add_scalar('loss', _loss, self.curr_iter)
                timers['time_writer'] += self._now() - tick

        fmt_str = "Train\tEpoch [{:d}/{:d}] train_Loss: {:.4f}\ttrain_Acc: {:.2f}\tTime per Epoch: {:.4f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   train_loss.avg,
                                   train_acc.avg*100,
                                   time.time()-start_time)
        print(print_str)
        for name in timer_names:
            print(name + ': ', timers[name])
        print(len(self.y_train))
        train_acc.reset()
        train_loss.reset()

    def validation(self):
        """Evaluate on the validation split; print and log the metrics.

        Computes mean loss, accuracy, Cohen's kappa and macro-averaged recall
        (logged as ``bca``) without tracking gradients.
        """
        self.model.eval()
        loss_gather = AvgMeter()
        pred_gather = PreGather()
        tran_time = 0
        with torch.no_grad():
            # val set
            for (inputs, targets) in iterate_minibatches(self.X_val, self.y_val, batch_size, shuffle=False):
                tick = self._now()
                inputs = torch.tensor(inputs, dtype=torch.float)
                targets = torch.tensor(targets, dtype=torch.int64)
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                tran_time += self._now() - tick

                out = self.model(inputs)
                _loss = self.criterion(out, targets).detach().cpu().numpy()
                pred = out.max(dim=1)[1].cpu().numpy()
                loss_gather.update(_loss)
                pred_gather.update(pred)

        val_loss = loss_gather.avg
        pred = pred_gather.pred
        val_acc = np.mean(pred == self.y_val)
        val_kappa = cohen_kappa_score(self.y_val, pred)
        val_BCA = recall_score(self.y_val, pred, average='macro')
        loss_gather.reset()
        pred_gather.reset()

        # Then we print the results for this epoch:
        fmt_str = "VAL \tEpoch [{:d}/{:d}] val_Loss: {:.4f}\tval_Acc: {:.2f}\tval_kappa: {:.2f}\tval_BCA: {:.2f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   val_loss,
                                   val_acc*100,
                                   val_kappa*100,
                                   val_BCA*100)
        print(print_str)
        print(tran_time)
        print(len(self.y_val))
        self.val_writer.add_scalar('acc', val_acc, self.curr_iter)
        self.val_writer.add_scalar('loss', val_loss, self.curr_iter)
        self.val_writer.add_scalar('kappa', val_kappa, self.curr_iter)
        self.val_writer.add_scalar('bca', val_BCA, self.curr_iter)


def train_all_subject(num_epochs=max_epochs, log_path=None):
    """Train one model per subject (leave-subject-out cross validation).

    Args:
        num_epochs: Upper bound on the epochs run for each subject.
        log_path: Log directory prefix forwarded to ``Trainer``, which
            concatenates it with a string, so callers should pass a string.
    """
    # Leave-Subject-Out cross validation
    subj_nums, fold_pairs, EEGs, labels = load_data(file_path, subj_nums=38, channel=1)
    for subj_id in range(subj_nums):
        print('The subj_id', subj_id, '\t\t Training the ' + model_type + ' Model...')
        trainer = Trainer(EEGs, labels, fold_pairs[subj_id], subj_id, log_path)
        # Honour the caller's epoch budget; the argument used to be ignored
        # in favour of the module-level max_epochs.
        for epoch in range(trainer.curr_epcoh, num_epochs):
            if trainer.train_end_flag is not False:
                break
            trainer.train(epoch)
            trainer.validation()
            print('-'*50)









share|improve this question
























  • It would be easier to respond on this if you would use generated training data for this test, so that someone else would be able to reproduce the results in order to track down potential inefficiencies. Without being able to test it is difficult to say. That said, I wonder why time for backward and step are really low in the first picture, but extremely high in the second picture. That is a bit weird. You may also try torch.tensor(..., device=device) as this is generally (sometimes much) faster than using .to(device). No detour over CPU memory.
    – blue-phoenox
    Nov 12 at 12:17












  • Thanks for your reply. It might be hard to generate suitable data, and more code is needed to add. Actually, I also tried torch.tensor(..., device=self.device). There is no essential difference in the training time required.
    – user9724030
    Nov 13 at 2:22














0












0








0







The input data is EEG data that has been converted into PSD (power spectral density) features. Its shape is [103600, 59, 51], where 103600 is the number of samples, i.e. the total number of samples processed in one epoch. All of the data is loaded into memory.



The model I used is very simple; however, training one epoch took 50s with pytorch 0.4 on a GTX 1080, whereas inference on the validation set took only 0.3s.



I also tested this model using tensorflow 1.2.0 the time was 8s. I have no idea if I got it wrong.



So I made some test. This img shows the time for runing.
enter image description here



where
time_to_tensor is the time for inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)



time_to_cuda for inputs, targets = inputs.to(self.device), targets.to(self.device)



time_model for out = self.model(inputs)



time_criterion for loss = self.criterion(out, targets)



time_backward for loss.backward(), and time_step for self.optimizer.step()



It seems the operation for tensor.to(self.device) (or tensor.cuda()) wasted much time. So I tried to move the data to GPU at once, using



self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)


and commented



inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
inputs, targets = inputs.to(self.device), targets.to(self.device)


However, I got the running times shown in the following image. enter image description here. The time required for an epoch was still about 50s, but the time attributed to each operation changed. I am confused.



Could someone help me to find the problem? Thanks.



Code was here.



It's the model I used



import torch
import torch.nn as nn
import torch.nn.functional as F

class TxtCNN4(nn.Module):
    """Shallow CNN over a (time_steps x psd_lenght) input.

    Three convolutions are stacked along the time axis. The output of every
    convolution is max-pooled over its whole remaining time extent, the three
    pooled maps are concatenated along the channel axis and passed through
    two linear layers that produce ``n_classes`` scores.
    """

    def __init__(self, n_classes=6, time_steps=59, psd_lenght=51, filter_numbers=128):
        super().__init__()
        self.n_classes = n_classes
        self.time_steps = time_steps
        self.psd_lenght = psd_lenght
        self.filters = filter_numbers

        narrow = self.filters // 2
        wide = self.filters

        # Stage 1 spans the full PSD axis, so the width collapses to 1 and the
        # time axis shrinks by 2 (kernel height 3, no padding).
        self.conv1 = nn.Conv2d(1, narrow, kernel_size=(3, psd_lenght),
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(narrow, momentum=0.05)
        self.pool1 = nn.MaxPool2d(kernel_size=(self.time_steps - 2, 1))

        # Stages 2 and 3 only convolve along time; each removes 2 more steps.
        self.conv2 = nn.Conv2d(narrow, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool2 = nn.MaxPool2d(kernel_size=(self.time_steps - 4, 1))

        self.conv3 = nn.Conv2d(wide, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool3 = nn.MaxPool2d(kernel_size=(self.time_steps - 6, 1))

        # Concatenated pooled channels: narrow + wide + wide.
        self.fc = nn.Linear(in_features=2 * wide + narrow, out_features=128)
        self.cls = nn.Linear(in_features=128, out_features=self.n_classes)
        self.criterion = nn.CrossEntropyLoss(weight=None)

    def forward(self, inputs):
        """Return class scores of shape ``(N, n_classes)`` for ``inputs``.

        ``inputs`` is viewed as ``(N, 1, time_steps, psd_lenght)``, i.e. a
        single-channel image per sample.
        """
        x = inputs.view(-1, 1, self.time_steps, self.psd_lenght)

        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        pooled = []
        for conv, bn, pool in stages:
            x = F.relu(bn(conv(x)), inplace=True)
            pooled.append(pool(x))

        n_features = 2 * self.filters + self.filters // 2
        feats = torch.cat(pooled, dim=1).view(-1, n_features)
        feats = F.dropout(feats, p=0.5, training=self.training)
        feats = F.relu(self.fc(feats), inplace=True)
        return self.cls(feats)


This is the main Python file for training the model



# ---- data location ----
file_path = '/mnt/disk2/wy/SLEEP_EDF/'

# ---- optimisation ----
batch_size = 32 * 4 * 2  # 256 samples per mini-batch
learning_rate = 1e-3
max_epochs = 100
early_stop_epoch = 10
dropout_rate = 0.5

# ---- model ----
model_type = 'TxtCNN4'
filter_numbers = 128
nb_classes = 5

# ---- signal description ----
fs = 100  # sampling frequency
n_Channels = 2
n_Samples = 30 * 100  # presumably 30 s at fs = 100 -- TODO confirm

# ---- checkpoint restore ----
load_flag = False
model_path = ''

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` mini-batches.

    Args:
        inputs: Array-like with a ``shape`` attribute, indexed along axis 0.
        targets: Array-like with the same length as ``inputs``.
        batchsize: Maximum number of samples per batch; the final batch is
            shorter when the length is not a multiple of ``batchsize``.
        shuffle: When true, samples are visited in a fresh random order
            drawn with ``np.random.shuffle``; otherwise in storage order.

    Yields:
        Tuples ``(inputs[excerpt], targets[excerpt])``.
    """
    input_len = inputs.shape[0]
    assert input_len == len(targets)

    indices = None
    if shuffle:
        indices = np.arange(input_len)
        np.random.shuffle(indices)

    for start_idx in range(0, input_len, batchsize):
        stop_idx = start_idx + batchsize
        if shuffle:
            excerpt = indices[start_idx:stop_idx]
        else:
            excerpt = slice(start_idx, stop_idx)
        # The batch was previously computed but never handed to the caller,
        # so iterating over the function's result failed.
        yield inputs[excerpt], targets[excerpt]

class Trainer():
    """Train and validate one model on a single cross-validation fold.

    The constructor splits the data with ``reformatInput``, standardises it
    with the training-set mean/std, builds the model selected by
    ``model_type`` and opens one ``SummaryWriter`` each for the train, val
    and test logs.
    """

    def __init__(self, X_inputs, labels, fold, subj_id, log_path, model_type=model_type, args=None):
        """Prepare data, model, loss, optimizer and log writers.

        Args:
            X_inputs: Full input set, forwarded to ``reformatInput``.
            labels: Labels matching ``X_inputs``.
            fold: Split description forwarded to ``reformatInput``.
            subj_id: Identifier used in the log directory name.
            log_path: Prefix of the log directory; it is concatenated with
                ``'_' + model_type``, so it must be a string.
            model_type: ``'TxtCNN4'`` selects ``TxtCNN4``; any other value
                selects ``DeepSleepNet``.
            args: Unused.
        """
        self.subj_id = subj_id
        self.model_type = model_type
        self.writer_root_path = os.path.abspath(
            os.path.join(
                os.path.curdir,
                log_path+'_'+model_type,
                model_type + '_' + str(subj_id)))
        self.train_writer = SummaryWriter(os.path.join(self.writer_root_path, 'train'))
        self.val_writer = SummaryWriter(os.path.join(self.writer_root_path, 'val'))
        self.test_writer = SummaryWriter(os.path.join(self.writer_root_path, 'test'))

        self.curr_epcoh = 0
        self.curr_iter = 0

        self.train_end_flag = False

        (self.X_train, self.y_train), (self.X_val, self.y_val), (self.X_test, self.y_test) = reformatInput(X_inputs, labels, fold)
        print('Test set label and BiLi:\t', np.unique(self.y_test, return_counts=True))
        # Standardise every split with statistics of the training split only.
        X_mean = self.X_train.mean()
        X_std = self.X_train.std()
        self.X_train = (self.X_train - X_mean)/X_std
        self.X_val = (self.X_val - X_mean)/X_std
        self.X_test = (self.X_test - X_mean)/X_std

        if model_type == 'TxtCNN4':
            model = TxtCNN4()
        else:
            model = DeepSleepNet()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.criterion = nn.CrossEntropyLoss(weight=None)

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def _now(self):
        """Return ``time.time()`` after pending CUDA work has finished.

        CUDA kernels are launched asynchronously. Without a synchronisation
        the host clock only measures launch overhead, and the real GPU time
        is charged to whichever later statement happens to block.
        """
        if self.device.type == 'cuda':
            torch.cuda.synchronize()
        return time.time()

    def train(self, epoch):
        """Run one training epoch and print loss, accuracy and stage timings.

        Accuracy and loss are sampled (and written to the train log) on every
        20th iteration only.

        Args:
            epoch: Index of the epoch being run; stored in ``curr_epcoh``.
        """
        train_loss = AvgMeter()
        train_acc = AvgMeter()
        start_time = time.time()

        timer_names = ('time_to_tensor', 'time_to_cuda', 'time_model',
                       'time_criterion', 'time_backward', 'time_step',
                       'time_writer')
        timers = {name: 0 for name in timer_names}

        self.curr_epcoh = epoch
        self.model.train()
        for (inputs, targets) in iterate_minibatches(self.X_train, self.y_train, batch_size, shuffle=False):
            self.curr_iter += 1

            tick = self._now()
            inputs = torch.tensor(inputs, dtype=torch.float)
            targets = torch.tensor(targets, dtype=torch.int64)
            timers['time_to_tensor'] += self._now() - tick

            tick = self._now()
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            timers['time_to_cuda'] += self._now() - tick

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            tick = self._now()
            out = self.model(inputs)
            timers['time_model'] += self._now() - tick

            tick = self._now()
            loss = self.criterion(out, targets)
            timers['time_criterion'] += self._now() - tick

            tick = self._now()
            loss.backward()
            timers['time_backward'] += self._now() - tick

            tick = self._now()
            self.optimizer.step()
            timers['time_step'] += self._now() - tick

            if self.curr_iter % 20 == 0:
                tick = self._now()
                pred = out.max(dim=1)[1].cpu().numpy()
                gt = targets.cpu().numpy()
                acc = np.mean(pred == gt)
                _loss = loss.detach().cpu().numpy()
                train_acc.update(acc)
                train_loss.update(_loss)

                # The learning rate lives in the optimizer; read it from
                # there instead of relying on an undefined local name.
                curr_lr = self.optimizer.param_groups[0]['lr']
                self.train_writer.add_scalar('learning_rate', curr_lr, self.curr_iter)
                self.train_writer.add_scalar('acc', acc, self.curr_iter)
                self.train_writer.add_scalar('loss', _loss, self.curr_iter)
                timers['time_writer'] += self._now() - tick

        fmt_str = "Train\tEpoch [{:d}/{:d}] train_Loss: {:.4f}\ttrain_Acc: {:.2f}\tTime per Epoch: {:.4f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   train_loss.avg,
                                   train_acc.avg*100,
                                   time.time()-start_time)
        print(print_str)
        for name in timer_names:
            print(name + ': ', timers[name])
        print(len(self.y_train))
        train_acc.reset()
        train_loss.reset()

    def validation(self):
        """Evaluate on the validation split; print and log the metrics.

        Computes mean loss, accuracy, Cohen's kappa and macro-averaged recall
        (logged as ``bca``) without tracking gradients.
        """
        self.model.eval()
        loss_gather = AvgMeter()
        pred_gather = PreGather()
        tran_time = 0
        with torch.no_grad():
            # val set
            for (inputs, targets) in iterate_minibatches(self.X_val, self.y_val, batch_size, shuffle=False):
                tick = self._now()
                inputs = torch.tensor(inputs, dtype=torch.float)
                targets = torch.tensor(targets, dtype=torch.int64)
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                tran_time += self._now() - tick

                out = self.model(inputs)
                _loss = self.criterion(out, targets).detach().cpu().numpy()
                pred = out.max(dim=1)[1].cpu().numpy()
                loss_gather.update(_loss)
                pred_gather.update(pred)

        val_loss = loss_gather.avg
        pred = pred_gather.pred
        val_acc = np.mean(pred == self.y_val)
        val_kappa = cohen_kappa_score(self.y_val, pred)
        val_BCA = recall_score(self.y_val, pred, average='macro')
        loss_gather.reset()
        pred_gather.reset()

        # Then we print the results for this epoch:
        fmt_str = "VAL \tEpoch [{:d}/{:d}] val_Loss: {:.4f}\tval_Acc: {:.2f}\tval_kappa: {:.2f}\tval_BCA: {:.2f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   val_loss,
                                   val_acc*100,
                                   val_kappa*100,
                                   val_BCA*100)
        print(print_str)
        print(tran_time)
        print(len(self.y_val))
        self.val_writer.add_scalar('acc', val_acc, self.curr_iter)
        self.val_writer.add_scalar('loss', val_loss, self.curr_iter)
        self.val_writer.add_scalar('kappa', val_kappa, self.curr_iter)
        self.val_writer.add_scalar('bca', val_BCA, self.curr_iter)


def train_all_subject(num_epochs=max_epochs, log_path=None):
    """Run leave-subject-out cross validation, one Trainer per subject.

    Args:
        num_epochs: Upper bound on the epoch index for each subject.
            Previously this argument was ignored and the module-level
            ``max_epochs`` was always used.
        log_path: Prefix of the log directory handed to ``Trainer``.
            NOTE(review): ``Trainer`` appears to concatenate this with a
            string, so the ``None`` default would fail there -- confirm
            and pass a real path.
    """
    # Leave-Subject-Out cross validation
    subj_nums, fold_pairs, EEGs, labels = load_data(file_path, subj_nums=38, channel=1)
    for subj_id in range(subj_nums):
        print('The subj_id', subj_id, '\t\t Training the ' + model_type + ' Model...')
        trainer = Trainer(EEGs, labels, fold_pairs[subj_id], subj_id, log_path)
        for epoch in range(trainer.curr_epcoh, num_epochs):
            # Anything other than a literal False ends training for this
            # subject, exactly as the original `is False` test did.
            if trainer.train_end_flag is not False:
                break
            trainer.train(epoch)
            trainer.validation()
            print('-'*50)









share|improve this question















The input data is EEG data that was converted into PSD. Its shape is [103600, 59, 51], where 103600 is the number of samples, i.e. the total number of samples per epoch. The data was loaded into memory.



The model I used is very simple; however, training one epoch took 50s with PyTorch 0.4 on a GTX 1080, whereas inference on the val set took only 0.3s.



I also tested this model using tensorflow 1.2.0 the time was 8s. I have no idea if I got it wrong.



So I ran some tests. This image shows the running times.
enter image description here



where
time_to_tensor is the time for inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)



time_to_cuda for inputs, targets = inputs.to(self.device), targets.to(self.device)



time_model for out = self.model(inputs)



time_criterion for loss = self.criterion(out, targets)



time_backward for loss.backward(), and time_step for self.optimizer.step()



It seems that tensor.to(self.device) (or tensor.cuda()) took most of the time. So I tried moving all the data to the GPU at once, using



self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)


and commented out



inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
inputs, targets = inputs.to(self.device), targets.to(self.device)


However, I got the running times shown in the following image. enter image description here. The time required for an epoch was still about 50s, but the time for each operation changed. I am confused.



Could someone help me to find the problem? Thanks.



Code was here.



It's the model I used



import torch
import torch.nn as nn
import torch.nn.functional as F

class TxtCNN4(nn.Module):
    """Three-stage CNN whose per-stage features are max-pooled and concatenated.

    The input is viewed as ``(N, 1, time_steps, psd_lenght)``.  The first
    convolution spans the whole ``psd_lenght`` axis, so every later feature
    map has width 1.  Each stage uses an unpadded height-3 kernel, and each
    pooling kernel equals the height left after that stage, i.e. the pool
    reduces every channel to a single value.  The pooled vectors of all
    three stages are concatenated and passed through two linear layers,
    giving ``(N, n_classes)`` logits.
    """

    def __init__(self, n_classes=6, time_steps=59, psd_lenght=51, filter_numbers=128):
        super(TxtCNN4, self).__init__()
        self.n_classes = n_classes
        self.time_steps = time_steps
        self.psd_lenght = psd_lenght
        self.filters = filter_numbers

        wide = self.filters
        narrow = wide // 2

        # Stage 1: kernel covers 3 time steps and the full PSD axis.
        self.conv1 = nn.Conv2d(1, narrow, kernel_size=(3, psd_lenght),
                               stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(narrow, momentum=0.05)
        self.pool1 = nn.MaxPool2d(kernel_size=(self.time_steps - 2, 1))

        # Stage 2: height shrinks by another 2 rows.
        self.conv2 = nn.Conv2d(narrow, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool2 = nn.MaxPool2d(kernel_size=(self.time_steps - 4, 1))

        # Stage 3.
        self.conv3 = nn.Conv2d(wide, wide, kernel_size=(3, 1),
                               stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(wide, momentum=0.05)
        self.pool3 = nn.MaxPool2d(kernel_size=(self.time_steps - 6, 1))

        # Classifier over the concatenated pooled features.
        self.fc = nn.Linear(narrow + 2 * wide, 128)
        self.cls = nn.Linear(128, self.n_classes)
        self.criterion = nn.CrossEntropyLoss(weight=None)

    def forward(self, inputs):
        """Return class logits of shape ``(N, n_classes)`` for ``inputs``."""
        # Append a singleton axis, then move it to the channel position.
        feature_map = inputs.view(-1, self.time_steps, self.psd_lenght, 1)
        feature_map = feature_map.permute(0, 3, 1, 2)

        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        pooled = []
        for conv, norm, pool in stages:
            feature_map = F.relu(norm(conv(feature_map)), inplace=True)
            pooled.append(pool(feature_map))

        merged = torch.cat(pooled, dim=1)
        flat = merged.view(-1, 2*self.filters + self.filters//2)
        flat = F.dropout(flat, p=0.5, training=self.training)
        hidden = F.relu(self.fc(flat), inplace=True)
        return self.cls(hidden)


This is the main Python file for training the model



# Dataset location.
file_path = '/mnt/disk2/wy/SLEEP_EDF/'

# Training hyper-parameters.
batch_size = 256  # 32 * 4 * 2
dropout_rate = 0.5
nb_classes = 5
max_epochs = 100
early_stop_epoch = 10
learning_rate = 1e-3

# Model selection.
model_type = 'TxtCNN4'
filter_numbers = 128

# Signal description.
fs = 100  # Sampling frequency
n_Channels = 2
n_Samples = 3000  # 30 * 100

# Checkpoint loading.
load_flag = False
model_path = ''

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield ``(inputs, targets)`` mini-batches covering the data once.

    Args:
        inputs: Array whose first axis indexes samples.
        targets: Labels, with the same length as ``inputs``.
        batchsize: Maximum number of samples per batch; the last batch is
            shorter when the length is not a multiple of ``batchsize``.
        shuffle: When true, samples are visited in a random permutation
            drawn from ``np.random``; otherwise in their stored order.

    Yields:
        Tuples ``(inputs[excerpt], targets[excerpt])``.

    The ``yield`` was missing: the function computed each ``excerpt`` and
    returned ``None``, so iterating over the result failed.
    """
    input_len = inputs.shape[0]
    assert input_len == len(targets)

    if shuffle:
        indices = np.arange(input_len)
        np.random.shuffle(indices)
    for start_idx in range(0, input_len, batchsize):
        if shuffle:
            # Index array picks the permuted samples for this batch.
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            # A plain slice avoids copying the index array.
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

class Trainer():
    """Train and validate one model on a single cross-validation fold.

    The constructor splits the data with ``reformatInput``, standardises
    all three splits with the mean and standard deviation of the training
    split, builds the model and optimizer, and opens three summary
    writers (train / val / test) under a per-subject log directory.
    """

    def __init__(self, X_inputs, labels, fold, subj_id, log_path, model_type=model_type, args=None):
        """Set up data, model, optimizer and log writers for one subject.

        Args:
            X_inputs: Full input data, passed straight to ``reformatInput``.
            labels: Labels matching ``X_inputs``.
            fold: Fold description passed to ``reformatInput``.
            subj_id: Subject identifier, used in the log directory name.
            log_path: String prefix of the log directory (it is
                concatenated with ``'_' + model_type``).
            model_type: ``'TxtCNN4'`` selects ``TxtCNN4``; any other value
                selects ``DeepSleepNet``.
            args: Unused.
        """
        self.subj_id = subj_id
        self.model_type = model_type
        self.writer_root_path = os.path.abspath(
            os.path.join(
                os.path.curdir,
                log_path+'_'+model_type,
                model_type + '_' + str(subj_id)))
        self.train_writer = SummaryWriter(os.path.join(self.writer_root_path, 'train'))
        self.val_writer = SummaryWriter(os.path.join(self.writer_root_path, 'val'))
        self.test_writer = SummaryWriter(os.path.join(self.writer_root_path, 'test'))

        self.curr_epcoh = 0
        self.curr_iter = 0

        self.train_end_flag = False

        (self.X_train, self.y_train), (self.X_val, self.y_val), (self.X_test, self.y_test) = reformatInput(X_inputs, labels, fold)
        print('Test set label and BiLi:\t', np.unique(self.y_test, return_counts=True))
        # Standardise every split with statistics of the training split only.
        X_mean = self.X_train.mean()
        X_std = self.X_train.std()
        self.X_train = (self.X_train - X_mean)/X_std
        self.X_val = (self.X_val - X_mean)/X_std
        self.X_test = (self.X_test - X_mean)/X_std

        if model_type == 'TxtCNN4':
            model = TxtCNN4()
        else:
            model = DeepSleepNet()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.criterion = nn.CrossEntropyLoss(weight=None)

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

    def _clock(self):
        """Return the wall-clock time once queued CUDA work has finished.

        CUDA kernels are launched asynchronously, so without this barrier
        the per-stage timers in ``train`` only measure launch overhead and
        the real cost shows up in whichever later call happens to block.
        """
        if self.device.type == 'cuda':
            torch.cuda.synchronize()
        return time.time()

    def train(self, epoch):
        """Run one training epoch and print a per-stage timing breakdown.

        Args:
            epoch: Index of the epoch; stored in ``self.curr_epcoh``.

        Every 20th iteration the batch accuracy and loss are added to the
        running meters and written to ``self.train_writer`` together with
        the current learning rate.
        """
        train_loss = AvgMeter()
        train_acc = AvgMeter()
        start_time = time.time()

        time_to_tensor = 0
        time_to_cuda = 0
        time_model = 0
        time_criterion = 0
        time_backward = 0
        time_step = 0
        time_writer = 0
        self.curr_epcoh = epoch
        self.model.train()
        # NOTE(review): batches are visited in a fixed order (shuffle=False);
        # confirm this is intended for training.
        for (inputs, targets) in iterate_minibatches(self.X_train, self.y_train, batch_size, shuffle=False):
            self.curr_iter += 1
            time_1 = self._clock()
            inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
            time_to_tensor += (self._clock() - time_1)
            time_1 = self._clock()
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            time_to_cuda += (self._clock() - time_1)

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            time_1 = self._clock()
            out = self.model(inputs)
            time_model += (self._clock() - time_1)

            time_1 = self._clock()
            loss = self.criterion(out, targets)
            time_criterion += (self._clock() - time_1)

            time_1 = self._clock()
            loss.backward()
            time_backward += (self._clock() - time_1)

            time_1 = self._clock()
            self.optimizer.step()
            time_step += (self._clock() - time_1)

            if self.curr_iter % 20 == 0:
                time_1 = self._clock()
                pred = out.max(dim=1)[1].cpu().numpy()
                gt = targets.cpu().numpy()
                acc = np.mean(pred == gt)
                _loss = loss.detach().cpu().numpy()
                train_acc.update(acc)
                train_loss.update(_loss)

                # `curr_lr` was never defined; read it from the optimizer.
                curr_lr = self.optimizer.param_groups[0]['lr']
                self.train_writer.add_scalar('learning_rate', curr_lr, self.curr_iter)
                self.train_writer.add_scalar('acc', acc, self.curr_iter)
                self.train_writer.add_scalar('loss', _loss, self.curr_iter)
                time_writer += (self._clock() - time_1)

        # Tab escapes restored; they had been stripped from this string.
        fmt_str = "Train\tEpoch [{:d}/{:d}] train_Loss: {:.4f}\ttrain_Acc: {:.2f}\tTime per Epoch: {:.4f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   train_loss.avg,
                                   train_acc.avg*100,
                                   time.time()-start_time)
        print(print_str)
        print('time_to_tensor: ',time_to_tensor)
        print('time_to_cuda: ',time_to_cuda)
        print('time_model: ',time_model)
        print('time_criterion: ',time_criterion)
        print('time_backward: ',time_backward)
        print('time_step: ',time_step)
        print('time_writer: ',time_writer)
        print(len(self.y_train))
        train_acc.reset()
        train_loss.reset()

    def validation(self):
        """Evaluate on the validation split and log the metrics.

        Runs in eval mode with gradients disabled, then prints and writes
        to ``self.val_writer`` the loss, accuracy, Cohen's kappa and
        macro-averaged recall (reported as "BCA").
        """
        self.model.eval()
        with torch.no_grad():
            # val set
            loss_gather = AvgMeter()
            pred_gather = PreGather()
            tran_time = 0  # seconds spent building tensors and copying them to the device
            for (inputs, targets) in iterate_minibatches(self.X_val, self.y_val, batch_size, shuffle=False):
                time_1 = time.time()
                inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                tran_time += (time.time() - time_1)

                out = self.model(inputs)
                _loss = self.criterion(out, targets).detach().cpu().numpy()
                # Index of the largest logit is the predicted class.
                pred = out.max(dim=1)[1].cpu().numpy()
                loss_gather.update(_loss)
                pred_gather.update(pred)

        val_loss = loss_gather.avg
        pred = pred_gather.pred
        val_acc = np.mean(pred == self.y_val)
        val_kappa = cohen_kappa_score(self.y_val, pred)
        val_BCA = recall_score(self.y_val, pred, average='macro')
        loss_gather.reset()
        pred_gather.reset()

        # Then we print the results for this epoch (tab escapes restored).
        fmt_str = "VAL \tEpoch [{:d}/{:d}] val_Loss: {:.4f}\tval_Acc: {:.2f}\tval_kappa: {:.2f}\tval_BCA: {:.2f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                   max_epochs,
                                   val_loss,
                                   val_acc*100,
                                   val_kappa*100,
                                   val_BCA*100)
        print(print_str)
        print(tran_time)
        print(len(self.y_val))
        self.val_writer.add_scalar('acc', val_acc, self.curr_iter)
        self.val_writer.add_scalar('loss', val_loss, self.curr_iter)
        self.val_writer.add_scalar('kappa', val_kappa, self.curr_iter)
        self.val_writer.add_scalar('bca', val_BCA, self.curr_iter)


def train_all_subject(num_epochs=max_epochs, log_path=None):
    """Run leave-subject-out cross validation, one Trainer per subject.

    Args:
        num_epochs: Upper bound on the epoch index for each subject.
            Previously this argument was ignored and the module-level
            ``max_epochs`` was always used.
        log_path: Prefix of the log directory handed to ``Trainer``, which
            concatenates it with ``'_' + model_type``; the ``None`` default
            therefore fails there, so callers must pass a string.
    """
    # Leave-Subject-Out cross validation
    subj_nums, fold_pairs, EEGs, labels = load_data(file_path, subj_nums=38, channel=1)
    for subj_id in range(subj_nums):
        print('The subj_id', subj_id, '\t\t Training the ' + model_type + ' Model...')
        trainer = Trainer(EEGs, labels, fold_pairs[subj_id], subj_id, log_path)
        for epoch in range(trainer.curr_epcoh, num_epochs):
            # Anything other than a literal False ends training for this
            # subject, exactly as the original `is False` test did.
            if trainer.train_end_flag is not False:
                break
            trainer.train(epoch)
            trainer.validation()
            print('-'*50)






python tensorflow pytorch






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 13 at 8:27

























asked Nov 12 at 11:34









user9724030

12




12












  • It would be easier to respond on this if you would use generated training data for this test, so that someone else would be able to reproduce the results in order to track down potential inefficiencies. Without being able to test it is difficult to say. That said, I wonder why time for backward and step are really low in the first picture, but extremely high in the second picture. That is a bit weird. You may also try torch.tensor(..., device=device) as this is generally (sometimes much) faster than using .to(device). No detour over CPU memory.
    – blue-phoenox
    Nov 12 at 12:17












  • Thanks for your reply. It might be hard to generate suitable data, and more code is needed to add. Actually, I also tried torch.tensor(..., device=self.device). There is no essential difference in the training time required.
    – user9724030
    Nov 13 at 2:22


















  • It would be easier to respond on this if you would use generated training data for this test, so that someone else would be able to reproduce the results in order to track down potential inefficiencies. Without being able to test it is difficult to say. That said, I wonder why time for backward and step are really low in the first picture, but extremely high in the second picture. That is a bit weird. You may also try torch.tensor(..., device=device) as this is generally (sometimes much) faster than using .to(device). No detour over CPU memory.
    – blue-phoenox
    Nov 12 at 12:17












  • Thanks for your reply. It might be hard to generate suitable data, and more code is needed to add. Actually, I also tried torch.tensor(..., device=self.device). There is no essential difference in the training time required.
    – user9724030
    Nov 13 at 2:22
















It would be easier to respond on this if you would use generated training data for this test, so that someone else would be able to reproduce the results in order to track down potential inefficiencies. Without being able to test it is difficult to say. That said, I wonder why time for backward and step are really low in the first picture, but extremely high in the second picture. That is a bit weird. You may also try torch.tensor(..., device=device) as this is generally (sometimes much) faster than using .to(device). No detour over CPU memory.
– blue-phoenox
Nov 12 at 12:17






It would be easier to respond on this if you would use generated training data for this test, so that someone else would be able to reproduce the results in order to track down potential inefficiencies. Without being able to test it is difficult to say. That said, I wonder why time for backward and step are really low in the first picture, but extremely high in the second picture. That is a bit weird. You may also try torch.tensor(..., device=device) as this is generally (sometimes much) faster than using .to(device). No detour over CPU memory.
– blue-phoenox
Nov 12 at 12:17














Thanks for your reply. It might be hard to generate suitable data, and more code is needed to add. Actually, I also tried torch.tensor(..., device=self.device). There is no essential difference in the training time required.
– user9724030
Nov 13 at 2:22




Thanks for your reply. It might be hard to generate suitable data, and more code is needed to add. Actually, I also tried torch.tensor(..., device=self.device). There is no essential difference in the training time required.
– user9724030
Nov 13 at 2:22

















active

oldest

votes











Your Answer






// Page bootstrap: nested callbacks request the "externalEditor" and then the
// "snippets" module before calling StackExchange.snippets.init().
// NOTE(review): StackExchange.ifUsing / using are defined outside this
// snippet; presumably they are the site's lazy module loader -- confirm.
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

// Answer-editor setup, run from the StackExchange.ready callback.
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "1"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Configure the answer editor.  The two HTML strings below had lost
    // their backslashes (\u003c escapes and \" quotes), which made the
    // literals unterminated; they are restored here.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: true,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: 10,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
                brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
        });
    }
});














draft saved

draft discarded


















// Calls StackExchange.openid.initPostLogin from the ready callback.  The
// second argument is the URL-encoded address of this question with a
// #new-answer fragment (%23new-answer).
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53261322%2fpytorch-runs-much-slower-than-tensorflow-even-for-a-shallow-cnn%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown






























active

oldest

votes













active

oldest

votes









active

oldest

votes






active

oldest

votes
















draft saved

draft discarded




















































Thanks for contributing an answer to Stack Overflow!


  • Please be sure to answer the question. Provide details and share your research!

But avoid



  • Asking for help, clarification, or responding to other answers.

  • Making statements based on opinion; back them up with references or personal experience.


To learn more, see our tips on writing great answers.





Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


Please pay close attention to the following guidance:


  • Please be sure to answer the question. Provide details and share your research!

But avoid



  • Asking for help, clarification, or responding to other answers.

  • Making statements based on opinion; back them up with references or personal experience.


To learn more, see our tips on writing great answers.




draft saved


draft discarded














// Calls StackExchange.openid.initPostLogin from the ready callback.  The
// second argument is the URL-encoded address of this question with a
// #new-answer fragment (%23new-answer).
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53261322%2fpytorch-runs-much-slower-than-tensorflow-even-for-a-shallow-cnn%23new-answer', 'question_page');
}
);

Post as a guest















Required, but never shown





















































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown

































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown







Popular posts from this blog

List item for chat from Array inside array React Native

Thiostrepton

Caerphilly