LSTM model does not learn a simple pattern












I'm a noob in ML, and I tried to write an LSTM model that processes batches of sequences and detects the following simple pattern: if a sequence starts with an odd number, the target is 0; otherwise it is 1:



data:



[[[ 1  2  3]
  [ 2  3  4]
  [ 3  4  5]
  [ 4  5  6]
  [ 5  6  7]]   # starts with 1 -> 0

 [[ 6  7  8]
  [ 7  8  9]
  [ 8  9 10]
  [ 9 10 11]
  [10 11 12]]   # starts with 6 -> 1

 [[11 12 13]
  [12 13 14]
  [13 14 15]
  [14 15 16]
  [15 16 17]]]  # starts with 11 -> 0


target:



[0 1 0]


code:



import numpy as np
import pandas as pd
from keras import callbacks
from keras import optimizers
from keras.layers import LSTM, Dense, Flatten, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle


def demo():
    scaler = StandardScaler()
    # 1000 rows: a, b, c are x, x+1, x+2; the target alternates between
    # 0 and 1 every 5 rows, so each block of 5 rows shares one label
    dummy_data = pd.DataFrame(data=[[x, x + 1, x + 2, (x - 1) // 5 % 2] for x in range(1, 1001)],
                              columns=['a', 'b', 'c', 'target'])

    dummy_data[['a', 'b', 'c']] = scaler.fit_transform(dummy_data[['a', 'b', 'c']])
    data = dummy_data.loc[:, dummy_data.columns != 'target']
    target = dummy_data['target']
    # reshape into 200 sequences of 5 timesteps x 3 features
    data = np.array(np.split(data.values, 200))
    target = np.array(np.split(target.values, 200))
    data, target = shuffle(data, target)
    # all 5 rows of a sequence share the same label, so keep the first one
    target = np.array(list(map(lambda x: x[0], target)))
    print(data[:3, :], target[:3])
    x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.25, random_state=4)

    opt = optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0001)

    # build the model
    model = Sequential()

    num_features = data.shape[2]
    num_samples = data.shape[1]

    model.add(LSTM(32, batch_input_shape=(None, num_samples, num_features),
                   return_sequences=True, activation='tanh'))
    model.add(LeakyReLU())
    model.add(Dropout(0.2))
    model.add(LSTM(16, return_sequences=True, activation='tanh'))
    model.add(Dropout(0.2))
    model.add(LeakyReLU())
    model.add(LSTM(8, return_sequences=True, activation='tanh'))
    model.add(LeakyReLU())
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    model.summary()

    tb = callbacks.TensorBoard(log_dir='./logs/', histogram_freq=10, batch_size=128,
                               write_graph=True, write_grads=True, write_images=False,
                               embeddings_freq=0, embeddings_layer_names=None,
                               embeddings_metadata=None)

    loss_checkpoint = callbacks.ModelCheckpoint('./best_loss.hdf5', monitor='val_loss',
                                                verbose=1, save_best_only=True, mode='min')

    model.fit(x_train, y_train, batch_size=128, epochs=5000, validation_data=(x_test, y_test),
              callbacks=[tb, loss_checkpoint])


demo()


I'm expecting the net to learn this simple pattern, but it fails; see the loss below:
[training/validation loss plot omitted]



What could be improved in order for the network to perform better?










python tensorflow machine-learning keras lstm

asked Nov 15 '18 at 14:54 by Roni Gadot (edited Nov 16 '18 at 5:31 by Milo Lu)
  • LSTM works well with sequences. With your example, maybe you can try something simpler - for instance, a simple NN with 2-3 hidden layers (see the sketch below).

    – Danylo Baibak
    Nov 15 '18 at 15:21
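A minimal sketch of that suggestion, assuming the 5x3 sequences are flattened into 15 plain inputs; the layer sizes and optimizer are illustrative assumptions, not something the comment prescribes:

from keras.layers import Dense
from keras.models import Sequential

# feed-forward net with 3 hidden layers on the flattened 5x3 input
ff = Sequential()
ff.add(Dense(32, activation='relu', input_shape=(15,)))  # 5 timesteps * 3 features
ff.add(Dense(16, activation='relu'))
ff.add(Dense(8, activation='relu'))
ff.add(Dense(1, activation='sigmoid'))
ff.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# usage with the (hypothetical) arrays from the question's script:
# ff.fit(x_train.reshape(-1, 15), y_train, validation_data=(x_test.reshape(-1, 15), y_test))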











  • Thanks for your reply; my example uses sequences of 5, and that's what I'm trying to learn.

    – Roni Gadot
    Nov 15 '18 at 15:23
















1 Answer
































According to your comment, I would suggest changing the dataset. Try something like:

data:

[
[1, 3, 5],
[2, 4, 6],
[3, 5, 7]
]

target: [1, 0, 1]

You should try a dataset with a more pronounced pattern in the sequence. In theory, an LSTM should perform better on such samples.
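For illustration, a minimal sketch of how that kind of dataset could be generated and fed to a small LSTM; the layer size, the rescaling, and the training settings are assumptions for the example, not part of the suggestion:

import numpy as np
from keras.layers import LSTM, Dense
from keras.models import Sequential

# row i is [i, i+2, i+4], labelled 1 when its first element is odd
starts = np.arange(1, 1001)
data = np.stack([starts, starts + 2, starts + 4], axis=1).astype('float32')
target = (starts % 2).astype('float32')

# one feature per timestep: (samples, timesteps, features)
data = data.reshape((-1, 3, 1)) / data.max()  # simple rescaling into (0, 1]

model = Sequential()
model.add(LSTM(16, input_shape=(3, 1)))  # returns a single vector per sequence
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(data, target, batch_size=64, epochs=50, validation_split=0.25)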






answered Nov 15 '18 at 15:57 by Danylo Baibak
























  • Thanks for your reply, but my goal is different, i.e. I want to label the entire batch as 1 if it starts with an odd number ([[1,3,5], [2,4,6], [3,5,7]] -> 1).

    – Roni Gadot
    Nov 18 '18 at 8:25
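To make the clarified goal concrete, a small sketch of how such block-level labels could be derived; the first example block is taken from the comment, the second is an assumed continuation:

import numpy as np

# each sample is a whole 3x3 block; its label is the parity of the
# block's first element, e.g. [[1,3,5],[2,4,6],[3,5,7]] starts with 1 -> 1
blocks = np.array([[[1, 3, 5], [2, 4, 6], [3, 5, 7]],
                   [[2, 4, 6], [3, 5, 7], [4, 6, 8]]], dtype='float32')
labels = (blocks[:, 0, 0] % 2).astype('float32')  # -> [1., 0.]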










