LSTM model does not learn a simple pattern
I'm a noob in ML and tried to write an LSTM model that processes batches of sequences and detects the following simple pattern: if a sequence starts with an odd number, the target is 0; otherwise it is 1:
data:
[[[ 1 2 3]
[ 2 3 4]
[ 3 4 5]
[ 4 5 6]
[ 5 6 7]] #starts with 1 -> 0
[[ 6 7 8]
[ 7 8 9]
[ 8 9 10]
[ 9 10 11]
[10 11 12]] #starts with 6 -> 1
[[11 12 13]
[12 13 14]
[13 14 15]
[14 15 16]
[15 16 17]]] #starts with 11 -> 0
target:
[0 1 0]
code:
import numpy as np
import pandas as pd
from keras import callbacks
from keras import optimizers
from keras.layers import LSTM, Dense, Flatten, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
def demo():
    scaler = StandardScaler()
    # 1000 rows of [x, x+1, x+2] plus a 0/1 target that is constant over each block of 5 rows
    dummy_data = pd.DataFrame(data=[[x, x + 1, x + 2, int((x - 1) / 5 % 2)] for x in range(1, 1001)],
                              columns=['a', 'b', 'c', 'target'])
    dummy_data[['a', 'b', 'c']] = scaler.fit_transform(dummy_data[['a', 'b', 'c']])
    data = dummy_data.loc[:, dummy_data.columns != 'target']
    target = dummy_data['target']

    # split into 200 sequences of 5 timesteps x 3 features, with one label per sequence
    data = np.array(np.split(data.values, 200))           # shape (200, 5, 3)
    target = np.array(np.split(target.values, 200))       # shape (200, 5)
    data, target = shuffle(data, target)
    target = np.array(list(map(lambda x: x[0], target)))  # shape (200,)
    print(data[:3, :], target[:3])

    x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.25, random_state=4)
    opt = optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0001)

    # build the model
    model = Sequential()
    num_features = data.shape[2]
    num_samples = data.shape[1]

    first_lstm = LSTM(32, batch_input_shape=(None, num_samples, num_features),
                      return_sequences=True, activation='tanh')
    model.add(first_lstm)
    model.add(LeakyReLU())
    model.add(Dropout(0.2))
    model.add(LSTM(16, return_sequences=True, activation='tanh'))
    model.add(Dropout(0.2))
    model.add(LeakyReLU())
    model.add(LSTM(8, return_sequences=True, activation='tanh'))
    model.add(LeakyReLU())
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    model.summary()

    tb = callbacks.TensorBoard(log_dir='./logs/', histogram_freq=10, batch_size=128,
                               write_graph=True, write_grads=True, write_images=False,
                               embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
    loss_checkpoint = callbacks.ModelCheckpoint('./best_loss.hdf5', monitor='val_loss', verbose=1,
                                                save_best_only=True, mode='min')

    model.fit(x_train, y_train, batch_size=128, epochs=5000, validation_data=(x_test, y_test),
              callbacks=[tb, loss_checkpoint])


demo()
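For clarity, the expression int((x - 1) / 5 % 2) above assigns one label per block of five consecutive rows; a quick standalone check (my own, not part of the original post) shows the blocks come out as 0, 1, 0, 1, ..., i.e. 0 for a block starting at an odd number and 1 for an even one:

# check of the labeling expression used in demo(): x = 1..20 covers four blocks of five rows
labels = [int((x - 1) / 5 % 2) for x in range(1, 21)]
print(labels)  # [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]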
I'm expecting the net to learn this simple pattern, however it fails; see the loss below:
What could be improved in order for the network to perform better?
python tensorflow machine-learning keras lstm
LSTMs work well with sequences. With your example, maybe you could try something simpler - for instance, a simple NN with 2-3 hidden layers.
– Danylo Baibak
Nov 15 '18 at 15:21
Thanks for your reply; my example uses sequences of length 5, and that's what I'm trying to learn.
– Roni Gadot
Nov 15 '18 at 15:23
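For reference, a rough sketch of the "simple NN with 2-3 hidden layers" baseline mentioned in the first comment; the data generation mirrors the question, but the layer sizes, scaling, and training settings are my own assumptions, not anything from the thread:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten

# 200 sequences of 5 timesteps x 3 features, labeled as in the question:
# 0 if the first value of the sequence is odd, 1 if it is even.
starts = np.arange(1, 1000, 5)                                # 1, 6, 11, ..., 996
data = np.array([[[s + t + f for f in range(3)] for t in range(5)] for s in starts], dtype='float32')
data /= data.max()                                            # crude scaling to keep inputs small
target = (starts % 2 == 0).astype('float32')                  # even first value -> 1, odd -> 0

model = Sequential()
model.add(Flatten(input_shape=(5, 3)))                        # flatten each 5x3 sequence into 15 inputs
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(data, target, epochs=50, batch_size=32, verbose=0)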
1 Answer
Based on your comment, I would suggest changing the dataset. Try something like:
data:
[
[1, 3, 5],
[2, 4, 6],
[3, 5, 7]
]
target: [1, 0, 1]
You should try a dataset with a more pronounced pattern in the sequence. In theory, an LSTM should perform better on such samples.
answered Nov 15 '18 at 15:57 by Danylo Baibak
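As an illustration of this suggestion (my own sketch, not code from the answer), the proposed data could be generated and fed to a small LSTM roughly like this; the model size and training settings are assumptions:

import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Each sample is a short arithmetic sequence [s, s+2, s+4], labeled 1 when its values
# are odd and 0 when they are even, so the pattern lives inside the sequence itself.
starts = np.arange(1, 201)                                    # 1, 2, ..., 200
data = np.array([[s, s + 2, s + 4] for s in starts], dtype='float32')
target = (starts % 2 == 1).astype('float32')                  # [1, 3, 5] -> 1, [2, 4, 6] -> 0, ...

data = data.reshape((-1, 3, 1)) / 200.0                       # (samples, timesteps, features), scaled

model = Sequential()
model.add(LSTM(16, input_shape=(3, 1)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(data, target, epochs=100, batch_size=16, verbose=0)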
Thanks for your reply, but my goal is different, i.e. I want to label the entire batch as 1 if it starts with an odd number ([[1,3,5], [2,4,6], [3,5,7]] -> 1).
– Roni Gadot
Nov 18 '18 at 8:25