LSTM model does not learn a simple pattern












I'm a noob in ML, and I tried to write an LSTM model that processes batches of sequences and detects the following simple pattern: if a sequence starts with an odd number, the target is 0; otherwise it is 1:



data:



[[[ 1  2  3]
  [ 2  3  4]
  [ 3  4  5]
  [ 4  5  6]
  [ 5  6  7]]   # starts with 1 -> 0

 [[ 6  7  8]
  [ 7  8  9]
  [ 8  9 10]
  [ 9 10 11]
  [10 11 12]]   # starts with 6 -> 1

 [[11 12 13]
  [12 13 14]
  [13 14 15]
  [14 15 16]
  [15 16 17]]]  # starts with 11 -> 0


target:



[0 1 0]


code:



import numpy as np
import pandas as pd
from keras import callbacks
from keras import optimizers
from keras.layers import LSTM, Dense, Flatten, Dropout
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle


def demo():
    scaler = StandardScaler()
    # 1000 rows: a, b, c are x, x+1, x+2; the target alternates between
    # 0 and 1 every 5 rows, so each block of 5 rows shares one label
    dummy_data = pd.DataFrame(data=[[x, x + 1, x + 2, (x - 1) // 5 % 2] for x in range(1, 1001)],
                              columns=['a', 'b', 'c', 'target'])

    dummy_data[['a', 'b', 'c']] = scaler.fit_transform(dummy_data[['a', 'b', 'c']])
    data = dummy_data.loc[:, dummy_data.columns != 'target']
    target = dummy_data['target']
    # reshape into 200 sequences of 5 timesteps x 3 features
    data = np.array(np.split(data.values, 200))
    target = np.array(np.split(target.values, 200))
    data, target = shuffle(data, target)
    # all 5 rows of a sequence share the same label, so keep the first one
    target = np.array(list(map(lambda x: x[0], target)))
    print(data[:3, :], target[:3])
    x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.25, random_state=4)

    opt = optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0001)

    # build the model
    model = Sequential()

    num_features = data.shape[2]
    num_samples = data.shape[1]

    model.add(LSTM(32, batch_input_shape=(None, num_samples, num_features),
                   return_sequences=True, activation='tanh'))
    model.add(LeakyReLU())
    model.add(Dropout(0.2))
    model.add(LSTM(16, return_sequences=True, activation='tanh'))
    model.add(Dropout(0.2))
    model.add(LeakyReLU())
    model.add(LSTM(8, return_sequences=True, activation='tanh'))
    model.add(LeakyReLU())
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    model.summary()

    tb = callbacks.TensorBoard(log_dir='./logs/', histogram_freq=10, batch_size=128,
                               write_graph=True, write_grads=True, write_images=False,
                               embeddings_freq=0, embeddings_layer_names=None,
                               embeddings_metadata=None)

    loss_checkpoint = callbacks.ModelCheckpoint('./best_loss.hdf5', monitor='val_loss',
                                                verbose=1, save_best_only=True, mode='min')

    model.fit(x_train, y_train, batch_size=128, epochs=5000, validation_data=(x_test, y_test),
              callbacks=[tb, loss_checkpoint])


demo()


I'm expecting the net to learn this simple pattern, but it fails; see the loss below:
[training/validation loss plot omitted]



What could be improved in order for the network to perform better?










python tensorflow machine-learning keras lstm

asked Nov 15 '18 at 14:54 by Roni Gadot (edited Nov 16 '18 at 5:31 by Milo Lu)
  • LSTM works well with sequences. With your example, maybe you can try something simpler - for instance, a simple NN with 2-3 hidden layers (see the sketch below).

    – Danylo Baibak
    Nov 15 '18 at 15:21
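A minimal sketch of that suggestion, assuming the 5x3 sequences are flattened into 15 plain inputs; the layer sizes and optimizer are illustrative assumptions, not something the comment prescribes:

from keras.layers import Dense
from keras.models import Sequential

# feed-forward net with 3 hidden layers on the flattened 5x3 input
ff = Sequential()
ff.add(Dense(32, activation='relu', input_shape=(15,)))  # 5 timesteps * 3 features
ff.add(Dense(16, activation='relu'))
ff.add(Dense(8, activation='relu'))
ff.add(Dense(1, activation='sigmoid'))
ff.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# usage with the (hypothetical) arrays from the question's script:
# ff.fit(x_train.reshape(-1, 15), y_train, validation_data=(x_test.reshape(-1, 15), y_test))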











  • Thanks for your reply; my example uses sequences of 5, and that's what I'm trying to learn.

    – Roni Gadot
    Nov 15 '18 at 15:23
















1 Answer
































According to your comment, I would suggest changing the dataset. Try something like:

data:

[
[1, 3, 5],
[2, 4, 6],
[3, 5, 7]
]

target: [1, 0, 1]

You should try a dataset with a more pronounced pattern in the sequence. In theory, an LSTM should perform better on such samples.
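For illustration, a minimal sketch of how that kind of dataset could be generated and fed to a small LSTM; the layer size, the rescaling, and the training settings are assumptions for the example, not part of the suggestion:

import numpy as np
from keras.layers import LSTM, Dense
from keras.models import Sequential

# row i is [i, i+2, i+4], labelled 1 when its first element is odd
starts = np.arange(1, 1001)
data = np.stack([starts, starts + 2, starts + 4], axis=1).astype('float32')
target = (starts % 2).astype('float32')

# one feature per timestep: (samples, timesteps, features)
data = data.reshape((-1, 3, 1)) / data.max()  # simple rescaling into (0, 1]

model = Sequential()
model.add(LSTM(16, input_shape=(3, 1)))  # returns a single vector per sequence
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(data, target, batch_size=64, epochs=50, validation_split=0.25)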






answered Nov 15 '18 at 15:57 by Danylo Baibak
























  • Thanks for your reply, but my goal is different, i.e. I want to label the entire batch as 1 if it starts with an odd number ([[1,3,5], [2,4,6], [3,5,7]] -> 1).

    – Roni Gadot
    Nov 18 '18 at 8:25
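To make the clarified goal concrete, a small sketch of how such block-level labels could be derived; the first example block is taken from the comment, the second is an assumed continuation:

import numpy as np

# each sample is a whole 3x3 block; its label is the parity of the
# block's first element, e.g. [[1,3,5],[2,4,6],[3,5,7]] starts with 1 -> 1
blocks = np.array([[[1, 3, 5], [2, 4, 6], [3, 5, 7]],
                   [[2, 4, 6], [3, 5, 7], [4, 6, 8]]], dtype='float32')
labels = (blocks[:, 0, 0] % 2).astype('float32')  # -> [1., 0.]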










