Autoencoder for Character Time-Series with deeplearning4j
I'm trying to create and train an LSTM autoencoder on character sequences (strings). This is purely for dimensionality reduction, i.e. to be able to represent strings of up to T=1000 characters as fixed-length vectors of size N. For the sake of this example, let N = 10. Each character is one-hot encoded as a vector of size validChars (in my case validChars = 77).
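For reference, here is a minimal sketch of the one-hot encoding step (the helper name and the charToIndex map are mine, for illustration):

import java.util.Map;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

// One-hot encode a string into shape [1, validChars, T] -- the
// [miniBatch, featureSize, timeSeriesLength] layout DL4J RNN layers expect.
static INDArray oneHotEncode(String s, Map<Character, Integer> charToIndex, int validChars) {
    INDArray features = Nd4j.zeros(1, validChars, s.length());
    for (int t = 0; t < s.length(); t++) {
        features.putScalar(new int[]{0, charToIndex.get(s.charAt(t)), t}, 1.0);
    }
    return features;
}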
I'm using ComputationGraph so that I can later remove the decoder layers and use the remaining ones for encoding. Based on dl4j-examples, I have come up with this:
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .l2(0.0001)
        .weightInit(WeightInit.XAVIER)
        .updater(new Adam(0.005))
        .graphBuilder()
        .addInputs("input")
        // encoder: [77, T] -> [250, T] -> [10, T]
        .addLayer("encoder1", new LSTM.Builder().nIn(dictSize).nOut(250)
                .activation(Activation.TANH).build(), "input")
        .addLayer("encoder2", new LSTM.Builder().nIn(250).nOut(10)
                .activation(Activation.TANH).build(), "encoder1")
        // intended bottleneck: collapse the sequence to a fixed-size vector and back
        .addVertex("fixed", new PreprocessorVertex(new RnnToFeedForwardPreProcessor()), "encoder2")
        .addVertex("sequenced", new PreprocessorVertex(new FeedForwardToRnnPreProcessor()), "fixed")
        // decoder: [10, T] -> [250, T] -> [77, T]
        .addLayer("decoder1", new LSTM.Builder().nIn(10).nOut(250)
                .activation(Activation.TANH).build(), "sequenced")
        .addLayer("decoder2", new LSTM.Builder().nIn(250).nOut(dictSize)
                .activation(Activation.TANH).build(), "decoder1")
        .addLayer("output", new RnnOutputLayer.Builder()
                .lossFunction(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX).nIn(dictSize).nOut(dictSize).build(), "decoder2")
        .setOutputs("output")
        .backpropType(BackpropType.TruncatedBPTT)
        .tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
        .build();
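I build and train it roughly like this (a sketch; in practice I feed batches through an iterator, and since it is an autoencoder the target is the input itself):

ComputationGraph net = new ComputationGraph(conf);
net.init();
INDArray features = oneHotEncode(someString, charToIndex, dictSize);
// autoencoder: labels are identical to the features
net.fit(new INDArray[]{features}, new INDArray[]{features});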
With this, I expected the feature dimensions to follow the path:
[77, T] -> [250, T] -> [10, T] -> [10] -> [10, T] -> [250, T] -> [77, T]
I trained this network and removed the decoder part like so:
ComputationGraph encoder = new TransferLearning.GraphBuilder(net)
        .setFeatureExtractor("fixed")
        .removeVertexAndConnections("sequenced")
        .removeVertexAndConnections("decoder1")
        .removeVertexAndConnections("decoder2")
        .removeVertexAndConnections("output")
        // expose the "fixed" vertex through an identity output layer
        .addLayer("output", new ActivationLayer.Builder().activation(Activation.IDENTITY).build(), "fixed")
        .setOutputs("output")
        .setInputs("input")
        .build();
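I then call the encoder like this (variable names are illustrative; features is the one-hot encoded [1, 77, 1000] input from above):

INDArray encoded = encoder.outputSingle(features);
System.out.println(java.util.Arrays.toString(encoded.shape()));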
But when I encode a string of length 1000 with this encoder, it outputs an NDArray of shape [1000, 10] instead of a 1-dimensional vector of length 10. My goal is to represent the whole 1000-character sequence with one vector of length 10. What am I missing?
machine-learning deep-learning deeplearning4j
asked Nov 10 at 16:03 by Gena L