Save Tensorflow model to Minio repository 05/88105/1
author Dileep Ranganathan <dileep.ranganathan@intel.com>
Mon, 20 May 2019 18:29:01 +0000 (11:29 -0700)
committer Dileep Ranganathan <dileep.ranganathan@intel.com>
Mon, 20 May 2019 18:29:01 +0000 (11:29 -0700)
Export and save the trained TensorFlow model using the Keras API.
Add support for the Minio model repository by injecting its credentials into
the Horovod pods. The exported model can then be served using TensorFlow Serving.

Change-Id: Id1e0b6696bc2bb1699786b08651c4d3bc353976c
Issue-ID: ONAPARC-460
Signed-off-by: Dileep Ranganathan <dileep.ranganathan@intel.com>
vnfs/DAaaS/applications/sample-horovod-app/Dockerfile
vnfs/DAaaS/applications/sample-horovod-app/keras_mnist_advanced_modified.py
vnfs/DAaaS/applications/sample-horovod-app/sample_values.yaml
vnfs/DAaaS/applications/sample-horovod-app/templates/job.yaml
vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml [new file with mode: 0644]

index 8bdcf5b..5b8f563 100644 (file)
@@ -121,7 +121,7 @@ COPY ${spark_jars} /opt/spark/jars
 COPY bin /opt/spark/bin
 COPY sbin /opt/spark/sbin
 COPY ${img_path}/spark/entrypoint.sh /opt/
-COPY examples /opt/spark/examples
+
 COPY ${k8s_tests} /opt/spark/tests
 COPY data /opt/spark/data
 ENV SPARK_HOME /opt/spark
@@ -135,6 +135,7 @@ ENV PATH /opt/conda/envs/tf_env/bin:$PATH
 RUN echo "export PATH=/opt/conda/envs/tf_env/bin:$PATH" >> ~/.bashrc
 #    echo "activate tf_env\n" >> ~/.bashrc
 RUN pip install petastorm
+COPY examples /opt/spark/examples
 WORKDIR /opt/spark/work-dir
 
 ENTRYPOINT [ "/opt/entrypoint.sh" ]
index 03425ff..fa39cb6 100644 (file)
@@ -1,13 +1,19 @@
 from __future__ import print_function
 import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten
-from keras.layers import Conv2D, MaxPooling2D
-from keras.preprocessing.image import ImageDataGenerator
-from keras import backend as K
+import os
+from tensorflow.keras.datasets import mnist
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Dropout, Flatten
+from tensorflow.keras.layers import Conv2D, MaxPooling2D
+from tensorflow.keras.preprocessing.image import ImageDataGenerator
+from tensorflow.keras import backend as K
+from tensorflow_estimator.python.estimator.export import export as export_helpers
+from tensorflow.python.saved_model import builder as saved_model_builder
+from tensorflow.python.saved_model import tag_constants, signature_constants
+from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def
 import tensorflow as tf
-import horovod.keras as hvd
+import horovod.tensorflow.keras as hvd
+
 
 # Horovod: initialize Horovod.
 hvd.init()
@@ -53,8 +59,8 @@ print(x_train.shape[0], 'train samples')
 print(x_test.shape[0], 'test samples')
 
 # Convert class vectors to binary class matrices
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
+y_train = tf.keras.utils.to_categorical(y_train, num_classes)
+y_test = tf.keras.utils.to_categorical(y_test, num_classes)
 
 model = Sequential()
 model.add(Conv2D(32, kernel_size=(3, 3),
@@ -69,12 +75,12 @@ model.add(Dropout(0.5))
 model.add(Dense(num_classes, activation='softmax'))
 
 # Horovod: adjust learning rate based on number of GPUs.
-opt = keras.optimizers.Adadelta(lr=1.0 * hvd.size())
+opt = tf.keras.optimizers.Adadelta(lr=1.0 * hvd.size())
 
 # Horovod: add Horovod Distributed Optimizer.
 opt = hvd.DistributedOptimizer(opt)
 
-model.compile(loss=keras.losses.categorical_crossentropy,
+model.compile(loss=tf.keras.losses.categorical_crossentropy,
               optimizer=opt,
               metrics=['accuracy'])
 
@@ -96,12 +102,13 @@ callbacks = [
     hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1),
 
     # Reduce the learning rate if training plateaues.
-    keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
+    tf.keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
 ]
 
 # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
 if hvd.rank() == 0:
-    callbacks.append(keras.callbacks.ModelCheckpoint('./checkpoint-{epoch}.h5'))
+    callbacks.append(tf.keras.callbacks.ModelCheckpoint(
+        './checkpoint-{epoch}.h5'))
 
 # Set up ImageDataGenerators to do data augmentation for the training images.
 train_gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
@@ -118,10 +125,45 @@ model.fit_generator(train_gen.flow(x_train, y_train, batch_size=batch_size),
                     callbacks=callbacks,
                     epochs=epochs,
                     verbose=1,
-                    validation_data=test_gen.flow(x_test, y_test, batch_size=batch_size),
+                    validation_data=test_gen.flow(
+                        x_test, y_test, batch_size=batch_size),
                     validation_steps=3 * test_batches // hvd.size())
 
 # Evaluate the model on the full data set.
 score = model.evaluate(x_test, y_test, verbose=0)
 print('Test loss:', score[0])
 print('Test accuracy:', score[1])
+
+# Save Model to Minio
+if hvd.rank() == 0:
+    print('Model Summary')
+    model.summary()
+    print('Exporting trained model to Minio Model Repo')
+    base_path = os.environ['MODEL_BASE_PATH']
+
+    # Option 1(Preferred) - Using Keras api and Tensorflow v1.13 version
+    saved_model_path = tf.contrib.saved_model.save_keras_model(model, base_path)
+    print('Model Saved to {} Using new Keras API!!!'.format(saved_model_path))
+    # Option 2 - Tensorflow v1.13+ Builder saved_model api.
+    # builder = saved_model_builder.SavedModelBuilder(base_path)
+
+    # print(model.input)
+    # print(model.outputs)
+
+    # signature = predict_signature_def(inputs={"inputs": model.input},
+    #                                   outputs={t.name:t for t in model.outputs})
+    # print(signature)
+    # K.set_learning_phase(0)
+    # with K.get_session() as sess:
+    #     builder.add_meta_graph_and_variables(sess=sess,
+    #                                          tags=[tag_constants.SERVING],
+    #                                          signature_def_map={'predict': signature})
+    #     builder.save()
+    # print('Model Saved to S3 Using Builder!!!')
+
+    # Option 3 - Tensorflow v1.13 Will be deprecated in Tensorflow v2
+    # tf.saved_model.simple_save(
+    #     keras.backend.get_session(),
+    #     base_path,
+    #     inputs={'input_image': model.input},
+    #     outputs={t.name: t for t in model.outputs})
index 6ac3135..7030dd2 100644 (file)
@@ -3,6 +3,7 @@
 
 ssh:
   useSecrets: true
+  port: 22
   hostKey: |-
     -----BEGIN RSA PRIVATE KEY-----
     ThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKeyThisIsPrivateKey
@@ -42,3 +43,20 @@ master:
     pullPolicy: Never
   args:
     - "mpirun -np 3 --hostfile /horovod/generated/hostfile --mca orte_keep_fqdn_hostnames t --allow-run-as-root --display-map --tag-output --timestamp-output sh -c '/opt/conda/envs/tf_env/bin/python /opt/spark/examples/src/main/python/tensorflow/keras_mnist_advanced_modified.py'"
+
+## Model repository information (Minio)
+minio:
+  existingSecret: ""
+  accessKey: "onapdaas"
+  secretKey: "onapsecretdaas"
+  environment:
+    AWS_REGION: "us-west-1"
+    S3_REGION: "us-west-1"
+    S3_ENDPOINT: "minio.edge1.svc.cluster.local:9000"
+    AWS_ENDPOINT_URL: "http://minio.edge1.svc.cluster.local:9000"
+    S3_USE_HTTPS: 0
+    S3_VERIFY_SSL: 0
+    AWS_LOG_LEVEL: 3
+    TF_CPP_MIN_LOG_LEVEL: 3
+    MODEL_NAME: "mnist"
+    MODEL_BASE_PATH: "s3://models/mnist/export/"
index 4e59b27..da42ded 100644 (file)
@@ -70,6 +70,20 @@ spec:
            value: "{{ $value }}"
        {{- end }}
        {{- end }}
+         - name: AWS_ACCESS_KEY_ID
+           valueFrom:
+             secretKeyRef:
+               name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }}
+               key: accesskey
+         - name: AWS_SECRET_ACCESS_KEY
+           valueFrom:
+             secretKeyRef:
+               name: {{ if .Values.minio.existingSecret }}{{ .Values.minio.existingSecret }}{{ else }}{{ template "horovod.fullname" . }}-minio{{ end }}
+               key: secretkey
+         {{- range $key, $val := .Values.minio.environment }}
+         - name: {{ $key }}
+           value: {{ $val | quote }}
+         {{- end}}
 {{- if .Values.master.privileged }}
         securityContext:
           privileged: true
diff --git a/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml b/vnfs/DAaaS/applications/sample-horovod-app/templates/minio-secrets.yaml
new file mode 100644 (file)
index 0000000..c99abe6
--- /dev/null
@@ -0,0 +1,31 @@
+{{/*
+# Copyright 2019 Intel Corporation, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/}}
+
+{{- if not .Values.minio.existingSecret }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ template "horovod.fullname" . }}-minio
+  labels:
+    app: {{ template "horovod.name" . }}
+    chart: {{ template "horovod.chart" . }}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+type: Opaque
+data:
+  accesskey: {{ .Values.minio.accessKey | b64enc }}
+  secretkey: {{ .Values.minio.secretKey | b64enc }}
+{{- end }}