Please pay attention to these notes:
Assignment Page: https://iust-deep-learning.github.io/981/assignments/03_transfer_learning_and_sequence_to_sequence_models
Course Forum: https://groups.google.com/forum/#!forum/dl981/
Fill your information here & run the cell
#@title Enter your information & "RUN the cell!!" { run: "auto" }
student_id = 0 #@param {type:"integer"}
student_name = "" #@param {type:"string"}
Your_Github_account_Email = "" #@param {type:"string"}
print("your student id:", student_id)
print("your name:", student_name)
from pathlib import Path
ASSIGNMENT_PATH = Path('asg03')
ASSIGNMENT_PATH.mkdir(parents=True, exist_ok=True)
Pre-trained networks are networks that have already been trained on large amounts of data and whose weights have been saved. We use these networks because:
Usually, the first layers of a deep network extract general features, and as we move deeper, the layers learn patterns that are more specific to the task. So if we freeze the first layers and only update the weights of the last layers on our own data, the network can learn the patterns relevant to our particular task in less time and with relatively little data.
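A minimal sketch of this idea in Keras, assuming the ImageNet-pretrained VGG16 (illustrative only; the pipeline below takes a feature-extraction route instead): the convolutional base is frozen and only a small classifier head on top is trained.
from keras.applications import VGG16
from keras.models import Sequential
from keras.layers import Flatten, Dense

base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base.layers:
    layer.trainable = False  # freeze the general-purpose early layers

model = Sequential()
model.add(base)                                # frozen convolutional base
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation='softmax'))      # e.g. emergency vs. private car
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])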
We now want to separate emergency vehicles (e.g., ambulances) from ordinary cars using the VGG network. To do this, we use the data you generated for the previous assignment: you must generate that data again in this assignment and split the dataset into train and test sets.
! pip install google_images_download
from google_images_download import google_images_download

response = google_images_download.googleimagesdownload()

emergency_car_limit = 20
private_car_limit = 20
search_queries = {'car': private_car_limit,
                  'ambulance car': emergency_car_limit,
                  'police car': emergency_car_limit,
                  'fire truck': emergency_car_limit,
                  'bus': emergency_car_limit}

def downloadimages(query, limit):
    arguments = {"keywords": query,
                 "format": "jpg",
                 "limit": limit,
                 "print_urls": True,
                 "size": "medium"}
    try:
        response.download(arguments)
    except FileNotFoundError:
        print("couldn't download a file")

for query, limit in search_queries.items():
    downloadimages(query, limit)
images = {'train': [], 'test': []} # Put the images here
labels = {'train': [], 'test': []} # Put the labels here
from os import listdir
from os.path import join
import cv2
import numpy as np
from keras.preprocessing.image import load_img, img_to_array

images = {'train': [], 'test': []}
labels = {'train': [], 'test': []}

emergency_types = ['ambulance car', 'bus', 'police car', 'fire truck']
private_types = ['car']
base_dir = 'downloads'
train_percentage = 0.7

def get_images(types):
    images = []
    for typ in types:
        directory_addr = join(base_dir, typ)
        img_names = listdir(directory_addr)
        for img_name in img_names:
            img_addr = join(directory_addr, img_name)
            images.append(img_addr)
    return images

emergency_cars_images = get_images(emergency_types)
private_cars_images = get_images(private_types)

emergency_threshold = int(len(emergency_cars_images) * train_percentage)
private_threshold = int(len(private_cars_images) * train_percentage)

images['train'] = np.array(emergency_cars_images[:emergency_threshold] + private_cars_images[:private_threshold])
images['test'] = np.array(emergency_cars_images[emergency_threshold:] + private_cars_images[private_threshold:])
labels['train'] = np.array([[1, 0]] * emergency_threshold + [[0, 1]] * private_threshold)
labels['test'] = np.array([[1, 0]] * (len(emergency_cars_images) - emergency_threshold) +
                          [[0, 1]] * (len(private_cars_images) - private_threshold))
First, we need to declare a VGG model to extract image features. You can see the structure and the layers of the network below. A pre-trained model can extract the features of an image on its own, so start by performing this classification task using only the features extracted by the VGG model.
from keras.applications import VGG16
import cv2
import numpy as np
from keras.preprocessing.image import load_img, img_to_array

vgg16_model = VGG16(weights='imagenet', include_top=False)
vgg16_model.summary()

def scale(X, x_min, x_max):
    # Min-max scale X into the range [x_min, x_max].
    nom = (X - X.min()) * (x_max - x_min)
    denom = X.max() - X.min()
    denom = denom + (denom == 0)  # guard against division by zero
    return x_min + nom / denom

def preprocess_image(img):
    img = scale(img, -1, 1)
    return img

xs = {'train': [], 'test': []}

for image in images['train']:
    img = img_to_array(load_img(image))
    img = cv2.resize(img, (224, 224))
    img = np.expand_dims(img, axis=0)
    img = preprocess_image(img)
    features = vgg16_model.predict(img)
    xs['train'].append(features)

for image in images['test']:
    img = img_to_array(load_img(image))
    img = cv2.resize(img, (224, 224))
    img = np.expand_dims(img, axis=0)
    img = preprocess_image(img)
    features = vgg16_model.predict(img)
    xs['test'].append(features)

ys = {'train': [], 'test': []}
ys['train'] = labels['train'][:]
ys['test'] = labels['test'][:]
xs['train'] = np.array([np.squeeze(l, axis=0) for l in xs['train']])
xs['test'] = np.array([np.squeeze(l, axis=0) for l in xs['test']])
Finally, implement a simple Keras model to classify these images.
from keras import models
from keras import optimizers
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
model = Sequential()
model.add(Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape=xs['train'][0].shape))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation = 'softmax'))
model.compile(optimizer='adam', loss ='categorical_crossentropy', metrics=["accuracy"])
model.fit(xs['train'], ys['train'], batch_size=50, epochs=20, verbose=1)
loss, acc = model.evaluate(xs['test'], ys['test'], verbose = 1)
print(acc * 100)
Next, you need to do this task with a fine-tuned model. How many layers of the model do you think you need to update for your data?
Now implement fine-tuning on the model and repeat the classification with the same classifier head as in the previous part.
Note that part of the code below must be changed: according to the description, fine-tuning should always target the last layers, yet the incorrect part marked in the code unfreezes the first layers instead. Its range should be changed to range(int(len(model.layers) / 2), len(model.layers)).
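For reference, a minimal sketch of the corrected freezing/unfreezing loops, following the same pattern as the cell below (the input shape here is only an illustrative placeholder):
from keras.applications import VGG16
from keras import Sequential

vgg = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Sequential()
for layer in vgg.layers:
    model.add(layer)

# Freeze everything first, then unfreeze only the later (task-specific) half.
for layer in model.layers:
    layer.trainable = False
for i in range(int(len(model.layers) / 2), len(model.layers)):
    model.layers[i].trainable = True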
from keras.applications import VGG16
from keras import models
from keras import layers
from keras.optimizers import Adam

x_new = {'train': [], 'test': []}

for image in images['train']:
    img = img_to_array(load_img(image))
    img = cv2.resize(img, (224, 224))
    img = preprocess_image(img)
    x_new['train'].append(img)

for image in images['test']:
    img = img_to_array(load_img(image))
    img = cv2.resize(img, (224, 224))
    img = preprocess_image(img)
    x_new['test'].append(img)

x_new['train'] = np.array(x_new['train'])
x_new['test'] = np.array(x_new['test'])

vgg16_model = VGG16(weights='imagenet', include_top=False, input_shape=x_new['train'][0].shape)

model = Sequential()
for layer in vgg16_model.layers:
    model.add(layer)

for i in range(len(model.layers)):
    model.layers[i].trainable = False

#################### This loop must change ##################
for i in range(int(len(model.layers) / 2)):
    model.layers[i].trainable = True
#############################################################

model.summary()

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation='softmax', name='output'))

model.compile(Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_new['train'], ys['train'], batch_size=50, epochs=5, verbose=1)

model.summary()

loss, acc = model.evaluate(x_new['test'], ys['test'], verbose=1)
print(acc * 100)
Remember the Transfer Learning and Sequence to Sequence Model practical class; there you learned how to use a pre-trained model and how to train a sequence to sequence model. In this task, you are going to build a sequence to sequence model that is fed English characters and predicts French and Persian characters. There are some criteria that you must consider:
You have learned about building a character-level sequence to sequence model in the practical class. You can review the full instructions and the code here (we suggest practicing with this notebook before jumping to the answer). For your homework, you must implement a character-level multi-objective sequence to sequence model that translates English text to French and Persian simultaneously (a minimal sketch of one possible multi-output architecture is given after these notes). You can learn about building neural networks with multiple outputs here.
After training the model, you must run prediction on some test samples, as was done in your practical class notebook.
You must use the dataset which was used in your practical class. For the Persian side, you must translate the English part using any translation API, such as Google Translate (a minimal sketch using one such API follows below).
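A minimal translation sketch, assuming the unofficial googletrans package (any translation API works just as well; the package version and sample sentences here are only assumptions):
# !pip install googletrans==4.0.0rc1
from googletrans import Translator

translator = Translator()
english_sentences = ["Go.", "Run!", "Who?"]  # hypothetical sample lines from the dataset
persian_sentences = [translator.translate(s, src='en', dest='fa').text
                     for s in english_sentences]
print(persian_sentences)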
Remember to save all of your trained models under the path ASSIGNMENT_PATH/.
The answer provided by Ali Safarpoor is available here. Please note that it contains some naming mistakes, such as using the "eng" prefix on attributes that actually hold French data.
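Below is a minimal sketch of one possible multi-output architecture in Keras: a single character-level LSTM encoder whose states initialize two separate decoders, one per target language. The vocabulary sizes and latent dimension are hypothetical placeholders, and the data preparation (one-hot character tensors, teacher forcing) follows the practical class notebook and is not shown here; this is not the reference answer.
from keras.models import Model
from keras.layers import Input, LSTM, Dense

num_encoder_tokens = 70   # number of distinct English characters (placeholder)
num_fr_tokens = 90        # number of distinct French characters (placeholder)
num_fa_tokens = 80        # number of distinct Persian characters (placeholder)
latent_dim = 256

# Shared encoder over one-hot English character sequences.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
_, state_h, state_c = LSTM(latent_dim, return_state=True)(encoder_inputs)
encoder_states = [state_h, state_c]

# French decoder (teacher forcing: it receives the shifted French targets).
fr_inputs = Input(shape=(None, num_fr_tokens))
fr_seq, _, _ = LSTM(latent_dim, return_sequences=True, return_state=True)(fr_inputs, initial_state=encoder_states)
fr_outputs = Dense(num_fr_tokens, activation='softmax', name='french')(fr_seq)

# Persian decoder, initialized from the same encoder states.
fa_inputs = Input(shape=(None, num_fa_tokens))
fa_seq, _, _ = LSTM(latent_dim, return_sequences=True, return_state=True)(fa_inputs, initial_state=encoder_states)
fa_outputs = Dense(num_fa_tokens, activation='softmax', name='persian')(fa_seq)

model = Model([encoder_inputs, fr_inputs, fa_inputs], [fr_outputs, fa_outputs])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# model.fit([encoder_input_data, fr_decoder_input_data, fa_decoder_input_data],
#           [fr_target_data, fa_target_data], batch_size=64, epochs=50, validation_split=0.2)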
In computational linguistics, word-sense disambiguation (WSD) is the open problem of identifying which sense of a word is used in a sentence. Imagine the bank of a river versus a bank that accepts deposits. You can distinguish these two "banks" easily because you understand the context of the sentence in which each is used. What if we want to distinguish the senses of a word automatically? You learned about word embeddings and W2V in the first assignment. W2V extracts a unique vector as the representation of each word. That is sufficient when we want to distinguish between two different words, such as "Hi" and "Bye", but we cannot rely on it alone when we want to distinguish between several senses of one specific word.
Researchers have developed several methods for tackling this issue. One of these techniques, named DeConf, was proposed by your professor, Dr. Pilehvar. It tackles the problem by de-conflating word representations based on the knowledge it derives from a semantic network. You can read the complete paper here. Besides, a full survey on WSD is available here.
In this task, we are going to distinguish between two senses of the noun "bat" using BERT. BERT is a context-aware language representation model that can produce different vectors for the same word depending on the context of the sentence.
According to the Cambridge Dictionary, "bat" has two meanings:
A specially shaped piece of wood used for hitting the ball in some games (cricket bat)
A small animal like a mouse with wings that flies at night (mammal bat)
We use two simple datasets for our task. First, cricketbat.txt, which contains a few sentences referring to the bat used in cricket, and second, vampirebat.txt, which contains a few sentences referring to the mammal bat. These datasets are available in this repository, which is used for another WSD approach based on WordNet. A full explanation of WSD using that method is available here.
!git clone
x = open("cricketbat.txt","r",encoding="UTF-8").read().lower()
first = [i for i in x.split(".") if "bat" in i]
y = open("vampirebat.txt","r",encoding="UTF-8").read().lower()
second = [i for i in y.split(".") if "bat" in i]
Now, we use BERT to encode the words of these datasets. You can use this repository to extract word embeddings for the sentences. Use BERT-Base, Uncased as your pre-trained BERT model. Then, for each dataset, collect the vectors of all tokens containing "bat" in a separate list.
!pip install bert-serving-server # server
!pip install bert-serving-client # client, independent of `bert-serving-server`
!wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip uncased_L-12_H-768_A-12.zip
! nohup bert-serving-start -model_dir /content/uncased_L-12_H-768_A-12/ -num_worker=1 -cpu -pooling_strategy=NONE -max_seq_len=NONE -show_tokens_to_client &
# Use BERT to extract the embeddings of the "bat" tokens.
from bert_serving.client import BertClient

bc = BertClient(check_length=False)
tmp = bc.encode(first, show_tokens=True)
tmp2 = bc.encode(second, show_tokens=True)

bat1 = []
for (i, j) in zip(tmp[0], tmp[1]):
    for (i2, j2) in zip(i, j):
        if "bat" in j2:
            bat1.append((i2, j2))
# print(len(bat1))

bat2 = []
for (i, j) in zip(tmp2[0], tmp2[1]):
    for (i2, j2) in zip(i, j):
        if "bat" in j2:
            bat2.append((i2, j2))
# print(len(bat2))

vec1 = [i[0] for i in bat1]
vec2 = [i[0] for i in bat2]
Now, you need to find a specific embedding for each sense of "bat". A simple approach is to average all the "bat" vectors of each document. Then, calculate the distance between the BERT embedding of "bat" in each test case and each document's average "bat" embedding.
Hint: This function may be useful.
import numpy as np

test_cases = ["bats are mammals", "bats breed", "bats are used to play cricket", "which bat has handle?", "bats can fly"]
test_labels = ["mammal bat", "mammal bat", "cricket bat", "cricket bat", "mammal bat"]
your_labels = []

# Average the "bat" vectors of each document to get one embedding per sense.
word1 = np.mean(vec1, axis=0)
word2 = np.mean(vec2, axis=0)

# Extract the "bat" embeddings of the test cases.
tmp3 = bc.encode(test_cases, show_tokens=True)
bat3 = []
for (i, j) in zip(tmp3[0], tmp3[1]):
    for (i2, j2) in zip(i, j):
        if "bat" in j2:
            bat3.append((i2, j2))
vec3 = [i[0] for i in bat3]

# Assign each test "bat" to the sense whose average embedding is closer.
for word3 in vec3:
    if np.linalg.norm(word3 - word1) < np.linalg.norm(word3 - word2):
        your_labels.append("cricket bat")
    else:
        your_labels.append("mammal bat")

assert test_labels == your_labels
Hmm... It makes sense. Let's see what you produced. You should use PCA to compress the 768 dimensions of BERT embeddings into just 2 dimensions. Then, use matplotlib to visualize them.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA, TruncatedSVD

# Make a list of embeddings like this:
# [ALL_CRICKET_BAT_EMBEDDINGS, ALL_MAMMAL_BAT_EMBEDDINGS, AVERAGE_CRICKET_BAT_EMBEDDING, AVERAGE_MAMMAL_BAT_EMBEDDING, ALL_TEST_BAT_EMBEDDINGS]
vec_all = [i for i in vec1]
for i in vec2:
    vec_all.append(i)
vec_all.append(word1)
vec_all.append(word2)
for i in vec3:
    vec_all.append(i)

# TruncatedSVD is used here as a PCA-like projection down to two components.
pca = TruncatedSVD(n_components=2)
principalComponents = pca.fit_transform(vec_all)
print(pca.explained_variance_ratio_)
principalDf = pd.DataFrame(data=principalComponents,
                           columns=['principal component 1', 'principal component 2'])

# Make a colors list: use "b" for CRICKET_BATs and "r" for MAMMAL_BATs.
colors = ["b" for i in vec1]
for j in vec2:
    colors.append("r")

# Extract pc1 and pc2 for the document bats.
pc1_document = principalDf['principal component 1'][:(len(vec1) + len(vec2))]
pc2_document = principalDf['principal component 2'][:(len(vec1) + len(vec2))]

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Principal Component 1', fontsize=15)
ax.set_ylabel('Principal Component 2', fontsize=15)
for pc1, pc2, color in zip(pc1_document, pc2_document, colors):
    ax.scatter(pc1, pc2, s=50, color=color)
ax.legend()
ax.grid()
plt.show()
It seems that despite this considerable compression, the clusters are distinguishable. Now, for the last part, visualize the average vectors and the test case vectors.
# Extract pc1 and pc2 for the average and test bats.
pc1_remainings = principalDf['principal component 1'][(len(vec1) + len(vec2)):]
pc2_remainings = principalDf['principal component 2'][(len(vec1) + len(vec2)):]

# Colors: "b"/"r" for the two average vectors, then one color per test case.
colors = ["b", "r", "m", "m", "c", "c", "m"]

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Principal Component 1', fontsize=15)
ax.set_ylabel('Principal Component 2', fontsize=15)
for pc1, pc2, color in zip(pc1_remainings, pc2_remainings, colors):
    ax.scatter(pc1, pc2, s=50, color=color)
ax.legend()
ax.grid()
plt.show()
Congratulations! You finished the assignment and you're ready to submit your work. Please follow these instructions: run the cell below to create your submission file (named dl_asg03__xx__xx.zip) and submit it via https://forms.gle/W8AMoNffho8TQLB87. Note: we need your GitHub token to create a new repository (if one doesn't already exist) to store the learned model data. The Google Drive token enables us to download the current notebook and create a submission. If you are interested, feel free to check our code.
#@title
! pip install -U --quiet PyDrive > /dev/null
! wget -q https://github.com/github/hub/releases/download/v2.10.0/hub-linux-amd64-2.10.0.tgz
import os
import time
import yaml
import json
from google.colab import files
from IPython.display import Javascript
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
asg_name = 'assignment_03'
script_save = '''
require(["base/js/namespace"], function(Jupyter) {
    Jupyter.notebook.save_checkpoint();
});
'''
repo_name = 'iust-deep-learning-assignments'
submission_file_name = 'dl_asg03__%s__%s.zip'%(student_id, student_name.lower().replace(' ', '_'))
! tar xf hub-linux-amd64-2.10.0.tgz
! cd hub-linux-amd64-2.10.0/ && chmod a+x install && ./install
! hub config --global hub.protocol https
! hub config --global user.email "$Your_Github_account_Email"
! hub config --global user.name "$student_name"
! hub api --flat -X GET /user
! hub api -F affiliation=owner -X GET /user/repos > repos.json
repos = json.load(open('repos.json'))
repo_names = [r['name'] for r in repos]
has_repository = repo_name in repo_names
if not has_repository:
    get_ipython().system_raw('! hub api -X POST -F name=%s /user/repos > repo_info.json' % repo_name)
    repo_info = json.load(open('repo_info.json'))
    repo_url = repo_info['clone_url']
else:
    for r in repos:
        if r['name'] == repo_name:
            repo_url = r['clone_url']
stream = open("/root/.config/hub", "r")
token = list(yaml.load_all(stream))[0]['github.com'][0]['oauth_token']
repo_url_with_token = 'https://'+token+"@" +repo_url.split('https://')[1]
! git clone "$repo_url_with_token"
! cp -r "$ASSIGNMENT_PATH" "$repo_name"/
! cd "$repo_name" && git add -A
! cd "$repo_name" && git commit -m "Add assignment 03 results"
! cd "$repo_name" && git push -u origin master
sub_info = {
    'student_id': student_id,
    'student_name': student_name,
    'repo_url': repo_url,
    'asg_dir_contents': os.listdir(str(ASSIGNMENT_PATH)),
    'dateime': str(time.time()),
    'asg_name': asg_name
}
json.dump(sub_info, open('info.json', 'w'))
Javascript(script_save)
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
file_id = drive.ListFile({'q':"title='%s.ipynb'"%asg_name}).GetList()[0]['id']
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('%s.ipynb'%asg_name)
! jupyter nbconvert --to script "$asg_name".ipynb > /dev/null
! jupyter nbconvert --to html "$asg_name".ipynb > /dev/null
! zip "$submission_file_name" "$asg_name".ipynb "$asg_name".html "$asg_name".txt info.json > /dev/null
print("##########################################")
print("Done! Submisson created, Please download using the bellow cell!")
#@title
files.download(submission_file_name)
If that cell produces an error when running, you can download the file dl_asg03_your_student_id_your_name.zip from the Files section in the left panel by right-clicking on it and choosing Download.
Special thanks to Amirhossein Kazemnejad and Kiamehr Razaee for creating the template of deep learning course assignments.