MABe Task 1: Classical Classification
[Task 1] Classical Classification [Baseline]
Baseline notebook for MABe Classical Classification
🐀🐀🐀🐀🐀🐀🐀🐀🐀🐀🐀🐁🐁🐁🐁🐁🐁🐁🐁🐁🐁
🐀 MABe Classical Classification: Starter kit 🐁
🐀🐀🐀🐀🐀🐀🐀🐀🐀🐀🐀🐁🐁🐁🐁🐁🐁🐁🐁🐁🐁
How to use this notebook 📝¶
- Copy the notebook. This is a shared template and any edits you make here will not be saved. You should copy it into your own drive folder. For this, click the "File" menu (top-left), then "Save a Copy in Drive". You can edit your copy however you like.
- Link it to your AIcrowd account. In order to submit your predictions to AIcrowd, you need to provide your account's API key.
Setup AIcrowd Utilities 🛠¶
!pip install -U aicrowd-cli==0.1 > /dev/null
Install packages 🗃¶
Please add all package installations in this section.
!pip install numpy pandas
Import necessary modules and packages 📚¶
import pandas as pd
import numpy as np
import os
Download the dataset 📲¶
Please get your API key from https://www.aicrowd.com/participants/me
API_KEY = "6d9b67915c94a7b780aef947bb84bd93"
!aicrowd login --api-key $API_KEY
!aicrowd dataset download --challenge mabe-task-1-classical-classification
Extract the downloaded dataset to the data directory
!rm -rf data
!mkdir data
!mv train.npy data/train.npy
!mv test-release.npy data/test.npy
!mv sample-submission.npy data/sample_submission.npy
train = np.load('data/train.npy',allow_pickle=True).item()
test = np.load('data/test.npy',allow_pickle=True).item()
sample_submission = np.load('data/sample_submission.npy',allow_pickle=True).item()
Dataset Specifications 💾¶
train.npy
- Training set for the task, which follows this schema:
test-release.npy
- Test set for the task, which follows this schema:
sample_submission.npy
- Template for a sample submission, which follows this schema:
{
"<sequence_id-1>" : [0, 0, 1, 2, ...],
"<sequence_id-2>" : [0, 1, 2, 0, ...]
}
Each key in the dictionary refers to the unique sequence id of a sequence in the test set. The value for each key is expected to hold the list of corresponding annotations. The annotations are represented by the index of the corresponding annotation word in the vocabulary provided in the test set.
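For illustration, here is a minimal sketch of how predicted behavior names could be converted to the vocabulary indices the submission expects. It assumes the test dictionary exposes its vocabulary under a 'vocabulary' key, like the train dictionary; the example predictions are hypothetical.
# Minimal sketch (assumption: the test set provides its vocabulary under 'vocabulary').
vocab = test['vocabulary']
name_to_index = {name: i for i, name in enumerate(vocab)}
predicted_names = ['other', 'other', 'attack']  # hypothetical per-frame predictions
predicted_indices = [name_to_index[n] for n in predicted_names]
# predicted_indices would then be stored under the corresponding "<sequence_id>" key.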
print("Dataset keys - ", train.keys())
print("Vocabulary - ", train['vocabulary'])
print("Number of train Sequences - ", len(train['sequences']))
print("Number of test Sequences - ", len(test['sequences']))
Sample overview¶
sequence_names = list(train["sequences"].keys())
sequence_key = sequence_names[0]
single_sequence = train["sequences"][sequence_key]
print("Sequence name - ", sequence_key)
print("Single Sequence keys ", single_sequence.keys())
print(f"Number of Frames in {sequence_key} - ", len(single_sequence['annotations']))
print(f"Keypoints data shape of {sequence_key} - ", single_sequence['keypoints'].shape)
print(f"annotator_id of {sequence_key} - ", single_sequence['annotator_id'])
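The keypoint dimensions line up with the plotting helpers below as frames × mice × (x, y) coordinates × body parts; note this interpretation is inferred from the plotting code, not stated in the data itself.
# Inferred axis order (see plot_mouse / animate_pose_sequence below).
n_frames, n_mice, n_coords, n_parts = single_sequence['keypoints'].shape
print(f"{n_frames} frames, {n_mice} mice, {n_coords} coordinates, {n_parts} body parts")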
Helper function for visualization 💁¶
Don't forget to run the cell 😉
import matplotlib.pyplot as plt
from matplotlib import animation
from matplotlib import colors
from matplotlib import rc
rc('animation', html='jshtml')
# Note: Image processing may be slow if too many frames are animated.
# Plotting constants
FRAME_WIDTH_TOP = 1024
FRAME_HEIGHT_TOP = 570
RESIDENT_COLOR = 'lawngreen'
INTRUDER_COLOR = 'skyblue'
PLOT_MOUSE_START_END = [(0, 1), (0, 2), (1, 3), (2, 3), (3, 4),
(3, 5), (4, 6), (5, 6), (1, 2)]
class_to_color = {'other': 'white', 'attack' : 'red', 'mount' : 'green',
'investigation': 'orange'}
class_to_number = {s: i for i, s in enumerate(train['vocabulary'])}
number_to_class = {i: s for i, s in enumerate(train['vocabulary'])}
def num_to_text(anno_list):
return np.vectorize(number_to_class.get)(anno_list)
def set_figax():
fig = plt.figure(figsize=(6, 4))
img = np.zeros((FRAME_HEIGHT_TOP, FRAME_WIDTH_TOP, 3))
ax = fig.add_subplot(111)
ax.imshow(img)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
return fig, ax
def plot_mouse(ax, pose, color):
# Draw each keypoint
for j in range(7):
ax.plot(pose[j, 0], pose[j, 1], 'o', color=color, markersize=5)
# Draw a line for each point pair to form the shape of the mouse
for pair in PLOT_MOUSE_START_END:
line_to_plot = pose[pair, :]
ax.plot(line_to_plot[:, 0], line_to_plot[:, 1], color=color, linewidth=1)
def animate_pose_sequence(video_name, keypoint_sequence, start_frame = 0, stop_frame = 100,
annotation_sequence = None):
# Returns the animation of the keypoint sequence between start frame
# and stop frame. Optionally can display annotations.
seq = keypoint_sequence.transpose((0,1,3,2))
image_list = []
counter = 0
for j in range(start_frame, stop_frame):
if counter%20 == 0:
print("Processing frame ", j)
fig, ax = set_figax()
plot_mouse(ax, seq[j, 0, :, :], color=RESIDENT_COLOR)
plot_mouse(ax, seq[j, 1, :, :], color=INTRUDER_COLOR)
if annotation_sequence is not None:
annot = annotation_sequence[j]
annot = number_to_class[annot]
plt.text(50, -20, annot, fontsize = 16,
bbox=dict(facecolor=class_to_color[annot], alpha=0.5))
ax.set_title(
video_name + '\n frame {:03d}.png'.format(j))
ax.axis('off')
fig.tight_layout(pad=0)
ax.margins(0)
fig.canvas.draw()
image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(),
dtype=np.uint8)
image_from_plot = image_from_plot.reshape(
fig.canvas.get_width_height()[::-1] + (3,))
image_list.append(image_from_plot)
plt.close()
counter = counter + 1
# Plot animation.
fig = plt.figure()
plt.axis('off')
im = plt.imshow(image_list[0])
def animate(k):
im.set_array(image_list[k])
return im,
ani = animation.FuncAnimation(fig, animate, frames=len(image_list), blit=True)
return ani
def plot_annotation_strip(annotation_sequence, start_frame = 0, stop_frame = 100, title="Behavior Labels"):
# Plot annotations as an annotation strip.
# Map annotations to a number.
annotation_num = []
for item in annotation_sequence[start_frame:stop_frame]:
annotation_num.append(class_to_number[item])
all_classes = list(set(annotation_sequence[start_frame:stop_frame]))
cmap = colors.ListedColormap(['red', 'orange', 'green', 'white'])
bounds=[-0.5,0.5,1.5, 2.5, 3.5]
norm = colors.BoundaryNorm(bounds, cmap.N)
height = 200
arr_to_plot = np.repeat(np.array(annotation_num)[:,np.newaxis].transpose(),
height, axis = 0)
fig, ax = plt.subplots(figsize = (16, 3))
ax.imshow(arr_to_plot, interpolation = 'none',cmap=cmap, norm=norm)
ax.set_yticks([])
ax.set_xlabel('Frame Number')
plt.title(title)
import matplotlib.patches as mpatches
legend_patches = []
for item in all_classes:
legend_patches.append(mpatches.Patch(color=class_to_color[item], label=item))
plt.legend(handles=legend_patches,loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
Visualize the mouse movements🎥¶
Sample visualization for plotting pose gifs.
keypoint_sequence = single_sequence['keypoints']
annotation_sequence = single_sequence['annotations']
ani = animate_pose_sequence(sequence_key,
keypoint_sequence,
start_frame = 3000,
stop_frame = 3100,
annotation_sequence = annotation_sequence)
# Display the animation on Colab
ani
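If you want to keep the clip outside the notebook, the animation can also be written to a file (optional; the filename is arbitrary and saving to GIF requires the pillow package, which Colab ships with).
# Optionally save the animation to a GIF file.
ani.save("pose_sample.gif", writer="pillow", fps=10)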
Showing a section of the validation data (an index needs to be selected for a full video)¶
annotation_sequence = single_sequence['annotations']
text_sequence = num_to_text(annotation_sequence)
plot_annotation_strip(
text_sequence,
start_frame=0,
stop_frame=len(annotation_sequence) + 1000
)
Basic EDA 🤓¶
Each sequence contains different amounts of each behavior; here we compute the percentage of frames of each behavior in every sequence. These percentages can be used to split the dataset for validation in a stratified way (a sketch follows the code below).
vocabulary = train['vocabulary']
def get_percentage(sequence_key):
anno_seq = num_to_text(train['sequences'][sequence_key]['annotations'])
counts = {k: np.mean(np.array(anno_seq) == k) for k in vocabulary}
return counts
anno_percentages = {k: get_percentage(k) for k in train['sequences']}
anno_perc_df = pd.DataFrame(anno_percentages).T
print("Percentage of frames in every sequence for every class")
anno_perc_df.head()
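As mentioned above, these per-sequence percentages can drive a stratified train/validation split. Below is a minimal sketch using scikit-learn, stratifying on each sequence's most frequent behavior; the split ratio and stratification key are assumptions for illustration.
from sklearn.model_selection import train_test_split

# Stratify sequences by their dominant behavior so both splits see every class.
# Note: this requires each dominant behavior to occur in at least two sequences.
dominant_class = anno_perc_df.idxmax(axis=1)
train_ids, val_ids = train_test_split(
    list(anno_perc_df.index),
    test_size=0.2,
    stratify=dominant_class,
    random_state=42,
)
print(f"{len(train_ids)} training sequences, {len(val_ids)} validation sequences")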
Percentage Frames of all behaviors¶
Let's look at the class imbalance.
all_annotations = []
for sk in train['sequences']:
anno = train['sequences'][sk]['annotations']
all_annotations.extend(list(anno))
all_annotations = num_to_text(all_annotations)
classes, counts = np.unique(all_annotations, return_counts=True)
pd.DataFrame({"Behavior": classes,
"Percentage Frames": counts/len(all_annotations)})
Generate predictions 💪¶
# Generating Random Predictions
submission = {}
test = np.load('data/test.npy',allow_pickle=True).item()
for sequence_id, sequence in test["sequences"].items():
keypoint_sequence = sequence['keypoints']
submission[sequence_id] = np.random.randint(4, size=len(sequence['keypoints']))
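The random baseline above only demonstrates the submission format. As a slightly more useful sketch (not the official baseline), you could fit a per-frame classifier on flattened keypoints with scikit-learn; class_weight="balanced" compensates for the imbalance seen in the EDA above. The feature choice and model here are assumptions for illustration.
from sklearn.ensemble import RandomForestClassifier

# Sketch: flatten each frame's keypoints (frames x mice x coords x parts) into one vector.
X_train, y_train = [], []
for seq in train["sequences"].values():
    kp = seq["keypoints"]
    X_train.append(kp.reshape(kp.shape[0], -1))
    y_train.append(seq["annotations"])
X_train = np.concatenate(X_train)
y_train = np.concatenate(y_train)

clf = RandomForestClassifier(n_estimators=50, class_weight="balanced",
                             n_jobs=-1, random_state=42)
clf.fit(X_train, y_train)

submission = {}
for sequence_id, sequence in test["sequences"].items():
    kp = sequence["keypoints"]
    submission[sequence_id] = clf.predict(kp.reshape(kp.shape[0], -1))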
Validate the submission ✅¶
The submission should follow these constraints:
- It should be a dictionary
- It should have the same keys as sample_submission
- The lengths of the arrays should match
- All values should be integers
You can use the helper function below to check these.
def validate_submission(submission, sample_submission):
if not isinstance(submission, dict):
print("Submission should be dict")
return False
if not submission.keys() == sample_submission.keys():
print("Submission keys don't match")
return False
for key in submission:
sv = submission[key]
ssv = sample_submission[key]
if not len(sv) == len(ssv):
print(f"Submission length of {key} doesn't match")
return False
for key, sv in submission.items():
if not all(isinstance(x, (np.int32, np.int64, int)) for x in list(sv)):
print(f"Submission of {key} is not all integers")
return False
print("All tests passed")
return True
validate_submission(submission, sample_submission)
Save the prediction as npy 📨¶
np.save("submission.npy", submission)
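As an optional sanity check, you can reload the saved file and run the validator once more before uploading.
# Reload the saved submission and re-validate it before submitting.
reloaded = np.load("submission.npy", allow_pickle=True).item()
validate_submission(reloaded, sample_submission)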
Submit to AIcrowd 🚀¶
!aicrowd submission create -c mabe-task-1-classical-classification -f submission.npy