|
118 | 118 | # * [Attention? |
119 | 119 | # Attention! |
120 | 120 | # (Lilian Weng, 2018)](https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html) - A nice blog post summarizing attention mechanisms in many domains including vision. |
121 | | -# * [Illustrated: Self-Attention (Raimi Karim, 2019)](https://towardsdatascience.com/illustrated-self-attention-2d627e33b20a) - A nice visualization of the steps of self-attention. |
| 121 | +# * [Illustrated: Self-Attention (Raimi Karim, 2019)](https://medium.com/data-science/illustrated-self-attention-2d627e33b20a) - A nice visualization of the steps of self-attention. |
122 | 122 | # Recommended going through if the explanation below is too abstract for you. |
123 | 123 | # * [The Transformer family (Lilian Weng, 2020)](https://lilianweng.github.io/lil-log/2020/04/07/the-transformer-family.html) - A very detailed blog post reviewing more variants of Transformers besides the original one. |
124 | 124 |
|
@@ -633,8 +633,8 @@ def forward(self, x): |
633 | 633 | fig, ax = plt.subplots(2, 2, figsize=(12, 4)) |
634 | 634 | ax = [a for a_list in ax for a in a_list] |
635 | 635 | for i in range(len(ax)): |
636 | | - ax[i].plot(np.arange(1, 17), pe[i, :16], color="C%i" % i, marker="o", markersize=6, markeredgecolor="black") |
637 | | - ax[i].set_title("Encoding in hidden dimension %i" % (i + 1)) |
| 636 | + ax[i].plot(np.arange(1, 17), pe[i, :16], color=f"C{i}", marker="o", markersize=6, markeredgecolor="black") |
| 637 | + ax[i].set_title(f"Encoding in hidden dimension {i + 1}") |
638 | 638 | ax[i].set_xlabel("Position in sequence", fontsize=10) |
639 | 639 | ax[i].set_ylabel("Positional encoding", fontsize=10) |
640 | 640 | ax[i].set_xticks(np.arange(1, 17)) |
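
For readers skimming this hunk: the `pe` array being plotted presumably holds the sinusoidal positional encodings from the original Transformer paper. A minimal, self-contained sketch of how such an array could be built; the `d_model` and `max_len` values are assumptions for illustration, not taken from this file:

```python
import numpy as np

# Assumed sizes, not from this file
d_model, max_len = 48, 96

position = np.arange(max_len)[:, None]                               # (max_len, 1)
div_term = np.exp(np.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
pe = np.zeros((max_len, d_model))
pe[:, 0::2] = np.sin(position * div_term)                            # even dims: sine
pe[:, 1::2] = np.cos(position * div_term)                            # odd dims: cosine
pe = pe.T                                                            # (d_model, max_len)
```

After the transpose, `pe[i, :16]` gives hidden dimension `i` over the first 16 positions, matching the indexing in the plotting loop above.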
@@ -1088,7 +1088,7 @@ def plot_attention_maps(input_data, attn_maps, idx=0): |
1088 | 1088 | ax[row][column].set_xticklabels(input_data.tolist()) |
1089 | 1089 | ax[row][column].set_yticks(list(range(seq_len))) |
1090 | 1090 | ax[row][column].set_yticklabels(input_data.tolist()) |
1091 | | - ax[row][column].set_title("Layer %i, Head %i" % (row + 1, column + 1)) |
| 1091 | + ax[row][column].set_title(f"Layer {row + 1}, Head {column + 1}") |
1092 | 1092 | fig.subplots_adjust(hspace=0.5) |
1093 | 1093 | plt.show() |
1094 | 1094 |
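The per-layer, per-head maps that `plot_attention_maps` lays out are presumably the softmax weights of scaled dot-product attention. A minimal sketch of that computation with hypothetical tensor shapes; none of the names below are taken from this file:

```python
import torch
import torch.nn.functional as F

def scaled_dot_product(q, k, v):
    d_k = q.size(-1)
    attn_logits = torch.matmul(q, k.transpose(-2, -1)) / d_k**0.5
    attention = F.softmax(attn_logits, dim=-1)  # (..., seq_len, seq_len) attention map
    values = torch.matmul(attention, v)
    return values, attention

# Hypothetical sizes: (batch, heads, seq_len, head_dim)
q = k = v = torch.randn(1, 4, 16, 8)
values, attn_map = scaled_dot_product(q, k, v)
print(attn_map.shape)  # torch.Size([1, 4, 16, 16]): one 16x16 map per head
```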
|
@@ -1590,7 +1590,7 @@ def visualize_prediction(idx): |
1590 | 1590 | visualize_prediction(mistakes[-1]) |
1591 | 1591 | print("Probabilities:") |
1592 | 1592 | for i, p in enumerate(preds[mistakes[-1]].cpu().numpy()): |
1593 | | - print("Image %i: %4.2f%%" % (i, 100.0 * p)) |
| 1593 | + print(f"Image {i}: {100.0 * p:4.2f}%") |
1594 | 1594 |
|
1595 | 1595 | # %% [markdown] |
1596 | 1596 | # In this example, the model confuses a palm tree with a building, giving a probability of ~90% to image 2, and 8% to the actual anomaly. |
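
For context, the probabilities printed in the hunk above could plausibly come from a softmax over one logit per image in the set. A hypothetical sketch of that step; the logit values are illustrative only, not the model's actual outputs:

```python
import torch
import torch.nn.functional as F

# Ten illustrative logits, one per image in the set (made-up values)
logits = torch.tensor([[-1.2, 0.4, 3.1, 0.7, -0.5, 0.6, 0.9, -2.0, 0.1, 0.8]])
preds = F.softmax(logits, dim=-1)  # probabilities over the images in the set
for i, p in enumerate(preds[0].tolist()):
    print(f"Image {i}: {100.0 * p:4.2f}%")
```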
|