Commit 6ea31fce authored by Loïc Barrault's avatar Loïc Barrault
Browse files

more printing friendly

parent cd08954c
......@@ -16,8 +16,9 @@
\begin{frame}
\frametitle{Reminder: RNNLM}
\centering
\centerline{
\includegraphics[width=0.55\textwidth]{figures_en/rnn_unrolled_all}
}
\begin{itemize}
\item<+-> Probability of a word sequence $\vw = (w_1, w_2, ..., w_\ell)$
\item[]{ \small{ $ p(\vw) = p(w_1) \times p(w_2|w_1) \times p(w_3 | w_1, w_2) \times \dots \times p(w_\ell | w_1, ..., w_{\ell-1}) = \ds \prod_{t=1}^{\ell} p(w_t|w_1, ..., w_{t-1})$ } }
......@@ -125,52 +126,55 @@ A document & A summary \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Back to the encoder}
\begin{block}{How to represent the source sequence with a fixed size vector $\edinred{\vm{x}}$ ? }
\textbf{How to represent the source sequence with a fixed-size vector $\edinred{\vm{x}}$?}
\begin{itemize}
\item Previous part: RNN, GRU, LSTM
\item What about this architecture? \\
{\centering
\centerline{
\includegraphics[width=0.35\textwidth]{figures_en/bow}
\item[]}
}
\item<2> \textbf{Bag of words} representation
\end{itemize}
\end{block}
\end{frame}
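%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A minimal NumPy sketch of the bag-of-words idea above. The names
% (bow_encode, E, word_ids) and the toy sizes are illustrative, not
% taken from any reference implementation.
\begin{frame}[fragile]
\frametitle{Bag of words: a sketch}
One way to picture the bag-of-words encoder (illustrative names and sizes):
{\scriptsize
\begin{verbatim}
import numpy as np

def bow_encode(word_ids, E):
    """Bag-of-words sentence vector: average the word embeddings.
    Word order is ignored, so "dog bites man" == "man bites dog"."""
    return E[word_ids].mean(axis=0)

V, d = 1000, 64                  # toy vocabulary and embedding sizes
E = np.random.randn(V, d) * 0.1  # embedding matrix (learned in practice)
x = bow_encode([4, 27, 512], E)  # fixed-size vector for a 3-word sentence
print(x.shape)                   # (64,)
\end{verbatim}
}
\end{frame}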
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Back to the encoder}
\begin{block}{\cite{kalchbrenner2013} }
\textbf{\cite{kalchbrenner2013}}
\vfill
\begin{itemize}
\item[]
{ \centering
\includegraphics[width=0.35\textwidth]{figures_en/conv_sent_encoder}
\item[]
\centerline{
\includegraphics[width=0.35\textwidth]{figures_en/conv_sent_encoder}
}
\vfill
\item<+-> \edinred{Convolutional} encoder
\end{itemize}
\end{block}
\end{frame}
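%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A simplified single-layer sketch of a convolutional sentence encoder.
% This is not the architecture of \cite{kalchbrenner2013}, only the
% general idea: convolve over word embeddings, then pool to a fixed-size
% vector. All names and sizes are illustrative.
\begin{frame}[fragile]
\frametitle{Convolutional encoder: a sketch}
A simplified single-layer variant (illustrative, not the cited architecture):
{\scriptsize
\begin{verbatim}
import numpy as np

def conv_encode(word_ids, E, F, b):
    """Embed the words, slide a width-k filter over time (tanh),
    then max-pool over positions to get a fixed-size vector."""
    X = E[word_ids]                                # (length, d)
    k, d, d_out = F.shape
    feats = [np.tanh(X[t:t + k].reshape(-1) @ F.reshape(k * d, d_out) + b)
             for t in range(len(word_ids) - k + 1)]
    return np.max(feats, axis=0)                   # max over positions

V, d, d_out, k = 1000, 32, 64, 3
E = np.random.randn(V, d) * 0.1                    # word embeddings
F = np.random.randn(k, d, d_out) * 0.1             # convolution filter
b = np.zeros(d_out)
print(conv_encode([4, 27, 512, 9, 81], E, F, b).shape)   # (64,)
\end{verbatim}
}
\end{frame}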
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{How to condition on $\vm{x}$ ?}
\begin{block}{\cite{kalchbrenner2013} }
\centering
\includegraphics[width=0.95\textwidth]<+>{figures_en/rnn_unrolled_4}
\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_1}
\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_2}
\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_3}
\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_all}
\includegraphics[width=0.55\textwidth]<+>{figures_en/cond_rnn_unrolled_all}
\textbf{\cite{kalchbrenner2013}}
\centerline{
\includegraphics[width=0.95\textwidth]{figures_en/rnn_unrolled_4}<+>
\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_1}<+>
\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_2}<+>
\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_3}<+>
\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_all}<+>
\includegraphics[width=0.55\textwidth]{figures_en/cond_rnn_unrolled_all}<+>
}
\begin{itemize}
\item[]<.-> $ \vm{h}_t = \phi(\vm{M}[\vm{h}_{t-1}; \vm{w}_{t-1}] \edinred{+ \vm{x}} + \vm{b}) $
\item[]<.-> $ \vm{z}_t = \vm{S}~\vm{h}_{t} + \vm{b'} $
\item[]<.-> $ p(\vm{w}_t | \edinred{\vm{x}}, \vm{w}_{<t}) = \mathrm{softmax}(\vm{z}_t) $
\item[]
\end{itemize}
\end{block}
\end{frame}
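%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A minimal NumPy sketch of the three equations above. Variable names
% mirror the slide's symbols (M, S, b, b'); the dimensions are toy
% values chosen for illustration only.
\begin{frame}[fragile]
\frametitle{Conditioning on $\vm{x}$: a sketch}
One decoder step, with the source vector added at every time step:
{\scriptsize
\begin{verbatim}
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def decoder_step(h_prev, w_prev, x, M, S, b, b2):
    """h_t = tanh(M [h_{t-1}; w_{t-1}] + x + b); z_t = S h_t + b';
    p(w_t | x, w_<t) = softmax(z_t). x is the source vector."""
    h = np.tanh(M @ np.concatenate([h_prev, w_prev]) + x + b)
    z = S @ h + b2
    return h, softmax(z)

d_h, d_w, V = 8, 4, 20                       # toy dimensions
M = np.random.randn(d_h, d_h + d_w) * 0.1
S = np.random.randn(V, d_h) * 0.1
b, b2, x = np.zeros(d_h), np.zeros(V), np.random.randn(d_h)
h, p = decoder_step(np.zeros(d_h), np.zeros(d_w), x, M, S, b, b2)
print(p.sum())                               # 1.0
\end{verbatim}
}
\end{frame}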
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -179,7 +183,7 @@ A document & A summary \\
\begin{columns}
\column{0.5\textwidth}
\begin{block}{Architecture}
\textbf{Architecture}
\begin{itemize}
\item Encoder: $\mathrm{LSTM}(\edinred{\vm{x}_i}, \vm{c}_{i-1}, \vm{h}_{i-1})$
\item[\ra] Provides the vector $\edinred{\vm{x}_{\ell}}$, with $\ell$ the chosen fixed size.
......@@ -194,7 +198,7 @@ A document & A summary \\
\item[]
\end{itemize}
\end{itemize}
\end{block}
\column{0.5\textwidth} \\
\centering \includegraphics[height=4cm]{figures_en/lstm}
\end{columns}
......@@ -205,7 +209,7 @@ A document & A summary \\
\frametitle{Machine Translation: \cite{sutskever2014}}
\begin{columns}
\column{0.5\textwidth}
\begin{block}{}
\begin{itemize}
\item<2->{ {\color{edinred} [1.]} ~Word encoded into \emph{1-hot} vector }
\item<3->{ {\color{cyan} [2.]} Projection into an \textbf{\textit{embedding}} }
......@@ -219,18 +223,16 @@ A document & A summary \\
\item<8->{ {\color{orange} [7.]} Next word (most probable) }
\end{itemize}
\end{block}
\column{0.5\textwidth} \\
\centering{
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all}
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_1}
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_2}
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_3}
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_4}
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_5}
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_6}
\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_7}
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all}<+>
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_1}<+>
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_2}<+>
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_3}<+>
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_4}<+>
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_5}<+>
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_6}<+>
\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_7}<+>
}%centering
\end{columns}
......@@ -240,7 +242,6 @@ A document & A summary \\
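%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A greedy encoder-decoder sketch of steps [1.]-[7.] above. A plain tanh
% RNN stands in for the deep LSTMs of \cite{sutskever2014}; all names
% (Wenc, Wdec, S, bos, eos) and sizes are illustrative.
\begin{frame}[fragile]
\frametitle{Encoder-decoder: a sketch}
Greedy translation with a toy recurrent encoder-decoder (illustrative only):
{\scriptsize
\begin{verbatim}
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def translate(src_ids, E, Wenc, Wdec, S, bos, eos, max_len=20):
    """Encode the source into one vector, then generate word by word,
    always feeding back the most probable word (greedy decoding)."""
    h = np.zeros(Wenc.shape[0])
    for i in src_ids:                                 # encoder
        h = np.tanh(Wenc @ np.concatenate([h, E[i]]))
    out, w = [], bos
    for _ in range(max_len):                          # decoder
        h = np.tanh(Wdec @ np.concatenate([h, E[w]]))
        w = int(np.argmax(softmax(S @ h)))            # most probable word
        if w == eos:
            break
        out.append(w)
    return out

V, d, d_h = 30, 8, 16
E = np.random.randn(V, d) * 0.1
Wenc = np.random.randn(d_h, d_h + d) * 0.1
Wdec = np.random.randn(d_h, d_h + d) * 0.1
S = np.random.randn(V, d_h) * 0.1
print(translate([3, 7, 12], E, Wenc, Wdec, S, bos=1, eos=2))
\end{verbatim}
}
\end{frame}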
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Results}
\begin{block}{}
\centerline{
\includegraphics[width=0.8\textwidth]{figures_en/nmt_sentence_length}
}
......@@ -260,7 +261,6 @@ A document & A summary \\
\end{enumerate}
}
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -306,7 +306,7 @@ A document & A summary \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{From vector to matrix representation}
\begin{block}{}
\begin{itemize}
\item Represent the input sequence with a matrix
\item Generate the output sequence using this matrix
......@@ -317,13 +317,13 @@ A document & A summary \\
\item[\ra] Solves the gradient flow problem
\item[]
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Representing sentences with a matrix}
\begin{block}{}
\begin{itemize}
\item Fixed-size vector, regardless of the input sequence length
\item[]
......@@ -334,13 +334,13 @@ A document & A summary \\
\item[\ra] How to build this matrix?
\item[]
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Concatenation}
\begin{block}{}
\begin{itemize}
\item Concatenation of word embeddings
\item The simplest possible model
......@@ -352,14 +352,14 @@ A document & A summary \\
\item Using bidirectional RNNs \cite{bahdanau2014}
\item[\ra] the most widely used method
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Bidirectional Encoder}
\begin{block}{}
\centering{
\only<1>{ \includegraphics[height=0.5\textheight]{figures_en/bidir_enc_1} \\
{\color{gray} [1.]} ~\emph{1-hot} vector + projection + update \alert{forward} hidden unit }
......@@ -369,14 +369,14 @@ A document & A summary \\
{\color{brown} [2.]} \alert{Annotation} = concatenation of \alert{forward} and \alert{backward} vectors \\
{\small Every $\vm{h}_i$ encodes the full sentence with a focus on the \ith\ word} }
}
\end{block}
\vspace{.2cm}
\end{frame}
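%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A minimal sketch of the annotation matrix: one forward pass, one
% backward pass, concatenate the two states at each position. Plain tanh
% RNNs stand in for the GRUs of \cite{bahdanau2014}; names are ours.
\begin{frame}[fragile]
\frametitle{Bidirectional encoder: a sketch}
Building the annotation matrix with two toy RNN passes (illustrative only):
{\scriptsize
\begin{verbatim}
import numpy as np

def bidir_annotations(word_ids, E, Wf, Wb):
    """One annotation per source word: the concatenation of a forward
    and a backward recurrent state at that position."""
    X, d_h = E[word_ids], Wf.shape[0]
    fwd, h = [], np.zeros(d_h)
    for x in X:                                    # left-to-right pass
        h = np.tanh(Wf @ np.concatenate([h, x]))
        fwd.append(h)
    bwd, h = [], np.zeros(d_h)
    for x in X[::-1]:                              # right-to-left pass
        h = np.tanh(Wb @ np.concatenate([h, x]))
        bwd.append(h)
    return np.stack([np.concatenate([f, b])
                     for f, b in zip(fwd, bwd[::-1])])   # (length, 2*d_h)

V, d, d_h = 50, 8, 16
E = np.random.randn(V, d) * 0.1
Wf = np.random.randn(d_h, d_h + d) * 0.1
Wb = np.random.randn(d_h, d_h + d) * 0.1
print(bidir_annotations([3, 7, 12, 9], E, Wf, Wb).shape)   # (4, 32)
\end{verbatim}
}
\end{frame}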
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Attention Mechanism}
\begin{block}{}
\begin{itemize}
\item How should the decoder process this matrix?
\item Reminder: the decoder is made of one (or several) recurrent units
......@@ -389,28 +389,26 @@ A document & A summary \\
\item[\ra] \textbf{Attention mechanism}
\item[]
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Attention mechanism}
\begin{block}{Before: sentence represented by a vector }
\centering{
\textbf{Before: sentence represented by a vector }
\centerline{
\includegraphics[height=0.7\textheight]{figures_en/enc_dec_all}
}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Attention mechanism}
\begin{block}{After: sentence represented by a matrix}
\centering{
\textbf{After: sentence represented by a matrix}
\centerline{
\includegraphics[height=0.7\textheight]{figures_en/dec_attention_0}
}
\end{block}
\end{frame}
......@@ -425,7 +423,7 @@ A document & A summary \\
%
\begin{columns}
\column{0.5\textwidth}
\begin{block}{}
\begin{itemize}
\item<+-> {\color{brown} [2.]} ~Decoder gets the \alert{annotations} from the encoder.
\item<+-> {\color{cyan} [3.]} ~ \alert{Attention weights} calculated with a feedforward NN. \\
......@@ -435,7 +433,7 @@ A document & A summary \\
\item<+-> {\color{purple} [5.]} Calculate probability distribution for \alert{all} words
\item<+-> {\color{orange} [6.]} Generate next word (most probable)
\end{itemize}
\end{block}
\column{0.5\textwidth}
\end{columns}
......@@ -474,35 +472,33 @@ A document & A summary \\
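%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A minimal sketch of one attention step over the annotation matrix.
% The additive scoring function follows the spirit of \cite{bahdanau2014};
% the parameter names (Wa, Ua, va) and the toy sizes are illustrative.
\begin{frame}[fragile]
\frametitle{Attention mechanism: a sketch}
Scoring, normalising and summing the annotations (illustrative names):
{\scriptsize
\begin{verbatim}
import numpy as np

def attention(s_prev, H, Wa, Ua, va):
    """Score each annotation with a small feedforward net, turn the
    scores into weights with a softmax, and return the weighted sum
    of annotations (the context vector) plus the weights."""
    scores = np.array([va @ np.tanh(Wa @ s_prev + Ua @ h) for h in H])
    e = np.exp(scores - scores.max())
    alpha = e / e.sum()               # attention weights (soft alignment)
    return alpha @ H, alpha           # context vector, weights

d_s, d_a, d_att, L = 16, 32, 20, 5    # decoder, annotation, scorer, length
H = np.random.randn(L, d_a)           # annotation matrix from the encoder
Wa = np.random.randn(d_att, d_s) * 0.1
Ua = np.random.randn(d_att, d_a) * 0.1
va = np.random.randn(d_att) * 0.1
context, alpha = attention(np.zeros(d_s), H, Wa, Ua, va)
print(alpha.round(2), context.shape)  # weights sum to 1; context is (32,)
\end{verbatim}
}
\end{frame}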
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{A word on gradients}
\begin{block}{Without attention mechanism: }
\centering{
\textbf{Without attention mechanism: }
\centerline{
\only<1>{ \includegraphics[height=0.6\textheight]{figures_en/enc_dec_all} }
\only<2>{ \includegraphics[height=0.6\textheight]{figures_en/dec_backprop} }
}
\begin{itemize}
\item<2-> Gradients go through the last encoder hidden state.
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{A word on gradients}
\begin{block}{With attention mechanism: }
\centering{
\textbf{With attention mechanism: }
\centerline{
\only<1>{ \includegraphics[height=0.6\textheight]{figures_en/dec_attention_backprop} }
}
\begin{itemize}
\item The attention mechanism facilitates gradient propagation towards the encoder
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Attention and translation}
\begin{block}{ Some considerations/remarks :}
\textbf{Some considerations/remarks:}
\begin{itemize}
\item Does a human translator memorise the whole source sentence and then proceed to translate?
\begin{itemize}
......@@ -516,7 +512,6 @@ A document & A summary \\
\item Should humans be a model for machines? That's another story...
\item[]
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -524,14 +519,13 @@ A document & A summary \\
\frametitle{Attention and translation}
\begin{columns}
\column{0.5\textwidth}
\begin{block}{Attention Mechanism \Ra\ alignment }
\textbf{Attention Mechanism \Ra\ alignment}
\begin{itemize}
\item For each produced word, a set of attention weights is created (one weight per source word)
\item \textbf{Alignment} and translation models are jointly trained!
\item[\ra] \cite{bahdanau2014}
\item[]
\end{itemize}
\end{block}
\column{0.5\textwidth}
\centering{
......@@ -543,7 +537,6 @@ A document & A summary \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Summary}
\begin{block}{ }
\begin{itemize}
\item Attention
\begin{itemize}
......@@ -556,13 +549,12 @@ A document & A summary \\
\end{itemize}
\item[]
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Algorithm}
\centering{
\centerline{
\includegraphics[height=0.8\textheight]{figures_en/dec_algo}
}
\begin{itemize}
......
......@@ -52,8 +52,6 @@
\usepackage[english]{babel}
\usepackage[utf8]{inputenc}
\usepackage{times}
\usepackage{epsfig}
\usepackage{comment}
......@@ -68,9 +66,10 @@
%\usepackage{xspace}
%\usepackage{amsmath}
\input ../macros.tex
\input ../macros_en.tex
\input ../macros_beamer.tex
\input ../mycolors.tex
\usepackage[absolute,showboxes,overlay]{textpos}
%\TPshowboxestrue % comment out once finished
......@@ -168,7 +167,6 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Motivations}
\begin{block}{}
\begin{itemize}
\item Many problems can be reduced to transforming one sequence into another:
\begin{itemize}
......@@ -190,7 +188,6 @@
\item image = pixel sequence (possibly 2D)
\end{itemize}
\end{itemize}
\end{block}
\end{frame}
......@@ -230,7 +227,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Summary}
\begin{block}{Sentence representations}
\textbf{Sentence representations}
\begin{itemize}
\item Fixed-size vector from an RNN
\item Matrix + attention mechanism
......@@ -242,17 +239,17 @@
\item[]
\end{itemize}
\end{itemize}
\end{block}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Attention Mechanism}
\begin{block}{Image captioning \cite{xu2015showattendtell}}
\centering{
\textbf{Image captioning \cite{xu2015showattendtell}}
\centerline{
\includegraphics[height=0.75\textheight]{figures/img_caption_1}
}
\end{block}
\end{frame}
......
......@@ -12,30 +12,32 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Language Modelling}
\begin{block}{Reminder: language modelling }
\frametitle{Reminder: Language Modelling}
\begin{itemize}
\item A language model (LM) assigns a non-zero probability to a word sequence $\vw = (w_1, w_2, ..., w_\ell)$
\begin{eqnarray} p(\vw) & = & p(w_1) \times p(w_2|w_1) \times p(w_3 | w_1, w_2) \times \dots \times \nonumber \\
& & ~~~~~~ p(w_\ell | w_1, ..., w_{\ell-1}) \nonumber\\
& = & \prod_{t=1}^{\ell} p(w_t|w_1, ..., w_{t-1}) \nonumber
\end{eqnarray}
\end{itemize}
\begin{itemize}
\item Modelling language is done by {\bf modelling the probability of the next word } given the history of previous words.
\item In practice: the history is reduced so that estimation stays tractable and relevant (Markov assumption) \ra\ n-gram models
\end{itemize}
\end{block}
\end{frame}
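%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A toy illustration of the chain rule with a bigram (1st-order Markov)
% approximation estimated by counting. The corpus and all names are ours;
% note that unseen bigrams get probability 0 here, which is exactly what
% smoothing, backoff and neural LMs address.
\begin{frame}[fragile]
\frametitle{Chain rule and n-grams: a toy example}
Counting bigrams on a toy corpus (illustrative only):
{\scriptsize
\begin{verbatim}
from collections import Counter

corpus = "the cat sat on the mat . the dog sat on the cat .".split()
bigrams = Counter(zip(corpus, corpus[1:]))
unigrams = Counter(corpus)

def p_next(w, history):
    """Bigram (1st-order Markov) estimate of p(w | history):
    only the last word of the history is kept."""
    return bigrams[(history[-1], w)] / unigrams[history[-1]]

def p_sentence(words):
    """Chain rule: p(w_1..w_l) = prod_t p(w_t | w_1..w_{t-1})."""
    p = unigrams[words[0]] / len(corpus)        # p(w_1)
    for t in range(1, len(words)):
        p *= p_next(words[t], words[:t])
    return p

print(p_sentence("the cat sat on the mat .".split()))   # ~0.018
\end{verbatim}
}
\end{frame}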
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Neural Language Model}
\begin{block}{Reminder: Feed-forward neural LM}
\begin{itemize}
% \item Still an n-gram model
\item $ p(w_i | w_{i-n+1}, \cdots, w_{i-1}) \approx f(w_{i-n+1}, \cdots, w_{i-1}) $
\item $f$: function estimating the probability of word $w_i$ from the $n-1$ previous words \ra\ learned with a NN
\end{itemize}
\end{block}
\centerline{
\includegraphics[width=0.30\textwidth]{figures_en/fflm_all}
}
......@@ -45,7 +47,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Neural Language Model}
\begin{block}{Feedforward NN}
\textbf{Feedforward NN}
\begin{description}
\item[1.] Word representation with '\alert{1-hot}' vector
\item[\ra] $ w_j = \left[ 0, \cdots, 0, 1, 0, \cdots, 0 \right]^\top $ (1 at position $j$)
......@@ -61,7 +63,7 @@
\item[\ra] $\vm{d} = \phi ( \vm{U}^\top \vm{c} + \vm{b_U}) $ with $\vm{b_U}$ the bias
\item[\ra] $\phi$: non-linear activation function (tanh)
\end{description}
\end{block}
\begin{textblock*}{40mm}[0,0](93mm,20mm)
\includegraphics[height=5cm]{figures_en/fflm_proj}
\end{textblock*}
......@@ -70,7 +72,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Neural Language Model}
\begin{block}{Feedforward NN}
\textbf{Feedforward NN}
%\begin{varblock}[7cm]{RdN Feedforward}
\begin{description}
\item[5.] Calculate non-normalized score
......@@ -83,7 +85,7 @@
\item[] with $z_j$ the \jth\ element of $\vm{z}$ and $|V|$ the vocabulary size
\item[]
\end{description}
\end{block}
%\end{varblock}
\begin{textblock*}{30mm}[0,0](90mm,12mm)
\includegraphics[height=4.5cm]{figures_en/fflm_estim}
......@@ -93,7 +95,7 @@
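%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A minimal forward pass mirroring steps 1-6 above. W, U, bU follow the
% slide's symbols; R and bR are our illustrative names for the output
% projection producing the scores z. Sizes are toy values.
\begin{frame}[fragile]
\frametitle{Feed-forward neural LM: a sketch}
The forward pass of a toy 4-gram feed-forward LM (illustrative sizes):
{\scriptsize
\begin{verbatim}
import numpy as np

def ffnn_lm_step(context_ids, W, U, bU, R, bR):
    """1.-3. embed the n-1 context words (the 1-hot product is just a
    row lookup in W) and concatenate; 4. hidden d = tanh(U^T c + bU);
    5. scores z = R d + bR; 6. softmax over the vocabulary."""
    c = np.concatenate([W[j] for j in context_ids])
    d = np.tanh(U.T @ c + bU)
    z = R @ d + bR
    e = np.exp(z - z.max())
    return e / e.sum()                         # p(w_i | context)

V, d_emb, d_hid, n = 100, 16, 32, 4            # toy 4-gram model
W = np.random.randn(V, d_emb) * 0.1            # embedding matrix
U = np.random.randn((n - 1) * d_emb, d_hid) * 0.1
R = np.random.randn(V, d_hid) * 0.1
bU, bR = np.zeros(d_hid), np.zeros(V)
p = ffnn_lm_step([5, 42, 7], W, U, bU, R, bR)  # 3 context words
print(p.shape, round(p.sum(), 6))              # (100,) 1.0
\end{verbatim}
}
\end{frame}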
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Neural Language Model}
\begin{block}{}
\begin{itemize}
% \item Training by back-propagation of the error gradient
\item Maximum likelihood + backprop
......@@ -109,7 +111,7 @@
\item $\vm{d}$: sentence representation?
\item[]
\end{itemize}
\end{block}
\begin{textblock*}{30mm}[0,0](78mm,23mm)
\includegraphics[height=4.5cm]{figures_en/fflm_all}
\end{textblock*}
......@@ -118,23 +120,22 @@
%---------------------------------------------------------
\begin{frame}
\frametitle{Embeddings}
\begin{figure}
\centering
\includegraphics[height=6cm]{figures_en/Turian-WordTSNE_all}%
\onslide<2->{
\llap{\raisebox{1cm}{% move next graphics to top right corner
\centerline{
\includegraphics[width=0.6\textwidth]{figures_en/Turian-WordTSNE}
}
}}}
\end{figure}
\centering
\includegraphics[height=\textheight]{figures_en/Turian-WordTSNE_all}%
\begin{textblock*}{90mm}[0,0](40mm,10mm)
\only<2>{ \includegraphics[width=.9\textwidth]{figures_en/Turian-WordTSNE}%
}
\end{textblock*}
\end{frame}
%---------------------------------------------------------
\begin{frame}
\frametitle{Why does it work?}
\begin{block}{}
\begin{itemize}
\item Better estimation for n-grams unseen in the training corpus
\item[\ra] backoff LM: reduce history size + weighting
......@@ -151,13 +152,14 @@
\item[] What is the probability that \edinred{10} is followed by \edinorange{dollars}?
\item[]
\end{itemize}
\end{block}
\end{frame}
%---------------------------------------------------------
\begin{frame}
\begin{block}{}
\frametitle{}
\begin{itemize}
\item[] What is the probability that \edinred{10} is followed by \edinorange{dollars}?
\item[]
......@@ -173,7 +175,7 @@
% \item[]
\end{itemize}
\end{block}
\end{frame}
%---------------------------------------------------------
......@@ -181,7 +183,6 @@
\begin{frame}
\frametitle{}
\begin{block}{}
\begin{itemize}
\item Can we free the model from the Markov property?
\item[\ra] Non-Markovian model
......@@ -196,7 +197,7 @@
\item<2> Solution: \alert{compress history!}
% \item[]
\end{itemize}
\end{block}
\end{frame}
%------------------------------------------------------------------------------------------------------------
......@@ -205,12 +206,11 @@
\begin{frame}
\frametitle{Recurrent Neural Networks}
\begin{block}{}
\begin{itemize}
\item Problem: \textbf{sentences are of variable length, not bounded!}
\item Solution: \alert{compress history!}
\item[] \Ra\ Solution: \alert{compress history!}
\end{itemize}
\end{block}
\begin{block}{Protocol}
\begin{enumerate}
\item Initialise history $\vm{h}$
......@@ -218,6 +218,7 @@
\item Predict next word $w_{i+1}$ using $\vm{h}_i$
\end{enumerate}
\end{block}
\end{frame}
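%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A minimal sketch of the protocol above: initialise the history, fold
% each word into it, predict the next word from it. Names (W, Wh, S)
% and sizes are illustrative.
\begin{frame}[fragile]
\frametitle{Recurrent LM: a sketch}
Compressing the whole history into one fixed-size vector (illustrative only):
{\scriptsize
\begin{verbatim}
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def rnn_lm_step(word_ids, W, Wh, S):
    """1. initialise the history h; 2. update h with each word w_i;
    3. predict w_{i+1} from h alone (the compressed history)."""
    h = np.zeros(Wh.shape[0])
    for i in word_ids:
        h = np.tanh(Wh @ np.concatenate([h, W[i]]))
    return softmax(S @ h)                    # p(next word | history)

V, d, d_h = 50, 8, 16
W = np.random.randn(V, d) * 0.1              # word embeddings
Wh = np.random.randn(d_h, d_h + d) * 0.1     # recurrence weights
S = np.random.randn(V, d_h) * 0.1            # output projection
print(rnn_lm_step([3, 7, 12], W, Wh, S).argmax())   # most probable next word
\end{verbatim}
}
\end{frame}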
......@@ -226,7 +227,6 @@
\frametitle{Recurrent Neural Networks}
%\vspace{-.5cm}
\begin{block}{}
\begin{description}
\item[1. \& 2.] 1-hot vectors + project $w_i$ into continuous space
\item[\ra] $ \vm{c}_i = \vm{W} ^\top w_i \in \mathbb{R}^d $ \\
......@@ -245,7 +245,6 @@