Loïc Barrault / supports_cours / Commits

Commit 6ea31fce, authored Dec 18, 2019 by Loïc Barrault
Commit message: more printing friendly
Parent: cd08954c
Changes: 10
SequenceModeling/conditional_lm_en.tex
@@ -16,8 +16,9 @@
 \begin{frame}
 \frametitle{Reminder: RNNLM}
-\centering{\includegraphics[width=0.55\textwidth]{figures_en/rnn_unrolled_all}}
+\centerline{\includegraphics[width=0.55\textwidth]{figures_en/rnn_unrolled_all}}
 \begin{itemize}
 \item<+-> Probability of a word sequence $\vw = (w_1, w_2, ..., w_\ell)$
 \item[] {\small{$p(\vw) = p(w_1) \times p(w_2|w_1) \times p(w_3 | w_1, w_2) \times \dots \times p(w_\ell | w_1, ..., w_{\ell-1}) = \ds\prod_{t=1}^{\ell} p(w_t|w_1, ..., w_{t-1})$}}
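Spelled out, the product above is just the chain rule of probability. For a three-word sentence it reads as follows (an illustrative instance added here, not a line of the commit):

% Chain rule for \ell = 3; e.g. p(the cat sat) = p(the) p(cat | the) p(sat | the, cat)
$p(\vw) = p(w_1) \times p(w_2 | w_1) \times p(w_3 | w_1, w_2)$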
@@ -125,52 +126,55 @@ A document & A summary \\
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Back to the encoder}
-\begin{block}{How to represent the source sequence with a fixed size vector $\edinred{\vm{x}}$?}
+\textbf{How to represent the source sequence with a fixed size vector $\edinred{\vm{x}}$?}
 \begin{itemize}
 \item Previous part: RNN, GRU, LSTM
 \item What about this architecture? \\
-{\centering{\includegraphics[width=0.35\textwidth]{figures_en/bow} \item[]}}
+{\centerline{\includegraphics[width=0.35\textwidth]{figures_en/bow} \item[]}}
 \item<2> \textbf{Bag of words} representation
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Back to the encoder}
-\begin{block}{\cite{kalchbrenner2013}}
+\textbf{\cite{kalchbrenner2013}}
 \vfill
 \begin{itemize}
-\item[] {\centering{\includegraphics[width=0.35\textwidth]{figures_en/conv_sent_encoder} \item[]}}
+\item[] {\centerline{\includegraphics[width=0.35\textwidth]{figures_en/conv_sent_encoder} \item[]}}
 \vfill
 \item<+-> \edinred{Convolutional} encoder
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{How to condition on $\vm{x}$?}
-\begin{block}{\cite{kalchbrenner2013}}
-\centering
-\includegraphics[width=0.95\textwidth]<+>{figures_en/rnn_unrolled_4}
-\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_1}
-\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_2}
-\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_3}
-\includegraphics[width=0.95\textwidth]<+>{figures_en/cond_rnn_unrolled_all}
-\includegraphics[width=0.55\textwidth]<+>{figures_en/cond_rnn_unrolled_all}
+\textbf{\cite{kalchbrenner2013}}
+\centerline{
+\includegraphics[width=0.95\textwidth]{figures_en/rnn_unrolled_4}<+>
+\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_1}<+>
+\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_2}<+>
+\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_3}<+>
+\includegraphics[width=0.95\textwidth]{figures_en/cond_rnn_unrolled_all}<+>
+\includegraphics[width=0.55\textwidth]{figures_en/cond_rnn_unrolled_all}<+>
+}
 \begin{itemize}
 \item[]<.-> $\vm{h}_t = \phi(\vm{M}[\vm{h}_{t-1}; \vm{w}_{t-1}] \edinred{+ \vm{x}} + \vm{b})$
 \item[]<.-> $\vm{z}_t = \vm{S}~\vm{h}_t + \vm{b'}$
 \item[]<.-> $p(\vm{w}_t | \edinred{\vm{x}}, \vm{w}_{<t}) = softmax(\vm{z}_t)$
 \item[]
 \end{itemize}
 \end{block}
 \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
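The three \item[] equations in the frame above are the entire conditional RNNLM. Written out with the softmax made explicit (a sketch in the slides' own notation; $z_{t,j}$ denotes the $j$-th element of $\vm{z}_t$, and the parenthetical labels are mine):

% One decoding step of the conditional RNNLM, with the softmax written out.
\begin{eqnarray*}
\vm{h}_t & = & \phi(\vm{M}[\vm{h}_{t-1}; \vm{w}_{t-1}] + \vm{x} + \vm{b}) \qquad \textrm{(source vector $\vm{x}$ injected at every step)} \\
\vm{z}_t & = & \vm{S}~\vm{h}_t + \vm{b'} \qquad \textrm{(one score per vocabulary word)} \\
p(w_t = j \mid \vm{x}, \vm{w}_{<t}) & = & \frac{\exp(z_{t,j})}{\sum_{k=1}^{\|V\|} \exp(z_{t,k})}
\end{eqnarray*}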
@@ -179,7 +183,7 @@ A document & A summary \\
 \begin{columns}
 \column{0.5\textwidth}
-\begin{block}{Architecture}
+\textbf{Architecture}
 \begin{itemize}
 \item Encoder: $LSTM(\edinred{\vm{x}_i}, \vm{c}_{i-1}, \vm{h}_{i-1})$
 \item[\ra] Provides the vector $\edinred{\vm{x}_{\ell}}$, where $\ell$ is the fixed size considered.
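The $LSTM(\edinred{\vm{x}_i}, \vm{c}_{i-1}, \vm{h}_{i-1})$ named in the Encoder bullet is the standard cell. For reference, the textbook gate equations, which the commit does not spell out ($\sigma$ = logistic sigmoid, $\odot$ = elementwise product):

% Standard LSTM cell behind LSTM(x_i, c_{i-1}, h_{i-1}); textbook form, not from the commit.
\begin{eqnarray*}
\vm{f}_i & = & \sigma(\vm{W}_f [\vm{h}_{i-1}; \vm{x}_i] + \vm{b}_f) \qquad \textrm{(forget gate)} \\
\vm{i}_i & = & \sigma(\vm{W}_i [\vm{h}_{i-1}; \vm{x}_i] + \vm{b}_i) \qquad \textrm{(input gate)} \\
\vm{o}_i & = & \sigma(\vm{W}_o [\vm{h}_{i-1}; \vm{x}_i] + \vm{b}_o) \qquad \textrm{(output gate)} \\
\vm{c}_i & = & \vm{f}_i \odot \vm{c}_{i-1} + \vm{i}_i \odot \tanh(\vm{W}_c [\vm{h}_{i-1}; \vm{x}_i] + \vm{b}_c) \\
\vm{h}_i & = & \vm{o}_i \odot \tanh(\vm{c}_i)
\end{eqnarray*}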
@@ -194,7 +198,7 @@ A document & A summary \\
 \item[]
 \end{itemize}
 \end{itemize}
 \end{block}
 \column{0.5\textwidth}
 \\
 \centering
 \includegraphics[height=4cm]{figures_en/lstm}
 \end{columns}
@@ -205,7 +209,7 @@ A document & A summary \\
 \frametitle{Machine Translation: \cite{sutskever2014}}
 \begin{columns}
 \column{0.5\textwidth}
 \begin{block}{}
 \begin{itemize}
 \item<2-> {{\color{edinred}[1.]}~Word encoded into a \emph{1-hot} vector}
 \item<3-> {{\color{cyan}[2.]} Projection into an \textbf{\textit{embedding}}}
@@ -219,18 +223,16 @@ A document & A summary \\
 \item<8-> {{\color{orange}[7.]} Next word (most probable)}
 \end{itemize}
 \end{block}
 \column{0.5\textwidth}
 \\
 \centering{
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all}
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_1}
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_2}
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_3}
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_4}
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_5}
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_6}
-\includegraphics[height=0.8\textwidth]<+>{figures_en/enc_dec_all_7}
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all}<+>
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_1}<+>
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_2}<+>
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_3}<+>
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_4}<+>
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_5}<+>
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_6}<+>
+\includegraphics[height=0.8\textwidth]{figures_en/enc_dec_all_7}<+>
 }
 %centering
 \end{columns}
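The seven numbered steps in this frame are trained end-to-end. For completeness, the training objective from \cite{sutskever2014}, with my notation for the data ($S$ = source sentence, $T$ = target sentence, $D$ = training set):

% Encoder-decoder training objective and decoding rule (sutskever2014).
\begin{eqnarray*}
\theta^{*} & = & \arg\max_{\theta} \sum_{(S,T) \in D} \log p(T \mid S; \theta) \\
\hat{T} & = & \arg\max_{T} \; p(T \mid S; \theta^{*}) \qquad \textrm{(decoding, approximated with beam search)}
\end{eqnarray*}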
@@ -240,7 +242,6 @@ A document & A summary \\
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Results}
 \begin{block}{}
 \centerline{\includegraphics[width=0.8\textwidth]{figures_en/nmt_sentence_length}}
@@ -260,7 +261,6 @@ A document & A summary \\
 \end{enumerate}
 }
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -306,7 +306,7 @@ A document & A summary \\
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{From vector to matrix representation}
 \begin{block}{}
 \begin{itemize}
 \item Represent the input sequence with a matrix
 \item Generate the output sequence using the matrix
@@ -317,13 +317,13 @@ A document & A summary \\
 \item[\ra] Solves the gradient-flow problem
 \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Representing sentences with a matrix}
 \begin{block}{}
 \begin{itemize}
 \item Fixed size vector: regardless of the input sequence size
 \item[]
@@ -334,13 +334,13 @@ A document & A summary \\
 \item[\ra] How to build this matrix?
 \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Concatenation}
 \begin{block}{}
 \begin{itemize}
 \item Concatenation of word embeddings
 \item simplest possible model
@@ -352,14 +352,14 @@ A document & A summary \\
 \item Using bidirectional RNNs \cite{bahdanau2014}
 \item[\ra] the most used method
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Bidirectional Encoder}
 \begin{block}{}
 \centering{
 \only<1>{\includegraphics[height=0.5\textheight]{figures_en/bidir_enc_1} \\
 {\color{gray}[1.]}~\emph{1-hot} vector + projection + update of the \alert{forward} hidden unit}
@@ -369,14 +369,14 @@ A document & A summary \\
 {\color{brown}[2.]} \alert{Annotation} = concatenation of \alert{forward} and \alert{backward} vectors \\
 {\small Every $\vm{h}_i$ encodes the full sentence with a focus on the \ith\ word}
 }
 }
 \end{block}
 \vspace{.2cm}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
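The Annotation bullet above, as an equation (the arrow notation is the usual one from \cite{bahdanau2014}; the slide itself keeps this in prose):

% Annotation for the i-th word: forward and backward states, concatenated.
\begin{eqnarray*}
\vm{h}_i & = & [\overrightarrow{\vm{h}}_i; \overleftarrow{\vm{h}}_i]
\end{eqnarray*}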
 \begin{frame}
 \frametitle{Attention Mechanism}
 \begin{block}{}
 \begin{itemize}
 \item How should the decoder process this matrix?
 \item Reminder: the decoder is made of one (or several) recurrent units
@@ -389,28 +389,26 @@ A document & A summary \\
 \item[\ra] \textbf{Attention mechanism}
 \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Attention mechanism}
-\begin{block}{Before: sentence represented by a vector}
-\centering{
+\textbf{Before: sentence represented by a vector}
+\centerline{
 \includegraphics[height=0.7\textheight]{figures_en/enc_dec_all}
 }
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Attention mechanism}
-\begin{block}{After: sentence represented by a matrix}
-\centering{
+\textbf{After: sentence represented by a matrix}
+\centerline{
 \includegraphics[height=0.7\textheight]{figures_en/dec_attention_0}
 }
 \end{block}
 \end{frame}
@@ -425,7 +423,7 @@ A document & A summary \\
 %
 \begin{columns}
 \column{0.5\textwidth}
 \begin{block}{}
 \begin{itemize}
 \item<+-> {\color{brown}[2.]}~Decoder gets the \alert{annotations} from the encoder.
 \item<+-> {\color{cyan}[3.]}~\alert{Attention weights} calculated with a feedforward NN. \\
@@ -435,7 +433,7 @@ A document & A summary \\
 \item<+-> {\color{purple}[5.]} Calculate the probability distribution over \alert{all} words
 \item<+-> {\color{orange}[6.]} Generate the next word (most probable)
 \end{itemize}
 \end{block}
 \column{0.5\textwidth}
 \end{columns}
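Written out, steps [3.] onwards are the standard attention computation from \cite{bahdanau2014} (symbols follow the paper, not this commit: $\vm{s}_{i-1}$ is the previous decoder state, $a(\cdot)$ the small feedforward NN, $\ell$ the source length):

% Attention weights and context vector (standard bahdanau2014 formulation).
\begin{eqnarray*}
e_{ij} & = & a(\vm{s}_{i-1}, \vm{h}_j) \qquad \textrm{(score of annotation $j$ for output position $i$)} \\
\alpha_{ij} & = & \frac{\exp(e_{ij})}{\sum_{k=1}^{\ell} \exp(e_{ik})} \qquad \textrm{(weights sum to 1)} \\
\vm{c}_i & = & \sum_{j=1}^{\ell} \alpha_{ij} \vm{h}_j \qquad \textrm{(context vector fed to the decoder)}
\end{eqnarray*}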
@@ -474,35 +472,33 @@ A document & A summary \\
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{A word on gradients}
-\begin{block}{Without attention mechanism:}
-\centering{
+\textbf{Without attention mechanism:}
+\centerline{
 \only<1>{\includegraphics[height=0.6\textheight]{figures_en/enc_dec_all}}
 \only<2>{\includegraphics[height=0.6\textheight]{figures_en/dec_backprop}}
 }
 \begin{itemize}
 \item<2-> Gradients go through the last encoder hidden state.
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{A word on gradients}
-\begin{block}{With attention mechanism:}
-\centering{
+\textbf{With attention mechanism:}
+\centerline{
 \only<1>{\includegraphics[height=0.6\textheight]{figures_en/dec_attention_backprop}}
 }
 \begin{itemize}
 \item The attention mechanism facilitates gradient propagation towards the encoder
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Attention and translation}
-\begin{block}{Some considerations/remarks:}
+\textbf{Some considerations/remarks:}
 \begin{itemize}
 \item Does a human translator memorise the whole source sentence and then proceed to translate?
 \begin{itemize}
@@ -516,7 +512,6 @@ A document & A summary \\
 \item Should humans be a model for machines? That's another story...
 \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -524,14 +519,13 @@ A document & A summary \\
 \frametitle{Attention and translation}
 \begin{columns}
 \column{0.5\textwidth}
-\begin{block}{Attention Mechanism \Ra\ alignment}
+\textbf{Attention Mechanism \Ra\ alignment}
 \begin{itemize}
 \item For each produced word, a set of attention weights is created (the set's length is the size of the source sequence)
 \item \textbf{Alignment} and translation models jointly trained!
 \item[\ra] \cite{bahdanau2014}
 \item[]
 \end{itemize}
 \end{block}
 \column{0.5\textwidth}
 \centering{
@@ -543,7 +537,6 @@ A document & A summary \\
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Summary}
 \begin{block}{}
 \begin{itemize}
 \item Attention
 \begin{itemize}
@@ -556,13 +549,12 @@ A document & A summary \\
 \end{itemize}
 \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Algorithm}
-\centering{
+\centerline{
 \includegraphics[height=0.8\textheight]{figures_en/dec_algo}
 }
 \begin{itemize}
SequenceModeling/figures_en/Turian-WordTSNE.pdf (0 → 100644): File added
SequenceModeling/figures_en/Turian-WordTSNE_all.pdf (0 → 100644): File added
SequenceModeling/figures_en/Turian-WordTSNE.png → SequenceModeling/figures_en/old_Turian-WordTSNE.png: File moved
SequenceModeling/figures_en/Turian-WordTSNE_all.png → SequenceModeling/figures_en/old_Turian-WordTSNE_all.png: File moved
SequenceModeling/m2_DL_sequence_modeling_en.tex
@@ -52,8 +52,6 @@
 \usepackage[english]{babel}
 \usepackage[utf8]{inputenc}
 \usepackage{times}
 \usepackage{epsfig}
 \usepackage{comment}
@@ -68,9 +66,10 @@
 %\usepackage{xspace}
 %\usepackage{amsmath}
 \input ../macros.tex
 \input ../macros_en.tex
 \input ../macros_beamer.tex
 \input ../mycolors.tex
 \usepackage[absolute,showboxes,overlay]{textpos}
 %\TPshowboxestrue % comment this out once finished
@@ -168,7 +167,6 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Motivations}
 \begin{block}{}
 \begin{itemize}
 \item Many problems can be reduced to transforming one sequence into another:
 \begin{itemize}
@@ -190,7 +188,6 @@
 \item image = pixel sequence (possibly 2D)
 \end{itemize}
 \end{itemize}
 \end{block}
 \end{frame}
@@ -230,7 +227,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Summary}
-\begin{block}{Sentence representations}
+\textbf{Sentence representations}
 \begin{itemize}
 \item Fixed size vector from an RNN
 \item Matrix + attention mechanism
@@ -242,17 +239,17 @@
 \item[]
 \end{itemize}
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Attention Mechanism}
-\begin{block}{Image captioning \cite{xu2015showattendtell}}
-\centering{
+\textbf{Image captioning \cite{xu2015showattendtell}}
+\centerline{
 \includegraphics[height=0.75\textheight]{figures/img_caption_1}
 }
 \end{block}
 \end{frame}
SequenceModeling/unconditional_lm_en.tex
@@ -12,30 +12,32 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
-\frametitle{Language Modelling}
-\begin{block}{Reminder: language modelling}
+\frametitle{Reminder: Language Modelling}
 \begin{itemize}
 \item A language model (LM) assigns a non-zero probability to a word sequence $\vw = (w_1, w_2, ..., w_\ell)$
 \begin{eqnarray}
 p(\vw) & = & p(w_1) \times p(w_2|w_1) \times p(w_3 | w_1, w_2) \times \dots \times \nonumber \\
 & & ~~~~~~ p(w_\ell | w_1, ..., w_{\ell-1}) \nonumber\\
 & = & \prod_{t=1}^{\ell} p(w_t|w_1, ..., w_{t-1}) \nonumber
 \end{eqnarray}
 \end{itemize}
 \begin{itemize}
 \item Modelling language is done by {\bf modelling the probability of the next word} given the history of previous words.
 \item In practice: reduce the history so that it is tractable and relevant (Markov hypothesis) \ra\ n-gram
 \end{itemize}
 \end{block}
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Neural Language Model}
 \begin{block}{Reminder: Feed-forward neural LM}
 \begin{itemize}
 % \item Still an n-gram model
 \item $p(w_i | w_{i-n}, \cdots, w_{i-1}) \approx f(w_{i-n}, \cdots, w_{i-1})$
 \item $f$: function estimating the probability of word $w_i$ from the $n-1$ previous words \ra\ learn with a NN
 \end{itemize}
 \end{block}
 \centerline{\includegraphics[width=0.30\textwidth]{figures_en/fflm_all}}
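Making the Markov-hypothesis bullet concrete (an added illustration, not a line of the commit): truncate the history to the $n-1$ most recent words; $n = 3$ gives the trigram approximation.

% Markov hypothesis with n = 3 (trigram); illustration only.
\begin{eqnarray*}
p(w_t | w_1, ..., w_{t-1}) & \approx & p(w_t | w_{t-2}, w_{t-1}) \\
p(\vw) & \approx & \prod_{t=1}^{\ell} p(w_t | w_{t-2}, w_{t-1})
\end{eqnarray*}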
@@ -45,7 +47,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Neural Language Model}
-\begin{block}{Feedforward NN}
+\textbf{Feedforward NN}
 \begin{description}
 \item[1.] Word representation with a '\alert{1-hot}' vector
 \item[\ra] $w_j = \left[0, \cdots, 0, 1, 0, \cdots, 0\right]^\top$ (1 at position $j$)
@@ -61,7 +63,7 @@
 \item[\ra] $\vm{d} = \phi(\vm{U}^\top \vm{c} + \vm{b_U})$ with $\vm{b_U}$ the bias
 \item[\ra] $\phi$: non-linear activation function (tanh)
 \end{description}
 \end{block}
 \begin{textblock*}{40mm}[0,0](93mm,20mm)
 \includegraphics[height=5cm]{figures_en/fflm_proj}
 \end{textblock*}
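The numbered steps across these frames form a single pipeline. Collected in one place (a sketch: the concatenation line and the output-layer names $\vm{V}$, $\vm{b_V}$ are my assumptions, since the hunks showing those steps are truncated):

% Feed-forward neural LM pipeline (sketch; \vm{V} and \vm{b_V} are assumed names).
\begin{eqnarray*}
\vm{c} & = & [\vm{W}^\top w_{i-n}; \cdots; \vm{W}^\top w_{i-1}] \qquad \textrm{(concatenated input embeddings)} \\
\vm{d} & = & \phi(\vm{U}^\top \vm{c} + \vm{b_U}) \qquad \textrm{(hidden layer, $\phi = \tanh$)} \\
\vm{z} & = & \vm{V}^\top \vm{d} + \vm{b_V} \qquad \textrm{(non-normalized scores)} \\
p(w_i | w_{i-n}, \cdots, w_{i-1}) & = & \mathrm{softmax}(\vm{z})
\end{eqnarray*}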
@@ -70,7 +72,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Neural Language Model}
-\begin{block}{Feedforward NN}
+\textbf{Feedforward NN}
 %\begin{varblock}[7cm]{RdN Feedforward}
 \begin{description}
 \item[5.] Calculate the non-normalized score
@@ -83,7 +85,7 @@
 \item[] with $z_j$ the \jth\ element of $\vm{z}$ and $\|V\|$ the vocabulary size
 \item[]
 \end{description}
 \end{block}
 %\end{varblock}
 \begin{textblock*}{30mm}[0,0](90mm,12mm)
 \includegraphics[height=4.5cm]{figures_en/fflm_estim}
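The bullet above refers to a normalization whose equation sits in the truncated part of the hunk. The standard softmax it describes (a reconstruction, not the file's own line):

% Softmax over the score vector z, element j, vocabulary size ||V||.
\begin{eqnarray*}
p(w_i = j \mid w_{i-n}, \cdots, w_{i-1}) & = & \frac{\exp(z_j)}{\sum_{k=1}^{\|V\|} \exp(z_k)}
\end{eqnarray*}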
@@ -93,7 +95,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}
 \frametitle{Neural Language Model}
 \begin{block}{}
 \begin{itemize}
 % \item Training via back-propagation of the error gradient
 \item Maximum likelihood + backprop
@@ -109,7 +111,7 @@
 \item $\vm{d}$: sentence representation?
 \item[]
 \end{itemize}
 \end{block}
 \begin{textblock*}{30mm}[0,0](78mm,23mm)
 \includegraphics[height=4.5cm]{figures_en/fflm_all}
 \end{textblock*}
@@ -118,23 +120,22 @@
 %---------------------------------------------------------
 \begin{frame}
 \frametitle{Embeddings}
-\begin{figure}
-\centering
-\includegraphics[height=6cm]{figures_en/Turian-WordTSNE_all}%
-\onslide<2->{\llap{\raisebox{1cm}{% move next graphics to top right corner
-\centerline{\includegraphics[width=0.6\textwidth]{figures_en/Turian-WordTSNE}}
-}}}
-\end{figure}
+\includegraphics[height=\textheight]{figures_en/Turian-WordTSNE_all}%
+\begin{textblock*}{90mm}[0,0](40mm,10mm)
+\only<2>{\includegraphics[width=.9\textwidth]{figures_en/Turian-WordTSNE}%
+}
+\end{textblock*}
 \end{frame}
 %---------------------------------------------------------
 \begin{frame}
 \frametitle{Why does it work?}
 \begin{block}{}
 \begin{itemize}
 \item Better estimation for n-grams unseen in the training corpus
 \item[\ra] backoff LM: reduce history size + weighting
@@ -151,13 +152,14 @@
 \item[] What is the probability that \edinred{10} is followed by \edinorange{dollars}?
 \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %---------------------------------------------------------
 \begin{frame}
 \begin{block}{}
 \frametitle{}
 \begin{itemize}
 \item[] What is the probability that \edinred{10} is followed by \edinorange{dollars}?
 \item[]
@@ -173,7 +175,7 @@
 % \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %---------------------------------------------------------
@@ -181,7 +183,6 @@
 \begin{frame}
 \frametitle{}
 \begin{block}{}
 \begin{itemize}
 \item Can we free the model from the Markov property?
 \item[\ra] Non-Markovian model
@@ -196,7 +197,7 @@
 \item<2> Solution: \alert{compress history!}
 % \item[]
 \end{itemize}
 \end{block}
 \end{frame}
 %------------------------------------------------------------------------------------------------------------
@@ -205,12 +206,11 @@
 \begin{frame}
 \frametitle{Recurrent Neural Networks}
 \begin{block}{}
 \begin{itemize}
 \item Problem: \textbf{sentences are of variable length, not bounded!}
-\item Solution: \alert{compress history!}
-\item[]
+\item[] \Ra\ Solution: \alert{compress history!}
 \end{itemize}
 \end{block}
 \begin{block}{Protocol}
 \begin{enumerate}
 \item Initialise history $\vm{h}$
@@ -218,6 +218,7 @@
 \item Predict next word $w_{i+1}$ using $\vm{h}_i$
 \end{enumerate}
 \end{block}
 \end{frame}
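The Protocol block in equation form (a sketch: the middle step of the enumerate is truncated in the hunk, so the update shown is the usual Elman recurrence, an assumption on my part, with $\vm{R}$ an assumed name for the recurrence matrix):

% RNN LM protocol as equations (step 2 assumed to be the standard Elman update).
\begin{eqnarray*}
\vm{h}_0 & = & \vm{0} \qquad \textrm{(1. initialise the history)} \\
\vm{h}_i & = & \phi(\vm{R} \vm{h}_{i-1} + \vm{c}_i + \vm{b}) \qquad \textrm{(2. fold $\vm{c}_i = \vm{W}^\top w_i$ into the history)} \\
p(w_{i+1} | w_1, ..., w_i) & = & \mathrm{softmax}(\vm{S} \vm{h}_i + \vm{b'}) \qquad \textrm{(3. predict $w_{i+1}$ from $\vm{h}_i$)}
\end{eqnarray*}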
@@ -226,7 +227,6 @@
 \frametitle{Recurrent Neural Networks}
 %\vspace{-.5cm}
 \begin{block}{}
 \begin{description}
 \item[1. \& 2.] 1-hot vectors + project $w_i$ into a continuous space
 \item[\ra] $\vm{c}_i = \vm{W}^\top w_i \in \mathbb{R}^d$ \\
@@ -245,7 +245,6 @@
 \item[]
 \item[]
 \end{description}
 \end{block}
 \begin{textblock*}{30mm}[0,0](75mm,20mm)