Commit 645f1e98 authored by Loïc Barrault
Browse files

trad stat in english

parent 51f680fe
......@@ -40,6 +40,9 @@
\newcommand{\liumcyan}[1]{{\color{liumcyan} #1}}
\newcommand{\bos}{\textnormal{<s>}}
\newcommand{\eos}{\textnormal{</s>}}
\newcommand{\unk}{\textnormal{<unk>}}
\newcommand{\hs}{\hspace{1cm}}
......@@ -97,7 +100,10 @@
\newcommand{\ra}{$\rightarrow$}
\newcommand{\Ra}{$\Rightarrow$}
\newcommand{\la}{$\leftarrow$}
\newcommand{\La}{$\Leftarrow$}
\newcommand{\eg}{e.g.}
%\DeclareMathOperator*{\argmax}{argmax}
\newcommand{\argmaxx}{\operatornamewithlimits{argmax}}
......
......@@ -37,6 +37,9 @@
\newcommand{\liumcyan}[1]{{\color{liumcyan} #1}}
\newcommand{\bos}{\textnormal{$<$s$>$}}
\newcommand{\eos}{\textnormal{$<$/s$>$}}
\newcommand{\unk}{\textnormal{$<$unk$>$}}
\newcommand{\hs}{\hspace{1cm}}
......@@ -107,6 +110,9 @@
\newcommand{\ith}{$i^{\mbox{\tiny{th}}}$}
\newcommand{\jth}{$j^{\mbox{\tiny{th}}}$}
\newcommand{\ieme}{$i^{\mbox{\tiny{th}}}$}
\newcommand{\jeme}{$j^{\mbox{\tiny{th}}}$}
\graphicspath{{../}{../figures/}{./figures/}}
%{../figures/figures.pivot}{../figures/figures.dataselection}}
......
......@@ -52,28 +52,16 @@
\usefonttheme[onlymath]{serif}
\usepackage[english]{babel}
\usepackage[utf8]{inputenc}
\usepackage{times}
\usepackage{epsfig}
\usepackage{comment}
\usepackage{url}
\usepackage{multirow}
\usepackage[T1]{fontenc}
%\usepackage{natbib}
%\usepackage{multimedia}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\setlength{\extrarowheight}{3pt}
%\usepackage{xspace}
\usepackage{algorithm}
\usepackage[noend]{algpseudocode}
\usepackage{multicol}
\input ../macros.tex
\input ../macros_en.tex
\input ../macros_beamer.tex
......@@ -89,12 +77,12 @@
\subject{Statistical Machine Translation}
\title[]{Statistical Machine Translation}
\author[]{Loïc Barrault}
\author[]{Lo\"ic Barrault}
\institute[LIUM, Le Mans Université]
\institute[LIUM, Le Mans Universit\'e]
{
Loic.Barrault@univ-lemans.fr \\
Laboratoire d'Informatique de l'Université du Maine \\
loic.barrault@univ-lemans.fr \\
Laboratoire d'Informatique de l'Universit\'e du Maine \\
}
%\date{09 janvier 2017}
......
%!TEX root = m2_trad_statistique.tex
\section{Statistical Machine Translation}
%\section{Statistical Machine Translation}
%\subsection{Introduction}
......@@ -49,7 +49,7 @@
\frametitle{Statistical MT}
\centerline{
\includegraphics[width=0.50\textwidth]{figures/smt}
\includegraphics[width=0.50\textwidth]{figures/smt_en}
}
\end{frame}
......@@ -66,6 +66,14 @@
\item Statistical MT
\end{itemize}
\end{block}
\vspace{\stretch{1}}
\centerline{
\includegraphics[width=0.50\textwidth]{figures/triangle_vauquois}
}
\centerline{
{\tiny Fig.\ Vauquois triangle.}
}
\end{frame}
......@@ -169,9 +177,9 @@
\item Translate "He buys a book on machine translation"
\item Examples in data base:
\begin{itemize}
\item \orange{He buys} an apple : \cyan{Il achète} une pomme
\item \orange{He buys} an apple : \cyan{Il ach\`ete} une pomme
\item I read \orange{a book on} statistics : Je lis \cyan{un livre sur} les statistiques
\item \orange{machine translation} is great! : \cyan{la traduction automatique}, c'est génial !
\item \orange{machine translation} is great! : \cyan{la traduction automatique}, c'est g\'enial !
\end{itemize}
\end{itemize}
\end{block}
......@@ -226,7 +234,7 @@
\begin{columns}
\begin{column}[T]{.45\textwidth}
\begin{itemize}
\item When I look at an article in Russian, I say: This is really written in English, but it has been coded in some strange symbols. I will now proceed to decode.
\item When I look at an article in Russian, I say: "This is really written in English, but it has been coded in some strange symbols. I will now proceed to decode."
\item Warren Weaver (1949)
\end{itemize}
\end{column}%
......@@ -527,9 +535,9 @@ Total & 1000 & 1
\begin{frame}
\frametitle{IBM model 1: example}
\includegraphics[height=0.3\textheight]{figures/lex_1} \hspace{0.1 cm}
\includegraphics[height=0.3\textheight]{figures/lex_2} \hspace{0.1 cm}
\includegraphics[height=0.3\textheight]{figures/lex_3} \hspace{0.1 cm}
\includegraphics[height=0.3\textheight]{figures/lex_1} \hspace{0.05 cm}
\includegraphics[height=0.3\textheight]{figures/lex_2} \hspace{0.05 cm}
\includegraphics[height=0.3\textheight]{figures/lex_3} \hspace{0.05 cm}
\includegraphics[height=0.3\textheight]{figures/lex_4}
\begin{itemize}
......@@ -1092,7 +1100,7 @@ $p(W) = \ds \prod_{i=1}^{T} p(w_i|h_i)$
\end{itemize}
\item Probabilities are independent of the position in the sentence
\begin{itemize}
\item add begin (<s>) and end (</s>) of sentence tokens
\item add begin (\bos) and end (\eos) of sentence tokens
\end{itemize}
\item Probabilities are estimated using a large quantity of data (corpus), which are supposed to be {\bf well written}
\end{itemize}
......@@ -1116,7 +1124,7 @@ $p(W) = \ds \prod_{i=1}^{T} p(w_i|h_i)$
\begin{itemize}
\item Sequences that are not allowed by the language
\begin{itemize}
\item Ex.: "ils part tôt", "elle est beau"
\item Ex.: "ils part t\^ot", "elle est beau"
\end{itemize}
\item Sequences that are not seen in the training corpus
\end{itemize}
......@@ -1226,7 +1234,7 @@ PPL & = & 2^H \mbox{~~~~~{\small [$H$ is the cross-entropy}]}\\
\item[] use the outputs of the following command: {\bf ngram -debug 2 -ppl ...}
\item Compute perplexity, interpolated model: ngram
\item[] {\bf -ppl $<$dev corpus$>$}: compute perplexity on development corpus
\item[] {\bf -mix-lmK <lmK> -mix-lambdaK <coeffK>} : interpolate several models <lmK> with weights <coeffK> (K ranging from 0 to 9)
\item[] {\bf -mix-lmK $<$lmK$>$ -mix-lambdaK $<$coeffK$>$} : interpolate several models $<$lmK$>$ with weights $<$coeffK$>$ (K ranging from 0 to 9)
\end{itemize}
\end{frame}
......@@ -1272,7 +1280,7 @@ PPL & = & 2^H \mbox{~~~~~{\small [$H$ is the cross-entropy}]}\\
\item 2nd iteration: cover 2 source words
\item etc.
\end{itemize}
\item Ex: \emph{la voiture de course rouge a été volée il y a trois jours}
\item Ex: \emph{la voiture de course rouge a \'et\'e vol\'ee il y a trois jours}
\item[]
\centerline{ \includegraphics[width=0.8\textwidth]{figures/decoder_stacks_en} }
\item Scores are propagated along with the hypotheses
......@@ -1293,9 +1301,9 @@ PPL & = & 2^H \mbox{~~~~~{\small [$H$ is the cross-entropy}]}\\
\only<1>{ \begin{itemize}
\item Hypotheses leading to the same state but with a lower score can be stopped
\begin{itemize}
\item Ex.: \emph{la voiture de course rouge a été volée il y a trois jours}
\item 1st hypo: "la voiture de course rouge" -> "the red race car", using only 1 phrase pair (seen in phrase-table)
\item 2nd hypo: "la voiture de course rouge" -> "the red race car", using 3 phrase pairs "la"->"the", "voiture de course"->"race car", "rouge"->"red"
\item Ex.: \emph{la voiture de course rouge a \'et\'e vol\'ee il y a trois jours}
\item 1st hypo: "la voiture de course rouge"\ra"the red race car", using only 1 phrase pair (seen in phrase-table)
\item 2nd hypo: "la voiture de course rouge"\ra"the red race car", using 3 phrase pairs "la"\ra"the", "voiture de course"\ra"race car", "rouge"\ra"red"
\item[\ra] Source word coverage and partial hypotheses identical \Ra\ keep only the one with best score
\end{itemize}
\end{itemize} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment