% Commit 2e6468b1 authored by Loïc Barrault
%
% images + NER
%
% parent 83162b0d
% !TEX root = text_processing.tex
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{}
\vfill
\centering
\Huge{\edinred{[Information Extraction]\\Named Entity Recognition}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: Overview}
\begin{itemize}
\item \gray{Introduction to Information Extraction}
\begin{itemize}
\item \gray{Definition + contrast with IR}
\item \gray{Example Applications}
\item \gray{Overview of Tasks}
\item \gray{Overview of Approaches}
\item \gray{Evaluation + Shared Task Challenges}
\item \gray{Brief(est) history of IE}
\end{itemize}
\item \textbf{Named Entity Recognition}
\begin{itemize}
\item \textbf{Task}
\item \textbf{Approaches: Rule-based, Supervised Learning}
\item \textbf{Entity Linking}
\end{itemize}
\item Relation Extraction
\begin{itemize}
\item Task
\item Approaches: Rule-based, Supervised Learning, Bootstrapping, Distant Supervision
\end{itemize}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER - recap}
\begin{block}{\textbf{Entity Extraction/Named Entity Recognition (NER)}}
Task: Identify the \myemph{extent} and the \myemph{type} of each textual mention of an entity\\
The set of types is determined in advance (e.g.\ organisation, person, date, \ldots)
\end{block}
\begin{center}
\begin{tabular}{ll}
\myhl{cyan!40}{Cable and Wireless} today announced \ldots & Extent: 0-3 ; Type = \myhl{cyan!40}{ORG} \\
\myhl{cyan!40}{IBM} and \myhl{cyan!40}{Microsoft} today announced \ldots & Extent: 0-1 ; Type = \myhl{cyan!40}{ORG} \\
& Extent: 2-3 ; Type = \myhl{cyan!40}{ORG} \\
\myhl{brown!90}{John Lewis} hired \ldots & Extent: 0-2 ; Type = \myhl{cyan!40}{ORG} \\
\myhl{brown!90}{Theresa May} hired. & Extent: 0-2 ; Type = \myhl{brown!90}{PER}
\end{tabular}
\end{center}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER - recap}
\textbf{Types of entities addressed by IE systems include:}\\
\begin{itemize}
\item \textbf{Named individuals}
\begin{itemize}
\item Organisations (ORG), persons (PER), books, films, ships, restaurants . . .
\item[\ra] \myhl{cyan!40}{Cable and Wireless} today announced \ldots ; Extent: \textbf{0-3} ; Type = \textbf{ORG} \\
\item[\ra] \myhl{brown!90}{Barack Obama} was the 44th president \ldots ; Extent: \textbf{0-2} ; Type = \textbf{PER} \\
\item Geo-Political entities (GPE), locations (LOC)
\item[\ra] The \myhl{carminered}{Mont Blanc} intersects France, Italy and Switzerland. ; Extent: \textbf{1-3} ; Type = \textbf{LOC} \\
\item[\ra] The Mont Blanc intersects \myhl{carminered!60}{France}, \myhl{carminered!60}{Italy} and \myhl{carminered!60}{Switzerland}. ; Extent: \textbf{4-5} ; Type = \textbf{GPE} \\
\end{itemize}
%\item Named kinds
%\begin{itemize}
%\item Proteins, chemical compounds/drugs, diseases, aircraft components . . .
%\end{itemize}
\item \textbf{Times}: temporal expressions (dates, times of day)
\begin{itemize}
\item[\ra] Let's meet at \myhl{orange}{2pm} next Friday \ldots ; Extent: \textbf{3-4} ; Type = \textbf{TIME} \\
\item[\ra] Let's meet at 2pm next \myhl{orange!50}{Friday} \ldots ; Extent: \textbf{5-6} ; Type = \textbf{DATE} \\
\end{itemize}
\item \textbf{Measures}: monetary expressions, distances/sizes, weights . . .
\begin{itemize}
\item[\ra] This watch costs \myhl{bananayellow}{£35} \ldots ; Extent: \textbf{3-4} ; Type = \textbf{MONEY} \\
\end{itemize}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: coreference - recap}
\begin{block}{\textbf{Coreference}}
Different textual expressions that refer to the same real world entity are said to \myemph{corefer}.
\textbf{Coreference Task}: link together all textual references to the same \myemph{real world entity}.
\end{block}
Multiple references to the same entity in a text are rarely made using the same string:
\begin{itemize}
\item Pronouns: \textbf{Tony Blair} == \textbf{he}
\item Names/definite descriptions: \textbf{Tony Blair} == \textbf{the Prime Minister}
\item Abbreviated forms: \textbf{Theresa May} == \textbf{May}; \textbf{European Union} == \textbf{EU}
\item Orthographic variants: \textbf{alpha helix} == \textbf{alpha-helix} == \textbf{$\bm{\alpha}$-helix} == \textbf{a-helix}
\end{itemize}
\vfill
Can be seen as a separate task or as part of the entity extraction task
\vfill
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: Overview}
\begin{itemize}
\item \gray{Introduction to Information Extraction}
\begin{itemize}
\item \gray{Definition + contrast with IR}
\item \gray{Example Applications}
\item \gray{Overview of Tasks}
\item \gray{Overview of Approaches}
\item \gray{Evaluation + Shared Task Challenges}
\item \gray{Brief(est) history of IE}
\end{itemize}
\item \textbf{Named Entity Recognition}
\begin{itemize}
\item \gray{Task}
\item \textbf{Approaches: Rule-based, Supervised Learning}
\item Entity Linking
\end{itemize}
\item Relation Extraction
\begin{itemize}
\item Task
\item Approaches: Rule-based, Supervised Learning, Bootstrapping, Distant Supervision
\end{itemize}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: Approaches to NER}
\textbf{Knowledge-engineering}
\begin{itemize}
\item leverage linguistic resources created by experts
\end{itemize}
\textbf{Supervised learning}
\begin{itemize}
\item use of machine learning techniques
\end{itemize}
\mycolor{lightgray}{\textbf{Bootstrapping}}<2->
\begin{itemize}
\item \mycolor{lightgray}{Use of \textbf{seed patterns} to identify named entities}<2->
\item \mycolor{lightgray}{Use known named entities to generate new patterns}<2->
\item \mycolor{lightgray}{Rinse, repeat}<2->
\end{itemize}
%https://arxiv.org/ftp/arxiv/papers/1511/1511.06833.pdf
\mycolor{lightgray}{\textbf{Distant supervision / lightly supervised methods}}<2->
\begin{itemize}
\item \mycolor{lightgray}{$\sim$ bootstrapping a machine learning system}<2->
\end{itemize}
%https://www.aclweb.org/anthology/C18-1183/
\vfill
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
Dominant approach in the 1990s and still in use in many IE systems today.
Such systems typically use
\begin{itemize}
\item named entity lexicons and
\item manually authored pattern/action rules or regular expression/FST recognisers
\end{itemize}
Example: NER system, developed for participation in MUC-6
\begin{itemize}
\item described in Wakao et al. (1996) \cite{wakao-etal-1996-evaluation}
\item[\ra] recognises \myemph{organisation}, \myemph{person}, \myemph{location} and \myemph{time} expressions in \textbf{newswire texts}
\end{itemize}
System has three main stages (discourse interpretation covers steps 3 and 4):
\begin{enumerate}
\item Lexical processing
\item NE parsing
\item Discourse interpretation - Coreference Resolution
\item Discourse interpretation - Semantic Type Inference
\end{enumerate}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\begin{enumerate}
\item Lexical processing
\end{enumerate}
Rule-based NER systems use \textbf{specialised lexicons} \ra\ \myemph{gazetteers} (= geographical directory)
The Wakao et al. system has specialised lexicons for:
\vspace{-.5cm}
\begin{columns}
\begin{column}{.37\textwidth}
\begin{itemize}
\item \myemph{Organisations}: \textbf{2600} entries
\item \myemph{Locations}: \textbf{2200} entries
\item \myemph{Person names}: \textbf{500} entries
\end{itemize}
\end{column}
\begin{column}{.63\textwidth}
\begin{itemize}
\item \myemph{Company designators}: e.g. Corp, Ltd – \textbf{94} entries
\item \myemph{Person titles}: e.g. Mr, Dr, Reverend – \textbf{160} entries
\end{itemize}
\end{column}
\end{columns}
\vfill
\only<2->{
Why not use even larger gazetteers?
\begin{itemize}
\item Gazetteer of British Place Names containing over 50,000 entries
\item[\ra] Many NEs occur in multiple categories
\item[\ra] \textbf{The larger the lexicons the greater the ambiguity}
\item Ex.: Ford \Ra\ \myemph{company} or \myemph{person} or \myemph{place}
\item listing of names is never complete \ra\ need a mechanism to type unseen NEs!
\end{itemize}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\begin{enumerate}
\item Lexical processing
\end{enumerate}
Example sentence: ``Norwich Investment Bank plc. today announced \ldots''
\begin{enumerate}
\renewcommand{\theenumi}{1\alph{enumi}}
\item<2-> Tokenisation, sentence splitting, morphological analysis, Part-Of-Speech tagging
\item[\ra]<2-> \scriptsize{\annot{Norwich}{NNP} \annot{Investment}{NNP} \annot{Bank}{NNP} \annot{plc.}{NN} \annot{today}{RB} \annot{announced}{VBD} ...}
\vfill
\item<3-> Gazetteer Lookup and Tagging:
\begin{itemize}
\item \textbf{ORG}anisations, \textbf{LOC}ations, \textbf{PER}sons, company designators (\textbf{CDG}), person titles
\end{itemize}
\item[\ra]<3-> \scriptsize{\annot{Norwich}{NNP/\textbf{LOC}} \annot{Investment}{NNP} \annot{Bank}{NNP} \annot{plc.}{NN/\textbf{CDG}} \annot{today}{RB} \annot{announced}{VBD} ...}
\vfill
\item<4-> Trigger Word Tagging
\begin{itemize}
\item \textbf{trigger words} allow certain multi-word names to be classified
\item[\ra] Ex.: \myemph{Airlines} in ``\textbf{Wing and Prayer} \myemph{Airlines}''
\item system has trigger words for \textbf{ORG}anisations, \textbf{GOV}ernment institutions, \textbf{LOC}ations
\end{itemize}
\item[\ra]<4-> \scriptsize{\annot{Norwich}{NNP/\textbf{LOC}} \annot{Investment}{NNP} \annot{Bank}{NNP/\red{\bf ORG-TRIGGER}} \annot{plc.}{NN/\textbf{CDG}} \annot{today}{RB} \annot{announced}{VBD} ...}
\end{enumerate}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\begin{enumerate}
\setcounter{enumi}{1}
\item NE parsing
\end{enumerate}
Hand-produced rules:\\
\vspace{-.5cm}
\begin{columns}
\begin{column}{.5\textwidth}
\begin{itemize}
\item[]
\begin{itemize}
\item 177 for proper names
\item 94 for organisation
\item 54 for person
\end{itemize}
\end{itemize}
\end{column}
\begin{column}{.5\textwidth}
\begin{itemize}
\item 11 for location
\item 18 for time expressions
\end{itemize}
\end{column}
\end{columns}
\vfill
A fragment of the proper name grammar:
\vspace{-.5cm}
\begin{columns}
\begin{column}{.4\textwidth}
\scriptsize{\begin{itemize}
\item NP $\rightarrow$ ORGAN\_NP
\item NAMES\_NP $\rightarrow$ NNP NAMES\_NP
\item NAMES\_NP $\rightarrow$ NNP
\end{itemize}}
\end{column}
\begin{column}{.6\textwidth}
\scriptsize{\begin{itemize}
\item ORGAN\_NP $\rightarrow$ LIST\_LOC\_NP NAMES\_NP CDG\_NP
\item ORGAN\_NP $\rightarrow$ LIST\_ORGAN\_NP NAMES\_NP CDG\_NP
\item ORGAN\_NP $\rightarrow$ NAMES\_NP '\&' NAMES\_NP
\end{itemize}}
\end{column}
\end{columns}
\vfill
Rule {\scriptsize ``ORGAN\_NP $\rightarrow$ NAMES\_NP '\&' NAMES\_NP''} means:\\
an unclassified \myemph{proper name} (NAMES\_NP) followed by '\&' followed by an unclassified \myemph{proper name} is an \myemph{organisation name}\\
\ra\ \textbf{Marks \& Spencer} or \textbf{American Telephone \& Telegraph}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\begin{enumerate}
\setcounter{enumi}{2}
\item Discourse interpretation - \textbf{Coreference Resolution}
\end{enumerate}
\begin{enumerate}
\renewcommand{\theenumi}{3\alph{enumi}}
\item When the name class of an \textbf{antecedent} (resp. \textbf{postcedent}) is known then establishing coreference allows the name class of the \textbf{anaphor} (resp. \textbf{cataphor}) to be established.
\end{enumerate}
\begin{block}{Anaphora/cataphora}
In a narrower sense, \myemph{anaphora} is the use of an expression that depends specifically upon an \textbf{antecedent} expression and thus is contrasted with \myemph{cataphora}, which is the use of an expression that depends upon a \textbf{postcedent} expression. \source{Wikipedia}
\end{block}
\begin{itemize}
\item Ex1.: \myemph{Ford Motor Co.} was founded in Detroit in 1903. \myemph{It} was the first to introduce...
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\begin{enumerate}
\setcounter{enumi}{2}
\item Discourse interpretation - \textbf{Coreference Resolution}
\end{enumerate}
\begin{enumerate}
\setcounter{enumi}{1}
\renewcommand{\theenumi}{3\alph{enumi}}
\item An unclassified PN may be co-referential with a variant form of a classified PN, e.g.:
\end{enumerate}
\begin{itemize}
\item Ex2.:
\begin{itemize}
\item \myemph{Ford Motor Co.} was founded in Detroit in 1903, ..., \myemph{Ford} was the first to introduce...
\item \myemph{Creative Artists Agency} is a US talent agency ... In 2016, \myemph{CAA} had 1,800 employees
\end{itemize}
\item[\ra] The unclassified PN may be inferred to have the same class as the classified PN.\\
\item[\ra] Wakao et al. use 45 heuristics of this type for organisation, location, and person names.
\end{itemize}
\vspace{.3cm}
\only<2->{
\begin{enumerate}
\setcounter{enumi}{2}
\renewcommand{\theenumi}{3\alph{enumi}}
\item An unclassified PN may be co-referential with a definite NP
\end{enumerate}
\begin{itemize}
\item Ex3.: \myemph{Kellogg}, the breakfast cereal \myemph{\underline{manufacturer}}
\end{itemize}
}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\begin{enumerate}
\setcounter{enumi}{3}
\item Discourse interpretation - \textbf{Semantic Type Inference}
\end{enumerate}
\begin{block}{}
Semantic type information about the arguments in certain \textbf{syntactic relations} is used to make inferences permitting the classification of PNs
\end{block}
\begin{enumerate}
\renewcommand{\theenumi}{4\alph{enumi}}
\item<1-> \myemph{noun-noun qualification}: PN qualifies an organisation-related object \ra\ organisation
\begin{itemize}
\item[\ra] Erickson \textbf{\underline{stocks}} \Ra\ \textbf{\annot{Erickson}{ORG}}
\end{itemize}
\item<2-> \myemph{possessives}: PN stands in a possessive relation to an organisation post \ra\ organisation
\begin{itemize}
\item[\ra] \textbf{\underline{vice president of}} ABC, ABC\textbf{\underline{’s vice president}} \Ra\ \textbf{\annot{ABC}{ORG}}
\end{itemize}
\item<3-> \myemph{apposition}: PN is apposed with a known organisation post \ra\ person name
\begin{itemize}
\item[\ra] Miodrag Jones, \textbf{\underline{president of XYZ}} \Ra\ \textbf{\annot{Miodrag Jones}{PER}}
\end{itemize}
\item<4-> \myemph{verbal arguments}: PN names an entity involved in a verbal frame where the semantic type of the argument position is known \ra\ classify accordingly
\begin{itemize}
\item[\ra] Smith \textbf{\underline{retired from his position}} as \ldots\ \Ra\ \textbf{\annot{Smith}{PER}}
\end{itemize}
\end{enumerate}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\textbf{Evaluation of Wakao et al.}
MUC-6 NE evaluation set: a \textbf{blind test set} of 30 Wall Street Journal articles containing:
\begin{columns}
\begin{column}{.5\textwidth}
\begin{itemize}
\item 449 organisation names
\item 373 person names
\item 110 location names
\item 111 time expressions
\end{itemize}
\end{column}
\begin{column}{.5\textwidth}
\begin{center}
Results:
\scriptsize{
\begin{center}
\begin{tabular}{llll}
\toprule
Proper Name Class & Recall & Precision & F1\\ \midrule
Organisation & 91 \% & 91 \% & 91.0 \%\\
Person & 90 \% & 95 \% & 92.4 \% \\
Location & 88 \% & 89 \% & 88.5 \% \\
Time & 94 \% & 97 \% & 95.5 \% \\ \midrule
Overall & 91 \% & 93 \% & 92.0 \% \\ \bottomrule
\end{tabular}
\end{center}
}
\end{center}
\end{column}
\end{columns}
\vfill
\Ra\ Best system results on this evaluation had F1 measure = 96.42\%\\
\Ra\ Human results were 96.68\%
\vfill
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Knowledge Engineering Approaches}
\vfill
\textbf{Strengths}
\begin{itemize}
\item \textbf{High performance} – only several points behind human annotators
\item \textbf{Transparent} – easy to understand what system is doing/why
\end{itemize}
\vfill
\textbf{Weaknesses}
\begin{itemize}
\item Porting to another domain requires substantial \textbf{rule re-engineering}
\item Acquisition of \textbf{domain-specific lexicons}
\item Rule writing requires high \textbf{levels of expertise}
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: Approaches to NER}
\mycolor{lightgray}{\textbf{Knowledge-engineering}}
\begin{itemize}
\item \mycolor{lightgray}{leverage linguistic resources created by experts}
\end{itemize}
\textbf{Supervised learning}
\begin{itemize}
\item use of machine learning techniques
\end{itemize}
\mycolor{lightgray}{\textbf{Bootstrapping}}
\begin{itemize}
\item \mycolor{lightgray}{Use of \textbf{seed patterns} to identify named entities}
\item \mycolor{lightgray}{Use known named entities to generate new patterns}
\item \mycolor{lightgray}{Rinse, repeat}
\end{itemize}
%https://arxiv.org/ftp/arxiv/papers/1511/1511.06833.pdf
\mycolor{lightgray}{\textbf{Distant supervision / lightly supervised methods}}
\begin{itemize}
\item \mycolor{lightgray}{$\sim$ bootstrapping a machine learning system}
\end{itemize}
%https://www.aclweb.org/anthology/C18-1183/
\vfill
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Supervised Learning}
\begin{block}{Aim of Supervised Learning}
Address the \textbf{portability} / \textbf{generalisation} problems inherent in knowledge engineering NER
\end{block}
\begin{itemize}
\item Manually authoring rules \Ra\ systems learn from \textbf{annotated examples}
\item Moving to a new domain requires only annotated data in the domain
\item[\ra] supplied by \textbf{domain expert} without need for expert computational linguist
\end{itemize}
\vfill
A wide variety of supervised learning techniques have been tried, including:
\vspace{-.3cm}
\begin{columns}
\begin{column}{.5\textwidth}
\begin{itemize}
\item Hidden Markov Models (HMM)
\item Decision Trees
\item Maximum Entropy models
\item Support Vector Machines (SVM)
\end{itemize}
\end{column}
\begin{column}{.5\textwidth}
\begin{itemize}
\item Conditional Random Fields (CRF)
\item AdaBoost
\item Deep Learning
\end{itemize}
\end{column}
\end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Information Extraction: NER: Supervised Learning}
Two types of systems that may learn:
\begin{enumerate}
\item \myemphb{patterns} that match extraction targets \ra\ less developed recently
\item \myemphb{classifiers} that label tokens as \myemph{beginning/inside/outside} a \textbf{tag type}
\item[\ra] Systems operate as \textbf{Sequence Labelling} systems
\end{enumerate}
\only<2>{
In \textbf{sequence labelling}, each token is given one of three label types \myemph{B}, \myemph{I} or \myemph{O}:
\begin{itemize}
\item \myemph{B$_{CLASS}$} if the token is at the \myemph{beginning} of a named entity of class = $CLASS$
\begin{itemize}
\item where CLASS $\in$ \{\textbf{ORG}, \textbf{PER}, \textbf{LOC}, etc.\}