Commit adcd87f9 authored by Loïc Barrault's avatar Loïc Barrault
Browse files

textprocessing updates

parent 025ecbab
rm ./Exam3110_0506/PDFs/exam0506_3110.pdf
rm ./Exam3110_0506/PDFs/exam0506_3110_soln.pdf
rm ./Exam3110_0506/PDFs/exam0506_6150.pdf
rm ./Exam3110_0506/PDFs/exam0506_6150_soln.pdf
rm ./Exam3110_0506/TP_exam_soln.pdf
rm ./Exam3110_0708/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_0809/exam3110_6150_0809.pdf
rm ./Exam3110_0809/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_0910/Special_Paper/special_exam6150_0910.pdf
rm ./Exam3110_0910/Special_Paper/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_0910/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1011/exam3110_6150_1011.pdf
rm ./Exam3110_1011/past/com3110_0203.pdf
rm ./Exam3110_1011/past/com3110_0304_qus.solns.pdf
rm ./Exam3110_1011/past/com3110_0405_qus.solns.pdf
rm ./Exam3110_1011/past/exam01-02.pdf
rm ./Exam3110_1011/past/exam0506_3110_6150.pdf
rm ./Exam3110_1011/past/exam0506_3110_6150_soln.pdf
rm ./Exam3110_1011/past/exams_old_combined.pdf
rm ./Exam3110_1011/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1112/exam_4115_and_6115.pdf
rm ./Exam3110_1112/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1112/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1213/Compression_Qu_bits/compression_all.pdf
rm ./Exam3110_1213/Compression_Qu_bits/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1213/Compression_Qu_bits/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1213/exam_3110_1213.pdf
rm ./Exam3110_1213/exam_3110_1213_SOLN.pdf
rm ./Exam3110_1213/exam_4115_6115.pdf
rm ./Exam3110_1213/exam_4115_6115_1213.pdf
rm ./Exam3110_1213/exam_4115_6115_1213_SOLN.pdf
rm ./Exam3110_1213/TUOS/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1213/TUOS/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1213/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1314/Exam3110-4115-6115_2013/exam_4115_6115.pdf
rm ./Exam3110_1314/Exam6115_2013_R/exam_6115_resit.pdf
rm ./Exam3110_1314/Exam6115_2013_R/old/exam_3110.pdf
rm ./Exam3110_1314/Exam6115_2013_R/old/exam_4115_6115.pdf
rm ./Exam3110_1314/Exam6115_2013_R/TUOS/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1314/Exam6115_2013_R/TUOS/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1314/Exam6115_2013_R/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1415/exam_3110.pdf
rm ./Exam3110_1415/exam_4115_6115.pdf
rm ./Exam3110_1415/TUOS/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1415/TUOS/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1415/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1516/TUOS/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1516/TUOS/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1516/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1617/TUOS/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1617/TUOS/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1617/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1718/com3110_1718.pdf
rm ./Exam3110_1718/com4115_1718.pdf
rm ./Exam3110_1718/COM6115-Early-Resit/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1718/COM6115-Early-Resit/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1718/com6115_1718.pdf
rm ./Exam3110_1718/com6115_1718S/com6115_1718S-questions-v1.pdf
rm ./Exam3110_1718/com6115_1718S/com6115_1718S-solutions-v1.pdf
rm ./Exam3110_1718/com6115_1718S/com6115_1718S.pdf
rm ./Exam3110_1718/com6115_1718S/exam4115_6115_1617-resit-final.pdf
rm ./Exam3110_1718/com6115_1718S/exam4115_6115_1617-resit.pdf
rm ./Exam3110_1718/com6115_1718S/exam6115_1718S.pdf
rm ./Exam3110_1718/com6115_1718S/tuoslogo_bw_nobo-eps-converted-to.pdf
rm ./Exam3110_1718/com6115_1718S/tuoslogo_bw_nobo.pdf
rm ./Exam3110_1718/MarcomsMCQpaper.pdf
% Exam front matter: title, date, duration and rubric, each wrapped in {\bf ...}.
% NOTE(review): \examtitle, \examdate, \examtime and \rubric are not defined in
% the visible source; presumably they are supplied by the exam class -- confirm.
\examtitle{{\bf Text Processing}}
\examdate{{\bf Autumn Semester 2001-02}}
\examtime{{\bf 2 hours}}
\rubric{{\bf Answer THREE questions.
All questions carry equal weight. Figures in square brackets indicate the
percentage of available marks allocated to each part of a question.}}
%%%% local macros %%%%
% \seq{<symbol>}{<last index>}: typesets the sequence
% <symbol>_1, ..., <symbol>_<last index> in math mode, wrapped in an \mbox.
% NOTE(review): the \- discretionary breaks cannot take effect inside \mbox,
% which is unbreakable -- confirm whether line breaking was intended here.
\newcommand{\seq}[2]{\mbox{$#1_{1},\- \ldots ,\-#1_{#2}$}}
\newenvironment{proof}{\begin{trivlist}\item[\hskip \labelsep {\bf Proof}]}{\nopagebreak
%\newcommand{\psrule}[2]{\mbox{{\rm #1}\ \ $\rightarrow$\ \ {\rm #2}}}
\mbox{{\rm #1}\ \ $\rightarrow$\ \ {\rm #2}\ :\ ${\it #3}$}}
% \begin{qupart}
% \begin{exlist}
% \exitem
% \mypercent{10}
% \exitem
% \mypercent{20}
% \end{exlist}
% \end{qupart}
% Character encoding, compression and text markup
% Character Encoding + Unicode
Schemes for electronic encoding of text which support all human
languages and permit interoperability of software on a global scale
require careful analysis of underlying issues in text
representation and design of appropriate standards.
\exitem Explain each of the following terms and make clear
the relations between them: {\bf language}, {\bf script},
{\bf character}, {\bf glyph}, {\bf font}. Give examples of each.
%\exitem Explain the overall goals and high level design principles
%underlying Unicode.
%What distinguishes Unicode from earlier character coding schemes?
\exitem Describe the Unicode coding model, making clear the
levels in the model, explaining the differences between and the
motivations for UTF-8 and UTF-16, and describing the purpose and
implementation of surrogate pairs. \mypercent{30}
% Text Compression
% \begin{qupart}
% Text compression techniques are important because growth in volume
% of text continually threatens to outstrip increases in storage,
% bandwidth and processing capacity. Briefly explain the differences between:
% \begin{exlist}
% \exitem {\bf symbolwise} (or
% statistical) and {\bf dictionary} text compression methods;
% \mypercent{10}
% \exitem {\bf static}, {\bf semi-static} and {\bf adaptive}
% models for text compression;
% \mypercent{10}
% \exitem {\bf Huffman coding} and {\bf arithmetic coding} methods
% for text compression.
% \mypercent{10}
% \end{exlist}
% \end{qupart}
% SGML + Markup
Information about documents is frequently stored in the document itself
using embedded annotations called ``markup''.
\exitem What is the difference between a markup
metalanguage and a markup language? Give at least two examples of
each.
\exitem What is a DTD? Propose a simple SGML DTD for the abstract
of a journal article. It should require that the abstract
contain one or more author names, a title, a journal name, a
volume number, an issue number, page numbers, and the text of
the abstract. Each author should have an associated
affiliation (e.g.\ their university). Give a simple example
of a fictitious abstract marked up using your DTD.
% Perl
% Data Types + Data Structures (References)
Perl has three basic data types.
\exitem What are these three types? Give an example of each and
indicate how the type of data stored in a variable is conveyed
syntactically by the name of the variable.
%\exitem With these basic types more complex data structures may
%be built using references. Explain what references are in Perl,
%and how they are created and used.
\exitem With these basic types more complex data structures may
be built using references. Describe a data structure that would
be appropriate to hold contact details for a set of persons ---
for each person, a telephone number, an email address and a fax number
are to be held.
%This should be done in a fashion that permits retrieval of, e.g.
%a fax number given the person's name and the keyword {\tt fax}.
\medskip Give Perl code that adds a new person's data to the
structure -- you may assume each piece of data (person name,
telephone number, email address, fax number) is already held in
a distinct named variable. Also give code that extracts a data
element (e.g. fax number) into a variable given the data
structure, a person's name, and a data element keyword (e.g.
{\tt fax}). Explain how your code works.
% Suppose you have a text file containing lines
% of the form:
% \begin{verbatim}
% Phil Jones tel: 766 8970 email: fax: 766 8975
% Tom Smith email: tel: 236 5770 fax: 766 8975
% \end{verbatim}
% Subroutines + Scoping?
Explain the difference between lexical and dynamic scoping of
variables in Perl, and the role of the {\tt my}, {\tt local}
and {\tt our} declarations.
% Regular Expressions
Regular expressions provide a very expressive language for pattern
matching in strings.
\exitem Explain the difference between metacharacters and
metasymbols in Perl regular expressions and give at least two
examples of each.
\exitem Write a Perl regular expression which will match
HTML anchor tags and capture the value of the {\tt HREF}
attribute and the contents of the anchor tag itself.
For example, suppose the following assignment has been made
in a Perl program:
\begin{verbatim}
$s = "<A HREF=\"http://www.cpan.org\"><B>CPAN</B></A>";
\end{verbatim}
Your regular expression should match
such strings and capture the substrings \verb+"http://www.cpan.org"+
and \verb+"<B>CPAN</B>"+.
%{\tt $anchor_href}
Explain how your regular expression works.
% Packages, Modules + OO
% \begin{qupart}
% \mypercent{20}
% \end{qupart}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment