From 6fdaab1cc0f230073a625123dbc9e06b88d9dc32 Mon Sep 17 00:00:00 2001 From: japm48 Date: Sat, 3 Jun 2023 23:07:46 +0200 Subject: [PATCH] Initial Overleaf Import --- fix-unnumbered-sections.sty | 16 + homework.cls | 156 ++++ img_Polyanskiy_mutual.pdf | Bin 0 -> 13417 bytes main.tex | 1765 +++++++++++++++++++++++++++++++++++ 4 files changed, 1937 insertions(+) create mode 100644 fix-unnumbered-sections.sty create mode 100644 homework.cls create mode 100644 img_Polyanskiy_mutual.pdf create mode 100644 main.tex diff --git a/fix-unnumbered-sections.sty b/fix-unnumbered-sections.sty new file mode 100644 index 0000000..7caa7bf --- /dev/null +++ b/fix-unnumbered-sections.sty @@ -0,0 +1,16 @@ +\ProvidesPackage{fix-unnumbered-sections} + +% This package is available at http://tex.stackexchange.com/q/33696/ +% It patches the standard classes so that they treat unnumbered and numbered sections equally! (At least, I've tested it with the article class.) +% As a result, even the unnumbered sections gets entries in the TOC and, when hyperref is loaded, they also get bookmarks as per the default hyperref setting for bookmarks. +% Unnumbered sections can also have short titles for TOC and bookmark purposes, just like numbered sections. +% +% It works by pretending the unnumbered sections are deeper than \c@secnumdepth (in fact, just by assuming that they are 1000 (\@m) levels deep). +% I believe there are no side effects to this... + +% We just pretend that \@ssect, which LaTeX uses to treat unnumbered [sub*]sections differently to (maybe) numbered ones (if they are no deeper than \c@secnumdepth levels), doesn't exist. +% Instead, we always use \@sect, which LaTeX uses for numbered sections, and sections which would otherwise be numbered if they weren't deeper than \c@secnumdepth levels. 
+ + +\RequirePackage{etoolbox} +\patchcmd{\@startsection}{\@ssect{#3}{#4}{#5}{#6}}{\@dblarg{\@sect{#1}{\@m}{#3}{#4}{#5}{#6}}}{}{\PackageError{fix-unnumbered-sections}{Unable to patch \string\@startsection; are you using a non-standard document class?}\@ehd} diff --git a/homework.cls b/homework.cls new file mode 100644 index 0000000..c8ee5dd --- /dev/null +++ b/homework.cls @@ -0,0 +1,156 @@ +\NeedsTeXFormat{LaTeX2e} + +\ProvidesClass{homework}[2014/12/16 Class file for homework assignments] + +\LoadClassWithOptions{article} + + + +% ----- Options --------------------------------------------------------------- +%\newcommand\@opnewpage{0} +%\DeclareOption{newpage}{\renewcommand\@opnewpage{1}} +%\newcommand\@oplargemargins{0} +%\DeclareOption{largemargins}{\renewcommand\@oplargemargins{1}} +%\ProcessOptions + + +% ----- Packages -------------------------------------------------------------- + +% Better fonts with accents +\RequirePackage[T1]{fontenc} + +% Required for starred commands +\RequirePackage{suffix} + +\RequirePackage{parskip} + +% Math symbols +\RequirePackage{amsmath} +\RequirePackage{amsfonts} +\RequirePackage{amsthm} +\RequirePackage{amssymb} +\RequirePackage{centernot} + +% Nice lists +\RequirePackage{enumerate} +\RequirePackage{enumitem} + +% Nice images, figures, and listings +\RequirePackage{graphicx} +\RequirePackage{grffile} +\RequirePackage[all]{xy} +\RequirePackage{wrapfig} +\RequirePackage{fancyvrb} +\RequirePackage{listings} + +% Conditionals +\RequirePackage{ifthen} + +% Header & Page Setup +\RequirePackage{fancyhdr} +%\ifthenelse{\equal{\@oplargemargins}{1}}{}{\RequirePackage{fullpage}} +\RequirePackage{fullpage} + +\RequirePackage[a4paper, + hmargin=1.5cm, + vmargin=1cm, + includeheadfoot]{geometry} + +\setlength{\headheight}{12pt} +\addtolength{\headsep}{0.7cm} +\addtolength{\textheight}{-0.6cm} + +% Links +\RequirePackage{hyperref} + +% ----- Questions ------------------------------------------------------------- +\newcounter{questionCounter} +\newcounter{partCounter}[questionCounter] + + +% +\WithSuffix\providecommand\assignment*{} +\WithSuffix\renewcommand\assignment*[1]{% + % Wrap in minipage so that we don't get a line break anywhere in between + \begin{minipage}{\linewidth}% + \setcounter{partCounter}{0}% + \vspace{.2in}% + \noindent{\bf \large Assignment #1}% + \vspace{0.3em} \hrule \vspace{.1in}% + \end{minipage} +} + + + +% ----- Question Parts -------------------------------------------------------- + +\newenvironment{alphaparts}[0]{% + \begin{enumerate}[label=\textbf{(\alph{partCounter})}]% +}{\end{enumerate}} + +\newenvironment{arabicparts}[0]{% + \begin{enumerate}[label=\textbf{\arabic{questionCounter}.\arabic{partCounter}})]% +}{\end{enumerate}} + +\newcommand{\questionpart}[0]{\stepcounter{partCounter}\item} + + +% ----- Answer Box ------------------------------------------------------------ + +\newcommand{\answerbox}[1]{% +\begin{framed} +\vspace{#1} +\end{framed}} + +% ----- Page Setup ------------------------------------------------------------ + +% Use block style paragraphs +\setlength{\parindent}{0pt} +%\setlength{\parskip}{5pt plus 1pt} +%\setlength{\parskip}{3pt plus 1pt} + +\def\indented#1{\list{}{}\item[]} +\let\indented=\endlist + +% ----- Title & Header -------------------------------------------------------- +% \pagestyle{empty} +\pagestyle{fancy} + +\newcommand{\mysetupheader}{ + % Setup header + % \setlength{\headheight}{15.2pt} + % \setlength{\headsep}{0.2in} + \lhead{\hwclassshort{} -- \hwshorttitle{}}% + \rhead{\hwauthor{}}% + + 
\renewcommand{\headrulewidth}{0.4pt}
+ \renewcommand{\footrulewidth}{0.4pt}
+}
+
+%\if\@opanon%
+ \renewcommand{\maketitle}[0]{%
+ \mysetupheader
+
+ % Setup hrule in header
+ \renewcommand{\headrulewidth}{0pt}
+ \headrule{}
+ % Don't put header on first page
+ \thispagestyle{plain}
+
+ \begin{center}
+ {\LARGE \hwtitle{}}
+
+ {\Large \hwclass{}}
+
+ Student: \hwauthor{} \hwauthorextra{}
+
+ \end{center}
+ \renewcommand{\headrulewidth}{0.4pt}
+ }
+
+% ----- For usage with pandoc converted documents -----------------------------
+
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{1pt}\setlength{\parskip}{0pt}}
+
+% -----------------------------------------------------------------------------
diff --git a/img_Polyanskiy_mutual.pdf b/img_Polyanskiy_mutual.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0c9b942a4c422dfd28333f4d9493920c969d2aa0 GIT binary patch literal 13417 [base85 image data for img_Polyanskiy_mutual.pdf omitted] literal 0 HcmV?d00001 diff --git a/main.tex b/main.tex new file mode 100644 index 0000000..68c0f5b --- /dev/null +++ b/main.tex @@ -0,0 +1,1765 @@
+\documentclass[10pt]{homework}
+
+\usepackage[utf8]{inputenc}
+
+\usepackage{amsmath}
+\usepackage{amssymb}
+
+\usepackage[english]{babel}
+
+\usepackage{blindtext}
+\usepackage{minted}
+\usepackage{braket}
+
+\usepackage{longtable}
+
+\usepackage{parskip}
+
+\usepackage[open,openlevel=1]{bookmark}
+\bookmarksetup{}
+
+\usepackage{fix-unnumbered-sections}
+
+
+% \usepackage{scrextend}
+% \deffootnote{1.5em}{0em}{\textsuperscript\thefootnotemark\,}
+% \setlength{\footnotesep}{11pt}
+
+% \hypersetup{
+% colorlinks=true,
+% urlcolor=blue,
+% }
+
+\newcommand{\hwauthor}{JAPM}
+\newcommand{\hwauthorextra}{}
+\newcommand{\hwtitle}{Notes}
+\newcommand{\hwshorttitle}{Notes}
+
+% CHANGE THESE ONLY ONCE PER CLASS
+\newcommand{\hwclass}{Information Theory}
+\newcommand{\hwclassshort}{Information Theory}
+
+\newcommand{\mysep}{\vspace{0.5em} \hrule \vspace{.1in}}
+\usepackage{stackrel}
+
+\DeclareMathOperator*{\mysimbig}{\scalebox{2.75}{\raisebox{-0.35ex}{$\sim$}}}
+\newcommand{\simover}[1]{
+ \overset
+ {\resizebox{0.27in}{!}{#1}}
+ {\resizebox{0.30in}{!}{$\mysimbig$}}
+}
+
+\newcommand{\pluseq}{\mathrel{+}=}
+\newcommand{\lelem}[1]{% elementary operations (left alignment)
+ \begin{subarray}{l}#1\end{subarray}%
+}
+
+
+\usepackage{bbm}
+
+\usepackage{nicematrix}
+\usepackage{tikz}
+\usetikzlibrary{fit,shapes.geometric}
+\tikzset{highlight/.style={rectangle, draw=black!40, semithick, inner sep=2pt}}
+
+\newcommand{\tikzHlcol}[2]{
+ \tikz \node [highlight, fit=(1-#1) (#2-#1)] {} ;
+}
+\newcommand{\tikzHlrow}[2]{
+ \tikz \node [highlight, fit=(#1-1) (#1-#2), inner sep=1pt] {} ;
+}
+
+\usepackage{array} % for \newcolumntype macro
+\newcolumntype{M}[1]{>{$\displaystyle\quad}p{#1}<{$}}
+
+\usepackage{mathtools}
+
+\usepackage{bm}
+
+% https://tex.stackexchange.com/questions/343494/
+\usepackage{xpatch}
+\makeatletter
+\AtBeginEnvironment{minted}{\dontdofcolorbox}
+\def\dontdofcolorbox{\renewcommand\fcolorbox[4][]{##4}}
+\xpatchcmd{\inputminted}{\minted@fvset}{\minted@fvset\dontdofcolorbox}{}{}
+\xpatchcmd{\mintinline}{\minted@fvset}{\minted@fvset\dontdofcolorbox}{}{} % see https://tex.stackexchange.com/a/401250/
+\makeatother
+
+\newcommand{\indep}{\perp \!\!\! 
\perp} + +\newenvironment{mytable} + { % begin + \bgroup + \centering + \def\arraystretch{2.3}% 1 is the default, change whatever you need + \begin{longtable}{wr{0.34\textwidth}M{0.66\textwidth}} + } + { % end + \end{longtable} + \egroup + } + +\newenvironment{mytextcol} + { % begin + \begin{minipage}[t]{0.6\textwidth} + } + { % end + \end{minipage} + } + +% Use for publications... +%\newcommand{\entr}{\mathcal{H}} + +\usepackage[overlay,absolute]{textpos} +\newcommand\PlaceText[3]{% +\begin{textblock*}{10in}(#1,#2) %% change width of box from 10in as you wish +#3 +\end{textblock*} +}% + +\begin{document} + +\PlaceText{0.88\textwidth}{\voffset+1.6cm}{Version: \texttt{\detokenize{2023_06_03_0}}} +\mysetupheader +% TODO: fix this... +% \vspace{1cm}\phantom{M} +%%%% +The author of this document is the sole responsible for all the typos/mistakes found in it and would be grateful to receive corrections and constructive feedback in general. +\\[-20pt] +\section*{References} +\begin{itemize} + \item + Stefan Höst, \emph{Information and Communication Theory}, 1st ed., ISBN: 978-1-119-43378-1, Lund University. + +% TODO: add McEliece book !! + + \item Raymond W.\@ Yeung, \emph{Information Theory and Network Coding}, 1st ed., ISBN: 978-0-387-79234-7, CUHK. +1 + \item + Akshay Krishnamurthy, Aarti Singh, \emph{10-704 Lecture Notes}, Winter 2016-2017, CMU. + \\ + \url{https://www.cs.cmu.edu/~aarti/Class/10704_Spring15/lecs.html} + + \item + Eirik Rosnes, \emph{INF 144}, Spring 2017, UiB. + \\ + \url{https://folk.uib.no/st03333/INF144_2017/Lectures/} + +% TODO: http://isl.stanford.edu/~abbas/ee376b/lect02.pdf + \item + David Tse, \emph{EE376A Lecture Notes}, Winter 2016-2017, Stanford University. + \\ + \url{https://tselab.stanford.edu/mirror/ee376a_winter1617/lectures.html} + + \item + Stefan M. Moser, \emph{IT Lecture Notes}, version 6.9. ETHZ. + \\ + \url{https://moser-isi.ethz.ch/docs/it_script_v69.pdf} + + % https://www.icg.isy.liu.se/courses/infotheory/ + +% Robert Gallager +% https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-450-principles-of-digital-communications-i-fall-2006/lecture-notes/ + +% https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-451-principles-of-digital-communication-ii-spring-2005/readings-and-lecture-notes/ + + \item + Yury Polyanskiy, Yihong Wu, \emph{Lecture notes on Information Theory}, v.\@\! 2019-May-15, MIT LIDS. + \\ + Too advanced! 
(but really good), requires measure theory notions.\\
+ \url{http://people.lids.mit.edu/yp/homepage/data/itbook-export.pdf}
+\end{itemize}
+
+\section*{Probability}
+
+\subsection*{Basic definitions}
+
+\begin{mytable}
+
+Conditional probability: &
+P(A|B) = \frac{P(A \cap B)}{P(B)}
+\,;\quad
+p_{X|Y}(x|y) = \frac{p_{X,Y}(x,y)}{p_Y(y)}
+\\[-8pt]&
+\text{given }P(B)\neq 0,\, p_Y(y)\neq 0
+\\
+Independence ($X \indep Y$):
+&
+p_{XY}(x,y) = p_{X}(x)\cdot p_{Y}(y)
+\iff
+p_{X|Y}(x|y) = p_{X}(x),\, \forall y\, (\text{with } p(y)\neq 0)
+\\
+
+Marginalization (total probability): &
+P(X=x) = \sum_{y\in \mathcal Y} P(X=x,Y=y) = \sum_{y\in \mathcal Y} P(Y=y)\cdot P(X=x|Y=y) \\
+
+ &
+p_X(x) = \sum_{y\in \mathcal Y} p_{XY}(x,y) = \sum_{y\in \mathcal Y} p_Y(y)\cdot p_{X|Y}(x|y) \\
+
+Expectation: &
+\mathbb{E}[X] = \sum_{x\in \mathcal X} x\cdot p_X(x) = \mu_X; \quad
+\mathbb{E}[g(X)] = \sum_{x\in \mathcal X} g(x)\cdot p_X(x)
+\\
+
+Variance: &
+\mathbb{V}[X] = \mathbb{E} [(X-\mu_X)^2] = \sigma_X^2 \text{, where } \mu_X = \mathbb{E}[X]\\
+Power (Mean Square): & \mathbb{E}[X^2] = \mathbb{V}[X] + (\mathbb{E}[X])^2
+\\
+
+Covariance: &
+\text{Cov}(X,Y) = \mathbb{E}_{XY} [(X-\mu_X)\cdot (Y-\mu_Y)] = \mathbb{E}_{XY} [X\cdot Y] -\mu_X \cdot \mu_Y
+\\
+
+RV linear combination: &
+Y = \sum_{i=1}^{N}\alpha_i\cdot X_i \implies
+\mathbb{E}[Y]=\sum_{i=1}^{N}\alpha_i\cdot \mathbb{E}[X_i]
+\\ &
+\mathbb{V}[Y] = \sum_{i=1}^{N}\alpha_i^2\cdot \mathbb{V}[X_i]
++ 2\cdot \sum_{i=1}^{N}\sum_{j=i+1}^{N} \alpha_i\cdot\alpha_j\cdot\text{Cov}(X_i, X_j)
+\\
+
+Entropy (discrete RV): &
+H(X) = \mathbb{E}_X[-\log_2(p_X(X))]\text{ bits};
+\quad H_e(X) = \ln 2\cdot H(X)\text{ ``nats''}
+\\[-8pt]
+& H(X) = H(p_1, p_2, \cdots, p_n) = \sum_i p_i\cdot \log \frac{1}{p_i}
+\\
+
+Differential entropy (continuous RV): &
+H_\text{Dif}(X) = \mathbb{E}_X[-\log_2(f_X(X))] = \int_{\mathbb{R}} f_X(x)\cdot\log_2 \frac{1}{f_X(x)}\, dx, \newline\phantom{M} (X \text{ with pdf } f_X)
+\\\pagebreak
+
+Wide-Sense Stationary (WSS) process: &
+\mathbb{E}[X_i] = \mathbb{E}[X_1]; \quad r_{XX}(n, n+k) = r_{XX}(k)
+\\
+(Strongly/Strictly) Stationary process: &
+p_X(x_{1},x_{2}, \cdots ,x_{n}) =
+p_X(x_{1+\Delta},x_{2+\Delta}, \cdots ,x_{n+\Delta})
+\newline\phantom{M}
+\text{(time shift invariance)}
+\\
+
+Ergodic process: &\text{WSS and (in mean) }
+\hat\mu_X = \langle X_i \rangle_N = \frac{1}{N}\sum_{i=1}^{N} X_i = \mathbb{E}[X_0] = \mu_X\\
+% &\text{(in autocovariance) }\newline\phantom{MM} \hat r_{XX}(\tau) = (????) = \mathbb{E}[(X_t-\mu_{X_t})(X_{t-\tau}-\mu_{X_{t-\tau}})] = r_{XX}(\tau)
+% \\
+\end{mytable}
+
+\subsection*{Basic properties}
+
+\begin{mytable}
+Bayes' theorem: &
+p_{X|Y}(x|y) =
+\frac{p_{Y|X}(y|x)\cdot p_X(x)}{p_Y(y)} =
+\frac{p_{Y|X}(y|x)\cdot p_X(x)}{
+ \sum\limits_{x^\prime\in\mathcal{X}} p_{Y|X}(y|x^\prime)\cdot p_X(x^\prime)
+}
+\\
+
+Chain rule: &
+P(X_1,X_2, \dots, X_n) = P(X_1,X_2, \dots, X_{n-1})
+\cdot P(X_n|X_1, X_2, \dots, X_{n-1})
+\\
+ &
+= \cdots =
+\overbrace{\underbrace{P(X_1)\cdot P(X_2|X_1)}_{P(X_1,X_2)}
+\cdot P(X_3|X_1,X_2)}^{P(X_1,X_2,X_3)} \cdots
+ P(X_n|X_1, X_2, \dots, X_{n-1})
+\\[-4pt]
+ &
+= \prod_{i=1}^n P(X_i | X_1, X_2, \dots, X_{i-1}), \quad \text{(convention: first factor is $P(X_1)$)}
+\\
+
+Markov's inequality: &
+P(X>a) \le \frac{\mathbb{E}[X]}{a}
+\quad \text{($X$ is a nonnegative RV)}
+\\
+Chebyshev's inequality: &
+P\left(\left|X-\mathbb{E}[X]\right|>\varepsilon\right) \le \frac{\mathbb{V}[X]}{\varepsilon^2};
+\quad
+P\left(\left|X-\mathbb{E}[X]\right|>\frac{\sigma_X}{\varepsilon}\right) \le \varepsilon^2
+\\
+Weak Law of Large Numbers (LLN): & Y_N = \frac{1}{N}\sum_{i=1}^{N} X_i \quad (X_i \text{ i.i.d.});
+\quad
+P\left(\left|Y_N-\mathbb{E}[X]\right|>\varepsilon\right) \le \frac{\mathbb{V}[X]}{N\cdot\varepsilon^2}
+\\&
+\lim_{N\to\infty} P\left(\left|Y_N-\mathbb{E}[X]\right|<\varepsilon\right) = 1;
+\quad
+Y_N \xrightarrow[]{P} \mathbb{E}[X]
+\\
+Convergence in probability:
+& Y_N \xrightarrow[]{P} Y \iff \lim_{N\to\infty} P\left(\left|Y_N-Y\right|>\varepsilon\right) = 0,\ \forall\varepsilon>0
+\\
+Central Limit Theorem (CLT): & Y_N = \frac{1}{N}\sum_{i=1}^{N} X_i \quad (X_i \text{ i.i.d., } \mathbb{E}[X_i] = \mu,\,\mathbb{V}[X_i]=\sigma^2)
+\\&
+\frac{Y_N-\mu}{\sigma/\sqrt{N}}
+\,\simover{$(N\to\infty)$}\,
+\mathcal{N}(0,1)
+\\
+Convexity:&
+f(\lambda \cdot x_1+(1-\lambda) \cdot x_2) \le
+ \lambda\cdot f( x_1 ) + (1-\lambda)\cdot f( x_2 ),\, \forall\lambda \in [0,1]
+\\
+Concavity:& \text{$f$ is convex $\iff$ $-f$ is concave}
+\\
+
+Jensen's inequality: & g \text{ is convex} \implies \mathbb{E}[g(X)] \ge g(\mathbb{E}[X])
+\\
+
+Log-sum inequality: & \sum_i a_i \cdot \log \frac{a_i}{b_i} \ge
+A \cdot \log \frac{A}{B}; \quad A =\sum_i a_i; \, B =\sum_i b_i; \, a_i\ge 0,\,b_i\ge 0
+\\
+
+IT inequality: & \log_B (r) \le (r-1)\cdot \log_B(e); \\[-10pt]&
+\text{``$\log x$ is concave so it lies below its tangent line at $(1,0)$''}
+\\
+
+\end{mytable}
+
+\newpage
+\subsection*{Discrete Distributions}
+
+Hint -- Use combinatorics: \url{https://en.wikipedia.org/wiki/Urn_problem}
+
+\begin{mytable}
+
+Uniform (discrete): & X\sim U(n),\quad
+p_X(x) = \frac{1}{|\mathcal{A}|}\cdot \mathbbm{1}_\mathcal{A}(x),\quad \mathcal{A} = \{0,\dots, n-1\} \subset \mathbb{N}, \, |\mathcal{A}| = n
+\\
+& \mathbb{E}[X] = \frac{n-1}{2}; \quad \mathbb{V}[X] = \frac{n^2-1}{12};
+\quad H(X) = H\left(\tfrac{1}{n}, \tfrac{1}{n}, \dots, \tfrac{1}{n}\right) = \log_2 n
+\\
+
+Bernoulli:&\text{``heads or tails (loaded coin), black/white ball, success/failure, 1 or 0, ...''}
+\\[-8pt]
+&
+X\sim\text{Be}(p); \quad
+P(X=\text{``1''}) = p = 1 - P(X=\text{``0''})
+\\[-12pt]
+& \mathbb{E}[X] = p; \quad \mathbb{V}[X] = p\cdot (1-p);
+\quad H(X) = h_2(p) = H(p, 1-p)
+\\[-6pt]
+&\begin{mytextcol}
+``Probability of $k$ successes (with prob.\,$p$) in $n$ attempts'' = \\= ``Probability of a binary string of length $n$ with $k$ ones'' =\\ = $P(\bm X=\bm x)=p^k\cdot (1-p)^{n-k}$
+\end{mytextcol}
+\\
+Geometric: &
+\text{``infinite attempts where the first success is at the }k\text{-th attempt''}
+\\ &
+X\sim\text{Ge}(p),
+\quad p=P(X=\text{``}\underbrace{\text{00...01}}_k\text{...''})
+\\
+&
+p_X(k)= p\cdot (1-p)^{k-1};
+\quad
+\mathbb{E}[X] = 
\frac{1}{p}; \quad \mathbb{V}[X] = \frac{1-p}{p^2}; \quad H(X) = \frac{h_2(p)}{p} +\\ +Binomial: +& +\text{``number of ones (successes) out of }n\text{ attempts (with replacement)''} +\\& +X\sim \text{Bin}(n,p), +\quad p=P(X_{\text{Bern}(i)}=\text{``1''}) +\\& +p_X(k) = \binom{n}{k}\cdot p^k\cdot (1-p)^{n-k} +\\& +\mathbb{E}[X] =n\cdot p ; \quad \mathbb{V}[X] = n\cdot p\cdot (1-p); \\& H(X) = \frac{1}{2}\cdot \log_2\left(2\pi \cdot e\cdot n\cdot p\cdot (1-p)\right) + \mathcal{O}(1/n) +\\ + +Hypergeometric: +& +\begin{mytextcol} +``number of black balls (successes) out of $n$ total balls drawn \emph{without replacement} from an urn with initially $N$ balls, of which $K$ are black''. +\end{mytextcol} +% +\\& +X\sim \text{Hypergeometric}(N,K,n);\quad \left(n=1\implies X\sim Be(p), \, p=K/N\right) +\\& +p_X(k) = \frac{\binom{K}{k}\binom{N-K}{n-k}}{\binom{N}{n}} +;\quad \mathbb{E}[X] =n\cdot \frac{K}{N} +\\& +\begin{mytextcol} +\footnotesize{Sometimes `hypergeometric' refers to the multivariate hypergeometric distribution (multiple colors), here we refer only to the \emph{bivariate} distribution (2 colors: black and white). Also, this is different from the `multinomial distribution' which is the `with replacement' case, the generalization of the binomial distribution.} +\end{mytextcol} +\end{mytable} + +%\pagebreak +\subsection*{Continuous Distributions} +\begin{mytable} +Uniform (continuous): & X\sim U(a, b),\quad +f_X(x) = \frac{1}{\Delta}\cdot \mathbbm{1}_\mathcal{A}(x),\quad \mathcal{A} = [a, b] \subset \mathbb{R}, \, \Delta = b - a +\\ +& \mathbb{E}[X] = \frac{a+b}{2}; \quad \mathbb{V}[X] = \frac{\Delta^2}{12}; \quad H_\text{Dif}(X) = \log_2 \Delta +\\ + +Gaussian distribution: & X \sim \mathcal N(\mu, \sigma), \,\sigma > 0 +\\ +& f_X(x)=\frac{1}{\sqrt{2\pi\sigma^2}}\cdot +\exp\left[-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2\right] +\\ +& \mathbb{E}[X] = \mu; \quad \mathbb{V}[X] = \sigma^2; \quad H_\text{Dif}(X) = +\frac{1}{2} +\log_2(2\pi\cdot e\cdot \sigma^2) +\\ + +Exponential distribution: & X \sim \text{Exp}(\lambda), \,\lambda > 0 \implies +f_X(x) = \lambda \cdot e ^ {-\lambda x}, \, x\ge 0 +\\ +& \mathbb{E}[X] = \frac{1}{\lambda}; \quad \mathbb{V}[X] = \frac{1}{\lambda^2} +\\ +\end{mytable} + +\newpage +\section*{Information and entropy} + +\begin{mytable} +Hartley's information measure: & I_H(X) = \log k = \log |\mathcal{X}| +\\[-8pt]& +\text{Number of possible outcomes (disregarding probabilities).} +\\[-8pt]& +I_H(X_1,X_2, \cdots, X_n) = \log k^n = n\cdot I_H(X) +\\ +Mutual information (between events): +& I(A;B) = \log \frac{P(A|B)}{P(A)} = \log \frac{P(A\cap B)}{P(A)\cdot P(B)} = I(B;A) +\\ +Bounds on $I(A;B)$: +& +-\infty \le I(A;B) \le \min \{ + -\log P(A), \, -\log P(B) +\} +\\& +\begin{cases} + P(A|B) = 0 \implies + ( B \Rightarrow \lnot A ) \implies + A \cap B = \varnothing \implies + I(A;B) = -\infty +\\ + P(A|B) = 1 \implies + ( B \Rightarrow A ) \implies + A \subseteq B \implies + I(A;B) = -\log P(A) +\\ + P(B|A) = 1 \implies + ( A \Rightarrow B ) \implies + B \subseteq A \implies + I(A;B) = -\log P(B) +\\ + P(A|B) = P(A) \implies + A \indep B \implies I(A;B) = 0 +\end{cases} +\\ +Self-information (of an event):& +I(A) = I(A;A) = -\log P(A) \ge 0 +\\ +Entropy (of a discrete RV):& +H(X) = \mathbb{E}_x[I(X=x)] = \mathbb{E}_X[-\log p_X(X)] = -\sum_x p_X(x)\cdot \log p_X(x) +\\[-12pt]& +H(X)\ge 0 +\\[-8pt]& +\text{Notation: }H(p_1, p_2, \cdots, p_n) = -\sum_i p_i\cdot \log p_i, \,\,\, \text{ given } \sum_i p_i = 1, p_i\ge 0 +\\ +Binary entropy function: +& h_2(p) = 
H(p, 1-p),\, 0\le p\le 1; \, \quad \frac{\partial h_2(p)}{\partial p} = \log_2 \frac{1-p}{p} = -\mathrm{logit}_2\,p
+\\
+Entropy bounds:& 0 \le H(X) \le \log k, \,\, k = |\mathcal{X}|
+\\&
+H(X)=0 \Rightarrow X\text{ deterministic};\,H(X)=\log k \Rightarrow X\sim\text{Uniform}(k)
+\\&
+H(X) \le \log k = I_H(X=x) = H\left(\frac{1}{k}, \frac{1}{k}, \cdots, \frac{1}{k}\right)
+\\
+Property for 3 outcomes:
+&H(p_1,p_2,p_3) = h(p_1) + (1-p_1)\cdot h\left(\frac{p_2}{1-p_1}\right), \text{ given }p_1\neq 1
+\\
+Property for $N$ outcomes:
+&H(p_1,p_2,\cdots,p_N) = h(p_1) + (1-p_1)\cdot H\left(\frac{p_2}{1-p_1},\frac{p_3}{1-p_1},\cdots \frac{p_N}{1-p_1} \right) \newline\phantom{M}\text{given }p_1\neq 1
+\\
+Permutation invariance:
+&
+\begin{mytextcol}
+$H(X) = H(f(X))$, for $f$ injective (usually bijective, i.e.~invertible)\\ (a.k.a.\@ invariance under relabeling)\\[5pt]
+In general: $H(X) \ge H(f(X))$
+\end{mytextcol}
+\\
+Joint entropy: &
+H(X,Y) = \mathbb{E}_{XY}[-\log p(X,Y)] = -\sum_x \sum_y p_{XY}(x,y)\cdot \log p_{XY}(x,y)
+\\[-8pt] &
+H(X_1,X_2, \cdots, X_n) = \mathbb{E}_{\bm X}[-\log p(\bm X)]
+\\
+Entropy conditioned on an outcome: &
+H(X|Y=y) = H(X|\{Y=y\}) = - \sum_x p_{X|Y}(x|y) \cdot \log p_{X|Y}(x|y) \\
+Conditional entropy: &
+H(X|Y) = \mathbb{E}_y[H(X|\{Y=y\})] = \sum_y H(X|\{Y=y\}) \cdot p_Y(y)
+\\&
+H(X|Y) = \mathbb{E}_{XY}[-\log p_{X|Y}(X|Y)] = -\sum_x \sum_y \underbrace{p_{XY}(x,y)}_\text{!!!}\cdot \log p_{X|Y}(x|y)
+\\\pagebreak
+Mutual information (between RVs):
+& I(X;Y) = \mathbb{E}_{XY}[I(X=x;Y=y)] = I(Y;X)
+\\ &
+I(X;Y) = H(X) - H(X|Y) = H(X) + H(Y) - H(X,Y)
+\\&
+\phantom{MM}\includegraphics[width=0.3\textwidth]{img_Polyanskiy_mutual.pdf} \text{\footnotesize{(source: Y.\@ Polyanskiy IT\,lectures)}}
+\\ &
+I(X;Y) =\mathbb{E}_{XY}\hspace{-1.4mm}\left[\log \frac{p_{XY}(x,y)}{p_X(x)\cdot p_Y(y)}\right] = \sum_{x,y}p_{XY}(x,y)\cdot \log\frac{p_{XY}(x,y)}{p_X(x)\cdot p_Y(y)}
+\\
+Conditioning reduces entropy: &
+I(X;Y) = H(X) - H(X|Y) \ge 0 \Rightarrow H(X) \ge H(X|Y) \text{, equal iff }X\indep Y
+\\
+RV self-information is entropy: &
+H(X|X) = 0 \implies I(X;X) = H(X)
+\\
+Mutual info.\,is bounded by entropy:
+& H(X) \ge H(X) - H(X|Y) = I(X;Y)
+\\[-8pt]&\phantom{M}
+\implies I(X;Y) \le \min\{H(X),H(Y)\}
+\\[-8pt]&
+\text{Also: } \max_{p_X(x)} H(X) = \log |\mathcal X| = I_H(X) \\[-8pt]&\phantom{M}
+\implies
+I(X;Y) \le \min\{\log |\mathcal X|,\log |\mathcal Y|\}
+\\
+Conditional mutual information: &
+I(X;Y|Z) =
+\mathbb{E}_{XYZ}\left[\log
+ \frac{p(X,Y|Z)}{p(X|Z)p(Y|Z)}
+\right] =
+\sum_{x,y,z} p(x,y,z)\log
+ \frac{p(x,y|z)}{p(x|z)p(y|z)}
+\\&
+I(X;Y|Z) =
+ H(X|Z)+H(Y|Z)-H(X,Y|Z)
+\\&
+I(X;Y|Z) =
+ H(X|Z)-H(X|Y,Z)=H(Y|Z)-H(Y|X,Z)
+\\
+Chain rule for entropy: &
+H(X,Y) = H(X|Y) + H(Y) = H(Y|X) + H(X) \le H(X) + H(Y)
+\\&
+H(X_1, X_2, \cdots, X_n) = \sum_{i=1}^n H(X_i|X_1,X_2,\cdots X_{i-1}) \le \sum_{i=1}^n H(X_i)
+\\
+KL divergence (a.k.a.\,relative entropy):
+&
+D(p||q) = \mathbb{E}_p\left[\log\frac{p(X)}{q(X)}\right] =
+\sum_x p(x) \cdot \log\frac{p(x)}{q(x)} \ge 0
+\\&
+\text{in general:}
+\begin{cases}
+\text{no symmetry,}&
+ D(p||q) \neq D(q||p)\\
+\text{no triangle inequality,}&
+D(p||q) + D(q||r) \ngeq D(p||r)
+\end{cases}
+\\
+Mutual information as KL div.:
+&
+I(X;Y) = D(p_{XY}(x,y)||p_X(x)\cdot p_Y(y)) \ge 0
+\\
+Entropy rate (of a random process): &
+H_\infty(X) = \lim_{n\to\infty}\frac{1}{n}\cdot H(X_1,X_2,\cdots,X_n)
+\\
+\emph{Alternative} entropy rate: &
+H(X|X^\infty) = \lim_{n\to\infty} H(X_n|X_1,X_2,\cdots,X_{n-1})
+\\[-12pt]
+&\text{Equality in stationary processes: } H_\infty(X) = H(X|X^\infty)
+\\
+Bounds for stationary processes: &
+0\le H_\infty (X)\le H(X) \le \log k; \quad H(X) = H(X_i)
+\end{mytable}
+
+\section*{Markov chains}
+
+\begin{mytable}
+
+Markov property: &
+P(X_i | X_{i-1}, \cdots, X_1) = P(X_i|X_{i-1}),\,\,\forall i>1\\
+&\text{The RVs } X_i\text{ are in a total order (\emph{chain}): } X_1\to X_2 \to \cdots \to X_N
+\\
+Time-invariant (TI) Markov chains: &
+p(x_i|x_{i-1}) = p(x_{i+\ell}|x_{i-1+\ell}) \text{ (assumed in general)}
+\\
+Chain rule for Markov chains: &
+P(X_1,X_2, \dots, X_N) = \prod_{i=1}^N P(X_i | X_1, X_2, \dots, X_{i-1}) =
+\\
+ &
+= P(X_1)\cdot \prod_{i=2}^N P(X_i | X_{i-1})
+\stackrel{\text{(TI)}}{=}
+ P(X_1)\cdot \left( P(X_2 | X_{1})\right)^{N-1}
+\\
+ &
+H(X_1,X_2,\cdots,X_N) =
+ H(X_1) + \sum_{i=2}^{N} H(X_i|X_{i-1})
+\newline\phantom{MMMMMMMM...}
+\stackrel{\text{(TI)}}{=}
+H(X_1) + (N-1)\cdot H(X_2|X_{1})
+\\
+State transition matrix:&
+\mathcal P = [p_{ij}]; \quad p_{ij} = p_{X_2|X_1}(x_j|x_i), \quad i\text{-th row adds up to }1
+\\[-6pt]&
+\pi_k^{(n)} = P(X_n=x_k),
+\quad \bm{\pi}^{(n+1)} = \bm{\pi}^{(n)}\cdot \mathcal P
+\quad \text{($\bm \pi^{(n)}$ are row vectors)}
+\\
+
+Asymptotic distribution:&
+\bm{\pi} = \lim_{n\to \infty} \bm{\pi}^{(n)} = \lim_{n\to \infty} \bm{\pi}^{(0)} \cdot \mathcal P^n; \quad \lim_{n\to \infty} \mathcal P^n =
+\bgroup\def\arraystretch{1}
+\begin{pmatrix}
+ \bm{\pi} \\
+ \vdots \\
+ \bm{\pi} \\
+\end{pmatrix}\egroup ; \quad \bm\pi\cdot \mathcal P = \bm\pi
+\\
+Existence (and uniqueness) of $\bm\pi$:
+& \exists n_0 >0 \text{ s.t.\,all entries of } \mathcal P^{n_0} \text{ are strictly positive (nonzero)} \implies \exists!\bm \pi
+\\[-12pt] &
+\text{(particular case of Perron–Frobenius thrm.\@ for strongly connected graphs)}
+\\
+
+Computation of $\bm\pi$:
+&
+\begin{cases}
+ \bm\pi \cdot (\mathcal P-I) = 0 & \text{ (rank $N-1$: $\mathcal P-I$ has a one-dimensional nullspace)} \\
+ \sum_k \pi_k = 1 & \text{ (extra equation needed for full rank)}
+\end{cases}
+\\
+$H_\infty(X)$ of a stationary Markov chain: &
+H_\infty(X) = H(X_2|X_1)
+\\[-12pt]&
+H_\infty(X) = \sum_i \pi_i\cdot H(X_2|X_1=x_i); \quad
+H(X_2|X_1=x_i)=-\sum_j p_{ij}\log p_{ij}
+\\[-12pt]&
+H_\infty(X)\text{ is the min.\,average info required to encode one transition.}
+\\
+Data processing lemma:
+& X\to Y\to Z \text{ is a Markov chain} \implies
+\begin{cases}
+ I(X;Z) \le I(X;Y) \\
+ I(X;Z) \le I(Y;Z)
+\end{cases}
+\\
+&
+I(X; Z) = H(X) - H(X|Z) \le H(X) - H(X|YZ) =\newline\phantom{MMMMMMMMMMM.M}= H(X) - H(X|Y) = I(X;Y)
+\end{mytable}
+\section*{Source coding}
+
+\begin{mytable}
+Compression ratio: & R = \frac{\#\text{Source bits}}{\#\text{Compressed bits}}, \quad \text{($R > 1$, except in pathological cases)}
+\\
+Classification of source codes: &
+\begin{mytextcol}
+\begin{itemize}
+ \item Non-singular codes: coding is injective (lossless compression)
+
+ $\bm x_1 \neq \bm x_2 \implies C(\bm x_1) \neq C(\bm x_2)$
+ \item Uniquely decodable codes: sequence of symbols is unambiguously decodable (\emph{extension} code is non-singular)
+
+ Sequences $\bm x_1^n \neq \bm x_2^m \implies C_{ext}(\bm x_1^n) \neq C_{ext}(\bm x_2^m)$
+
+ where $C_{ext}(\bm x^n) = \text{Concat}(C(\bm x_{(1)}),...,C(\bm x_{(n)}))$
+ \item Prefix codes: no codeword is a prefix of any other codeword. 
+\end{itemize} +\end{mytextcol} +\\ +Average codeword length: & +L=\mathbb{E}[\ell_x] = \sum_x p_X(x)\cdot\ell_x +\\ +Path length lemma: & +L=\sum_{n_i\in\text{\,``inner nodes''}} p(n_i) +\quad \text{ (valid for prefix codes)} +\\ +Kraft inequality: & +\sum_{i=1}^k D^{-\ell_i}\le 1 \text{ (valid for }D\text{-ary prefix codes)} +\\[-4pt] & +\text{all prefix codes must obey this constraint.} +\\ +McMillan inequality: & +\begin{mytextcol} +Kraft inequality also holds for $D$-ary uniquely decodable codes.\\ +Therefore, those are also subject to the same constraint. +\end{mytextcol} +\\ +$L$, lower bound: & +L \ge H_D(X) = \frac{H(X)}{\log_2 D} +\\ & \text{equality if all }\ell_x =-\log_D p(x) +\text{ (only possible if all are integers)} +\\ +$L$, achievable upper bound: & + L < H_D(X)+1 \text{ (for some }D\text{-ary prefix code, guaranteed to exist)} +\\ +Shannon-Fano code & +\begin{mytextcol} +Prefix code with $\displaystyle\ell_{x_i} = \lceil -\log_D p(x_i) \rceil $ +\\[8pt] +$L$ below upper bound, may have unused leaves, not optimal in general +\end{mytextcol} +\\ +Fano code: & +\begin{mytextcol} + ``Binary partition into sets as equiprobable as possible''.\\[3pt] + $L$ is similar to Shannon-Fano code.\\[3pt] + Provided sorted probabilities: $ p_1\ge p_2\ge\cdots\ge p_k$, \\[3pt] + recursively split at index $q$ that minimizes: + $\displaystyle + \left|\sum_{i=1}^q p_i - \sum_{i=q+1}^k p_i\right| + $ +\end{mytextcol} +\\ + +Huffman code: & +\begin{mytextcol} +Tree algorithm: replace smallest (least prob.)\@ 2 nodes with an inner node with their sum; repeat until only one node (root) remains.\\[3pt] +Optimal: no better binary prefix code in terms of $L$. + +Note: [verify this claim??] if an optimal code is built using a distribution $q(x)$, but it is applied to a source with a distribution $p(x)$, then the resulting average codeword length is equal to the cross-entropy (used in MachLearn field): $L = \mathbb{E}_p[q] = - \sum p(x) \log q(x)$ +\end{mytextcol} +\\ + +Optimal code for i.i.d.\,sequences: & +\bm{X} = (X_1,\cdots,X_n); \quad \bm{x} \xmapsto[]{\text{Huff}_n} +\bm{y}; \quad |\bm{y}| = \ell^{(n)}_{\bm{x}}; \quad L^{(n)} = \mathbb{E}[\ell^{(n)}_{\bm{x}}] +\\& +H(X_1,\cdots X_n) \le L^{(n)} \le H(X_1,\cdots X_n) + 1 +\\& +\xRightarrow[]{\text{i.i.d}} +n\cdot H(X_1) \le L^{(n)} \le n\cdot H(X_1) + 1; \quad H(X) = H(X_1) +\\ +Optimal $L$ for i.i.d.\,sequences: & +H(X) \le L \le H(X) + \frac{1}{n} +;\quad +\left[L= +\frac{\mathbb E[\ell_{\bm x}]}{n}=\frac{L^{(n)}}{n}\right] +\\& +\lim_{n\to \infty} L = H(X) \quad \text{(length per symbol $L$ is optimized as $n$ increases)} +\\ +Optimal $L$ for ergodic processes: & +H(X_1,\cdots X_n) \le L^{(n)} \le H(X_1,\cdots X_n) + 1 +\\& +\implies \frac{1}{n}\cdot H(X_1,\cdots X_n) \le L \le \frac{1}{n} \cdot H(X_1,\cdots X_n) + \frac{1}{n} +\\& +\implies \boxed{\lim_{n\to \infty} L = H_\infty(X)} \le H(X) = H(X_1) \\[-6pt] +&\begin{mytextcol} +Better than i.i.d.\\(cannot be attained by Huffman codes unless symbols are grouped) +\end{mytextcol} +\\\end{mytable} + +\newpage +\section*{Universal compression/source coding} +A source code is universal if it can be constructed without knowledge of the statistics of the +source. 
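For intuition, the following is a minimal Python sketch (not from the course material; buffer sizes, tie-breaking and bit-packing are simplified assumptions) of an LZ77-style parser that produces the $(j,l,c)$ triples summarized in the table below:

\begin{minted}{python}
# Toy LZ77-style parser: search buffer of size S, lookahead of size B.
# Emits (j, l, c): offset j counted from the right end of the search buffer,
# match length l, and the literal character c following the match.
# "No match" is encoded as (0, 0, c).
def lz77_parse(data, S=8, B=8):
    i, out = 0, []
    while i < len(data):
        best_j, best_l = 0, 0
        for j in range(max(0, i - S), i):       # candidate match starts
            l = 0
            # the match may extend into the lookahead (overlap allowed)
            while l < B - 1 and i + l < len(data) - 1 and data[j + l] == data[i + l]:
                l += 1
            if l > best_l:
                best_j, best_l = i - j, l       # offset from the right
        out.append((best_j, best_l, data[i + best_l]))
        i += best_l + 1                         # advance past match + literal
    return out

print(lz77_parse("abracadabra"))
# [(0, 0, 'a'), (0, 0, 'b'), (0, 0, 'r'), (3, 1, 'c'), (5, 1, 'd'), (7, 3, 'a')]
\end{minted}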
+\begin{mytable}
+LZ77 codeword: &
+\bm y = (j,l,c); \text{ where:}\begin{cases}
+ j,&\text{match offset (in $S$, from the right)}\\
+ l,&\text{match length (can overlap $B$)}\\
+ c,&\text{character after the match (in $B$)}
+\end{cases}
+\\&
+\begin{cases}
+\text{no match }&\implies j=0,\, l=0,\, c=\text{``first char in $B$''}
+\\
+\text{match }&\implies j>0,\, l>0
+\end{cases}
+\\[-4pt]&
+\text{If match, advance buffers
+$l+1$
+chars (otherwise: $1$ char)}
+\\[-4pt]
+LZ77 codeword length: &
+\ell_{\bm y} = \ell((j,l,c)) =
+\lceil \log(S+1) \rceil +
+\lceil \log(B+1) \rceil +
+|c|
+\\[-8pt]
+& |c| = \lceil \log k \rceil \text{; when }c\in \text{``ASCII 8-bit'', }|c| = \log_2 2^8 =8 \text{ bits}
+\\
+LZ77 total compressed size: &
+ \underbrace{S \cdot |c|}_\text{initial buffer} + \sum_i \ell_{\bm{y}_i} = {S \cdot |c|} + N_{cw} \cdot \ell_{\bm{y}}, \quad N_{cw} = \text{num.\,of codewords}
+\\
+LZSS codeword: &
+\bm y = \begin{cases}
+ (0,j,l),&\text{match}\\
+ (1, c),&\text{no match}\\
+\end{cases}
+; \text{ where:}\begin{cases}
+ j,&\text{match offset (never $0$)}\\
+ l,&\text{match length (never $0$)}\\
+ c,&\text{first char in $B$}
+\end{cases}
+\\[-4pt]&
+\text{If match, advance buffers $l$ chars (otherwise: $1$ char)}
+\\
+LZSS codeword length: & \ell_{\bm{y}} =
+\begin{cases}
+ \text{match:} &
+ 1 + \lceil \log(S+1) \rceil +
+ \lceil \log(B+1) \rceil
+ \\
+ \text{no match:} &
+ 1 + |c|
+\end{cases}
+\\
+LZSS total compressed size: &
+ \underbrace{S \cdot |c|}_\text{initial buffer} + \sum_i \ell_{\bm{y}_i} =
+ S \cdot |c| +
+ N_\text{match} \cdot \ell_{\text{match}} +
+ N_\text{no-match} \cdot \ell_{\text{no-match}}
+\\
+LZ78: & [...] \text{Dictionary} [...]
+\\
+LZW: & [...] \text{Dictionary with preinitialization (usually with the 1-char entries)} [...]
+\end{mytable}
+
+\newpage
+\section*{Asymptotic Equipartition Property or Principle (AEP)}
+\begin{mytable}
+AEP concept: &
+\begin{mytextcol}
+ There exists a class/set of sequences $A_\varepsilon^{(n)}$ of length $n$, called \emph{typical}, such that, as $n$ increases ($n\to\infty$), they become almost sure ($P\to 1$) while at the same time remaining a negligible portion (as $\varepsilon\to 0$) of the total set of length-$n$ sequences $\mathcal X^{n}$; the only exception is a source that is uniform over $\mathcal X$, for which every sequence is typical.\\
+ Incidentally, sequences that are individually the most probable tend not to be in $A_\varepsilon^{(n)}$, i.e. are \emph{atypical} (same for the \emph{least} probable ones).
+ \\
+ The typical sequences are defined as precisely those that enable the convergence to $H(X)$ using the Weak LLN (i.e.\@ convergence in probability).
+
+% TODO: It also happens that those sequences tend to not contain
+% the most probable words nor the least probable words.
+% (see Sergio Verdú video).
+\end{mytextcol}
+\\
+Sequence of $n$ i.i.d.\@ RVs: &
+\begin{mytextcol}
+``$\bm X$ is i.i.d.''\ means that the sequence of $n$ RVs $\bm X = (X_1, X_2, \cdots, X_i, \cdots, X_n)$,\\ with $\bm X \in \mathcal{X}^n$, consists of \emph{mutually} independent RVs that all share the same marginal distribution\\ (mutual independence, which is stronger than pairwise independence). 
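\\[4pt]
In that case the joint PMF factorizes as $p_{\bm X}(\bm x)=\prod_{i=1}^{n} p_X(x_i)$, so $-\frac{1}{n}\log p_{\bm X}(\bm X)=\frac{1}{n}\sum_{i=1}^{n}\left(-\log p_X(X_i)\right)$ is an empirical average of i.i.d.\@ terms with mean $H(X)$; this is what allows the Weak LLN to be applied in the rows below.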
+\end{mytextcol} +\\ +Set of $\varepsilon$-typical sequences for i.i.d.\@ RVs: & +\text{Given a sequence $\bm X$ of $n$ i.i.d.\@ $n$ RVs, for each and $\varepsilon>0$ define:} +\\[-8pt]& +A_\varepsilon^{(n)} (X) = \left\{ + \bm x = (x_1,x_2,\cdots,x_n): +\, +\left| +-\frac{1}{n}\log p_{\bm X}(\bm x)-H(X) +\right|\le \varepsilon +\right\} = +\\& +A_\varepsilon^{(n)} (X) = \left\{ + \bm x = (x_1,\cdots,x_n): +\, +2^{-n\cdot(H(X)+\varepsilon)} +\le p_{\bm{X}}(\bm x) +\le +2^{-n\cdot(H(X)-\varepsilon)} +\right\} +\\ +AEP (Weak LLN redux): & +\text{Because $\bm X$ are i.i.d.:}\, -\frac{1}{n}\log p_{\bm X}(\bm X) = +-\frac{1}{n}\log \prod_{i} p_{X}(X_i) \stackrel[n\to\infty]{P}{\longrightarrow} H(X) +\\& +\forall\varepsilon>0,\, \exists n_0:\, \forall n>n_0 \implies +P(``\bm x \in A_\varepsilon^{(n)} (X) ") \ge 1-\varepsilon +\\& +\text{Main idea: } \underset{(n,\,\varepsilon)\to(\infty,\,0^+)}{\text{lim}^*}\, P(``\bm x \in A_\varepsilon^{(n)} (X) ") = 1 +\\[-0.3cm]&\text{\footnotesize{${}^*$Note: this is not actually a limit...}} +\\[-0.3cm]& +\begin{mytextcol} +Also we have (as $\varepsilon\to 0$): $p_{\bm X}(\bm x)|_{\bm x\in A_\varepsilon^{(n)}} +\approx 2^{-n H(X)};\quad p_{\bm X}(\bm x)|_{\bm x\notin A_\varepsilon^{(n)}} \approx 0$ +\\ (i.e.\@ sequences inside the typical set tend to be equiprobable and sequences outside of it tend to be impossible) +\end{mytextcol} +\\ +Size of $A^{(n)}_\varepsilon (X)$ is negligible: & +(1-\varepsilon)\cdot 2^{n\cdot (H(X)-\varepsilon)} +\le +|A_\varepsilon^{(n)} (X)| +\le +2^{n\cdot (H(X)+\varepsilon)},\, \forall n\ge n_0 +\\& +\implies \underset{(n,\,\varepsilon)\to(\infty,\,0^+)}{\text{lim}^*}\frac{|A_\varepsilon^{(n)} (X)|}{|\mathcal{X}^{n}|} = +\lim_{n\to\infty}\frac{2^{n\cdot H(X)}}{2^{n\cdot \log_2 |\mathcal{X}|}} = +\begin{cases} + 1, &\text{ if $X$ is uniform} \\ + 0 ?, &\text{ otherwise} +\end{cases} +\\[12pt] +AEP generalizations: & +\text{AEP holds for \emph{any} $\bm X$} +\begin{cases} + \text{i.i.d.} & \text{as shown here} \\ + \text{independent} & \text{not shown here} \\ + \text{ergodic} & \text{using next definition of $A_\varepsilon^{(n)}$} \\ +\end{cases} +\\ +$A_\varepsilon^{(n)}$ for ergodic $\bm X$: & +A_\varepsilon^{(n)} (X) = \left\{\bm x = (x_1,x_2,\cdots,x_n): +\, +\left| +-\frac{1}{n}\log p_{\bm X}(\bm x)-H_\infty(X) +\right|\le \varepsilon +\right\} +\\ +Set of jointly $\varepsilon$-typical sequences: & +A_\varepsilon^{(n)} (X,Y) = +\left\{(\bm x,\bm y): +\, +\bm x \in A_\varepsilon^{(n)} (X),\, +\bm y \in A_\varepsilon^{(n)} (Y) +\right\} \cap +\newline\phantom{MMMMMM} +\cap\left\{(\bm x,\bm y): +\, +\left| +-\frac{1}{n}\log p_{\bm{XY}}(\bm{xy})-H(X,Y) +\right|\le \varepsilon +\right\} +\\ +$A_\varepsilon^{(n)} (X,Y)$ is defined for:& +\text{i.i.d.\,$\bm X$ and i.i.d.\,$\bm Y$ (but $\bm X,\bm Y$ could be jointly not i.i.d.)} +\\ +Properties of $A_\varepsilon^{(n)} (X,Y)$: & +\text{AEP: } +P +\left((\bm{x},\bm{y}) \in A_\varepsilon^{(n)} (X,Y) +\right) \ge 1-\varepsilon, +\,\, \forall n\ge n_0 +\,\, \forall \varepsilon>0 +\\ & +(1-\varepsilon)\cdot 2^{n\cdot (H(X,Y)-\varepsilon)} +\le +\left| + A_\varepsilon^{(n)} (X,Y) +\right| +\le +2^{n\cdot (H(X,Y)+\varepsilon)},\, \forall n\ge n_0 +\\ +When $\bm X \indep \bm Y$:& +(1-\varepsilon)\cdot 2^{-n\cdot (H(X,Y)+3\cdot\varepsilon)} +\le +P\left( + (\bm x,\bm y)\in A_\varepsilon^{(n)} (X,Y) +\right) +\le +2^{-n\cdot (H(X,Y)-3\cdot\varepsilon)} +\\[-12pt]& +\forall n\ge n_0. 
\quad \text{Meaning of }\text{``}\bm X \indep \bm Y \text{'' here}
+%\Longleftrightarrow
+:\quad
+p_{\bm{XY}}(\bm x,\bm y)
+ = p_{\bm{X}}(\bm x)\cdot p_{\bm{Y}}(\bm y)
+% \\[-10pt]&
+\end{mytable}
+\section*{Source Coding Theorem}
+
+\begin{mytable}
+Source Coding Theorem for i.i.d.\@$\bm X$: & \forall \delta >0,\, \exists n_0 \text{ s.t. }
+ n\ge n_0 \implies \boxed{ \frac{L^{(n)}}{n} \le H(X) + \delta}
+\\[-8pt]&
+\begin{mytextcol}
+assuming i.i.d.\@ sequences of $n$ RVs $\bm X = (X_1, X_2, \cdots,X_i,\cdots, X_n)$ and $L^{(n)} = \mathbb E [\ell_{\bm x}]$ for some source code that is guaranteed to exist.
+\end{mytextcol}
+\\[8pt]
+Proposed optimal source code:&
+\begin{mytextcol}
+Given $\varepsilon$ and $n$, add prefix bit $1$ if $\bm x\in A_\varepsilon^{(n)}$ or $0$ otherwise.
+Then, concatenate with the index in $A_\varepsilon^{(n)}$ if $\bm x\in A_\varepsilon^{(n)}$, or with the index in $\mathcal X^n$ otherwise.
+\\[5pt]
+With that, the resulting codeword lengths are:\\[3pt]
+\end{mytextcol}
+\\[16pt]&
+\ell_{\bm x, \varepsilon} =
+\begin{cases}
+ \text{if } \bm x \in A_\varepsilon^{(n)}, &
+ 1 + \lceil \log |A_\varepsilon^{(n)} | \rceil \underset{\text{(AEP)}}{\le}
+ 2 + n (H(X)+\varepsilon)
+ \\
+ \text{if }\bm x \notin A_\varepsilon^{(n)}, &
+ 1 + \lceil \log k^n \rceil
+ \le
+ 2 + n\log k
+ \\
+\end{cases}
+\\&
+\text{for } \delta = \varepsilon \cdot (1+\log k) + \frac{2}{n} \text{,\, $\frac{L^{(n)}}{n}$ satisfies the Source Coding Theorem bound.}
+\\[-10pt]&\text{(this proves the \emph{achievability})}
+\\
+Source Coding Theorem for ergodic $\bm X$: &
+\boxed{ \frac{L^{(n)}}{n} \le H_\infty(X) + \delta}
+\quad\text{(just replace $H(X)$ with $H_\infty(X)$)}
+\\
+\end{mytable}
+
+\section*{Channel Coding Theorem}
+
+\begin{mytable}
+Channel code definition: & \text{(See Channel Coding section)}
+\\
+Error probability (after decoding): & P_e = P(g(Y)\neq u | U=u) = P(
+\widehat U\neq u | U=u), \, \widehat U = g(Y)
+\\
+Fano's lemma: &
+H(U|\widehat U) \le h(P_e) + P_e \cdot \log (M-1),\, P_e = P(\widehat U \neq U)
+\\[-4pt]
+Properties of $H(U|\widehat U)$:
+&
+H(U|\widehat U) =
+H(U|\widehat U) + \underbrace{H(Z|U\widehat U)}_{=0}
+\overset{
+ \begin{subarray}{c}
+ \text{chain} \\
+ \text{rule} \\
+ \end{subarray}
+}{=}
+H(UZ|\widehat U),\, Z=\delta_{U=\widehat U}
+\\[-8pt]&
+0 \le
+H(U|\widehat U)
+\le \log M
+\\[-4pt]&
+H(U|\widehat U) =0 \implies \text{perfect decoding possible ($U$ deterministic given $\widehat U$)}
+\\[-4pt]&
+H(U|\widehat U) =H(U) \implies \text{message impossible to recover ($\underbrace{I(U;\widehat U)= 0}_{U\indep\widehat U}$)}
+% H(U|\widehat U) =
+% \begin{cases}
+% 0, & \text{perfect decoding possible ($U$ deterministic given $\widehat U$)}\\
+% \log M, & \text{message impossible to recover, $U\indep\widehat U$} \\
+% \end{cases}
+\\
+Channel code rate: & R=\frac{k}{n} \le 1; \quad M=|U|;\quad k=\log_2 M \text{ bits}
+\\
+Channel capacity: & C = \max_{p_X(x)} I(X;Y)
+\\[-8pt]
+&\text{$I(X;Y)$ is concave on $p_X(x) \Rightarrow$ a local maximum is global (and unique).}
+\\
+Channel coding theorem: &
+\begin{mytextcol}
+$R < C \implies$ a target error probability $P_e>0$ (arbitrarily small) can be attained using a coding scheme with a rate-$R$ channel code that is guaranteed to exist (\emph{achievability}).
+\\
+Otherwise ($R\ge C$), such a code fails to exist once the target $P_e$ is small enough; in particular, if $R > C$, then this ``error-free'' transmission cannot be achieved by any code at all (\emph{converse}). 
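\\[5pt]
Worked numerical illustration (values chosen only as an example, using the BSC capacity from the next section): a BSC with crossover probability $p = 0.11$ has $C = 1 - h_2(0.11) \approx 0.5$ bits per channel use; hence a rate $R = 1/3 < C$ code with arbitrarily small $P_e$ exists for large enough block length $n$, while no code with $R = 0.6 > C$ can achieve this.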
+\end{mytextcol} +\end{mytable} + +\section*{Discrete Memoryless Channels (DMC)} +\begin{mytable} +Discrete channel: & +\begin{mytextcol} +SISO system (Single Input/Single Output), with input $X_i$ and output $Y_i$ symbols at discrete time $i$, defining sequences (signals) $\bm X$ and $\bm Y$; the input and output alphabets ($\mathcal{X}$ and $\mathcal{Y}$) are discrete sets.\\[4pt] +% This is from it_script_v69.pdf +A channel is characterized by the input and output alphabets and the probabilities on an output given all inputs and all other outputs: +\\ +$(\mathcal{X}, \{P(Y_i|\bm X\bm Y_{k\neq i})\}_{\forall i}, \mathcal{Y})$, where $\mathcal{X}, \mathcal{Y}$ (alphabets) are discrete sets. +\end{mytextcol} +\\ +Discrete Memoryless Ch.\@ (DMC): & +\text{Discrete and $P(Y_i| \bm X\bm Y_{k\neq i}) = P(Y_i| \bm X) = P(Y_i|X_i) =P(Y|X)$}\newline\phantom{M}\text{(current output only depends on current input symbol)} +\\[-8pt]& +\text{Characterized by just } (\mathcal{X}, P( Y| X), \mathcal{Y}) +\\[-8pt]& +\text{This defines a Markov chain: } X \to Y +\\ Capacity bounds: & +0\le C \le \min\{\log |\mathcal X|, \log |\mathcal Y|\} +\text{, (same as bounds on $I(X;Y)$)} +\\[-8pt]& +\text{if $\mathcal X$ or $\mathcal Y$ is binary, then: } C \le 1 +\\ +Binary Symmetric Ch.\@ (BSC): & +\text{[TODO: Image]} +\\[-4pt]& +x,y\in \{0,1\},\quad +P_{Y|X}(y|x) = \begin{cases} + 1-p, & y = x \\ + p, & y \neq x \\ +\end{cases} +\\[-4pt]& +I(X;Y) = H(Y) - h_2(p) \le 1 -h_2(p) = C_\text{BSC} +\\[-12pt]&\text{Capacity achieved when $Y$ uniform, therefore $X$ uniform.} +\\ +Binary Erasure Ch.\@ (BEC): & +\text{[TODO: Image]} +\\[-4pt]& + x\in \{0,1\}, \, y\in \{0,1,\Delta\},\quad +P_{Y|X}(y|x) = \begin{cases} + \alpha, & y = \Delta \\ + 1-\alpha, & y \neq \Delta, x=y \\ + 0, & \text{otherwise} \\ +\end{cases} +\\[-4pt]& +I(X;Y) = H(Y) - h_2(\alpha) \le 1 -\alpha = C_\text{BEC} +\\[-12pt] +&\begin{mytextcol} +No symmetry; $Y$ cannot be uniform, thus $H_{\max}(Y)=\log 3$ cannot be achieved. 
Let $p=P(X=1)=1-P(X=0)$ +\end{mytextcol} +\\[-4pt]& H(Y) = H\Big((1-p)(1-\alpha), \alpha, p(1-\alpha)\Big)=(1-\alpha)h_2(p) + h_2(\alpha) +\\[-12pt] & \text{optimal for $p=1/2$} +\\State transition matrix: & +\begin{mytextcol} +Corresponds to the Markov chain $X\to Y$\\[4pt] +$\mathcal P = [p_{Y|X}(y|x)]$; size $N\times M=|\mathcal X| \times |\mathcal Y|$, weighted bipartite graph +\\ +$N$ rows ($x$, outgoing edges), $M$ columns ($y$, incoming edges)\\[8pt] Rows must add up to 1: $\sum\limits_y p_{Y|X}(y|x) = 1$ +\end{mytextcol} +\\[-4pt]& +\mathcal P_\text{BSC} = +\bgroup\def\arraystretch{0.8} +\begin{bNiceMatrix}[last-row,last-col] +1-p & p \\[-4pt] +p & 1-p & +\Vdots[line-style={solid,<->}]^{X}\\ +& \Ldots[line-style={solid,<->},shorten=0pt]_{Y} \\ +\end{bNiceMatrix} +\egroup +\quad\quad +\mathcal P_\text{BEC} = +\bgroup\def\arraystretch{0.8} +% \begin{pmatrix} +% 1-\alpha & \alpha & 0 \\ +% 0 & \alpha & 1-\alpha \\ +% \end{pmatrix} +%% +\begin{bNiceMatrix}[last-row,last-col] + 1-\alpha & \alpha & 0 \\[-4pt] + 0 & \alpha & 1-\alpha & +\Vdots[line-style={solid,<->}]^{X}\\ +& \Ldots[line-style={solid,<->},shorten=0pt]_{Y} \\ +\end{bNiceMatrix} +%% +% \begin{bNiceMatrix}[last-row,last-col] +% 1-\alpha & \alpha & 0 & +% \Vdots[line-style={solid,<->}]^{X} \\ +% 0 & \alpha & 1-\alpha & \\ +% & & \Ldots[line-style={solid,<->},shorten=0pt]_{Y} +% \end{bNiceMatrix} +\egroup +\\ +DMC capacity -- General case: & \text{Maximize $I(X;Y)$ for $p_i=p_X(x_i)$, constrained to $p_i>0$ and $\displaystyle\sum_i p_i=1$.} +\\[-4pt] & +I(X;Y) = H(Y) - H(Y|X) = H(Y) - \sum_x +\underbrace{H(Y|X=x)}_{H(\bm r_i)} +\cdot\, p_X(x) +\\[-4pt] & +I(X;Y) = H(Y) - \sum_{i} +H(\bm r_i) +\cdot\, p_X(x_i); \quad +\bm r_i =\text{$i$-th row of $\mathcal P$} +\\[-4pt] & +\text{To obtain $H(Y)$, compute $p_Y(y)=P(Y=y)$ using total probability:} +\\[-4pt] & +p_Y(y) = \sum_{x\in \mathcal X} p_X(x)\cdot p_{Y|X}(y|x); \quad \boxed{\bm p_Y = \bm p_X \cdot \mathcal P}; \quad H(Y) = H(\bm p_Y) +\\[-4pt] & +\begin{mytextcol} +If the matrix for $p_{XY}(x,y)$ is desired, use $\bm p_{XY} = \bm p_X^{T}\,\texttt{.*} \mathcal P$, where \texttt{.*} is the broadcasting multiplication (as in MATLAB or NumPy). +\\ +This can also be described with Einstein notation: [TODO] +\end{mytextcol} +\\ +Uniformly dispersive DMC: & +\begin{mytextcol} + rows ($x$) are taken from the same set (permutations) + \end{mytextcol} +\\[-4pt] & +I(X;Y) = H(Y) - H(Y|X) = H(Y) - \sum_x +\underbrace{H(Y|X=x)}_{\text{constant}} +\cdot\, p_X(x) +\\[-4pt] & +H(Y|X=x) = H(\bm r), \text{ ($\bm r$ is any row of $\mathcal P$)} +\\[-4pt] & +I(X;Y) = H(Y) - H(\bm r) \le \boxed{ \left(\max_{p_X(x)} H(Y)\right) - H(\bm r) = C_\text{UnifDisp}} +\\ +(Strongly) Symmetric DMC: & +\begin{mytextcol} + rows ($x$) are taken from the same set (permutations; unif.\@ disp.), \\columns ($y$) have the same property. 
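\\[4pt]
Example (standard case, for illustration): the BSC with matrix $\mathcal P_\text{BSC}$ above is strongly symmetric (every row and every column is a permutation of $(1-p,\, p)$); the boxed $C_\text{Symm}$ formula below then gives $C = \log 2 - H(1-p, p) = 1 - h_2(p)$, which matches $C_\text{BSC}$.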
+\end{mytextcol} +\\[-4pt] & +I(X;Y) = H(Y) - H(\bm r) \le \boxed{ \log |\mathcal Y| - H(\bm r) = C_\text{Symm}} +\\[-4pt] & \text{optimal for $X$ uniform (implies $Y$ uniform).} +\\ +Weakly Symmetric DMC: & +\begin{mytextcol} + rows ($x$) are taken from the same set (permutations), \\columns ($y$) must add up to the same number: $\displaystyle A=\sum_x p(y|x)$.\\ + Same results as symmetric: +\end{mytextcol} +\\[-4pt] & +I(X;Y) = H(Y) - H(\bm r) \le \boxed{ \log |\mathcal Y| - H(\bm r) = C_\text{WSymm}} +\\[-4pt] & \text{optimal for $X$ uniform (implies $Y$ uniform).} +\\ +Useful properties: & +\frac{\partial }{\partial x}\bigg( +x\cdot \log_2 x +\bigg) += +\frac{1}{\ln 2}+ +\log_2 x +\\ & +\frac{\partial}{\partial x}\bigg( +f(x)\cdot \log_2 f(x) +\bigg) += +\frac{\partial f(x)}{\partial x}\cdot\bigg( +\frac{1}{\ln 2}+ +\log_2 f(x) +\bigg) +\\ & +\frac{\partial}{\partial x}h_2(x) += +\log_2\frac{1-x}{x}=-\mathrm{logit}_2\,x +\\ +\end{mytable} + +\newpage +\section*{Channel Coding} +\begin{mytable} +Channel code definition: & \text{[TODO]}[\text{Code Rate: }R] +\\ +Single parity bit check code: & \text{[TODO]} +\\ +Linear code: & \text{[TODO]} +\\ +Generator matrix of a linear code: & \text{[TODO]} +\\ +Hamming distance and weight: & \text{[TODO]} +\\ +Decoding criteria: & +\begin{cases} + \text{Maximum a posteriori (MAP):} & + \hat{\bm x} = \arg\max_{\bm x\in \mathcal B} + \{P(\bm x| \bm y) \} + \\\hline + \text{Maximum likelihood (ML):} & + \hat{\bm x} = \arg\max_{\bm x\in \mathcal B} + \{P(\bm y| \bm x) \} + \\ + \text{ = MAP if equiprobable CWs $\bm x$}& + \\\hline + \text{Minimum distance (MD):} & + \hat{\bm x} = \arg\min_{\bm x\in \mathcal B} + \{d_H(\bm x, \bm y) \} + \\ + \text{ = MAP for BSC}\\\text{ (¿¿requires also equiprobable\@ x??)}& + \\ +\end{cases} +\\ +Hamming code: & \text{Parity bits are the sums of all possible subsets of info bits} +\\ +Detected error bits: & \boxed{w_H(\bm e) \le d_{\min} -1} +\\ +Corrected error bits: & \boxed{w_H(\bm e) \le +\left\lfloor + \frac{d_{\min} -1}{2} +\right\rfloor} +\le \frac{d_{\min} -1}{2} +\\ +Parity-check matrix of a linear code: & \text{[TODO]} +\\ +Syndrome: & \bm s = \bm y \cdot H^T = \bm e \cdot H^T +\\ +Syndrome decoder: & +\text{[TODO]} +\end{mytable} + +\section*{Information in continuous RV} + +\begin{mytable} +Differential entropy: & H_\text{Dif}(X) = \mathbb{E}_X[-\log_2 f_X(X)] = - \int_{\mathbb{R}} f_X(x)\cdot\log_2 f_X(x)\, dx \newline\phantom{M} (X \text{ with pdf } f_X) +\\[-12pt] +& \text{May be negative, e.g.\,$X\sim$ Uniform, with }\Delta<1 \text{ (similar w/Gaussian)} \newline\phantom{MMMMMM..MMM} \implies H_\text{Dif}(X)=\log \Delta<0 +\\ +Joint differential entropy: & +H_\text{Dif}(X,Y) = +\mathbb{E}_{XY}[-\log_2 f_{XY}(X,Y)] = +-\iint\limits_{\mathbb{R}^2} f(x,y)\cdot\log_2 f(x,y)\, dxdy +\\[-8pt]& +H_\text{Dif}(X_1,X_2,\dots,X_n) = +\mathbb{E}_{\bm X}[-\log_2 f(X_1, X_2, \dots, X_n)] +\\ +Relative entropy: & +D(f||g) = \mathbb{E}_{|f}\left[\log\frac{f(X)}{g(X)}\right] = +\int_{\mathbb R}f(x)\cdot\log\frac{f(x)}{g(x)}dx +\ge 0 +\\ +Mutual information: & +I(X;Y) = +\mathbb{E}_{XY}\!\left[\log_2 + \frac{f_{XY}(X,Y)}{f_X(X)\cdot f_Y(Y)} +\right]= D(f_{XY}(x,y)||f_{X}(x)\cdot f_{Y}(y)) \ge 0 +\\& +\text{Chain rule and ``Conditioning reduces entropy'': same as discrete RVs.} +\\ +Translation and scaling: & +H_\text{Dif}(a\cdot X+c) = H_\text{Dif}(X) + \log a +\\[-12pt]& +\begin{mytextcol} +Scaling breaks ``permutation invariance''\\(due to change of variable in integral) +\end{mytextcol} +\\ +Gaussian RV diff.\@ 
+Gaussian RV diff.\@ entropy: &
+H_\text{Dif}(X) = \frac{1}{2}\log(2\pi e\sigma^2)
+\\
+Gaussian RV maximizes $H_\text{Dif}(X)$: &
+H_\text{Dif}(X)\ge H_\text{Dif}(Y),
+\quad
+X\sim \mathcal{N}(\mu,\sigma), \, \forall Y,\, \mathbb{E}[Y] = \mu, \,
+\mathbb{V}[Y] = \sigma^2
+\\[-8pt]&
+H_\text{Dif}(X) - H_\text{Dif}(Y) = \underbrace{\cdots}_{\text{lemma}} = D(f||g) \ge 0
+\\ Lemma: &
+\mathbb{E}_{|f}[-\log g(X)] = -\int_{\mathbb{R}}
+f(x)\cdot\log g(x)\, dx =
+-\int_{\mathbb{R}}
+g(x)\cdot\log g(x)\, dx =
+H_{\text{Dif}|g}(X)
+\\[-5pt]&
+g\text{ is the pdf of }X \sim \mathcal N(\mu,\sigma),\, f\text{ is pdf of }Y,\,\mathbb{E}[Y] = \mu, \,
+\mathbb{V}[Y] = \sigma^2
+\newline\text{(the two integrals agree because $\log g(x)$ is quadratic in $x$ and $f$, $g$ share mean and variance)}
+\\
+Discrete $H$ does not converge to $H_\text{Dif}$: &
+\text{in general, }
+H(X^\Delta) \xrightarrow[]{\Delta\to 0} \infty
+\\
+Discrete $I$ agrees with continuous $I$: &
+I(X^\Delta;Y^\delta) \xrightarrow[]{(\Delta,\delta)\to (0, 0)} I(X;Y)
+\end{mytable}
+
+\newpage
+\section*{Multivariate Gaussian distribution}
+\begin{mytable}
+$n$-D Gaussian RV (Multivariate): &
+\begin{mytextcol}
+Vector of Gaussians $\bm X = (X_1, X_2, \cdots, X_n)\sim \mathcal N(\bm \mu, \Lambda_{\bm X})$ s.t.\,all linear combinations are Gaussian or deterministic (a.k.a.\,\emph{jointly} Gaussian):
+\\
+\phantom{M}\quad $\forall \bm a \in \mathbb{R}^n,\, \bm a^T\cdot \bm X = Z \sim \mathcal N(\mu,\sigma)$ (if $\sigma = 0$, deterministic $Z=\mu$)
+\\
+Marginals $X_i \sim \mathcal N(\mu_i,\sigma_i)$; covariance matrix $\Lambda_{\bm X} = \mathbb E[(\bm X - \bm \mu)\cdot (\bm X - \bm \mu)^T]$. We assume full-rank $\Lambda_{\bm X}$ (non-degenerate, i.e.\,all nontrivial linear combinations are nondeterministic). \\
+PDF (for non-degenerate case): \\\phantom{M}\quad $\displaystyle
+f_{\bm X}(\bm x) =
+\frac{1}{\sqrt{\text{Det}(2\pi\cdot \Lambda_{\bm X})}}\cdot
+\text{exp}\left[
+ -\frac{1}{2}
+ (\bm x - \bm \mu)^T \cdot
+ \Lambda_{\bm X}^{-1} \cdot
+ (\bm x - \bm \mu)
+\right]
+$
+\\
+$\Lambda_{\bm X}$ is positive definite ($\Lambda_{\bm X}>0$) (in general it can be pos.\@ semidef.)
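+% bivariate example of the general definition above:
+\\[4pt]
+e.g.\,for $n=2$: $\Lambda_{\bm X}=\begin{pmatrix}\sigma_1^2 & \rho\sigma_1\sigma_2\\ \rho\sigma_1\sigma_2 & \sigma_2^2\end{pmatrix}$, with $\text{Det}(\Lambda_{\bm X})=\sigma_1^2\sigma_2^2(1-\rho^2)>0$ iff $|\rho|<1$ (non-degenerate).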
+\end{mytextcol}
+\\
+$n$-D Gaussian RV normalization: &
+\bm Y = \Lambda_{\bm X}^{-1/2}\cdot (\bm X - \bm \mu_{\bm X}),\,
+\bm X \sim \mathcal N(\bm \mu_{\bm X}, \Lambda_{\bm X})
+\implies \bm Y \sim \mathcal N(\bm 0, I)
+\\[-5pt]
+& \text{(Note: $\Lambda_{\bm X}^{-1/2}$ is uniquely well defined because $\Lambda_{\bm X}$ is positive definite)}
+\\
+Linear transformation: &
+\begin{mytextcol}
+$\bm Y = A\cdot \bm X + \bm a
+;\quad$
+$A$ \text{ is square full rank (invertible transformation)}
+\\[4pt]
+($\bm X$ is arbitrary RV)
+\\[4pt]
+$\displaystyle
+f_{\bm Y}(\bm y) =
+\frac{1}{|\text{Det}(A)|}\cdot
+f_{\bm X}
+\left(
+ A^{-1}\cdot (\bm y-\bm a)
+\right)
+$
+\\[4pt]
+$\mathbb{E}[\bm Y] = A\cdot \bm \mu_{\bm X} + \bm a,$\quad
+$\Lambda_{\bm Y} = A \cdot \Lambda_{\bm X} \cdot A^T$
+\\[4pt]
+$H(\bm Y) = H(\bm X) + \log |\text{Det}(A)|$
+\end{mytextcol}
+\\
+Property: &
+\mathbb{E}
+[
+ (\bm X - \bm \mu)^T\cdot
+ \Lambda_{\bm X}^{-1}
+ \cdot (\bm X - \bm \mu)
+] = n, \,\text{ (any RV $\bm X$ with mean $\bm\mu$ and covariance $\Lambda_{\bm X}$, not necessarily Gaussian)}
+\\
+(Simple) Gaussian quadratic form: &
+Z = (\bm X - \bm \mu_{\bm X})^T\cdot
+ \Lambda_{\bm X}^{-1}
+ \cdot (\bm X - \bm \mu_{\bm X})
+ = \sum_i Y_i^2
+ \sim \chi^2(n)
+\\[-8pt]&
+\text{Given }\bm X \sim \mathcal N(\bm \mu_{\bm X}, \Lambda_{\bm X}),\text{ with }\bm Y = \Lambda_{\bm X}^{-1/2}(\bm X - \bm \mu_{\bm X}) \sim \mathcal N(\bm 0, I)\text{ (normalization above)}
+\\
+Entropy of $n$-D Gaussian: &
+H(\bm X) = \frac{1}{2}\cdot\log\text{Det}(2\pi\cdot e\cdot\Lambda_{\bm X})
+\\
+Gaussian RV maximizes $H_\text{Dif}(\bm X)$:
+&
+H_{\text{Dif}}(\bm X) \ge H_{\text{Dif}}(\bm Y),
+\quad
+\bm X\sim \mathcal N(\bm\mu,\Lambda), \, \forall \bm Y,\, \mathbb{E}[\bm Y] = \bm\mu, \,
+\text{Cov}(\bm Y) = \Lambda
+\\[-8pt]&
+H_{\text{Dif}}(\bm X) - H_{\text{Dif}}(\bm Y) = \underbrace{\cdots}_{\text{lemma}} = D(f||g) \ge 0
+\\
+Lemma: &
+\mathbb{E}_{|f}[-\log g(\bm X)] =
+\mathbb{E}_{|g}[-\log g(\bm X)] = H_{\text{Dif}|g}(\bm X)
+\end{mytable}
+
+\newpage
+\section*{Communication over Gaussian channels}
+\subsection*{Single Gaussian channel}
+\begin{mytable}
+Gaussian channel: &
+Y = X + Z, \quad Z\sim \mathcal N(0, \sqrt{N}), \quad X\indep Z
+\\[-8pt]&
+\text{assuming tx.\@ power limitation }\mathbb{E}[X^2]\le P, \text{ and }\mathbb E[X] = 0
+\\[-16pt]&
+\text{($\mathbb E[X]\neq 0$ would waste power)}
+\\[-16pt]&
+\text{Each RV realization corresponds to a transmission or channel use.}
+\\
+Mutual info.\@ in Gaussian channel: &
+\boxed{I(X;Y) = H(Y) - H(Z)}; \quad H(Y|X) = H(Z|X) = H(Z)
+\\[-4pt]&
+H(Z) = \frac{1}{2}\log(2\pi e\cdot \sigma_Z^2)
+\\
+Capacity of the Gaussian channel: &
+C =
+\max_{\substack{f_X(x)\\ \mathbb{E}[X^2] = P}}
+I(X;Y) =
+\frac{1}{2}\log(2\pi e\cdot (P+N)) - H(Z) =
+\\[12pt]&
+\boxed{C = \frac{1}{2} \log
+\left(
+1 + \frac{P}{N}
+\right)
+\,\text{[bits/tx.]}}
+\text{ attained for $X\sim \mathcal N(0,\sqrt{P})$}
+\\
+Sampling theorem:
+&
+x(t) = \sum\limits_{n=-\infty}^{+\infty}
+x[n]\cdot \text{sinc}_W\left(
+ t - \frac{n}{2W}
+\right)
+\iff x[n] = x(n\cdot T_s) =
+x\left(
+\frac{n}{2W}
+\right)
+,
+\newline \phantom{M} \text{ provided $x(t)$ is band-limited to $f_{\max} \le W = F_s / 2$;\quad $F_s = 1/T_s$}
+\newline \phantom{M} \text{ where: }
+\text{sinc}_W(t)=\frac{\sin (2\pi \cdot W\cdot t)}{2\pi \cdot W\cdot t},\,\, t\neq 0; \text{ sinc}_W(0) =1, \text{ zeros at } t=\frac{i}{2W}
+\\
+Band-limited Gaussian channel:
+&
+\begin{mytextcol}
+$y(t) = (x(t) + \eta(t))*h_W(t)$, assuming $X(|f|>W)=0$
+\\[8pt]
+$W$ is the bandwidth of the positive frequency interval (definition also valid for passband signals).
+\\[8pt]{Continuous-Time (CT) model:}
+\\[4pt]
+$y(t) = x(t) + z(t)$, then $Z(|f|>W)=0$
+\\[8pt]
+$z(t) = \eta(t) * h_W(t)$, band-limited AWGN ($\eta(t)$ is perfectly filtered)
+\\[8pt]{Discrete-Time (DT) model:}
+\\[4pt]
+$y_k = x_k + z_k$, after sampling at $F_s = 2W$\\[8pt]
+Noise PSDs: $R_\eta(f)= \frac{N_0}{2}$ and $R_z(f) = \frac{N_0}{2}\cdot\mathbbm{1}_{[-W, +W]}(f)$
+\\
+$N_0$ is defined so that the total noise power is $N=W \cdot N_0$
+\\CT noise autocorrelation: $r_z(\tau) = \mathcal{F}^{-1}\{R_z(f)\}= N_0 W\cdot \text{sinc}_W(\tau) = N\cdot \text{sinc}_W(\tau)$\\
+DT noise autocorrelation (samples are uncorrelated; DT quantities are energies, i.e.\,$T_s\,\times$ powers): $r_z[k] =
+ T_s\cdot r_z\left(\frac{k}{2W}\right) = \frac{N_0}{2} \cdot \delta[k]$
+\\Noise power (CT mean square): $\mathbb E[Z_{t}^2] = N = N_0\cdot W = N_0 \cdot \frac{F_s}{2}$
+\\Noise energy (DT mean square): $E_N = \mathbb E[Z_{k}^2] = \frac{N_0}{2}$ (per sample)
+\\DT AWGN process: $z_k \sim \mathcal N\left(0, \sqrt{\frac{N_0}{2}}\right)$
+\\Tx.\@ signal power: $P$
+\\Tx.\@ signal energy: $E_X=\frac{P}{2W}$ (per sample)
+\end{mytextcol}
+\\[8pt]
+Band-limited Gaussian ch.\@ capacity:
+&
+C_{\text{eff}} = \frac{1}{2}\log\left(
+ 1 + \frac{E_X}{E_N}
+\right)
+=
+\frac{1}{2}\log\left(
+ 1 + \frac{\frac{P}{2W}}{\frac{N_0}{2}}
+\right)
+=
+\frac{1}{2}\log\left(
+ 1 + \frac{P}{N_0 W}
+\right).
+\\&
+\begin{mytextcol}
+Because of sampling theorem, $C_{\text{eff}}$ is the maximum spectral efficiency:
+\\
+$C_{\text{eff}} > R_b / W$ (in (bits/s)/Hz or bits/sample or bits/(DT channel use)).
+\end{mytextcol}
+\\[8pt]&
+\boxed{
+ C = W\cdot \log\left(
+ 1 + \frac{P}{N_0 W}
+ \right)\text{ [bits/s]}
+}
+\text{ (Shannon–Hartley theorem)}
+\\&
+C_{\text{eff}} > R_b / W
+\implies C > R_b
+\\
+Attenuated (band-lim.) Gaussian ch.: &
+y(t) = x(t)\cdot G + z(t), \quad |G| < 1
+\\&
+\boxed{
+ C = W\cdot \log\left(
+ 1 + \frac{P|G|^2}{N_0 W}
+ \right)\text{ [bits/s]}
+}
+\\
+Fundamental theorem (max.\@ capacity):&
+ C_{\infty} = \lim_{W\to\infty} C =
+ \lim_{W\to\infty} \log\underbrace{\left(
+ 1 + \frac{P}{N_0 W}
+ \right)^W}_{\to \text{exp}(P/N_0) } = \frac{P}{N_0} \log e=
+\\[-4pt]&
+\boxed{
+ C_{\infty} =\frac{P}{N_0}\,\frac{1}{\ln 2}
+ \text{ [bits/s]}
+}\\[4pt]&
+ \text{Bit rate: } R_b \text{ [bits/s], }
+ \text{bit time: } T_b = \frac{1}{R_b} \text{ [s/bit], }\\[-4pt]&
+ \text{Energy per bit: } E_b = {P}\cdot{T_b} = \frac{P}{R_b} \text{ [``J''/bit]}
+\\[0pt]&
+ % \boxed{
+ \frac{C_{\infty}}{R_b} =
+ \frac{P}{N_0}\,\frac{1}{R_b\ln 2} =
+ \frac{E_b}{N_0}\,\frac{1}{\ln 2} > 1
+ \quad
+ \boxed{
+ \frac{E_b}{N_0} > \ln 2 \approx 0.69 = -1.59 \text{ dB}
+ }
+ % }
+\\Alternative expression for $C$: &
+ C = W\cdot \log\left(
+ 1 + \frac{P}{N_0 W}
+ \right) =
+ W\cdot \log\left(
+ 1 + \frac{R_b}{W}\cdot\frac{E_b}{N_0}
+ \right)
+\\Spectral eff.~limit ($C/W$ vs $E_b/N_0$): &
+C_{\text{eff}} = \frac{C}{W} = \log\left(
+ 1 + \frac{R_b}{W}\cdot\frac{E_b}{N_0}
+ \right) <
+\log\left(
+ 1 + \frac{C}{W}\cdot\frac{E_b}{N_0}
+ \right)
+\\
+Equivalent bound: &
+\frac{E_b}{N_0} > \frac{2^{C/W}-1}{C/W} \stackrel[(C/W\to 0)]{}{\longrightarrow} -1.59 \text{ dB}
+\\Limit for channel coding: &
+\text{code rate $R=K/N$, $N$ samples/codeword and $K$ message bits.}
+\\[-8pt]&
+F_s = 2W = N/T_{cw}, \text{ $T_{cw}$ codeword time}\implies R_b = 2WR
+\\[-8pt]&
+\frac{P}{N_0 W} =
+\frac{K E_b/T_{cw}}{N_0 W} =
+2R\frac{E_b}{N_0}
+\\&
+R_b = 2WR < C = W\log \left(1 + 2R \cdot \frac{E_b}{N_0} \right)
+\\&
+\implies 2R < \frac{C}{W} = \log \left(1 + 2R \cdot \frac{E_b}{N_0} \right)
+\\[8pt]&
+\implies
+\boxed{
+ \frac{E_b}{N_0} > \frac{2^{2R}-1}{2R}
+}\stackrel[(R\to 0)]{}{>} -1.59 \text{ dB}
+\\
+\end{mytable}
+
+\subsection*{Multiple independent Gaussian channels}
+
+\begin{mytable}
+Parallel Gaussian channels:
+&
+Y_i = X_i + Z_i
+\\[-8pt]
+& \text{$n$ independent AWGN channels with noise variance $N_i$, for $i=1,...,n$}
+\\[-8pt]
+& \text{and total tx.\@ power restriction: }P=\sum_i P_i = \mathbb E[\bm X^T \cdot \bm X]
+\\&
+I(\bm X; \bm Y) = H(\bm Y)- H(\bm Y | \bm X)
+\le \sum_i H(Y_i) - \sum_i H(Y_i | X_i)
+\\[-0pt]
+&
+I(\bm X; \bm Y) \le
+\sum_i I(X_i;Y_i) \le \sum_i\frac{1}{2}\log\left(1 + \frac{P_i}{N_i}\right),\\[-8pt]&
+\text{equality if the $X_i$ are independent, }X_i\sim \mathcal N(0,\sqrt{P_i}), \text{ where the $P_i$ need to be calculated.}
+\\
+Water filling for parallel Gaussian ch.:
+&
+\begin{mytextcol}
+Search for $P_i$ that max.\@ $I(X;Y)$, constrained to $P_i\ge 0$ and $\sum_i P_i = P$.
+\\
+Note: solution is unique because of $I(X;Y)$ concavity.
+\\
+Use Lagrange multiplier $\lambda$: $
+\displaystyle
+J = \sum_i \frac{1}{2}\log\left(1 + \frac{P_i}{N_i}\right) - \lambda \cdot \left(\sum_i P_i - P\right)$
+\\
+$\displaystyle
+\nabla_{\!\bm P}\, J =\bm 0 \implies
+\frac{1}{P_i+N_i} = \lambda \cdot 2\ln 2 \implies
+B = \frac{1}{2\lambda\ln 2} = P_i + N_i
+$
+\\[8pt]
+Find optimal $B$ s.t.
+$\displaystyle
+\begin{cases}
+P_i = (B-N_i)^+\\
+\sum_i P_i = P \\
+\end{cases}\, \text{ where: } (x)^+ = x \cdot \mathbbm{1}_{[0,\infty)}(x)
+$
+\\[8pt]
+$
+\displaystyle
+P = \sum_{i=1}^n P_i = n B - \sum_i N_i$ and solve for $B$;
+if any $P_i<0$, set $P_i=0$ (drop that channel, i.e.\,reduce $n$); check and repeat until it is valid.
+\\[6pt]
+Alternative validity test ($N_K$: largest noise among the included channels): $\displaystyle
+\sum_i (N_K-N_i) \le P
+\iff
+B - N_K \ge 0
+$
+\end{mytextcol}
+\\&
+C = \frac{1}{2}\sum_{i=1}^n\log\left(
+1+\frac{P_i}{N_i}
+\right) \text{ [bits/tx.]}
+\\
+(O)FDM Gaussian channel: &
+\begin{mytextcol}
+$n$ parallel band-limited attenuated Gaussian channels. \\
+Per-channel BW: $W_\Delta = \frac{W}{n}$, channel gain $H_i$, ch.\@ noise PSD: $N_{0,i}/2$.
+$\displaystyle
+C = \sum_{i=1}^n
+W_\Delta \log\left(
+1+\frac{P_i |H_i|^2}{N_{0,i} W_\Delta}
+\right)
+$\\[4pt]
+Attained by $P_i$ obtained via water-filling power allocation:\\[4pt]
+Find optimal $B$ s.t. $\displaystyle
+\begin{cases}
+P_i = W_\Delta\cdot \left(B-\frac{N_{0,i}}{|H_i|^2}\right)^+\\
+\sum_i P_i = P \\
+\end{cases}
+$\\
+Compute $\displaystyle\sum_i P_i = P$ and solve for $B$, check and repeat until it is valid.
+\end{mytextcol}
+\end{mytable}
+
+\subsection*{MIMO Gaussian channels}
+\begin{mytable}
+MIMO Gaussian channel: &
+\bm Y = H\cdot \bm X + \bm Z, \text{ where } \bm Y, \bm Z \in \mathbb{R}^{n_r}, \, \bm X \in \mathbb{R}^{n_t}, \, H\in \mathbb{R}^{n_r\times n_t},\newline\phantom{MMMMMMMMMM...} \bm Z \sim \mathcal N(\bm 0, \Lambda_{\bm Z})
+\\[-8pt]&
+\begin{mytextcol}
+Tx.\@ power constraint: $
+ \text{trace}(\Lambda_{\bm X}) =
+ \sum_i \mathbb E [X_i^2] =
+ \sum_i P_i \le P$
+\end{mytextcol}
+\\&
+\begin{mytextcol}
+$\displaystyle
+I(\bm X; \bm Y) = H(\bm Y)- H(\bm Y | \bm X)
+= H(\bm Y)- H(\bm Z) \le \frac{1}{2}\log \text{Det}(\Lambda_{\bm Y} \Lambda_{\bm Z}^{-1})$
+\\[4pt]
+equality iff $\bm Y \sim \mathcal N(\bm 0, \Lambda_{\bm Y})$
+and $\Lambda_{\bm Y}= H \cdot \Lambda_{\bm X} \cdot H^T+ \Lambda_{\bm Z}$
+\\[4pt]
+$\displaystyle
+I(\bm X; \bm Y) \le \frac{1}{2}\cdot
+\log \text{Det}(I+H\Lambda_{\bm X} H^T\Lambda^{-1}_{\bm Z})
+$
+\\
+We can assume i.i.d.\@ noise per channel, $\Lambda_{\bm Z} = N\cdot I$
+\\[4pt]
+SVD decomposition + water filling:
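+% sketch of the standard SVD + water-filling argument:
+\\[4pt]
+With the SVD $H = U\cdot \Sigma\cdot V^T$ ($U, V$ orthogonal; $\Sigma$ diagonal with singular values $\sigma_i$), define $\tilde{\bm Y} = U^T\bm Y$, $\tilde{\bm X} = V^T\bm X$, $\tilde{\bm Z} = U^T\bm Z \sim \mathcal N(\bm 0, N\cdot I)$ (the power constraint is unchanged since $V$ is orthogonal).
+\\[4pt]
+Then $\tilde Y_i = \sigma_i\cdot \tilde X_i + \tilde Z_i$: parallel Gaussian channels with noise $N$ and gains $\sigma_i$, so
+$\displaystyle
+C = \sum_{i} \frac{1}{2}\log\left(1+\frac{\sigma_i^2 P_i}{N}\right)$, attained with $P_i = \left(B-\frac{N}{\sigma_i^2}\right)^+$ by water filling s.t.\,$\sum_i P_i = P$.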
+\end{mytextcol}
+\end{mytable}
+
+\subsection*{Discrete input Gaussian channels (digital modulations)}
+\begin{mytable}
+& \text{[TODO]}
+\end{mytable}
+
+\newpage
+\section*{Rate–distortion theory}
+\begin{mytable}
+Rate-distortion function (RDF):
+&
+R(\delta) = \min_{p(\hat{\bm x}|{\bm x}):\,\mathbb{E}[d({\bm x},\hat{\bm x})]\le \delta}\,
+\frac{1}{n}\, I({\bm X};\hat{\bm X})
+\\& \text{Defined for a source (random vector) }\bm X \text{ with a distortion measure }d
+\\
+RDF for i.i.d.\@ source:
+&
+R(\delta) = \min_{p(\hat{ x}|{ x}):\,\mathbb{E}[d({ x},\hat{ x})]\le \delta}
+I(X;\hat{X})
+\\[-8pt]
+& \text{(with one channel use per source symbol: tx.\@ within distortion $\delta$ is possible if $R(\delta) < C$,}\newline\text{impossible if $R(\delta) > C$)}
+\\
+Hamming RDF for Bernoulli($p$) source:
+& R(\delta) = (h_2(p) - h_2(\delta))\cdot\mathbbm{1}_{[0, \min(p,1-p)]}(\delta)
+\\[-12pt]
+& \text{usually $p=1/2$: equip.\@ binary source}
+\\[-12pt] &\quad \implies R(\delta) = (1 - h_2(\delta))\cdot \mathbbm 1_{[0,0.5]}(\delta)
+\\
+Coding rate bound for distortion $\delta$:
+&
+R \le \frac{C}{R(\delta)}\quad \text{(noisy source--channel coding theorem, [th.\@ 5.1, McEliece])}
+\\
+Coding rate bound for BSC($\varepsilon$):
+&
+R \le \frac{1 - h_2(\varepsilon)}{1 - h_2(P_b)}; \quad P_b = \delta = \text{``acceptable'' BER after decoding}
+\\[8pt]
+C.r. bound for digital Gaussian ch.:
+&
+R \le \frac{\frac{1}{2}\log\left(1+2R\frac{E_b}{N_0}\right)}{1 - h_2(P_b)}
+\quad\text{ (for BIAWGN or BAWGNC)}
+\\[8pt]&
+\implies
+\boxed{
+ \frac{E_b}{N_0} \ge
+ \frac{
+ 2^{2R(1-h_2(P_b))}
+ -1
+ }{2R}
+} \underset{R\to 0}{>}
+ (1 - h_2(P_b)) \ln (2)
+\\[8pt] &
+\text{Using $P_b\to 0$ gives the error-free (capacity) expressions.}
+\\[-8pt] &
+\begin{mytextcol}
+\footnotesize
+Note: (from [McEliece]) a lossy compressor can be built from a channel code by using the decoder at the transmitter and the encoder at the receiver.
+\end{mytextcol}
+\end{mytable}
+
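+As a quick numerical check, the boxed $E_b/N_0$ bound above can be evaluated directly; a minimal Python/NumPy sketch (function names are illustrative, not part of any course code):
+
+\begin{lstlisting}[language=Python]
+import numpy as np
+
+def h2(p):
+    """Binary entropy function h2(p) in bits."""
+    if p in (0.0, 1.0):
+        return 0.0
+    return -p * np.log2(p) - (1 - p) * np.log2(1 - p)
+
+def ebn0_min_db(R, Pb=0.0):
+    """Minimum Eb/N0 in dB from the bound (2^(2R(1-h2(Pb))) - 1) / (2R)."""
+    ratio = (2 ** (2 * R * (1 - h2(Pb))) - 1) / (2 * R)
+    return 10 * np.log10(ratio)
+
+print(ebn0_min_db(0.5))        # 0.00 dB for R = 1/2, error-free (Pb = 0)
+print(ebn0_min_db(1e-6))       # -> -1.59 dB as R -> 0
+print(ebn0_min_db(0.5, 1e-3))  # slightly less demanding if Pb > 0 is tolerated
+\end{lstlisting}
+
+
+\end{document}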