From 6fdaab1cc0f230073a625123dbc9e06b88d9dc32 Mon Sep 17 00:00:00 2001 From: japm48 Date: Sat, 3 Jun 2023 23:07:46 +0200 Subject: [PATCH] Initial Overleaf Import --- fix-unnumbered-sections.sty | 16 + homework.cls | 156 ++++ img_Polyanskiy_mutual.pdf | Bin 0 -> 13417 bytes main.tex | 1765 +++++++++++++++++++++++++++++++++++ 4 files changed, 1937 insertions(+) create mode 100644 fix-unnumbered-sections.sty create mode 100644 homework.cls create mode 100644 img_Polyanskiy_mutual.pdf create mode 100644 main.tex diff --git a/fix-unnumbered-sections.sty b/fix-unnumbered-sections.sty new file mode 100644 index 0000000..7caa7bf --- /dev/null +++ b/fix-unnumbered-sections.sty @@ -0,0 +1,16 @@ +\ProvidesPackage{fix-unnumbered-sections} + +% This package is available at http://tex.stackexchange.com/q/33696/ +% It patches the standard classes so that they treat unnumbered and numbered sections equally! (At least, I've tested it with the article class.) +% As a result, even the unnumbered sections gets entries in the TOC and, when hyperref is loaded, they also get bookmarks as per the default hyperref setting for bookmarks. +% Unnumbered sections can also have short titles for TOC and bookmark purposes, just like numbered sections. +% +% It works by pretending the unnumbered sections are deeper than \c@secnumdepth (in fact, just by assuming that they are 1000 (\@m) levels deep). +% I believe there are no side effects to this... + +% We just pretend that \@ssect, which LaTeX uses to treat unnumbered [sub*]sections differently to (maybe) numbered ones (if they are no deeper than \c@secnumdepth levels), doesn't exist. +% Instead, we always use \@sect, which LaTeX uses for numbered sections, and sections which would otherwise be numbered if they weren't deeper than \c@secnumdepth levels. 
+ + +\RequirePackage{etoolbox} +\patchcmd{\@startsection}{\@ssect{#3}{#4}{#5}{#6}}{\@dblarg{\@sect{#1}{\@m}{#3}{#4}{#5}{#6}}}{}{\PackageError{fix-unnumbered-sections}{Unable to patch \string\@startsection; are you using a non-standard document class?}\@ehd} diff --git a/homework.cls b/homework.cls new file mode 100644 index 0000000..c8ee5dd --- /dev/null +++ b/homework.cls @@ -0,0 +1,156 @@ +\NeedsTeXFormat{LaTeX2e} + +\ProvidesClass{homework}[2014/12/16 Class file for homework assignments] + +\LoadClassWithOptions{article} + + + +% ----- Options --------------------------------------------------------------- +%\newcommand\@opnewpage{0} +%\DeclareOption{newpage}{\renewcommand\@opnewpage{1}} +%\newcommand\@oplargemargins{0} +%\DeclareOption{largemargins}{\renewcommand\@oplargemargins{1}} +%\ProcessOptions + + +% ----- Packages -------------------------------------------------------------- + +% Better fonts with accents +\RequirePackage[T1]{fontenc} + +% Required for starred commands +\RequirePackage{suffix} + +\RequirePackage{parskip} + +% Math symbols +\RequirePackage{amsmath} +\RequirePackage{amsfonts} +\RequirePackage{amsthm} +\RequirePackage{amssymb} +\RequirePackage{centernot} + +% Nice lists +\RequirePackage{enumerate} +\RequirePackage{enumitem} + +% Nice images, figures, and listings +\RequirePackage{graphicx} +\RequirePackage{grffile} +\RequirePackage[all]{xy} +\RequirePackage{wrapfig} +\RequirePackage{fancyvrb} +\RequirePackage{listings} + +% Conditionals +\RequirePackage{ifthen} + +% Header & Page Setup +\RequirePackage{fancyhdr} +%\ifthenelse{\equal{\@oplargemargins}{1}}{}{\RequirePackage{fullpage}} +\RequirePackage{fullpage} + +\RequirePackage[a4paper, + hmargin=1.5cm, + vmargin=1cm, + includeheadfoot]{geometry} + +\setlength{\headheight}{12pt} +\addtolength{\headsep}{0.7cm} +\addtolength{\textheight}{-0.6cm} + +% Links +\RequirePackage{hyperref} + +% ----- Questions ------------------------------------------------------------- +\newcounter{questionCounter} +\newcounter{partCounter}[questionCounter] + + +% +\WithSuffix\providecommand\assignment*{} +\WithSuffix\renewcommand\assignment*[1]{% + % Wrap in minipage so that we don't get a line break anywhere in between + \begin{minipage}{\linewidth}% + \setcounter{partCounter}{0}% + \vspace{.2in}% + \noindent{\bf \large Assignment #1}% + \vspace{0.3em} \hrule \vspace{.1in}% + \end{minipage} +} + + + +% ----- Question Parts -------------------------------------------------------- + +\newenvironment{alphaparts}[0]{% + \begin{enumerate}[label=\textbf{(\alph{partCounter})}]% +}{\end{enumerate}} + +\newenvironment{arabicparts}[0]{% + \begin{enumerate}[label=\textbf{\arabic{questionCounter}.\arabic{partCounter}})]% +}{\end{enumerate}} + +\newcommand{\questionpart}[0]{\stepcounter{partCounter}\item} + + +% ----- Answer Box ------------------------------------------------------------ + +\newcommand{\answerbox}[1]{% +\begin{framed} +\vspace{#1} +\end{framed}} + +% ----- Page Setup ------------------------------------------------------------ + +% Use block style paragraphs +\setlength{\parindent}{0pt} +%\setlength{\parskip}{5pt plus 1pt} +%\setlength{\parskip}{3pt plus 1pt} + +\def\indented#1{\list{}{}\item[]} +\let\indented=\endlist + +% ----- Title & Header -------------------------------------------------------- +% \pagestyle{empty} +\pagestyle{fancy} + +\newcommand{\mysetupheader}{ + % Setup header + % \setlength{\headheight}{15.2pt} + % \setlength{\headsep}{0.2in} + \lhead{\hwclassshort{} -- \hwshorttitle{}}% + \rhead{\hwauthor{}}% + + 
\renewcommand{\headrulewidth}{0.4pt}
+ \renewcommand{\footrulewidth}{0.4pt}
+}
+
+%\if\@opanon%
+ \renewcommand{\maketitle}[0]{%
+ \mysetupheader
+
+ % Setup hrule in header
+ \renewcommand{\headrulewidth}{0pt}
+ \headrule{}
+ % Don't put header on first page
+ \thispagestyle{plain}
+
+ \begin{center}
+ {\LARGE \hwtitle{}}
+
+ {\Large \hwclass{}}
+
+ Student: \hwauthor{} \hwauthorextra{}
+
+ \end{center}
+ \renewcommand{\headrulewidth}{0.4pt}
+ }
+
+% ----- For usage with pandoc converted documents -----------------------------
+
+\providecommand{\tightlist}{%
+ \setlength{\itemsep}{1pt}\setlength{\parskip}{0pt}}
+
+% -----------------------------------------------------------------------------
diff --git a/img_Polyanskiy_mutual.pdf b/img_Polyanskiy_mutual.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0c9b942a4c422dfd28333f4d9493920c969d2aa0 GIT binary patch literal 13417 [base85 image data for img_Polyanskiy_mutual.pdf omitted] literal 0 HcmV?d00001 diff --git a/main.tex b/main.tex new file mode 100644 index 0000000..68c0f5b --- /dev/null +++ b/main.tex @@ -0,0 +1,1765 @@
+\documentclass[10pt]{homework}
+
+\usepackage[utf8]{inputenc}
+
+\usepackage{amsmath}
+\usepackage{amssymb}
+
+\usepackage[english]{babel}
+
+\usepackage{blindtext}
+\usepackage{minted}
+\usepackage{braket}
+
+\usepackage{longtable}
+
+\usepackage{parskip}
+
+\usepackage[open,openlevel=1]{bookmark}
+\bookmarksetup{}
+
+\usepackage{fix-unnumbered-sections}
+
+
+% \usepackage{scrextend}
+% \deffootnote{1.5em}{0em}{\textsuperscript\thefootnotemark\,}
+% \setlength{\footnotesep}{11pt}
+
+% \hypersetup{
+% colorlinks=true,
+% urlcolor=blue,
+% }
+
+\newcommand{\hwauthor}{JAPM}
+\newcommand{\hwauthorextra}{}
+\newcommand{\hwtitle}{Notes}
+\newcommand{\hwshorttitle}{Notes}
+
+% CHANGE THESE ONLY ONCE PER CLASS
+\newcommand{\hwclass}{Information Theory}
+\newcommand{\hwclassshort}{Information Theory}
+
+\newcommand{\mysep}{\vspace{0.5em} \hrule \vspace{.1in}}
+\usepackage{stackrel}
+
+\DeclareMathOperator*{\mysimbig}{\scalebox{2.75}{\raisebox{-0.35ex}{$\sim$}}}
+\newcommand{\simover}[1]{
+ \overset
+ {\resizebox{0.27in}{!}{#1}}
+ {\resizebox{0.30in}{!}{$\mysimbig$}}
+}
+
+\newcommand{\pluseq}{\mathrel{+}=}
+\newcommand{\lelem}[1]{% elementary operations (left alignment)
+ \begin{subarray}{l}#1\end{subarray}%
+}
+
+
+\usepackage{bbm}
+
+\usepackage{nicematrix}
+\usepackage{tikz}
+\usetikzlibrary{fit,shapes.geometric}
+\tikzset{highlight/.style={rectangle, draw=black!40, semithick, inner sep=2pt}}
+
+\newcommand{\tikzHlcol}[2]{
+ \tikz \node [highlight, fit=(1-#1) (#2-#1)] {} ;
+}
+\newcommand{\tikzHlrow}[2]{
+ \tikz \node [highlight, fit=(#1-1) (#1-#2), inner sep=1pt] {} ;
+}
+
+\usepackage{array} % for \newcolumntype macro
+\newcolumntype{M}[1]{>{$\displaystyle\quad}p{#1}<{$}}
+
+\usepackage{mathtools}
+
+\usepackage{bm}
+
+% https://tex.stackexchange.com/questions/343494/
+\usepackage{xpatch}
+\makeatletter
+\AtBeginEnvironment{minted}{\dontdofcolorbox}
+\def\dontdofcolorbox{\renewcommand\fcolorbox[4][]{##4}}
+\xpatchcmd{\inputminted}{\minted@fvset}{\minted@fvset\dontdofcolorbox}{}{}
+\xpatchcmd{\mintinline}{\minted@fvset}{\minted@fvset\dontdofcolorbox}{}{} % see https://tex.stackexchange.com/a/401250/
+\makeatother
+
+\newcommand{\indep}{\perp \!\!\! 
\perp} + +\newenvironment{mytable} + { % begin + \bgroup + \centering + \def\arraystretch{2.3}% 1 is the default, change whatever you need + \begin{longtable}{wr{0.34\textwidth}M{0.66\textwidth}} + } + { % end + \end{longtable} + \egroup + } + +\newenvironment{mytextcol} + { % begin + \begin{minipage}[t]{0.6\textwidth} + } + { % end + \end{minipage} + } + +% Use for publications... +%\newcommand{\entr}{\mathcal{H}} + +\usepackage[overlay,absolute]{textpos} +\newcommand\PlaceText[3]{% +\begin{textblock*}{10in}(#1,#2) %% change width of box from 10in as you wish +#3 +\end{textblock*} +}% + +\begin{document} + +\PlaceText{0.88\textwidth}{\voffset+1.6cm}{Version: \texttt{\detokenize{2023_06_03_0}}} +\mysetupheader +% TODO: fix this... +% \vspace{1cm}\phantom{M} +%%%% +The author of this document is the sole responsible for all the typos/mistakes found in it and would be grateful to receive corrections and constructive feedback in general. +\\[-20pt] +\section*{References} +\begin{itemize} + \item + Stefan Höst, \emph{Information and Communication Theory}, 1st ed., ISBN: 978-1-119-43378-1, Lund University. + +% TODO: add McEliece book !! + + \item Raymond W.\@ Yeung, \emph{Information Theory and Network Coding}, 1st ed., ISBN: 978-0-387-79234-7, CUHK. +1 + \item + Akshay Krishnamurthy, Aarti Singh, \emph{10-704 Lecture Notes}, Winter 2016-2017, CMU. + \\ + \url{https://www.cs.cmu.edu/~aarti/Class/10704_Spring15/lecs.html} + + \item + Eirik Rosnes, \emph{INF 144}, Spring 2017, UiB. + \\ + \url{https://folk.uib.no/st03333/INF144_2017/Lectures/} + +% TODO: http://isl.stanford.edu/~abbas/ee376b/lect02.pdf + \item + David Tse, \emph{EE376A Lecture Notes}, Winter 2016-2017, Stanford University. + \\ + \url{https://tselab.stanford.edu/mirror/ee376a_winter1617/lectures.html} + + \item + Stefan M. Moser, \emph{IT Lecture Notes}, version 6.9. ETHZ. + \\ + \url{https://moser-isi.ethz.ch/docs/it_script_v69.pdf} + + % https://www.icg.isy.liu.se/courses/infotheory/ + +% Robert Gallager +% https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-450-principles-of-digital-communications-i-fall-2006/lecture-notes/ + +% https://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-451-principles-of-digital-communication-ii-spring-2005/readings-and-lecture-notes/ + + \item + Yury Polyanskiy, Yihong Wu, \emph{Lecture notes on Information Theory}, v.\@\! 2019-May-15, MIT LIDS. + \\ + Too advanced! 
(but really good), requires measure theory notions.\\
+ \url{http://people.lids.mit.edu/yp/homepage/data/itbook-export.pdf}
+\end{itemize}
+
+\section*{Probability}
+
+\subsection*{Basic definitions}
+
+\begin{mytable}
+
+Conditional probability: &
+P(A|B) = \frac{P(A \cap B)}{P(B)}
+\,;\quad
+p_{X|Y}(x|y) = \frac{p_{X,Y}(x,y)}{p_Y(y)}
+\\[-8pt]&
+\text{given }P(B)\neq 0,\, p_Y(y)\neq 0
+\\
+Independence ($X \indep Y$):
+&
+p_{XY}(x,y) = p_{X}(x)\cdot p_{Y}(y)
+\iff
+p_{X|Y}(x|y) = p_{X}(x),\, \forall y\, (\text{with } p(y)\neq 0)
+\\
+
+Marginalization (total probability): &
+P(X=x) = \sum_{y\in \mathcal Y} P(X=x,Y=y) = \sum_{y\in \mathcal Y} P(Y=y)\cdot P(X=x|Y=y) \\
+
+ &
+p_X(x) = \sum_{y\in \mathcal Y} p_{XY}(x,y) = \sum_{y\in \mathcal Y} p_Y(y)\cdot p_{X|Y}(x|y) \\
+
+Expectation: &
+\mathbb{E}[X] = \sum_{x\in \mathcal X} x\cdot p_X(x) = \mu_X; \quad
+\mathbb{E}[g(X)] = \sum_{x\in \mathcal X} g(x)\cdot p_X(x)
+\\
+
+Variance: &
+\mathbb{V}[X] = \mathbb{E} [(X-\mu_X)^2] = \sigma_X^2 \text{, where } \mu_X = \mathbb{E}[X]\\
+Power (Mean Square): & \mathbb{E}[X^2] = \mathbb{V}[X] + (\mathbb{E}[X])^2
+\\
+
+Covariance: &
+\text{Cov}(X,Y) = \mathbb{E}_{XY} [(X-\mu_X)\cdot (Y-\mu_Y)] = \mathbb{E}_{XY} [X\cdot Y] -\mu_X \cdot \mu_Y
+\\
+
+RV linear combination: &
+Y = \sum_{i=1}^{N}\alpha_i\cdot X_i \implies
+\mathbb{E}[Y]=\sum_{i=1}^{N}\alpha_i\cdot \mathbb{E}[X_i]
+\\ &
+\mathbb{V}[Y] = \sum_{i=1}^{N}\alpha_i^2\cdot \mathbb{V}[X_i]
++ 2\cdot \sum_{i=1}^{N}\sum_{j=i+1}^{N} \alpha_i\cdot\alpha_j\cdot\text{Cov}(X_i, X_j)
+\\
+
+Entropy (discrete RV): &
+H(X) = \mathbb{E}_X[-\log_2(p_X(X))]\text{ bits};
+\quad H_e(X) = \ln 2\cdot H(X)\text{ ``nats''}
+\\[-8pt]
+& H(X) = H(p_1, p_2, \cdots, p_n) = \sum_i p_i\cdot \log \frac{1}{p_i}
+\\
+
+Differential entropy (continuous RV): &
+H_\text{Dif}(X) = \mathbb{E}_X[-\log_2(f_X(X))] = \int_{\mathbb{R}} f_X(x)\cdot\log_2 \frac{1}{f_X(x)}\, dx, \newline\phantom{M} (X \text{ with pdf } f_X)
+\\\pagebreak
+
+Wide-Sense Stationary (WSS) process: &
+\mathbb{E}[X_i] = \mathbb{E}[X_1]; \quad r_{XX}(n, n+k) = r_{XX}(k)
+\\
+(Strongly/Strictly) Stationary process: &
+p_X(x_{1},x_{2}, \cdots ,x_{n}) =
+p_X(x_{1+\Delta},x_{2+\Delta}, \cdots ,x_{n+\Delta})
+\newline\phantom{M}
+\text{(time shift invariance)}
+\\
+
+Ergodic process: &\text{WSS and (in mean) }
+\hat\mu_X = \langle X_i \rangle_N = \frac{1}{N}\sum_{i=1}^{N} X_i = \mathbb{E}[X_0] = \mu_X\\
+% &\text{(in autocovariance) }\newline\phantom{MM} \hat r_{XX}(\tau) = (????) = \mathbb{E}[(X_t-\mu_{X_t})(X_{t-\tau}-\mu_{X_{t-\tau}})] = r_{XX}(\tau)
+% \\
+\end{mytable}
+
+\subsection*{Basic properties}
+
+\begin{mytable}
+Bayes' theorem: &
+p_{X|Y}(x|y) =
+\frac{p_{Y|X}(y|x)\cdot p_X(x)}{p_Y(y)} =
+\frac{p_{Y|X}(y|x)\cdot p_X(x)}{
+ \sum\limits_{x^\prime\in\mathcal{X}} p_{Y|X}(y|x^\prime)\cdot p_X(x^\prime)
+}
+\\
+
+Chain rule: &
+P(X_1,X_2, \dots, X_n) = P(X_1,X_2, \dots, X_{n-1})
+\cdot P(X_n|X_1, X_2, \dots, X_{n-1})
+\\
+ &
+= \cdots =
+\overbrace{\underbrace{P(X_1)\cdot P(X_2|X_1)}_{P(X_1,X_2)}
+\cdot P(X_3|X_1,X_2)}^{P(X_1,X_2,X_3)} \cdots
+ P(X_n|X_1, X_2, \dots, X_{n-1})
+\\[-4pt]
+ &
+= \prod_{i=1}^n P(X_i | X_1, X_2, \dots, X_{i-1}), \quad \text{(convention: first factor is $P(X_1)$)}
+\\
+
+Markov's inequality: &
+P(X>a) \le \frac{\mathbb{E}[X]}{a}
+\quad \text{($X$ is a nonnegative RV)}
+\\
+Chebyshev's inequality: &
+P\left(\left|X-\mathbb{E}[X]\right|>\varepsilon\right) \le \frac{\mathbb{V}[X]}{\varepsilon^2};
+\quad
+P\left(\left|X-\mathbb{E}[X]\right|>\frac{\sigma_X}{\varepsilon}\right) \le \varepsilon^2
+\\
+Weak Law of Large Numbers (LLN): & Y_N = \frac{1}{N}\sum_{i=1}^{N} X_i \quad (X_i \text{ i.i.d.});
+\quad
+P\left(\left|Y_N-\mathbb{E}[X]\right|>\varepsilon\right) \le \frac{\mathbb{V}[X]}{N\cdot\varepsilon^2}
+\\&
+\lim_{N\to\infty} P\left(\left|Y_N-\mathbb{E}[X]\right|<\varepsilon\right) = 1;
+\quad
+Y_N \xrightarrow[]{P} \mathbb{E}[X]
+\\
+Convergence in probability:
+& Y_N \xrightarrow[]{P} Y \iff \lim_{N\to\infty} P\left(\left|Y_N-Y\right|>\varepsilon\right) = 0,\ \forall\varepsilon>0
+\\
+Central Limit Theorem (CLT): & Y_N = \frac{1}{N}\sum_{i=1}^{N} X_i \quad (X_i \text{ i.i.d., } \mathbb{E}[X_i] = \mu,\,\mathbb{V}[X_i]=\sigma^2)
+\\&
+\frac{Y_N-\mu}{\sigma/\sqrt{N}}
+\,\simover{$(N\to\infty)$}\,
+\mathcal{N}(0,1)
+\\
+Convexity:&
+f(\lambda \cdot x_1+(1-\lambda) \cdot x_2) \le
+ \lambda\cdot f( x_1 ) + (1-\lambda)\cdot f( x_2 ),\, \forall\lambda \in [0,1]
+\\
+Concavity:& \text{$f$ is convex $\iff$ $-f$ is concave}
+\\
+
+Jensen's inequality: & g \text{ is convex} \implies \mathbb{E}[g(X)] \ge g(\mathbb{E}[X])
+\\
+
+Log-sum inequality: & \sum_i a_i \cdot \log \frac{a_i}{b_i} \ge
+A \cdot \log \frac{A}{B}; \quad A =\sum_i a_i; \, B =\sum_i b_i; \, a_i\ge 0,\,b_i\ge 0
+\\
+
+IT inequality: & \log_B (r) \le (r-1)\cdot \log_B(e); \\[-10pt]&
+\text{``$\log x$ is concave so it lies below its tangent line at $(1,0)$''}
+\\
+
+\end{mytable}
+
+\newpage
+\subsection*{Discrete Distributions}
+
+Hint -- Use combinatorics: \url{https://en.wikipedia.org/wiki/Urn_problem}
+
+\begin{mytable}
+
+Uniform (discrete): & X\sim U(n),\quad
+p_X(x) = \frac{1}{|\mathcal{A}|}\cdot \mathbbm{1}_\mathcal{A}(x),\quad \mathcal{A} = \{0,\dots, n-1\} \subset \mathbb{N}, \, |\mathcal{A}| = n
+\\
+& \mathbb{E}[X] = \frac{n-1}{2}; \quad \mathbb{V}[X] = \frac{n^2-1}{12};
+\quad H(X) = H\left(\tfrac{1}{n}, \tfrac{1}{n}, \dots, \tfrac{1}{n}\right) = \log_2 n
+\\
+
+Bernoulli:&\text{``heads or tails (loaded coin), black/white ball, success/failure, 1 or 0, ...''}
+\\[-8pt]
+&
+X\sim\text{Be}(p); \quad
+P(X=\text{``1''}) = p = 1 - P(X=\text{``0''})
+\\[-12pt]
+& \mathbb{E}[X] = p; \quad \mathbb{V}[X] = p\cdot (1-p);
+\quad H(X) = h_2(p) = H(p, 1-p)
+\\[-6pt]
+&\begin{mytextcol}
+``Probability of $k$ successes (with prob.\,$p$) in $n$ attempts'' = \\= ``Probability of a binary string of length $n$ with $k$ ones'' =\\ = $P(\bm X=\bm x)=p^k\cdot (1-p)^{n-k}$
+\end{mytextcol}
+\\
+Geometric: &
+\text{``infinite attempts where the first success is at the }k\text{-th attempt''}
+\\ &
+X\sim\text{Ge}(p),
+\quad p=P(X=\text{``}\underbrace{\text{00...01}}_k\text{...''})
+\\
+&
+p_X(k)= p\cdot (1-p)^{k-1};
+\quad
+\mathbb{E}[X] = 
\frac{1}{p}; \quad \mathbb{V}[X] = \frac{1-p}{p^2}; \quad H(X) = \frac{h_2(p)}{p} +\\ +Binomial: +& +\text{``number of ones (successes) out of }n\text{ attempts (with replacement)''} +\\& +X\sim \text{Bin}(n,p), +\quad p=P(X_{\text{Bern}(i)}=\text{``1''}) +\\& +p_X(k) = \binom{n}{k}\cdot p^k\cdot (1-p)^{n-k} +\\& +\mathbb{E}[X] =n\cdot p ; \quad \mathbb{V}[X] = n\cdot p\cdot (1-p); \\& H(X) = \frac{1}{2}\cdot \log_2\left(2\pi \cdot e\cdot n\cdot p\cdot (1-p)\right) + \mathcal{O}(1/n) +\\ + +Hypergeometric: +& +\begin{mytextcol} +``number of black balls (successes) out of $n$ total balls drawn \emph{without replacement} from an urn with initially $N$ balls, of which $K$ are black''. +\end{mytextcol} +% +\\& +X\sim \text{Hypergeometric}(N,K,n);\quad \left(n=1\implies X\sim Be(p), \, p=K/N\right) +\\& +p_X(k) = \frac{\binom{K}{k}\binom{N-K}{n-k}}{\binom{N}{n}} +;\quad \mathbb{E}[X] =n\cdot \frac{K}{N} +\\& +\begin{mytextcol} +\footnotesize{Sometimes `hypergeometric' refers to the multivariate hypergeometric distribution (multiple colors), here we refer only to the \emph{bivariate} distribution (2 colors: black and white). Also, this is different from the `multinomial distribution' which is the `with replacement' case, the generalization of the binomial distribution.} +\end{mytextcol} +\end{mytable} + +%\pagebreak +\subsection*{Continuous Distributions} +\begin{mytable} +Uniform (continuous): & X\sim U(a, b),\quad +f_X(x) = \frac{1}{\Delta}\cdot \mathbbm{1}_\mathcal{A}(x),\quad \mathcal{A} = [a, b] \subset \mathbb{R}, \, \Delta = b - a +\\ +& \mathbb{E}[X] = \frac{a+b}{2}; \quad \mathbb{V}[X] = \frac{\Delta^2}{12}; \quad H_\text{Dif}(X) = \log_2 \Delta +\\ + +Gaussian distribution: & X \sim \mathcal N(\mu, \sigma), \,\sigma > 0 +\\ +& f_X(x)=\frac{1}{\sqrt{2\pi\sigma^2}}\cdot +\exp\left[-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^2\right] +\\ +& \mathbb{E}[X] = \mu; \quad \mathbb{V}[X] = \sigma^2; \quad H_\text{Dif}(X) = +\frac{1}{2} +\log_2(2\pi\cdot e\cdot \sigma^2) +\\ + +Exponential distribution: & X \sim \text{Exp}(\lambda), \,\lambda > 0 \implies +f_X(x) = \lambda \cdot e ^ {-\lambda x}, \, x\ge 0 +\\ +& \mathbb{E}[X] = \frac{1}{\lambda}; \quad \mathbb{V}[X] = \frac{1}{\lambda^2} +\\ +\end{mytable} + +\newpage +\section*{Information and entropy} + +\begin{mytable} +Hartley's information measure: & I_H(X) = \log k = \log |\mathcal{X}| +\\[-8pt]& +\text{Number of possible outcomes (disregarding probabilities).} +\\[-8pt]& +I_H(X_1,X_2, \cdots, X_n) = \log k^n = n\cdot I_H(X) +\\ +Mutual information (between events): +& I(A;B) = \log \frac{P(A|B)}{P(A)} = \log \frac{P(A\cap B)}{P(A)\cdot P(B)} = I(B;A) +\\ +Bounds on $I(A;B)$: +& +-\infty \le I(A;B) \le \min \{ + -\log P(A), \, -\log P(B) +\} +\\& +\begin{cases} + P(A|B) = 0 \implies + ( B \Rightarrow \lnot A ) \implies + A \cap B = \varnothing \implies + I(A;B) = -\infty +\\ + P(A|B) = 1 \implies + ( B \Rightarrow A ) \implies + A \subseteq B \implies + I(A;B) = -\log P(A) +\\ + P(B|A) = 1 \implies + ( A \Rightarrow B ) \implies + B \subseteq A \implies + I(A;B) = -\log P(B) +\\ + P(A|B) = P(A) \implies + A \indep B \implies I(A;B) = 0 +\end{cases} +\\ +Self-information (of an event):& +I(A) = I(A;A) = -\log P(A) \ge 0 +\\ +Entropy (of a discrete RV):& +H(X) = \mathbb{E}_x[I(X=x)] = \mathbb{E}_X[-\log p_X(X)] = -\sum_x p_X(x)\cdot \log p_X(x) +\\[-12pt]& +H(X)\ge 0 +\\[-8pt]& +\text{Notation: }H(p_1, p_2, \cdots, p_n) = -\sum_i p_i\cdot \log p_i, \,\,\, \text{ given } \sum_i p_i = 1, p_i\ge 0 +\\ +Binary entropy function: +& h_2(p) = 
H(p, 1-p),\, 0\le p\le 1; \, \quad \frac{\partial h_2(p)}{\partial p} = \log_2 \frac{1-p}{p} = -\mathrm{logit}_2\,p
+\\
+Entropy bounds:& 0 \le H(X) \le \log k, \,\, k = |\mathcal{X}|
+\\&
+H(X)=0 \Rightarrow X\text{ deterministic};\,H(X)=\log k \Rightarrow X\sim\text{Uniform}(k)
+\\&
+H(X) \le \log k = I_H(X=x) = H\left(\frac{1}{k}, \frac{1}{k}, \cdots, \frac{1}{k}\right)
+\\
+Property for 3 outcomes:
+&H(p_1,p_2,p_3) = h(p_1) + (1-p_1)\cdot h\left(\frac{p_2}{1-p_1}\right), \text{ given }p_1\neq 1
+\\
+Property for $N$ outcomes:
+&H(p_1,p_2,\cdots,p_N) = h(p_1) + (1-p_1)\cdot H\left(\frac{p_2}{1-p_1},\frac{p_3}{1-p_1},\cdots \frac{p_N}{1-p_1} \right) \newline\phantom{M}\text{given }p_1\neq 1
+\\
+Permutation invariance:
+&
+\begin{mytextcol}
+$H(X) = H(f(X))$, for $f$ injective (usually bijective, i.e.~invertible)\\ (a.k.a.\@ invariance under relabeling)\\[5pt]
+In general: $H(X) \ge H(f(X))$
+\end{mytextcol}
+\\
+Joint entropy: &
+H(X,Y) = \mathbb{E}_{XY}[-\log p(X,Y)] = -\sum_x \sum_y p_{XY}(x,y)\cdot \log p_{XY}(x,y)
+\\[-8pt] &
+H(X_1,X_2, \cdots, X_n) = \mathbb{E}_{\bm X}[-\log p(\bm X)]
+\\
+Entropy conditioned on an outcome: &
+H(X|Y=y) = H(X|\{Y=y\}) = - \sum_x p_{X|Y}(x|y) \cdot \log p_{X|Y}(x|y) \\
+Conditional entropy: &
+H(X|Y) = \mathbb{E}_y[H(X|\{Y=y\})] = \sum_y H(X|\{Y=y\}) \cdot p_Y(y)
+\\&
+H(X|Y) = \mathbb{E}_{XY}[-\log p_{X|Y}(X|Y)] = -\sum_x \sum_y \underbrace{p_{XY}(x,y)}_\text{!!!}\cdot \log p_{X|Y}(x|y)
+\\\pagebreak
+Mutual information (between RVs):
+& I(X;Y) = \mathbb{E}_{XY}[I(X=x;Y=y)] = I(Y;X)
+\\ &
+I(X;Y) = H(X) - H(X|Y) = H(X) + H(Y) - H(X,Y)
+\\&
+\phantom{MM}\includegraphics[width=0.3\textwidth]{img_Polyanskiy_mutual.pdf} \text{\footnotesize{(source: Y.\@ Polyanskiy IT\,lectures)}}
+\\ &
+I(X;Y) =\mathbb{E}_{XY}\hspace{-1.4mm}\left[\log \frac{p_{XY}(x,y)}{p_X(x)\cdot p_Y(y)}\right] = \sum_{x,y}p_{XY}(x,y)\cdot \log\frac{p_{XY}(x,y)}{p_X(x)\cdot p_Y(y)}
+\\
+Conditioning reduces entropy: &
+I(X;Y) = H(X) - H(X|Y) \ge 0 \Rightarrow H(X) \ge H(X|Y) \text{, equal iff }X\indep Y
+\\
+RV self-information is entropy: &
+H(X|X) = 0 \implies I(X;X) = H(X)
+\\
+Mutual info.\,is bounded by entropy:
+& H(X) \ge H(X) - H(X|Y) = I(X;Y)
+\\[-8pt]&\phantom{M}
+\implies I(X;Y) \le \min\{H(X),H(Y)\}
+\\[-8pt]&
+\text{Also: } \max_{p_X(x)} H(X) = \log |\mathcal X| = I_H(X) \\[-8pt]&\phantom{M}
+\implies
+I(X;Y) \le \min\{\log |\mathcal X|,\log |\mathcal Y|\}
+\\
+Conditional mutual information: &
+I(X;Y|Z) =
+\mathbb{E}_{XYZ}\left[\log
+ \frac{p(X,Y|Z)}{p(X|Z)p(Y|Z)}
+\right] =
+\sum_{x,y,z} p(x,y,z)\log
+ \frac{p(x,y|z)}{p(x|z)p(y|z)}
+\\&
+I(X;Y|Z) =
+ H(X|Z)+H(Y|Z)-H(X,Y|Z)
+\\&
+I(X;Y|Z) =
+ H(X|Z)-H(X|Y,Z)=H(Y|Z)-H(Y|X,Z)
+\\
+Chain rule for entropy: &
+H(X,Y) = H(X|Y) + H(Y) = H(Y|X) + H(X) \le H(X) + H(Y)
+\\&
+H(X_1, X_2, \cdots, X_n) = \sum_{i=1}^n H(X_i|X_1,X_2,\cdots X_{i-1}) \le \sum_{i=1}^n H(X_i)
+\\
+KL divergence (a.k.a.\,relative entropy):
+&
+D(p||q) = \mathbb{E}_p\left[\log\frac{p(X)}{q(X)}\right] =
+\sum_x p(x) \cdot \log\frac{p(x)}{q(x)} \ge 0
+\\&
+\text{in general:}
+\begin{cases}
+\text{no symmetry,}&
+ D(p||q) \neq D(q||p)\\
+\text{no triangle inequality,}&
+D(p||q) + D(q||r) \ngeq D(p||r)
+\end{cases}
+\\
+Mutual information as KL div.:
+&
+I(X;Y) = D(p_{XY}(x,y)||p_X(x)\cdot p_Y(y)) \ge 0
+\\
+Entropy rate (of a random process): &
+H_\infty(X) = \lim_{n\to\infty}\frac{1}{n}\cdot H(X_1,X_2,\cdots,X_n)
+\\
+\emph{Alternative} entropy rate: &
+H(X|X^\infty) = \lim_{n\to\infty} H(X_n|X_1,X_2,\cdots,X_{n-1})
+\\[-12pt]
+&\text{Equality in stationary processes: } H_\infty(X) = H(X|X^\infty)
+\\
+Bounds for stationary processes: &
+0\le H_\infty (X)\le H(X) \le \log k; \quad H(X) = H(X_i)
+\end{mytable}
+
+\section*{Markov chains}
+
+\begin{mytable}
+
+Markov property: &
+P(X_i | X_{i-1}, \cdots, X_1) = P(X_i|X_{i-1}),\,\,\forall i>1\\
+&\text{The RVs } X_i\text{ are in a total order (\emph{chain}): } X_1\to X_2 \to \cdots \to X_N
+\\
+Time-invariant (TI) Markov chains: &
+p(x_i|x_{i-1}) = p(x_{i+\ell}|x_{i-1+\ell}) \text{ (assumed in general)}
+\\
+Chain rule for Markov chains: &
+P(X_1,X_2, \dots, X_N) = \prod_{i=1}^N P(X_i | X_1, X_2, \dots, X_{i-1}) =
+\\
+ &
+= P(X_1)\cdot \prod_{i=2}^N P(X_i | X_{i-1})
+\stackrel{\text{(TI)}}{=}
+ P(X_1)\cdot \left( P(X_2 | X_{1})\right)^{N-1}
+\\
+ &
+H(X_1,X_2,\cdots,X_N) =
+ H(X_1) + \sum_{i=2}^{N} H(X_i|X_{i-1})
+\newline\phantom{MMMMMMMM...}
+\stackrel{\text{(TI)}}{=}
+H(X_1) + (N-1)\cdot H(X_2|X_{1})
+\\
+State transition matrix:&
+\mathcal P = [p_{ij}]; \quad p_{ij} = p_{X_2|X_1}(x_j|x_i), \quad i\text{-th row adds up to }1
+\\[-6pt]&
+\pi_k^{(n)} = P(X_n=x_k),
+\quad \bm{\pi}^{(n+1)} = \bm{\pi}^{(n)}\cdot \mathcal P
+\quad \text{($\bm \pi^{(n)}$ are row vectors)}
+\\
+
+Asymptotic distribution:&
+\bm{\pi} = \lim_{n\to \infty} \bm{\pi}^{(n)} = \lim_{n\to \infty} \bm{\pi}^{(0)} \cdot \mathcal P^n; \quad \lim_{n\to \infty} \mathcal P^n =
+\bgroup\def\arraystretch{1}
+\begin{pmatrix}
+ \bm{\pi} \\
+ \vdots \\
+ \bm{\pi} \\
+\end{pmatrix}\egroup ; \quad \bm\pi\cdot \mathcal P = \bm\pi
+\\
+Existence (and uniqueness) of $\bm\pi$:
+& \exists n_0 >0 \text{ s.t.\,all entries of } \mathcal P^{n_0} \text{ are strictly positive (nonzero)} \implies \exists!\bm \pi
+\\[-12pt] &
+\text{(particular case of Perron–Frobenius thrm.\@ for strongly connected graphs)}
+\\
+
+Computation of $\bm\pi$:
+&
+\begin{cases}
+ \bm\pi \cdot (\mathcal P-I) = 0 & \text{ (rank $N-1$: $\mathcal P-I$ has a one-dimensional nullspace)} \\
+ \sum_k \pi_k = 1 & \text{ (extra equation needed for full rank)}
+\end{cases}
+\\
+$H_\infty(X)$ of a stationary Markov chain: &
+H_\infty(X) = H(X_2|X_1)
+\\[-12pt]&
+H_\infty(X) = \sum_i \pi_i\cdot H(X_2|X_1=x_i); \quad
+H(X_2|X_1=x_i)=-\sum_j p_{ij}\log p_{ij}
+\\[-12pt]&
+H_\infty(X)\text{ is the min.\,average info required to encode one transition.}
+\\
+Data processing lemma:
+& X\to Y\to Z \text{ is a Markov chain} \implies
+\begin{cases}
+ I(X;Z) \le I(X;Y) \\
+ I(X;Z) \le I(Y;Z)
+\end{cases}
+\\
+&
+I(X; Z) = H(X) - H(X|Z) \le H(X) - H(X|YZ) =\newline\phantom{MMMMMMMMMMM.M}= H(X) - H(X|Y) = I(X;Y)
+\end{mytable}
+\section*{Source coding}
+
+\begin{mytable}
+Compression ratio: & R = \frac{\#\text{Source bits}}{\#\text{Compressed bits}}, \quad \text{($R > 1$, except in pathological cases)}
+\\
+Classification of source codes: &
+\begin{mytextcol}
+\begin{itemize}
+ \item Non-singular codes: coding is injective (lossless compression)
+
+ $\bm x_1 \neq \bm x_2 \implies C(\bm x_1) \neq C(\bm x_2)$
+ \item Uniquely decodable codes: sequence of symbols is unambiguously decodable (\emph{extension} code is non-singular)
+
+ Sequences $\bm x_1^n \neq \bm x_2^m \implies C_{ext}(\bm x_1^n) \neq C_{ext}(\bm x_2^m)$
+
+ where $C_{ext}(\bm x^n) = \text{Concat}(C(\bm x_{(1)}),...,C(\bm x_{(n)}))$
+ \item Prefix codes: no codeword is a prefix of any other codeword. 
+\end{itemize} +\end{mytextcol} +\\ +Average codeword length: & +L=\mathbb{E}[\ell_x] = \sum_x p_X(x)\cdot\ell_x +\\ +Path length lemma: & +L=\sum_{n_i\in\text{\,``inner nodes''}} p(n_i) +\quad \text{ (valid for prefix codes)} +\\ +Kraft inequality: & +\sum_{i=1}^k D^{-\ell_i}\le 1 \text{ (valid for }D\text{-ary prefix codes)} +\\[-4pt] & +\text{all prefix codes must obey this constraint.} +\\ +McMillan inequality: & +\begin{mytextcol} +Kraft inequality also holds for $D$-ary uniquely decodable codes.\\ +Therefore, those are also subject to the same constraint. +\end{mytextcol} +\\ +$L$, lower bound: & +L \ge H_D(X) = \frac{H(X)}{\log_2 D} +\\ & \text{equality if all }\ell_x =-\log_D p(x) +\text{ (only possible if all are integers)} +\\ +$L$, achievable upper bound: & + L < H_D(X)+1 \text{ (for some }D\text{-ary prefix code, guaranteed to exist)} +\\ +Shannon-Fano code & +\begin{mytextcol} +Prefix code with $\displaystyle\ell_{x_i} = \lceil -\log_D p(x_i) \rceil $ +\\[8pt] +$L$ below upper bound, may have unused leaves, not optimal in general +\end{mytextcol} +\\ +Fano code: & +\begin{mytextcol} + ``Binary partition into sets as equiprobable as possible''.\\[3pt] + $L$ is similar to Shannon-Fano code.\\[3pt] + Provided sorted probabilities: $ p_1\ge p_2\ge\cdots\ge p_k$, \\[3pt] + recursively split at index $q$ that minimizes: + $\displaystyle + \left|\sum_{i=1}^q p_i - \sum_{i=q+1}^k p_i\right| + $ +\end{mytextcol} +\\ + +Huffman code: & +\begin{mytextcol} +Tree algorithm: replace smallest (least prob.)\@ 2 nodes with an inner node with their sum; repeat until only one node (root) remains.\\[3pt] +Optimal: no better binary prefix code in terms of $L$. + +Note: [verify this claim??] if an optimal code is built using a distribution $q(x)$, but it is applied to a source with a distribution $p(x)$, then the resulting average codeword length is equal to the cross-entropy (used in MachLearn field): $L = \mathbb{E}_p[q] = - \sum p(x) \log q(x)$ +\end{mytextcol} +\\ + +Optimal code for i.i.d.\,sequences: & +\bm{X} = (X_1,\cdots,X_n); \quad \bm{x} \xmapsto[]{\text{Huff}_n} +\bm{y}; \quad |\bm{y}| = \ell^{(n)}_{\bm{x}}; \quad L^{(n)} = \mathbb{E}[\ell^{(n)}_{\bm{x}}] +\\& +H(X_1,\cdots X_n) \le L^{(n)} \le H(X_1,\cdots X_n) + 1 +\\& +\xRightarrow[]{\text{i.i.d}} +n\cdot H(X_1) \le L^{(n)} \le n\cdot H(X_1) + 1; \quad H(X) = H(X_1) +\\ +Optimal $L$ for i.i.d.\,sequences: & +H(X) \le L \le H(X) + \frac{1}{n} +;\quad +\left[L= +\frac{\mathbb E[\ell_{\bm x}]}{n}=\frac{L^{(n)}}{n}\right] +\\& +\lim_{n\to \infty} L = H(X) \quad \text{(length per symbol $L$ is optimized as $n$ increases)} +\\ +Optimal $L$ for ergodic processes: & +H(X_1,\cdots X_n) \le L^{(n)} \le H(X_1,\cdots X_n) + 1 +\\& +\implies \frac{1}{n}\cdot H(X_1,\cdots X_n) \le L \le \frac{1}{n} \cdot H(X_1,\cdots X_n) + \frac{1}{n} +\\& +\implies \boxed{\lim_{n\to \infty} L = H_\infty(X)} \le H(X) = H(X_1) \\[-6pt] +&\begin{mytextcol} +Better than i.i.d.\\(cannot be attained by Huffman codes unless symbols are grouped) +\end{mytextcol} +\\\end{mytable} + +\newpage +\section*{Universal compression/source coding} +A source code is universal if it can be constructed without knowledge of the statistics of the +source. 
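For intuition, the following is a minimal Python sketch (not from the course material; buffer sizes, tie-breaking and bit-packing are simplified assumptions) of an LZ77-style parser that produces the $(j,l,c)$ triples summarized in the table below:

\begin{minted}{python}
# Toy LZ77-style parser: search buffer of size S, lookahead of size B.
# Emits (j, l, c): offset j counted from the right end of the search buffer,
# match length l, and the literal character c following the match.
# "No match" is encoded as (0, 0, c).
def lz77_parse(data, S=8, B=8):
    i, out = 0, []
    while i < len(data):
        best_j, best_l = 0, 0
        for j in range(max(0, i - S), i):       # candidate match starts
            l = 0
            # the match may extend into the lookahead (overlap allowed)
            while l < B - 1 and i + l < len(data) - 1 and data[j + l] == data[i + l]:
                l += 1
            if l > best_l:
                best_j, best_l = i - j, l       # offset from the right
        out.append((best_j, best_l, data[i + best_l]))
        i += best_l + 1                         # advance past match + literal
    return out

print(lz77_parse("abracadabra"))
# [(0, 0, 'a'), (0, 0, 'b'), (0, 0, 'r'), (3, 1, 'c'), (5, 1, 'd'), (7, 3, 'a')]
\end{minted}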
+\begin{mytable}
+LZ77 codeword: &
+\bm y = (j,l,c); \text{ where:}\begin{cases}
+ j,&\text{match offset (in $S$, from the right)}\\
+ l,&\text{match length (can overlap $B$)}\\
+ c,&\text{character after the match (in $B$)}
+\end{cases}
+\\&
+\begin{cases}
+\text{no match }&\implies j=0,\, l=0,\, c=\text{``first char in $B$''}
+\\
+\text{match }&\implies j>0,\, l>0
+\end{cases}
+\\[-4pt]&
+\text{If match, advance buffers
+$l+1$
+chars (otherwise: $1$ char)}
+\\[-4pt]
+LZ77 codeword length: &
+\ell_{\bm y} = \ell((j,l,c)) =
+\lceil \log(S+1) \rceil +
+\lceil \log(B+1) \rceil +
+|c|
+\\[-8pt]
+& |c| = \lceil \log k \rceil \text{; when }c\in \text{``ASCII 8-bit'', }|c| = \log_2 2^8 =8 \text{ bits}
+\\
+LZ77 total compressed size: &
+ \underbrace{S \cdot |c|}_\text{initial buffer} + \sum_i \ell_{\bm{y}_i} = {S \cdot |c|} + N_{cw} \cdot \ell_{\bm{y}}, \quad N_{cw} = \text{num.\,of codewords}
+\\
+LZSS codeword: &
+\bm y = \begin{cases}
+ (0,j,l),&\text{match}\\
+ (1, c),&\text{no match}\\
+\end{cases}
+; \text{ where:}\begin{cases}
+ j,&\text{match offset (never $0$)}\\
+ l,&\text{match length (never $0$)}\\
+ c,&\text{first char in $B$}
+\end{cases}
+\\[-4pt]&
+\text{If match, advance buffers $l$ chars (otherwise: $1$ char)}
+\\
+LZSS codeword length: & \ell_{\bm{y}} =
+\begin{cases}
+ \text{match:} &
+ 1 + \lceil \log(S+1) \rceil +
+ \lceil \log(B+1) \rceil
+ \\
+ \text{no match:} &
+ 1 + |c|
+\end{cases}
+\\
+LZSS total compressed size: &
+ \underbrace{S \cdot |c|}_\text{initial buffer} + \sum_i \ell_{\bm{y}_i} =
+ S \cdot |c| +
+ N_\text{match} \cdot \ell_{\text{match}} +
+ N_\text{no-match} \cdot \ell_{\text{no-match}}
+\\
+LZ78: & [...] \text{Dictionary} [...]
+\\
+LZW: & [...] \text{Dictionary with preinitialization (usually with the 1-char entries)} [...]
+\end{mytable}
+
+\newpage
+\section*{Asymptotic Equipartition Property or Principle (AEP)}
+\begin{mytable}
+AEP concept: &
+\begin{mytextcol}
+ There exists a class/set of sequences $A_\varepsilon^{(n)}$ of length $n$, called \emph{typical}, such that, as $n$ increases ($n\to\infty$), they become almost sure ($P\to 1$) while at the same time remaining a negligible portion (as $\varepsilon\to 0$) of the total set of length-$n$ sequences $\mathcal X^{n}$; the only exception is a source that is uniform over $\mathcal X$, for which every sequence is typical.\\
+ Incidentally, sequences that are individually the most probable tend not to be in $A_\varepsilon^{(n)}$, i.e. are \emph{atypical} (same for the \emph{least} probable ones).
+ \\
+ The typical sequences are defined as precisely those that enable the convergence to $H(X)$ using the Weak LLN (i.e.\@ convergence in probability).
+
+% TODO: It also happens that those sequences tend to not contain
+% the most probable words nor the least probable words.
+% (see Sergio Verdú video).
+\end{mytextcol}
+\\
+Sequence of $n$ i.i.d.\@ RVs: &
+\begin{mytextcol}
+``$\bm X$ is i.i.d.''\ means that the sequence of $n$ RVs $\bm X = (X_1, X_2, \cdots, X_i, \cdots, X_n)$,\\ with $\bm X \in \mathcal{X}^n$, consists of \emph{mutually} independent RVs that all share the same marginal distribution\\ (mutual independence, which is stronger than pairwise independence). 
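\\[4pt]
In that case the joint PMF factorizes as $p_{\bm X}(\bm x)=\prod_{i=1}^{n} p_X(x_i)$, so $-\frac{1}{n}\log p_{\bm X}(\bm X)=\frac{1}{n}\sum_{i=1}^{n}\left(-\log p_X(X_i)\right)$ is an empirical average of i.i.d.\@ terms with mean $H(X)$; this is what allows the Weak LLN to be applied in the rows below.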
+\end{mytextcol} +\\ +Set of $\varepsilon$-typical sequences for i.i.d.\@ RVs: & +\text{Given a sequence $\bm X$ of $n$ i.i.d.\@ $n$ RVs, for each and $\varepsilon>0$ define:} +\\[-8pt]& +A_\varepsilon^{(n)} (X) = \left\{ + \bm x = (x_1,x_2,\cdots,x_n): +\, +\left| +-\frac{1}{n}\log p_{\bm X}(\bm x)-H(X) +\right|\le \varepsilon +\right\} = +\\& +A_\varepsilon^{(n)} (X) = \left\{ + \bm x = (x_1,\cdots,x_n): +\, +2^{-n\cdot(H(X)+\varepsilon)} +\le p_{\bm{X}}(\bm x) +\le +2^{-n\cdot(H(X)-\varepsilon)} +\right\} +\\ +AEP (Weak LLN redux): & +\text{Because $\bm X$ are i.i.d.:}\, -\frac{1}{n}\log p_{\bm X}(\bm X) = +-\frac{1}{n}\log \prod_{i} p_{X}(X_i) \stackrel[n\to\infty]{P}{\longrightarrow} H(X) +\\& +\forall\varepsilon>0,\, \exists n_0:\, \forall n>n_0 \implies +P(``\bm x \in A_\varepsilon^{(n)} (X) ") \ge 1-\varepsilon +\\& +\text{Main idea: } \underset{(n,\,\varepsilon)\to(\infty,\,0^+)}{\text{lim}^*}\, P(``\bm x \in A_\varepsilon^{(n)} (X) ") = 1 +\\[-0.3cm]&\text{\footnotesize{${}^*$Note: this is not actually a limit...}} +\\[-0.3cm]& +\begin{mytextcol} +Also we have (as $\varepsilon\to 0$): $p_{\bm X}(\bm x)|_{\bm x\in A_\varepsilon^{(n)}} +\approx 2^{-n H(X)};\quad p_{\bm X}(\bm x)|_{\bm x\notin A_\varepsilon^{(n)}} \approx 0$ +\\ (i.e.\@ sequences inside the typical set tend to be equiprobable and sequences outside of it tend to be impossible) +\end{mytextcol} +\\ +Size of $A^{(n)}_\varepsilon (X)$ is negligible: & +(1-\varepsilon)\cdot 2^{n\cdot (H(X)-\varepsilon)} +\le +|A_\varepsilon^{(n)} (X)| +\le +2^{n\cdot (H(X)+\varepsilon)},\, \forall n\ge n_0 +\\& +\implies \underset{(n,\,\varepsilon)\to(\infty,\,0^+)}{\text{lim}^*}\frac{|A_\varepsilon^{(n)} (X)|}{|\mathcal{X}^{n}|} = +\lim_{n\to\infty}\frac{2^{n\cdot H(X)}}{2^{n\cdot \log_2 |\mathcal{X}|}} = +\begin{cases} + 1, &\text{ if $X$ is uniform} \\ + 0 ?, &\text{ otherwise} +\end{cases} +\\[12pt] +AEP generalizations: & +\text{AEP holds for \emph{any} $\bm X$} +\begin{cases} + \text{i.i.d.} & \text{as shown here} \\ + \text{independent} & \text{not shown here} \\ + \text{ergodic} & \text{using next definition of $A_\varepsilon^{(n)}$} \\ +\end{cases} +\\ +$A_\varepsilon^{(n)}$ for ergodic $\bm X$: & +A_\varepsilon^{(n)} (X) = \left\{\bm x = (x_1,x_2,\cdots,x_n): +\, +\left| +-\frac{1}{n}\log p_{\bm X}(\bm x)-H_\infty(X) +\right|\le \varepsilon +\right\} +\\ +Set of jointly $\varepsilon$-typical sequences: & +A_\varepsilon^{(n)} (X,Y) = +\left\{(\bm x,\bm y): +\, +\bm x \in A_\varepsilon^{(n)} (X),\, +\bm y \in A_\varepsilon^{(n)} (Y) +\right\} \cap +\newline\phantom{MMMMMM} +\cap\left\{(\bm x,\bm y): +\, +\left| +-\frac{1}{n}\log p_{\bm{XY}}(\bm{xy})-H(X,Y) +\right|\le \varepsilon +\right\} +\\ +$A_\varepsilon^{(n)} (X,Y)$ is defined for:& +\text{i.i.d.\,$\bm X$ and i.i.d.\,$\bm Y$ (but $\bm X,\bm Y$ could be jointly not i.i.d.)} +\\ +Properties of $A_\varepsilon^{(n)} (X,Y)$: & +\text{AEP: } +P +\left((\bm{x},\bm{y}) \in A_\varepsilon^{(n)} (X,Y) +\right) \ge 1-\varepsilon, +\,\, \forall n\ge n_0 +\,\, \forall \varepsilon>0 +\\ & +(1-\varepsilon)\cdot 2^{n\cdot (H(X,Y)-\varepsilon)} +\le +\left| + A_\varepsilon^{(n)} (X,Y) +\right| +\le +2^{n\cdot (H(X,Y)+\varepsilon)},\, \forall n\ge n_0 +\\ +When $\bm X \indep \bm Y$:& +(1-\varepsilon)\cdot 2^{-n\cdot (H(X,Y)+3\cdot\varepsilon)} +\le +P\left( + (\bm x,\bm y)\in A_\varepsilon^{(n)} (X,Y) +\right) +\le +2^{-n\cdot (H(X,Y)-3\cdot\varepsilon)} +\\[-12pt]& +\forall n\ge n_0. 
\quad \text{Meaning of }\text{``}\bm X \indep \bm Y \text{'' here}
+%\Longleftrightarrow
+:\quad
+p_{\bm{XY}}(\bm x,\bm y)
+ = p_{\bm{X}}(\bm x)\cdot p_{\bm{Y}}(\bm y)
+% \\[-10pt]&
+\end{mytable}
+\section*{Source Coding Theorem}
+
+\begin{mytable}
+Source Coding Theorem for i.i.d.\@$\bm X$: & \forall \delta >0,\, \exists n_0 \text{ s.t. }
+ n\ge n_0 \implies \boxed{ \frac{L^{(n)}}{n} \le H(X) + \delta}
+\\[-8pt]&
+\begin{mytextcol}
+assuming i.i.d.\@ sequences of $n$ RVs $\bm X = (X_1, X_2, \cdots,X_i,\cdots, X_n)$ and $L^{(n)} = \mathbb E [\ell_{\bm x}]$ for some source code that is guaranteed to exist.
+\end{mytextcol}
+\\[8pt]
+Proposed optimal source code:&
+\begin{mytextcol}
+Given $\varepsilon$ and $n$, add prefix bit $1$ if $\bm x\in A_\varepsilon^{(n)}$ or $0$ otherwise.
+Then, concatenate with the index in $A_\varepsilon^{(n)}$ if $\bm x\in A_\varepsilon^{(n)}$, or with the index in $\mathcal X^n$ otherwise.
+\\[5pt]
+With that, the resulting codeword lengths are:\\[3pt]
+\end{mytextcol}
+\\[16pt]&
+\ell_{\bm x, \varepsilon} =
+\begin{cases}
+ \text{if } \bm x \in A_\varepsilon^{(n)}, &
+ 1 + \lceil \log |A_\varepsilon^{(n)} | \rceil \underset{\text{(AEP)}}{\le}
+ 2 + n (H(X)+\varepsilon)
+ \\
+ \text{if }\bm x \notin A_\varepsilon^{(n)}, &
+ 1 + \lceil \log k^n \rceil
+ \le
+ 2 + n\log k
+ \\
+\end{cases}
+\\&
+\text{for } \delta = \varepsilon \cdot (1+\log k) + \frac{2}{n} \text{,\, $\frac{L^{(n)}}{n}$ satisfies the Source Coding Theorem bound.}
+\\[-10pt]&\text{(this proves the \emph{achievability})}
+\\
+Source Coding Theorem for ergodic $\bm X$: &
+\boxed{ \frac{L^{(n)}}{n} \le H_\infty(X) + \delta}
+\quad\text{(just replace $H(X)$ with $H_\infty(X)$)}
+\\
+\end{mytable}
+
+\section*{Channel Coding Theorem}
+
+\begin{mytable}
+Channel code definition: & \text{(See Channel Coding section)}
+\\
+Error probability (after decoding): & P_e = P(g(Y)\neq u | U=u) = P(
+\widehat U\neq u | U=u), \, \widehat U = g(Y)
+\\
+Fano's lemma: &
+H(U|\widehat U) \le h(P_e) + P_e \cdot \log (M-1),\, P_e = P(\widehat U \neq U)
+\\[-4pt]
+Properties of $H(U|\widehat U)$:
+&
+H(U|\widehat U) =
+H(U|\widehat U) + \underbrace{H(Z|U\widehat U)}_{=0}
+\overset{
+ \begin{subarray}{c}
+ \text{chain} \\
+ \text{rule} \\
+ \end{subarray}
+}{=}
+H(UZ|\widehat U),\, Z=\delta_{U=\widehat U}
+\\[-8pt]&
+0 \le
+H(U|\widehat U)
+\le \log M
+\\[-4pt]&
+H(U|\widehat U) =0 \implies \text{perfect decoding possible ($U$ deterministic given $\widehat U$)}
+\\[-4pt]&
+H(U|\widehat U) =H(U) \implies \text{message impossible to recover ($\underbrace{I(U;\widehat U)= 0}_{U\indep\widehat U}$)}
+% H(U|\widehat U) =
+% \begin{cases}
+% 0, & \text{perfect decoding possible ($U$ deterministic given $\widehat U$)}\\
+% \log M, & \text{message impossible to recover, $U\indep\widehat U$} \\
+% \end{cases}
+\\
+Channel code rate: & R=\frac{k}{n} \le 1; \quad M=|U|;\quad k=\log_2 M \text{ bits}
+\\
+Channel capacity: & C = \max_{p_X(x)} I(X;Y)
+\\[-8pt]
+&\text{$I(X;Y)$ is concave on $p_X(x) \Rightarrow$ a local maximum is global (and unique).}
+\\
+Channel coding theorem: &
+\begin{mytextcol}
+$R < C \implies$ a target error probability $P_e>0$ (arbitrarily small) can be attained using a coding scheme with a rate-$R$ channel code that is guaranteed to exist (\emph{achievability}).
+\\
+Otherwise ($R\ge C$), such a code fails to exist once the target $P_e$ is small enough; in particular, if $R > C$, then this ``error-free'' transmission cannot be achieved by any code at all (\emph{converse}). 
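\\[5pt]
Worked numerical illustration (values chosen only as an example, using the BSC capacity from the next section): a BSC with crossover probability $p = 0.11$ has $C = 1 - h_2(0.11) \approx 0.5$ bits per channel use; hence a rate $R = 1/3 < C$ code with arbitrarily small $P_e$ exists for large enough block length $n$, while no code with $R = 0.6 > C$ can achieve this.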
+\end{mytextcol} +\end{mytable} + +\section*{Discrete Memoryless Channels (DMC)} +\begin{mytable} +Discrete channel: & +\begin{mytextcol} +SISO system (Single Input/Single Output), with input $X_i$ and output $Y_i$ symbols at discrete time $i$, defining sequences (signals) $\bm X$ and $\bm Y$; the input and output alphabets ($\mathcal{X}$ and $\mathcal{Y}$) are discrete sets.\\[4pt] +% This is from it_script_v69.pdf +A channel is characterized by the input and output alphabets and the probabilities on an output given all inputs and all other outputs: +\\ +$(\mathcal{X}, \{P(Y_i|\bm X\bm Y_{k\neq i})\}_{\forall i}, \mathcal{Y})$, where $\mathcal{X}, \mathcal{Y}$ (alphabets) are discrete sets. +\end{mytextcol} +\\ +Discrete Memoryless Ch.\@ (DMC): & +\text{Discrete and $P(Y_i| \bm X\bm Y_{k\neq i}) = P(Y_i| \bm X) = P(Y_i|X_i) =P(Y|X)$}\newline\phantom{M}\text{(current output only depends on current input symbol)} +\\[-8pt]& +\text{Characterized by just } (\mathcal{X}, P( Y| X), \mathcal{Y}) +\\[-8pt]& +\text{This defines a Markov chain: } X \to Y +\\ Capacity bounds: & +0\le C \le \min\{\log |\mathcal X|, \log |\mathcal Y|\} +\text{, (same as bounds on $I(X;Y)$)} +\\[-8pt]& +\text{if $\mathcal X$ or $\mathcal Y$ is binary, then: } C \le 1 +\\ +Binary Symmetric Ch.\@ (BSC): & +\text{[TODO: Image]} +\\[-4pt]& +x,y\in \{0,1\},\quad +P_{Y|X}(y|x) = \begin{cases} + 1-p, & y = x \\ + p, & y \neq x \\ +\end{cases} +\\[-4pt]& +I(X;Y) = H(Y) - h_2(p) \le 1 -h_2(p) = C_\text{BSC} +\\[-12pt]&\text{Capacity achieved when $Y$ uniform, therefore $X$ uniform.} +\\ +Binary Erasure Ch.\@ (BEC): & +\text{[TODO: Image]} +\\[-4pt]& + x\in \{0,1\}, \, y\in \{0,1,\Delta\},\quad +P_{Y|X}(y|x) = \begin{cases} + \alpha, & y = \Delta \\ + 1-\alpha, & y \neq \Delta, x=y \\ + 0, & \text{otherwise} \\ +\end{cases} +\\[-4pt]& +I(X;Y) = H(Y) - h_2(\alpha) \le 1 -\alpha = C_\text{BEC} +\\[-12pt] +&\begin{mytextcol} +No symmetry; $Y$ cannot be uniform, thus $H_{\max}(Y)=\log 3$ cannot be achieved. 
Let $p=P(X=1)=1-P(X=0)$ +\end{mytextcol} +\\[-4pt]& H(Y) = H\Big((1-p)(1-\alpha), \alpha, p(1-\alpha)\Big)=(1-\alpha)h_2(p) + h_2(\alpha) +\\[-12pt] & \text{optimal for $p=1/2$} +\\State transition matrix: & +\begin{mytextcol} +Corresponds to the Markov chain $X\to Y$\\[4pt] +$\mathcal P = [p_{Y|X}(y|x)]$; size $N\times M=|\mathcal X| \times |\mathcal Y|$, weighted bipartite graph +\\ +$N$ rows ($x$, outgoing edges), $M$ columns ($y$, incoming edges)\\[8pt] Rows must add up to 1: $\sum\limits_y p_{Y|X}(y|x) = 1$ +\end{mytextcol} +\\[-4pt]& +\mathcal P_\text{BSC} = +\bgroup\def\arraystretch{0.8} +\begin{bNiceMatrix}[last-row,last-col] +1-p & p \\[-4pt] +p & 1-p & +\Vdots[line-style={solid,<->}]^{X}\\ +& \Ldots[line-style={solid,<->},shorten=0pt]_{Y} \\ +\end{bNiceMatrix} +\egroup +\quad\quad +\mathcal P_\text{BEC} = +\bgroup\def\arraystretch{0.8} +% \begin{pmatrix} +% 1-\alpha & \alpha & 0 \\ +% 0 & \alpha & 1-\alpha \\ +% \end{pmatrix} +%% +\begin{bNiceMatrix}[last-row,last-col] + 1-\alpha & \alpha & 0 \\[-4pt] + 0 & \alpha & 1-\alpha & +\Vdots[line-style={solid,<->}]^{X}\\ +& \Ldots[line-style={solid,<->},shorten=0pt]_{Y} \\ +\end{bNiceMatrix} +%% +% \begin{bNiceMatrix}[last-row,last-col] +% 1-\alpha & \alpha & 0 & +% \Vdots[line-style={solid,<->}]^{X} \\ +% 0 & \alpha & 1-\alpha & \\ +% & & \Ldots[line-style={solid,<->},shorten=0pt]_{Y} +% \end{bNiceMatrix} +\egroup +\\ +DMC capacity -- General case: & \text{Maximize $I(X;Y)$ for $p_i=p_X(x_i)$, constrained to $p_i>0$ and $\displaystyle\sum_i p_i=1$.} +\\[-4pt] & +I(X;Y) = H(Y) - H(Y|X) = H(Y) - \sum_x +\underbrace{H(Y|X=x)}_{H(\bm r_i)} +\cdot\, p_X(x) +\\[-4pt] & +I(X;Y) = H(Y) - \sum_{i} +H(\bm r_i) +\cdot\, p_X(x_i); \quad +\bm r_i =\text{$i$-th row of $\mathcal P$} +\\[-4pt] & +\text{To obtain $H(Y)$, compute $p_Y(y)=P(Y=y)$ using total probability:} +\\[-4pt] & +p_Y(y) = \sum_{x\in \mathcal X} p_X(x)\cdot p_{Y|X}(y|x); \quad \boxed{\bm p_Y = \bm p_X \cdot \mathcal P}; \quad H(Y) = H(\bm p_Y) +\\[-4pt] & +\begin{mytextcol} +If the matrix for $p_{XY}(x,y)$ is desired, use $\bm p_{XY} = \bm p_X^{T}\,\texttt{.*} \mathcal P$, where \texttt{.*} is the broadcasting multiplication (as in MATLAB or NumPy). +\\ +This can also be described with Einstein notation: [TODO] +\end{mytextcol} +\\ +Uniformly dispersive DMC: & +\begin{mytextcol} + rows ($x$) are taken from the same set (permutations) + \end{mytextcol} +\\[-4pt] & +I(X;Y) = H(Y) - H(Y|X) = H(Y) - \sum_x +\underbrace{H(Y|X=x)}_{\text{constant}} +\cdot\, p_X(x) +\\[-4pt] & +H(Y|X=x) = H(\bm r), \text{ ($\bm r$ is any row of $\mathcal P$)} +\\[-4pt] & +I(X;Y) = H(Y) - H(\bm r) \le \boxed{ \left(\max_{p_X(x)} H(Y)\right) - H(\bm r) = C_\text{UnifDisp}} +\\ +(Strongly) Symmetric DMC: & +\begin{mytextcol} + rows ($x$) are taken from the same set (permutations; unif.\@ disp.), \\columns ($y$) have the same property. 
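\\[4pt]
Example (standard case, for illustration): the BSC with matrix $\mathcal P_\text{BSC}$ above is strongly symmetric (every row and every column is a permutation of $(1-p,\, p)$); the boxed $C_\text{Symm}$ formula below then gives $C = \log 2 - H(1-p, p) = 1 - h_2(p)$, which matches $C_\text{BSC}$.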
+\end{mytextcol} +\\[-4pt] & +I(X;Y) = H(Y) - H(\bm r) \le \boxed{ \log |\mathcal Y| - H(\bm r) = C_\text{Symm}} +\\[-4pt] & \text{optimal for $X$ uniform (implies $Y$ uniform).} +\\ +Weakly Symmetric DMC: & +\begin{mytextcol} + rows ($x$) are taken from the same set (permutations), \\columns ($y$) must add up to the same number: $\displaystyle A=\sum_x p(y|x)$.\\ + Same results as symmetric: +\end{mytextcol} +\\[-4pt] & +I(X;Y) = H(Y) - H(\bm r) \le \boxed{ \log |\mathcal Y| - H(\bm r) = C_\text{WSymm}} +\\[-4pt] & \text{optimal for $X$ uniform (implies $Y$ uniform).} +\\ +Useful properties: & +\frac{\partial }{\partial x}\bigg( +x\cdot \log_2 x +\bigg) += +\frac{1}{\ln 2}+ +\log_2 x +\\ & +\frac{\partial}{\partial x}\bigg( +f(x)\cdot \log_2 f(x) +\bigg) += +\frac{\partial f(x)}{\partial x}\cdot\bigg( +\frac{1}{\ln 2}+ +\log_2 f(x) +\bigg) +\\ & +\frac{\partial}{\partial x}h_2(x) += +\log_2\frac{1-x}{x}=-\mathrm{logit}_2\,x +\\ +\end{mytable} + +\newpage +\section*{Channel Coding} +\begin{mytable} +Channel code definition: & \text{[TODO]}[\text{Code Rate: }R] +\\ +Single parity bit check code: & \text{[TODO]} +\\ +Linear code: & \text{[TODO]} +\\ +Generator matrix of a linear code: & \text{[TODO]} +\\ +Hamming distance and weight: & \text{[TODO]} +\\ +Decoding criteria: & +\begin{cases} + \text{Maximum a posteriori (MAP):} & + \hat{\bm x} = \arg\max_{\bm x\in \mathcal B} + \{P(\bm x| \bm y) \} + \\\hline + \text{Maximum likelihood (ML):} & + \hat{\bm x} = \arg\max_{\bm x\in \mathcal B} + \{P(\bm y| \bm x) \} + \\ + \text{ = MAP if equiprobable CWs $\bm x$}& + \\\hline + \text{Minimum distance (MD):} & + \hat{\bm x} = \arg\min_{\bm x\in \mathcal B} + \{d_H(\bm x, \bm y) \} + \\ + \text{ = MAP for BSC}\\\text{ (¿¿requires also equiprobable\@ x??)}& + \\ +\end{cases} +\\ +Hamming code: & \text{Parity bits are the sums of all possible subsets of info bits} +\\ +Detected error bits: & \boxed{w_H(\bm e) \le d_{\min} -1} +\\ +Corrected error bits: & \boxed{w_H(\bm e) \le +\left\lfloor + \frac{d_{\min} -1}{2} +\right\rfloor} +\le \frac{d_{\min} -1}{2} +\\ +Parity-check matrix of a linear code: & \text{[TODO]} +\\ +Syndrome: & \bm s = \bm y \cdot H^T = \bm e \cdot H^T +\\ +Syndrome decoder: & +\text{[TODO]} +\end{mytable} + +\section*{Information in continuous RV} + +\begin{mytable} +Differential entropy: & H_\text{Dif}(X) = \mathbb{E}_X[-\log_2 f_X(X)] = - \int_{\mathbb{R}} f_X(x)\cdot\log_2 f_X(x)\, dx \newline\phantom{M} (X \text{ with pdf } f_X) +\\[-12pt] +& \text{May be negative, e.g.\,$X\sim$ Uniform, with }\Delta<1 \text{ (similar w/Gaussian)} \newline\phantom{MMMMMM..MMM} \implies H_\text{Dif}(X)=\log \Delta<0 +\\ +Joint differential entropy: & +H_\text{Dif}(X,Y) = +\mathbb{E}_{XY}[-\log_2 f_{XY}(X,Y)] = +-\iint\limits_{\mathbb{R}^2} f(x,y)\cdot\log_2 f(x,y)\, dxdy +\\[-8pt]& +H_\text{Dif}(X_1,X_2,\dots,X_n) = +\mathbb{E}_{\bm X}[-\log_2 f(X_1, X_2, \dots, X_n)] +\\ +Relative entropy: & +D(f||g) = \mathbb{E}_{|f}\left[\log\frac{f(X)}{g(X)}\right] = +\int_{\mathbb R}f(x)\cdot\log\frac{f(x)}{g(x)}dx +\ge 0 +\\ +Mutual information: & +I(X;Y) = +\mathbb{E}_{XY}\!\left[\log_2 + \frac{f_{XY}(X,Y)}{f_X(X)\cdot f_Y(Y)} +\right]= D(f_{XY}(x,y)||f_{X}(x)\cdot f_{Y}(y)) \ge 0 +\\& +\text{Chain rule and ``Conditioning reduces entropy'': same as discrete RVs.} +\\ +Translation and scaling: & +H_\text{Dif}(a\cdot X+c) = H_\text{Dif}(X) + \log a +\\[-12pt]& +\begin{mytextcol} +Scaling breaks ``permutation invariance''\\(due to change of variable in integral) +\end{mytextcol} +\\ +Gaussian RV diff.\@ 
+Gaussian RV diff.\@ entropy: &
+H_\text{Dif}(X) = \frac{1}{2}\log(2\pi e\sigma^2)
+\\
+Gaussian RV maximizes $H_\text{Dif}(X)$: &
+H_\text{Dif}(X)\ge H_\text{Dif}(Y),
+\quad
+X\sim \mathcal{N}(\mu,\sigma), \, \forall Y,\, \mathbb{E}[Y] = \mu, \,
+\mathbb{V}[Y] = \sigma^2
+\\[-8pt]&
+H_\text{Dif}(X) - H_\text{Dif}(Y) = \underbrace{\cdots}_{\text{lemma}} = D(f||g) \ge 0
+\\ Lemma: &
+\mathbb{E}_{|f}[-\log g(X)] = -\int_{\mathbb{R}}
+f(x)\cdot\log g(x)\, dx =
+-\int_{\mathbb{R}}
+g(x)\cdot\log g(x)\, dx =
+H_{\text{Dif}|g}(X)
+\\[-5pt]&
+g\text{ is the pdf of }X \sim \mathcal N(\mu,\sigma),\, f\text{ is pdf of }Y,\,\mathbb{E}[Y] = \mu, \,
+\mathbb{V}[Y] = \sigma^2
+\newline\text{(the two integrals agree because $\log g(x)$ is quadratic in $x$ and $f$, $g$ share mean and variance)}
+\\
+Discrete $H$ does not converge to $H_\text{Dif}$: &
+\text{in general, }
+H(X^\Delta) \xrightarrow[]{\Delta\to 0} \infty
+\\
+Discrete $I$ agrees with continuous $I$: &
+I(X^\Delta;Y^\delta) \xrightarrow[]{(\Delta,\delta)\to (0, 0)} I(X;Y)
+\end{mytable}
+
+\newpage
+\section*{Multivariate Gaussian distribution}
+\begin{mytable}
+$n$-D Gaussian RV (Multivariate): &
+\begin{mytextcol}
+Vector of Gaussians $\bm X = (X_1, X_2, \cdots, X_n)\sim \mathcal N(\bm \mu, \Lambda_{\bm X})$ s.t.\,all linear combinations are Gaussian or deterministic (a.k.a.\,\emph{jointly} Gaussian):
+\\
+\phantom{M}\quad $\forall \bm a \in \mathbb{R}^n,\, \bm a^T\cdot \bm X = Z \sim \mathcal N(\mu,\sigma)$ (if $\sigma = 0$, deterministic $Z=\mu$)
+\\
+Marginals $X_i \sim \mathcal N(\mu_i,\sigma_i)$; covariance matrix $\Lambda_{\bm X} = \mathbb E[(\bm X - \bm \mu)\cdot (\bm X - \bm \mu)^T]$. We assume full-rank $\Lambda_{\bm X}$ (non-degenerate, i.e.\,all nontrivial linear combinations are nondeterministic). \\
+PDF (for non-degenerate case): \\\phantom{M}\quad $\displaystyle
+f_{\bm X}(\bm x) =
+\frac{1}{\sqrt{\text{Det}(2\pi\cdot \Lambda_{\bm X})}}\cdot
+\text{exp}\left[
+ -\frac{1}{2}
+ (\bm x - \bm \mu)^T \cdot
+ \Lambda_{\bm X}^{-1} \cdot
+ (\bm x - \bm \mu)
+\right]
+$
+\\
+$\Lambda_{\bm X}$ is positive definite ($\Lambda_{\bm X}>0$) (in general it can be pos.\@ semidef.)
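+% bivariate example of the general definition above:
+\\[4pt]
+e.g.\,for $n=2$: $\Lambda_{\bm X}=\begin{pmatrix}\sigma_1^2 & \rho\sigma_1\sigma_2\\ \rho\sigma_1\sigma_2 & \sigma_2^2\end{pmatrix}$, with $\text{Det}(\Lambda_{\bm X})=\sigma_1^2\sigma_2^2(1-\rho^2)>0$ iff $|\rho|<1$ (non-degenerate).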
+\end{mytextcol}
+\\
+$n$-D Gaussian RV normalization: &
+\bm Y = \Lambda_{\bm X}^{-1/2}\cdot (\bm X - \bm \mu_{\bm X}),\,
+\bm X \sim \mathcal N(\bm \mu_{\bm X}, \Lambda_{\bm X})
+\implies \bm Y \sim \mathcal N(\bm 0, I)
+\\[-5pt]
+& \text{(Note: $\Lambda_{\bm X}^{-1/2}$ is uniquely well defined because $\Lambda_{\bm X}$ is positive definite)}
+\\
+Linear transformation: &
+\begin{mytextcol}
+$\bm Y = A\cdot \bm X + \bm a
+;\quad$
+$A$ \text{ is square full rank (invertible transformation)}
+\\[4pt]
+($\bm X$ is arbitrary RV)
+\\[4pt]
+$\displaystyle
+f_{\bm Y}(\bm y) =
+\frac{1}{|\text{Det}(A)|}\cdot
+f_{\bm X}
+\left(
+ A^{-1}\cdot (\bm y-\bm a)
+\right)
+$
+\\[4pt]
+$\mathbb{E}[\bm Y] = A\cdot \bm \mu_{\bm X} + \bm a,$\quad
+$\Lambda_{\bm Y} = A \cdot \Lambda_{\bm X} \cdot A^T$
+\\[4pt]
+$H(\bm Y) = H(\bm X) + \log |\text{Det}(A)|$
+\end{mytextcol}
+\\
+Property: &
+\mathbb{E}
+[
+ (\bm X - \bm \mu)^T\cdot
+ \Lambda_{\bm X}^{-1}
+ \cdot (\bm X - \bm \mu)
+] = n, \,\text{ (any RV $\bm X$ with mean $\bm\mu$ and covariance $\Lambda_{\bm X}$, not necessarily Gaussian)}
+\\
+(Simple) Gaussian quadratic form: &
+Z = (\bm X - \bm \mu_{\bm X})^T\cdot
+ \Lambda_{\bm X}^{-1}
+ \cdot (\bm X - \bm \mu_{\bm X})
+ = \sum_i Y_i^2
+ \sim \chi^2(n)
+\\[-8pt]&
+\text{Given }\bm X \sim \mathcal N(\bm \mu_{\bm X}, \Lambda_{\bm X}),\text{ with }\bm Y = \Lambda_{\bm X}^{-1/2}(\bm X - \bm \mu_{\bm X}) \sim \mathcal N(\bm 0, I)\text{ (normalization above)}
+\\
+Entropy of $n$-D Gaussian: &
+H(\bm X) = \frac{1}{2}\cdot\log\text{Det}(2\pi\cdot e\cdot\Lambda_{\bm X})
+\\
+Gaussian RV maximizes $H_\text{Dif}(\bm X)$:
+&
+H_{\text{Dif}}(\bm X) \ge H_{\text{Dif}}(\bm Y),
+\quad
+\bm X\sim \mathcal N(\bm\mu,\Lambda), \, \forall \bm Y,\, \mathbb{E}[\bm Y] = \bm\mu, \,
+\text{Cov}(\bm Y) = \Lambda
+\\[-8pt]&
+H_{\text{Dif}}(\bm X) - H_{\text{Dif}}(\bm Y) = \underbrace{\cdots}_{\text{lemma}} = D(f||g) \ge 0
+\\
+Lemma: &
+\mathbb{E}_{|f}[-\log g(\bm X)] =
+\mathbb{E}_{|g}[-\log g(\bm X)] = H_{\text{Dif}|g}(\bm X)
+\end{mytable}
+
+\newpage
+\section*{Communication over Gaussian channels}
+\subsection*{Single Gaussian channel}
+\begin{mytable}
+Gaussian channel: &
+Y = X + Z, \quad Z\sim \mathcal N(0, \sqrt{N}), \quad X\indep Z
+\\[-8pt]&
+\text{assuming tx.\@ power limitation }\mathbb{E}[X^2]\le P, \text{ and }\mathbb E[X] = 0
+\\[-16pt]&
+\text{($\mathbb E[X]\neq 0$ would waste power)}
+\\[-16pt]&
+\text{Each RV realization corresponds to a transmission or channel use.}
+\\
+Mutual info.\@ in Gaussian channel: &
+\boxed{I(X;Y) = H(Y) - H(Z)}; \quad H(Y|X) = H(Z|X) = H(Z)
+\\[-4pt]&
+H(Z) = \frac{1}{2}\log(2\pi e\cdot \sigma_Z^2)
+\\
+Capacity of the Gaussian channel: &
+C =
+\max_{\substack{f_X(x)\\ \mathbb{E}[X^2] = P}}
+I(X;Y) =
+\frac{1}{2}\log(2\pi e\cdot (P+N)) - H(Z) =
+\\[12pt]&
+\boxed{C = \frac{1}{2} \log
+\left(
+1 + \frac{P}{N}
+\right)
+\,\text{[bits/tx.]}}
+\text{ attained for $X\sim \mathcal N(0,\sqrt{P})$}
+\\
+Sampling theorem:
+&
+x(t) = \sum\limits_{n=-\infty}^{+\infty}
+x[n]\cdot \text{sinc}_W\left(
+ t - \frac{n}{2W}
+\right)
+\iff x[n] = x(n\cdot T_s) =
+x\left(
+\frac{n}{2W}
+\right)
+,
+\newline \phantom{M} \text{ provided $x(t)$ is band-limited to $f_{\max} \le W = F_s / 2$;\quad $F_s = 1/T_s$}
+\newline \phantom{M} \text{ where: }
+\text{sinc}_W(t)=\frac{\sin (2\pi \cdot W\cdot t)}{2\pi \cdot W\cdot t},\,\, t\neq 0; \text{ sinc}_W(0) =1, \text{ zeros at } t=\frac{i}{2W}
+\\
+Band-limited Gaussian channel:
+&
+\begin{mytextcol}
+$y(t) = (x(t) + \eta(t))*h_W(t)$, assuming $X(|f|>W)=0$
+\\[8pt]
+$W$ is the bandwidth of the positive frequency interval (definition also valid for passband signals).
+\\[8pt]{Continuous-Time (CT) model:}
+\\[4pt]
+$y(t) = x(t) + z(t)$, then $Z(|f|>W)=0$
+\\[8pt]
+$z(t) = \eta(t) * h_W(t)$, band-limited AWGN ($\eta(t)$ is perfectly filtered)
+\\[8pt]{Discrete-Time (DT) model:}
+\\[4pt]
+$y_k = x_k + z_k$, after sampling at $F_s = 2W$\\[8pt]
+Noise PSDs: $R_\eta(f)= \frac{N_0}{2}$ and $R_z(f) = \frac{N_0}{2}\cdot\mathbbm{1}_{[-W, +W]}(f)$
+\\
+$N_0$ is defined so that the total noise power is $N=W \cdot N_0$
+\\CT noise autocorrelation: $r_z(\tau) = \mathcal{F}^{-1}\{R_z(f)\}= N_0 W\cdot \text{sinc}_W(\tau) = N\cdot \text{sinc}_W(\tau)$\\
+DT noise autocorrelation (samples are uncorrelated; DT quantities are energies, i.e.\,$T_s\,\times$ powers): $r_z[k] =
+ T_s\cdot r_z\left(\frac{k}{2W}\right) = \frac{N_0}{2} \cdot \delta[k]$
+\\Noise power (CT mean square): $\mathbb E[Z_{t}^2] = N = N_0\cdot W = N_0 \cdot \frac{F_s}{2}$
+\\Noise energy (DT mean square): $E_N = \mathbb E[Z_{k}^2] = \frac{N_0}{2}$ (per sample)
+\\DT AWGN process: $z_k \sim \mathcal N\left(0, \sqrt{\frac{N_0}{2}}\right)$
+\\Tx.\@ signal power: $P$
+\\Tx.\@ signal energy: $E_X=\frac{P}{2W}$ (per sample)
+\end{mytextcol}
+\\[8pt]
+Band-limited Gaussian ch.\@ capacity:
+&
+C_{\text{eff}} = \frac{1}{2}\log\left(
+ 1 + \frac{E_X}{E_N}
+\right)
+=
+\frac{1}{2}\log\left(
+ 1 + \frac{\frac{P}{2W}}{\frac{N_0}{2}}
+\right)
+=
+\frac{1}{2}\log\left(
+ 1 + \frac{P}{N_0 W}
+\right).
+\\&
+\begin{mytextcol}
+Because of sampling theorem, $C_{\text{eff}}$ is the maximum spectral efficiency:
+\\
+$C_{\text{eff}} > R_b / W$ (in (bits/s)/Hz or bits/sample or bits/(DT channel use)).
+\end{mytextcol}
+\\[8pt]&
+\boxed{
+ C = W\cdot \log\left(
+ 1 + \frac{P}{N_0 W}
+ \right)\text{ [bits/s]}
+}
+\text{ (Shannon–Hartley theorem)}
+\\&
+C_{\text{eff}} > R_b / W
+\implies C > R_b
+\\
+Attenuated (band-lim.) Gaussian ch.: &
+y(t) = x(t)\cdot G + z(t), \quad |G| < 1
+\\&
+\boxed{
+ C = W\cdot \log\left(
+ 1 + \frac{P|G|^2}{N_0 W}
+ \right)\text{ [bits/s]}
+}
+\\
+Fundamental theorem (max.\@ capacity):&
+ C_{\infty} = \lim_{W\to\infty} C =
+ \lim_{W\to\infty} \log\underbrace{\left(
+ 1 + \frac{P}{N_0 W}
+ \right)^W}_{\to \text{exp}(P/N_0) } = \frac{P}{N_0} \log e=
+\\[-4pt]&
+\boxed{
+ C_{\infty} =\frac{P}{N_0}\,\frac{1}{\ln 2}
+ \text{ [bits/s]}
+}\\[4pt]&
+ \text{Bit rate: } R_b \text{ [bits/s], }
+ \text{bit time: } T_b = \frac{1}{R_b} \text{ [s/bit], }\\[-4pt]&
+ \text{Energy per bit: } E_b = {P}\cdot{T_b} = \frac{P}{R_b} \text{ [``J''/bit]}
+\\[0pt]&
+ % \boxed{
+ \frac{C_{\infty}}{R_b} =
+ \frac{P}{N_0}\,\frac{1}{R_b\ln 2} =
+ \frac{E_b}{N_0}\,\frac{1}{\ln 2} > 1
+ \quad
+ \boxed{
+ \frac{E_b}{N_0} > \ln 2 \approx 0.69 = -1.59 \text{ dB}
+ }
+ % }
+\\Alternative expression for $C$: &
+ C = W\cdot \log\left(
+ 1 + \frac{P}{N_0 W}
+ \right) =
+ W\cdot \log\left(
+ 1 + \frac{R_b}{W}\cdot\frac{E_b}{N_0}
+ \right)
+\\Spectral eff.~limit ($C/W$ vs $E_b/N_0$): &
+C_{\text{eff}} = \frac{C}{W} = \log\left(
+ 1 + \frac{R_b}{W}\cdot\frac{E_b}{N_0}
+ \right) <
+\log\left(
+ 1 + \frac{C}{W}\cdot\frac{E_b}{N_0}
+ \right)
+\\
+Equivalent bound: &
+\frac{E_b}{N_0} > \frac{2^{C/W}-1}{C/W} \stackrel[(C/W\to 0)]{}{\longrightarrow} -1.59 \text{ dB}
+\\Limit for channel coding: &
+\text{code rate $R=K/N$, $N$ samples/codeword and $K$ message bits.}
+\\[-8pt]&
+F_s = 2W = N/T_{cw}, \text{ $T_{cw}$ codeword time}\implies R_b = 2WR
+\\[-8pt]&
+\frac{P}{N_0 W} =
+\frac{K E_b/T_{cw}}{N_0 W} =
+2R\frac{E_b}{N_0}
+\\&
+R_b = 2WR < C = W\log \left(1 + 2R \cdot \frac{E_b}{N_0} \right)
+\\&
+\implies 2R < \frac{C}{W} = \log \left(1 + 2R \cdot \frac{E_b}{N_0} \right)
+\\[8pt]&
+\implies
+\boxed{
+ \frac{E_b}{N_0} > \frac{2^{2R}-1}{2R}
+}\stackrel[(R\to 0)]{}{>} -1.59 \text{ dB}
+\\
+\end{mytable}
+
+\subsection*{Multiple independent Gaussian channels}
+
+\begin{mytable}
+Parallel Gaussian channels:
+&
+Y_i = X_i + Z_i
+\\[-8pt]
+& \text{$n$ independent AWGN channels with noise variance $N_i$, for $i=1,...,n$}
+\\[-8pt]
+& \text{and total tx.\@ power restriction: }P=\sum_i P_i = \mathbb E[\bm X^T \cdot \bm X]
+\\&
+I(\bm X; \bm Y) = H(\bm Y)- H(\bm Y | \bm X)
+\le \sum_i H(Y_i) - \sum_i H(Y_i | X_i)
+\\[-0pt]
+&
+I(\bm X; \bm Y) \le
+\sum_i I(X_i;Y_i) \le \sum_i\frac{1}{2}\log\left(1 + \frac{P_i}{N_i}\right),\\[-8pt]&
+\text{equality if the $X_i$ are independent, }X_i\sim \mathcal N(0,\sqrt{P_i}), \text{ where the $P_i$ need to be calculated.}
+\\
+Water filling for parallel Gaussian ch.:
+&
+\begin{mytextcol}
+Search for $P_i$ that max.\@ $I(X;Y)$, constrained to $P_i\ge 0$ and $\sum_i P_i = P$.
+\\
+Note: solution is unique because of $I(X;Y)$ concavity.
+\\
+Use Lagrange multiplier $\lambda$: $
+\displaystyle
+J = \sum_i \frac{1}{2}\log\left(1 + \frac{P_i}{N_i}\right) - \lambda \cdot \left(\sum_i P_i - P\right)$
+\\
+$\displaystyle
+\nabla_{\!\bm P}\, J =\bm 0 \implies
+\frac{1}{P_i+N_i} = \lambda \cdot 2\ln 2 \implies
+B = \frac{1}{2\lambda\ln 2} = P_i + N_i
+$
+\\[8pt]
+Find optimal $B$ s.t.
+$\displaystyle
+\begin{cases}
+P_i = (B-N_i)^+\\
+\sum_i P_i = P \\
+\end{cases}\, \text{ where: } (x)^+ = x \cdot \mathbbm{1}_{[0,\infty)}(x)
+$
+\\[8pt]
+$
+\displaystyle
+P = \sum_{i=1}^n P_i = n B - \sum_i N_i$ and solve for $B$;
+if any $P_i<0$, set $P_i=0$ (drop that channel, i.e.\,reduce $n$); check and repeat until it is valid.
+\\[6pt]
+Alternative validity test ($N_K$: largest noise among the included channels): $\displaystyle
+\sum_i (N_K-N_i) \le P
+\iff
+B - N_K \ge 0
+$
+\end{mytextcol}
+\\&
+C = \frac{1}{2}\sum_{i=1}^n\log\left(
+1+\frac{P_i}{N_i}
+\right) \text{ [bits/tx.]}
+\\
+(O)FDM Gaussian channel: &
+\begin{mytextcol}
+$n$ parallel band-limited attenuated Gaussian channels. \\
+Per-channel BW: $W_\Delta = \frac{W}{n}$, channel gain $H_i$, ch.\@ noise PSD: $N_{0,i}/2$.
+$\displaystyle
+C = \sum_{i=1}^n
+W_\Delta \log\left(
+1+\frac{P_i |H_i|^2}{N_{0,i} W_\Delta}
+\right)
+$\\[4pt]
+Attained by $P_i$ obtained via water-filling power allocation:\\[4pt]
+Find optimal $B$ s.t. $\displaystyle
+\begin{cases}
+P_i = W_\Delta\cdot \left(B-\frac{N_{0,i}}{|H_i|^2}\right)^+\\
+\sum_i P_i = P \\
+\end{cases}
+$\\
+Compute $\displaystyle\sum_i P_i = P$ and solve for $B$, check and repeat until it is valid.
+\end{mytextcol}
+\end{mytable}
+
+\subsection*{MIMO Gaussian channels}
+\begin{mytable}
+MIMO Gaussian channel: &
+\bm Y = H\cdot \bm X + \bm Z, \text{ where } \bm Y, \bm Z \in \mathbb{R}^{n_r}, \, \bm X \in \mathbb{R}^{n_t}, \, H\in \mathbb{R}^{n_r\times n_t},\newline\phantom{MMMMMMMMMM...} \bm Z \sim \mathcal N(\bm 0, \Lambda_{\bm Z})
+\\[-8pt]&
+\begin{mytextcol}
+Tx.\@ power constraint: $
+ \text{trace}(\Lambda_{\bm X}) =
+ \sum_i \mathbb E [X_i^2] =
+ \sum_i P_i \le P$
+\end{mytextcol}
+\\&
+\begin{mytextcol}
+$\displaystyle
+I(\bm X; \bm Y) = H(\bm Y)- H(\bm Y | \bm X)
+= H(\bm Y)- H(\bm Z) \le \frac{1}{2}\log \text{Det}(\Lambda_{\bm Y} \Lambda_{\bm Z}^{-1})$
+\\[4pt]
+equality iff $\bm Y \sim \mathcal N(\bm 0, \Lambda_{\bm Y})$
+and $\Lambda_{\bm Y}= H \cdot \Lambda_{\bm X} \cdot H^T+ \Lambda_{\bm Z}$
+\\[4pt]
+$\displaystyle
+I(\bm X; \bm Y) \le \frac{1}{2}\cdot
+\log \text{Det}(I+H\Lambda_{\bm X} H^T\Lambda^{-1}_{\bm Z})
+$
+\\
+We can assume i.i.d.\@ noise per channel, $\Lambda_{\bm Z} = N\cdot I$
+\\[4pt]
+SVD decomposition + water filling:
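+% sketch of the standard SVD + water-filling argument:
+\\[4pt]
+With the SVD $H = U\cdot \Sigma\cdot V^T$ ($U, V$ orthogonal; $\Sigma$ diagonal with singular values $\sigma_i$), define $\tilde{\bm Y} = U^T\bm Y$, $\tilde{\bm X} = V^T\bm X$, $\tilde{\bm Z} = U^T\bm Z \sim \mathcal N(\bm 0, N\cdot I)$ (the power constraint is unchanged since $V$ is orthogonal).
+\\[4pt]
+Then $\tilde Y_i = \sigma_i\cdot \tilde X_i + \tilde Z_i$: parallel Gaussian channels with noise $N$ and gains $\sigma_i$, so
+$\displaystyle
+C = \sum_{i} \frac{1}{2}\log\left(1+\frac{\sigma_i^2 P_i}{N}\right)$, attained with $P_i = \left(B-\frac{N}{\sigma_i^2}\right)^+$ by water filling s.t.\,$\sum_i P_i = P$.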
+\end{mytextcol}
+\end{mytable}
+
+\subsection*{Discrete input Gaussian channels (digital modulations)}
+\begin{mytable}
+& \text{[TODO]}
+\end{mytable}
+
+\newpage
+\section*{Rate–distortion theory}
+\begin{mytable}
+Rate-distortion function (RDF):
+&
+R(\delta) = \min_{p(\hat{\bm x}|{\bm x}):\,\mathbb{E}[d({\bm x},\hat{\bm x})]\le \delta}\,
+\frac{1}{n}\, I({\bm X};\hat{\bm X})
+\\& \text{Defined for a source (random vector) }\bm X \text{ with a distortion measure }d
+\\
+RDF for i.i.d.\@ source:
+&
+R(\delta) = \min_{p(\hat{ x}|{ x}):\,\mathbb{E}[d({ x},\hat{ x})]\le \delta}
+I(X;\hat{X})
+\\[-8pt]
+& \text{(with one channel use per source symbol: tx.\@ within distortion $\delta$ is possible if $R(\delta) < C$,}\newline\text{impossible if $R(\delta) > C$)}
+\\
+Hamming RDF for Bernoulli($p$) source:
+& R(\delta) = (h_2(p) - h_2(\delta))\cdot\mathbbm{1}_{[0, \min(p,1-p)]}(\delta)
+\\[-12pt]
+& \text{usually $p=1/2$: equip.\@ binary source}
+\\[-12pt] &\quad \implies R(\delta) = (1 - h_2(\delta))\cdot \mathbbm 1_{[0,0.5]}(\delta)
+\\
+Coding rate bound for distortion $\delta$:
+&
+R \le \frac{C}{R(\delta)}\quad \text{(noisy source--channel coding theorem, [th.\@ 5.1, McEliece])}
+\\
+Coding rate bound for BSC($\varepsilon$):
+&
+R \le \frac{1 - h_2(\varepsilon)}{1 - h_2(P_b)}; \quad P_b = \delta = \text{``acceptable'' BER after decoding}
+\\[8pt]
+C.r. bound for digital Gaussian ch.:
+&
+R \le \frac{\frac{1}{2}\log\left(1+2R\frac{E_b}{N_0}\right)}{1 - h_2(P_b)}
+\quad\text{ (for BIAWGN or BAWGNC)}
+\\[8pt]&
+\implies
+\boxed{
+ \frac{E_b}{N_0} \ge
+ \frac{
+ 2^{2R(1-h_2(P_b))}
+ -1
+ }{2R}
+} \underset{R\to 0}{>}
+ (1 - h_2(P_b)) \ln (2)
+\\[8pt] &
+\text{Using $P_b\to 0$ gives the error-free (capacity) expressions.}
+\\[-8pt] &
+\begin{mytextcol}
+\footnotesize
+Note: (from [McEliece]) a lossy compressor can be built from a channel code by using the decoder at the transmitter and the encoder at the receiver.
+\end{mytextcol}
+\end{mytable}
+
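+As a quick numerical check, the boxed $E_b/N_0$ bound above can be evaluated directly; a minimal Python/NumPy sketch (function names are illustrative, not part of any course code):
+
+\begin{lstlisting}[language=Python]
+import numpy as np
+
+def h2(p):
+    """Binary entropy function h2(p) in bits."""
+    if p in (0.0, 1.0):
+        return 0.0
+    return -p * np.log2(p) - (1 - p) * np.log2(1 - p)
+
+def ebn0_min_db(R, Pb=0.0):
+    """Minimum Eb/N0 in dB from the bound (2^(2R(1-h2(Pb))) - 1) / (2R)."""
+    ratio = (2 ** (2 * R * (1 - h2(Pb))) - 1) / (2 * R)
+    return 10 * np.log10(ratio)
+
+print(ebn0_min_db(0.5))        # 0.00 dB for R = 1/2, error-free (Pb = 0)
+print(ebn0_min_db(1e-6))       # -> -1.59 dB as R -> 0
+print(ebn0_min_db(0.5, 1e-3))  # slightly less demanding if Pb > 0 is tolerated
+\end{lstlisting}
+
+
+\end{document}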