commit fa044613bb52fdd7944bfa4361e3821f28fcb698
parent e5f74412ec08e0a87badcf8e40966179f6d1072f
Author: AndrewLockVI <andrew@laack.co>
Date: Mon, 2 Jun 2025 16:31:22 -0500
Started notes on node, took notes on deep learning
Diffstat:
15 files changed, 1334 insertions(+), 183 deletions(-)
diff --git a/latex/DeepLearning-Goodfellow/DeepLearning.tex b/latex/DeepLearning-Goodfellow/DeepLearning.tex
@@ -0,0 +1,654 @@
+\documentclass[12pt, letterpaper]{article}
+\usepackage{xcolor}
+
+
+\usepackage{enumitem}
+\usepackage{graphicx}
+\usepackage{listings}
+\usepackage{caption}
+\usepackage{tcolorbox}
+\usepackage{datetime}
+\usepackage{amsfonts}
+\usepackage{amsmath}
+\usepackage{geometry}
+\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
+\usepackage{amssymb,enumerate}
+\usepackage{amsthm,stmaryrd}
+\usepackage[all]{xy}
+
+\newenvironment{definition}{
+ \begin{quote}
+ \textbf{Definition:}
+ }{
+ \end{quote}
+}
+
+
+\newenvironment{explanation}{
+ \begin{quote}
+ \textbf{Explanation:}
+ }{
+ \end{quote}
+}
+
+\newenvironment{example}{
+ \begin{quote}
+ \textbf{Example:}
+ }{
+ \end{quote}
+}
+
+\lstnewenvironment{code}{
+ \hspace{.45cm}\textbf{Code:}
+ \lstset{
+ basicstyle=\ttfamily,
+ columns=fullflexible,
+ breaklines=true
+ }
+}{
+}
+
+
+\begin{document}
+
+\noindent{\large \textbf{Deep Learning by Ian Goodfellow}}
+
+\noindent Notes by Andrew Laack
+
+\tableofcontents
+
+\section{Linear Algebra}
+
+
+\subsection{Broadcasting}
+
+\begin{definition}
+Broadcasting is the process of iteratively applying a lower dimensional operation on higher dimensionalt structures.
+\end{definition}
+
+\begin{explanation}
+
+Assume \[ C = A + b\]
+
+and \[ C,A \in \mathbb{R}^{m,n}, b \in \mathbb{R}^n. \]
+
+With this, we see we are adding an $n$ dimentional vector to an $m \times n$ matrix. For this to be well defined, we know the $n$ indicies must relate to each other. In the case of the matrix, we have $n$ columns and thus we add each $b_i$ to $A_{:,i}$.
+
+\end{explanation}
+
+\begin{example}
+
+Assume
+
+\begin{align*}
+ A = \begin{bmatrix}
+ 3 & 4 & 1 \\
+ 5 & 7 & 8
+ \end{bmatrix},
+ b = \begin{bmatrix} 1 & 2 & 3 \end{bmatrix}^\top
+\end{align*}
+
+Compute $C = A + b$
+
+\begin{align*}
+&= \begin{bmatrix}
+ 3 & 4 & 1 \\
+ 5 & 7 & 8
+ \end{bmatrix} + \begin{bmatrix}1 & 2 & 3\end{bmatrix}\\
+&= \begin{bmatrix}
+ 4 & 6 & 4 \\
+ 6 & 9 & 11
+\end{bmatrix}
+\end{align*}
+
+\end{example}
+
+\begin{code}
+ import numpy as np
+
+ B = np.array([[6,2,8], [2,9,8]])
+ a = [2, 1, 2]
+ D = np.empty((len(B), len(B[0])))
+
+ for i in range(len(B)):
+ for j in range(len(B[i])):
+ D[i][j] = B[i][j] + a[j]
+
+ # Alternatively
+ D = B + a
+
+\end{code}
+
+\subsection{Matrix Multiplication}
+
+\begin{definition}
+ The product of A and B, C, is defined as \[C_{i,j} = A_{i,:}^\top \cdot B_{:,j}.\]
+\end{definition}
+
+\begin{explanation}
+ If $C = AB$ then the index $i,j$ is the dot product between the transpose of the $i$'th row vector of A and the $j$'th column vector of B. The row vector must be transposed, or vice versa, as dot products are defined for like vectors.
+
+ Additionally, we note that the number of \textbf{columns} of A must be equal to the number of \textbf{rows} in B. The result will be a matrix that has as many rows as A and as many columns as B.
+
+ \textbf{Properties:} Matrix multiplication is distributive and associative, but not commutative. Transposition also has an interesting property.
+ \begin{align*}
+ A(B + C) &= AB + AC\\
+ A(BC) &= (AB)C \\
+ AB &\not\equiv BA \\
+ (AB)^\top &= B^\top A^\top
+ \end{align*}
+\end{explanation}
+
+\begin{example}
+ Compute $C = A \cdot B$
+ \begin{align*}
+ A = \begin{bmatrix}
+ 4 & 3 & 4 \\
+ 1 & 1 & 5 \\
+ 2 & 3 & 6
+ \end{bmatrix},
+ B = \begin{bmatrix}
+ 3 & 1 & 7 \\
+ 1 & 2 & 1 \\
+ 2 & 4 & 2
+ \end{bmatrix}
+ \end{align*}
+
+ \begin{align*}
+ C &= AB \\
+ &= \begin{bmatrix}
+ 4 & 3 & 4 \\
+ 1 & 1 & 5 \\
+ 2 & 3 & 6 \\
+ \end{bmatrix}
+ \cdot
+ \begin{bmatrix}
+ 4 & 3 & 4 \\
+ 1 & 1 & 5 \\
+ 2 & 3 & 6 \\
+ \end{bmatrix} \\
+ & =
+ \begin{bmatrix}
+ 23 & 26 & 39 \\
+ 14 & 23 & 18 \\
+ 21 & 32 & 29 \\
+ \end{bmatrix} \\
+ \end{align*}
+
+\end{example}
+
+ \begin{code}
+ import numpy as np
+
+ A = np.array([[4,3,4],[1,1,5],[2,3,6]])
+ B = np.array([[3,1,7],[1,2,1],[2,4,2]])
+ C = A @ B # matrix multiplication
+ C = A.__matmul__(B) # more explicit
+
+ def matMul(A, B):
+ C = np.empty((len(A), len(B[0])))
+ for i in range(0, len(A)):
+ for j in range(0, len(B[0])):
+ C[i][j] = A[i].dot(B[:,j])
+ return C
+
+ C = matMul(A,B)
+ \end{code}
+
+\subsection{Inverse Matrices}
+
+\begin{definition}
+ The inverse matrix of $A$ is $A^{-1}$ such that $A^{-1}A = AA^{-1} = I_n$.
+\end{definition}
+
+\begin{explanation}
+The inverse of a matrix is the matrix whose product is the identity matrix. If both the left and right inverses exist, the matrix $A$ is considered invertible. If $A$ only does not have the same left and right inverses, it is not invertible. For a matrix to be invertible $\text{rank}(A) = n$ and $A \in R^{n \times n}$. Non-square matrices may have inverses, if they have full column or row rank, but such inverses are not two-sided or unique.
+
+\end{explanation}
+
+\subsubsection{Finding Inverse Matrices (Gauss-Jordan)}
+
+\begin{explanation}
+ We can solve for the inverse of a matrix using a variant of Gaussian elimination called Gauss-Jordan elimination. We start by constructing an augmented matrix of the form
+
+ \begin{align*}
+ \begin{bmatrix}
+ A & | & I_n
+ \end{bmatrix}.
+ \end{align*}
+
+ We then perform elementary row operations on the matrix until we have
+
+ \begin{align*}
+ \begin{bmatrix}
+ I_n & | & A^{-1}
+ \end{bmatrix}.
+ \end{align*}
+
+ In essence, we convert $A$ into the identity matrix, and the same row operations applied to $I_n$ produces $A^{-1}$.
+
+\end{explanation}
+
+\begin{example}
+ Invert the matrix
+ \begin{align*}
+ A :=
+ \begin{bmatrix}
+ 2 & 8 \\
+ 1 & 3
+ \end{bmatrix}
+ \end{align*}
+
+ Clearly, the matrix is linearly independent and square thus invertible. We thus perform the following steps:
+
+ \begin{align*}
+ &\begin{bmatrix}
+ 2 & 8 & | & 1 & 0 & \\
+ 1 & 3 & | & 0 & 1 & \\
+ \end{bmatrix} \\
+ = &
+ \begin{bmatrix}
+ 1 & 5 & | & 1 & -1 & \\
+ 1 & 3 & | & 0 & 1 & \\
+ \end{bmatrix} \\
+ = &
+ \begin{bmatrix}
+ 1 & 5 & | & 1 & -1 & \\
+ 0 & -2 & | & -1 & 2 & \\
+ \end{bmatrix} \\
+ = &
+ \begin{bmatrix}
+ 1 & 0 & | & -1.5 & 4 & \\
+ 0 & 1 & | & .5 & -1 & \\
+ \end{bmatrix} \\
+ \end{align*}
+
+ As such, we have
+
+ \begin{align*}
+ A^{-1} = &
+ \begin{bmatrix}
+ & -1.5 & 4 & \\
+ & .5 & -1 & \\
+ \end{bmatrix} \\
+ \end{align*}
+
+\end{example}
+
+\subsubsection{Solving a System of Linear Equations With Inverse Matrices}
+
+\begin{explanation}
+ To find the solution to the system of equations
+
+ \begin{align*}
+ Ax &= b
+ \end{align*}
+
+ where
+
+ \begin{align*}
+ A \in \mathbb{R}^{n,n}, \text{ rank } (A) = n, x,b \in \mathbb{R}^n
+ \end{align*}
+
+ We first note that since the rank of $A$ is $n$, and $A \in \mathbb{R}^{n,n}$, $A$ is invertible. We use this information as follows:
+
+ \begin{align*}
+ Ax &= b \\
+ A^{-1}Ax &= A^{-1}b\\
+ I_nx &= A^{-1}b \\
+ x &= A^{-1}b
+ \end{align*}
+
+ The vector $b$ is known, and we are trying to solve for $x$. Given this, we can use our procedure for finding $A^{-1}$ to solve the system of equations for $x$.
+
+\end{explanation}
+
+\begin{example}
+ example
+\end{example}
+
+\begin{code}
+ import numpy as np
+
+ A = np.array([[10, 2, 3], [1, 4, 1], [9, 1, 3]])
+ b = np.array([10, 2, 3])
+ A_inv = np.linalg.inv(A)
+ result = A_inv @ b
+ print(result)
+\end{code}
+
+\subsection{Norm}
+
+\begin{definition}
+ A norm is a function $f : X \to \mathbb{R}$, where $X$ is a vector space, such that
+
+ \begin{align*}
+ f(x + y) &\leq f(x) + f(y), &&\text{for all } x, y \in X \quad \text{(Triangle Inequality)} \\
+ f(ax) &= |a| \cdot f(x), &&\text{for all } a \in \mathbb{R}, x \in X \quad \text{(Absolute Homogeneity)} \\
+ f(x) = 0 &\implies x = 0, &&\text{for all } x \in X \quad \text{(Positive Definiteness)}
+ \end{align*}
+\end{definition}
+
+\begin{explanation}
+
+ While a norm has a formal definition, we are often concerned with the Frobenius and $L_p$ for $p \in \mathbb{N} \cup \{\infty\}$ norms in machine learning.
+
+ The $L_2$ norm is simply the euclidean distance between $\vec{0}$ and $x$. This function can be stated as
+
+ \begin{align*}
+ L_2(x) &= \sqrt{\sum_{i=1}^{n} x_i^2}
+ \end{align*}
+
+ More generally, the $L_p$ norm is defined as:
+
+ \begin{align*}
+ L_p(x) &= \left( \sum_{i=1}^{n} |x_i|^p \right)^{1/p}
+ \end{align*}
+
+ It is important to point out that the $L_0$ norm is not a true norm. It does not have the absolute homogeneity property, but the metric is nevertheless useful for counting the number of non-zero coordinates in a vector. We also note the $L_\infty$ norm is defined as \[ ||x||_\infty = \max_{i} (x_i).\]
+
+The Frobenius norm is used on matrices and is a generalization of the $L_2$ norm. It can be stated as follows:
+
+ \begin{align*}
+ \|X\|_F = \left( \sum_{i=1}^{n} \sum_{j=1}^{n} x_{i,j}^2 \right)^{1/2}
+ \end{align*}
+\end{explanation}
+
+\begin{code}
+ x = [1, 3, 5]
+ p = 2
+
+ sum = 0
+ for i in x:
+ sum += i**p
+
+ res = sum ** (1/p)
+\end{code}
+
+
+\subsection{Orthogonal Matrix}
+
+\begin{definition}
+ definition
+\end{definition}
+
+\begin{explanation}
+ explain
+\end{explanation}
+
+\begin{example}
+ example
+\end{example}
+
+\subsection{LU Decomposition}
+
+\begin{definition}
+ definition
+\end{definition}
+
+\begin{explanation}
+ explain
+\end{explanation}
+
+\begin{example}
+ example
+\end{example}
+
+\subsection{Determinant}
+
+\begin{definition}
+ The determinant of a matrix
+\end{definition}
+
+\begin{explanation}
+ Conceptually, the determinant of a matrix is the signed $n$-dimensional volume of the image of the unit $n$-cube under the linear transformation defined by the matrix. Given this, we see if det$(A) = 0$, $A$ is singular.
+
+ To solve for the determinant, we can use Gaussian elimination, cofactor expansion, or a handful of other approaches. For simplicity, I will use Gaussian elimination in my example, and we note the product of the diagonal entries of the upper triangular matrix are the determinant of the matrix.
+\end{explanation}
+
+\begin{example}
+ Compute the determinant of
+ \begin{align*}
+ A :=
+ \begin{bmatrix}
+ 10 & 4 & 2 & 1 \\
+ 8 & 3 & 1 & 2 \\
+ 2 & 6 & 7 & 5 \\
+ 5 & 1 & 1 & 3 \\
+ \end{bmatrix}.
+ \end{align*}
+
+ Perform LU decomposition, find we don't need any row swaps, and
+
+ \begin{align*}
+ U
+ \approx
+ \begin{bmatrix}
+ 10 & 4& 2& 1 \\
+ 0 & 5.2& 6.6 & 4.8 \\
+ 0 & 0. & 1.26923077 & 3.42307692 \\
+ 0 & 0. & 0. & 2.31818182 \\
+ \end{bmatrix}.
+ \end{align*}
+
+ (Note: if we have an even number of row swaps, the determinant is positive. If we have an odd number, it is negative.)
+
+ Multiply the pivots together to find det$(A) = 153$.
+
+
+\end{example}
+
+\begin{code}
+ import numpy as np
+ import scipy.linalg
+
+ A = np.array([[10,4,2,1],[8,3,1,2],[2,6,7,5],[5,1,1,3]])
+
+ upper = scipy.linalg.lu(A)[2]
+
+ det = 1
+ for i in range(0,len(upper)):
+ det *= upper[i][i]
+
+\end{code}
+
+\subsection{Eigen Vectors}
+
+\begin{definition}
+ An eigenvector of $A$ is a vector $v$ such that $Av = \lambda v$ where $\lambda$ is a scalar.
+\end{definition}
+
+\begin{explanation}
+ An eigenvector is a vector that is only scaled by some scalar when multiplied by the matrix it is an eigenvector of. The associated scalar ($\lambda$) is called the eigenvalue of $v$.
+
+\end{explanation}
+
+\subsubsection{Eigen Decomposition}
+
+\begin{definition}
+ The eigen decomposition of the matrix $A$ is $V\Lambda V^{-1}$ where $V$ is the matrix of eigenvectors and $\Lambda$ is the diagonal matrix of eigenvalues.
+\end{definition}
+
+\begin{explanation}
+
+The eigen decomposition is important as it allows us to decompose specific matrices into simpler ones with nicer properties. Particularly, diagonal matrices are easy to take to powers when compared with their non-diagonal counterparts.
+
+\textbf{Solving:}
+\begin{align*}
+ Av &= \lambda I_n v \\
+ Av - \lambda I_n v &= \vec{0} \\
+ (A - \lambda I_n)v &= \vec{0} \\
+\end{align*}
+
+Given this, we need to find a matrix $A - \lambda I_n$ that takes $v$ to the zero vector. Trivially, this is true when $v = \vec{0}$, but we are not interested in that. We know that det$(A) = 0$ means dimensionallity is being reduced and this must be true for $v$ to go to $\vec{0}$ when $v \neq \vec{0}$. Thus we have:
+
+ \begin{align*}
+ \text{det}(A - \lambda I_n) = 0
+ \end{align*}
+
+Shown above is referred to as the characteristic polynomial. This can be stated in matrix form as
+
+ \begin{align*}
+ \text{det} (\begin{bmatrix}
+ a_{1,1} - \lambda & a_{1,2} & \hdots & a_{1,n}\\
+ a_{2,1} & a_{2,2} - \lambda & \hdots & a_{2,n}\\
+ \vdots & \vdots & \ddots & \vdots\\
+ a_{n,1} & a_{n,2} & \hdots & a_{n,n} - \lambda\\
+ \end{bmatrix}) = 0.
+ \end{align*}
+
+We solve this by computing the eigenvalues that make the equality true. This is done using basic algebra on the equation for the determinant of the above matrix. With the resulting eigenvalues, we plug in each $\lambda_i \in \Lambda$ into the equation $A - \lambda_i I_n$. We know this matrix is singular as it has a non-trivial null space, because it has a determinant of $0$. Define $B = A - \lambda_i I_n$. We can state the equation as
+
+\begin{align*}
+ Bv = \vec{0}
+\end{align*}
+
+ We solve this by computing the nullspace of $B$. This is done by solving the system of equations. Once done for each eigenvalue, we have each eigenvector. These eigenvectors define $V$ and thus the final part is computing $V^{-1}$. I refer back to the section on computing inverses of matrices for the derivation of $V^{-1}$. Once done, we can state the eigen decomposition as
+
+\begin{align*}
+ A = V \Lambda V^{-1}.
+\end{align*}
+
+\end{explanation}
+
+\begin{example}
+ Compute the eigen decomposition for
+\begin{align*}
+ A =
+ \begin{bmatrix}
+ 1 & 8 \\
+ 2 & 3 \\
+ \end{bmatrix}.
+\end{align*}
+
+Find the eigenvalues:
+
+\[
+\det\left(\begin{bmatrix} 1 - \lambda & 8 \\ 2 & 3 - \lambda \end{bmatrix}\right) = 0
+\]
+
+\[
+(1 - \lambda)(3 - \lambda) - (8)(2) = 0
+\]
+
+\[
+3 - \lambda - 3\lambda + \lambda^2 - 16 = 0
+\]
+
+\[
+\lambda^2 - 4\lambda - 13 = 0
+\]
+
+\[
+\lambda = 2 \pm \sqrt{17}
+\]
+
+Now, for \(\lambda_1 = 2 + \sqrt{17}\):
+
+\[
+A - \lambda_1 I =
+\begin{bmatrix}
+1 - (2 + \sqrt{17}) & 8 \\
+2 & 3 - (2 + \sqrt{17})
+\end{bmatrix}
+=
+\begin{bmatrix}
+-1 - \sqrt{17} & 8 \\
+2 & 1 - \sqrt{17}
+\end{bmatrix}
+\]
+
+Solve for the null space:
+
+\[
+\begin{bmatrix}
+-1 - \sqrt{17} & 8 \\
+2 & 1 - \sqrt{17}
+\end{bmatrix}
+\begin{bmatrix}
+x \\
+y
+\end{bmatrix}
+=
+\begin{bmatrix}
+0 \\
+0
+\end{bmatrix}
+\]
+
+From the first row:
+
+\[
+(-1 - \sqrt{17})x + 8y = 0
+\Rightarrow (1 + \sqrt{17})x = 8y
+\Rightarrow x = \frac{8y}{1 + \sqrt{17}}
+\]
+
+So one eigenvector is:
+
+\[
+v_1 = \begin{bmatrix} \frac{8}{1 + \sqrt{17}} \\ 1 \end{bmatrix}
+\]
+
+Do the same for \(\lambda_2 = 2 - \sqrt{17}\):
+
+\[
+v_2 = \begin{bmatrix} \frac{8}{1 - \sqrt{17}} \\ 1 \end{bmatrix}
+\]
+
+Construct matrix \(V\) of eigenvectors:
+
+\[
+V = \begin{bmatrix}
+\frac{8}{1 + \sqrt{17}} & \frac{8}{1 - \sqrt{17}} \\
+1 & 1
+\end{bmatrix}
+\]
+
+Diagonal matrix \(D\) of eigenvalues:
+
+\[
+D = \begin{bmatrix}
+2 + \sqrt{17} & 0 \\
+0 & 2 - \sqrt{17}
+\end{bmatrix}
+\]
+
+We then have the eigen decomposition:
+
+\[
+A = V D V^{-1}
+\]
+
+Now compute \(V^{-1}\) based on the prior section's description for matrix inversion to find:
+
+\[
+V^{-1} = \frac{1}{\sqrt{17}}
+\begin{bmatrix}
+1 & -\frac{8}{1 - \sqrt{17}} \\
+-1 & \frac{8}{1 + \sqrt{17}}
+\end{bmatrix}
+\]
+
+Thus we have:
+
+\[
+A =
+\begin{bmatrix}
+\frac{8}{1 + \sqrt{17}} & \frac{8}{1 - \sqrt{17}} \\
+1 & 1
+\end{bmatrix}
+\begin{bmatrix}
+2 + \sqrt{17} & 0 \\
+0 & 2 - \sqrt{17}
+\end{bmatrix}
+\frac{1}{\sqrt{17}}
+\begin{bmatrix}
+1 & -\frac{8}{1 - \sqrt{17}} \\
+-1 & \frac{8}{1 + \sqrt{17}}
+\end{bmatrix}
+\]
+
+\end{example}
+
+\end{document}
diff --git a/latex/DeepLearning.tex b/latex/DeepLearning.tex
@@ -1,183 +0,0 @@
-\documentclass[12pt, letterpaper]{article}
-\usepackage{enumitem}
-\usepackage{graphicx}
-\usepackage{amsthm}
-\usepackage{listings}
-\usepackage{caption}
-\usepackage{tcolorbox}
-\usepackage{datetime}
-\usepackage{amsfonts}
-\usepackage{amsmath}
-
-\newenvironment{definition}{
- \begin{quote}
- \textbf{Definition:}
- }{
- \end{quote}
-}
-
-
-\newenvironment{explanation}{
- \begin{quote}
- \textbf{Explanation:}
- }{
- \end{quote}
-}
-
-\newenvironment{example}{
- \begin{quote}
- \textbf{Example:}
- }{
- \end{quote}
-}
-
-\lstnewenvironment{code}{
- \hspace{.45cm}\textbf{Code:}
- \lstset{
- basicstyle=\ttfamily,
- columns=fullflexible,
- breaklines=true
- }
-}{
-}
-
-
-\begin{document}
-
-\noindent{\large \textbf{Deep Learning by Ian Goodfellow}}
-
-\noindent Notes by Andrew Laack
-
-\tableofcontents
-
-\section{Linear Algebra}
-
-
-\subsection{Broadcasting}
-
-\begin{definition}
-Broadcasting is the process of iteratively applying a lower dimensional operation on higher dimensionalt structures.
-\end{definition}
-
-\begin{explanation}
-
-Assume \[ C = A + b\]
-
-and \[ C,A \in \mathbb{R}^{m,n}, b \in \mathbb{R}^n. \]
-
-With this, we see we are adding an $n$ dimentional vector to an $m \times n$ matrix. For this to be well defined, we know the $n$ indicies must relate to each other. In the case of the matrix, we have $n$ columns and thus we add each $b_i$ to $A_{:,i}$.
-
-\end{explanation}
-
-\begin{example}
-
-Assume
-
-\begin{align*}
- A = \begin{bmatrix}
- 3 & 4 & 1 \\
- 5 & 7 & 8
- \end{bmatrix},
- b = \begin{bmatrix} 1 & 2 & 3 \end{bmatrix}^\top
-\end{align*}
-
-Compute $C = A + b$
-
-\begin{align*}
-&= \begin{bmatrix}
- 3 & 4 & 1 \\
- 5 & 7 & 8
- \end{bmatrix} + \begin{bmatrix}1 & 2 & 3\end{bmatrix}\\
-&= \begin{bmatrix}
- 4 & 6 & 4 \\
- 6 & 9 & 11
-\end{bmatrix}
-\end{align*}
-
-\end{example}
-
-\begin{code}
- import numpy as np
-
- B = np.array([[6,2,8], [2,9,8]])
- a = [2, 1, 2]
- D = np.empty((len(B), len(B[0])))
-
- for i in range(len(B)):
- for j in range(len(B[i])):
- D[i][j] = B[i][j] + a[j]
-
- # Alternatively
- D = B + a
-
-\end{code}
-
-\subsection{Matrix Multiplication}
-
-\begin{definition}
- The product of A and B, C, is defined as \[C_{i,j} = A_{i,:}^\top \cdot B_{:,j}.\]
-\end{definition}
-
-\begin{explanation}
- If $C = AB$ then the index $i,j$ is the dot product between the transpose of the $i$'th row vector of A and the $j$'th column vector of B. The row vector must be transposed, or vice versa, as dot products are defined for like vectors.
-
- Additionally, we note that the number of \textbf{columns} of A must be equal to the number of \textbf{rows} in B. The result will be a matrix that has as many rows as A and as many columns as B.
-\end{explanation}
-
-\begin{example}
- Compute $C = A \cdot B$
- \begin{align*}
- A = \begin{bmatrix}
- 4 & 3 & 4 \\
- 1 & 1 & 5 \\
- 2 & 3 & 6
- \end{bmatrix},
- B = \begin{bmatrix}
- 3 & 1 & 7 \\
- 1 & 2 & 1 \\
- 2 & 4 & 2
- \end{bmatrix}
- \end{align*}
-
- \begin{align*}
- C &= AB \\
- &= \begin{bmatrix}
- 4 & 3 & 4 \\
- 1 & 1 & 5 \\
- 2 & 3 & 6 \\
- \end{bmatrix}
- \cdot
- \begin{bmatrix}
- 4 & 3 & 4 \\
- 1 & 1 & 5 \\
- 2 & 3 & 6 \\
- \end{bmatrix} \\
- & =
- \begin{bmatrix}
- 23 & 26 & 39 \\
- 14 & 23 & 18 \\
- 21 & 32 & 29 \\
- \end{bmatrix} \\
- \end{align*}
-
-\end{example}
-
-\begin{code}
- import numpy as np
-
- A = np.array([[4,3,4],[1,1,5],[2,3,6]])
- B = np.array([[3,1,7],[1,2,1],[2,4,2]])
- C = A @ B # matrix multiplication
- C = A.__matmul__(B) # more explicit
-
- def matMul(A, B):
- C = np.empty((len(A), len(B[0])))
- for i in range(0, len(A)):
- for j in range(0, len(B[0])):
- C[i][j] = A[i].dot(B[:,j])
- return C
-
- C = matMul(A,B)
-\end{code}
-
-\end{document}
diff --git a/latex/leetcode/Leetcode.tex b/latex/leetcode/Leetcode.tex
@@ -0,0 +1,143 @@
+\documentclass[12pt, letterpaper]{article}
+\usepackage{xcolor}
+
+
+\usepackage{enumitem}
+\usepackage{graphicx}
+\usepackage{listings}
+\usepackage{caption}
+\usepackage{tcolorbox}
+\usepackage{datetime}
+\usepackage{amsfonts}
+\usepackage{amsmath}
+\usepackage{geometry}
+\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
+\usepackage{amssymb,enumerate}
+\usepackage{amsthm,stmaryrd}
+\usepackage[all]{xy}
+
+\newenvironment{definition}{
+ \begin{quote}
+ \textbf{Definition:}
+ }{
+ \end{quote}
+}
+
+
+\newenvironment{explanation}{
+ \begin{quote}
+ \textbf{Explanation:}
+ }{
+ \end{quote}
+}
+
+\newenvironment{example}{
+ \begin{quote}
+ \textbf{Example:}
+ }{
+ \end{quote}
+}
+
+\lstnewenvironment{code}{
+ \textbf{Code:}
+ \lstset{
+ basicstyle=\ttfamily,
+ columns=fullflexible,
+ breaklines=true
+ }
+}{
+}
+
+
+\begin{document}
+
+\noindent{\large \textbf{Leetcode}}
+
+\noindent Notes by Andrew Laack
+
+\tableofcontents
+
+
+\section{Merge k Sorted Lists}
+
+\textbf{Problem:}
+
+\noindent You are given an array of k linked-lists lists, each linked-list is sorted in ascending order. Merge all the linked-lists into one sorted linked-list and return it.
+
+\vspace{.25cm}
+
+\noindent\textbf{Explanation:}
+
+\noindent We first find the lowest value that starts a list. This node will be the head of our list. From here, we iterate through each of the lists at each step selecting the lowest value and making it the next node in the list. Once all items have been added to the list, we return the head. There are quite a few edge cases to consider related to data nullity and how to handle empty lists, but that's the extent of the complication.
+
+\vspace{.25cm}
+
+\noindent\begin{code}
+# Definition for singly-linked list.
+# class ListNode:
+# def __init__(self, val=0, next=None):
+# self.val = val
+# self.next = next
+
+class Solution:
+ def mergeKLists(self, lists: List[Optional[ListNode]]) -> Optional[ListNode]:
+
+ if len(lists) == 0:
+ return None
+
+ lowest = 10**4 # upper bound
+ head = lists[0]
+ headList = 0
+
+ for i in range(0,len(lists)):
+ if lists[i] is not None and lists[i].val < lowest:
+ head = lists[i]
+ lowest = lists[i].val
+ headList = i
+
+ if lists[headList] is not None:
+ lists[headList] = lists[headList].next
+
+
+ i = 0
+ while i < len(lists):
+ if lists[i] is None:
+ lists.pop(i)
+ else:
+ i += 1
+
+
+ if len(lists) == 0:
+ return head
+
+ done = False
+ current = head
+
+ while not done:
+ lowest = 10**4 # upper bound
+ listNum = 0
+
+ for i in range(0, len(lists)):
+ if lists[i].val < lowest:
+ lowest = lists[i].val
+ listNum = i
+
+ current.next = lists[listNum]
+ lists[listNum] = lists[listNum].next
+ current = current.next
+ if lists[listNum] is None:
+ lists.pop(listNum)
+ if len(lists) == 0:
+ done = True
+
+
+ return head
+\end{code}
+
+\section{Section}
+
+\begin{code}
+ code
+\end{code}
+
+\end{document}
diff --git a/latex/leetcode/c++/a.out b/latex/leetcode/c++/a.out
Binary files differ.
diff --git a/latex/leetcode/c++/puzzle.txt b/latex/leetcode/c++/puzzle.txt
@@ -0,0 +1,9 @@
+.........
+.........
+.........
+.........
+..1......
+......2..
+.........
+.........
+.........
diff --git a/latex/leetcode/c++/reverseVowels.cpp b/latex/leetcode/c++/reverseVowels.cpp
@@ -0,0 +1,51 @@
+#include <string>
+#include <unordered_set>
+
+class Solution {
+public:
+ std::string reverseVowels(std::string s) {
+
+ std::unordered_set<char> set = {'a', 'e', 'i', 'o', 'u'};
+
+
+ int left = 0;
+ int right = s.length();
+
+ while(right > left){
+
+ auto itl = set.find(tolower(s[left]));
+ auto itr = set.find(tolower(s[right]));
+
+ bool leftFound = false;
+ bool rightFound = false;
+ if(itl != set.end()){
+ leftFound = true;
+ }
+ if(itr != set.end()){
+ rightFound = true;
+ }
+
+ if(leftFound && rightFound){
+ char temp = s[left];
+ s[left] = s[right];
+ s[right] = temp;
+ left += 1;
+ right -= 1;
+ }
+
+ if(!leftFound){
+ left += 1;
+ }
+ if(!rightFound){
+ right -= 1;
+ }
+
+ }
+
+ return s;
+ }
+};
+
+int main(){
+ Solution sln;
+}
diff --git a/latex/leetcode/c++/sudoku.cpp b/latex/leetcode/c++/sudoku.cpp
@@ -0,0 +1,126 @@
+#include <bits/stdc++.h>
+using namespace std;
+
+class Solution {
+public:
+ void solveSudoku(vector<vector<char>>& board) {
+ recurse(board, 0, 0);
+ return;
+ }
+
+ bool recurse(std::vector<std::vector<char>>& board, int xPos, int yPos) {
+ if (yPos == 9) {
+ return true;
+ }
+
+ int newX = (xPos + 1) % 9;
+ int newY = yPos;
+ if (newX < xPos) {
+ newY = yPos + 1;
+ }
+
+ if (board[xPos][yPos] != '.') {
+ return recurse(board, newX, newY);
+ } else {
+ for (int i = 1; i < 10; ++i) {
+ board[xPos][yPos] = '0' + i;
+ // only move forward with valid choices.
+ if (!isValid(board, xPos, yPos)) {
+ continue;
+ }
+ if (recurse(board, newX, newY)) {
+ return true;
+ }
+ }
+ // failed
+ board[xPos][yPos] = '.';
+ }
+
+ return false;
+ }
+
+ bool isValid(std::vector<std::vector<char>>& board, int posX, int posY) {
+ auto brd = board;
+ char current = brd[posX][posY];
+
+ // Check column
+ for (int i = 0; i < 9; ++i) {
+ if (current == brd[i][posY] && i != posX) {
+ return false;
+ }
+ }
+
+ // Check row
+ for (int i = 0; i < 9; ++i) {
+ if (current == brd[posX][i] && i != posY) {
+ return false;
+ }
+ }
+
+ // Check 3x3 sub-grid
+ int x = (posX / 3) * 3;
+ int y = (posY / 3) * 3;
+ for (int rw = 0; rw < 3; ++rw) {
+ for (int cl = 0; cl < 3; ++cl) {
+ if (current == brd[x + cl][y + rw] && (x + cl != posX || y + rw != posY)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+};
+
+int main() {
+ ios::sync_with_stdio(false);
+ cin.tie(nullptr);
+
+ // Read a 9x9 Sudoku board from standard input.
+ // Expect 9 lines, each containing exactly 9 characters:
+ // digits '1'–'9' for filled cells and '.' for empty cells.
+ vector<vector<char>> board(9, vector<char>(9));
+ string line;
+ for (int i = 0; i < 9; ++i) {
+ if (!getline(cin, line)) {
+ cerr << "Insufficient input: expected 9 lines, got fewer.\n";
+ return 1;
+ }
+ // Remove any spaces in the line (in case input uses spaces)
+ string filtered;
+ for (char c : line) {
+ if (c != ' ' && c != '\t') {
+ filtered.push_back(c);
+ }
+ }
+ if ((int)filtered.size() != 9) {
+ cerr << "Invalid input on line " << i + 1 << ": expected 9 non-space characters.\n";
+ return 1;
+ }
+ for (int j = 0; j < 9; ++j) {
+ char c = filtered[j];
+ if ((c >= '1' && c <= '9') || c == '.') {
+ board[i][j] = c;
+ } else {
+ cerr << "Invalid character '" << c << "' on line " << i + 1
+ << ". Use digits '1'-'9' or '.'.\n";
+ return 1;
+ }
+ }
+ }
+
+ Solution solver;
+ solver.solveSudoku(board);
+
+ // Output the solved board: 9 lines of 9 characters each
+ for (int i = 0; i < 9; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ cout << board[i][j];
+ }
+ cout << "\n";
+ }
+
+ return 0;
+}
+
+
diff --git a/latex/node/node.tex b/latex/node/node.tex
@@ -0,0 +1,170 @@
+\documentclass[12pt, letterpaper]{article}
+\usepackage{xcolor}
+
+
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{.5em}
+
+\usepackage{xcolor}
+\definecolor{lightgray}{gray}{0.95}
+\usepackage{listings}
+\usepackage{enumitem}
+\usepackage{graphicx}
+\usepackage{listings}
+\usepackage{caption}
+\usepackage{tcolorbox}
+\usepackage{datetime}
+\usepackage{amsfonts}
+\usepackage{amsmath}
+\usepackage{geometry}
+\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
+\usepackage{amssymb,enumerate}
+\usepackage{amsthm,stmaryrd}
+\usepackage[all]{xy}
+
+\usepackage{tcolorbox} % for padding
+
+\newenvironment{definition}{
+ \begin{quote}
+ \textbf{Definition:}
+ }{
+ \end{quote}
+}
+
+
+\newenvironment{explanation}{
+ \begin{quote}
+ \textbf{Explanation:}
+ }{
+ \end{quote}
+}
+
+\newenvironment{example}{
+ \begin{quote}
+ \textbf{Example:}
+ }{
+ \end{quote}
+}
+
+\lstnewenvironment{code}{
+ \lstset{
+ basicstyle=\ttfamily,
+ columns=fullflexible,
+ breaklines=true,
+ backgroundcolor=\color{lightgray},
+ frame=single,
+ framerule=0pt,
+ rulecolor=\color{lightgray}
+ }
+}{}
+
+
+\begin{document}
+
+\noindent{\large \textbf{Node.js Design Patterns by Mario Casciaro}}
+
+\noindent Notes by Andrew Laack
+
+\tableofcontents
+
+\section{Node.js Platform}
+
+\subsection{Multiplexer}
+
+A multiplexer is a process that interlaces signals together.
+
+\subsection{Demultiplexer}
+
+A demultiplexer is a process that de-interlaces signals from each other.
+
+\subsection{Reactor}
+
+Given that I/O is often slow, it is preferable to have non-blocking I/O calls. However, there is overhead associated with having lots of threads thus spawning threads for each non-blocking call can also be suboptimal. To overcome this, NodeJS uses the reactor pattern.
+
+The reactor pattern first accepts an I/O operation and handler from the application and dispatches it to the event demultiplexer. The handlers are callbacks and when the operation receives an event, the event is sent to the event queue. Once here, events are handled in series via the event loop where the callback is executed. Shown below is a simple implementation of this concept:
+
+\begin{code}
+watchedList.add(socketA, FOR_READ)
+watchedList.add(fileB, FOR_READ)
+while (events = demultiplexer.watch(watchedList)) {
+ // event loop
+ for (event of events) {
+ // This read will never block and will always return data
+ data = event.resource.read()
+ if (data === RESOURCE_CLOSED) {
+ // the resource was closed, remove it from the watched list
+ demultiplexer.unwatch(event.resource)
+ } else {
+ // some actual data was received, process it
+ consumeData(data)
+ }
+ }
+}
+\end{code}
+
+\subsection{Composition of NodeJS}
+
+\subsubsection{Libuv}
+
+Libuv is the implementation of the reactor pattern that works across operating systems, used by NodeJS.
+
+\subsubsection{V8}
+
+This is the JS engine developed by Google for Chrome that runs on the server side.
+
+\subsubsection{Core JS APIs}
+
+The core JS APIs are the APIs that implement the core (high-level) functionality of NodeJS.
+
+\subsubsection{Bindings}
+
+The binding layer exposes libuv and other JS functionality.
+
+\subsection{Polyfill / Transpiler}
+
+A polyfill is code (often JS) that allows the use of newer features in older runtimes. This is a common practice in browsers to ensure older browsers can still view modern content.
+
+A transpiler fits a similar purpose where it converts one source to another source (aka source-to-source compiler).
+
+
+\subsection{Modules}
+
+Modules are the cornerstone of NodeJS as NodeJS is intended to be minimal. Similarly, the philosophy of modules is similar, they are supposed to be small with a minimal number of features.
+
+\subsubsection{CommonJS (CJS)}
+
+CommonJS is the classic JS module system.
+
+\subsubsection{ECMAScript (ES/ESM)}
+
+ECMAScript is the other JS module system.
+
+\subsubsection{crypto}
+
+The crypto module is provides standard implementations of encryption and hashing via OpenSSL.
+
+\subsubsection{http}
+
+The http module is used to create an http server.
+
+\subsubsection{https}
+
+The https module is used to create an https server.
+
+\subsubsection{net}
+
+The net module is used to write to TCP sockets.
+
+\subsubsection{dgram}
+
+The dgram module is used to write to UDP sockets.
+
+\subsubsection{fs}
+
+The fs module is used to access the underlying filesystem.
+
+\subsubsection{express}
+
+The express module is a web framework for NodeJS. This serves as a way to make the NodeJS runtime environment into a web server.
+
+\end{document}
diff --git a/work/deep-learning/cov.py b/work/deep-learning/cov.py
@@ -0,0 +1,31 @@
+# Covarianc mat calc
+
+import numpy as np
+
+mat = np.array([
+ [1, 3, 3, 1],
+ [2, 6, 9, 2]
+ ])
+
+
+print(np.cov(mat))
+means = []
+m = len(mat[0])
+
+for i in range(0, len(mat)):
+ means.append(np.mean(mat[i]))
+
+
+res = np.zeros((len(mat), len(mat)))
+print(means)
+
+for x in range(0, len(mat)):
+ for y in range(0, len(mat)):
+ m = len(mat[x]) - 1
+ m_inv = 1/m
+ sum = 0
+ for i in range(0, len(mat[x])):
+ sum += (mat[x][i] - means[x]) * (mat[y][i] - means[y])
+ res[x][y] = sum * m_inv
+
+print(res)
diff --git a/work/deep-learning/determinant.py b/work/deep-learning/determinant.py
@@ -0,0 +1,14 @@
+import numpy as np
+import scipy.linalg
+
+A = np.array([[10,4,2,1],[8,3,1,2],[2,6,7,5],[5,1,1,3]])
+
+upper = scipy.linalg.lu(A)[2]
+
+print(upper)
+
+det = 1
+for i in range(0,len(upper)):
+ det *= upper[i][i]
+
+print(det)
diff --git a/work/deep-learning/gauss-jordan.py b/work/deep-learning/gauss-jordan.py
@@ -0,0 +1,59 @@
+# this solver is only for 2x2 matrices as a generalized solver is longer.
+# we assume A is invertible.
+
+import numpy as np
+
+A = np.array([[2.,8.],[1.,3.]])
+A = np.hstack([A, np.eye(2)])
+
+def switch(frm, to, A):
+ res = A.copy()
+ fr = res[frm].copy()
+ res[frm] = res[to]
+ res[to] = fr
+ return res
+
+def addRows(frm, to, coeff, A):
+ res = A.copy()
+ fr = A[frm].copy()
+ fr *= coeff
+ res[to] += fr
+ return res
+
+# get 1 in 0,0
+# ensure the first row has some value in [0,0], otherwise switch
+if A[0][0] == 0:
+ switch(0,1,A)
+
+# get 1 in 0,0
+target = A[0][0] - 1
+otr = A[1][0]
+coeff = (target/otr) * -1
+A = addRows(1, 0, coeff, A)
+
+# get 1 in 1,1
+target = A[1][1] - 1
+otr = A[0][1]
+coeff = (target/otr) * -1
+A = addRows(0, 1, coeff, A)
+
+# get 0 in 0,1
+target = A[0][1]
+otr = A[1][1]
+coeff = (target/otr) * -1
+A = addRows(1, 0, coeff, A)
+
+
+# get 0 in 1,0
+target = A[1][0]
+otr = A[0][0]
+coeff = (target/otr) * -1
+A = addRows(0, 1, coeff, A)
+
+# ensure 0,0 is 1
+A[0] = A[0] / A[0][0]
+# ensure 1,1 is 1
+A[1] = A[1] / A[1][1]
+
+# this is the entire augmented matrix, but we only care about columns 3 and 4
+print(A)
diff --git a/work/deep-learning/norm.py b/work/deep-learning/norm.py
@@ -0,0 +1,9 @@
+x = [10,8]
+p = 2
+
+sum = 0
+for i in x:
+ sum += i**p
+
+res = sum ** (1/p)
+print(res)
diff --git a/work/deep-learning/norm.txt b/work/deep-learning/norm.txt
@@ -0,0 +1,8 @@
+consider L_1 norm:
+
+L_1(<1,3>) = 4
+L_1(<4,1>) = 5
+
+L_1(a+b) = 9
+
+
diff --git a/work/deep-learning/scratch.txt b/work/deep-learning/scratch.txt
@@ -0,0 +1,53 @@
+[1 8]
+[2 3]
+
+
+det ([1 - lambda 8]) = 0
+ [2 3 - lambda]
+
+
+(1 - lam)(3 - lam) - (8)(2) = 0
+(3 - lam - 3lam + lam^2) - 16 = 0
+4lam + lam^2 - 13 = 0
+
+lam = 2 +- \sqrt(17)
+
+[1 - (2 + sqrt(17)) 8]
+[2 3 - (2 + sqrt(17))]
+
+[-1 - sqrt(17) 8 ]
+[2 1 - sqrt(17)]
+
+solve for nullspace.
+
+
+[-1 - sqrt(17) 8 ][x] = [0]
+[2 1 - sqrt(17)][y] [0]
+
+0 = (-1 - sqrt(17))x + 8y
+
+0 = -x - sqrt(17)x + 8y
+
+(1 + sqrt(17))x = 8y
+
+x = 8y / (1 + sqrt(17))
+
+v_1 = [8 / (1 + sqrt(17))]
+ [1 ]
+
+Do the same for \lam_2 to find
+
+v_2 = [8 / (1 - sqrt(17))]
+ [1 ]
+
+We then have
+
+V = [v_1 v_2]
+ = [8/(1+sqrt(17)) 8/(1-sqrt(17))]
+ [1 1]
+
+A = [8/(1+sqrt(17)) 8/(1-sqrt(17))] [2 + sqrt(17) 0] V^-1
+ [1 1] [0 2 - sqrt(17)]
+
+= [8/(1+sqrt(17)) 8/(1-sqrt(17))] [2 + sqrt(17) 0] 1/sqrt(17)[1 -8 / (1 - sqrt(17))]
+ [1 1] [0 2 - sqrt(17)] [-1 8/ (1 + sqrt(17))]
diff --git a/work/deep-learning/solvingWithInversion.py b/work/deep-learning/solvingWithInversion.py
@@ -0,0 +1,7 @@
+import numpy as np
+
+A = np.array([[10, 2, 3], [1, 4, 1], [9, 1, 3]])
+b = np.array([10, 2, 3])
+A_inv = np.linalg.inv(A)
+result = A_inv @ b
+print(result)