commit a60e2476686b4bda4c8927c2fd01526749401704
parent fa044613bb52fdd7944bfa4361e3821f28fcb698
Author: AndrewLockVI <andrew@laack.co>
Date: Tue, 10 Jun 2025 13:24:41 -0500
migration
Diffstat:
5 files changed, 129 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@
*.aux
*.log
*.toc
+.venv/*+
\ No newline at end of file
diff --git a/latex/DeepLearning-Goodfellow/DeepLearning.tex b/latex/DeepLearning-Goodfellow/DeepLearning.tex
@@ -649,6 +649,7 @@ A =
\end{bmatrix}
\]
+
\end{example}
\end{document}
diff --git a/latex/designing/DesigningDataIntensiveApplications.tex b/latex/designing/DesigningDataIntensiveApplications.tex
@@ -0,0 +1,119 @@
+\documentclass[12pt, letterpaper]{article}
+\usepackage{xcolor}
+
+\setlength{\parindent}{0pt} % No first-line indentation for paragraphs.
+\setlength{\parskip}{.5em} % Separate paragraphs with 0.5em of vertical space instead.
+
+\usepackage{enumitem}
+\usepackage{graphicx}
+\usepackage{listings}
+\usepackage{caption}
+\usepackage{tcolorbox}
+\usepackage{datetime}
+\usepackage{amsfonts}
+\usepackage{amsmath}
+\usepackage{geometry}
+\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
+\usepackage{amssymb,enumerate}
+\usepackage{amsthm,stmaryrd}
+\usepackage[all]{xy}
+
+\newenvironment{definition}{% Quote block opened by a bold "Definition:" label.
+ \begin{quote}%
+ \textbf{Definition:} \ignorespaces% One explicit space after the label; skip spaces that follow \begin{definition}.
+ }{% Line-end percent signs keep stray space tokens out of the expansion.
+ \end{quote}%
+}
+
+
+\newenvironment{explanation}{% Quote block opened by a bold "Explanation:" label.
+ \begin{quote}%
+ \textbf{Explanation:} \ignorespaces% One explicit space after the label; skip spaces that follow \begin{explanation}.
+ }{% Line-end percent signs keep stray space tokens out of the expansion.
+ \end{quote}%
+}
+
+\newenvironment{example}{% Quote block opened by a bold "Example:" label.
+ \begin{quote}%
+ \textbf{Example:} \ignorespaces% One explicit space after the label; skip spaces that follow \begin{example}.
+ }{% Line-end percent signs keep stray space tokens out of the expansion.
+ \end{quote}%
+}
+
+\lstnewenvironment{code}{% Listing preceded by an indented bold "Code:" label.
+ \hspace{.45cm}\textbf{Code:}%
+ \lstset{% Typewriter font, flexible column alignment, automatic line breaking.
+ basicstyle=\ttfamily,
+ columns=fullflexible,
+ breaklines=true
+ }%
+}{% No extra material is emitted after the listing.
+}
+
+
+\begin{document}
+
+\noindent{\large \textbf{Designing Data-Intensive Applications by Martin Kleppmann}}
+
+\noindent Notes by Andrew Laack
+
+\tableofcontents
+
+\section{Reliable, Scalable, and Maintainable Applications}
+
+Our goal when architecting data systems is to ensure they are reliable, scalable, and maintainable.
+
+\subsection{Reliability}
+
+The system should work correctly even in cases of \textit{adversity}. These adversities include hardware, software, and human errors. Oftentimes we refer to this as the system being fault-tolerant (tolerant to certain types of faults) or resilient.
+
+An important distinction is that of fault vs. failure. A fault is when a component fails to achieve its specification while a failure is the system as a whole failing to provide the expected service. By these definitions, we try to decrease the likelihood that faults result in failures.
+
+\subsection{Scalability}
+
+As the needs of the system grow, the demands should be dealt with in a reasonable way. When discussing scalability, we are primarily concerned with load parameters. This is a blanket term that can describe ingress, egress, request counts, etc.
+
+When discussing scalability, it is important to consider response times (round-trip times). In particular, the median, p95, p99, and p999 are quite important. These upper percentiles are referred to as tail latencies. An extension of this is tail latency amplification: when serving one request requires multiple calls to a service, the likelihood that at least one of those calls hits a tail-latency event increases.
+
+Head-of-line blocking is where queueing delays, caused by a few slow requests at the front, cause a pile-up of requests in the queue. This is part of the reason why it is important to measure response times on the client side rather than relying on server-side processing time.
+
+\subsection{Maintainability}
+
+Disparate individuals should be able to work with the system with minimal hassle.
+
+\section{Data Models and Query Languages}
+
+\subsection{Relational vs. Document Model}
+
+Relational databases have relations (tables) that can be joined together when querying. While these joins exist in \textit{some} document databases, they are often quite slow and outside the intended use case for the database. To combat this limitation, we often run multiple queries against document databases in cases where there are \textit{document references} (when we don't want denormalized data).
+
+Often it is simpler to use the document model when our data is structured in a tree-like form that has lots of one-to-many relationships. This removes the need for \textit{shredding}, the process of mapping a document structure to a relational structure.
+
+\subsection{Data Locality}
+
+Data locality refers to where data is stored on disk. In document databases, documents are often stored contiguously as some form of encoded string. This storage locality allows for faster lookups because related data is stored close together, which is more efficient on spinning disks.
+
+Despite this improvement in data locality, we often need to load entire documents to look for information in them which can sometimes render this improvement moot when the document is large. Beyond this, rewriting documents often requires moving them when the size of the encoded string changes which is an expensive procedure.
+
+This can limit the applicability of document databases. Additionally, there are approaches, like Google's Spanner database, that attempt to achieve data locality without requiring a schemaless approach. This is done by interleaving (nesting) rows within a parent table. Oracle also allows this with \textit{multi-table index cluster tables}.
+
+Similarly, \textit{column-family} databases (like Cassandra) achieve good data locality as well.
+
+\subsection{Declarative vs. Imperative}
+
+Imperative languages state the steps needed to perform a task. Declarative languages declare the result that is expected. Based on this, we see SQL is a declarative language because we declare the data we want, and then the database's query optimizer determines how best to do that. This adds complexity, which is why the \textit{EXPLAIN} keyword, used to introspect a query, is valuable.
+
+Additionally, given the necessity of execution order for imperative languages, it can be easier to parallelize execution when using a declarative style.
+
+\section{Storage and Retrieval}
+\section{Encoding and Evolution}
+\section{Replication}
+\section{Partitioning}
+\section{Transactions}
+\section{The Trouble with Distributed Systems}
+\section{Consistency and Consensus}
+\section{Batch Processing}
+\section{Stream Processing}
+\section{The Future of Data Systems}
+
+\end{document}
diff --git a/latex/node/node.tex b/latex/node/node.tex
@@ -131,13 +131,17 @@ A transpiler fits a similar purpose where it converts one source to another sour
Modules are the cornerstone of NodeJS as NodeJS is intended to be minimal. Similarly, the philosophy of modules is similar, they are supposed to be small with a minimal number of features.
+\subsubsection{Revealing Module Pattern}
+
+This is the JS pattern of revealing only the functionality of a module that is needed instead of placing everything in the global scope.
+
\subsubsection{CommonJS (CJS)}
-CommonJS is the classic JS module system.
+CommonJS is the classic NodeJS module system. This differs from web-based module systems because a browser must be able to access all modules using HTTP whereas NodeJS has access to the filesystem.
\subsubsection{ECMAScript (ES/ESM)}
-ECMAScript is the other JS module system.
+ECMAScript is the other NodeJS module system. It is the module system that modern code is adopting going forward.
\subsubsection{crypto}
diff --git a/testing/test.py b/testing/test.py
@@ -0,0 +1 @@
+import torch