commit 9e174e8cfa389796dd06596021026e8e4d5a6d2a
parent 0ac5f34d9f609c11a9fff4edff64dad10d2ce954
Author: Andrew D. Laack <andrew@laack.co>
Date: Fri, 20 Jun 2025 23:57:34 -0500
Added Blame Caching (#4)
* Added caching file, new branch, updated version
* Created class scaffold for planned blame DB.
* Added query for table creation and basic insertion
* Consolidated logic for creation and updating for ease of use by consumer
* Started preliminary testing of caching. It seems to mostly work.
* There seems to be an issue with certain codebases where author, authortime or something else can't be retrieved properly.
* The issue appeared to be failures which were being indiscriminately caught when running 'git blame' on files that weren't checked into git. Resolved by throwing a descriptive error.
* Added perf. benchmark for django in current state w/ caching.
* Fixed README typo
* Updated validation signatures and logic to be better
* Created BlameRecord class to simplify signatures
* Removed unnecessary logic and replaced it with an error when file-line content can't be found. Updating caching to 'improve' write perf.
* Improved perf. for blaming
* Added clear cache option along with mutex logic for it. I now need to add more tests. There are still perf. gains to be made, but at this point they feel nominal.
* Added benchmarking for blames on my system
* Added additional test for db
* Updated README.md
* Updated version number
Diffstat:
16 files changed, 597 insertions(+), 142 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -194,3 +194,5 @@ cython_debug/
.cursorindexingignore
notes/
data/
+testing.db
+.ratchet_blame.db
diff --git a/README.md b/README.md
@@ -61,7 +61,21 @@ description = "Bare except clauses catch all exceptions indiscriminately. This c
```
-The valid and invalid entries are not necessary, but we provide a CLI utility, executable with ```python3 -m ratchets.validate```, to verify the regular expressions don't exist in the valid string and do exist in the invalid string. If you are testing a .toml file that is not the repository default, specify it with ```python3 -m ratchets.validate -t FILENAME```.
+The valid and invalid entries are not necessary, but we provide a CLI utility to verify the regular expressions don't exist in the valid strings and do exist in the invalid strings. This can be run with:
+
+```bash
+
+python3 -m ratchets.validate
+
+```
+
+If you are testing a .toml file that is not the repository default, it can be specified with:
+
+```bash
+
+python3 -m ratchets.validate -t FILENAME
+
+```
The description entry is also optional, but if provided, it will be included in the output of failing PyTest tests.
@@ -90,7 +104,7 @@ This is an example of an `awk` command being used to print each line that has mo
## Updating Ratchets
-Once your rules are defined, you need to count the infractions. This is done by running.
+Once your rules are defined, you need to count the infractions. This is done by running:
```bash
python3 -m ratchets -u
@@ -125,7 +139,7 @@ python3 -m ratchets --help
Where you will see the following help message describing CLI usage for Ratchets:
```
-usage: run_tests.py [-h] [-t TOML_FILE] [-f FILES [FILES ...]] [-s] [-r] [-v] [-b] [-m MAX_COUNT] [-c] [-u]
+usage: __main__.py [-h] [-t TOML_FILE] [-f FILES [FILES ...]] [-s] [-r] [-v] [-b] [--clear-cache] [-m MAX_COUNT] [-c] [-u]
Python ratchet testing
@@ -139,6 +153,7 @@ options:
-r, --regex-only run only regex-based tests
-v, --verbose run verbose tests, printing each infringing line
-b, --blame run an additional git-blame for each infraction, ordering results by timestamp
+ --clear-cache clear the blame cache
-m MAX_COUNT, --max-count MAX_COUNT
maximum infractions to display per test (only applies with --blame; default is 10)
-c, --compare-counts show only the differences in infraction counts between the current and last saved tests
@@ -146,6 +161,8 @@ options:
update ratchets_values.json
```
+**Note:** Ensure you add `.ratchet_blame.db` to your .gitignore file when using the `--blame` option. This is the location Ratchets caches blame evaluations to improve performance for larger codebases.
+
# Testing Ratchets Locally
To run the tests for the source code of Ratchets, you can clone this repository with:
diff --git a/benchmarks/blame_django_benchmarks.md b/benchmarks/blame_django_benchmarks.md
@@ -0,0 +1,68 @@
+# Benchmarking Blame Options on Django Codebase
+*Shell rule: max 80 characters per line, 3582 blames evaluated*
+
+---
+
+## Saving each blame one at a time to SQLite DB
+- **First run:** `11m7.030s`
+- **Cache run:** `0m2.409s`
+
+---
+
+## Saving all blames at the end using `save_blames()`
+*(Still running one insert at a time, no commit/close per insert)*
+- **First run:** `11m37.642s`
+- **Cache run:** `0m2.550s`
+- **Conclusion:** No real performance change.
+
+---
+
+## Using `executemany`, with `synchronous=OFF` and `journal_mode=OFF`
+*(Performance-optimized SQLite setup)*
+- **First run:** `11m55.330s`
+- **Cache run:** `0m2.557s`
+- **Conclusion:** Problem lies with Git speed, not SQLite.
+
+---
+
+## After parallelizing Git blames
+*(System: 4 cores / 8 threads)*
+- **First run:** `2m18.302s`
+- **Cache run:** `0m3.878s`
+
+---
+
+## After parallelizing Git blames *and* file map creation
+*(Shell line lookup optimization)*
+- **First run:** `2m20.776s`
+- **Cache run:** `0m4.035s`
+
+---
+
+## Thread Count Scaling
+Tested on a 4-core / 8-thread system (one run per setting):
+
+| Thread Count | First Run Time |
+|--------------|----------------|
+| 8 threads | 2m20.804s |
+| 16 threads | 2m17.790s |
+| 32 threads | 2m15.553s |
+| 64 threads | 2m7.714s |
+| 128 threads | 2m8.690s |
+
+> Improvement levels off beyond 64 threads. System becomes sluggish at higher counts.
+> Using 1x (8 threads) is a reasonable balance between speed and usability.
+
+---
+
+## Conclusion: Git + SQLite Performance Strategy
+
+- Run **map creation** in **series**
+- Run **cache lookup** in **series**
+- Run **Git blame operations** in **parallel**
+
+---
+
+## Final Optimized Results
+- **First run:** `2m16.388s`
+- **Cache run:** `0m2.588s`
diff --git a/examples/example_test_ratchet.py b/examples/example_test_ratchet.py
@@ -33,7 +33,7 @@ def test_shell_rule(test_name: str, test_dict: dict) -> None:
# errors.append(f"{test_name}")
# if errors:
# pytest.fail(" - ".join(errors) + "\n\n" + "\n\n".join(descriptions))
-#
+#
# def test_all_shell_rules():
# """Runs a test for all shell rules."""
# errors = []
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "ratchets"
-version = "0.2.5"
+version = "0.2.7"
description = "Ratcheted testing in Python."
authors = [
{ name = "Andrew Laack", email = "andrew@laack.co" }
diff --git a/src/ratchets/abstracted_tests.py b/src/ratchets/abstracted_tests.py
@@ -107,13 +107,14 @@ def check_regex_rule(test_name: str, rule: Dict[str, Any]) -> None:
baseline_counts = get_baseline_counts()
baseline_count = baseline_counts.get(test_name, 0)
if current_count > baseline_count:
- description = rule.get('description')
+ description = rule.get("description")
if description is None:
description = ""
raise Exception(
f"'{test_name}' increased from {baseline_count} to {current_count}"
- + ". " + str(description)
+ + ". "
+ + str(description)
)
@@ -127,10 +128,11 @@ def check_shell_rule(test_name: str, test_dict: Dict[str, Any]) -> None:
baseline_counts = get_baseline_counts()
baseline_count = baseline_counts.get(test_name, 0)
if current_count > baseline_count:
- description = test_dict.get('description')
+ description = test_dict.get("description")
if description is None:
description = ""
raise Exception(
f"'{test_name}' increased from {baseline_count} to {current_count}"
- + ". " + str(description)
+ + ". "
+ + str(description)
)
diff --git a/src/ratchets/caching.py b/src/ratchets/caching.py
@@ -0,0 +1,175 @@
+import sqlite3
+import argparse
+from datetime import datetime
+from typing import Optional, Dict, List
+
+
+class BlameRecord:
+ def __init__(
+ self,
+ line_content: str,
+ line_number: int,
+ timestamp: datetime,
+ file_name: str,
+ author: str,
+ ):
+ """Creates a record based on required fields for compatability with blame cache."""
+ self.line_content = line_content
+ self.line_number = line_number
+ self.timestamp = timestamp
+ self.file_name = file_name
+ self.author = author
+
+
+class CachingDatabase:
+
+ def __init__(self, path: str):
+ """Initialization: verify/create DB on disk for caching."""
+ self.db_path = path
+ self.__create_db__(path)
+
+ def __create_db__(self, path: str):
+ """Create table if needed, and add 'author' column if missing."""
+ conn = sqlite3.connect(path)
+ cursor = conn.cursor()
+
+ cursor.execute(
+ """
+ CREATE TABLE IF NOT EXISTS blames (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ file_name TEXT,
+ line_number INTEGER,
+ line_content TEXT,
+ timestamp TEXT,
+ author TEXT,
+ UNIQUE(file_name, line_number)
+ )
+ """
+ )
+
+ cursor.execute(
+ """
+ CREATE INDEX IF NOT EXISTS idx_blame_file_line
+ ON blames(file_name, line_number)
+ """
+ )
+
+ conn.commit()
+ cursor.close()
+ conn.close()
+
+ def create_or_update_blames(self, blames: List[BlameRecord]):
+ """
+ Insert or update a list of blames:
+ if (file_name, line_number) exists, update it; otherwise insert.
+ """
+
+ conn = sqlite3.connect(self.db_path)
+ cursor = conn.cursor()
+
+ cursor.execute("PRAGMA journal_mode = OFF")
+ cursor.execute("PRAGMA synchronous = OFF")
+
+ upsert_query = """
+ INSERT INTO blames (file_name, line_number, line_content, timestamp, author)
+ VALUES (?, ?, ?, ?, ?)
+ ON CONFLICT(file_name, line_number) DO UPDATE SET
+ line_content = excluded.line_content,
+ timestamp = excluded.timestamp,
+ author = excluded.author
+ """
+
+ cursor.executemany(
+ upsert_query,
+ [
+ (
+ blame.file_name,
+ blame.line_number,
+ blame.line_content,
+ blame.timestamp.isoformat(),
+ blame.author,
+ )
+ for blame in blames
+ ],
+ )
+
+ conn.commit()
+ cursor.close()
+ conn.close()
+
+ def create_or_update_blame(self, blame: BlameRecord):
+ """
+ Insert or update a blame:
+ if (file_name, line_number) exists, update it; otherwise insert.
+ """
+ conn = sqlite3.connect(self.db_path)
+ cursor = conn.cursor()
+
+ upsert_query = """
+ INSERT INTO blames (file_name, line_number, line_content, timestamp, author)
+ VALUES (?, ?, ?, ?, ?)
+ ON CONFLICT(file_name, line_number) DO UPDATE SET
+ line_content = excluded.line_content,
+ timestamp = excluded.timestamp,
+ author = excluded.author
+ """
+ cursor.execute(
+ upsert_query,
+ (
+ blame.file_name,
+ blame.line_number,
+ blame.line_content,
+ blame.timestamp.isoformat(),
+ blame.author,
+ ),
+ )
+
+ conn.commit()
+ cursor.close()
+ conn.close()
+
+ def get_blame(self, line_number: int, file_name: str) -> Optional[BlameRecord]:
+ """
+ Lookup the blame for the specified file and line number.
+ Returns None if not found, else:
+ {
+ 'author': AUTHOR,
+ 'timestamp': TS (datetime),
+ 'line_content': content (str)
+ }
+ """
+ conn = sqlite3.connect(self.db_path)
+ cursor = conn.cursor()
+
+ select_query = """
+ SELECT author, timestamp, line_content
+ FROM blames
+ WHERE file_name = ? AND line_number = ?
+ """
+ cursor.execute(select_query, (file_name, line_number))
+ row = cursor.fetchone()
+
+ cursor.close()
+ conn.close()
+
+ if not row:
+ return None
+
+ author, ts_str, line_content = row
+ try:
+ ts = datetime.fromisoformat(ts_str)
+ except Exception:
+ return None
+
+ blame = BlameRecord(line_content, line_number, ts, file_name, author)
+
+ return blame
+
+ def clear_cache(self) -> None:
+ """Clear the local blame caching DB."""
+ conn = sqlite3.connect(self.db_path)
+ cursor = conn.cursor()
+ cursor.execute("DELETE FROM blames")
+ conn.commit()
+ cursor.close()
+ conn.close()
diff --git a/src/ratchets/run_tests.py b/src/ratchets/run_tests.py
@@ -1,3 +1,6 @@
+from ratchets.caching import CachingDatabase, BlameRecord
+import queue
+from datetime import datetime
import os
import threading
import pathspec
@@ -14,6 +17,8 @@ EXCLUDED_FILENAME = "ratchet_excluded.txt"
IGNORE_FILENAME = ".gitignore"
RATCHET_FILENAME = "ratchet_values.json"
TEST_FILENAME = "tests.toml"
+CACHING_FILENAME = ".ratchet_blame.db"
+MAX_THREADS = os.cpu_count() or 1
def print_diff(current_json: Dict[str, int], previous_json: Dict[str, int]) -> None:
@@ -184,6 +189,7 @@ def evaluate_regex_tests(
results_lock = threading.Lock()
def eval_thread(test_name: str, rule: Dict[str, Any]):
+ """Evaluate a single regular expression across all specified files."""
pattern = re.compile(rule["regex"])
matches = []
@@ -236,24 +242,9 @@ def evaluate_shell_tests(
# as they are used, if they are
# used.
- file_lines_map: Dict[str, Dict[str, List[int]]] = {}
-
- # TODO:
- # Parallelize map creation; this is heavily I/O bound.
- # Also, check if this is the best approach. Would it
- # just be better to run in O(n) given smaller coefficients?
+ file_strs = list(map(str, files))
- for file_path in files:
- try:
- with open(file_path, "r", encoding="utf-8") as f:
- lines = f.readlines()
- file_map: Dict[str, List[int]] = {}
- for idx, line in enumerate(lines):
- normalized = line.rstrip("\n")
- file_map.setdefault(normalized, []).append(idx + 1)
- file_lines_map[str(file_path)] = file_map
- except Exception as e:
- raise Exception(f"Error reading {file_path}: {e}")
+ file_lines_map: Dict[str, Dict[str, List[int]]] = build_file_lines_map(file_strs)
def worker(test_name: str, shell_template: str, file_path: Path):
"""Evaluate an individual shell test for a given file."""
@@ -407,7 +398,7 @@ def print_issues_with_blames(
def add_blames(
results: Tuple[Dict[str, List[Dict[str, Any]]], Dict[str, List[Dict[str, Any]]]],
) -> Tuple[Dict[str, List[Dict[str, Any]]], Dict[str, List[Dict[str, Any]]]]:
- """Add blame information to each result in the 'results' tuple."""
+ """Add blame information: check cache in series, then run git blame in parallel for misses."""
test_issues, shell_issues = results
try:
@@ -415,76 +406,119 @@ def add_blames(
except Exception:
repo_root = None
- def get_blame_for_line(
- file_path: str, line_no: Optional[int]
- ) -> Tuple[Optional[str], Optional[str]]:
- """Internal method for getting the blame information of a specific LoC."""
- if repo_root is None:
- return None, None
- cmd = ["git", "blame", "-L", f"{line_no},{line_no}", "--porcelain", file_path]
- try:
- res = subprocess.run(
- cmd, capture_output=True, text=True, cwd=repo_root, timeout=5
- )
- if res.returncode != 0:
- return None, None
+ db_path = os.path.join(str(repo_root), CACHING_FILENAME)
+ db = CachingDatabase(db_path)
- author: Optional[str] = None
- author_time: Optional[str] = None
-
- for l in res.stdout.splitlines():
- if l.startswith("author "):
- author = l[len("author ") :].strip()
- elif l.startswith("author-time "):
- try:
- ts = int(l[len("author-time ") :].strip())
- author_time = datetime.fromtimestamp(ts).isoformat()
- except Exception:
- author_time = None
- if author is not None and author_time is not None:
- break
- return author, author_time
- except Exception:
- return None, None
+ new_records: List[BlameRecord] = []
+ needs_blame: List[Tuple[Dict[str, Any], str, int, str]] = []
- def get_last_commit_for_file(file_path: str) -> Tuple[Optional[str], Optional[str]]:
- """Internal method to get the most recent commit's information for a file."""
- if repo_root is None:
- return None, None
- cmd = ["git", "log", "-1", "--format=%an;%at", "--", file_path]
- try:
- res = subprocess.run(
- cmd, capture_output=True, text=True, cwd=repo_root, timeout=5
- )
- if res.returncode != 0 or not res.stdout.strip():
- return None, None
- out = res.stdout.strip()
- parts = out.split(";", 1)
- if len(parts) != 2:
- return None, None
- author = parts[0].strip()
- try:
- ts = int(parts[1].strip())
- author_time = datetime.fromtimestamp(ts).isoformat()
- except Exception:
- author_time = None
- return author, author_time
- except Exception:
- return None, None
+ # serial cache lookup as running this in
+ # parallel imposes too much overhead for the minimal
+ # cache lookup cost.
for issues in (test_issues, shell_issues):
for test_name, matches in issues.items():
for match in matches:
file_path = match.get("file")
+ line_content = match.get("content")
+ assert line_content is not None
+
line_no = match.get("line")
if not file_path:
continue
- if line_no is not None:
- author, author_time = get_blame_for_line(file_path, line_no)
- else:
- author, author_time = get_last_commit_for_file(file_path)
- match["blame_author"] = author if author is not None else None
- match["blame_time"] = author_time if author_time is not None else None
+ if line_no is None:
+ raise LookupError(f"No line found matching: {line_content}")
+
+ if repo_root is not None:
+ blame_res: Optional[BlameRecord] = db.get_blame(line_no, file_path)
+ if blame_res is not None and blame_res.line_content == line_content:
+ match["blame_author"] = blame_res.author
+ match["blame_time"] = (
+ blame_res.timestamp.isoformat()
+ if isinstance(blame_res.timestamp, datetime)
+ else str(blame_res.timestamp)
+ )
+ continue
+ needs_blame.append((match, file_path, line_no, line_content))
+
+ if needs_blame:
+ task_q = queue.Queue()
+ for item in needs_blame:
+ task_q.put(item)
+
+ def worker():
+ """Lookup"""
+ while True:
+ try:
+ match, file_path, line_no, line_content = task_q.get(block=False)
+ except queue.Empty:
+ break
+ author, author_time = None, None
+ if repo_root is not None:
+ try:
+ cmd = [
+ "git",
+ "blame",
+ "-L",
+ f"{line_no},{line_no}",
+ "--porcelain",
+ file_path,
+ ]
+ res = subprocess.run(
+ cmd,
+ capture_output=True,
+ text=True,
+ cwd=repo_root,
+ timeout=5,
+ )
+ if res.returncode == 0:
+ parsed_author = None
+ parsed_time = None
+ for l in res.stdout.splitlines():
+ if l.startswith("author "):
+ parsed_author = l[len("author ") :].strip()
+ elif l.startswith("author-time "):
+ ts_int = int(l[len("author-time ") :].strip())
+ parsed_time = datetime.fromtimestamp(ts_int)
+ if (
+ parsed_author is not None
+ and parsed_time is not None
+ ):
+ break
+ if parsed_author is not None and parsed_time is not None:
+ author = parsed_author
+ author_time = parsed_time.isoformat()
+ new_records.append(
+ BlameRecord(
+ line_content=line_content,
+ line_number=int(line_no),
+ timestamp=parsed_time,
+ file_name=file_path,
+ author=parsed_author,
+ )
+ )
+ else:
+ print(res.stderr)
+ except Exception:
+ pass
+ match["blame_author"] = author
+ match["blame_time"] = author_time
+ task_q.task_done()
+
+ num_tasks = task_q.qsize()
+ num_threads = min(MAX_THREADS, num_tasks) if num_tasks > 0 else 0
+ threads = []
+ for _ in range(num_threads):
+ t = threading.Thread(target=worker)
+ t.daemon = True
+ t.start()
+ threads.append(t)
+ task_q.join()
+ for t in threads:
+ t.join()
+
+ if new_records:
+ db.create_or_update_blames(new_records)
return test_issues, shell_issues
@@ -542,6 +576,10 @@ def cli():
)
parser.add_argument(
+ "--clear-cache", action="store_true", help="clear the blame cache"
+ )
+
+ parser.add_argument(
"-m",
"--max-count",
type=int,
@@ -569,6 +607,7 @@ def cli():
update: bool = args.update_ratchets
compare_counts: bool = args.compare_counts
blame: bool = args.blame
+ clear_cache: bool = args.clear_cache
verbose: bool = args.verbose
max_count: Optional[int] = args.max_count
path_files: List[str] = args.files
@@ -584,12 +623,23 @@ def cli():
excludes_path = get_excludes_path()
- mutex_options = [[cmd_mode, regex_mode], [blame, verbose, update, compare_counts]]
+ mutex_options = [
+ [cmd_mode, regex_mode, clear_cache],
+ [blame, verbose, update, compare_counts, clear_cache],
+ ]
for ls in mutex_options:
if not ls.count(True) <= 1:
raise Exception("Mutually exclusive options selected.")
+ if clear_cache:
+ repo_root = find_project_root()
+ db_path = os.path.join(str(repo_root), CACHING_FILENAME)
+ db = CachingDatabase(db_path)
+ db.clear_cache()
+ print("Cache cleared.")
+ return
+
if not os.path.isfile(excludes_path):
with open(excludes_path, "a"):
pass
@@ -638,6 +688,35 @@ def cli():
print_diff(current_json, previous_json)
+def process_file(file_path: str) -> Dict[str, List[int]]:
+ """Read a file and build a map."""
+ file_map: Dict[str, List[int]] = {}
+ with open(file_path, "r", encoding="utf-8") as f:
+ for idx, line in enumerate(f, start=1):
+ normalized = line.rstrip("\n")
+ file_map.setdefault(normalized, []).append(idx)
+ return file_map
+
+
+# After comparing this and a parallelized version; this runs faster.
+# The parallel version used threading which imposed an overhead cost
+# so it may be possible to speed this up, but it is not obvious.
+
+
+def build_file_lines_map(files: List[str]) -> Dict[str, Dict[str, List[int]]]:
+ """
+ Process files serially, returning a dict mapping file_path to its line-content map.
+ """
+ file_lines_map: Dict[str, Dict[str, List[int]]] = {}
+ for fp in files:
+ try:
+ file_map = process_file(fp)
+ file_lines_map[fp] = file_map
+ except Exception as e:
+ raise Exception(f"Error reading {fp}: {e}")
+ return file_lines_map
+
+
if __name__ == "__main__":
"""Entry point when the file is executed directly, envokes CLI method."""
cli()
diff --git a/src/ratchets/validate.py b/src/ratchets/validate.py
@@ -23,7 +23,7 @@ def check_valid(regex_tests: Dict[str, Dict[str, Any]]) -> None:
raise Exception(f"Regex: {regex} matched {line}")
-def check_invalid(regex_tests: Dict[str, Dict[str, Any]]) -> int:
+def check_invalid(regex_tests: Dict[str, Dict[str, Any]]) -> None:
"""Given a dict of regex test and strings, returns if all of the regexps match all of their strings."""
for test in regex_tests:
regex: str = regex_tests[test]["regex"]
@@ -34,10 +34,9 @@ def check_invalid(regex_tests: Dict[str, Dict[str, Any]]) -> int:
found = True
if not found:
raise Exception(f"Regex: {regex} not matched in {validation}")
- return 0
-def validate(filename: Optional[str]) -> Optional[bool]:
+def validate(filename: Optional[str]) -> None:
"""Verify the given file's example expressions match the regexps."""
test_path: str = get_file_path(filename)
config: Dict[str, Any] = toml.load(test_path)
@@ -47,11 +46,14 @@ def validate(filename: Optional[str]) -> Optional[bool]:
if regex_tests is None:
print("No regex tests found, there is nothing to validate.")
- return True
+ return
+
+ # these will throw errors if not valid otherwise simply return.
+ # this allows for stderr to be used, as well as exit
+ # statuses.
check_valid(regex_tests)
check_invalid(regex_tests)
- return True
if __name__ == "__main__":
diff --git a/tests/file_spec_files/spec_file_1.py b/tests/file_spec_files/spec_file_1.py
@@ -1,6 +1,29 @@
+try:
+ print()
except:
+ print()
+
+try:
+ print()
except:
+ print()
+
+try:
+ print()
except:
+ print()
+
+try:
+ print()
except:
+ print()
+
+try:
+ print()
except:
+ print()
+
+try:
+ print()
except:
+ print()
diff --git a/tests/file_spec_files/spec_file_2.py b/tests/file_spec_files/spec_file_2.py
@@ -1,8 +1,8 @@
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
-oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
+# oairsetnaoristenaorstin aorsient oarisen oarisen oariest oairestnoiares noairest orasitenarsoienarsotienarsotienarsotie
diff --git a/tests/test_files/test_caching.py b/tests/test_files/test_caching.py
@@ -0,0 +1,87 @@
+from ratchets.caching import CachingDatabase, BlameRecord
+from ratchets.abstracted_tests import find_project_root
+import os
+from datetime import datetime
+
+CACHING_FILENAME = ".ratchet_blame.db"
+
+
+def test_create_new_db():
+ """Ensure DB creation when one does not exist works as expected."""
+ repo_root = find_project_root()
+ db_path = os.path.join(str(repo_root), CACHING_FILENAME)
+ os.remove(db_path)
+ db = CachingDatabase(db_path)
+
+
+def test_create_multi_connections():
+ """Ensure multiple DB connections can be created concurrently."""
+ for i in range(0, 10):
+ repo_root = find_project_root()
+ db_path = os.path.join(str(repo_root), CACHING_FILENAME)
+ os.remove(db_path)
+ db = CachingDatabase(db_path)
+
+
+def test_record_updating():
+ """Ensure records are updated correctly when line numbers and file names match."""
+
+ repo_root = find_project_root()
+ db_path = os.path.join(str(repo_root), CACHING_FILENAME)
+ if os.path.exists(db_path):
+ os.remove(db_path)
+ db = CachingDatabase(db_path)
+
+ file_name = "example.py"
+ line_number = 42
+
+ # create record
+ record1 = BlameRecord(
+ line_content="print('Hello, world!')",
+ line_number=line_number,
+ timestamp=datetime(2020, 1, 1, 12, 0, 0),
+ file_name=file_name,
+ author="Author1",
+ )
+ db.create_or_update_blame(record1)
+
+ # update with new author/timestamp/content
+ record2 = BlameRecord(
+ line_content="print('Updated!')",
+ line_number=line_number,
+ timestamp=datetime(2021, 1, 1, 12, 0, 0),
+ file_name=file_name,
+ author="Author2",
+ )
+ db.create_or_update_blame(record2)
+
+ updated = db.get_blame(line_number, file_name)
+
+ assert updated is not None, "We inserted this record; it should not be 'None'"
+
+ assert updated.author == "Author2", "Single-record update failed"
+ assert updated.line_content == "print('Updated!')"
+
+ # test batch updating
+ record3 = BlameRecord(
+ line_content="print('Batch update!')",
+ line_number=line_number,
+ timestamp=datetime(2022, 1, 1, 12, 0, 0),
+ file_name=file_name,
+ author="Author3",
+ )
+ db.create_or_update_blames([record3])
+
+ updated = db.get_blame(line_number, file_name)
+
+ assert updated is not None, "We inserted this record; it should not be 'None'"
+ assert updated.author == "Author3", "Batch-record update failed"
+ assert updated.line_content == "print('Batch update!')"
+
+ print("test_record_updating passed")
+
+
+if __name__ == "__main__":
+ test_create_new_db()
+ test_create_multi_connections()
+ test_record_updating()
diff --git a/tests/test_files/test_exclusion.py b/tests/test_files/test_exclusion.py
@@ -16,7 +16,7 @@ def test_config():
issues = run_tests.evaluate_tests(test_path, True, True, None)
run_tests.update_ratchets(test_path, True, True, None)
except Exception as e:
- raise Exception(f"Unable to update ratchets using 'tests.toml': {e}")
+ assert False, f"Unable to update ratchets using 'tests.toml': {e}"
# TODO:
@@ -56,20 +56,22 @@ def test_exclusion():
filtered = run_tests.filter_excluded_files(
python_files_no_exclusion, exclusion_path, ignore_path
)
- if len(python_files_no_exclusion) != length_starting:
- raise Exception("There is a side effect in filter_excluded_files")
-
- if not filename in expected_results:
- raise Exception(
- "An additional excluded.txt file was added, but the corresponding expected count was not add to the dict"
- )
- if not expected_results[filename] == len(filtered):
- raise Exception("Filter count differs from expected value")
-
- if count != len(expected_results):
- raise Exception(
- "There is an entry in the expected_results dictionary that does not correspond with a file tested"
- )
+
+ assert (
+ len(python_files_no_exclusion) == length_starting
+ ), "There is a side effect in filter_excluded_files"
+
+ assert (
+ filename in expected_results
+ ), "An additional excluded.txt file was added, but the corresponding expected count was not added to the dict"
+
+ assert expected_results[filename] == len(
+ filtered
+ ), "Filter count differs from expected value"
+
+ assert count == len(
+ expected_results
+ ), "There is an entry in the expected_results dictionary that does not correspond with a file tested"
if __name__ == "__main__":
diff --git a/tests/test_files/test_files.py b/tests/test_files/test_files.py
@@ -5,10 +5,7 @@ import json
def test_files():
- """Tests the functionallity of .toml and file specification."""
-
- # directory: Union[str, Path], paths: Optional[List[str]]
- # ) -> List[Path]:
+ """Tests the functionality of .toml and file specification."""
proj_root = run_tests.find_project_root()
@@ -43,10 +40,12 @@ def test_files():
for key in json2:
exception2_sum += json2[key]
- if exception2_sum != 8:
- raise Exception(f"Incorrect number of infractions counted for {filtered2_file}")
- if exception1_sum != 6:
- raise Exception(f"Incorrect number of infractions counted for {filtered1_file}")
+ assert (
+ exception2_sum == 8
+ ), f"Incorrect number of infractions counted for {filtered2_file}"
+ assert (
+ exception1_sum == 6
+ ), f"Incorrect number of infractions counted for {filtered1_file}"
if __name__ == "__main__":
diff --git a/tests/test_files/test_toml_configs.py b/tests/test_files/test_toml_configs.py
@@ -19,14 +19,13 @@ import json
def test_config():
test_path = run_tests.get_file_path(None)
- if not os.path.isfile(test_path):
- raise Exception("tests.toml not found")
+ assert os.path.isfile(test_path), "tests.toml not found"
try:
issues = run_tests.evaluate_tests(test_path, True, True, None)
run_tests.update_ratchets(test_path, True, True, None)
except Exception as e:
- raise Exception(f"Unable to update ratchets using 'tests.toml': {e}")
+ assert False, f"Unable to update ratchets using 'tests.toml': {e}"
def test_formatting():
@@ -41,10 +40,11 @@ def test_formatting():
full_path = os.path.abspath(os.path.join(toml_file_directory, filename))
run_tests.evaluate_tests(full_path, True, True, None)
except Exception as e:
- if not isinstance(e, toml.TomlDecodeError):
- raise Exception(f"Expected TomlDecodeError, got {type(e)}: {e}")
+ assert isinstance(
+ e, toml.TomlDecodeError
+ ), f"Expected TomlDecodeError, got {type(e)}: {e}"
else:
- raise Exception(f"Expected error to be thrown for invalid toml file.")
+ assert False, "Expected error to be thrown for invalid toml file."
else:
full_path = os.path.abspath(os.path.join(toml_file_directory, filename))
@@ -68,10 +68,9 @@ def verify_updating():
current_json: Dict[str, Any] = json.loads(run_tests.results_to_json(issues))
previous_json: Dict[str, Any] = run_tests.load_ratchet_results()
- if current_json != previous_json:
- raise Exception(
- "JSON should be identical when running evals and updating ratchets."
- )
+ assert (
+ current_json == previous_json
+ ), "JSON should be identical when running evals and updating ratchets."
# test how things behave when ratchet_values.json does not exist
@@ -83,16 +82,16 @@ def test_ratchet_excluded_missing():
try:
os.remove(ratchet_path)
except Exception as e:
- raise Exception("Unable to delete ratchet_values.json")
+ assert False, "Unable to delete ratchet_values.json"
test_path = run_tests.get_file_path(None)
try:
previous = run_tests.load_ratchet_results()
except Exception:
- raise Exception(
- "If ratchet_values.json does not exist, we don't throw, assume all 0's"
- )
+ assert (
+ False
+ ), "If ratchet_values.json does not exist, we don't throw, assume all 0's"
issues = run_tests.evaluate_tests(test_path, True, True, None)
diff --git a/tests/test_files/test_validation.py b/tests/test_files/test_validation.py
@@ -18,7 +18,7 @@ def test_validate_regex():
# Throws if not valid
validate.validate(full_path)
except Exception as e:
- raise Exception(f"{full_path}, was deemed to be invalid \n {e}")
+ assert False, f"{full_path}, was deemed to be invalid \n {e}"
for filename in os.listdir(toml_file_directory_invalid):
full_path = os.path.abspath(os.path.join(toml_file_directory_invalid, filename))
@@ -29,9 +29,9 @@ def test_validate_regex():
except Exception:
pass
else:
- raise Exception(
- f"Expected validation to fail for {full_path}, but it passed"
- )
+ assert (
+ False
+ ), f"Expected validation to fail for {full_path}, but it passed"
if __name__ == "__main__":