commit 1805f4de2a290eb8b923e70c13aba2a749051136 parent 412e80eb21a10d83d526043fadcd2bcd75c9ac6c Author: Andrew Laack <andrew@laack.co> Date: Tue, 5 May 2026 01:52:45 -0500 Refactoring and recursion Diffstat:
108 files changed, 190 insertions(+), 135 deletions(-)
diff --git a/go.mod b/crawl/go.mod diff --git a/go.sum b/crawl/go.sum diff --git a/crawl/main.go b/crawl/main.go @@ -0,0 +1,190 @@ +package main + +import ( + "database/sql" + "fmt" + "github.com/makeworld-the-better-one/go-gemini" + _ "github.com/mattn/go-sqlite3" + "io" + "net/url" + "os" + "strings" +) + +func fetchSite(currentUrl string) string { + + resp, err := gemini.Fetch(currentUrl) + + if err != nil { + panic(err) + } + + bodyBytes, err := io.ReadAll(resp.Body) + + body := string(bodyBytes) + + if err != nil { + panic(err) + } + + return body + +} + +func parseLinks(body string, currentUrl string) []string { + + base, err := url.Parse(currentUrl) + + if err != nil { + panic(err) + } + + lines := strings.Split(body, "\n") + + links := []string{} + + escaped := false + escape := "```" + + for _, item := range lines { + + if strings.Compare(escape, item) == 0 { + escaped = !escaped + } + + if len(item) > 3 && !escaped { + if item[0] == '=' && item[1] == '>' { + // TODO: is =>link valid? + links = append(links, item[3:]) + } + } + + } + + geminiLinks := []string{} + + for _, item := range links { + + indexOfSpace := strings.Index(item, " ") + + if indexOfSpace == -1 { + indexOfSpace = len(item) + } + if strings.Compare(item[:9], "gemini://") == 0 { + geminiLinks = append(geminiLinks, item[0:indexOfSpace]) + } + if strings.Contains(item, "://") == false { + // relative link + + u, err := url.Parse(item[0:indexOfSpace]) + + if err != nil { + panic(err) + } + + geminiLinks = append(geminiLinks, base.ResolveReference(u).String()) + } + } + + return geminiLinks +} + +func setupDB(dir string, filename string) *sql.DB { + + os.MkdirAll(dir, 0755) + db, err := sql.Open("sqlite3", "./"+dir+"/"+filename) + + if err != nil { + panic(err) + } + + createDb := "CREATE TABLE IF NOT EXISTS link (source, destination, CONSTRAINT unq UNIQUE (source, destination));" + + _, err = db.Exec(createDb) + + if err != nil { + panic(err) + } + + return db +} + +func writeSiteOutput(outputDir string, siteURL string, body string) { + + u, err := url.Parse(siteURL) + + if err != nil { + panic(err) + } + + host := u.Host + path := u.Path + + if len(path) > 0 && path[0] == '/' { + path = path[1:] + } + + indexOfLastSlash := strings.LastIndex(path, "/") + + if indexOfLastSlash != -1 { + pathParts := path[0:indexOfLastSlash] + os.MkdirAll(outputDir+"/"+host+"/"+pathParts, 0755) + } else { + os.MkdirAll(outputDir+"/"+host, 0755) + } + + + // TODO: Is this right? Seems like it but is there a case where it isn't? + if strings.Compare(path, "") == 0{ + path = "index.gmi" + } + + fmt.Println(outputDir+"/"+host+"/"+path) + err = os.WriteFile(outputDir+"/"+host+"/"+path, []byte(body), 0644) + + if err != nil { + panic(err) + } +} + +func main() { + + outputDir := "outputs" + dbName := "main.db" + + + db := setupDB(outputDir, dbName) + defer db.Close() + + links := []string{"gemini://laack.co"} + + + for len(links) > 0 { + + currentUrl := links[0] + links = links[1:] + + body := fetchSite(currentUrl) + forwardGeminiLinks := parseLinks(body, currentUrl) + + writeSiteOutput(outputDir, currentUrl, body) + + insertLinkQuery := "INSERT INTO link (source, destination) VALUES (?, ?) ON CONFLICT DO NOTHING" + + tx, _ := db.Begin() + + for index, link := range forwardGeminiLinks { + + links = append(links, link) + _, err := tx.Exec(insertLinkQuery, currentUrl, link) + fmt.Printf("Inserting link %d into table\n", index) + + if err != nil { + panic(err) + } + + } + tx.Commit() + } + +} diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/.gitignore b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/.gitignore diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/LICENSE b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/LICENSE diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/LICENSE-GO b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/LICENSE-GO diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/README.md b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/README.md diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/client.go b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/client.go diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/doc.go b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/doc.go diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/gemini.go b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/gemini.go diff --git a/vendor/github.com/makeworld-the-better-one/go-gemini/verify_hostname.go b/crawl/vendor/github.com/makeworld-the-better-one/go-gemini/verify_hostname.go diff --git a/vendor/github.com/mattn/go-sqlite3/.codecov.yml b/crawl/vendor/github.com/mattn/go-sqlite3/.codecov.yml diff --git a/vendor/github.com/mattn/go-sqlite3/.gitignore b/crawl/vendor/github.com/mattn/go-sqlite3/.gitignore diff --git a/vendor/github.com/mattn/go-sqlite3/LICENSE b/crawl/vendor/github.com/mattn/go-sqlite3/LICENSE diff --git a/vendor/github.com/mattn/go-sqlite3/README.md b/crawl/vendor/github.com/mattn/go-sqlite3/README.md diff --git a/vendor/github.com/mattn/go-sqlite3/SECURITY.md b/crawl/vendor/github.com/mattn/go-sqlite3/SECURITY.md diff --git a/vendor/github.com/mattn/go-sqlite3/backup.go b/crawl/vendor/github.com/mattn/go-sqlite3/backup.go diff --git a/vendor/github.com/mattn/go-sqlite3/callback.go b/crawl/vendor/github.com/mattn/go-sqlite3/callback.go diff --git a/vendor/github.com/mattn/go-sqlite3/convert.go b/crawl/vendor/github.com/mattn/go-sqlite3/convert.go diff --git a/vendor/github.com/mattn/go-sqlite3/doc.go b/crawl/vendor/github.com/mattn/go-sqlite3/doc.go diff --git a/vendor/github.com/mattn/go-sqlite3/error.go b/crawl/vendor/github.com/mattn/go-sqlite3/error.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3-binding.c b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3-binding.c diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3-binding.h b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3-binding.h diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_context.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_context.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_func_crypt.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_func_crypt.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_libsqlite3.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_libsqlite3.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_load_extension.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_load_extension.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_load_extension_omit.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_load_extension_omit.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_allow_uri_authority.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_allow_uri_authority.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_app_armor.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_app_armor.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_column_metadata.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_column_metadata.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_foreign_keys.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_foreign_keys.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_fts5.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_fts5.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_icu.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_icu.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_introspect.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_introspect.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_math_functions.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_math_functions.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_os_trace.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_os_trace.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_percentile.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_percentile.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_preupdate.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_preupdate.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_preupdate_hook.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_preupdate_hook.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_preupdate_omit.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_preupdate_omit.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_secure_delete.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_secure_delete.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_secure_delete_fast.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_secure_delete_fast.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_serialize.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_serialize.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_serialize_omit.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_serialize_omit.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_stat4.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_stat4.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_unlock_notify.c b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_unlock_notify.c diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_unlock_notify.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_unlock_notify.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_userauth.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_userauth.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_userauth_omit.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_userauth_omit.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_vacuum_full.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_vacuum_full.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_vacuum_incr.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_vacuum_incr.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_vtable.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_opt_vtable.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_other.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_other.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_solaris.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_solaris.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_sql.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_sql.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_trace.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_trace.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_type.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_type.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_usleep_windows.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_usleep_windows.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3_windows.go b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3_windows.go diff --git a/vendor/github.com/mattn/go-sqlite3/sqlite3ext.h b/crawl/vendor/github.com/mattn/go-sqlite3/sqlite3ext.h diff --git a/vendor/github.com/mattn/go-sqlite3/static_mock.go b/crawl/vendor/github.com/mattn/go-sqlite3/static_mock.go diff --git a/vendor/golang.org/x/net/LICENSE b/crawl/vendor/golang.org/x/net/LICENSE diff --git a/vendor/golang.org/x/net/PATENTS b/crawl/vendor/golang.org/x/net/PATENTS diff --git a/vendor/golang.org/x/net/idna/go118.go b/crawl/vendor/golang.org/x/net/idna/go118.go diff --git a/vendor/golang.org/x/net/idna/idna10.0.0.go b/crawl/vendor/golang.org/x/net/idna/idna10.0.0.go diff --git a/vendor/golang.org/x/net/idna/idna9.0.0.go b/crawl/vendor/golang.org/x/net/idna/idna9.0.0.go diff --git a/vendor/golang.org/x/net/idna/pre_go118.go b/crawl/vendor/golang.org/x/net/idna/pre_go118.go diff --git a/vendor/golang.org/x/net/idna/punycode.go b/crawl/vendor/golang.org/x/net/idna/punycode.go diff --git a/vendor/golang.org/x/net/idna/tables10.0.0.go b/crawl/vendor/golang.org/x/net/idna/tables10.0.0.go diff --git a/vendor/golang.org/x/net/idna/tables11.0.0.go b/crawl/vendor/golang.org/x/net/idna/tables11.0.0.go diff --git a/vendor/golang.org/x/net/idna/tables12.0.0.go b/crawl/vendor/golang.org/x/net/idna/tables12.0.0.go diff --git a/vendor/golang.org/x/net/idna/tables13.0.0.go b/crawl/vendor/golang.org/x/net/idna/tables13.0.0.go diff --git a/vendor/golang.org/x/net/idna/tables9.0.0.go b/crawl/vendor/golang.org/x/net/idna/tables9.0.0.go diff --git a/vendor/golang.org/x/net/idna/trie.go b/crawl/vendor/golang.org/x/net/idna/trie.go diff --git a/vendor/golang.org/x/net/idna/trieval.go b/crawl/vendor/golang.org/x/net/idna/trieval.go diff --git a/vendor/golang.org/x/text/LICENSE b/crawl/vendor/golang.org/x/text/LICENSE diff --git a/vendor/golang.org/x/text/PATENTS b/crawl/vendor/golang.org/x/text/PATENTS diff --git a/vendor/golang.org/x/text/secure/bidirule/bidirule.go b/crawl/vendor/golang.org/x/text/secure/bidirule/bidirule.go diff --git a/vendor/golang.org/x/text/secure/bidirule/bidirule10.0.0.go b/crawl/vendor/golang.org/x/text/secure/bidirule/bidirule10.0.0.go diff --git a/vendor/golang.org/x/text/secure/bidirule/bidirule9.0.0.go b/crawl/vendor/golang.org/x/text/secure/bidirule/bidirule9.0.0.go diff --git a/vendor/golang.org/x/text/transform/transform.go b/crawl/vendor/golang.org/x/text/transform/transform.go diff --git a/vendor/golang.org/x/text/unicode/bidi/bidi.go b/crawl/vendor/golang.org/x/text/unicode/bidi/bidi.go diff --git a/vendor/golang.org/x/text/unicode/bidi/bracket.go b/crawl/vendor/golang.org/x/text/unicode/bidi/bracket.go diff --git a/vendor/golang.org/x/text/unicode/bidi/core.go b/crawl/vendor/golang.org/x/text/unicode/bidi/core.go diff --git a/vendor/golang.org/x/text/unicode/bidi/prop.go b/crawl/vendor/golang.org/x/text/unicode/bidi/prop.go diff --git a/vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go b/crawl/vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go diff --git a/vendor/golang.org/x/text/unicode/bidi/tables11.0.0.go b/crawl/vendor/golang.org/x/text/unicode/bidi/tables11.0.0.go diff --git a/vendor/golang.org/x/text/unicode/bidi/tables12.0.0.go b/crawl/vendor/golang.org/x/text/unicode/bidi/tables12.0.0.go diff --git a/vendor/golang.org/x/text/unicode/bidi/tables13.0.0.go b/crawl/vendor/golang.org/x/text/unicode/bidi/tables13.0.0.go diff --git a/vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go b/crawl/vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go diff --git a/vendor/golang.org/x/text/unicode/bidi/trieval.go b/crawl/vendor/golang.org/x/text/unicode/bidi/trieval.go diff --git a/vendor/golang.org/x/text/unicode/norm/composition.go b/crawl/vendor/golang.org/x/text/unicode/norm/composition.go diff --git a/vendor/golang.org/x/text/unicode/norm/forminfo.go b/crawl/vendor/golang.org/x/text/unicode/norm/forminfo.go diff --git a/vendor/golang.org/x/text/unicode/norm/input.go b/crawl/vendor/golang.org/x/text/unicode/norm/input.go diff --git a/vendor/golang.org/x/text/unicode/norm/iter.go b/crawl/vendor/golang.org/x/text/unicode/norm/iter.go diff --git a/vendor/golang.org/x/text/unicode/norm/normalize.go b/crawl/vendor/golang.org/x/text/unicode/norm/normalize.go diff --git a/vendor/golang.org/x/text/unicode/norm/readwriter.go b/crawl/vendor/golang.org/x/text/unicode/norm/readwriter.go diff --git a/vendor/golang.org/x/text/unicode/norm/tables10.0.0.go b/crawl/vendor/golang.org/x/text/unicode/norm/tables10.0.0.go diff --git a/vendor/golang.org/x/text/unicode/norm/tables11.0.0.go b/crawl/vendor/golang.org/x/text/unicode/norm/tables11.0.0.go diff --git a/vendor/golang.org/x/text/unicode/norm/tables12.0.0.go b/crawl/vendor/golang.org/x/text/unicode/norm/tables12.0.0.go diff --git a/vendor/golang.org/x/text/unicode/norm/tables13.0.0.go b/crawl/vendor/golang.org/x/text/unicode/norm/tables13.0.0.go diff --git a/vendor/golang.org/x/text/unicode/norm/tables9.0.0.go b/crawl/vendor/golang.org/x/text/unicode/norm/tables9.0.0.go diff --git a/vendor/golang.org/x/text/unicode/norm/transform.go b/crawl/vendor/golang.org/x/text/unicode/norm/transform.go diff --git a/vendor/golang.org/x/text/unicode/norm/trie.go b/crawl/vendor/golang.org/x/text/unicode/norm/trie.go diff --git a/vendor/modules.txt b/crawl/vendor/modules.txt diff --git a/main.go b/main.go @@ -1,135 +0,0 @@ -package main - -import ( - "fmt" - "github.com/makeworld-the-better-one/go-gemini" - "io" - "net/url" - "os" - "strings" - "database/sql" - _ "github.com/mattn/go-sqlite3" -) - -func index(currentUrl string) (string, []string) { - - base, err := url.Parse(currentUrl) - - if err != nil { - panic(err) - } - - resp, err := gemini.Fetch(currentUrl) - - if err != nil { - panic(err) - } - - bodyBytes, err := io.ReadAll(resp.Body) - - body := string(bodyBytes) - - if err != nil { - panic(err) - } - - lines := strings.Split(body, "\n") - - links := []string{} - - escaped := false - escape := "```" - - for _, item := range lines { - - if strings.Compare(escape, item) == 0 { - escaped = !escaped - } - - if len(item) > 3 && !escaped { - if item[0] == '=' && item[1] == '>' { - // TODO: is =>link valid? - links = append(links, item[3:]) - } - } - - } - - geminiLinks := []string{} - - for _, item := range links { - - indexOfSpace := strings.Index(item, " ") - - if indexOfSpace == -1 { - indexOfSpace = len(item) - } - if strings.Compare(item[:9], "gemini://") == 0 { - geminiLinks = append(geminiLinks, item[0:indexOfSpace]) - } - if strings.Contains(item, "://") == false { - // relative link - - u, err := url.Parse(item[0:indexOfSpace]) - - if err != nil { - panic(err) - } - - geminiLinks = append(geminiLinks, base.ResolveReference(u).String()) - } - } - - return body, geminiLinks - -} - -func main() { - - // TODO: Read line from file, go from there; append only - // how to multi-thread? - // go mutex duh - - os.Mkdir("outputs/", 0755) - db, err := sql.Open("sqlite3", "./outputs/main.db") - - if err != nil { - panic(err) - } - - defer db.Close() - - createDb := "CREATE TABLE IF NOT EXISTS link (source, destination, CONSTRAINT unq UNIQUE (source, destination));" - - _, err = db.Exec(createDb) - - if err != nil { - panic(err) - } - - - currentUrl := "gemini://blog.laack.co" - body, forwardGeminiLinks := index(currentUrl) - - err = os.WriteFile("outputs/" + url.PathEscape(currentUrl), []byte(body), 0644) - - if err != nil { - panic(err) - } - - insertLinkQuery := "INSERT INTO link (source, destination) VALUES (?, ?) ON CONFLICT DO NOTHING" - - tx, _ := db.Begin() - - for index, link := range forwardGeminiLinks { - - _, err := tx.Exec(insertLinkQuery, currentUrl, link) - fmt.Printf("Inserting link %d into table\n", index) - - if err != nil { - panic(err) - } - - } - tx.Commit() -}