Skip to content

Commit

Permalink
mostrecent: reuse parallel directory walking from linkindex
Browse files Browse the repository at this point in the history
  • Loading branch information
stapelberg committed Dec 29, 2024
1 parent 096ebf9 commit 6cb5927
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 42 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ bull uses the yuin/goldmark markdown renderer, specifically:
* you can leave a favicon.ico in your content directory
* handle front matter better (ignore? format differently?), e.g. [[untagged/prober7]]
* make each heading foldable
* mostrecent: use parallel walk
* mostrecent: paginate to make the page manageable for large gardens
* content settings: make title_format configurable

Expand Down
64 changes: 47 additions & 17 deletions cmd/bull/linkindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,25 @@ func (b *bullServer) linkTargets(pg *page) ([]string, error) {
}

type indexer struct {
// config
contentRoot *os.Root
readModTime bool

// state
walkq *queue
readq chan string
readq chan page
dirs, pages atomic.Uint64
pending atomic.Int64
}

// newIndexer returns an indexer that walks the given content root.
//
// The walk queue comes from newQueue and readq is an unbuffered channel of
// page values. readModTime keeps its zero value (false); callers that need
// modification times set it on the returned indexer before walking.
func newIndexer(content *os.Root) *indexer {
	idx := &indexer{}
	idx.contentRoot = content
	idx.walkq = newQueue()
	idx.readq = make(chan page)
	return idx
}

func (i *indexer) dirDiscovered() {
i.pending.Add(1)
i.dirs.Add(1)
Expand Down Expand Up @@ -90,24 +102,29 @@ func (i *indexer) walkN(dir string) error {
continue
}
i.pages.Add(1)
i.readq <- path.Join(dir, name)
fn := path.Join(dir, name)
pg := page{
PageName: file2page(fn),
FileName: fn,
// Content is empty; page not read yet
// ModTime is empty
}
if i.readModTime {
info, err := dirent.Info()
if err != nil {
return err
}
pg.ModTime = info.ModTime()
}
i.readq <- pg
}
return nil
}

func (b *bullServer) index() (*idx, error) {
i := &indexer{
contentRoot: b.content,
walkq: newQueue(),
readq: make(chan string),
}
func (i *indexer) walk() error {
i.dirDiscovered()
i.walkq.Push(".")

var (
linksMu sync.Mutex
links = make(map[string][]string)
)
ctx, canc := context.WithCancel(context.Background())
defer canc()
walkg, gctx := errgroup.WithContext(ctx)
Expand Down Expand Up @@ -139,13 +156,27 @@ func (b *bullServer) index() (*idx, error) {
return nil
})
}
var readg errgroup.Group
if err := walkg.Wait(); err != nil && !errors.Is(err, context.Canceled) {
return err
}
close(i.readq)
return nil
}

func (b *bullServer) index() (*idx, error) {
i := newIndexer(b.content)

var (
linksMu sync.Mutex
links = make(map[string][]string)
readg errgroup.Group
)
for range runtime.NumCPU() {
readg.Go(func() error {
linksN := make(map[string][]string)
for fn := range i.readq {
for pg := range i.readq {
// fmt.Printf("reading %s\n", fn)
pg, err := read(b.content, fn)
pg, err := read(b.content, pg.FileName)
if err != nil {
return err
}
Expand All @@ -161,10 +192,9 @@ func (b *bullServer) index() (*idx, error) {
return nil
})
}
if err := walkg.Wait(); err != nil && !errors.Is(err, context.Canceled) {
if err := i.walk(); err != nil {
return nil, err
}
close(i.readq)
if err := readg.Wait(); err != nil {
return nil, err
}
Expand Down
41 changes: 17 additions & 24 deletions cmd/bull/mostrecent.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,32 @@ package main
import (
"bytes"
"fmt"
"io/fs"
"net/http"
"sort"
"sync"
)

func (b *bullServer) mostrecent(w http.ResponseWriter, r *http.Request) error {
// walk the entire content directory
var pages []*page
err := fs.WalkDir(b.content.FS(), ".", func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
i := newIndexer(b.content)
i.readModTime = true // required for sorting by most recent
var (
pages []page
readg sync.WaitGroup
)
readg.Add(1)
// one reading goroutine is sufficient, we only collect metadata
go func() {
defer readg.Done()
for pg := range i.readq {
pages = append(pages, pg)
}

if d.IsDir() || !isMarkdown(path) {
return nil
}

// save path and modtime for sorting
info, err := d.Info()
if err != nil {
return err
}
pages = append(pages, &page{
PageName: file2page(path),
FileName: path,
Content: "", // intentionally left blank
ModTime: info.ModTime(),
})
return nil
})
if err != nil {
}()
if err := i.walk(); err != nil {
return err
}
readg.Wait()

sort.Slice(pages, func(i, j int) bool {
return pages[i].ModTime.After(pages[j].ModTime)
})
Expand Down

0 comments on commit 6cb5927

Please sign in to comment.