diff --git a/README.md b/README.md index 16e6ca7..b632301 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,6 @@ bull uses the yuin/goldmark markdown renderer, specifically: * you can leave a favicon.ico in your content directory * handle front matter better (ignore? format differently?), e.g. [[untagged/prober7]] * make each heading foldable -* mostrecent: use parallel walk * mostrecent: paginate to make the page manageable for large gardens * content settings: make title_format configurable diff --git a/cmd/bull/linkindex.go b/cmd/bull/linkindex.go index 324910d..3060aed 100644 --- a/cmd/bull/linkindex.go +++ b/cmd/bull/linkindex.go @@ -49,13 +49,25 @@ func (b *bullServer) linkTargets(pg *page) ([]string, error) { } type indexer struct { + // config contentRoot *os.Root + readModTime bool + + // state walkq *queue - readq chan string + readq chan page dirs, pages atomic.Uint64 pending atomic.Int64 } +func newIndexer(content *os.Root) *indexer { + return &indexer{ + contentRoot: content, + walkq: newQueue(), + readq: make(chan page), + } +} + func (i *indexer) dirDiscovered() { i.pending.Add(1) i.dirs.Add(1) @@ -90,24 +102,29 @@ func (i *indexer) walkN(dir string) error { continue } i.pages.Add(1) - i.readq <- path.Join(dir, name) + fn := path.Join(dir, name) + pg := page{ + PageName: file2page(fn), + FileName: fn, + // Content is empty; page not read yet + // ModTime is empty + } + if i.readModTime { + info, err := dirent.Info() + if err != nil { + return err + } + pg.ModTime = info.ModTime() + } + i.readq <- pg } return nil } -func (b *bullServer) index() (*idx, error) { - i := &indexer{ - contentRoot: b.content, - walkq: newQueue(), - readq: make(chan string), - } +func (i *indexer) walk() error { i.dirDiscovered() i.walkq.Push(".") - var ( - linksMu sync.Mutex - links = make(map[string][]string) - ) ctx, canc := context.WithCancel(context.Background()) defer canc() walkg, gctx := errgroup.WithContext(ctx) @@ -139,13 +156,27 @@ func (b *bullServer) index() (*idx, error) { return nil }) } - var readg errgroup.Group + if err := walkg.Wait(); err != nil && !errors.Is(err, context.Canceled) { + return err + } + close(i.readq) + return nil +} + +func (b *bullServer) index() (*idx, error) { + i := newIndexer(b.content) + + var ( + linksMu sync.Mutex + links = make(map[string][]string) + readg errgroup.Group + ) for range runtime.NumCPU() { readg.Go(func() error { linksN := make(map[string][]string) - for fn := range i.readq { + for pg := range i.readq { // fmt.Printf("reading %s\n", fn) - pg, err := read(b.content, fn) + pg, err := read(b.content, pg.FileName) if err != nil { return err } @@ -161,10 +192,9 @@ func (b *bullServer) index() (*idx, error) { return nil }) } - if err := walkg.Wait(); err != nil && !errors.Is(err, context.Canceled) { + if err := i.walk(); err != nil { return nil, err } - close(i.readq) if err := readg.Wait(); err != nil { return nil, err } diff --git a/cmd/bull/mostrecent.go b/cmd/bull/mostrecent.go index 01ece0a..4e7a0ce 100644 --- a/cmd/bull/mostrecent.go +++ b/cmd/bull/mostrecent.go @@ -3,39 +3,32 @@ package main import ( "bytes" "fmt" - "io/fs" "net/http" "sort" + "sync" ) func (b *bullServer) mostrecent(w http.ResponseWriter, r *http.Request) error { // walk the entire content directory - var pages []*page - err := fs.WalkDir(b.content.FS(), ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err + i := newIndexer(b.content) + i.readModTime = true // required for sorting by most recent + var ( + pages []page + readg sync.WaitGroup + ) + readg.Add(1) + // one reading goroutine is sufficient, we only collect metadata + go func() { + defer readg.Done() + for pg := range i.readq { + pages = append(pages, pg) } - - if d.IsDir() || !isMarkdown(path) { - return nil - } - - // save path and modtime for sorting - info, err := d.Info() - if err != nil { - return err - } - pages = append(pages, &page{ - PageName: file2page(path), - FileName: path, - Content: "", // intentionally left blank - ModTime: info.ModTime(), - }) - return nil - }) - if err != nil { + }() + if err := i.walk(); err != nil { return err } + readg.Wait() + sort.Slice(pages, func(i, j int) bool { return pages[i].ModTime.After(pages[j].ModTime) })