Skip to content

Commit

Permalink
Fix rubocop
Browse files Browse the repository at this point in the history
  • Loading branch information
mkasberg committed Apr 23, 2024
1 parent f6d82a7 commit e42d55a
Show file tree
Hide file tree
Showing 9 changed files with 131 additions and 105 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,7 @@ gem "rake", "~> 13.0"
gem "rspec", "~> 3.0"

gem "rubocop", "~> 1.21"
gem "rubocop-rake"
gem "rubocop-rspec"

gem "debug"
15 changes: 15 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,19 @@ GEM
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.31.2)
parser (>= 3.3.0.4)
rubocop-capybara (2.20.0)
rubocop (~> 1.41)
rubocop-factory_bot (2.25.1)
rubocop (~> 1.41)
rubocop-rake (0.6.0)
rubocop (~> 1.0)
rubocop-rspec (2.29.1)
rubocop (~> 1.40)
rubocop-capybara (~> 2.17)
rubocop-factory_bot (~> 2.22)
rubocop-rspec_rails (~> 2.28)
rubocop-rspec_rails (2.28.3)
rubocop (~> 1.40)
ruby-progressbar (1.13.0)
safe_yaml (1.0.5)
sass-embedded (1.75.0-arm64-darwin)
Expand Down Expand Up @@ -183,6 +196,8 @@ DEPENDENCIES
rake (~> 13.0)
rspec (~> 3.0)
rubocop (~> 1.21)
rubocop-rake
rubocop-rspec

BUNDLED WITH
2.5.6
2 changes: 1 addition & 1 deletion lib/jekyll_ai_related_posts.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
loader.setup

# Top-level namespace for the jekyll_ai_related_posts gem.
module JekyllAiRelatedPosts
  # Absolute path of the directory one level above this file's directory.
  # Assigned exactly once so loading the file does not emit an
  # "already initialized constant" warning.
  GEM_ROOT = File.expand_path("..", __dir__)

  # Base error class for failures raised by this gem.
  class Error < StandardError; end
end
89 changes: 45 additions & 44 deletions lib/jekyll_ai_related_posts/generator.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
require 'active_record'
require 'sqlite3'
require 'sqlite_vss'
require 'jekyll'
require 'json'
# frozen_string_literal: true

require "active_record"
require "sqlite3"
require "sqlite_vss"
require "jekyll"
require "json"

module JekyllAiRelatedPosts
class Generator < Jekyll::Generator

def generate(site)
@site = site
setup_database

if fetch_enabled?
Jekyll.logger.info '[ai_related_posts] Generating related posts...'
Jekyll.logger.info "[ai_related_posts] Generating related posts..."
@embeddings_fetcher = new_fetcher

@site.posts.docs.each do |p|
Expand All @@ -28,7 +29,7 @@ def generate(site)
find_related(p)
end
else
Jekyll.logger.info '[ai_related_posts] Using cached related posts data...'
Jekyll.logger.info "[ai_related_posts] Using cached related posts data..."

@site.posts.docs.each do |p|
fallback_generate_related(p)
Expand All @@ -40,30 +41,30 @@ def generate(site)

# Decides whether embeddings should be fetched during this build.
#
# Reads `ai_related_posts.fetch_enabled` from the site config:
# * a String enables fetching only when it equals ENV["JEKYLL_ENV"];
# * `true` / `false` is used as-is;
# * anything else (including a missing key or missing config section)
#   falls back to the default of `true`.
#
# @return [Boolean]
def fetch_enabled?
  # `dig` reads the nested key once and yields nil when the section is absent.
  setting = @site.config.dig("ai_related_posts", "fetch_enabled")

  case setting
  when String then ENV["JEKYLL_ENV"] == setting
  when true, false then setting
  else true
  end
end

# Fills in post.data["ai_related_posts"] for the code path where embeddings
# are not fetched.
#
# When a Models::Post row exists for the post's relative path, the related
# posts come from `find_related`; otherwise the post's own `related_posts`
# value is stored instead.
#
# @param post [#relative_path, #data, #related_posts] the post to annotate
def fallback_generate_related(post)
  cached = Models::Post.find_by(relative_path: post.relative_path)
  return find_related(post) unless cached.nil?

  # No cached row for this post: fall back to post.related_posts (read once).
  post.data["ai_related_posts"] = post.related_posts
end

# Builds the embeddings fetcher selected by the site config.
#
# `ai_related_posts.embeddings_source` set to "mock" yields a MockEmbeddings
# instance; any other value yields an OpenAiEmbeddings instance constructed
# with `ai_related_posts.openai_api_key`.
#
# @return [MockEmbeddings, OpenAiEmbeddings]
def new_fetcher
  settings = @site.config["ai_related_posts"]

  case settings["embeddings_source"]
  when "mock"
    MockEmbeddings.new
  else
    OpenAiEmbeddings.new(settings["openai_api_key"])
  end
end

Expand All @@ -72,25 +73,27 @@ def ensure_embedding_cached(post)

# Clear cache if post has been updated
if !existing.nil? && existing.embedding_text != embedding_text(post)
sql = 'DELETE FROM vss_posts WHERE rowid = (SELECT rowid FROM posts WHERE relative_path = :relative_path);'
ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([sql, relative_path: post.relative_path]))
sql = "DELETE FROM vss_posts WHERE rowid = (SELECT rowid FROM posts WHERE relative_path = :relative_path);"
ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([sql,
{ relative_path: post.relative_path }]))
existing.destroy!
existing = nil
end

if existing.nil?
Models::Post.create!(
relative_path: post.relative_path,
embedding_text: embedding_text(post),
embedding: embedding_for(post).to_json
)
return unless existing.nil?

Models::Post.create!(
relative_path: post.relative_path,
embedding_text: embedding_text(post),
embedding: embedding_for(post).to_json
)

sql = <<-SQL
sql = <<-SQL
INSERT INTO vss_posts (rowid, post_embedding)
SELECT rowid, embedding FROM posts WHERE relative_path = :relative_path;
SQL
ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([sql, relative_path: post.relative_path]))
end
SQL
ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([sql,
{ relative_path: post.relative_path }]))
end

def find_related(post)
Expand All @@ -104,9 +107,11 @@ def find_related(post)
LIMIT 10000;
SQL

results = ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([sql, relative_path: post.relative_path]))
results = ActiveRecord::Base.connection.execute(ActiveRecord::Base.sanitize_sql([sql, {
relative_path: post.relative_path
}]))
# The first result is the post itself, with a distance of 0.
rowids = results.sort_by { |r| r['distance'] }.drop(1).first(3).map { |r| r['rowid'] }
rowids = results.sort_by { |r| r["distance"] }.drop(1).first(3).map { |r| r["rowid"] }

posts_by_rowid = {}
rowids.each do |rowid|
Expand All @@ -118,21 +123,17 @@ def find_related(post)
end

related_posts = rowids.map do |rowid|
relative_path = posts_by_rowid[rowid]['relative_path']
relative_path = posts_by_rowid[rowid]["relative_path"]
@indexed_posts[relative_path]
end

post.data['ai_related_posts'] = related_posts
post.data["ai_related_posts"] = related_posts
end

# Builds the text for a post from its title, categories and tags.
#
# Format: "Title: <title>[; Categories: a, b][; Tags: x, y]". The
# categories and tags sections are omitted when empty. Missing (nil)
# categories or tags are treated as empty instead of raising.
#
# @param post [#data] object whose `data` hash holds "title", "categories"
#   and "tags"
# @return [String]
def embedding_text(post)
  title = post.data["title"]
  categories = Array(post.data["categories"])
  tags = Array(post.data["tags"])

  sections = ["Title: #{title}"]
  sections << "Categories: #{categories.join(", ")}" unless categories.empty?
  sections << "Tags: #{tags.join(", ")}" unless tags.empty?

  sections.join("; ")
end
Expand All @@ -143,14 +144,14 @@ def embedding_for(post)

@embeddings_fetcher.embedding_for(input)
end

def setup_database
ActiveRecord::Base.establish_connection(
adapter: 'sqlite3',
database: @site.in_source_dir('.ai_related_posts_cache.sqlite3')
adapter: "sqlite3",
database: @site.in_source_dir(".ai_related_posts_cache.sqlite3")
)
# We don't need WAL mode for this.
ActiveRecord::Base.connection.execute('PRAGMA journal_mode=DELETE;')
ActiveRecord::Base.connection.execute("PRAGMA journal_mode=DELETE;")

# Enable sqlite-vss vector extension
db = ActiveRecord::Base.connection.raw_connection
Expand Down
2 changes: 2 additions & 0 deletions lib/jekyll_ai_related_posts/models/post.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

module JekyllAiRelatedPosts
module Models
class Post < ActiveRecord::Base
Expand Down
33 changes: 17 additions & 16 deletions lib/jekyll_ai_related_posts/open_ai_embeddings.rb
Original file line number Diff line number Diff line change
@@ -1,34 +1,35 @@
require 'faraday'
# frozen_string_literal: true

require "faraday"

module JekyllAiRelatedPosts
class OpenAiEmbeddings
DIMENSIONS = 1536

# Stores the HTTP connection used to call the embeddings API.
#
# @param api_key [String] key sent as a Bearer authorization credential on
#   the default connection
# @param connection [Object, nil] optional pre-built connection to use
#   instead of the default one; when given, `api_key` is not used here
def initialize(api_key, connection: nil)
  @connection = connection
  return unless @connection.nil?

  # Default connection: built once, only when none was injected.
  @connection = Faraday.new(url: "https://api.openai.com") do |builder|
    builder.request :authorization, "Bearer", api_key
    builder.request :json
    builder.response :json
    builder.response :raise_error
  end
end

def embedding_for(text)
res = @connection.post('/v1/embeddings') do |req|
res = @connection.post("/v1/embeddings") do |req|
req.body = {
input: text,
model: 'text-embedding-3-small'
model: "text-embedding-3-small"
}
end


res.body['data'].first['embedding']
res.body["data"].first["embedding"]
rescue Faraday::Error => e
Jekyll.logger.error 'Error response from OpanAI API!'
Jekyll.logger.error "Error response from OpanAI API!"
Jekyll.logger.error e.inspect

raise
Expand Down
52 changes: 28 additions & 24 deletions spec/jekyll_ai_related_posts/generator_spec.rb
Original file line number Diff line number Diff line change
@@ -1,67 +1,71 @@
# frozen_string_literal: true
require 'debug'
require 'ostruct'

require "debug"
require "ostruct"

RSpec.describe JekyllAiRelatedPosts::Generator do
let(:config_overrides) do
{
'ai_related_posts' => {
'openai_api_key' => 'my_key',
'embeddings_source' => 'mock'
},
"ai_related_posts" => {
"openai_api_key" => "my_key",
"embeddings_source" => "mock"
}
}
end
let(:site) do
fixture_site(config_overrides)
end

before(:each) do
File.delete(site.in_source_dir('.ai_related_posts_cache.sqlite3'))
File.delete(site.in_source_dir(".ai_related_posts_cache.sqlite3"))
rescue Errno::ENOENT
end

it 'generates related posts' do
it "generates related posts" do
site.process

wifi_upgrades = File.read(dest_dir("2023", "12", "22", "home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.html"))
expect(wifi_upgrades).to include('1:::Analyzing Static Website Logs with AWStats')
expect(wifi_upgrades).to include('2:::Catching Mew: A Playable Game Boy Quote')
wifi_upgrades = File.read(dest_dir("2023", "12", "22",
"home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.html"))
expect(wifi_upgrades).to include("1:::Analyzing Static Website Logs with AWStats")
expect(wifi_upgrades).to include("2:::Catching Mew: A Playable Game Boy Quote")
end

it 'regenerates when posts are edited' do
it "regenerates when posts are edited" do
# Create the cache
site.process

contents = File.read('spec/source/_posts/2023-12-22-home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.md')
contents.gsub!(/title:.+/, 'title: How to Catch Pokemon')
File.open('spec/source/_posts/2023-12-22-home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.md', 'w') do |file|
contents = File.read("spec/source/_posts/2023-12-22-home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.md")
contents.gsub!(/title:.+/, "title: How to Catch Pokemon")
File.open("spec/source/_posts/2023-12-22-home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.md",
"w") do |file|
file.write(contents)
end

expect_any_instance_of(MockEmbeddings)
.to receive(:embedding_for)
.with('Title: How to Catch Pokemon; Tags: Technology')
.with("Title: How to Catch Pokemon; Tags: Technology")
.and_call_original
site.process
ensure
contents.gsub!(/title:.+/, 'title: "Home WiFi Upgrades: Adding an Access Point with Wired Backhaul"')
File.open('spec/source/_posts/2023-12-22-home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.md', 'w') do |file|
File.open("spec/source/_posts/2023-12-22-home-wifi-upgrades-adding-an-access-point-with-wired-backhaul.md",
"w") do |file|
file.write(contents)
end
end

context 'fetch disabled' do
context "fetch disabled" do
let(:config_overrides) do
{
'ai_related_posts' => {
'openai_api_key' => 'my_key',
'embeddings_source' => 'mock',
'fetch_enabled' => false
},
"ai_related_posts" => {
"openai_api_key" => "my_key",
"embeddings_source" => "mock",
"fetch_enabled" => false
}
}
end

it 'does not fetch embeddings from the API' do
it "does not fetch embeddings from the API" do
expect_any_instance_of(MockEmbeddings).not_to receive(:embedding_for)

site.process
Expand Down
Loading

0 comments on commit e42d55a

Please sign in to comment.