Skip to content

Commit

Permalink
Be slightly less absurd about generating documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesWrigley committed Dec 28, 2023
1 parent d174313 commit 458ec8c
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 839 deletions.
1 change: 1 addition & 0 deletions gen/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[deps]
Clang = "40e3b903-d033-50b4-a0cc-940c62c95e31"
MD5 = "6ac74813-4b46-53a4-afec-0b5dc9d7885c"
XML = "72c71f33-b9b6-44de-8c94-c961784809e2"
libssh_jll = "a8d4f100-aa25-5708-be18-96e0805c2c9d"
172 changes: 44 additions & 128 deletions gen/gen.jl
Original file line number Diff line number Diff line change
@@ -1,97 +1,24 @@
using libssh_jll
import libssh_jll

using MD5
import XML
import Clang
import Clang: LibClang, spelling
using Clang.Generators
import Clang.Generators as gen


ctx_objects = Dict{Symbol, Any}()

"""
    get_doxygen_group(func_name, filename)

Return the name of the Doxygen group (e.g. `"libssh__channel"`) that the function
`func_name`, found in the header file named `filename`, is filed under.

The lookup is done in this order, and the first match wins:

1. The header file name (`server.h` and `sftp.h` each map to a single group).
2. An exact match of `func_name` against a few hand-maintained lists.
3. A prefix match of `func_name`.
4. Otherwise the session group is returned as the fallback.
"""
function get_doxygen_group(func_name, filename)
    # Functions from these headers all share one group per header
    header_groups = ("server.h" => "libssh__server", "sftp.h" => "libssh__sftp")
    for (header, group) in header_groups
        filename == header && return group
    end

    # Functions that can only be identified by their full name
    exact_groups = (
        ("ssh_init", "ssh_finalize") => "libssh",
        (
            "ssh_getpass",
            "ssh_dir_writeable",
            "ssh_get_hexa",
            "ssh_print_hexa",
            "ssh_log_hexdump",
            "ssh_version",
            "ssh_list_count",
            "ssh_dirname",
            "ssh_basename",
            "ssh_mkdir",
            "ssh_mkdirs",
            "ssh_path_expand_tilde",
            "ssh_timeout_update",
        ) => "libssh__misc",
        (
            "ssh_message_get",
            "ssh_message_type",
            "ssh_message_subtype",
            "ssh_message_free",
        ) => "libssh__messages",
    )
    for (names, group) in exact_groups
        func_name in names && return group
    end

    # Functions that can be identified by a name prefix. The entries are tried
    # from top to bottom.
    prefix_groups = (
        ("ssh_callbacks",) => "libssh__callbacks",
        ("ssh_userauth",) => "libssh__auth",
        ("ssh_buffer",) => "libssh__buffer",
        ("ssh_channel",) => "libssh__channel",
        ("ssh_get_error",) => "libssh__error",
        ("ssh_set_log", "ssh_get_log") => "libssh__log",
        ("ssh_pki", "ssh_key") => "libssh__pki",
        ("ssh_poll", "ssh_event") => "libssh__poll",
        ("ssh_scp",) => "libssh__scp",
        ("ssh_string",) => "libssh__string",
        ("ssh_threads",) => "libssh__threads",
    )
    for (prefixes, group) in prefix_groups
        any(prefix -> startswith(func_name, prefix), prefixes) && return group
    end

    # The session group has names with a bunch of different prefixes, so it's
    # what we fall back to when nothing else matched.
    return "libssh__session"
end

"""
Helper function to generate documentation for symbols with missing docstrings.
So here's the thing: we want to generate documentation for the bindings. Now
Clang.jl is pretty good at pulling docstrings from headers already, but many
docstrings in libssh are in the source files rather than the headers, so we
can't access them. Hence this callback function which is called by Clang.jl for
each node that it can't find docs for.
The hard part is generating a link to the upstream documentation. libssh uses
Doxygen, which creates stable links for symbols from an MD5 hash of their
signature. Doxygen does have a tag feature which creates an XML file with the
anchors but libssh doesn't use that, and of course waiting for the documentation
to be updated is too slow for our galaxy brain. Instead we recreate the
signature that Doxygen uses and hash it ourselves to generate the link. Which is
surprisingly reliable.
For the most part we rely on the Doxygen tag file to generate a URL to the
upstream docs, except for certain structs/constants that are referenced by the
function docs.
"""
function get_docs(node::ExprNode)
tags = ctx_objects[:tags]

# There's a bunch of special cases that we take care of first, these are all
# referenced by other docstrings and Documenter.jl will complain if they
# don't have docstrings too.
Expand All @@ -115,62 +42,50 @@ function get_docs(node::ExprNode)
String["Session struct ([upstream documentation](https://api.libssh.org/stable/libssh_tutor_guided_tour.html))."]

# The generic case where we try to generate a link to the upstream docs
elseif node.type isa AbstractFunctionNodeType
# Get raw source code
source = Clang.getSourceCode(node.cursor)

# Don't bother documenting deprecated functions
if occursin("SSH_DEPRECATED", source)
return String["Deprecated function."]
end

# Remove leading macro
source = strip(chopprefix(strip(source), "LIBSSH_API"))
# Normalize to remove newlines and extraneous whitespace
source = replace(source, '\n' => ' ')
source = replace(source, r"\s{2,}" => ' ')

# Find the start of the argument list
args_start = findfirst('(', source)
# Find the end of the function name. We do a search to ignore any
# whitespace between the name and parenthesis like in 'int foo ()'.
name_end = findprev(!isspace, source, args_start - 1)
# Note the special case for '*' to handle signatures like 'void *foo()'
name_start = findprev(x -> isspace(x) || x == '*', source, name_end) + 1
func_name = source[name_start:name_end]

# Find the return type, and replace types of the form 'void *name' with
# 'void* name' (because that's what doxygen does).
ret_str = replace(source[1:name_end], " *" => "* ")

# Generate the signature that doxygen uses to generate its links:
# $ret $name$name($args)
# See: https://github.com/doxygen/doxygen/blob/master/src/memberdef.cpp#L4249
# And: https://stackoverflow.com/a/14243458
signature = ret_str * func_name * source[args_start:end]

# Get the file that the node was defined in
location = Clang.getCursorLocation(node.cursor)
cxfile_ptr = Ref{Ptr{Nothing}}()
LibClang.clang_getFileLocation(location, cxfile_ptr, C_NULL, C_NULL, C_NULL)
file_cxstring = LibClang.clang_getFileName(cxfile_ptr[])
file_cstr = LibClang.clang_getCString(file_cxstring)
file_path = unsafe_string(file_cstr)
LibClang.clang_disposeString(file_cxstring)
filename = basename(file_path)

# Generate the final URL
hash = bytes2hex(md5(signature))
group = get_doxygen_group(func_name, filename)
url = "https://api.libssh.org/stable/group__$(group).html#ga$(hash)"
elseif node.type isa AbstractFunctionNodeType && haskey(tags, node.id)
anchorfile, anchor = ctx_objects[:tags][node.id]
url = "https://api.libssh.org/stable/$(anchorfile)#$(anchor)"

String["[Upstream documentation]($url)."]
else
String[]
end
end

"""
    read_tags()

Read the function info from the Doxygen tag file at `libssh_jll.doxygen_tags`
into a dict.

The returned dict maps each function name (as a `Symbol`) to a tuple of
`(anchorfile, anchor)`. Only members with `kind="function"` that belong to a
compound with `kind="group"` are collected.
"""
function read_tags()
    document = read(libssh_jll.doxygen_tags, XML.Node)
    # NOTE(review): presumably the first child of the document is the XML
    # declaration and the second one is the root element -- confirm against the
    # tag file.
    root = XML.children(document)[2]

    anchors = Dict{Symbol, Any}()
    for compound in XML.children(root)
        # We only care about Doxygen groups
        compound["kind"] == "group" || continue

        for member in XML.children(compound)
            isnothing(member) && continue

            member_attrs = XML.attributes(member)
            if isnothing(member_attrs) || get(member_attrs, "kind", "") != "function"
                continue
            end

            # The 2nd, 3rd and 4th children of a function member are read as its
            # name, anchor file and anchor respectively.
            fields = XML.children(member)
            name, anchorfile, anchor = map(i -> XML.simplevalue(fields[i]), (2, 3, 4))

            anchors[Symbol(name)] = (anchorfile, anchor)
        end
    end

    return anchors
end

cd(@__DIR__) do
# Load the doxygen tags
ctx_objects[:tags] = read_tags()

# Set the options
options = load_options(joinpath(@__DIR__, "generator.toml"))
options["general"]["callback_documentation"] = get_docs
ctx_objects[:codegen_options] = options["codegen"]
Expand All @@ -181,6 +96,7 @@ cd(@__DIR__) do
args = get_default_args()
push!(args, "-I$include_dir")

# Generate the bindings
ctx = create_context(headers, args, options)
ctx_objects[:dag] = ctx.dag
build!(ctx)
Expand Down
Loading

0 comments on commit 458ec8c

Please sign in to comment.