Skip to content

Commit

Permalink
Merge pull request #170 from JuliaAI/dev
Browse files Browse the repository at this point in the history
For a 2.3.2 release
  • Loading branch information
ablaom authored Oct 11, 2021
2 parents 1ac911a + fb66054 commit b2124a1
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 91 deletions.
5 changes: 1 addition & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
name = "ScientificTypes"
uuid = "321657f4-b219-11e9-178b-2701a2544e81"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
version = "2.3.1"
version = "2.3.2"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
CorpusLoaders = "214a0ac2-f95b-54f7-a80b-442ed9c2c9e8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
PersistenceDiagramsBase = "b1ad91c1-539c-4ace-90bd-ea06abc420fa"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
Expand All @@ -19,9 +18,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
[compat]
CategoricalArrays = "0.8, 0.9, 0.10"
ColorTypes = "0.9, 0.10, 0.11"
CorpusLoaders = "0.3.2"
Distributions = "0.25.1"
PersistenceDiagramsBase = "0.1"
PrettyTables = "1"
Reexport = "1.2"
ScientificTypesBase = "2.2"
Expand Down
1 change: 0 additions & 1 deletion src/ScientificTypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ using Reexport
using Tables
using CategoricalArrays
using ColorTypes
using PersistenceDiagramsBase
using CorpusLoaders
using PrettyTables
using Dates
Expand Down
34 changes: 2 additions & 32 deletions src/convention/scitype.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ ColorImage{size(img)...}

# Persistence diagrams

ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram
# ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram

# CategoricalArray scitype

Expand Down Expand Up @@ -66,34 +66,6 @@ ST.scitype(::Distributions.Sampleable{F,S}) where {F,S} =
# A `Distributions.Distribution{F,S}` is given the scitype `Density{Ω}`, where
# `Ω` is whatever `space_scitype(F, S)` returns for its two type parameters.
function ST.scitype(::Distributions.Distribution{F,S}) where {F,S}
    return Density{space_scitype(F, S)}
end

# Text analysis - EXPERIMENTAL

# This would be less of a hack if some of #155 were adopted.

# `type2scitype` maps a machine *type* (not a value) to a scitype under the
# default convention. Container types are handled recursively so that the
# scitype of the element type is carried into the result.
type2scitype(T::Type) = ST.Scitype(T, DefaultConvention())
type2scitype(::Type{<:AbstractVector{T}}) where T =
    AbstractVector{type2scitype(T)}
# Homogeneous tuple types. Like the two methods above, this dispatches on the
# *type*; the length `N` is kept and only the element type is translated.
type2scitype(::Type{NTuple{N,T}}) where {N,T} = NTuple{N,type2scitype(T)}
# Keep the empty tuple type on the generic rule: it also matches the `NTuple`
# method (with `N == 0`), where `T` would be unbound.
type2scitype(::Type{Tuple{}}) = ST.Scitype(Tuple{}, DefaultConvention())

# Tuple types used as n-gram keys below: `N` strings, or `N` tagged words.
const PlainNGram{N} = NTuple{N,<:AbstractString}
const TaggedNGram{N} = NTuple{N,<:CorpusLoaders.TaggedWord}

# A single tagged word.
ST.scitype(::TaggedWord, ::DefaultConvention) = Annotated{Textual}

# A `Document` whose type parameter is an `AbstractVector{T}`: the element
# scitype is computed from the type `T` alone, via `type2scitype`.
ST.scitype(::Document{<:AbstractVector{T}}, ::DefaultConvention) where T =
    Annotated{AbstractVector{type2scitype(T)}}

# Integer-valued dictionaries keyed by strings, tagged words, or a mix of both.
ST.scitype(::AbstractDict{<:AbstractString,<:Integer},
           ::DefaultConvention) = Multiset{Textual}
ST.scitype(::AbstractDict{<:TaggedWord,<:Integer},
           ::DefaultConvention) = Multiset{Annotated{Textual}}
ST.scitype(::AbstractDict{<:Union{TaggedWord,AbstractString},<:Integer},
           ::DefaultConvention) =
    Multiset{Union{Textual,Annotated{Textual}}}

# Dictionaries keyed by n-grams. The first two methods apply when all keys
# share one length `N`; the last two cover key types of unspecified length.
# NOTE(review): unlike the methods above, these take no `::DefaultConvention`
# argument and do not constrain the value type - confirm this is intended.
ST.scitype(::AbstractDict{<:PlainNGram{N}}) where N =
    Multiset{NTuple{N,Textual}}
ST.scitype(::AbstractDict{<:TaggedNGram{N}}) where N =
    Multiset{NTuple{N,Annotated{Textual}}}
ST.scitype(::AbstractDict{<:PlainNGram}) =
    Multiset{NTuple{<:Any,Textual}}
ST.scitype(::AbstractDict{<:TaggedNGram}) =
    Multiset{NTuple{<:Any,Annotated{Textual}}}

# Scitype for fast array broadcasting

Expand All @@ -104,6 +76,4 @@ ST.Scitype(::Type{<:TimeType}, ::DefaultConvention) = ScientificTimeTy
ST.Scitype(::Type{<:Date}, ::DefaultConvention) = ScientificDate
ST.Scitype(::Type{<:Time}, ::DefaultConvention) = ScientificTime
ST.Scitype(::Type{<:DateTime}, ::DefaultConvention) = ScientificDateTime
ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram
ST.Scitype(::Type{<:TaggedWord}, ::DefaultConvention) =
Annotated{Textual}
# ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram
12 changes: 6 additions & 6 deletions test/basic_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ end
@test scitype(gray_image) == GrayImage{10,20}
end

@testset "PersistenceDiagrams" begin
diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0)
@test scitype(diagram) == PersistenceDiagram
# @testset "PersistenceDiagrams" begin
# diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0)
# @test scitype(diagram) == PersistenceDiagram

diagrams = [diagram, diagram, diagram]
@test scitype(diagrams) == Vec{PersistenceDiagram}
end
# diagrams = [diagram, diagram, diagram]
# @test scitype(diagrams) == Vec{PersistenceDiagram}
# end

@testset "temporal types" begin
d = Date(2020, 4, 21)
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Test, ScientificTypes, ScientificTypesBase, Random
using Tables, CategoricalArrays, DataFrames
using ColorTypes, PersistenceDiagramsBase, CorpusLoaders
using ColorTypes, CorpusLoaders
using Dates
# using CSV # dropped until Julia releases a new LTS, because of an issue on Julia 1.0
import Distributions
Expand Down
47 changes: 0 additions & 47 deletions test/scitypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -240,53 +240,6 @@ end
@test scitype(FooSampleable()) == Sampleable{Count}
end

# Checks the scitypes reported for tagged words, documents, bags of words and
# bags of n-grams.
@testset "text analysis" begin
# a single tagged word
tagged_word = CorpusLoaders.PosTaggedWord("NN", "wheelbarrow")
tagged_word2 = CorpusLoaders.PosTaggedWord("NN", "soil")
@test scitype(tagged_word) == Annotated{Textual}
# dictionaries keyed by plain strings, then by tagged words
bag_of_words = Dict("cat"=>1, "dog"=>3)
@test scitype(bag_of_words) == Multiset{Textual}
bag_of_tagged_words = Dict(tagged_word => 5)
@test scitype(bag_of_tagged_words) == Multiset{Annotated{Textual}}
# a Document built from two strings is Unknown; Documents built from vectors
# of tokens are Annotated vectors, with or without a leading title string
@test scitype(Document("My Document", "kadsfkj")) == Unknown
@test scitype(Document([tagged_word, tagged_word2])) ==
Annotated{AbstractVector{Annotated{Textual}}}
@test scitype(Document("My Other Doc", [tagged_word, tagged_word2])) ==
Annotated{AbstractVector{Annotated{Textual}}}
nested_tokens = [["dog", "cat"], ["bird", "cat"]]
@test scitype(Document("Essay Number 1", nested_tokens)) ==
Annotated{AbstractVector{AbstractVector{Textual}}}

# dictionaries keyed by tuples of strings: first a single fixed key length
@test scitype(Dict(("cat", "in") => 3)) == Multiset{Tuple{Textual,Textual}}
bag_of_words = Dict("cat in" => 1,
"the hat" => 1,
"the" => 2,
"cat" => 1,
"hat" => 1,
"in the" => 1,
"in" => 1,
"the cat" => 1)
# splitting each key on whitespace gives tuple keys of length one or two
# (the resulting dictionary is shown in the comment below)
bag_of_ngrams =
Dict(Tuple(String.(split(k))) => v for (k, v) in bag_of_words)
# Dict{Tuple{String, Vararg{String, N} where N}, Int64} with 8 entries:
# ("cat",) => 1
# ("cat", "in") => 1
# ("in",) => 1
# ("the", "hat") => 1
# ("the",) => 2
# ("hat",) => 1
# ("in", "the") => 1
# ("the", "cat") => 1
@test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Textual}}

# dictionaries keyed by tuples of tagged words: fixed length, then mixed length
@test scitype(Dict((tagged_word, tagged_word2) => 3)) ==
Multiset{Tuple{Annotated{Textual},Annotated{Textual}}}
bag_of_ngrams = Dict((tagged_word, tagged_word2) => 3,
(tagged_word,) => 7)
@test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Annotated{Textual}}}

end

@testset "Autotype+tight" begin
x = [1,2,3,missing];
x = x[1:3]
Expand Down

0 comments on commit b2124a1

Please sign in to comment.