Merge pull request #170 from JuliaAI/dev

For a 2.3.2 release
JuliaAI · Oct 11, 2021 · b2124a1 · b2124a1
2 parents 1ac911a + fb66054
commit b2124a1
Show file tree

Hide file tree

Showing 6 changed files with 10 additions and 91 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,15 +1,14 @@
 name = "ScientificTypes"
 uuid = "321657f4-b219-11e9-178b-2701a2544e81"
 authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "2.3.1"
+version = "2.3.2"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
 CorpusLoaders = "214a0ac2-f95b-54f7-a80b-442ed9c2c9e8"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
-PersistenceDiagramsBase = "b1ad91c1-539c-4ace-90bd-ea06abc420fa"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
@@ -19,9 +18,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 [compat]
 CategoricalArrays = "0.8, 0.9, 0.10"
 ColorTypes = "0.9, 0.10, 0.11"
-CorpusLoaders = "0.3.2"
 Distributions = "0.25.1"
-PersistenceDiagramsBase = "0.1"
 PrettyTables = "1"
 Reexport = "1.2"
 ScientificTypesBase = "2.2"

diff --git a/src/ScientificTypes.jl b/src/ScientificTypes.jl
@@ -6,7 +6,6 @@ using Reexport
 using Tables
 using CategoricalArrays
 using ColorTypes
-using PersistenceDiagramsBase
 using CorpusLoaders
 using PrettyTables
 using Dates

diff --git a/src/convention/scitype.jl b/src/convention/scitype.jl
@@ -16,7 +16,7 @@ ColorImage{size(img)...}
 
 # Persistence diagrams
 
-ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram
+# ST.scitype(::PersistenceDiagram, ::DefaultConvention) = PersistenceDiagram
 
 # CategoricalArray scitype
 
@@ -66,34 +66,6 @@ ST.scitype(::Distributions.Sampleable{F,S}) where {F,S} =
 ST.scitype(::Distributions.Distribution{F,S}) where {F,S} =
     Density{space_scitype(F,S)}
 
-# Text analysis - EXPERIMENTAL
-
-# This would be less of a hack if some of #155 were adopted.
-
-type2scitype(T::Type) = ST.Scitype(T, DefaultConvention())
-type2scitype(::Type{<:AbstractVector{T}}) where T =
-    AbstractVector{type2scitype(T)}
-type2scitype(::NTuple{N,T}) where {N,T} = NTuple{type2scitype{T}}
-const PlainNGram{N}  = NTuple{N,<:AbstractString}
-const TaggedNGram{N} = NTuple{N,<:CorpusLoaders.TaggedWord}
-ST.scitype(::TaggedWord, ::DefaultConvention) = Annotated{Textual}
-ST.scitype(::Document{<:AbstractVector{T}}, ::DefaultConvention) where T =
-    Annotated{AbstractVector{type2scitype(T)}}
-ST.scitype(::AbstractDict{<:AbstractString,<:Integer},
-           ::DefaultConvention) = Multiset{Textual}
-ST.scitype(::AbstractDict{<:TaggedWord,<:Integer},
-           ::DefaultConvention) = Multiset{Annotated{Textual}}
-ST.scitype(::AbstractDict{<:Union{TaggedWord,AbstractString},<:Integer},
-           ::DefaultConvention) =
-               Multiset{Union{Textual,Annotated{Textual}}}
-ST.scitype(::AbstractDict{<:PlainNGram{N}}) where N =
-    Multiset{NTuple{N,Textual}}
-ST.scitype(::AbstractDict{<:TaggedNGram{N}}) where N =
-    Multiset{NTuple{N,Annotated{Textual}}}
-ST.scitype(::AbstractDict{<:PlainNGram}) =
-    Multiset{NTuple{<:Any,Textual}}
-ST.scitype(::AbstractDict{<:TaggedNGram}) =
-    Multiset{NTuple{<:Any,Annotated{Textual}}}
 
 # Scitype for fast array broadcasting
 
@@ -104,6 +76,4 @@ ST.Scitype(::Type{<:TimeType},           ::DefaultConvention) = ScientificTimeTy
 ST.Scitype(::Type{<:Date},               ::DefaultConvention) = ScientificDate
 ST.Scitype(::Type{<:Time},               ::DefaultConvention) = ScientificTime
 ST.Scitype(::Type{<:DateTime},           ::DefaultConvention) = ScientificDateTime
-ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram
-ST.Scitype(::Type{<:TaggedWord},         ::DefaultConvention) =
-    Annotated{Textual}
+# ST.Scitype(::Type{<:PersistenceDiagram}, ::DefaultConvention) = PersistenceDiagram
diff --git a/test/basic_tests.jl b/test/basic_tests.jl
@@ -86,13 +86,13 @@ end
     @test scitype(gray_image) == GrayImage{10,20}
 end
 
-@testset "PersistenceDiagrams" begin
-    diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0)
-    @test scitype(diagram) == PersistenceDiagram
+# @testset "PersistenceDiagrams" begin
+#     diagram = PersistenceDiagram([(1, Inf), (2, 3)], dim=0)
+#     @test scitype(diagram) == PersistenceDiagram
 
-    diagrams = [diagram, diagram, diagram]
-    @test scitype(diagrams) == Vec{PersistenceDiagram}
-end
+#     diagrams = [diagram, diagram, diagram]
+#     @test scitype(diagrams) == Vec{PersistenceDiagram}
+# end
 
 @testset "temporal types" begin
     d = Date(2020, 4, 21)

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,6 +1,6 @@
 using Test, ScientificTypes, ScientificTypesBase, Random
 using Tables, CategoricalArrays, DataFrames
-using ColorTypes, PersistenceDiagramsBase, CorpusLoaders
+using ColorTypes, CorpusLoaders
 using Dates
 # using CSV # dropped until julia release new LTS as issue for 1.0
 import Distributions

diff --git a/test/scitypes.jl b/test/scitypes.jl
@@ -240,53 +240,6 @@ end
     @test scitype(FooSampleable()) == Sampleable{Count}
 end
 
-@testset "text analysis" begin
-    tagged_word = CorpusLoaders.PosTaggedWord("NN", "wheelbarrow")
-    tagged_word2 = CorpusLoaders.PosTaggedWord("NN", "soil")
-    @test scitype(tagged_word) == Annotated{Textual}
-    bag_of_words = Dict("cat"=>1, "dog"=>3)
-    @test scitype(bag_of_words) == Multiset{Textual}
-    bag_of_tagged_words = Dict(tagged_word => 5)
-    @test scitype(bag_of_tagged_words) == Multiset{Annotated{Textual}}
-    @test scitype(Document("My Document", "kadsfkj")) == Unknown
-    @test scitype(Document([tagged_word, tagged_word2])) ==
-        Annotated{AbstractVector{Annotated{Textual}}}
-    @test scitype(Document("My Other Doc", [tagged_word, tagged_word2])) ==
-        Annotated{AbstractVector{Annotated{Textual}}}
-    nested_tokens = [["dog", "cat"], ["bird", "cat"]]
-    @test scitype(Document("Essay Number 1", nested_tokens)) ==
-        Annotated{AbstractVector{AbstractVector{Textual}}}
-
-    @test scitype(Dict(("cat", "in") => 3)) == Multiset{Tuple{Textual,Textual}}
-    bag_of_words = Dict("cat in" => 1,
-                        "the hat" => 1,
-                        "the" => 2,
-                        "cat" => 1,
-                        "hat" => 1,
-                        "in the" => 1,
-                        "in" => 1,
-                        "the cat" => 1)
-    bag_of_ngrams =
-        Dict(Tuple(String.(split(k))) => v for (k, v) in bag_of_words)
-    # Dict{Tuple{String, Vararg{String, N} where N}, Int64} with 8 entries:
-    #   ("cat",)       => 1
-    #   ("cat", "in")  => 1
-    #   ("in",)        => 1
-    #   ("the", "hat") => 1
-    #   ("the",)       => 2
-    #   ("hat",)       => 1
-    #   ("in", "the")  => 1
-    #   ("the", "cat") => 1
-    @test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Textual}}
-
-    @test scitype(Dict((tagged_word, tagged_word2) => 3)) ==
-        Multiset{Tuple{Annotated{Textual},Annotated{Textual}}}
-    bag_of_ngrams = Dict((tagged_word, tagged_word2) => 3,
-                        (tagged_word,) => 7)
-    @test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Annotated{Textual}}}
-
-end
-
 @testset "Autotype+tight" begin
     x = [1,2,3,missing];
     x = x[1:3]